path: root/net
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c6
-rw-r--r--net/8021q/vlan.c41
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_core.c85
-rw-r--r--net/8021q/vlan_dev.c243
-rw-r--r--net/8021q/vlanproc.c2
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/9p/client.c168
-rw-r--r--net/9p/protocol.c56
-rw-r--r--net/9p/trans_common.c92
-rw-r--r--net/9p/trans_common.h32
-rw-r--r--net/9p/trans_fd.c56
-rw-r--r--net/9p/trans_rdma.c9
-rw-r--r--net/9p/trans_virtio.c192
-rw-r--r--net/9p/util.c4
-rw-r--r--net/Kconfig20
-rw-r--r--net/Makefile4
-rw-r--r--net/appletalk/ddp.c44
-rw-r--r--net/atm/br2684.c2
-rw-r--r--net/atm/clip.c8
-rw-r--r--net/atm/common.c1
-rw-r--r--net/atm/lec.c4
-rw-r--r--net/atm/lec.h2
-rw-r--r--net/ax25/af_ax25.c16
-rw-r--r--net/ax25/ax25_iface.c3
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/aggregation.c41
-rw-r--r--net/batman-adv/aggregation.h10
-rw-r--r--net/batman-adv/bat_debugfs.c10
-rw-r--r--net/batman-adv/bat_debugfs.h2
-rw-r--r--net/batman-adv/bat_sysfs.c63
-rw-r--r--net/batman-adv/bat_sysfs.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/gateway_client.c322
-rw-r--r--net/batman-adv/gateway_client.h4
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c483
-rw-r--r--net/batman-adv/hard-interface.h37
-rw-r--r--net/batman-adv/hash.c28
-rw-r--r--net/batman-adv/hash.h119
-rw-r--r--net/batman-adv/icmp_socket.c62
-rw-r--r--net/batman-adv/icmp_socket.h4
-rw-r--r--net/batman-adv/main.c36
-rw-r--r--net/batman-adv/main.h73
-rw-r--r--net/batman-adv/originator.c324
-rw-r--r--net/batman-adv/originator.h53
-rw-r--r--net/batman-adv/packet.h8
-rw-r--r--net/batman-adv/ring_buffer.c2
-rw-r--r--net/batman-adv/ring_buffer.h2
-rw-r--r--net/batman-adv/routing.c1190
-rw-r--r--net/batman-adv/routing.h36
-rw-r--r--net/batman-adv/send.c176
-rw-r--r--net/batman-adv/send.h14
-rw-r--r--net/batman-adv/soft-interface.c512
-rw-r--r--net/batman-adv/soft-interface.h5
-rw-r--r--net/batman-adv/translation-table.c584
-rw-r--r--net/batman-adv/translation-table.h28
-rw-r--r--net/batman-adv/types.h92
-rw-r--r--net/batman-adv/unicast.c137
-rw-r--r--net/batman-adv/unicast.h27
-rw-r--r--net/batman-adv/vis.c282
-rw-r--r--net/batman-adv/vis.h2
-rw-r--r--net/bluetooth/Kconfig20
-rw-r--r--net/bluetooth/Makefile4
-rw-r--r--net/bluetooth/af_bluetooth.c51
-rw-r--r--net/bluetooth/bnep/bnep.h148
-rw-r--r--net/bluetooth/bnep/core.c73
-rw-r--r--net/bluetooth/bnep/sock.c3
-rw-r--r--net/bluetooth/cmtp/capi.c9
-rw-r--r--net/bluetooth/cmtp/cmtp.h11
-rw-r--r--net/bluetooth/cmtp/core.c39
-rw-r--r--net/bluetooth/cmtp/sock.c2
-rw-r--r--net/bluetooth/hci_conn.c175
-rw-r--r--net/bluetooth/hci_core.c490
-rw-r--r--net/bluetooth/hci_event.c930
-rw-r--r--net/bluetooth/hci_sock.c8
-rw-r--r--net/bluetooth/hci_sysfs.c129
-rw-r--r--net/bluetooth/hidp/core.c300
-rw-r--r--net/bluetooth/hidp/hidp.h21
-rw-r--r--net/bluetooth/hidp/sock.c7
-rw-r--r--net/bluetooth/l2cap.c4930
-rw-r--r--net/bluetooth/l2cap_core.c4240
-rw-r--r--net/bluetooth/l2cap_sock.c1119
-rw-r--r--net/bluetooth/mgmt.c2053
-rw-r--r--net/bluetooth/rfcomm/core.c25
-rw-r--r--net/bluetooth/rfcomm/sock.c5
-rw-r--r--net/bluetooth/rfcomm/tty.c6
-rw-r--r--net/bluetooth/sco.c24
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/bridge/br.c1
-rw-r--r--net/bridge/br_device.c119
-rw-r--r--net/bridge/br_fdb.c313
-rw-r--r--net/bridge/br_if.c121
-rw-r--r--net/bridge/br_input.c32
-rw-r--r--net/bridge/br_ioctl.c42
-rw-r--r--net/bridge/br_multicast.c16
-rw-r--r--net/bridge/br_netfilter.c27
-rw-r--r--net/bridge/br_netlink.c60
-rw-r--r--net/bridge/br_notify.c11
-rw-r--r--net/bridge/br_private.h26
-rw-r--r--net/bridge/br_private_stp.h13
-rw-r--r--net/bridge/br_stp.c87
-rw-r--r--net/bridge/br_stp_if.c32
-rw-r--r--net/bridge/br_stp_timer.c1
-rw-r--r--net/bridge/br_sysfs_br.c39
-rw-r--r--net/bridge/br_sysfs_if.c26
-rw-r--r--net/bridge/netfilter/ebt_ip6.c46
-rw-r--r--net/bridge/netfilter/ebtables.c65
-rw-r--r--net/caif/Makefile2
-rw-r--r--net/caif/caif_config_util.c99
-rw-r--r--net/caif/caif_dev.c387
-rw-r--r--net/caif/caif_socket.c104
-rw-r--r--net/caif/cfcnfg.c509
-rw-r--r--net/caif/cfctrl.c196
-rw-r--r--net/caif/cfdgml.c14
-rw-r--r--net/caif/cffrml.c60
-rw-r--r--net/caif/cfmuxl.c162
-rw-r--r--net/caif/cfpkt_skbuff.c205
-rw-r--r--net/caif/cfrfml.c4
-rw-r--r--net/caif/cfserl.c8
-rw-r--r--net/caif/cfsrvl.c40
-rw-r--r--net/caif/cfutill.c9
-rw-r--r--net/caif/cfveil.c13
-rw-r--r--net/caif/cfvidl.c5
-rw-r--r--net/caif/chnl_net.c45
-rw-r--r--net/can/af_can.c76
-rw-r--r--net/can/bcm.c15
-rw-r--r--net/can/raw.c13
-rw-r--r--net/ceph/Kconfig1
-rw-r--r--net/ceph/armor.c4
-rw-r--r--net/ceph/auth.c8
-rw-r--r--net/ceph/auth_x.c8
-rw-r--r--net/ceph/ceph_common.c113
-rw-r--r--net/ceph/crypto.c73
-rw-r--r--net/ceph/crypto.h4
-rw-r--r--net/ceph/messenger.c97
-rw-r--r--net/ceph/mon_client.c2
-rw-r--r--net/ceph/osd_client.c638
-rw-r--r--net/ceph/pagevec.c18
-rw-r--r--net/compat.c16
-rw-r--r--net/core/dev.c744
-rw-r--r--net/core/dev_addr_lists.c18
-rw-r--r--net/core/drop_monitor.c14
-rw-r--r--net/core/dst.c92
-rw-r--r--net/core/ethtool.c804
-rw-r--r--net/core/fib_rules.c9
-rw-r--r--net/core/filter.c73
-rw-r--r--net/core/flow.c14
-rw-r--r--net/core/gen_estimator.c9
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c13
-rw-r--r--net/core/net-sysfs.c77
-rw-r--r--net/core/net_namespace.c22
-rw-r--r--net/core/netpoll.c41
-rw-r--r--net/core/pktgen.c388
-rw-r--r--net/core/rtnetlink.c115
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/skbuff.c14
-rw-r--r--net/core/sock.c10
-rw-r--r--net/core/sysctl_net_core.c9
-rw-r--r--net/core/utils.c24
-rw-r--r--net/dcb/dcbnl.c150
-rw-r--r--net/dccp/ccids/ccid2.c9
-rw-r--r--net/dccp/input.c7
-rw-r--r--net/dccp/ipv4.c97
-rw-r--r--net/dccp/ipv6.c198
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/output.c6
-rw-r--r--net/decnet/af_decnet.c16
-rw-r--r--net/decnet/dn_dev.c17
-rw-r--r--net/decnet/dn_fib.c23
-rw-r--r--net/decnet/dn_nsp_out.c16
-rw-r--r--net/decnet/dn_route.c311
-rw-r--r--net/decnet/dn_rules.c17
-rw-r--r--net/decnet/dn_table.c11
-rw-r--r--net/dns_resolver/dns_key.c20
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/mv88e6060.c7
-rw-r--r--net/dsa/mv88e6131.c51
-rw-r--r--net/dsa/mv88e6xxx.h2
-rw-r--r--net/dsa/slave.c1
-rw-r--r--net/econet/af_econet.c10
-rw-r--r--net/ieee802154/Makefile2
-rw-r--r--net/ipv4/Kconfig42
-rw-r--r--net/ipv4/Makefile6
-rw-r--r--net/ipv4/af_inet.c95
-rw-r--r--net/ipv4/ah4.c34
-rw-r--r--net/ipv4/arp.c28
-rw-r--r--net/ipv4/cipso_ipv4.c121
-rw-r--r--net/ipv4/datagram.c31
-rw-r--r--net/ipv4/devinet.c120
-rw-r--r--net/ipv4/esp4.c111
-rw-r--r--net/ipv4/fib_frontend.c227
-rw-r--r--net/ipv4/fib_hash.c1133
-rw-r--r--net/ipv4/fib_lookup.h13
-rw-r--r--net/ipv4/fib_rules.c25
-rw-r--r--net/ipv4/fib_semantics.c266
-rw-r--r--net/ipv4/fib_trie.c388
-rw-r--r--net/ipv4/icmp.c307
-rw-r--r--net/ipv4/igmp.c87
-rw-r--r--net/ipv4/inet_connection_sock.c63
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_lro.c4
-rw-r--r--net/ipv4/inetpeer.c161
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_fragment.c89
-rw-r--r--net/ipv4/ip_gre.c92
-rw-r--r--net/ipv4/ip_input.c6
-rw-r--r--net/ipv4/ip_options.c67
-rw-r--r--net/ipv4/ip_output.c453
-rw-r--r--net/ipv4/ip_sockglue.c37
-rw-r--r--net/ipv4/ipcomp.c4
-rw-r--r--net/ipv4/ipconfig.c37
-rw-r--r--net/ipv4/ipip.c59
-rw-r--r--net/ipv4/ipmr.c100
-rw-r--r--net/ipv4/netfilter.c39
-rw-r--r--net/ipv4/netfilter/Kconfig13
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/arp_tables.c27
-rw-r--r--net/ipv4/netfilter/ip_tables.c37
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c12
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c3
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c134
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c17
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c37
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c9
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c9
-rw-r--r--net/ipv4/ping.c935
-rw-r--r--net/ipv4/raw.c96
-rw-r--r--net/ipv4/route.c1319
-rw-r--r--net/ipv4/syncookies.c25
-rw-r--r--net/ipv4/sysctl_net_ipv4.c71
-rw-r--r--net/ipv4/tcp.c27
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cubic.c56
-rw-r--r--net/ipv4/tcp_highspeed.c2
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_hybla.c2
-rw-r--r--net/ipv4/tcp_illinois.c2
-rw-r--r--net/ipv4/tcp_input.c26
-rw-r--r--net/ipv4/tcp_ipv4.c131
-rw-r--r--net/ipv4/tcp_lp.c4
-rw-r--r--net/ipv4/tcp_output.c7
-rw-r--r--net/ipv4/tcp_scalable.c2
-rw-r--r--net/ipv4/tcp_timer.c3
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/tcp_westwood.c2
-rw-r--r--net/ipv4/tcp_yeah.c4
-rw-r--r--net/ipv4/udp.c173
-rw-r--r--net/ipv4/xfrm4_output.c8
-rw-r--r--net/ipv4/xfrm4_policy.c97
-rw-r--r--net/ipv4/xfrm4_state.c23
-rw-r--r--net/ipv6/addrconf.c67
-rw-r--r--net/ipv6/af_inet6.c53
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/anycast.c16
-rw-r--r--net/ipv6/datagram.c88
-rw-r--r--net/ipv6/esp6.c116
-rw-r--r--net/ipv6/exthdrs.c12
-rw-r--r--net/ipv6/fib6_rules.c19
-rw-r--r--net/ipv6/icmp.c234
-rw-r--r--net/ipv6/inet6_connection_sock.c85
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_fib.c24
-rw-r--r--net/ipv6/ip6_flowlabel.c6
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_output.c202
-rw-r--r--net/ipv6/ip6_tunnel.c125
-rw-r--r--net/ipv6/ip6mr.c62
-rw-r--r--net/ipv6/ipcomp6.c5
-rw-r--r--net/ipv6/ipv6_sockglue.c10
-rw-r--r--net/ipv6/mcast.c71
-rw-r--r--net/ipv6/mip6.c24
-rw-r--r--net/ipv6/ndisc.c75
-rw-r--r--net/ipv6/netfilter.c40
-rw-r--r--net/ipv6/netfilter/ip6_tables.c30
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c3
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c25
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c3
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c3
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c2
-rw-r--r--net/ipv6/proc.c40
-rw-r--r--net/ipv6/raw.c124
-rw-r--r--net/ipv6/reassembly.c4
-rw-r--r--net/ipv6/route.c367
-rw-r--r--net/ipv6/sit.c101
-rw-r--r--net/ipv6/syncookies.c44
-rw-r--r--net/ipv6/sysctl_net_ipv6.c18
-rw-r--r--net/ipv6/tcp_ipv6.c224
-rw-r--r--net/ipv6/udp.c118
-rw-r--r--net/ipv6/xfrm6_mode_beet.c2
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c6
-rw-r--r--net/ipv6/xfrm6_output.c6
-rw-r--r--net/ipv6/xfrm6_policy.c51
-rw-r--r--net/ipv6/xfrm6_state.c21
-rw-r--r--net/ipv6/xfrm6_tunnel.c10
-rw-r--r--net/ipx/Kconfig1
-rw-r--r--net/ipx/af_ipx.c54
-rw-r--r--net/irda/af_irda.c3
-rw-r--r--net/irda/ircomm/ircomm_core.c6
-rw-r--r--net/irda/ircomm/ircomm_lmp.c5
-rw-r--r--net/irda/ircomm/ircomm_tty.c14
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c12
-rw-r--r--net/irda/iriap.c6
-rw-r--r--net/irda/irlan/irlan_filter.c4
-rw-r--r--net/irda/irlan/irlan_provider.c3
-rw-r--r--net/irda/irlap.c2
-rw-r--r--net/irda/irlap_event.c11
-rw-r--r--net/irda/irlap_frame.c2
-rw-r--r--net/irda/irlmp_event.c2
-rw-r--r--net/irda/irnet/irnet.h2
-rw-r--r--net/irda/irnet/irnet_ppp.c3
-rw-r--r--net/irda/irproc.c5
-rw-r--r--net/irda/irqueue.c2
-rw-r--r--net/irda/irttp.c2
-rw-r--r--net/irda/qos.c8
-rw-r--r--net/irda/timer.c2
-rw-r--r--net/iucv/af_iucv.c11
-rw-r--r--net/iucv/iucv.c81
-rw-r--r--net/key/af_key.c245
-rw-r--r--net/l2tp/l2tp_core.c28
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l2tp/l2tp_ip.c88
-rw-r--r--net/l2tp/l2tp_netlink.c3
-rw-r--r--net/llc/llc_input.c28
-rw-r--r--net/mac80211/Kconfig5
-rw-r--r--net/mac80211/aes_ccm.c6
-rw-r--r--net/mac80211/agg-rx.c10
-rw-r--r--net/mac80211/agg-tx.c86
-rw-r--r--net/mac80211/cfg.c306
-rw-r--r--net/mac80211/chan.c3
-rw-r--r--net/mac80211/debugfs.c97
-rw-r--r--net/mac80211/debugfs_key.c21
-rw-r--r--net/mac80211/debugfs_netdev.c126
-rw-r--r--net/mac80211/debugfs_sta.c26
-rw-r--r--net/mac80211/driver-ops.h154
-rw-r--r--net/mac80211/driver-trace.h525
-rw-r--r--net/mac80211/ht.c32
-rw-r--r--net/mac80211/ibss.c38
-rw-r--r--net/mac80211/ieee80211_i.h70
-rw-r--r--net/mac80211/iface.c16
-rw-r--r--net/mac80211/key.c58
-rw-r--r--net/mac80211/key.h5
-rw-r--r--net/mac80211/main.c131
-rw-r--r--net/mac80211/mesh.c66
-rw-r--r--net/mac80211/mesh.h9
-rw-r--r--net/mac80211/mesh_hwmp.c42
-rw-r--r--net/mac80211/mesh_pathtbl.c168
-rw-r--r--net/mac80211/mesh_plink.c112
-rw-r--r--net/mac80211/mlme.c176
-rw-r--r--net/mac80211/offchannel.c68
-rw-r--r--net/mac80211/pm.c29
-rw-r--r--net/mac80211/rc80211_minstrel.c4
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c114
-rw-r--r--net/mac80211/rc80211_pid.h5
-rw-r--r--net/mac80211/rx.c223
-rw-r--r--net/mac80211/scan.c258
-rw-r--r--net/mac80211/sta_info.c60
-rw-r--r--net/mac80211/sta_info.h62
-rw-r--r--net/mac80211/status.c29
-rw-r--r--net/mac80211/tkip.c4
-rw-r--r--net/mac80211/tkip.h4
-rw-r--r--net/mac80211/tx.c227
-rw-r--r--net/mac80211/util.c27
-rw-r--r--net/mac80211/wep.c34
-rw-r--r--net/mac80211/wep.h4
-rw-r--r--net/mac80211/work.c138
-rw-r--r--net/mac80211/wpa.c99
-rw-r--r--net/netfilter/Kconfig76
-rw-r--r--net/netfilter/Makefile10
-rw-r--r--net/netfilter/core.c20
-rw-r--r--net/netfilter/ipset/Kconfig122
-rw-r--r--net/netfilter/ipset/Makefile24
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c586
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c655
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c514
-rw-r--r--net/netfilter/ipset/ip_set_core.c1708
-rw-r--r--net/netfilter/ipset/ip_set_getport.c155
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c464
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c530
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c548
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c614
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c458
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c564
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c577
-rw-r--r--net/netfilter/ipset/pfxlen.c291
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c66
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c248
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c563
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c1097
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c161
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c61
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c101
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c116
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c20
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c122
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c45
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c161
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c142
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c110
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c25
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c1267
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c22
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c14
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c192
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c82
-rw-r--r--net/netfilter/nf_conntrack_core.c62
-rw-r--r--net/netfilter/nf_conntrack_expect.c34
-rw-r--r--net/netfilter/nf_conntrack_extend.c19
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c32
-rw-r--r--net/netfilter/nf_conntrack_helper.c20
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c74
-rw-r--r--net/netfilter/nf_conntrack_netlink.c53
-rw-r--r--net/netfilter/nf_conntrack_proto.c24
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c18
-rw-r--r--net/netfilter/nf_conntrack_sip.c18
-rw-r--r--net/netfilter/nf_conntrack_snmp.c77
-rw-r--r--net/netfilter/nf_conntrack_standalone.c47
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c120
-rw-r--r--net/netfilter/nf_log.c10
-rw-r--r--net/netfilter/nf_queue.c84
-rw-r--r--net/netfilter/nfnetlink_log.c11
-rw-r--r--net/netfilter/nfnetlink_queue.c22
-rw-r--r--net/netfilter/x_tables.c133
-rw-r--r--net/netfilter/xt_AUDIT.c222
-rw-r--r--net/netfilter/xt_CLASSIFY.c36
-rw-r--r--net/netfilter/xt_DSCP.c2
-rw-r--r--net/netfilter/xt_IDLETIMER.c2
-rw-r--r--net/netfilter/xt_LED.c2
-rw-r--r--net/netfilter/xt_NFQUEUE.c34
-rw-r--r--net/netfilter/xt_TCPMSS.c17
-rw-r--r--net/netfilter/xt_TEE.c27
-rw-r--r--net/netfilter/xt_addrtype.c243
-rw-r--r--net/netfilter/xt_connlimit.c99
-rw-r--r--net/netfilter/xt_conntrack.c77
-rw-r--r--net/netfilter/xt_cpu.c2
-rw-r--r--net/netfilter/xt_devgroup.c82
-rw-r--r--net/netfilter/xt_iprange.c18
-rw-r--r--net/netfilter/xt_ipvs.c2
-rw-r--r--net/netfilter/xt_osf.c11
-rw-r--r--net/netfilter/xt_set.c373
-rw-r--r--net/netlabel/netlabel_cipso_v4.c4
-rw-r--r--net/netlabel/netlabel_domainhash.c10
-rw-r--r--net/netlabel/netlabel_mgmt.c2
-rw-r--r--net/netlabel/netlabel_unlabeled.c42
-rw-r--r--net/netlabel/netlabel_user.h6
-rw-r--r--net/netlink/af_netlink.c35
-rw-r--r--net/netrom/af_netrom.c12
-rw-r--r--net/packet/af_packet.c43
-rw-r--r--net/phonet/Kconfig12
-rw-r--r--net/phonet/af_phonet.c32
-rw-r--r--net/phonet/pep.c834
-rw-r--r--net/phonet/pn_dev.c16
-rw-r--r--net/phonet/pn_netlink.c4
-rw-r--r--net/phonet/socket.c171
-rw-r--r--net/rds/cong.c9
-rw-r--r--net/rds/ib.c9
-rw-r--r--net/rds/ib.h2
-rw-r--r--net/rds/ib_rdma.c27
-rw-r--r--net/rds/ib_send.c7
-rw-r--r--net/rds/iw_cm.c2
-rw-r--r--net/rds/iw_rdma.c2
-rw-r--r--net/rds/iw_send.c2
-rw-r--r--net/rds/loop.c11
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/send.c2
-rw-r--r--net/rfkill/Kconfig11
-rw-r--r--net/rfkill/Makefile1
-rw-r--r--net/rfkill/core.c2
-rw-r--r--net/rfkill/rfkill-regulator.c164
-rw-r--r--net/rose/af_rose.c31
-rw-r--r--net/rose/rose_loopback.c13
-rw-r--r--net/rose/rose_route.c50
-rw-r--r--net/rose/rose_subr.c101
-rw-r--r--net/rxrpc/ar-ack.c2
-rw-r--r--net/rxrpc/ar-connevent.c3
-rw-r--r--net/rxrpc/ar-error.c5
-rw-r--r--net/rxrpc/ar-input.c1
-rw-r--r--net/rxrpc/ar-key.c19
-rw-r--r--net/rxrpc/ar-peer.c32
-rw-r--r--net/rxrpc/ar-transport.c3
-rw-r--r--net/sched/Kconfig51
-rw-r--r--net/sched/Makefile5
-rw-r--r--net/sched/act_api.c55
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_gact.c8
-rw-r--r--net/sched/act_ipt.c16
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c12
-rw-r--r--net/sched/act_police.c17
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c8
-rw-r--r--net/sched/cls_api.c33
-rw-r--r--net/sched/cls_basic.c17
-rw-r--r--net/sched/cls_cgroup.c8
-rw-r--r--net/sched/cls_flow.c6
-rw-r--r--net/sched/cls_fw.c38
-rw-r--r--net/sched/cls_route.c126
-rw-r--r--net/sched/cls_rsvp.h95
-rw-r--r--net/sched/cls_tcindex.c2
-rw-r--r--net/sched/cls_u32.c89
-rw-r--r--net/sched/em_cmp.c47
-rw-r--r--net/sched/em_meta.c50
-rw-r--r--net/sched/em_nbyte.c3
-rw-r--r--net/sched/em_text.c3
-rw-r--r--net/sched/em_u32.c2
-rw-r--r--net/sched/ematch.c37
-rw-r--r--net/sched/sch_api.c171
-rw-r--r--net/sched/sch_atm.c16
-rw-r--r--net/sched/sch_cbq.c362
-rw-r--r--net/sched/sch_choke.c688
-rw-r--r--net/sched/sch_dsmark.c21
-rw-r--r--net/sched/sch_fifo.c50
-rw-r--r--net/sched/sch_generic.c58
-rw-r--r--net/sched/sch_gred.c85
-rw-r--r--net/sched/sch_hfsc.c37
-rw-r--r--net/sched/sch_htb.c108
-rw-r--r--net/sched/sch_mq.c1
-rw-r--r--net/sched/sch_mqprio.c418
-rw-r--r--net/sched/sch_multiq.c8
-rw-r--r--net/sched/sch_netem.c411
-rw-r--r--net/sched/sch_prio.c34
-rw-r--r--net/sched/sch_qfq.c1137
-rw-r--r--net/sched/sch_red.c61
-rw-r--r--net/sched/sch_sfb.c709
-rw-r--r--net/sched/sch_sfq.c69
-rw-r--r--net/sched/sch_tbf.c39
-rw-r--r--net/sched/sch_teql.c36
-rw-r--r--net/sctp/associola.c8
-rw-r--r--net/sctp/auth.c6
-rw-r--r--net/sctp/bind_addr.c12
-rw-r--r--net/sctp/debug.c1
-rw-r--r--net/sctp/endpointola.c20
-rw-r--r--net/sctp/input.c24
-rw-r--r--net/sctp/ipv6.c213
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c27
-rw-r--r--net/sctp/protocol.c87
-rw-r--r--net/sctp/sm_make_chunk.c69
-rw-r--r--net/sctp/sm_sideeffect.c11
-rw-r--r--net/sctp/sm_statefuns.c109
-rw-r--r--net/sctp/sm_statetable.c78
-rw-r--r--net/sctp/socket.c108
-rw-r--r--net/sctp/transport.c27
-rw-r--r--net/sctp/tsnmap.c2
-rw-r--r--net/sctp/ulpevent.c32
-rw-r--r--net/sctp/ulpqueue.c9
-rw-r--r--net/socket.c361
-rw-r--r--net/sunrpc/Kconfig9
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c10
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c4
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c38
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/clnt.c23
-rw-r--r--net/sunrpc/sched.c110
-rw-r--r--net/sunrpc/svcauth_unix.c18
-rw-r--r--net/sunrpc/svcsock.c32
-rw-r--r--net/sunrpc/xprt.c26
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c86
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/sunrpc/xprtrdma/verbs.c53
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h1
-rw-r--r--net/sunrpc/xprtsock.c9
-rw-r--r--net/tipc/Kconfig12
-rw-r--r--net/tipc/addr.c15
-rw-r--r--net/tipc/addr.h20
-rw-r--r--net/tipc/bcast.c69
-rw-r--r--net/tipc/bcast.h3
-rw-r--r--net/tipc/bearer.c151
-rw-r--r--net/tipc/bearer.h73
-rw-r--r--net/tipc/config.c31
-rw-r--r--net/tipc/core.c12
-rw-r--r--net/tipc/core.h4
-rw-r--r--net/tipc/discover.c284
-rw-r--r--net/tipc/discover.h16
-rw-r--r--net/tipc/link.c232
-rw-r--r--net/tipc/link.h30
-rw-r--r--net/tipc/msg.c66
-rw-r--r--net/tipc/msg.h225
-rw-r--r--net/tipc/name_distr.c20
-rw-r--r--net/tipc/net.c32
-rw-r--r--net/tipc/net.h19
-rw-r--r--net/tipc/node.c125
-rw-r--r--net/tipc/node.h36
-rw-r--r--net/tipc/node_subscr.c21
-rw-r--r--net/tipc/node_subscr.h3
-rw-r--r--net/tipc/port.c351
-rw-r--r--net/tipc/port.h87
-rw-r--r--net/tipc/socket.c103
-rw-r--r--net/tipc/subscr.c15
-rw-r--r--net/unix/af_unix.c109
-rw-r--r--net/unix/garbage.c2
-rw-r--r--net/wanrouter/wanmain.c2
-rw-r--r--net/wanrouter/wanproc.c2
-rw-r--r--net/wireless/core.c126
-rw-r--r--net/wireless/core.h33
-rw-r--r--net/wireless/ethtool.c33
-rw-r--r--net/wireless/lib80211_crypt_wep.c3
-rw-r--r--net/wireless/mesh.c23
-rw-r--r--net/wireless/mlme.c19
-rw-r--r--net/wireless/nl80211.c850
-rw-r--r--net/wireless/nl80211.h11
-rw-r--r--net/wireless/reg.c123
-rw-r--r--net/wireless/reg.h1
-rw-r--r--net/wireless/scan.c108
-rw-r--r--net/wireless/sysfs.c2
-rw-r--r--net/wireless/util.c173
-rw-r--r--net/wireless/wext-compat.c5
-rw-r--r--net/x25/Kconfig1
-rw-r--r--net/x25/af_x25.c58
-rw-r--r--net/x25/x25_facilities.c2
-rw-r--r--net/x25/x25_forward.c4
-rw-r--r--net/x25/x25_out.c7
-rw-r--r--net/xfrm/Makefile2
-rw-r--r--net/xfrm/xfrm_algo.c8
-rw-r--r--net/xfrm/xfrm_hash.h32
-rw-r--r--net/xfrm/xfrm_input.c15
-rw-r--r--net/xfrm/xfrm_output.c19
-rw-r--r--net/xfrm/xfrm_policy.c233
-rw-r--r--net/xfrm/xfrm_replay.c550
-rw-r--r--net/xfrm/xfrm_state.c208
-rw-r--r--net/xfrm/xfrm_user.c244
639 files changed, 47782 insertions, 23237 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index c1df2dad8c6b..16102951d36a 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -554,8 +554,7 @@ static void garp_release_port(struct net_device *dev)
 		return;
 	}
 	rcu_assign_pointer(dev->garp_port, NULL);
-	synchronize_rcu();
-	kfree(port);
+	kfree_rcu(port, rcu);
 }
 
 int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
@@ -607,7 +606,6 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	ASSERT_RTNL();
 
 	rcu_assign_pointer(port->applicants[appl->type], NULL);
-	synchronize_rcu();
 
 	/* Delete timer and generate a final TRANSMIT_PDU event to flush out
 	 * all pending messages before the applicant is gone. */
@@ -617,7 +615,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	garp_queue_xmit(app);
 
 	dev_mc_del(dev, appl->proto.group_address);
-	kfree(app);
+	kfree_rcu(app, rcu);
 	garp_release_port(dev);
 }
 EXPORT_SYMBOL_GPL(garp_uninit_applicant);
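The garp.c hunks above replace a blocking synchronize_rcu() followed by kfree() with kfree_rcu(), which only queues the free to run after an RCU grace period. A minimal sketch of that pattern, with made-up structure and function names (not taken from garp.c):

/* Illustrative only: the pattern this commit adopts. */
struct item {
	int data;
	struct rcu_head rcu;		/* required by kfree_rcu(ptr, rcu) */
};

static struct item __rcu *slot;

static void publish(struct item *p)
{
	rcu_assign_pointer(slot, p);	/* readers may now see p under rcu_read_lock() */
}

static void retire(struct item *p)
{
	rcu_assign_pointer(slot, NULL);
	/* Previously: synchronize_rcu(); kfree(p);  -- blocks the caller.
	 * kfree_rcu() defers only the free; in-flight readers stay safe. */
	kfree_rcu(p, rcu);
}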
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6e64f7c6a2e9..b2274d1fd605 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -49,11 +49,6 @@ const char vlan_version[] = DRV_VERSION;
 static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
 static const char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
 
-static struct packet_type vlan_packet_type __read_mostly = {
-	.type = cpu_to_be16(ETH_P_8021Q),
-	.func = vlan_skb_recv, /* VLAN receive method */
-};
-
 /* End of global variables definitions. */
 
 static void vlan_group_free(struct vlan_group *grp)
@@ -124,10 +119,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 
 	grp->nr_vlans--;
 
-	vlan_group_set_device(grp, vlan_id, NULL);
-	if (!grp->killall)
-		synchronize_net();
+	if (vlan->flags & VLAN_FLAG_GVRP)
+		vlan_gvrp_request_leave(dev);
 
+	vlan_group_set_device(grp, vlan_id, NULL);
+	/* Because unregister_netdevice_queue() makes sure at least one rcu
+	 * grace period is respected before device freeing,
+	 * we dont need to call synchronize_net() here.
+	 */
 	unregister_netdevice_queue(dev, head);
 
 	/* If the group is now empty, kill off the group. */
@@ -327,10 +326,6 @@ static void vlan_sync_address(struct net_device *dev,
 static void vlan_transfer_features(struct net_device *dev,
 				   struct net_device *vlandev)
 {
-	unsigned long old_features = vlandev->features;
-
-	vlandev->features &= ~dev->vlan_features;
-	vlandev->features |= dev->features & dev->vlan_features;
 	vlandev->gso_max_size = dev->gso_max_size;
 
 	if (dev->features & NETIF_F_HW_VLAN_TX)
@@ -341,8 +336,8 @@ static void vlan_transfer_features(struct net_device *dev,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
-	if (old_features != vlandev->features)
-		netdev_features_change(vlandev);
+
+	netdev_update_features(vlandev);
 }
 
 static void __vlan_device_event(struct net_device *dev, unsigned long event)
@@ -487,9 +482,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		/* Delete all VLANs for this dev. */
-		grp->killall = 1;
-
 		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
@@ -508,6 +500,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_PRE_TYPE_CHANGE:
 		/* Forbid underlaying device to change its type. */
 		return NOTIFY_BAD;
+
+	case NETDEV_NOTIFY_PEERS:
+	case NETDEV_BONDING_FAILOVER:
+		/* Propagate to vlan devices */
+		for (i = 0; i < VLAN_N_VID; i++) {
+			vlandev = vlan_group_get_device(grp, i);
+			if (!vlandev)
+				continue;
+
+			call_netdevice_notifiers(event, vlandev);
+		}
+		break;
 	}
 
 out:
@@ -688,7 +692,6 @@ static int __init vlan_proto_init(void)
 	if (err < 0)
 		goto err4;
 
-	dev_add_pack(&vlan_packet_type);
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
@@ -709,8 +712,6 @@ static void __exit vlan_cleanup_module(void)
 
 	unregister_netdevice_notifier(&vlan_notifier_block);
 
-	dev_remove_pack(&vlan_packet_type);
-
 	unregister_pernet_subsys(&vlan_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5687c9b95f33..c3408def8a19 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -75,8 +75,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 }
 
 /* found in vlan_dev.c */
-int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-		  struct packet_type *ptype, struct net_device *orig_dev);
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio);
 int vlan_dev_set_egress_priority(const struct net_device *dev,
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index ce8e3ab3e7a5..41495dc2a4c9 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,7 +4,7 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
-bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
+bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
@@ -88,3 +88,86 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	return napi_gro_frags(napi);
 }
 EXPORT_SYMBOL(vlan_gro_frags);
+
+static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+{
+	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
+		if (skb_cow(skb, skb_headroom(skb)) < 0)
+			skb = NULL;
+		if (skb) {
+			/* Lifted from Gleb's VLAN code... */
+			memmove(skb->data - ETH_HLEN,
+				skb->data - VLAN_ETH_HLEN, 12);
+			skb->mac_header += VLAN_HLEN;
+		}
+	}
+	return skb;
+}
+
+static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
+{
+	__be16 proto;
+	unsigned char *rawp;
+
+	/*
+	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
+	 * three protocols care about.
+	 */
+
+	proto = vhdr->h_vlan_encapsulated_proto;
+	if (ntohs(proto) >= 1536) {
+		skb->protocol = proto;
+		return;
+	}
+
+	rawp = skb->data;
+	if (*(unsigned short *) rawp == 0xFFFF)
+		/*
+		 * This is a magic hack to spot IPX packets. Older Novell
+		 * breaks the protocol design and runs IPX over 802.3 without
+		 * an 802.2 LLC layer. We look for FFFF which isn't a used
+		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
+		 * but does for the rest.
+		 */
+		skb->protocol = htons(ETH_P_802_3);
+	else
+		/*
+		 * Real 802.2 LLC
+		 */
+		skb->protocol = htons(ETH_P_802_2);
+}
+
+struct sk_buff *vlan_untag(struct sk_buff *skb)
+{
+	struct vlan_hdr *vhdr;
+	u16 vlan_tci;
+
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		/* vlan_tci is already set-up so leave this for another time */
+		return skb;
+	}
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto err_free;
+
+	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+		goto err_free;
+
+	vhdr = (struct vlan_hdr *) skb->data;
+	vlan_tci = ntohs(vhdr->h_vlan_TCI);
+	__vlan_hwaccel_put_tag(skb, vlan_tci);
+
+	skb_pull_rcsum(skb, VLAN_HLEN);
+	vlan_set_encap_proto(skb, vhdr);
+
+	skb = vlan_check_reorder_header(skb);
+	if (unlikely(!skb))
+		goto err_free;
+
+	return skb;
+
+err_free:
+	kfree_skb(skb);
+	return NULL;
+}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index be737539f34d..f247f5bff88d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -65,179 +65,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
65 return 0; 65 return 0;
66} 66}
67 67
68static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
69{
70 if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
71 if (skb_cow(skb, skb_headroom(skb)) < 0)
72 skb = NULL;
73 if (skb) {
74 /* Lifted from Gleb's VLAN code... */
75 memmove(skb->data - ETH_HLEN,
76 skb->data - VLAN_ETH_HLEN, 12);
77 skb->mac_header += VLAN_HLEN;
78 }
79 }
80
81 return skb;
82}
83
84static inline void vlan_set_encap_proto(struct sk_buff *skb,
85 struct vlan_hdr *vhdr)
86{
87 __be16 proto;
88 unsigned char *rawp;
89
90 /*
91 * Was a VLAN packet, grab the encapsulated protocol, which the layer
92 * three protocols care about.
93 */
94
95 proto = vhdr->h_vlan_encapsulated_proto;
96 if (ntohs(proto) >= 1536) {
97 skb->protocol = proto;
98 return;
99 }
100
101 rawp = skb->data;
102 if (*(unsigned short *)rawp == 0xFFFF)
103 /*
104 * This is a magic hack to spot IPX packets. Older Novell
105 * breaks the protocol design and runs IPX over 802.3 without
106 * an 802.2 LLC layer. We look for FFFF which isn't a used
107 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
108 * but does for the rest.
109 */
110 skb->protocol = htons(ETH_P_802_3);
111 else
112 /*
113 * Real 802.2 LLC
114 */
115 skb->protocol = htons(ETH_P_802_2);
116}
117
118/*
119 * Determine the packet's protocol ID. The rule here is that we
120 * assume 802.3 if the type field is short enough to be a length.
121 * This is normal practice and works for any 'now in use' protocol.
122 *
123 * Also, at this point we assume that we ARE dealing exclusively with
124 * VLAN packets, or packets that should be made into VLAN packets based
125 * on a default VLAN ID.
126 *
127 * NOTE: Should be similar to ethernet/eth.c.
128 *
129 * SANITY NOTE: This method is called when a packet is moving up the stack
130 * towards userland. To get here, it would have already passed
131 * through the ethernet/eth.c eth_type_trans() method.
132 * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
133 * stored UNALIGNED in the memory. RISC systems don't like
134 * such cases very much...
135 * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
136 * aligned, so there doesn't need to be any of the unaligned
137 * stuff. It has been commented out now... --Ben
138 *
139 */
140int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
141 struct packet_type *ptype, struct net_device *orig_dev)
142{
143 struct vlan_hdr *vhdr;
144 struct vlan_pcpu_stats *rx_stats;
145 struct net_device *vlan_dev;
146 u16 vlan_id;
147 u16 vlan_tci;
148
149 skb = skb_share_check(skb, GFP_ATOMIC);
150 if (skb == NULL)
151 goto err_free;
152
153 if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
154 goto err_free;
155
156 vhdr = (struct vlan_hdr *)skb->data;
157 vlan_tci = ntohs(vhdr->h_vlan_TCI);
158 vlan_id = vlan_tci & VLAN_VID_MASK;
159
160 rcu_read_lock();
161 vlan_dev = vlan_find_dev(dev, vlan_id);
162
163 /* If the VLAN device is defined, we use it.
164 * If not, and the VID is 0, it is a 802.1p packet (not
165 * really a VLAN), so we will just netif_rx it later to the
166 * original interface, but with the skb->proto set to the
167 * wrapped proto: we do nothing here.
168 */
169
170 if (!vlan_dev) {
171 if (vlan_id) {
172 pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
173 __func__, vlan_id, dev->name);
174 goto err_unlock;
175 }
176 rx_stats = NULL;
177 } else {
178 skb->dev = vlan_dev;
179
180 rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats);
181
182 u64_stats_update_begin(&rx_stats->syncp);
183 rx_stats->rx_packets++;
184 rx_stats->rx_bytes += skb->len;
185
186 skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
187
188 pr_debug("%s: priority: %u for TCI: %hu\n",
189 __func__, skb->priority, vlan_tci);
190
191 switch (skb->pkt_type) {
192 case PACKET_BROADCAST:
193 /* Yeah, stats collect these together.. */
194 /* stats->broadcast ++; // no such counter :-( */
195 break;
196
197 case PACKET_MULTICAST:
198 rx_stats->rx_multicast++;
199 break;
200
201 case PACKET_OTHERHOST:
202 /* Our lower layer thinks this is not local, let's make
203 * sure.
204 * This allows the VLAN to have a different MAC than the
205 * underlying device, and still route correctly.
206 */
207 if (!compare_ether_addr(eth_hdr(skb)->h_dest,
208 skb->dev->dev_addr))
209 skb->pkt_type = PACKET_HOST;
210 break;
211 default:
212 break;
213 }
214 u64_stats_update_end(&rx_stats->syncp);
215 }
216
217 skb_pull_rcsum(skb, VLAN_HLEN);
218 vlan_set_encap_proto(skb, vhdr);
219
220 if (vlan_dev) {
221 skb = vlan_check_reorder_header(skb);
222 if (!skb) {
223 rx_stats->rx_errors++;
224 goto err_unlock;
225 }
226 }
227
228 netif_rx(skb);
229
230 rcu_read_unlock();
231 return NET_RX_SUCCESS;
232
233err_unlock:
234 rcu_read_unlock();
235err_free:
236 atomic_long_inc(&dev->rx_dropped);
237 kfree_skb(skb);
238 return NET_RX_DROP;
239}
240
241static inline u16 68static inline u16
242vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) 69vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
243{ 70{
@@ -487,9 +314,6 @@ static int vlan_dev_stop(struct net_device *dev)
487 struct vlan_dev_info *vlan = vlan_dev_info(dev); 314 struct vlan_dev_info *vlan = vlan_dev_info(dev);
488 struct net_device *real_dev = vlan->real_dev; 315 struct net_device *real_dev = vlan->real_dev;
489 316
490 if (vlan->flags & VLAN_FLAG_GVRP)
491 vlan_gvrp_request_leave(dev);
492
493 dev_mc_unsync(real_dev, dev); 317 dev_mc_unsync(real_dev, dev);
494 dev_uc_unsync(real_dev, dev); 318 dev_uc_unsync(real_dev, dev);
495 if (dev->flags & IFF_ALLMULTI) 319 if (dev->flags & IFF_ALLMULTI)
@@ -625,6 +449,19 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
625 rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); 449 rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
626 return rc; 450 return rc;
627} 451}
452
453static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
454 struct scatterlist *sgl, unsigned int sgc)
455{
456 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
457 const struct net_device_ops *ops = real_dev->netdev_ops;
458 int rc = 0;
459
460 if (ops->ndo_fcoe_ddp_target)
461 rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc);
462
463 return rc;
464}
628#endif 465#endif
629 466
630static void vlan_dev_change_rx_flags(struct net_device *dev, int change) 467static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
@@ -691,8 +528,8 @@ static int vlan_dev_init(struct net_device *dev)
691 (1<<__LINK_STATE_DORMANT))) | 528 (1<<__LINK_STATE_DORMANT))) |
692 (1<<__LINK_STATE_PRESENT); 529 (1<<__LINK_STATE_PRESENT);
693 530
694 dev->features |= real_dev->features & real_dev->vlan_features; 531 dev->hw_features = NETIF_F_ALL_TX_OFFLOADS;
695 dev->features |= NETIF_F_LLTX; 532 dev->features |= real_dev->vlan_features | NETIF_F_LLTX;
696 dev->gso_max_size = real_dev->gso_max_size; 533 dev->gso_max_size = real_dev->gso_max_size;
697 534
698 /* ipv6 shared card related stuff */ 535 /* ipv6 shared card related stuff */
@@ -707,6 +544,7 @@ static int vlan_dev_init(struct net_device *dev)
707 dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid; 544 dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
708#endif 545#endif
709 546
547 dev->needed_headroom = real_dev->needed_headroom;
710 if (real_dev->features & NETIF_F_HW_VLAN_TX) { 548 if (real_dev->features & NETIF_F_HW_VLAN_TX) {
711 dev->header_ops = real_dev->header_ops; 549 dev->header_ops = real_dev->header_ops;
712 dev->hard_header_len = real_dev->hard_header_len; 550 dev->hard_header_len = real_dev->hard_header_len;
@@ -745,6 +583,19 @@ static void vlan_dev_uninit(struct net_device *dev)
745 } 583 }
746} 584}
747 585
586static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
587{
588 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
589
590 features &= real_dev->features;
591 features &= real_dev->vlan_features;
592 if (dev_ethtool_get_rx_csum(real_dev))
593 features |= NETIF_F_RXCSUM;
594 features |= NETIF_F_LLTX;
595
596 return features;
597}
598
748static int vlan_ethtool_get_settings(struct net_device *dev, 599static int vlan_ethtool_get_settings(struct net_device *dev,
749 struct ethtool_cmd *cmd) 600 struct ethtool_cmd *cmd)
750{ 601{
@@ -760,18 +611,6 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
760 strcpy(info->fw_version, "N/A"); 611 strcpy(info->fw_version, "N/A");
761} 612}
762 613
763static u32 vlan_ethtool_get_rx_csum(struct net_device *dev)
764{
765 const struct vlan_dev_info *vlan = vlan_dev_info(dev);
766 return dev_ethtool_get_rx_csum(vlan->real_dev);
767}
768
769static u32 vlan_ethtool_get_flags(struct net_device *dev)
770{
771 const struct vlan_dev_info *vlan = vlan_dev_info(dev);
772 return dev_ethtool_get_flags(vlan->real_dev);
773}
774
775static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) 614static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
776{ 615{
777 616
@@ -809,32 +648,10 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
809 return stats; 648 return stats;
810} 649}
811 650
812static int vlan_ethtool_set_tso(struct net_device *dev, u32 data)
813{
814 if (data) {
815 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
816
817 /* Underlying device must support TSO for VLAN-tagged packets
818 * and must have TSO enabled now.
819 */
820 if (!(real_dev->vlan_features & NETIF_F_TSO))
821 return -EOPNOTSUPP;
822 if (!(real_dev->features & NETIF_F_TSO))
823 return -EINVAL;
824 dev->features |= NETIF_F_TSO;
825 } else {
826 dev->features &= ~NETIF_F_TSO;
827 }
828 return 0;
829}
830
831static const struct ethtool_ops vlan_ethtool_ops = { 651static const struct ethtool_ops vlan_ethtool_ops = {
832 .get_settings = vlan_ethtool_get_settings, 652 .get_settings = vlan_ethtool_get_settings,
833 .get_drvinfo = vlan_ethtool_get_drvinfo, 653 .get_drvinfo = vlan_ethtool_get_drvinfo,
834 .get_link = ethtool_op_get_link, 654 .get_link = ethtool_op_get_link,
835 .get_rx_csum = vlan_ethtool_get_rx_csum,
836 .get_flags = vlan_ethtool_get_flags,
837 .set_tso = vlan_ethtool_set_tso,
838}; 655};
839 656
840static const struct net_device_ops vlan_netdev_ops = { 657static const struct net_device_ops vlan_netdev_ops = {
@@ -858,7 +675,9 @@ static const struct net_device_ops vlan_netdev_ops = {
858 .ndo_fcoe_enable = vlan_dev_fcoe_enable, 675 .ndo_fcoe_enable = vlan_dev_fcoe_enable,
859 .ndo_fcoe_disable = vlan_dev_fcoe_disable, 676 .ndo_fcoe_disable = vlan_dev_fcoe_disable,
860 .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, 677 .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn,
678 .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target,
861#endif 679#endif
680 .ndo_fix_features = vlan_dev_fix_features,
862}; 681};
863 682
864void vlan_setup(struct net_device *dev) 683void vlan_setup(struct net_device *dev)
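The vlan_dev.c changes above drop the per-feature ethtool hooks (get_rx_csum, get_flags, set_tso) and the open-coded feature copying, and instead register an ndo_fix_features callback so the core recomputes the VLAN device's feature set from the real device when netdev_update_features() runs (which vlan_transfer_features() now calls). A rough sketch of that shape for a generic stacked device, assuming the u32 feature-flag API of this kernel series; get_lower_dev() is a hypothetical accessor, not a kernel symbol:

static u32 stacked_fix_features(struct net_device *dev, u32 features)
{
	struct net_device *lower = get_lower_dev(dev);	/* hypothetical helper */

	/* Only advertise what the lower device supports for tagged traffic. */
	features &= lower->features & lower->vlan_features;
	features |= NETIF_F_LLTX;			/* xmit is locally serialized */
	return features;
}

static const struct net_device_ops stacked_netdev_ops = {
	.ndo_fix_features = stacked_fix_features,	/* consulted by netdev_update_features() */
	/* ... remaining ops elided ... */
};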
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d1314cf18adf..d940c49d168a 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -54,7 +54,7 @@ static const char name_conf[] = "config";
 
 /*
  * Structures for interfacing with the /proc filesystem.
- * VLAN creates its own directory /proc/net/vlan with the folowing
+ * VLAN creates its own directory /proc/net/vlan with the following
  * entries:
  *	config		device status/configuration
  *	<device>	entry for each device
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 198a640d53a6..a0874cc1f718 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
 	util.o \
 	protocol.o \
 	trans_fd.o \
+	trans_common.o \
 
 9pnet_virtio-objs := \
 	trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a848bca9fbff..ceab943dfc49 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -178,7 +178,7 @@ free_and_return:
178 * @tag: numeric id for transaction 178 * @tag: numeric id for transaction
179 * 179 *
180 * this is a simple array lookup, but will grow the 180 * this is a simple array lookup, but will grow the
181 * request_slots as necessary to accomodate transaction 181 * request_slots as necessary to accommodate transaction
182 * ids which did not previously have a slot. 182 * ids which did not previously have a slot.
183 * 183 *
184 * this code relies on the client spinlock to manage locks, its 184 * this code relies on the client spinlock to manage locks, its
@@ -223,16 +223,29 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
223 223
224 req = &c->reqs[row][col]; 224 req = &c->reqs[row][col];
225 if (!req->tc) { 225 if (!req->tc) {
226 req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); 226 req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
227 if (!req->wq) { 227 if (!req->wq) {
228 printk(KERN_ERR "Couldn't grow tag array\n"); 228 printk(KERN_ERR "Couldn't grow tag array\n");
229 return ERR_PTR(-ENOMEM); 229 return ERR_PTR(-ENOMEM);
230 } 230 }
231 init_waitqueue_head(req->wq); 231 init_waitqueue_head(req->wq);
232 req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, 232 if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
233 GFP_KERNEL); 233 P9_TRANS_PREF_PAYLOAD_SEP) {
234 req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, 234 int alloc_msize = min(c->msize, 4096);
235 GFP_KERNEL); 235 req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
236 GFP_NOFS);
237 req->tc->capacity = alloc_msize;
238 req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
239 GFP_NOFS);
240 req->rc->capacity = alloc_msize;
241 } else {
242 req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
243 GFP_NOFS);
244 req->tc->capacity = c->msize;
245 req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
246 GFP_NOFS);
247 req->rc->capacity = c->msize;
248 }
236 if ((!req->tc) || (!req->rc)) { 249 if ((!req->tc) || (!req->rc)) {
237 printk(KERN_ERR "Couldn't grow tag array\n"); 250 printk(KERN_ERR "Couldn't grow tag array\n");
238 kfree(req->tc); 251 kfree(req->tc);
@@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
243 return ERR_PTR(-ENOMEM); 256 return ERR_PTR(-ENOMEM);
244 } 257 }
245 req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); 258 req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
246 req->tc->capacity = c->msize;
247 req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); 259 req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
248 req->rc->capacity = c->msize;
249 } 260 }
250 261
251 p9pdu_reset(req->tc); 262 p9pdu_reset(req->tc);
@@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
443{ 454{
444 int8_t type; 455 int8_t type;
445 int err; 456 int err;
457 int ecode;
446 458
447 err = p9_parse_header(req->rc, NULL, &type, NULL, 0); 459 err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
448 if (err) { 460 if (err) {
@@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
450 return err; 462 return err;
451 } 463 }
452 464
453 if (type == P9_RERROR || type == P9_RLERROR) { 465 if (type != P9_RERROR && type != P9_RLERROR)
454 int ecode; 466 return 0;
455
456 if (!p9_is_proto_dotl(c)) {
457 char *ename;
458
459 err = p9pdu_readf(req->rc, c->proto_version, "s?d",
460 &ename, &ecode);
461 if (err)
462 goto out_err;
463 467
464 if (p9_is_proto_dotu(c)) 468 if (!p9_is_proto_dotl(c)) {
465 err = -ecode; 469 char *ename;
470
471 if (req->tc->pbuf_size) {
472 /* Handle user buffers */
473 size_t len = req->rc->size - req->rc->offset;
474 if (req->tc->pubuf) {
475 /* User Buffer */
476 err = copy_from_user(
477 &req->rc->sdata[req->rc->offset],
478 req->tc->pubuf, len);
479 if (err) {
480 err = -EFAULT;
481 goto out_err;
482 }
483 } else {
484 /* Kernel Buffer */
485 memmove(&req->rc->sdata[req->rc->offset],
486 req->tc->pkbuf, len);
487 }
488 }
489 err = p9pdu_readf(req->rc, c->proto_version, "s?d",
490 &ename, &ecode);
491 if (err)
492 goto out_err;
466 493
467 if (!err || !IS_ERR_VALUE(err)) { 494 if (p9_is_proto_dotu(c))
468 err = p9_errstr2errno(ename, strlen(ename)); 495 err = -ecode;
469 496
470 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); 497 if (!err || !IS_ERR_VALUE(err)) {
498 err = p9_errstr2errno(ename, strlen(ename));
471 499
472 kfree(ename); 500 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
473 } 501 ename);
474 } else {
475 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
476 err = -ecode;
477 502
478 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); 503 kfree(ename);
479 } 504 }
505 } else {
506 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
507 err = -ecode;
508
509 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
510 }
480 511
481 } else
482 err = 0;
483 512
484 return err; 513 return err;
485 514
@@ -585,7 +614,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
585 614
586 err = c->trans_mod->request(c, req); 615 err = c->trans_mod->request(c, req);
587 if (err < 0) { 616 if (err < 0) {
588 if (err != -ERESTARTSYS) 617 if (err != -ERESTARTSYS && err != -EFAULT)
589 c->status = Disconnected; 618 c->status = Disconnected;
590 goto reterr; 619 goto reterr;
591 } 620 }
@@ -900,15 +929,15 @@ error:
900} 929}
901EXPORT_SYMBOL(p9_client_attach); 930EXPORT_SYMBOL(p9_client_attach);
902 931
903struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, 932struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
904 int clone) 933 char **wnames, int clone)
905{ 934{
906 int err; 935 int err;
907 struct p9_client *clnt; 936 struct p9_client *clnt;
908 struct p9_fid *fid; 937 struct p9_fid *fid;
909 struct p9_qid *wqids; 938 struct p9_qid *wqids;
910 struct p9_req_t *req; 939 struct p9_req_t *req;
911 int16_t nwqids, count; 940 uint16_t nwqids, count;
912 941
913 err = 0; 942 err = 0;
914 wqids = NULL; 943 wqids = NULL;
@@ -926,7 +955,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
926 fid = oldfid; 955 fid = oldfid;
927 956
928 957
929 P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", 958 P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
930 oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); 959 oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
931 960
932 req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, 961 req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
@@ -1252,7 +1281,7 @@ int
1252p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, 1281p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1253 u32 count) 1282 u32 count)
1254{ 1283{
1255 int err, rsize, total; 1284 int err, rsize;
1256 struct p9_client *clnt; 1285 struct p9_client *clnt;
1257 struct p9_req_t *req; 1286 struct p9_req_t *req;
1258 char *dataptr; 1287 char *dataptr;
@@ -1261,7 +1290,6 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1261 (long long unsigned) offset, count); 1290 (long long unsigned) offset, count);
1262 err = 0; 1291 err = 0;
1263 clnt = fid->clnt; 1292 clnt = fid->clnt;
1264 total = 0;
1265 1293
1266 rsize = fid->iounit; 1294 rsize = fid->iounit;
1267 if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) 1295 if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
@@ -1270,7 +1298,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1270 if (count < rsize) 1298 if (count < rsize)
1271 rsize = count; 1299 rsize = count;
1272 1300
1273 req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); 1301 /* Don't bother zerocopy form small IO (< 1024) */
1302 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
1303 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
1304 req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
1305 rsize, data, udata);
1306 } else {
1307 req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
1308 rsize);
1309 }
1274 if (IS_ERR(req)) { 1310 if (IS_ERR(req)) {
1275 err = PTR_ERR(req); 1311 err = PTR_ERR(req);
1276 goto error; 1312 goto error;
@@ -1284,13 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1284 1320
1285 P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); 1321 P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
1286 1322
1287 if (data) { 1323 if (!req->tc->pbuf_size) {
1288 memmove(data, dataptr, count); 1324 if (data) {
1289 } else { 1325 memmove(data, dataptr, count);
1290 err = copy_to_user(udata, dataptr, count); 1326 } else {
1291 if (err) { 1327 err = copy_to_user(udata, dataptr, count);
1292 err = -EFAULT; 1328 if (err) {
1293 goto free_and_error; 1329 err = -EFAULT;
1330 goto free_and_error;
1331 }
1294 } 1332 }
1295 } 1333 }
1296 p9_free_req(clnt, req); 1334 p9_free_req(clnt, req);
@@ -1307,7 +1345,7 @@ int
1307p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, 1345p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
1308 u64 offset, u32 count) 1346 u64 offset, u32 count)
1309{ 1347{
1310 int err, rsize, total; 1348 int err, rsize;
1311 struct p9_client *clnt; 1349 struct p9_client *clnt;
1312 struct p9_req_t *req; 1350 struct p9_req_t *req;
1313 1351
@@ -1315,7 +1353,6 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
1315 fid->fid, (long long unsigned) offset, count); 1353 fid->fid, (long long unsigned) offset, count);
1316 err = 0; 1354 err = 0;
1317 clnt = fid->clnt; 1355 clnt = fid->clnt;
1318 total = 0;
1319 1356
1320 rsize = fid->iounit; 1357 rsize = fid->iounit;
1321 if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) 1358 if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
@@ -1323,12 +1360,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
1323 1360
1324 if (count < rsize) 1361 if (count < rsize)
1325 rsize = count; 1362 rsize = count;
1326 if (data) 1363
1327 req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, 1364 /* Don't bother zerocopy form small IO (< 1024) */
1328 rsize, data); 1365 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
1329 else 1366 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
1330 req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, 1367 req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
1331 rsize, udata); 1368 rsize, data, udata);
1369 } else {
1370
1371 if (data)
1372 req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
1373 offset, rsize, data);
1374 else
1375 req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
1376 offset, rsize, udata);
1377 }
1332 if (IS_ERR(req)) { 1378 if (IS_ERR(req)) {
1333 err = PTR_ERR(req); 1379 err = PTR_ERR(req);
1334 goto error; 1380 goto error;
@@ -1697,7 +1743,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
1697 1743
1698int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) 1744int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1699{ 1745{
1700 int err, rsize, total; 1746 int err, rsize;
1701 struct p9_client *clnt; 1747 struct p9_client *clnt;
1702 struct p9_req_t *req; 1748 struct p9_req_t *req;
1703 char *dataptr; 1749 char *dataptr;
@@ -1707,7 +1753,6 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1707 1753
1708 err = 0; 1754 err = 0;
1709 clnt = fid->clnt; 1755 clnt = fid->clnt;
1710 total = 0;
1711 1756
1712 rsize = fid->iounit; 1757 rsize = fid->iounit;
1713 if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) 1758 if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ)
@@ -1716,7 +1761,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1716 if (count < rsize) 1761 if (count < rsize)
1717 rsize = count; 1762 rsize = count;
1718 1763
1719 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); 1764 if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
1765 P9_TRANS_PREF_PAYLOAD_SEP) {
1766 req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
1767 offset, rsize, data);
1768 } else {
1769 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
1770 offset, rsize);
1771 }
1720 if (IS_ERR(req)) { 1772 if (IS_ERR(req)) {
1721 err = PTR_ERR(req); 1773 err = PTR_ERR(req);
1722 goto error; 1774 goto error;
@@ -1730,7 +1782,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1730 1782
1731 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); 1783 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
1732 1784
1733 if (data) 1785 if (!req->tc->pbuf_size && data)
1734 memmove(data, dataptr, count); 1786 memmove(data, dataptr, count);
1735 1787
1736 p9_free_req(clnt, req); 1788 p9_free_req(clnt, req);
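The hunks above all pivot on the same transport capability bit: when the transport module advertises P9_TRANS_PREF_PAYLOAD_SEP, the client hands the payload buffer to the transport ("dqE"/"dqF") instead of copying it through the PDU, and on the reply side a non-zero req->tc->pbuf_size tells p9_client_read()/p9_client_readdir() that the payload already landed in the caller's buffer, so the memmove()/copy_to_user() is skipped. A minimal sketch of that decision, condensed from the hunks above (payload_is_separate() is a hypothetical helper, not part of the patch; the 1024-byte cutoff is the one applied on the write path):

	static int payload_is_separate(struct p9_client *clnt, u32 rsize, int is_write)
	{
		int sep = (clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
			  P9_TRANS_PREF_PAYLOAD_SEP;

		if (!sep)
			return 0;	/* legacy transport: copy through sdata as before */
		if (is_write && rsize <= 1024)
			return 0;	/* small writes are not worth pinning pages for */
		return 1;		/* use "dqE"/"dqF", skip memmove()/copy_to_user() */
	}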
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 1e308f210928..a873277cb996 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
114 return size - len; 114 return size - len;
115} 115}
116 116
117static size_t
118pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
119 size_t size)
120{
121 BUG_ON(pdu->size > P9_IOHDRSZ);
122 pdu->pubuf = (char __user *)udata;
123 pdu->pkbuf = (char *)kdata;
124 pdu->pbuf_size = size;
125 return 0;
126}
127
128static size_t
129pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
130{
131 BUG_ON(pdu->size > P9_READDIRHDRSZ);
132 pdu->pkbuf = (char *)kdata;
133 pdu->pbuf_size = size;
134 return 0;
135}
136
117/* 137/*
118 b - int8_t 138 b - int8_t
119 w - int16_t 139 w - int16_t
@@ -185,7 +205,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
185 if (errcode) 205 if (errcode)
186 break; 206 break;
187 207
188 *sptr = kmalloc(len + 1, GFP_KERNEL); 208 *sptr = kmalloc(len + 1, GFP_NOFS);
189 if (*sptr == NULL) { 209 if (*sptr == NULL) {
190 errcode = -EFAULT; 210 errcode = -EFAULT;
191 break; 211 break;
@@ -245,7 +265,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
245 } 265 }
246 break; 266 break;
247 case 'T':{ 267 case 'T':{
248 int16_t *nwname = va_arg(ap, int16_t *); 268 uint16_t *nwname = va_arg(ap, uint16_t *);
249 char ***wnames = va_arg(ap, char ***); 269 char ***wnames = va_arg(ap, char ***);
250 270
251 errcode = p9pdu_readf(pdu, proto_version, 271 errcode = p9pdu_readf(pdu, proto_version,
@@ -253,7 +273,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
253 if (!errcode) { 273 if (!errcode) {
254 *wnames = 274 *wnames =
255 kmalloc(sizeof(char *) * *nwname, 275 kmalloc(sizeof(char *) * *nwname,
256 GFP_KERNEL); 276 GFP_NOFS);
257 if (!*wnames) 277 if (!*wnames)
258 errcode = -ENOMEM; 278 errcode = -ENOMEM;
259 } 279 }
@@ -297,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
297 *wqids = 317 *wqids =
298 kmalloc(*nwqid * 318 kmalloc(*nwqid *
299 sizeof(struct p9_qid), 319 sizeof(struct p9_qid),
300 GFP_KERNEL); 320 GFP_NOFS);
301 if (*wqids == NULL) 321 if (*wqids == NULL)
302 errcode = -ENOMEM; 322 errcode = -ENOMEM;
303 } 323 }
@@ -445,6 +465,26 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
445 errcode = -EFAULT; 465 errcode = -EFAULT;
446 } 466 }
447 break; 467 break;
468 case 'E':{
469 int32_t cnt = va_arg(ap, int32_t);
470 const char *k = va_arg(ap, const void *);
471 const char __user *u = va_arg(ap,
472 const void __user *);
473 errcode = p9pdu_writef(pdu, proto_version, "d",
474 cnt);
475 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
476 errcode = -EFAULT;
477 }
478 break;
479 case 'F':{
480 int32_t cnt = va_arg(ap, int32_t);
481 const char *k = va_arg(ap, const void *);
482 errcode = p9pdu_writef(pdu, proto_version, "d",
483 cnt);
484 if (!errcode && pdu_write_readdir(pdu, k, cnt))
485 errcode = -EFAULT;
486 }
487 break;
448 case 'U':{ 488 case 'U':{
449 int32_t count = va_arg(ap, int32_t); 489 int32_t count = va_arg(ap, int32_t);
450 const char __user *udata = 490 const char __user *udata =
@@ -456,7 +496,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
456 } 496 }
457 break; 497 break;
458 case 'T':{ 498 case 'T':{
459 int16_t nwname = va_arg(ap, int); 499 uint16_t nwname = va_arg(ap, int);
460 const char **wnames = va_arg(ap, const char **); 500 const char **wnames = va_arg(ap, const char **);
461 501
462 errcode = p9pdu_writef(pdu, proto_version, "w", 502 errcode = p9pdu_writef(pdu, proto_version, "w",
@@ -579,6 +619,7 @@ EXPORT_SYMBOL(p9stat_read);
579 619
580int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) 620int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
581{ 621{
622 pdu->id = type;
582 return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); 623 return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
583} 624}
584 625
@@ -606,6 +647,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
606{ 647{
607 pdu->offset = 0; 648 pdu->offset = 0;
608 pdu->size = 0; 649 pdu->size = 0;
650 pdu->private = NULL;
651 pdu->pubuf = NULL;
652 pdu->pkbuf = NULL;
653 pdu->pbuf_size = 0;
609} 654}
610 655
611int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, 656int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
@@ -629,6 +674,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
629 } 674 }
630 675
631 strcpy(dirent->d_name, nameptr); 676 strcpy(dirent->d_name, nameptr);
677 kfree(nameptr);
632 678
633out: 679out:
634 return fake_pdu.offset; 680 return fake_pdu.offset;
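The new 'E' and 'F' format letters mirror the existing 'D'/'U' pair but defer the copy to the transport: pdu_write_urw() and pdu_write_readdir() only stash the kernel/user buffer pointers in pdu->pkbuf/pdu->pubuf and record pdu->pbuf_size, without touching sdata. Roughly how the client side drives them, as used in the client.c hunks above (a condensed illustration, not additional patch code):

	/* TWRITE with separated payload: 'E' consumes count, kernel buffer, user buffer */
	req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, rsize,
			    data, udata);

	/* TREADDIR with separated payload: 'F' consumes count and a kernel buffer */
	req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, offset, rsize, data);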
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
new file mode 100644
index 000000000000..9a70ebdec56e
--- /dev/null
+++ b/net/9p/trans_common.c
@@ -0,0 +1,92 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15#include <linux/slab.h>
16#include <linux/module.h>
17#include <net/9p/9p.h>
18#include <net/9p/client.h>
19#include <linux/scatterlist.h>
20#include "trans_common.h"
21
22/**
23 * p9_release_req_pages - Release pages after the transaction.
24 * @rpinfo: the PDU's private struct trans_rpage_info holding the mapped pages
25 */
26void
27p9_release_req_pages(struct trans_rpage_info *rpinfo)
28{
29 int i = 0;
30
31 while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
32 put_page(rpinfo->rp_data[i]);
33 i++;
34 }
35}
36EXPORT_SYMBOL(p9_release_req_pages);
37
38/**
39 * p9_nr_pages - Return number of pages needed to accommodate the payload.
40 */
41int
42p9_nr_pages(struct p9_req_t *req)
43{
44 unsigned long start_page, end_page;
45 start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT;
46 end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size +
47 PAGE_SIZE - 1) >> PAGE_SHIFT;
48 return end_page - start_page;
49}
50EXPORT_SYMBOL(p9_nr_pages);
51
52/**
53 * p9_payload_gup - Translates the user buffer into kernel pages and
54 * pins them for read or write through get_user_pages_fast().
55 * @req: Request to be sent to server.
56 * @pdata_off: data offset into the first page after translation (gup).
57 * @pdata_len: Total length of the IO. gup may not return requested # of pages.
58 * @nr_pages: number of pages to accommodate the payload
59 * @rw: Indicates if the pages are for read or write.
60 */
61int
62p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
63 int nr_pages, u8 rw)
64{
65 uint32_t first_page_bytes = 0;
66 int32_t pdata_mapped_pages;
67 struct trans_rpage_info *rpinfo;
68
69 *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1);
70
71 if (*pdata_off)
72 first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
73 req->tc->pbuf_size);
74
75 rpinfo = req->tc->private;
76 pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
77 nr_pages, rw, &rpinfo->rp_data[0]);
78 if (pdata_mapped_pages <= 0)
79 return pdata_mapped_pages;
80
81 rpinfo->rp_nr_pages = pdata_mapped_pages;
82 if (*pdata_off) {
83 *pdata_len = first_page_bytes;
84 *pdata_len += min((req->tc->pbuf_size - *pdata_len),
85 ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
86 } else {
87 *pdata_len = min(req->tc->pbuf_size,
88 (size_t)pdata_mapped_pages << PAGE_SHIFT);
89 }
90 return 0;
91}
92EXPORT_SYMBOL(p9_payload_gup);
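p9_nr_pages() is plain page arithmetic: round the buffer start down and the end up to page boundaries and take the difference. A worked example with hypothetical numbers (assuming PAGE_SIZE == 4096, so PAGE_SHIFT == 12): a 16-byte buffer that begins 8 bytes before a page boundary still needs two pinned pages, and p9_payload_gup() would then report pdata_off == 0xff8, so only the last 8 bytes of the first page carry data.

	unsigned long pubuf = 0x10000ff8;	/* hypothetical user address */
	size_t pbuf_size = 16;			/* crosses the page boundary */

	unsigned long start_page = pubuf >> PAGE_SHIFT;			/* 0x10000 */
	unsigned long end_page = (pubuf + pbuf_size + PAGE_SIZE - 1)
					>> PAGE_SHIFT;			/* 0x10002 */
	int nr_pages = end_page - start_page;				/* 2 */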
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
new file mode 100644
index 000000000000..76309223bb02
--- /dev/null
+++ b/net/9p/trans_common.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15/* TRUE if it is user context */
16#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
17
18/**
19 * struct trans_rpage_info - To store mapped page information in PDU.
20 * @rp_alloc: Set if this structure was allocated, rather than reusing unused space in the PDU.
21 * @rp_nr_pages: Number of mapped pages
22 * @rp_data: Array of page pointers
23 */
24struct trans_rpage_info {
25 u8 rp_alloc;
26 int rp_nr_pages;
27 struct page *rp_data[0];
28};
29
30void p9_release_req_pages(struct trans_rpage_info *);
31int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
32int p9_nr_pages(struct p9_req_t *);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 078eb162d9bf..4a9084395d35 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -153,10 +153,11 @@ struct p9_conn {
153 unsigned long wsched; 153 unsigned long wsched;
154}; 154};
155 155
156static void p9_poll_workfn(struct work_struct *work);
157
156static DEFINE_SPINLOCK(p9_poll_lock); 158static DEFINE_SPINLOCK(p9_poll_lock);
157static LIST_HEAD(p9_poll_pending_list); 159static LIST_HEAD(p9_poll_pending_list);
158static struct workqueue_struct *p9_mux_wq; 160static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
159static struct task_struct *p9_poll_task;
160 161
161static void p9_mux_poll_stop(struct p9_conn *m) 162static void p9_mux_poll_stop(struct p9_conn *m)
162{ 163{
@@ -349,7 +350,7 @@ static void p9_read_work(struct work_struct *work)
349 350
350 if (m->req->rc == NULL) { 351 if (m->req->rc == NULL) {
351 m->req->rc = kmalloc(sizeof(struct p9_fcall) + 352 m->req->rc = kmalloc(sizeof(struct p9_fcall) +
352 m->client->msize, GFP_KERNEL); 353 m->client->msize, GFP_NOFS);
353 if (!m->req->rc) { 354 if (!m->req->rc) {
354 m->req = NULL; 355 m->req = NULL;
355 err = -ENOMEM; 356 err = -ENOMEM;
@@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work)
384 385
385 if (n & POLLIN) { 386 if (n & POLLIN) {
386 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); 387 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
387 queue_work(p9_mux_wq, &m->rq); 388 schedule_work(&m->rq);
388 } else 389 } else
389 clear_bit(Rworksched, &m->wsched); 390 clear_bit(Rworksched, &m->wsched);
390 } else 391 } else
@@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work)
497 498
498 if (n & POLLOUT) { 499 if (n & POLLOUT) {
499 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); 500 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
500 queue_work(p9_mux_wq, &m->wq); 501 schedule_work(&m->wq);
501 } else 502 } else
502 clear_bit(Wworksched, &m->wsched); 503 clear_bit(Wworksched, &m->wsched);
503 } else 504 } else
@@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
516 container_of(wait, struct p9_poll_wait, wait); 517 container_of(wait, struct p9_poll_wait, wait);
517 struct p9_conn *m = pwait->conn; 518 struct p9_conn *m = pwait->conn;
518 unsigned long flags; 519 unsigned long flags;
519 DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
520 520
521 spin_lock_irqsave(&p9_poll_lock, flags); 521 spin_lock_irqsave(&p9_poll_lock, flags);
522 if (list_empty(&m->poll_pending_link)) 522 if (list_empty(&m->poll_pending_link))
523 list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); 523 list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
524 spin_unlock_irqrestore(&p9_poll_lock, flags); 524 spin_unlock_irqrestore(&p9_poll_lock, flags);
525 525
526 /* perform the default wake up operation */ 526 schedule_work(&p9_poll_work);
527 return default_wake_function(&dummy_wait, mode, sync, key); 527 return 1;
528} 528}
529 529
530/** 530/**
@@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m)
629 P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); 629 P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
630 if (!test_and_set_bit(Rworksched, &m->wsched)) { 630 if (!test_and_set_bit(Rworksched, &m->wsched)) {
631 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); 631 P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
632 queue_work(p9_mux_wq, &m->rq); 632 schedule_work(&m->rq);
633 } 633 }
634 } 634 }
635 635
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
639 if ((m->wsize || !list_empty(&m->unsent_req_list)) && 639 if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
640 !test_and_set_bit(Wworksched, &m->wsched)) { 640 !test_and_set_bit(Wworksched, &m->wsched)) {
641 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); 641 P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
642 queue_work(p9_mux_wq, &m->wq); 642 schedule_work(&m->wq);
643 } 643 }
644 } 644 }
645} 645}
@@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
677 n = p9_fd_poll(m->client, NULL); 677 n = p9_fd_poll(m->client, NULL);
678 678
679 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) 679 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
680 queue_work(p9_mux_wq, &m->wq); 680 schedule_work(&m->wq);
681 681
682 return 0; 682 return 0;
683} 683}
@@ -716,7 +716,6 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
716 substring_t args[MAX_OPT_ARGS]; 716 substring_t args[MAX_OPT_ARGS];
717 int option; 717 int option;
718 char *options, *tmp_options; 718 char *options, *tmp_options;
719 int ret;
720 719
721 opts->port = P9_PORT; 720 opts->port = P9_PORT;
722 opts->rfd = ~0; 721 opts->rfd = ~0;
@@ -744,7 +743,6 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
744 if (r < 0) { 743 if (r < 0) {
745 P9_DPRINTK(P9_DEBUG_ERROR, 744 P9_DPRINTK(P9_DEBUG_ERROR,
746 "integer field, but no integer?\n"); 745 "integer field, but no integer?\n");
747 ret = r;
748 continue; 746 continue;
749 } 747 }
750 } 748 }
@@ -1047,12 +1045,12 @@ static struct p9_trans_module p9_fd_trans = {
1047 * 1045 *
1048 */ 1046 */
1049 1047
1050static int p9_poll_proc(void *a) 1048static void p9_poll_workfn(struct work_struct *work)
1051{ 1049{
1052 unsigned long flags; 1050 unsigned long flags;
1053 1051
1054 P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); 1052 P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
1055 repeat: 1053
1056 spin_lock_irqsave(&p9_poll_lock, flags); 1054 spin_lock_irqsave(&p9_poll_lock, flags);
1057 while (!list_empty(&p9_poll_pending_list)) { 1055 while (!list_empty(&p9_poll_pending_list)) {
1058 struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, 1056 struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
@@ -1067,35 +1065,11 @@ static int p9_poll_proc(void *a)
1067 } 1065 }
1068 spin_unlock_irqrestore(&p9_poll_lock, flags); 1066 spin_unlock_irqrestore(&p9_poll_lock, flags);
1069 1067
1070 set_current_state(TASK_INTERRUPTIBLE);
1071 if (list_empty(&p9_poll_pending_list)) {
1072 P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
1073 schedule();
1074 }
1075 __set_current_state(TASK_RUNNING);
1076
1077 if (!kthread_should_stop())
1078 goto repeat;
1079
1080 P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); 1068 P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
1081 return 0;
1082} 1069}
1083 1070
1084int p9_trans_fd_init(void) 1071int p9_trans_fd_init(void)
1085{ 1072{
1086 p9_mux_wq = create_workqueue("v9fs");
1087 if (!p9_mux_wq) {
1088 printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
1089 return -ENOMEM;
1090 }
1091
1092 p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
1093 if (IS_ERR(p9_poll_task)) {
1094 destroy_workqueue(p9_mux_wq);
1095 printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
1096 return PTR_ERR(p9_poll_task);
1097 }
1098
1099 v9fs_register_trans(&p9_tcp_trans); 1073 v9fs_register_trans(&p9_tcp_trans);
1100 v9fs_register_trans(&p9_unix_trans); 1074 v9fs_register_trans(&p9_unix_trans);
1101 v9fs_register_trans(&p9_fd_trans); 1075 v9fs_register_trans(&p9_fd_trans);
@@ -1105,10 +1079,8 @@ int p9_trans_fd_init(void)
1105 1079
1106void p9_trans_fd_exit(void) 1080void p9_trans_fd_exit(void)
1107{ 1081{
1108 kthread_stop(p9_poll_task); 1082 flush_work_sync(&p9_poll_work);
1109 v9fs_unregister_trans(&p9_tcp_trans); 1083 v9fs_unregister_trans(&p9_tcp_trans);
1110 v9fs_unregister_trans(&p9_unix_trans); 1084 v9fs_unregister_trans(&p9_unix_trans);
1111 v9fs_unregister_trans(&p9_fd_trans); 1085 v9fs_unregister_trans(&p9_fd_trans);
1112
1113 destroy_workqueue(p9_mux_wq);
1114} 1086}
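The fd transport drops its private "v9fs" workqueue and the v9fs-poll kthread in favour of the shared kernel workqueue: the poll wakeup path now just schedules a statically declared work item, and module exit only has to flush it. The pattern, reduced to its bones (a generic sketch of the workqueue calls used in the hunks above, not additional patch code):

	static void p9_poll_workfn(struct work_struct *work);	/* drains p9_poll_pending_list */
	static DECLARE_WORK(p9_poll_work, p9_poll_workfn);

	/* wakeup path, cheap enough to call from the poll callback */
	schedule_work(&p9_poll_work);

	/* teardown: wait for any queued or running instance to finish */
	flush_work_sync(&p9_poll_work);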
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 17c5ba7551a5..844a7a5607e3 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -59,7 +59,6 @@
59 * safely advertise a maxsize 59 * safely advertise a maxsize
60 * of 64k */ 60 * of 64k */
61 61
62#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
63/** 62/**
64 * struct p9_trans_rdma - RDMA transport instance 63 * struct p9_trans_rdma - RDMA transport instance
65 * 64 *
@@ -168,7 +167,6 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
168 substring_t args[MAX_OPT_ARGS]; 167 substring_t args[MAX_OPT_ARGS];
169 int option; 168 int option;
170 char *options, *tmp_options; 169 char *options, *tmp_options;
171 int ret;
172 170
173 opts->port = P9_PORT; 171 opts->port = P9_PORT;
174 opts->sq_depth = P9_RDMA_SQ_DEPTH; 172 opts->sq_depth = P9_RDMA_SQ_DEPTH;
@@ -196,7 +194,6 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
196 if (r < 0) { 194 if (r < 0) {
197 P9_DPRINTK(P9_DEBUG_ERROR, 195 P9_DPRINTK(P9_DEBUG_ERROR,
198 "integer field, but no integer?\n"); 196 "integer field, but no integer?\n");
199 ret = r;
200 continue; 197 continue;
201 } 198 }
202 switch (token) { 199 switch (token) {
@@ -425,7 +422,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
425 struct p9_rdma_context *rpl_context = NULL; 422 struct p9_rdma_context *rpl_context = NULL;
426 423
427 /* Allocate an fcall for the reply */ 424 /* Allocate an fcall for the reply */
428 rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); 425 rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
429 if (!rpl_context) { 426 if (!rpl_context) {
430 err = -ENOMEM; 427 err = -ENOMEM;
431 goto err_close; 428 goto err_close;
@@ -438,7 +435,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
438 */ 435 */
439 if (!req->rc) { 436 if (!req->rc) {
440 req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, 437 req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
441 GFP_KERNEL); 438 GFP_NOFS);
442 if (req->rc) { 439 if (req->rc) {
443 req->rc->sdata = (char *) req->rc + 440 req->rc->sdata = (char *) req->rc +
444 sizeof(struct p9_fcall); 441 sizeof(struct p9_fcall);
@@ -469,7 +466,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
469 req->rc = NULL; 466 req->rc = NULL;
470 467
471 /* Post the request */ 468 /* Post the request */
472 c = kmalloc(sizeof *c, GFP_KERNEL); 469 c = kmalloc(sizeof *c, GFP_NOFS);
473 if (!c) { 470 if (!c) {
474 err = -ENOMEM; 471 err = -ENOMEM;
475 goto err_free1; 472 goto err_free1;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c8f3f72ab20e..244e70742183 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,13 +43,17 @@
43#include <net/9p/client.h> 43#include <net/9p/client.h>
44#include <net/9p/transport.h> 44#include <net/9p/transport.h>
45#include <linux/scatterlist.h> 45#include <linux/scatterlist.h>
46#include <linux/swap.h>
46#include <linux/virtio.h> 47#include <linux/virtio.h>
47#include <linux/virtio_9p.h> 48#include <linux/virtio_9p.h>
49#include "trans_common.h"
48 50
49#define VIRTQUEUE_NUM 128 51#define VIRTQUEUE_NUM 128
50 52
51/* a single mutex to manage channel initialization and attachment */ 53/* a single mutex to manage channel initialization and attachment */
52static DEFINE_MUTEX(virtio_9p_lock); 54static DEFINE_MUTEX(virtio_9p_lock);
55static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
56static atomic_t vp_pinned = ATOMIC_INIT(0);
53 57
54/** 58/**
55 * struct virtio_chan - per-instance transport information 59 * struct virtio_chan - per-instance transport information
@@ -77,7 +81,10 @@ struct virtio_chan {
77 struct virtqueue *vq; 81 struct virtqueue *vq;
78 int ring_bufs_avail; 82 int ring_bufs_avail;
79 wait_queue_head_t *vc_wq; 83 wait_queue_head_t *vc_wq;
80 84 /* This is a global limit. Since we don't have a global structure,
 85 * we place it in each channel.
86 */
87 int p9_max_pages;
81 /* Scatterlist: can be too big for stack. */ 88 /* Scatterlist: can be too big for stack. */
82 struct scatterlist sg[VIRTQUEUE_NUM]; 89 struct scatterlist sg[VIRTQUEUE_NUM];
83 90
@@ -140,26 +147,36 @@ static void req_done(struct virtqueue *vq)
140 147
141 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); 148 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
142 149
143 do { 150 while (1) {
144 spin_lock_irqsave(&chan->lock, flags); 151 spin_lock_irqsave(&chan->lock, flags);
145 rc = virtqueue_get_buf(chan->vq, &len); 152 rc = virtqueue_get_buf(chan->vq, &len);
146 153
147 if (rc != NULL) { 154 if (rc == NULL) {
148 if (!chan->ring_bufs_avail) {
149 chan->ring_bufs_avail = 1;
150 wake_up(chan->vc_wq);
151 }
152 spin_unlock_irqrestore(&chan->lock, flags);
153 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
154 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
155 rc->tag);
156 req = p9_tag_lookup(chan->client, rc->tag);
157 req->status = REQ_STATUS_RCVD;
158 p9_client_cb(chan->client, req);
159 } else {
160 spin_unlock_irqrestore(&chan->lock, flags); 155 spin_unlock_irqrestore(&chan->lock, flags);
156 break;
157 }
158
159 chan->ring_bufs_avail = 1;
160 spin_unlock_irqrestore(&chan->lock, flags);
161 /* Wake up anyone waiting for VirtIO ring space. */
162 wake_up(chan->vc_wq);
163 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
164 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
165 req = p9_tag_lookup(chan->client, rc->tag);
166 if (req->tc->private) {
167 struct trans_rpage_info *rp = req->tc->private;
168 int p = rp->rp_nr_pages;
169 /* Release pages */
170 p9_release_req_pages(rp);
171 atomic_sub(p, &vp_pinned);
172 wake_up(&vp_wq);
173 if (rp->rp_alloc)
174 kfree(rp);
175 req->tc->private = NULL;
161 } 176 }
162 } while (rc != NULL); 177 req->status = REQ_STATUS_RCVD;
178 p9_client_cb(chan->client, req);
179 }
163} 180}
164 181
165/** 182/**
@@ -203,6 +220,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
203} 220}
204 221
205/** 222/**
223 * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
224 * this takes a list of pages.
225 * @sg: scatter/gather list to pack into
226 * @start: which segment of the sg_list to start at
227 * @pdata_off: Offset into the first page
228 * @pdata: a list of pages to add into sg.
229 * @count: amount of data to pack into the scatter/gather list
230 */
231static int
232pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
233 struct page **pdata, int count)
234{
235 int s;
236 int i = 0;
237 int index = start;
238
239 if (pdata_off) {
240 s = min((int)(PAGE_SIZE - pdata_off), count);
241 sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
242 count -= s;
243 }
244
245 while (count) {
246 BUG_ON(index > limit);
247 s = min((int)PAGE_SIZE, count);
248 sg_set_page(&sg[index++], pdata[i++], s, 0);
249 count -= s;
250 }
251 return index-start;
252}
253
254/**
206 * p9_virtio_request - issue a request 255 * p9_virtio_request - issue a request
207 * @client: client instance issuing the request 256 * @client: client instance issuing the request
208 * @req: request to be issued 257 * @req: request to be issued
@@ -212,22 +261,114 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
212static int 261static int
213p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 262p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
214{ 263{
215 int in, out; 264 int in, out, inp, outp;
216 struct virtio_chan *chan = client->trans; 265 struct virtio_chan *chan = client->trans;
217 char *rdata = (char *)req->rc+sizeof(struct p9_fcall); 266 char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
218 unsigned long flags; 267 unsigned long flags;
219 int err; 268 size_t pdata_off = 0;
269 struct trans_rpage_info *rpinfo = NULL;
270 int err, pdata_len = 0;
220 271
221 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 272 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
222 273
223req_retry:
224 req->status = REQ_STATUS_SENT; 274 req->status = REQ_STATUS_SENT;
225 275
276 if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
277 int nr_pages = p9_nr_pages(req);
278 int rpinfo_size = sizeof(struct trans_rpage_info) +
279 sizeof(struct page *) * nr_pages;
280
281 if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
282 err = wait_event_interruptible(vp_wq,
283 atomic_read(&vp_pinned) < chan->p9_max_pages);
284 if (err == -ERESTARTSYS)
285 return err;
286 P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
287 }
288
289 if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
290 /* We can use sdata */
291 req->tc->private = req->tc->sdata + req->tc->size;
292 rpinfo = (struct trans_rpage_info *)req->tc->private;
293 rpinfo->rp_alloc = 0;
294 } else {
295 req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
296 if (!req->tc->private) {
297 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
298 "private kmalloc returned NULL");
299 return -ENOMEM;
300 }
301 rpinfo = (struct trans_rpage_info *)req->tc->private;
302 rpinfo->rp_alloc = 1;
303 }
304
305 err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
306 req->tc->id == P9_TREAD ? 1 : 0);
307 if (err < 0) {
308 if (rpinfo->rp_alloc)
309 kfree(rpinfo);
310 return err;
311 } else {
312 atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
313 }
314 }
315
316req_retry_pinned:
226 spin_lock_irqsave(&chan->lock, flags); 317 spin_lock_irqsave(&chan->lock, flags);
318
319 /* Handle out VirtIO ring buffers */
227 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, 320 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
228 req->tc->size); 321 req->tc->size);
229 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, 322
230 client->msize); 323 if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
324 /* We have additional write payload buffer to take care */
325 if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
326 outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
327 pdata_off, rpinfo->rp_data, pdata_len);
328 } else {
329 char *pbuf;
330 if (req->tc->pubuf)
331 pbuf = (__force char *) req->tc->pubuf;
332 else
333 pbuf = req->tc->pkbuf;
334 outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
335 req->tc->pbuf_size);
336 }
337 out += outp;
338 }
339
340 /* Handle in VirtIO ring buffers */
341 if (req->tc->pbuf_size &&
342 ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
343 /*
344 * Take care of additional Read payload.
345 * 11 is the read/write header = PDU Header(7) + IO Size (4).
346 * Arrange it so that the server places the header in the
347 * allocated memory and the payload in the user buffer.
348 */
349 inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
350 /*
351 * Running executables in the filesystem may result in
352 * a read request with kernel buffer as opposed to user buffer.
353 */
354 if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
355 in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
356 pdata_off, rpinfo->rp_data, pdata_len);
357 } else {
358 char *pbuf;
359 if (req->tc->pubuf)
360 pbuf = (__force char *) req->tc->pubuf;
361 else
362 pbuf = req->tc->pkbuf;
363
364 in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
365 pbuf, req->tc->pbuf_size);
366 }
367 in += inp;
368 } else {
369 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
370 client->msize);
371 }
231 372
232 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); 373 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
233 if (err < 0) { 374 if (err < 0) {
@@ -240,12 +381,14 @@ req_retry:
240 return err; 381 return err;
241 382
242 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); 383 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
243 goto req_retry; 384 goto req_retry_pinned;
244 } else { 385 } else {
245 spin_unlock_irqrestore(&chan->lock, flags); 386 spin_unlock_irqrestore(&chan->lock, flags);
246 P9_DPRINTK(P9_DEBUG_TRANS, 387 P9_DPRINTK(P9_DEBUG_TRANS,
247 "9p debug: " 388 "9p debug: "
248 "virtio rpc add_buf returned failure"); 389 "virtio rpc add_buf returned failure");
390 if (rpinfo && rpinfo->rp_alloc)
391 kfree(rpinfo);
249 return -EIO; 392 return -EIO;
250 } 393 }
251 } 394 }
@@ -335,6 +478,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
335 } 478 }
336 init_waitqueue_head(chan->vc_wq); 479 init_waitqueue_head(chan->vc_wq);
337 chan->ring_bufs_avail = 1; 480 chan->ring_bufs_avail = 1;
481 /* Ceiling limit to avoid denial of service attacks */
482 chan->p9_max_pages = nr_free_buffer_pages()/4;
338 483
339 mutex_lock(&virtio_9p_lock); 484 mutex_lock(&virtio_9p_lock);
340 list_add_tail(&chan->chan_list, &virtio_chan_list); 485 list_add_tail(&chan->chan_list, &virtio_chan_list);
@@ -448,6 +593,7 @@ static struct p9_trans_module p9_virtio_trans = {
448 .request = p9_virtio_request, 593 .request = p9_virtio_request,
449 .cancel = p9_virtio_cancel, 594 .cancel = p9_virtio_cancel,
450 .maxsize = PAGE_SIZE*16, 595 .maxsize = PAGE_SIZE*16,
596 .pref = P9_TRANS_PREF_PAYLOAD_SEP,
451 .def = 0, 597 .def = 0,
452 .owner = THIS_MODULE, 598 .owner = THIS_MODULE,
453}; 599};
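With a separated payload the virtqueue is laid out so that the server writes the 11-byte reply header (7-byte PDU header plus the 4-byte count) into kernel memory and the data straight into the pinned user pages. Condensed from the TREAD branch above (same names as in the hunk; error handling and the kernel-buffer fallback omitted):

	/* out: request header built in req->tc->sdata */
	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);

	/* in: 11-byte reply header into rdata, payload into the pinned pages */
	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
	in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
			     pdata_off, rpinfo->rp_data, pdata_len);

	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);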
diff --git a/net/9p/util.c b/net/9p/util.c
index e048701a72d2..da6af81e59d9 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -67,7 +67,7 @@ EXPORT_SYMBOL(p9_idpool_create);
67 67
68/** 68/**
69 * p9_idpool_destroy - create a new per-connection id pool 69 * p9_idpool_destroy - create a new per-connection id pool
70 * @p: idpool to destory 70 * @p: idpool to destroy
71 */ 71 */
72 72
73void p9_idpool_destroy(struct p9_idpool *p) 73void p9_idpool_destroy(struct p9_idpool *p)
@@ -92,7 +92,7 @@ int p9_idpool_get(struct p9_idpool *p)
92 unsigned long flags; 92 unsigned long flags;
93 93
94retry: 94retry:
95 if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) 95 if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
96 return 0; 96 return 0;
97 97
98 spin_lock_irqsave(&p->lock, flags); 98 spin_lock_irqsave(&p->lock, flags);
diff --git a/net/Kconfig b/net/Kconfig
index 72840626284b..878151c772c9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -221,11 +221,31 @@ config RPS
221 depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS 221 depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
222 default y 222 default y
223 223
224config RFS_ACCEL
225 boolean
226 depends on RPS && GENERIC_HARDIRQS
227 select CPU_RMAP
228 default y
229
224config XPS 230config XPS
225 boolean 231 boolean
226 depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS 232 depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
227 default y 233 default y
228 234
235config HAVE_BPF_JIT
236 bool
237
238config BPF_JIT
239 bool "enable BPF Just In Time compiler"
240 depends on HAVE_BPF_JIT
241 depends on MODULES
242 ---help---
243 Berkeley Packet Filter filtering capabilities are normally handled
244 by an interpreter. This option allows the kernel to generate native
245 code when a filter is loaded into memory. This should speed up
246 packet sniffing (libpcap/tcpdump). Note: the admin should enable
247 this feature by changing /proc/sys/net/core/bpf_jit_enable.
248
229menu "Network testing" 249menu "Network testing"
230 250
231config NET_PKTGEN 251config NET_PKTGEN
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c53..a51d9465e628 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
19obj-$(CONFIG_INET) += ipv4/ 19obj-$(CONFIG_INET) += ipv4/
20obj-$(CONFIG_XFRM) += xfrm/ 20obj-$(CONFIG_XFRM) += xfrm/
21obj-$(CONFIG_UNIX) += unix/ 21obj-$(CONFIG_UNIX) += unix/
22ifneq ($(CONFIG_IPV6),) 22obj-$(CONFIG_NET) += ipv6/
23obj-y += ipv6/
24endif
25obj-$(CONFIG_PACKET) += packet/ 23obj-$(CONFIG_PACKET) += packet/
26obj-$(CONFIG_NET_KEY) += key/ 24obj-$(CONFIG_NET_KEY) += key/
27obj-$(CONFIG_BRIDGE) += bridge/ 25obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c410b93fda2e..956a5302002a 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -54,7 +54,6 @@
54#include <linux/capability.h> 54#include <linux/capability.h>
55#include <linux/module.h> 55#include <linux/module.h>
56#include <linux/if_arp.h> 56#include <linux/if_arp.h>
57#include <linux/smp_lock.h>
58#include <linux/termios.h> /* For TIOCOUTQ/INQ */ 57#include <linux/termios.h> /* For TIOCOUTQ/INQ */
59#include <linux/compat.h> 58#include <linux/compat.h>
60#include <linux/slab.h> 59#include <linux/slab.h>
@@ -1052,13 +1051,17 @@ static int atalk_release(struct socket *sock)
1052{ 1051{
1053 struct sock *sk = sock->sk; 1052 struct sock *sk = sock->sk;
1054 1053
1055 lock_kernel();
1056 if (sk) { 1054 if (sk) {
1055 sock_hold(sk);
1056 lock_sock(sk);
1057
1057 sock_orphan(sk); 1058 sock_orphan(sk);
1058 sock->sk = NULL; 1059 sock->sk = NULL;
1059 atalk_destroy_socket(sk); 1060 atalk_destroy_socket(sk);
1061
1062 release_sock(sk);
1063 sock_put(sk);
1060 } 1064 }
1061 unlock_kernel();
1062 return 0; 1065 return 0;
1063} 1066}
1064 1067
@@ -1143,7 +1146,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1143 if (addr->sat_family != AF_APPLETALK) 1146 if (addr->sat_family != AF_APPLETALK)
1144 return -EAFNOSUPPORT; 1147 return -EAFNOSUPPORT;
1145 1148
1146 lock_kernel(); 1149 lock_sock(sk);
1147 if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) { 1150 if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) {
1148 struct atalk_addr *ap = atalk_find_primary(); 1151 struct atalk_addr *ap = atalk_find_primary();
1149 1152
@@ -1179,7 +1182,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1179 sock_reset_flag(sk, SOCK_ZAPPED); 1182 sock_reset_flag(sk, SOCK_ZAPPED);
1180 err = 0; 1183 err = 0;
1181out: 1184out:
1182 unlock_kernel(); 1185 release_sock(sk);
1183 return err; 1186 return err;
1184} 1187}
1185 1188
@@ -1215,7 +1218,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
1215#endif 1218#endif
1216 } 1219 }
1217 1220
1218 lock_kernel(); 1221 lock_sock(sk);
1219 err = -EBUSY; 1222 err = -EBUSY;
1220 if (sock_flag(sk, SOCK_ZAPPED)) 1223 if (sock_flag(sk, SOCK_ZAPPED))
1221 if (atalk_autobind(sk) < 0) 1224 if (atalk_autobind(sk) < 0)
@@ -1233,7 +1236,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
1233 sk->sk_state = TCP_ESTABLISHED; 1236 sk->sk_state = TCP_ESTABLISHED;
1234 err = 0; 1237 err = 0;
1235out: 1238out:
1236 unlock_kernel(); 1239 release_sock(sk);
1237 return err; 1240 return err;
1238} 1241}
1239 1242
@@ -1249,7 +1252,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
1249 struct atalk_sock *at = at_sk(sk); 1252 struct atalk_sock *at = at_sk(sk);
1250 int err; 1253 int err;
1251 1254
1252 lock_kernel(); 1255 lock_sock(sk);
1253 err = -ENOBUFS; 1256 err = -ENOBUFS;
1254 if (sock_flag(sk, SOCK_ZAPPED)) 1257 if (sock_flag(sk, SOCK_ZAPPED))
1255 if (atalk_autobind(sk) < 0) 1258 if (atalk_autobind(sk) < 0)
@@ -1277,17 +1280,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
1277 memcpy(uaddr, &sat, sizeof(sat)); 1280 memcpy(uaddr, &sat, sizeof(sat));
1278 1281
1279out: 1282out:
1280 unlock_kernel(); 1283 release_sock(sk);
1281 return err;
1282}
1283
1284static unsigned int atalk_poll(struct file *file, struct socket *sock,
1285 poll_table *wait)
1286{
1287 int err;
1288 lock_kernel();
1289 err = datagram_poll(file, sock, wait);
1290 unlock_kernel();
1291 return err; 1284 return err;
1292} 1285}
1293 1286
@@ -1596,7 +1589,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1596 if (len > DDP_MAXSZ) 1589 if (len > DDP_MAXSZ)
1597 return -EMSGSIZE; 1590 return -EMSGSIZE;
1598 1591
1599 lock_kernel(); 1592 lock_sock(sk);
1600 if (usat) { 1593 if (usat) {
1601 err = -EBUSY; 1594 err = -EBUSY;
1602 if (sock_flag(sk, SOCK_ZAPPED)) 1595 if (sock_flag(sk, SOCK_ZAPPED))
@@ -1651,7 +1644,9 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1651 sk, size, dev->name); 1644 sk, size, dev->name);
1652 1645
1653 size += dev->hard_header_len; 1646 size += dev->hard_header_len;
1647 release_sock(sk);
1654 skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); 1648 skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
1649 lock_sock(sk);
1655 if (!skb) 1650 if (!skb)
1656 goto out; 1651 goto out;
1657 1652
@@ -1738,7 +1733,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1738 SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len); 1733 SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len);
1739 1734
1740out: 1735out:
1741 unlock_kernel(); 1736 release_sock(sk);
1742 return err ? : len; 1737 return err ? : len;
1743} 1738}
1744 1739
@@ -1753,9 +1748,10 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1753 int err = 0; 1748 int err = 0;
1754 struct sk_buff *skb; 1749 struct sk_buff *skb;
1755 1750
1756 lock_kernel();
1757 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 1751 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1758 flags & MSG_DONTWAIT, &err); 1752 flags & MSG_DONTWAIT, &err);
1753 lock_sock(sk);
1754
1759 if (!skb) 1755 if (!skb)
1760 goto out; 1756 goto out;
1761 1757
@@ -1787,7 +1783,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1787 skb_free_datagram(sk, skb); /* Free the datagram. */ 1783 skb_free_datagram(sk, skb); /* Free the datagram. */
1788 1784
1789out: 1785out:
1790 unlock_kernel(); 1786 release_sock(sk);
1791 return err ? : copied; 1787 return err ? : copied;
1792} 1788}
1793 1789
@@ -1887,7 +1883,7 @@ static const struct proto_ops atalk_dgram_ops = {
1887 .socketpair = sock_no_socketpair, 1883 .socketpair = sock_no_socketpair,
1888 .accept = sock_no_accept, 1884 .accept = sock_no_accept,
1889 .getname = atalk_getname, 1885 .getname = atalk_getname,
1890 .poll = atalk_poll, 1886 .poll = datagram_poll,
1891 .ioctl = atalk_ioctl, 1887 .ioctl = atalk_ioctl,
1892#ifdef CONFIG_COMPAT 1888#ifdef CONFIG_COMPAT
1893 .compat_ioctl = atalk_compat_ioctl, 1889 .compat_ioctl = atalk_compat_ioctl,
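The appletalk conversion follows the standard lock_kernel() removal recipe: state is protected by the per-socket lock via lock_sock()/release_sock(), and the lock is dropped around calls that may sleep, such as sock_alloc_send_skb() in atalk_sendmsg(). The shape of that pattern (a generic sketch, not additional appletalk code):

	lock_sock(sk);
	/* ... validate and update socket state under the lock ... */

	release_sock(sk);		/* sock_alloc_send_skb() may sleep */
	skb = sock_alloc_send_skb(sk, size, flags & MSG_DONTWAIT, &err);
	lock_sock(sk);
	if (!skb)
		goto out;
	/* ... build and queue the frame ... */
out:
	release_sock(sk);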
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fce2eae8d476..2252c2085dac 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -509,7 +509,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
509 write_lock_irq(&devs_lock); 509 write_lock_irq(&devs_lock);
510 net_dev = br2684_find_dev(&be.ifspec); 510 net_dev = br2684_find_dev(&be.ifspec);
511 if (net_dev == NULL) { 511 if (net_dev == NULL) {
512 pr_err("tried to attach to non-existant device\n"); 512 pr_err("tried to attach to non-existent device\n");
513 err = -ENXIO; 513 err = -ENXIO;
514 goto error; 514 goto error;
515 } 515 }
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d257da50fcfb..1d4be60e1390 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -502,8 +502,6 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
502 struct atmarp_entry *entry; 502 struct atmarp_entry *entry;
503 int error; 503 int error;
504 struct clip_vcc *clip_vcc; 504 struct clip_vcc *clip_vcc;
505 struct flowi fl = { .fl4_dst = ip,
506 .fl4_tos = 1 };
507 struct rtable *rt; 505 struct rtable *rt;
508 506
509 if (vcc->push != clip_push) { 507 if (vcc->push != clip_push) {
@@ -520,9 +518,9 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
520 unlink_clip_vcc(clip_vcc); 518 unlink_clip_vcc(clip_vcc);
521 return 0; 519 return 0;
522 } 520 }
523 error = ip_route_output_key(&init_net, &rt, &fl); 521 rt = ip_route_output(&init_net, ip, 0, 1, 0);
524 if (error) 522 if (IS_ERR(rt))
525 return error; 523 return PTR_ERR(rt);
526 neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); 524 neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
527 ip_rt_put(rt); 525 ip_rt_put(rt);
528 if (!neigh) 526 if (!neigh)
diff --git a/net/atm/common.c b/net/atm/common.c
index 1b9c52a02cd3..22b963d06a10 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -252,6 +252,7 @@ void atm_dev_release_vccs(struct atm_dev *dev)
252 } 252 }
253 write_unlock_irq(&vcc_sklist_lock); 253 write_unlock_irq(&vcc_sklist_lock);
254} 254}
255EXPORT_SYMBOL(atm_dev_release_vccs);
255 256
256static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) 257static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
257{ 258{
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 38754fdb88ba..25073b6ef474 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -129,7 +129,6 @@ static struct net_device *dev_lec[MAX_LEC_ITF];
129#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 129#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
130static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) 130static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
131{ 131{
132 struct ethhdr *eth;
133 char *buff; 132 char *buff;
134 struct lec_priv *priv; 133 struct lec_priv *priv;
135 134
@@ -138,7 +137,6 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
138 * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit 137 * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit
139 * as the Config BPDU has 138 * as the Config BPDU has
140 */ 139 */
141 eth = (struct ethhdr *)skb->data;
142 buff = skb->data + skb->dev->hard_header_len; 140 buff = skb->data + skb->dev->hard_header_len;
143 if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) { 141 if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) {
144 struct sock *sk; 142 struct sock *sk;
@@ -1180,7 +1178,6 @@ static int __init lane_module_init(void)
1180static void __exit lane_module_cleanup(void) 1178static void __exit lane_module_cleanup(void)
1181{ 1179{
1182 int i; 1180 int i;
1183 struct lec_priv *priv;
1184 1181
1185 remove_proc_entry("lec", atm_proc_root); 1182 remove_proc_entry("lec", atm_proc_root);
1186 1183
@@ -1188,7 +1185,6 @@ static void __exit lane_module_cleanup(void)
1188 1185
1189 for (i = 0; i < MAX_LEC_ITF; i++) { 1186 for (i = 0; i < MAX_LEC_ITF; i++) {
1190 if (dev_lec[i] != NULL) { 1187 if (dev_lec[i] != NULL) {
1191 priv = netdev_priv(dev_lec[i]);
1192 unregister_netdev(dev_lec[i]); 1188 unregister_netdev(dev_lec[i]);
1193 free_netdev(dev_lec[i]); 1189 free_netdev(dev_lec[i]);
1194 dev_lec[i] = NULL; 1190 dev_lec[i] = NULL;
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 9d14d196cc1d..dfc071966463 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -35,7 +35,7 @@ struct lecdatahdr_8025 {
35 * Operations that LANE2 capable device can do. Two first functions 35 * Operations that LANE2 capable device can do. Two first functions
36 * are used to make the device do things. See spec 3.1.3 and 3.1.4. 36 * are used to make the device do things. See spec 3.1.3 and 3.1.4.
37 * 37 *
38 * The third function is intented for the MPOA component sitting on 38 * The third function is intended for the MPOA component sitting on
39 * top of the LANE device. The MPOA component assigns it's own function 39 * top of the LANE device. The MPOA component assigns it's own function
40 * to (*associate_indicator)() and the LANE device will use that 40 * to (*associate_indicator)() and the LANE device will use that
41 * function to tell about TLVs it sees floating through. 41 * function to tell about TLVs it sees floating through.
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 6da5daeebab7..e7c69f4619ec 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1538,8 +1538,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1538 } 1538 }
1539 1539
1540 /* Build a packet */ 1540 /* Build a packet */
1541 SOCK_DEBUG(sk, "AX.25: sendto: Addresses built. Building packet.\n");
1542
1543 /* Assume the worst case */ 1541 /* Assume the worst case */
1544 size = len + ax25->ax25_dev->dev->hard_header_len; 1542 size = len + ax25->ax25_dev->dev->hard_header_len;
1545 1543
@@ -1549,8 +1547,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1549 1547
1550 skb_reserve(skb, size - len); 1548 skb_reserve(skb, size - len);
1551 1549
1552 SOCK_DEBUG(sk, "AX.25: Appending user data\n");
1553
1554 /* User data follows immediately after the AX.25 data */ 1550 /* User data follows immediately after the AX.25 data */
1555 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 1551 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1556 err = -EFAULT; 1552 err = -EFAULT;
@@ -1564,8 +1560,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1564 if (!ax25->pidincl) 1560 if (!ax25->pidincl)
1565 *skb_push(skb, 1) = sk->sk_protocol; 1561 *skb_push(skb, 1) = sk->sk_protocol;
1566 1562
1567 SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
1568
1569 if (sk->sk_type == SOCK_SEQPACKET) { 1563 if (sk->sk_type == SOCK_SEQPACKET) {
1570 /* Connected mode sockets go via the LAPB machine */ 1564 /* Connected mode sockets go via the LAPB machine */
1571 if (sk->sk_state != TCP_ESTABLISHED) { 1565 if (sk->sk_state != TCP_ESTABLISHED) {
@@ -1583,22 +1577,14 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1583 1577
1584 skb_push(skb, 1 + ax25_addr_size(dp)); 1578 skb_push(skb, 1 + ax25_addr_size(dp));
1585 1579
1586 SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp); 1580 /* Building AX.25 Header */
1587
1588 if (dp != NULL)
1589 SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
1590 1581
1591 /* Build an AX.25 header */ 1582 /* Build an AX.25 header */
1592 lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call, 1583 lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
1593 dp, AX25_COMMAND, AX25_MODULUS); 1584 dp, AX25_COMMAND, AX25_MODULUS);
1594 1585
1595 SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
1596
1597 skb_set_transport_header(skb, lv); 1586 skb_set_transport_header(skb, lv);
1598 1587
1599 SOCK_DEBUG(sk, "base=%p pos=%p\n",
1600 skb->data, skb_transport_header(skb));
1601
1602 *skb_transport_header(skb) = AX25_UI; 1588 *skb_transport_header(skb) = AX25_UI;
1603 1589
1604 /* Datagram frames go straight out of the door as UI */ 1590 /* Datagram frames go straight out of the door as UI */
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 5a0dda8df492..60b545e2822a 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ax25_register_pid);
58 58
59void ax25_protocol_release(unsigned int pid) 59void ax25_protocol_release(unsigned int pid)
60{ 60{
61 struct ax25_protocol *s, *protocol; 61 struct ax25_protocol *protocol;
62 62
63 write_lock_bh(&protocol_list_lock); 63 write_lock_bh(&protocol_list_lock);
64 protocol = protocol_list; 64 protocol = protocol_list;
@@ -72,7 +72,6 @@ void ax25_protocol_release(unsigned int pid)
72 72
73 while (protocol != NULL && protocol->next != NULL) { 73 while (protocol != NULL && protocol->next != NULL) {
74 if (protocol->next->pid == pid) { 74 if (protocol->next->pid == pid) {
75 s = protocol->next;
76 protocol->next = protocol->next->next; 75 protocol->next = protocol->next->next;
77 goto out; 76 goto out;
78 } 77 }
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index d936aeccd194..2de93d00631b 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
1# 1#
2# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2# Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3# 3#
4# Marek Lindner, Simon Wunderlich 4# Marek Lindner, Simon Wunderlich
5# 5#
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
index 3850a3ecf947..a8c32030527c 100644
--- a/net/batman-adv/aggregation.c
+++ b/net/batman-adv/aggregation.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -23,11 +23,12 @@
23#include "aggregation.h" 23#include "aggregation.h"
24#include "send.h" 24#include "send.h"
25#include "routing.h" 25#include "routing.h"
26#include "hard-interface.h"
26 27
27/* calculate the size of the hna information for a given packet */ 28/* calculate the size of the tt information for a given packet */
28static int hna_len(struct batman_packet *batman_packet) 29static int tt_len(struct batman_packet *batman_packet)
29{ 30{
30 return batman_packet->num_hna * ETH_ALEN; 31 return batman_packet->num_tt * ETH_ALEN;
31} 32}
32 33
33/* return true if new_packet can be aggregated with forw_packet */ 34/* return true if new_packet can be aggregated with forw_packet */
@@ -35,7 +36,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet,
35 int packet_len, 36 int packet_len,
36 unsigned long send_time, 37 unsigned long send_time,
37 bool directlink, 38 bool directlink,
38 struct batman_if *if_incoming, 39 struct hard_iface *if_incoming,
39 struct forw_packet *forw_packet) 40 struct forw_packet *forw_packet)
40{ 41{
41 struct batman_packet *batman_packet = 42 struct batman_packet *batman_packet =
@@ -95,23 +96,25 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet,
95 return false; 96 return false;
96} 97}
97 98
98#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
99/* create a new aggregated packet and add this packet to it */ 99/* create a new aggregated packet and add this packet to it */
100static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, 100static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
101 unsigned long send_time, bool direct_link, 101 unsigned long send_time, bool direct_link,
102 struct batman_if *if_incoming, 102 struct hard_iface *if_incoming,
103 int own_packet) 103 int own_packet)
104{ 104{
105 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 105 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
106 struct forw_packet *forw_packet_aggr; 106 struct forw_packet *forw_packet_aggr;
107 unsigned char *skb_buff; 107 unsigned char *skb_buff;
108 108
109 if (!atomic_inc_not_zero(&if_incoming->refcount))
110 return;
111
109 /* own packet should always be scheduled */ 112 /* own packet should always be scheduled */
110 if (!own_packet) { 113 if (!own_packet) {
111 if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) { 114 if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
112 bat_dbg(DBG_BATMAN, bat_priv, 115 bat_dbg(DBG_BATMAN, bat_priv,
113 "batman packet queue full\n"); 116 "batman packet queue full\n");
114 return; 117 goto out;
115 } 118 }
116 } 119 }
117 120
@@ -119,7 +122,7 @@ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
119 if (!forw_packet_aggr) { 122 if (!forw_packet_aggr) {
120 if (!own_packet) 123 if (!own_packet)
121 atomic_inc(&bat_priv->batman_queue_left); 124 atomic_inc(&bat_priv->batman_queue_left);
122 return; 125 goto out;
123 } 126 }
124 127
125 if ((atomic_read(&bat_priv->aggregated_ogms)) && 128 if ((atomic_read(&bat_priv->aggregated_ogms)) &&
@@ -134,7 +137,7 @@ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
134 if (!own_packet) 137 if (!own_packet)
135 atomic_inc(&bat_priv->batman_queue_left); 138 atomic_inc(&bat_priv->batman_queue_left);
136 kfree(forw_packet_aggr); 139 kfree(forw_packet_aggr);
137 return; 140 goto out;
138 } 141 }
139 skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr)); 142 skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
140 143
@@ -165,6 +168,10 @@ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
165 queue_delayed_work(bat_event_workqueue, 168 queue_delayed_work(bat_event_workqueue,
166 &forw_packet_aggr->delayed_work, 169 &forw_packet_aggr->delayed_work,
167 send_time - jiffies); 170 send_time - jiffies);
171
172 return;
173out:
174 hardif_free_ref(if_incoming);
168} 175}
169 176
170/* aggregate a new packet into the existing aggregation */ 177/* aggregate a new packet into the existing aggregation */
@@ -188,7 +195,7 @@ static void aggregate(struct forw_packet *forw_packet_aggr,
188 195
189void add_bat_packet_to_list(struct bat_priv *bat_priv, 196void add_bat_packet_to_list(struct bat_priv *bat_priv,
190 unsigned char *packet_buff, int packet_len, 197 unsigned char *packet_buff, int packet_len,
191 struct batman_if *if_incoming, char own_packet, 198 struct hard_iface *if_incoming, char own_packet,
192 unsigned long send_time) 199 unsigned long send_time)
193{ 200{
194 /** 201 /**
@@ -247,11 +254,11 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv,
247 254
248/* unpack the aggregated packets and process them one by one */ 255/* unpack the aggregated packets and process them one by one */
249void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, 256void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
250 int packet_len, struct batman_if *if_incoming) 257 int packet_len, struct hard_iface *if_incoming)
251{ 258{
252 struct batman_packet *batman_packet; 259 struct batman_packet *batman_packet;
253 int buff_pos = 0; 260 int buff_pos = 0;
254 unsigned char *hna_buff; 261 unsigned char *tt_buff;
255 262
256 batman_packet = (struct batman_packet *)packet_buff; 263 batman_packet = (struct batman_packet *)packet_buff;
257 264
@@ -260,14 +267,14 @@ void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
260 orig_interval. */ 267 orig_interval. */
261 batman_packet->seqno = ntohl(batman_packet->seqno); 268 batman_packet->seqno = ntohl(batman_packet->seqno);
262 269
263 hna_buff = packet_buff + buff_pos + BAT_PACKET_LEN; 270 tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN;
264 receive_bat_packet(ethhdr, batman_packet, 271 receive_bat_packet(ethhdr, batman_packet,
265 hna_buff, hna_len(batman_packet), 272 tt_buff, tt_len(batman_packet),
266 if_incoming); 273 if_incoming);
267 274
268 buff_pos += BAT_PACKET_LEN + hna_len(batman_packet); 275 buff_pos += BAT_PACKET_LEN + tt_len(batman_packet);
269 batman_packet = (struct batman_packet *) 276 batman_packet = (struct batman_packet *)
270 (packet_buff + buff_pos); 277 (packet_buff + buff_pos);
271 } while (aggregated_packet(buff_pos, packet_len, 278 } while (aggregated_packet(buff_pos, packet_len,
272 batman_packet->num_hna)); 279 batman_packet->num_tt));
273} 280}
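Besides the hna-to-tt and batman_if-to-hard_iface renames, new_aggregated_packet() now takes its own reference on the incoming interface and releases it on every failure path, which is why the early returns become goto out. The reference pattern in outline (a sketch using the helpers named in the hunk; on success the reference stays with the scheduled aggregation):

	if (!atomic_inc_not_zero(&if_incoming->refcount))
		return;			/* interface is being removed */

	/* ... allocate and queue the aggregated packet; any failure ... */
	if (err)
		goto out;

	return;				/* success: keep the interface reference */
out:
	hardif_free_ref(if_incoming);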
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
index 71a91b3da913..7e6d72fbf540 100644
--- a/net/batman-adv/aggregation.h
+++ b/net/batman-adv/aggregation.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -25,9 +25,9 @@
25#include "main.h" 25#include "main.h"
26 26
27/* is there another aggregated packet here? */ 27/* is there another aggregated packet here? */
28static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna) 28static inline int aggregated_packet(int buff_pos, int packet_len, int num_tt)
29{ 29{
30 int next_buff_pos = buff_pos + BAT_PACKET_LEN + (num_hna * ETH_ALEN); 30 int next_buff_pos = buff_pos + BAT_PACKET_LEN + (num_tt * ETH_ALEN);
31 31
32 return (next_buff_pos <= packet_len) && 32 return (next_buff_pos <= packet_len) &&
33 (next_buff_pos <= MAX_AGGREGATION_BYTES); 33 (next_buff_pos <= MAX_AGGREGATION_BYTES);
@@ -35,9 +35,9 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna)
35 35
36void add_bat_packet_to_list(struct bat_priv *bat_priv, 36void add_bat_packet_to_list(struct bat_priv *bat_priv,
37 unsigned char *packet_buff, int packet_len, 37 unsigned char *packet_buff, int packet_len,
38 struct batman_if *if_incoming, char own_packet, 38 struct hard_iface *if_incoming, char own_packet,
39 unsigned long send_time); 39 unsigned long send_time);
40void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, 40void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
41 int packet_len, struct batman_if *if_incoming); 41 int packet_len, struct hard_iface *if_incoming);
42 42
43#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ 43#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
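For reference, the unpack loop in receive_aggr_bat_packet() and the aggregated_packet() bound check above combine into a simple buffer walk; tt_len() is assumed to expand to num_tt * ETH_ALEN, matching the arithmetic in the inline helper. A minimal sketch, with the per-packet processing elided:

	/* sketch only: iterate over the OGMs packed into one aggregated
	 * buffer; assumes tt_len(p) == p->num_tt * ETH_ALEN */
	static void walk_aggregated(unsigned char *buff, int packet_len)
	{
		struct batman_packet *p = (struct batman_packet *)buff;
		int pos = 0;

		do {
			/* process p and its translation-table entries at
			 * buff + pos + BAT_PACKET_LEN here */
			pos += BAT_PACKET_LEN + tt_len(p);
			p = (struct batman_packet *)(buff + pos);
		} while (aggregated_packet(pos, packet_len, p->num_tt));
	}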
diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c
index 0ae81d07f102..abaeec5f6247 100644
--- a/net/batman-adv/bat_debugfs.c
+++ b/net/batman-adv/bat_debugfs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -52,7 +52,6 @@ static void emit_log_char(struct debug_log *debug_log, char c)
52 52
53static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) 53static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
54{ 54{
55 int printed_len;
56 va_list args; 55 va_list args;
57 static char debug_log_buf[256]; 56 static char debug_log_buf[256];
58 char *p; 57 char *p;
@@ -62,8 +61,7 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
62 61
63 spin_lock_bh(&debug_log->lock); 62 spin_lock_bh(&debug_log->lock);
64 va_start(args, fmt); 63 va_start(args, fmt);
65 printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf), 64 vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
66 fmt, args);
67 va_end(args); 65 va_end(args);
68 66
69 for (p = debug_log_buf; *p != 0; p++) 67 for (p = debug_log_buf; *p != 0; p++)
@@ -243,13 +241,13 @@ static int softif_neigh_open(struct inode *inode, struct file *file)
243static int transtable_global_open(struct inode *inode, struct file *file) 241static int transtable_global_open(struct inode *inode, struct file *file)
244{ 242{
245 struct net_device *net_dev = (struct net_device *)inode->i_private; 243 struct net_device *net_dev = (struct net_device *)inode->i_private;
246 return single_open(file, hna_global_seq_print_text, net_dev); 244 return single_open(file, tt_global_seq_print_text, net_dev);
247} 245}
248 246
249static int transtable_local_open(struct inode *inode, struct file *file) 247static int transtable_local_open(struct inode *inode, struct file *file)
250{ 248{
251 struct net_device *net_dev = (struct net_device *)inode->i_private; 249 struct net_device *net_dev = (struct net_device *)inode->i_private;
252 return single_open(file, hna_local_seq_print_text, net_dev); 250 return single_open(file, tt_local_seq_print_text, net_dev);
253} 251}
254 252
255static int vis_data_open(struct inode *inode, struct file *file) 253static int vis_data_open(struct inode *inode, struct file *file)
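The fdebug_log() hunk above can drop the unused printed_len because vscnprintf() is bounded and always NUL-terminates, so the formatted buffer can be consumed as a plain C string. A minimal sketch of that formatting step (helper name is a placeholder, not from the file):

	static void log_fmt(char *dst, size_t len, const char *fmt, ...)
	{
		va_list args;

		va_start(args, fmt);
		/* bounded and NUL-terminated; the return value is not needed */
		vscnprintf(dst, len, fmt, args);
		va_end(args);
	}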
diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h
index 72df532b7d5f..bc9cda3f01e1 100644
--- a/net/batman-adv/bat_debugfs.h
+++ b/net/batman-adv/bat_debugfs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
index cd7bb51825f1..497a0700cc3c 100644
--- a/net/batman-adv/bat_sysfs.c
+++ b/net/batman-adv/bat_sysfs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -441,16 +441,16 @@ static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr,
441 char *buff) 441 char *buff)
442{ 442{
443 struct net_device *net_dev = kobj_to_netdev(kobj); 443 struct net_device *net_dev = kobj_to_netdev(kobj);
444 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 444 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
445 ssize_t length; 445 ssize_t length;
446 446
447 if (!batman_if) 447 if (!hard_iface)
448 return 0; 448 return 0;
449 449
450 length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ? 450 length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ?
451 "none" : batman_if->soft_iface->name); 451 "none" : hard_iface->soft_iface->name);
452 452
453 kref_put(&batman_if->refcount, hardif_free_ref); 453 hardif_free_ref(hard_iface);
454 454
455 return length; 455 return length;
456} 456}
@@ -459,11 +459,11 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
459 char *buff, size_t count) 459 char *buff, size_t count)
460{ 460{
461 struct net_device *net_dev = kobj_to_netdev(kobj); 461 struct net_device *net_dev = kobj_to_netdev(kobj);
462 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 462 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
463 int status_tmp = -1; 463 int status_tmp = -1;
464 int ret; 464 int ret = count;
465 465
466 if (!batman_if) 466 if (!hard_iface)
467 return count; 467 return count;
468 468
469 if (buff[count - 1] == '\n') 469 if (buff[count - 1] == '\n')
@@ -472,7 +472,7 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
472 if (strlen(buff) >= IFNAMSIZ) { 472 if (strlen(buff) >= IFNAMSIZ) {
473 pr_err("Invalid parameter for 'mesh_iface' setting received: " 473 pr_err("Invalid parameter for 'mesh_iface' setting received: "
474 "interface name too long '%s'\n", buff); 474 "interface name too long '%s'\n", buff);
475 kref_put(&batman_if->refcount, hardif_free_ref); 475 hardif_free_ref(hard_iface);
476 return -EINVAL; 476 return -EINVAL;
477 } 477 }
478 478
@@ -481,30 +481,33 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
481 else 481 else
482 status_tmp = IF_I_WANT_YOU; 482 status_tmp = IF_I_WANT_YOU;
483 483
484 if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) && 484 if (hard_iface->if_status == status_tmp)
485 (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) { 485 goto out;
486 kref_put(&batman_if->refcount, hardif_free_ref); 486
487 return count; 487 if ((hard_iface->soft_iface) &&
488 (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
489 goto out;
490
491 if (!rtnl_trylock()) {
492 ret = -ERESTARTSYS;
493 goto out;
488 } 494 }
489 495
490 if (status_tmp == IF_NOT_IN_USE) { 496 if (status_tmp == IF_NOT_IN_USE) {
491 rtnl_lock(); 497 hardif_disable_interface(hard_iface);
492 hardif_disable_interface(batman_if); 498 goto unlock;
493 rtnl_unlock();
494 kref_put(&batman_if->refcount, hardif_free_ref);
495 return count;
496 } 499 }
497 500
498 /* if the interface already is in use */ 501 /* if the interface already is in use */
499 if (batman_if->if_status != IF_NOT_IN_USE) { 502 if (hard_iface->if_status != IF_NOT_IN_USE)
500 rtnl_lock(); 503 hardif_disable_interface(hard_iface);
501 hardif_disable_interface(batman_if);
502 rtnl_unlock();
503 }
504 504
505 ret = hardif_enable_interface(batman_if, buff); 505 ret = hardif_enable_interface(hard_iface, buff);
506 kref_put(&batman_if->refcount, hardif_free_ref);
507 506
507unlock:
508 rtnl_unlock();
509out:
510 hardif_free_ref(hard_iface);
508 return ret; 511 return ret;
509} 512}
510 513
@@ -512,13 +515,13 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
512 char *buff) 515 char *buff)
513{ 516{
514 struct net_device *net_dev = kobj_to_netdev(kobj); 517 struct net_device *net_dev = kobj_to_netdev(kobj);
515 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 518 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
516 ssize_t length; 519 ssize_t length;
517 520
518 if (!batman_if) 521 if (!hard_iface)
519 return 0; 522 return 0;
520 523
521 switch (batman_if->if_status) { 524 switch (hard_iface->if_status) {
522 case IF_TO_BE_REMOVED: 525 case IF_TO_BE_REMOVED:
523 length = sprintf(buff, "disabling\n"); 526 length = sprintf(buff, "disabling\n");
524 break; 527 break;
@@ -537,7 +540,7 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
537 break; 540 break;
538 } 541 }
539 542
540 kref_put(&batman_if->refcount, hardif_free_ref); 543 hardif_free_ref(hard_iface);
541 544
542 return length; 545 return length;
543} 546}
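store_mesh_iface() above takes the RTNL with rtnl_trylock() and returns -ERESTARTSYS when the lock is contended, rather than sleeping inside a sysfs store handler. Reduced to a sketch (handler name and body are placeholders):

	static ssize_t store_example(struct kobject *kobj, struct attribute *attr,
				     char *buff, size_t count)
	{
		ssize_t ret = count;

		if (!rtnl_trylock())
			return -ERESTARTSYS;

		/* reconfigure interfaces under RTNL here */

		rtnl_unlock();
		return ret;
	}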
diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h
index 7f186c007b4f..02f1fa7aadfa 100644
--- a/net/batman-adv/bat_sysfs.h
+++ b/net/batman-adv/bat_sysfs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bbcd8f744cdd..ad2ca925b3e0 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ac54017601b1..769c246d1fc1 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 0065ffb8d96d..61605a0f3f39 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -23,63 +23,88 @@
23#include "gateway_client.h" 23#include "gateway_client.h"
24#include "gateway_common.h" 24#include "gateway_common.h"
25#include "hard-interface.h" 25#include "hard-interface.h"
26#include "originator.h"
26#include <linux/ip.h> 27#include <linux/ip.h>
27#include <linux/ipv6.h> 28#include <linux/ipv6.h>
28#include <linux/udp.h> 29#include <linux/udp.h>
29#include <linux/if_vlan.h> 30#include <linux/if_vlan.h>
30 31
31static void gw_node_free_ref(struct kref *refcount) 32static void gw_node_free_ref(struct gw_node *gw_node)
32{ 33{
33 struct gw_node *gw_node; 34 if (atomic_dec_and_test(&gw_node->refcount))
34 35 kfree_rcu(gw_node, rcu);
35 gw_node = container_of(refcount, struct gw_node, refcount);
36 kfree(gw_node);
37} 36}
38 37
39static void gw_node_free_rcu(struct rcu_head *rcu) 38static struct gw_node *gw_get_selected_gw_node(struct bat_priv *bat_priv)
40{ 39{
41 struct gw_node *gw_node; 40 struct gw_node *gw_node;
42 41
43 gw_node = container_of(rcu, struct gw_node, rcu); 42 rcu_read_lock();
44 kref_put(&gw_node->refcount, gw_node_free_ref); 43 gw_node = rcu_dereference(bat_priv->curr_gw);
45} 44 if (!gw_node)
46 45 goto out;
47void *gw_get_selected(struct bat_priv *bat_priv)
48{
49 struct gw_node *curr_gateway_tmp = bat_priv->curr_gw;
50 46
51 if (!curr_gateway_tmp) 47 if (!atomic_inc_not_zero(&gw_node->refcount))
52 return NULL; 48 gw_node = NULL;
53 49
54 return curr_gateway_tmp->orig_node; 50out:
51 rcu_read_unlock();
52 return gw_node;
55} 53}
56 54
57void gw_deselect(struct bat_priv *bat_priv) 55struct orig_node *gw_get_selected_orig(struct bat_priv *bat_priv)
58{ 56{
59 struct gw_node *gw_node = bat_priv->curr_gw; 57 struct gw_node *gw_node;
58 struct orig_node *orig_node = NULL;
59
60 gw_node = gw_get_selected_gw_node(bat_priv);
61 if (!gw_node)
62 goto out;
63
64 rcu_read_lock();
65 orig_node = gw_node->orig_node;
66 if (!orig_node)
67 goto unlock;
60 68
61 bat_priv->curr_gw = NULL; 69 if (!atomic_inc_not_zero(&orig_node->refcount))
70 orig_node = NULL;
62 71
72unlock:
73 rcu_read_unlock();
74out:
63 if (gw_node) 75 if (gw_node)
64 kref_put(&gw_node->refcount, gw_node_free_ref); 76 gw_node_free_ref(gw_node);
77 return orig_node;
65} 78}
66 79
67static struct gw_node *gw_select(struct bat_priv *bat_priv, 80static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node)
68 struct gw_node *new_gw_node)
69{ 81{
70 struct gw_node *curr_gw_node = bat_priv->curr_gw; 82 struct gw_node *curr_gw_node;
83
84 spin_lock_bh(&bat_priv->gw_list_lock);
71 85
72 if (new_gw_node) 86 if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount))
73 kref_get(&new_gw_node->refcount); 87 new_gw_node = NULL;
74 88
75 bat_priv->curr_gw = new_gw_node; 89 curr_gw_node = bat_priv->curr_gw;
76 return curr_gw_node; 90 rcu_assign_pointer(bat_priv->curr_gw, new_gw_node);
91
92 if (curr_gw_node)
93 gw_node_free_ref(curr_gw_node);
94
95 spin_unlock_bh(&bat_priv->gw_list_lock);
96}
97
98void gw_deselect(struct bat_priv *bat_priv)
99{
100 gw_select(bat_priv, NULL);
77} 101}
78 102
79void gw_election(struct bat_priv *bat_priv) 103void gw_election(struct bat_priv *bat_priv)
80{ 104{
81 struct hlist_node *node; 105 struct hlist_node *node;
82 struct gw_node *gw_node, *curr_gw_tmp = NULL, *old_gw_node = NULL; 106 struct gw_node *gw_node, *curr_gw = NULL, *curr_gw_tmp = NULL;
107 struct neigh_node *router;
83 uint8_t max_tq = 0; 108 uint8_t max_tq = 0;
84 uint32_t max_gw_factor = 0, tmp_gw_factor = 0; 109 uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
85 int down, up; 110 int down, up;
@@ -93,28 +118,25 @@ void gw_election(struct bat_priv *bat_priv)
93 if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) 118 if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT)
94 return; 119 return;
95 120
96 if (bat_priv->curr_gw) 121 curr_gw = gw_get_selected_gw_node(bat_priv);
97 return; 122 if (curr_gw)
123 goto out;
98 124
99 rcu_read_lock(); 125 rcu_read_lock();
100 if (hlist_empty(&bat_priv->gw_list)) { 126 if (hlist_empty(&bat_priv->gw_list)) {
101 rcu_read_unlock(); 127 bat_dbg(DBG_BATMAN, bat_priv,
102 128 "Removing selected gateway - "
103 if (bat_priv->curr_gw) { 129 "no gateway in range\n");
104 bat_dbg(DBG_BATMAN, bat_priv, 130 gw_deselect(bat_priv);
105 "Removing selected gateway - " 131 goto unlock;
106 "no gateway in range\n");
107 gw_deselect(bat_priv);
108 }
109
110 return;
111 } 132 }
112 133
113 hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { 134 hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
114 if (!gw_node->orig_node->router) 135 if (gw_node->deleted)
115 continue; 136 continue;
116 137
117 if (gw_node->deleted) 138 router = orig_node_get_router(gw_node->orig_node);
139 if (!router)
118 continue; 140 continue;
119 141
120 switch (atomic_read(&bat_priv->gw_sel_class)) { 142 switch (atomic_read(&bat_priv->gw_sel_class)) {
@@ -122,15 +144,14 @@ void gw_election(struct bat_priv *bat_priv)
122 gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, 144 gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags,
123 &down, &up); 145 &down, &up);
124 146
125 tmp_gw_factor = (gw_node->orig_node->router->tq_avg * 147 tmp_gw_factor = (router->tq_avg * router->tq_avg *
126 gw_node->orig_node->router->tq_avg *
127 down * 100 * 100) / 148 down * 100 * 100) /
128 (TQ_LOCAL_WINDOW_SIZE * 149 (TQ_LOCAL_WINDOW_SIZE *
129 TQ_LOCAL_WINDOW_SIZE * 64); 150 TQ_LOCAL_WINDOW_SIZE * 64);
130 151
131 if ((tmp_gw_factor > max_gw_factor) || 152 if ((tmp_gw_factor > max_gw_factor) ||
132 ((tmp_gw_factor == max_gw_factor) && 153 ((tmp_gw_factor == max_gw_factor) &&
133 (gw_node->orig_node->router->tq_avg > max_tq))) 154 (router->tq_avg > max_tq)))
134 curr_gw_tmp = gw_node; 155 curr_gw_tmp = gw_node;
135 break; 156 break;
136 157
@@ -142,75 +163,83 @@ void gw_election(struct bat_priv *bat_priv)
142 * soon as a better gateway appears which has 163 * soon as a better gateway appears which has
143 * $routing_class more tq points) 164 * $routing_class more tq points)
144 **/ 165 **/
145 if (gw_node->orig_node->router->tq_avg > max_tq) 166 if (router->tq_avg > max_tq)
146 curr_gw_tmp = gw_node; 167 curr_gw_tmp = gw_node;
147 break; 168 break;
148 } 169 }
149 170
150 if (gw_node->orig_node->router->tq_avg > max_tq) 171 if (router->tq_avg > max_tq)
151 max_tq = gw_node->orig_node->router->tq_avg; 172 max_tq = router->tq_avg;
152 173
153 if (tmp_gw_factor > max_gw_factor) 174 if (tmp_gw_factor > max_gw_factor)
154 max_gw_factor = tmp_gw_factor; 175 max_gw_factor = tmp_gw_factor;
176
177 neigh_node_free_ref(router);
155 } 178 }
156 179
157 if (bat_priv->curr_gw != curr_gw_tmp) { 180 if (curr_gw != curr_gw_tmp) {
158 if ((bat_priv->curr_gw) && (!curr_gw_tmp)) 181 router = orig_node_get_router(curr_gw_tmp->orig_node);
182 if (!router)
183 goto unlock;
184
185 if ((curr_gw) && (!curr_gw_tmp))
159 bat_dbg(DBG_BATMAN, bat_priv, 186 bat_dbg(DBG_BATMAN, bat_priv,
160 "Removing selected gateway - " 187 "Removing selected gateway - "
161 "no gateway in range\n"); 188 "no gateway in range\n");
162 else if ((!bat_priv->curr_gw) && (curr_gw_tmp)) 189 else if ((!curr_gw) && (curr_gw_tmp))
163 bat_dbg(DBG_BATMAN, bat_priv, 190 bat_dbg(DBG_BATMAN, bat_priv,
164 "Adding route to gateway %pM " 191 "Adding route to gateway %pM "
165 "(gw_flags: %i, tq: %i)\n", 192 "(gw_flags: %i, tq: %i)\n",
166 curr_gw_tmp->orig_node->orig, 193 curr_gw_tmp->orig_node->orig,
167 curr_gw_tmp->orig_node->gw_flags, 194 curr_gw_tmp->orig_node->gw_flags,
168 curr_gw_tmp->orig_node->router->tq_avg); 195 router->tq_avg);
169 else 196 else
170 bat_dbg(DBG_BATMAN, bat_priv, 197 bat_dbg(DBG_BATMAN, bat_priv,
171 "Changing route to gateway %pM " 198 "Changing route to gateway %pM "
172 "(gw_flags: %i, tq: %i)\n", 199 "(gw_flags: %i, tq: %i)\n",
173 curr_gw_tmp->orig_node->orig, 200 curr_gw_tmp->orig_node->orig,
174 curr_gw_tmp->orig_node->gw_flags, 201 curr_gw_tmp->orig_node->gw_flags,
175 curr_gw_tmp->orig_node->router->tq_avg); 202 router->tq_avg);
176 203
177 old_gw_node = gw_select(bat_priv, curr_gw_tmp); 204 neigh_node_free_ref(router);
205 gw_select(bat_priv, curr_gw_tmp);
178 } 206 }
179 207
208unlock:
180 rcu_read_unlock(); 209 rcu_read_unlock();
181 210out:
182 /* the kfree() has to be outside of the rcu lock */ 211 if (curr_gw)
183 if (old_gw_node) 212 gw_node_free_ref(curr_gw);
184 kref_put(&old_gw_node->refcount, gw_node_free_ref);
185} 213}
186 214
187void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) 215void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
188{ 216{
189 struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; 217 struct orig_node *curr_gw_orig;
218 struct neigh_node *router_gw = NULL, *router_orig = NULL;
190 uint8_t gw_tq_avg, orig_tq_avg; 219 uint8_t gw_tq_avg, orig_tq_avg;
191 220
192 if (!curr_gateway_tmp) 221 curr_gw_orig = gw_get_selected_orig(bat_priv);
193 return; 222 if (!curr_gw_orig)
194
195 if (!curr_gateway_tmp->orig_node)
196 goto deselect; 223 goto deselect;
197 224
198 if (!curr_gateway_tmp->orig_node->router) 225 router_gw = orig_node_get_router(curr_gw_orig);
226 if (!router_gw)
199 goto deselect; 227 goto deselect;
200 228
201 /* this node already is the gateway */ 229 /* this node already is the gateway */
202 if (curr_gateway_tmp->orig_node == orig_node) 230 if (curr_gw_orig == orig_node)
203 return; 231 goto out;
204 232
205 if (!orig_node->router) 233 router_orig = orig_node_get_router(orig_node);
206 return; 234 if (!router_orig)
235 goto out;
207 236
208 gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg; 237 gw_tq_avg = router_gw->tq_avg;
209 orig_tq_avg = orig_node->router->tq_avg; 238 orig_tq_avg = router_orig->tq_avg;
210 239
211 /* the TQ value has to be better */ 240 /* the TQ value has to be better */
212 if (orig_tq_avg < gw_tq_avg) 241 if (orig_tq_avg < gw_tq_avg)
213 return; 242 goto out;
214 243
215 /** 244 /**
216 * if the routing class is greater than 3 the value tells us how much 245 * if the routing class is greater than 3 the value tells us how much
@@ -218,7 +247,7 @@ void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
218 **/ 247 **/
219 if ((atomic_read(&bat_priv->gw_sel_class) > 3) && 248 if ((atomic_read(&bat_priv->gw_sel_class) > 3) &&
220 (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class))) 249 (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class)))
221 return; 250 goto out;
222 251
223 bat_dbg(DBG_BATMAN, bat_priv, 252 bat_dbg(DBG_BATMAN, bat_priv,
224 "Restarting gateway selection: better gateway found (tq curr: " 253 "Restarting gateway selection: better gateway found (tq curr: "
@@ -227,6 +256,15 @@ void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
227 256
228deselect: 257deselect:
229 gw_deselect(bat_priv); 258 gw_deselect(bat_priv);
259out:
260 if (curr_gw_orig)
261 orig_node_free_ref(curr_gw_orig);
262 if (router_gw)
263 neigh_node_free_ref(router_gw);
264 if (router_orig)
265 neigh_node_free_ref(router_orig);
266
267 return;
230} 268}
231 269
232static void gw_node_add(struct bat_priv *bat_priv, 270static void gw_node_add(struct bat_priv *bat_priv,
@@ -242,7 +280,7 @@ static void gw_node_add(struct bat_priv *bat_priv,
242 memset(gw_node, 0, sizeof(struct gw_node)); 280 memset(gw_node, 0, sizeof(struct gw_node));
243 INIT_HLIST_NODE(&gw_node->list); 281 INIT_HLIST_NODE(&gw_node->list);
244 gw_node->orig_node = orig_node; 282 gw_node->orig_node = orig_node;
245 kref_init(&gw_node->refcount); 283 atomic_set(&gw_node->refcount, 1);
246 284
247 spin_lock_bh(&bat_priv->gw_list_lock); 285 spin_lock_bh(&bat_priv->gw_list_lock);
248 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); 286 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list);
@@ -262,7 +300,15 @@ void gw_node_update(struct bat_priv *bat_priv,
262 struct orig_node *orig_node, uint8_t new_gwflags) 300 struct orig_node *orig_node, uint8_t new_gwflags)
263{ 301{
264 struct hlist_node *node; 302 struct hlist_node *node;
265 struct gw_node *gw_node; 303 struct gw_node *gw_node, *curr_gw;
304
305 /**
306 * Note: We don't need a NULL check here, since curr_gw never gets
307 * dereferenced. If curr_gw is NULL we also should not exit as we may
308 * have this gateway in our list (duplication check!) even though we
309 * have no currently selected gateway.
310 */
311 curr_gw = gw_get_selected_gw_node(bat_priv);
266 312
267 rcu_read_lock(); 313 rcu_read_lock();
268 hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { 314 hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
@@ -283,22 +329,26 @@ void gw_node_update(struct bat_priv *bat_priv,
283 "Gateway %pM removed from gateway list\n", 329 "Gateway %pM removed from gateway list\n",
284 orig_node->orig); 330 orig_node->orig);
285 331
286 if (gw_node == bat_priv->curr_gw) { 332 if (gw_node == curr_gw)
287 rcu_read_unlock(); 333 goto deselect;
288 gw_deselect(bat_priv);
289 return;
290 }
291 } 334 }
292 335
293 rcu_read_unlock(); 336 goto unlock;
294 return;
295 } 337 }
296 rcu_read_unlock();
297 338
298 if (new_gwflags == 0) 339 if (new_gwflags == 0)
299 return; 340 goto unlock;
300 341
301 gw_node_add(bat_priv, orig_node, new_gwflags); 342 gw_node_add(bat_priv, orig_node, new_gwflags);
343 goto unlock;
344
345deselect:
346 gw_deselect(bat_priv);
347unlock:
348 rcu_read_unlock();
349
350 if (curr_gw)
351 gw_node_free_ref(curr_gw);
302} 352}
303 353
304void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node) 354void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node)
@@ -308,9 +358,12 @@ void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node)
308 358
309void gw_node_purge(struct bat_priv *bat_priv) 359void gw_node_purge(struct bat_priv *bat_priv)
310{ 360{
311 struct gw_node *gw_node; 361 struct gw_node *gw_node, *curr_gw;
312 struct hlist_node *node, *node_tmp; 362 struct hlist_node *node, *node_tmp;
313 unsigned long timeout = 2 * PURGE_TIMEOUT * HZ; 363 unsigned long timeout = 2 * PURGE_TIMEOUT * HZ;
364 char do_deselect = 0;
365
366 curr_gw = gw_get_selected_gw_node(bat_priv);
314 367
315 spin_lock_bh(&bat_priv->gw_list_lock); 368 spin_lock_bh(&bat_priv->gw_list_lock);
316 369
@@ -321,75 +374,99 @@ void gw_node_purge(struct bat_priv *bat_priv)
321 atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) 374 atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE)
322 continue; 375 continue;
323 376
324 if (bat_priv->curr_gw == gw_node) 377 if (curr_gw == gw_node)
325 gw_deselect(bat_priv); 378 do_deselect = 1;
326 379
327 hlist_del_rcu(&gw_node->list); 380 hlist_del_rcu(&gw_node->list);
328 call_rcu(&gw_node->rcu, gw_node_free_rcu); 381 gw_node_free_ref(gw_node);
329 } 382 }
330 383
331
332 spin_unlock_bh(&bat_priv->gw_list_lock); 384 spin_unlock_bh(&bat_priv->gw_list_lock);
385
386 /* gw_deselect() needs to acquire the gw_list_lock */
387 if (do_deselect)
388 gw_deselect(bat_priv);
389
390 if (curr_gw)
391 gw_node_free_ref(curr_gw);
333} 392}
334 393
394/**
395 * fails if orig_node has no router
396 */
335static int _write_buffer_text(struct bat_priv *bat_priv, 397static int _write_buffer_text(struct bat_priv *bat_priv,
336 struct seq_file *seq, struct gw_node *gw_node) 398 struct seq_file *seq, struct gw_node *gw_node)
337{ 399{
338 int down, up; 400 struct gw_node *curr_gw;
401 struct neigh_node *router;
402 int down, up, ret = -1;
339 403
340 gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up); 404 gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up);
341 405
342 return seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", 406 router = orig_node_get_router(gw_node->orig_node);
343 (bat_priv->curr_gw == gw_node ? "=>" : " "), 407 if (!router)
344 gw_node->orig_node->orig, 408 goto out;
345 gw_node->orig_node->router->tq_avg, 409
346 gw_node->orig_node->router->addr, 410 curr_gw = gw_get_selected_gw_node(bat_priv);
347 gw_node->orig_node->router->if_incoming->net_dev->name, 411
348 gw_node->orig_node->gw_flags, 412 ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n",
349 (down > 2048 ? down / 1024 : down), 413 (curr_gw == gw_node ? "=>" : " "),
350 (down > 2048 ? "MBit" : "KBit"), 414 gw_node->orig_node->orig,
351 (up > 2048 ? up / 1024 : up), 415 router->tq_avg, router->addr,
352 (up > 2048 ? "MBit" : "KBit")); 416 router->if_incoming->net_dev->name,
417 gw_node->orig_node->gw_flags,
418 (down > 2048 ? down / 1024 : down),
419 (down > 2048 ? "MBit" : "KBit"),
420 (up > 2048 ? up / 1024 : up),
421 (up > 2048 ? "MBit" : "KBit"));
422
423 neigh_node_free_ref(router);
424 if (curr_gw)
425 gw_node_free_ref(curr_gw);
426out:
427 return ret;
353} 428}
354 429
355int gw_client_seq_print_text(struct seq_file *seq, void *offset) 430int gw_client_seq_print_text(struct seq_file *seq, void *offset)
356{ 431{
357 struct net_device *net_dev = (struct net_device *)seq->private; 432 struct net_device *net_dev = (struct net_device *)seq->private;
358 struct bat_priv *bat_priv = netdev_priv(net_dev); 433 struct bat_priv *bat_priv = netdev_priv(net_dev);
434 struct hard_iface *primary_if;
359 struct gw_node *gw_node; 435 struct gw_node *gw_node;
360 struct hlist_node *node; 436 struct hlist_node *node;
361 int gw_count = 0; 437 int gw_count = 0, ret = 0;
362 438
363 if (!bat_priv->primary_if) { 439 primary_if = primary_if_get_selected(bat_priv);
364 440 if (!primary_if) {
365 return seq_printf(seq, "BATMAN mesh %s disabled - please " 441 ret = seq_printf(seq, "BATMAN mesh %s disabled - please "
366 "specify interfaces to enable it\n", 442 "specify interfaces to enable it\n",
367 net_dev->name); 443 net_dev->name);
444 goto out;
368 } 445 }
369 446
370 if (bat_priv->primary_if->if_status != IF_ACTIVE) { 447 if (primary_if->if_status != IF_ACTIVE) {
371 448 ret = seq_printf(seq, "BATMAN mesh %s disabled - "
372 return seq_printf(seq, "BATMAN mesh %s disabled - " 449 "primary interface not active\n",
373 "primary interface not active\n", 450 net_dev->name);
374 net_dev->name); 451 goto out;
375 } 452 }
376 453
377 seq_printf(seq, " %-12s (%s/%i) %17s [%10s]: gw_class ... " 454 seq_printf(seq, " %-12s (%s/%i) %17s [%10s]: gw_class ... "
378 "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n", 455 "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n",
379 "Gateway", "#", TQ_MAX_VALUE, "Nexthop", 456 "Gateway", "#", TQ_MAX_VALUE, "Nexthop",
380 "outgoingIF", SOURCE_VERSION, REVISION_VERSION_STR, 457 "outgoingIF", SOURCE_VERSION, REVISION_VERSION_STR,
381 bat_priv->primary_if->net_dev->name, 458 primary_if->net_dev->name,
382 bat_priv->primary_if->net_dev->dev_addr, net_dev->name); 459 primary_if->net_dev->dev_addr, net_dev->name);
383 460
384 rcu_read_lock(); 461 rcu_read_lock();
385 hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { 462 hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
386 if (gw_node->deleted) 463 if (gw_node->deleted)
387 continue; 464 continue;
388 465
389 if (!gw_node->orig_node->router) 466 /* fails if orig_node has no router */
467 if (_write_buffer_text(bat_priv, seq, gw_node) < 0)
390 continue; 468 continue;
391 469
392 _write_buffer_text(bat_priv, seq, gw_node);
393 gw_count++; 470 gw_count++;
394 } 471 }
395 rcu_read_unlock(); 472 rcu_read_unlock();
@@ -397,7 +474,10 @@ int gw_client_seq_print_text(struct seq_file *seq, void *offset)
397 if (gw_count == 0) 474 if (gw_count == 0)
398 seq_printf(seq, "No gateways in range ...\n"); 475 seq_printf(seq, "No gateways in range ...\n");
399 476
400 return 0; 477out:
478 if (primary_if)
479 hardif_free_ref(primary_if);
480 return ret;
401} 481}
402 482
403int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) 483int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb)
@@ -406,6 +486,7 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb)
406 struct iphdr *iphdr; 486 struct iphdr *iphdr;
407 struct ipv6hdr *ipv6hdr; 487 struct ipv6hdr *ipv6hdr;
408 struct udphdr *udphdr; 488 struct udphdr *udphdr;
489 struct gw_node *curr_gw;
409 unsigned int header_len = 0; 490 unsigned int header_len = 0;
410 491
411 if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF) 492 if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF)
@@ -470,8 +551,11 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb)
470 if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER) 551 if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)
471 return -1; 552 return -1;
472 553
473 if (!bat_priv->curr_gw) 554 curr_gw = gw_get_selected_gw_node(bat_priv);
555 if (!curr_gw)
474 return 0; 556 return 0;
475 557
558 if (curr_gw)
559 gw_node_free_ref(curr_gw);
476 return 1; 560 return 1;
477} 561}
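The gateway_client.c changes above replace the kref/call_rcu scheme with a plain atomic_t reference count: lookups take a reference with atomic_inc_not_zero() under rcu_read_lock(), and the final put frees through kfree_rcu(). The scheme, reduced to a sketch (struct and function names are placeholders; assumes an rcu_head member so kfree_rcu() can be used):

	struct example_node {
		atomic_t refcount;
		struct rcu_head rcu;
	};

	static void example_node_free_ref(struct example_node *n)
	{
		/* last reference gone: defer the kfree() past the RCU grace period */
		if (atomic_dec_and_test(&n->refcount))
			kfree_rcu(n, rcu);
	}

	static struct example_node *example_node_get(struct example_node __rcu **slot)
	{
		struct example_node *n;

		rcu_read_lock();
		n = rcu_dereference(*slot);
		/* only hand out nodes whose refcount has not already hit zero */
		if (n && !atomic_inc_not_zero(&n->refcount))
			n = NULL;
		rcu_read_unlock();

		return n;
	}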
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 4585e6549844..1ce8c6066da1 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -24,7 +24,7 @@
24 24
25void gw_deselect(struct bat_priv *bat_priv); 25void gw_deselect(struct bat_priv *bat_priv);
26void gw_election(struct bat_priv *bat_priv); 26void gw_election(struct bat_priv *bat_priv);
27void *gw_get_selected(struct bat_priv *bat_priv); 27struct orig_node *gw_get_selected_orig(struct bat_priv *bat_priv);
28void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node); 28void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node);
29void gw_node_update(struct bat_priv *bat_priv, 29void gw_node_update(struct bat_priv *bat_priv,
30 struct orig_node *orig_node, uint8_t new_gwflags); 30 struct orig_node *orig_node, uint8_t new_gwflags);
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b962982f017e..50d3a59a3d73 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 5e728d0b7959..55e527a489fe 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 4f95777ce080..dfbfccc9fe40 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -31,36 +31,37 @@
31 31
32#include <linux/if_arp.h> 32#include <linux/if_arp.h>
33 33
34/* protect update critical side of if_list - but not the content */
35static DEFINE_SPINLOCK(if_list_lock);
36 34
37static void hardif_free_rcu(struct rcu_head *rcu) 35static int batman_skb_recv(struct sk_buff *skb,
36 struct net_device *dev,
37 struct packet_type *ptype,
38 struct net_device *orig_dev);
39
40void hardif_free_rcu(struct rcu_head *rcu)
38{ 41{
39 struct batman_if *batman_if; 42 struct hard_iface *hard_iface;
40 43
41 batman_if = container_of(rcu, struct batman_if, rcu); 44 hard_iface = container_of(rcu, struct hard_iface, rcu);
42 dev_put(batman_if->net_dev); 45 dev_put(hard_iface->net_dev);
43 kref_put(&batman_if->refcount, hardif_free_ref); 46 kfree(hard_iface);
44} 47}
45 48
46struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) 49struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev)
47{ 50{
48 struct batman_if *batman_if; 51 struct hard_iface *hard_iface;
49 52
50 rcu_read_lock(); 53 rcu_read_lock();
51 list_for_each_entry_rcu(batman_if, &if_list, list) { 54 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
52 if (batman_if->net_dev == net_dev) 55 if (hard_iface->net_dev == net_dev &&
56 atomic_inc_not_zero(&hard_iface->refcount))
53 goto out; 57 goto out;
54 } 58 }
55 59
56 batman_if = NULL; 60 hard_iface = NULL;
57 61
58out: 62out:
59 if (batman_if)
60 kref_get(&batman_if->refcount);
61
62 rcu_read_unlock(); 63 rcu_read_unlock();
63 return batman_if; 64 return hard_iface;
64} 65}
65 66
66static int is_valid_iface(struct net_device *net_dev) 67static int is_valid_iface(struct net_device *net_dev)
@@ -75,13 +76,8 @@ static int is_valid_iface(struct net_device *net_dev)
75 return 0; 76 return 0;
76 77
77 /* no batman over batman */ 78 /* no batman over batman */
78#ifdef HAVE_NET_DEVICE_OPS 79 if (softif_is_valid(net_dev))
79 if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
80 return 0;
81#else
82 if (net_dev->hard_start_xmit == interface_tx)
83 return 0; 80 return 0;
84#endif
85 81
86 /* Device is being bridged */ 82 /* Device is being bridged */
87 /* if (net_dev->priv_flags & IFF_BRIDGE_PORT) 83 /* if (net_dev->priv_flags & IFF_BRIDGE_PORT)
@@ -90,108 +86,116 @@ static int is_valid_iface(struct net_device *net_dev)
90 return 1; 86 return 1;
91} 87}
92 88
93static struct batman_if *get_active_batman_if(struct net_device *soft_iface) 89static struct hard_iface *hardif_get_active(struct net_device *soft_iface)
94{ 90{
95 struct batman_if *batman_if; 91 struct hard_iface *hard_iface;
96 92
97 rcu_read_lock(); 93 rcu_read_lock();
98 list_for_each_entry_rcu(batman_if, &if_list, list) { 94 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
99 if (batman_if->soft_iface != soft_iface) 95 if (hard_iface->soft_iface != soft_iface)
100 continue; 96 continue;
101 97
102 if (batman_if->if_status == IF_ACTIVE) 98 if (hard_iface->if_status == IF_ACTIVE &&
99 atomic_inc_not_zero(&hard_iface->refcount))
103 goto out; 100 goto out;
104 } 101 }
105 102
106 batman_if = NULL; 103 hard_iface = NULL;
107 104
108out: 105out:
109 if (batman_if)
110 kref_get(&batman_if->refcount);
111
112 rcu_read_unlock(); 106 rcu_read_unlock();
113 return batman_if; 107 return hard_iface;
114} 108}
115 109
116static void update_primary_addr(struct bat_priv *bat_priv) 110static void primary_if_update_addr(struct bat_priv *bat_priv)
117{ 111{
118 struct vis_packet *vis_packet; 112 struct vis_packet *vis_packet;
113 struct hard_iface *primary_if;
114
115 primary_if = primary_if_get_selected(bat_priv);
116 if (!primary_if)
117 goto out;
119 118
120 vis_packet = (struct vis_packet *) 119 vis_packet = (struct vis_packet *)
121 bat_priv->my_vis_info->skb_packet->data; 120 bat_priv->my_vis_info->skb_packet->data;
122 memcpy(vis_packet->vis_orig, 121 memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN);
123 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
124 memcpy(vis_packet->sender_orig, 122 memcpy(vis_packet->sender_orig,
125 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); 123 primary_if->net_dev->dev_addr, ETH_ALEN);
124
125out:
126 if (primary_if)
127 hardif_free_ref(primary_if);
126} 128}
127 129
128static void set_primary_if(struct bat_priv *bat_priv, 130static void primary_if_select(struct bat_priv *bat_priv,
129 struct batman_if *batman_if) 131 struct hard_iface *new_hard_iface)
130{ 132{
133 struct hard_iface *curr_hard_iface;
131 struct batman_packet *batman_packet; 134 struct batman_packet *batman_packet;
132 struct batman_if *old_if;
133 135
134 if (batman_if) 136 ASSERT_RTNL();
135 kref_get(&batman_if->refcount); 137
138 if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount))
139 new_hard_iface = NULL;
136 140
137 old_if = bat_priv->primary_if; 141 curr_hard_iface = bat_priv->primary_if;
138 bat_priv->primary_if = batman_if; 142 rcu_assign_pointer(bat_priv->primary_if, new_hard_iface);
139 143
140 if (old_if) 144 if (curr_hard_iface)
141 kref_put(&old_if->refcount, hardif_free_ref); 145 hardif_free_ref(curr_hard_iface);
142 146
143 if (!bat_priv->primary_if) 147 if (!new_hard_iface)
144 return; 148 return;
145 149
146 batman_packet = (struct batman_packet *)(batman_if->packet_buff); 150 batman_packet = (struct batman_packet *)(new_hard_iface->packet_buff);
147 batman_packet->flags = PRIMARIES_FIRST_HOP; 151 batman_packet->flags = PRIMARIES_FIRST_HOP;
148 batman_packet->ttl = TTL; 152 batman_packet->ttl = TTL;
149 153
150 update_primary_addr(bat_priv); 154 primary_if_update_addr(bat_priv);
151 155
152 /*** 156 /***
153 * hacky trick to make sure that we send the HNA information via 157 * hacky trick to make sure that we send the TT information via
154 * our new primary interface 158 * our new primary interface
155 */ 159 */
156 atomic_set(&bat_priv->hna_local_changed, 1); 160 atomic_set(&bat_priv->tt_local_changed, 1);
157} 161}
158 162
159static bool hardif_is_iface_up(struct batman_if *batman_if) 163static bool hardif_is_iface_up(struct hard_iface *hard_iface)
160{ 164{
161 if (batman_if->net_dev->flags & IFF_UP) 165 if (hard_iface->net_dev->flags & IFF_UP)
162 return true; 166 return true;
163 167
164 return false; 168 return false;
165} 169}
166 170
167static void update_mac_addresses(struct batman_if *batman_if) 171static void update_mac_addresses(struct hard_iface *hard_iface)
168{ 172{
169 memcpy(((struct batman_packet *)(batman_if->packet_buff))->orig, 173 memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig,
170 batman_if->net_dev->dev_addr, ETH_ALEN); 174 hard_iface->net_dev->dev_addr, ETH_ALEN);
171 memcpy(((struct batman_packet *)(batman_if->packet_buff))->prev_sender, 175 memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender,
172 batman_if->net_dev->dev_addr, ETH_ALEN); 176 hard_iface->net_dev->dev_addr, ETH_ALEN);
173} 177}
174 178
175static void check_known_mac_addr(struct net_device *net_dev) 179static void check_known_mac_addr(struct net_device *net_dev)
176{ 180{
177 struct batman_if *batman_if; 181 struct hard_iface *hard_iface;
178 182
179 rcu_read_lock(); 183 rcu_read_lock();
180 list_for_each_entry_rcu(batman_if, &if_list, list) { 184 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
181 if ((batman_if->if_status != IF_ACTIVE) && 185 if ((hard_iface->if_status != IF_ACTIVE) &&
182 (batman_if->if_status != IF_TO_BE_ACTIVATED)) 186 (hard_iface->if_status != IF_TO_BE_ACTIVATED))
183 continue; 187 continue;
184 188
185 if (batman_if->net_dev == net_dev) 189 if (hard_iface->net_dev == net_dev)
186 continue; 190 continue;
187 191
188 if (!compare_orig(batman_if->net_dev->dev_addr, 192 if (!compare_eth(hard_iface->net_dev->dev_addr,
189 net_dev->dev_addr)) 193 net_dev->dev_addr))
190 continue; 194 continue;
191 195
192 pr_warning("The newly added mac address (%pM) already exists " 196 pr_warning("The newly added mac address (%pM) already exists "
193 "on: %s\n", net_dev->dev_addr, 197 "on: %s\n", net_dev->dev_addr,
194 batman_if->net_dev->name); 198 hard_iface->net_dev->name);
195 pr_warning("It is strongly recommended to keep mac addresses " 199 pr_warning("It is strongly recommended to keep mac addresses "
196 "unique to avoid problems!\n"); 200 "unique to avoid problems!\n");
197 } 201 }
@@ -201,7 +205,7 @@ static void check_known_mac_addr(struct net_device *net_dev)
201int hardif_min_mtu(struct net_device *soft_iface) 205int hardif_min_mtu(struct net_device *soft_iface)
202{ 206{
203 struct bat_priv *bat_priv = netdev_priv(soft_iface); 207 struct bat_priv *bat_priv = netdev_priv(soft_iface);
204 struct batman_if *batman_if; 208 struct hard_iface *hard_iface;
205 /* allow big frames if all devices are capable to do so 209 /* allow big frames if all devices are capable to do so
206 * (have MTU > 1500 + BAT_HEADER_LEN) */ 210 * (have MTU > 1500 + BAT_HEADER_LEN) */
207 int min_mtu = ETH_DATA_LEN; 211 int min_mtu = ETH_DATA_LEN;
@@ -210,15 +214,15 @@ int hardif_min_mtu(struct net_device *soft_iface)
210 goto out; 214 goto out;
211 215
212 rcu_read_lock(); 216 rcu_read_lock();
213 list_for_each_entry_rcu(batman_if, &if_list, list) { 217 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
214 if ((batman_if->if_status != IF_ACTIVE) && 218 if ((hard_iface->if_status != IF_ACTIVE) &&
215 (batman_if->if_status != IF_TO_BE_ACTIVATED)) 219 (hard_iface->if_status != IF_TO_BE_ACTIVATED))
216 continue; 220 continue;
217 221
218 if (batman_if->soft_iface != soft_iface) 222 if (hard_iface->soft_iface != soft_iface)
219 continue; 223 continue;
220 224
221 min_mtu = min_t(int, batman_if->net_dev->mtu - BAT_HEADER_LEN, 225 min_mtu = min_t(int, hard_iface->net_dev->mtu - BAT_HEADER_LEN,
222 min_mtu); 226 min_mtu);
223 } 227 }
224 rcu_read_unlock(); 228 rcu_read_unlock();
@@ -236,260 +240,281 @@ void update_min_mtu(struct net_device *soft_iface)
236 soft_iface->mtu = min_mtu; 240 soft_iface->mtu = min_mtu;
237} 241}
238 242
239static void hardif_activate_interface(struct batman_if *batman_if) 243static void hardif_activate_interface(struct hard_iface *hard_iface)
240{ 244{
241 struct bat_priv *bat_priv; 245 struct bat_priv *bat_priv;
246 struct hard_iface *primary_if = NULL;
242 247
243 if (batman_if->if_status != IF_INACTIVE) 248 if (hard_iface->if_status != IF_INACTIVE)
244 return; 249 goto out;
245 250
246 bat_priv = netdev_priv(batman_if->soft_iface); 251 bat_priv = netdev_priv(hard_iface->soft_iface);
247 252
248 update_mac_addresses(batman_if); 253 update_mac_addresses(hard_iface);
249 batman_if->if_status = IF_TO_BE_ACTIVATED; 254 hard_iface->if_status = IF_TO_BE_ACTIVATED;
250 255
251 /** 256 /**
252 * the first active interface becomes our primary interface or 257 * the first active interface becomes our primary interface or
253 * the next active interface after the old primay interface was removed 258 * the next active interface after the old primay interface was removed
254 */ 259 */
255 if (!bat_priv->primary_if) 260 primary_if = primary_if_get_selected(bat_priv);
256 set_primary_if(bat_priv, batman_if); 261 if (!primary_if)
262 primary_if_select(bat_priv, hard_iface);
257 263
258 bat_info(batman_if->soft_iface, "Interface activated: %s\n", 264 bat_info(hard_iface->soft_iface, "Interface activated: %s\n",
259 batman_if->net_dev->name); 265 hard_iface->net_dev->name);
260 266
261 update_min_mtu(batman_if->soft_iface); 267 update_min_mtu(hard_iface->soft_iface);
262 return; 268
269out:
270 if (primary_if)
271 hardif_free_ref(primary_if);
263} 272}
264 273
265static void hardif_deactivate_interface(struct batman_if *batman_if) 274static void hardif_deactivate_interface(struct hard_iface *hard_iface)
266{ 275{
267 if ((batman_if->if_status != IF_ACTIVE) && 276 if ((hard_iface->if_status != IF_ACTIVE) &&
268 (batman_if->if_status != IF_TO_BE_ACTIVATED)) 277 (hard_iface->if_status != IF_TO_BE_ACTIVATED))
269 return; 278 return;
270 279
271 batman_if->if_status = IF_INACTIVE; 280 hard_iface->if_status = IF_INACTIVE;
272 281
273 bat_info(batman_if->soft_iface, "Interface deactivated: %s\n", 282 bat_info(hard_iface->soft_iface, "Interface deactivated: %s\n",
274 batman_if->net_dev->name); 283 hard_iface->net_dev->name);
275 284
276 update_min_mtu(batman_if->soft_iface); 285 update_min_mtu(hard_iface->soft_iface);
277} 286}
278 287
279int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) 288int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name)
280{ 289{
281 struct bat_priv *bat_priv; 290 struct bat_priv *bat_priv;
282 struct batman_packet *batman_packet; 291 struct batman_packet *batman_packet;
292 struct net_device *soft_iface;
293 int ret;
294
295 if (hard_iface->if_status != IF_NOT_IN_USE)
296 goto out;
283 297
284 if (batman_if->if_status != IF_NOT_IN_USE) 298 if (!atomic_inc_not_zero(&hard_iface->refcount))
285 goto out; 299 goto out;
286 300
287 batman_if->soft_iface = dev_get_by_name(&init_net, iface_name); 301 soft_iface = dev_get_by_name(&init_net, iface_name);
288 302
289 if (!batman_if->soft_iface) { 303 if (!soft_iface) {
290 batman_if->soft_iface = softif_create(iface_name); 304 soft_iface = softif_create(iface_name);
291 305
292 if (!batman_if->soft_iface) 306 if (!soft_iface) {
307 ret = -ENOMEM;
293 goto err; 308 goto err;
309 }
294 310
295 /* dev_get_by_name() increases the reference counter for us */ 311 /* dev_get_by_name() increases the reference counter for us */
296 dev_hold(batman_if->soft_iface); 312 dev_hold(soft_iface);
297 } 313 }
298 314
299 bat_priv = netdev_priv(batman_if->soft_iface); 315 if (!softif_is_valid(soft_iface)) {
300 batman_if->packet_len = BAT_PACKET_LEN; 316 pr_err("Can't create batman mesh interface %s: "
301 batman_if->packet_buff = kmalloc(batman_if->packet_len, GFP_ATOMIC); 317 "already exists as regular interface\n",
318 soft_iface->name);
319 dev_put(soft_iface);
320 ret = -EINVAL;
321 goto err;
322 }
323
324 hard_iface->soft_iface = soft_iface;
325 bat_priv = netdev_priv(hard_iface->soft_iface);
326 hard_iface->packet_len = BAT_PACKET_LEN;
327 hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC);
302 328
303 if (!batman_if->packet_buff) { 329 if (!hard_iface->packet_buff) {
304 bat_err(batman_if->soft_iface, "Can't add interface packet " 330 bat_err(hard_iface->soft_iface, "Can't add interface packet "
305 "(%s): out of memory\n", batman_if->net_dev->name); 331 "(%s): out of memory\n", hard_iface->net_dev->name);
332 ret = -ENOMEM;
306 goto err; 333 goto err;
307 } 334 }
308 335
309 batman_packet = (struct batman_packet *)(batman_if->packet_buff); 336 batman_packet = (struct batman_packet *)(hard_iface->packet_buff);
310 batman_packet->packet_type = BAT_PACKET; 337 batman_packet->packet_type = BAT_PACKET;
311 batman_packet->version = COMPAT_VERSION; 338 batman_packet->version = COMPAT_VERSION;
312 batman_packet->flags = 0; 339 batman_packet->flags = 0;
313 batman_packet->ttl = 2; 340 batman_packet->ttl = 2;
314 batman_packet->tq = TQ_MAX_VALUE; 341 batman_packet->tq = TQ_MAX_VALUE;
315 batman_packet->num_hna = 0; 342 batman_packet->num_tt = 0;
316 343
317 batman_if->if_num = bat_priv->num_ifaces; 344 hard_iface->if_num = bat_priv->num_ifaces;
318 bat_priv->num_ifaces++; 345 bat_priv->num_ifaces++;
319 batman_if->if_status = IF_INACTIVE; 346 hard_iface->if_status = IF_INACTIVE;
320 orig_hash_add_if(batman_if, bat_priv->num_ifaces); 347 orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
321 348
322 batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); 349 hard_iface->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN);
323 batman_if->batman_adv_ptype.func = batman_skb_recv; 350 hard_iface->batman_adv_ptype.func = batman_skb_recv;
324 batman_if->batman_adv_ptype.dev = batman_if->net_dev; 351 hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
325 kref_get(&batman_if->refcount); 352 dev_add_pack(&hard_iface->batman_adv_ptype);
326 dev_add_pack(&batman_if->batman_adv_ptype);
327 353
328 atomic_set(&batman_if->seqno, 1); 354 atomic_set(&hard_iface->seqno, 1);
329 atomic_set(&batman_if->frag_seqno, 1); 355 atomic_set(&hard_iface->frag_seqno, 1);
330 bat_info(batman_if->soft_iface, "Adding interface: %s\n", 356 bat_info(hard_iface->soft_iface, "Adding interface: %s\n",
331 batman_if->net_dev->name); 357 hard_iface->net_dev->name);
332 358
333 if (atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < 359 if (atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu <
334 ETH_DATA_LEN + BAT_HEADER_LEN) 360 ETH_DATA_LEN + BAT_HEADER_LEN)
335 bat_info(batman_if->soft_iface, 361 bat_info(hard_iface->soft_iface,
336 "The MTU of interface %s is too small (%i) to handle " 362 "The MTU of interface %s is too small (%i) to handle "
337 "the transport of batman-adv packets. Packets going " 363 "the transport of batman-adv packets. Packets going "
338 "over this interface will be fragmented on layer2 " 364 "over this interface will be fragmented on layer2 "
339 "which could impact the performance. Setting the MTU " 365 "which could impact the performance. Setting the MTU "
340 "to %zi would solve the problem.\n", 366 "to %zi would solve the problem.\n",
341 batman_if->net_dev->name, batman_if->net_dev->mtu, 367 hard_iface->net_dev->name, hard_iface->net_dev->mtu,
342 ETH_DATA_LEN + BAT_HEADER_LEN); 368 ETH_DATA_LEN + BAT_HEADER_LEN);
343 369
344 if (!atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < 370 if (!atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu <
345 ETH_DATA_LEN + BAT_HEADER_LEN) 371 ETH_DATA_LEN + BAT_HEADER_LEN)
346 bat_info(batman_if->soft_iface, 372 bat_info(hard_iface->soft_iface,
347 "The MTU of interface %s is too small (%i) to handle " 373 "The MTU of interface %s is too small (%i) to handle "
348 "the transport of batman-adv packets. If you experience" 374 "the transport of batman-adv packets. If you experience"
349 " problems getting traffic through try increasing the " 375 " problems getting traffic through try increasing the "
350 "MTU to %zi.\n", 376 "MTU to %zi.\n",
351 batman_if->net_dev->name, batman_if->net_dev->mtu, 377 hard_iface->net_dev->name, hard_iface->net_dev->mtu,
352 ETH_DATA_LEN + BAT_HEADER_LEN); 378 ETH_DATA_LEN + BAT_HEADER_LEN);
353 379
354 if (hardif_is_iface_up(batman_if)) 380 if (hardif_is_iface_up(hard_iface))
355 hardif_activate_interface(batman_if); 381 hardif_activate_interface(hard_iface);
356 else 382 else
357 bat_err(batman_if->soft_iface, "Not using interface %s " 383 bat_err(hard_iface->soft_iface, "Not using interface %s "
358 "(retrying later): interface not active\n", 384 "(retrying later): interface not active\n",
359 batman_if->net_dev->name); 385 hard_iface->net_dev->name);
360 386
361 /* begin scheduling originator messages on that interface */ 387 /* begin scheduling originator messages on that interface */
362 schedule_own_packet(batman_if); 388 schedule_own_packet(hard_iface);
363 389
364out: 390out:
365 return 0; 391 return 0;
366 392
367err: 393err:
368 return -ENOMEM; 394 hardif_free_ref(hard_iface);
395 return ret;
369} 396}
370 397
371void hardif_disable_interface(struct batman_if *batman_if) 398void hardif_disable_interface(struct hard_iface *hard_iface)
372{ 399{
373 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 400 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
401 struct hard_iface *primary_if = NULL;
374 402
375 if (batman_if->if_status == IF_ACTIVE) 403 if (hard_iface->if_status == IF_ACTIVE)
376 hardif_deactivate_interface(batman_if); 404 hardif_deactivate_interface(hard_iface);
377 405
378 if (batman_if->if_status != IF_INACTIVE) 406 if (hard_iface->if_status != IF_INACTIVE)
379 return; 407 goto out;
380 408
381 bat_info(batman_if->soft_iface, "Removing interface: %s\n", 409 bat_info(hard_iface->soft_iface, "Removing interface: %s\n",
382 batman_if->net_dev->name); 410 hard_iface->net_dev->name);
383 dev_remove_pack(&batman_if->batman_adv_ptype); 411 dev_remove_pack(&hard_iface->batman_adv_ptype);
384 kref_put(&batman_if->refcount, hardif_free_ref);
385 412
386 bat_priv->num_ifaces--; 413 bat_priv->num_ifaces--;
387 orig_hash_del_if(batman_if, bat_priv->num_ifaces); 414 orig_hash_del_if(hard_iface, bat_priv->num_ifaces);
388 415
389 if (batman_if == bat_priv->primary_if) { 416 primary_if = primary_if_get_selected(bat_priv);
390 struct batman_if *new_if; 417 if (hard_iface == primary_if) {
418 struct hard_iface *new_if;
391 419
392 new_if = get_active_batman_if(batman_if->soft_iface); 420 new_if = hardif_get_active(hard_iface->soft_iface);
393 set_primary_if(bat_priv, new_if); 421 primary_if_select(bat_priv, new_if);
394 422
395 if (new_if) 423 if (new_if)
396 kref_put(&new_if->refcount, hardif_free_ref); 424 hardif_free_ref(new_if);
397 } 425 }
398 426
399 kfree(batman_if->packet_buff); 427 kfree(hard_iface->packet_buff);
400 batman_if->packet_buff = NULL; 428 hard_iface->packet_buff = NULL;
401 batman_if->if_status = IF_NOT_IN_USE; 429 hard_iface->if_status = IF_NOT_IN_USE;
402 430
403 /* delete all references to this batman_if */ 431 /* delete all references to this hard_iface */
404 purge_orig_ref(bat_priv); 432 purge_orig_ref(bat_priv);
405 purge_outstanding_packets(bat_priv, batman_if); 433 purge_outstanding_packets(bat_priv, hard_iface);
406 dev_put(batman_if->soft_iface); 434 dev_put(hard_iface->soft_iface);
407 435
408 /* nobody uses this interface anymore */ 436 /* nobody uses this interface anymore */
409 if (!bat_priv->num_ifaces) 437 if (!bat_priv->num_ifaces)
410 softif_destroy(batman_if->soft_iface); 438 softif_destroy(hard_iface->soft_iface);
411 439
412 batman_if->soft_iface = NULL; 440 hard_iface->soft_iface = NULL;
441 hardif_free_ref(hard_iface);
442
443out:
444 if (primary_if)
445 hardif_free_ref(primary_if);
413} 446}
414 447
415static struct batman_if *hardif_add_interface(struct net_device *net_dev) 448static struct hard_iface *hardif_add_interface(struct net_device *net_dev)
416{ 449{
417 struct batman_if *batman_if; 450 struct hard_iface *hard_iface;
418 int ret; 451 int ret;
419 452
453 ASSERT_RTNL();
454
420 ret = is_valid_iface(net_dev); 455 ret = is_valid_iface(net_dev);
421 if (ret != 1) 456 if (ret != 1)
422 goto out; 457 goto out;
423 458
424 dev_hold(net_dev); 459 dev_hold(net_dev);
425 460
426 batman_if = kmalloc(sizeof(struct batman_if), GFP_ATOMIC); 461 hard_iface = kmalloc(sizeof(struct hard_iface), GFP_ATOMIC);
427 if (!batman_if) { 462 if (!hard_iface) {
428 pr_err("Can't add interface (%s): out of memory\n", 463 pr_err("Can't add interface (%s): out of memory\n",
429 net_dev->name); 464 net_dev->name);
430 goto release_dev; 465 goto release_dev;
431 } 466 }
432 467
433 ret = sysfs_add_hardif(&batman_if->hardif_obj, net_dev); 468 ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev);
434 if (ret) 469 if (ret)
435 goto free_if; 470 goto free_if;
436 471
437 batman_if->if_num = -1; 472 hard_iface->if_num = -1;
438 batman_if->net_dev = net_dev; 473 hard_iface->net_dev = net_dev;
439 batman_if->soft_iface = NULL; 474 hard_iface->soft_iface = NULL;
440 batman_if->if_status = IF_NOT_IN_USE; 475 hard_iface->if_status = IF_NOT_IN_USE;
441 INIT_LIST_HEAD(&batman_if->list); 476 INIT_LIST_HEAD(&hard_iface->list);
442 kref_init(&batman_if->refcount); 477 /* extra reference for return */
443 478 atomic_set(&hard_iface->refcount, 2);
444 check_known_mac_addr(batman_if->net_dev);
445 479
446 spin_lock(&if_list_lock); 480 check_known_mac_addr(hard_iface->net_dev);
447 list_add_tail_rcu(&batman_if->list, &if_list); 481 list_add_tail_rcu(&hard_iface->list, &hardif_list);
448 spin_unlock(&if_list_lock);
449 482
450 /* extra reference for return */ 483 return hard_iface;
451 kref_get(&batman_if->refcount);
452 return batman_if;
453 484
454free_if: 485free_if:
455 kfree(batman_if); 486 kfree(hard_iface);
456release_dev: 487release_dev:
457 dev_put(net_dev); 488 dev_put(net_dev);
458out: 489out:
459 return NULL; 490 return NULL;
460} 491}
461 492
462static void hardif_remove_interface(struct batman_if *batman_if) 493static void hardif_remove_interface(struct hard_iface *hard_iface)
463{ 494{
495 ASSERT_RTNL();
496
464 /* first deactivate interface */ 497 /* first deactivate interface */
465 if (batman_if->if_status != IF_NOT_IN_USE) 498 if (hard_iface->if_status != IF_NOT_IN_USE)
466 hardif_disable_interface(batman_if); 499 hardif_disable_interface(hard_iface);
467 500
468 if (batman_if->if_status != IF_NOT_IN_USE) 501 if (hard_iface->if_status != IF_NOT_IN_USE)
469 return; 502 return;
470 503
471 batman_if->if_status = IF_TO_BE_REMOVED; 504 hard_iface->if_status = IF_TO_BE_REMOVED;
472 sysfs_del_hardif(&batman_if->hardif_obj); 505 sysfs_del_hardif(&hard_iface->hardif_obj);
473 call_rcu(&batman_if->rcu, hardif_free_rcu); 506 hardif_free_ref(hard_iface);
474} 507}
475 508
476void hardif_remove_interfaces(void) 509void hardif_remove_interfaces(void)
477{ 510{
478 struct batman_if *batman_if, *batman_if_tmp; 511 struct hard_iface *hard_iface, *hard_iface_tmp;
479 struct list_head if_queue;
480
481 INIT_LIST_HEAD(&if_queue);
482
483 spin_lock(&if_list_lock);
484 list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) {
485 list_del_rcu(&batman_if->list);
486 list_add_tail(&batman_if->list, &if_queue);
487 }
488 spin_unlock(&if_list_lock);
489 512
490 rtnl_lock(); 513 rtnl_lock();
491 list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) { 514 list_for_each_entry_safe(hard_iface, hard_iface_tmp,
492 hardif_remove_interface(batman_if); 515 &hardif_list, list) {
516 list_del_rcu(&hard_iface->list);
517 hardif_remove_interface(hard_iface);
493 } 518 }
494 rtnl_unlock(); 519 rtnl_unlock();
495} 520}
@@ -498,66 +523,72 @@ static int hard_if_event(struct notifier_block *this,
498 unsigned long event, void *ptr) 523 unsigned long event, void *ptr)
499{ 524{
500 struct net_device *net_dev = (struct net_device *)ptr; 525 struct net_device *net_dev = (struct net_device *)ptr;
501 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 526 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
527 struct hard_iface *primary_if = NULL;
502 struct bat_priv *bat_priv; 528 struct bat_priv *bat_priv;
503 529
504 if (!batman_if && event == NETDEV_REGISTER) 530 if (!hard_iface && event == NETDEV_REGISTER)
505 batman_if = hardif_add_interface(net_dev); 531 hard_iface = hardif_add_interface(net_dev);
506 532
507 if (!batman_if) 533 if (!hard_iface)
508 goto out; 534 goto out;
509 535
510 switch (event) { 536 switch (event) {
511 case NETDEV_UP: 537 case NETDEV_UP:
512 hardif_activate_interface(batman_if); 538 hardif_activate_interface(hard_iface);
513 break; 539 break;
514 case NETDEV_GOING_DOWN: 540 case NETDEV_GOING_DOWN:
515 case NETDEV_DOWN: 541 case NETDEV_DOWN:
516 hardif_deactivate_interface(batman_if); 542 hardif_deactivate_interface(hard_iface);
517 break; 543 break;
518 case NETDEV_UNREGISTER: 544 case NETDEV_UNREGISTER:
519 spin_lock(&if_list_lock); 545 list_del_rcu(&hard_iface->list);
520 list_del_rcu(&batman_if->list);
521 spin_unlock(&if_list_lock);
522 546
523 hardif_remove_interface(batman_if); 547 hardif_remove_interface(hard_iface);
524 break; 548 break;
525 case NETDEV_CHANGEMTU: 549 case NETDEV_CHANGEMTU:
526 if (batman_if->soft_iface) 550 if (hard_iface->soft_iface)
527 update_min_mtu(batman_if->soft_iface); 551 update_min_mtu(hard_iface->soft_iface);
528 break; 552 break;
529 case NETDEV_CHANGEADDR: 553 case NETDEV_CHANGEADDR:
530 if (batman_if->if_status == IF_NOT_IN_USE) 554 if (hard_iface->if_status == IF_NOT_IN_USE)
531 goto hardif_put; 555 goto hardif_put;
532 556
533 check_known_mac_addr(batman_if->net_dev); 557 check_known_mac_addr(hard_iface->net_dev);
534 update_mac_addresses(batman_if); 558 update_mac_addresses(hard_iface);
559
560 bat_priv = netdev_priv(hard_iface->soft_iface);
561 primary_if = primary_if_get_selected(bat_priv);
562 if (!primary_if)
563 goto hardif_put;
535 564
536 bat_priv = netdev_priv(batman_if->soft_iface); 565 if (hard_iface == primary_if)
537 if (batman_if == bat_priv->primary_if) 566 primary_if_update_addr(bat_priv);
538 update_primary_addr(bat_priv);
539 break; 567 break;
540 default: 568 default:
541 break; 569 break;
542 }; 570 };
543 571
544hardif_put: 572hardif_put:
545 kref_put(&batman_if->refcount, hardif_free_ref); 573 hardif_free_ref(hard_iface);
546out: 574out:
575 if (primary_if)
576 hardif_free_ref(primary_if);
547 return NOTIFY_DONE; 577 return NOTIFY_DONE;
548} 578}
549 579
550/* receive a packet with the batman ethertype coming on a hard 580/* receive a packet with the batman ethertype coming on a hard
551 * interface */ 581 * interface */
552int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, 582static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
553 struct packet_type *ptype, struct net_device *orig_dev) 583 struct packet_type *ptype,
584 struct net_device *orig_dev)
554{ 585{
555 struct bat_priv *bat_priv; 586 struct bat_priv *bat_priv;
556 struct batman_packet *batman_packet; 587 struct batman_packet *batman_packet;
557 struct batman_if *batman_if; 588 struct hard_iface *hard_iface;
558 int ret; 589 int ret;
559 590
560 batman_if = container_of(ptype, struct batman_if, batman_adv_ptype); 591 hard_iface = container_of(ptype, struct hard_iface, batman_adv_ptype);
561 skb = skb_share_check(skb, GFP_ATOMIC); 592 skb = skb_share_check(skb, GFP_ATOMIC);
562 593
563 /* skb was released by skb_share_check() */ 594 /* skb was released by skb_share_check() */
@@ -573,16 +604,16 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
573 || !skb_mac_header(skb))) 604 || !skb_mac_header(skb)))
574 goto err_free; 605 goto err_free;
575 606
576 if (!batman_if->soft_iface) 607 if (!hard_iface->soft_iface)
577 goto err_free; 608 goto err_free;
578 609
579 bat_priv = netdev_priv(batman_if->soft_iface); 610 bat_priv = netdev_priv(hard_iface->soft_iface);
580 611
581 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) 612 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
582 goto err_free; 613 goto err_free;
583 614
584 /* discard frames on not active interfaces */ 615 /* discard frames on not active interfaces */
585 if (batman_if->if_status != IF_ACTIVE) 616 if (hard_iface->if_status != IF_ACTIVE)
586 goto err_free; 617 goto err_free;
587 618
588 batman_packet = (struct batman_packet *)skb->data; 619 batman_packet = (struct batman_packet *)skb->data;
@@ -600,32 +631,32 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
600 switch (batman_packet->packet_type) { 631 switch (batman_packet->packet_type) {
601 /* batman originator packet */ 632 /* batman originator packet */
602 case BAT_PACKET: 633 case BAT_PACKET:
603 ret = recv_bat_packet(skb, batman_if); 634 ret = recv_bat_packet(skb, hard_iface);
604 break; 635 break;
605 636
606 /* batman icmp packet */ 637 /* batman icmp packet */
607 case BAT_ICMP: 638 case BAT_ICMP:
608 ret = recv_icmp_packet(skb, batman_if); 639 ret = recv_icmp_packet(skb, hard_iface);
609 break; 640 break;
610 641
611 /* unicast packet */ 642 /* unicast packet */
612 case BAT_UNICAST: 643 case BAT_UNICAST:
613 ret = recv_unicast_packet(skb, batman_if); 644 ret = recv_unicast_packet(skb, hard_iface);
614 break; 645 break;
615 646
616 /* fragmented unicast packet */ 647 /* fragmented unicast packet */
617 case BAT_UNICAST_FRAG: 648 case BAT_UNICAST_FRAG:
618 ret = recv_ucast_frag_packet(skb, batman_if); 649 ret = recv_ucast_frag_packet(skb, hard_iface);
619 break; 650 break;
620 651
621 /* broadcast packet */ 652 /* broadcast packet */
622 case BAT_BCAST: 653 case BAT_BCAST:
623 ret = recv_bcast_packet(skb, batman_if); 654 ret = recv_bcast_packet(skb, hard_iface);
624 break; 655 break;
625 656
626 /* vis packet */ 657 /* vis packet */
627 case BAT_VIS: 658 case BAT_VIS:
628 ret = recv_vis_packet(skb, batman_if); 659 ret = recv_vis_packet(skb, hard_iface);
629 break; 660 break;
630 default: 661 default:
631 ret = NET_RX_DROP; 662 ret = NET_RX_DROP;
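
The batman_skb_recv() hunk above becomes static and keeps dispatching on the packet type byte, while the owning interface is recovered from the matched packet_type with container_of(). Below is a minimal sketch of that embedded-packet_type receive pattern; all demo_* names are invented for illustration and are not part of this patch, and ETH_P_ALL merely stands in for the batman-adv ethertype.

#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

struct demo_iface {
	struct net_device *net_dev;
	struct packet_type ptype;	/* one receive handler per interface */
};

static int demo_recv(struct sk_buff *skb, struct net_device *dev,
		     struct packet_type *ptype, struct net_device *orig_dev)
{
	/* the packet_type is embedded, so container_of() recovers the owner */
	struct demo_iface *iface = container_of(ptype, struct demo_iface,
						ptype);

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return NET_RX_DROP;

	pr_info("demo: frame received on %s\n", iface->net_dev->name);
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static void demo_register(struct demo_iface *iface)
{
	/* placeholder ethertype; a real protocol registers its own */
	iface->ptype.type = cpu_to_be16(ETH_P_ALL);
	iface->ptype.func = demo_recv;
	iface->ptype.dev = iface->net_dev;
	dev_add_pack(&iface->ptype);
}
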
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 30ec3b8db459..64265991460b 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -31,23 +31,36 @@
31 31
32extern struct notifier_block hard_if_notifier; 32extern struct notifier_block hard_if_notifier;
33 33
34struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev); 34struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev);
35int hardif_enable_interface(struct batman_if *batman_if, char *iface_name); 35int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name);
36void hardif_disable_interface(struct batman_if *batman_if); 36void hardif_disable_interface(struct hard_iface *hard_iface);
37void hardif_remove_interfaces(void); 37void hardif_remove_interfaces(void);
38int batman_skb_recv(struct sk_buff *skb,
39 struct net_device *dev,
40 struct packet_type *ptype,
41 struct net_device *orig_dev);
42int hardif_min_mtu(struct net_device *soft_iface); 38int hardif_min_mtu(struct net_device *soft_iface);
43void update_min_mtu(struct net_device *soft_iface); 39void update_min_mtu(struct net_device *soft_iface);
40void hardif_free_rcu(struct rcu_head *rcu);
44 41
45static inline void hardif_free_ref(struct kref *refcount) 42static inline void hardif_free_ref(struct hard_iface *hard_iface)
46{ 43{
47 struct batman_if *batman_if; 44 if (atomic_dec_and_test(&hard_iface->refcount))
45 call_rcu(&hard_iface->rcu, hardif_free_rcu);
46}
47
48static inline struct hard_iface *primary_if_get_selected(
49 struct bat_priv *bat_priv)
50{
51 struct hard_iface *hard_iface;
52
53 rcu_read_lock();
54 hard_iface = rcu_dereference(bat_priv->primary_if);
55 if (!hard_iface)
56 goto out;
57
58 if (!atomic_inc_not_zero(&hard_iface->refcount))
59 hard_iface = NULL;
48 60
49 batman_if = container_of(refcount, struct batman_if, refcount); 61out:
50 kfree(batman_if); 62 rcu_read_unlock();
63 return hard_iface;
51} 64}
52 65
53#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ 66#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */
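
The rewritten hardif_free_ref() and the new primary_if_get_selected() above form the reference-counting idiom used throughout this series: the selected interface is published as an RCU pointer, readers take it under rcu_read_lock() and keep it only if atomic_inc_not_zero() succeeds, and the final put defers the free to an RCU callback. A generic sketch of that idiom follows, with invented demo_* names that are not part of the patch.

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
	atomic_t refcount;
	struct rcu_head rcu;
};

static struct demo_obj __rcu *demo_selected;	/* published under RCU */

static void demo_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct demo_obj, rcu));
}

static void demo_put(struct demo_obj *obj)
{
	/* last reference gone: free only after current readers finish */
	if (atomic_dec_and_test(&obj->refcount))
		call_rcu(&obj->rcu, demo_free_rcu);
}

static struct demo_obj *demo_get_selected(void)
{
	struct demo_obj *obj;

	rcu_read_lock();
	obj = rcu_dereference(demo_selected);

	/* refuse objects whose refcount already dropped to zero */
	if (obj && !atomic_inc_not_zero(&obj->refcount))
		obj = NULL;

	rcu_read_unlock();
	return obj;
}

Every successful demo_get_selected() must be paired with a demo_put(), which is exactly the out:/hardif_free_ref() bookkeeping visible in hardif_disable_interface() and hard_if_event() above.
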
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 26e623eb9def..c5213d8f2cca 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
@@ -27,13 +27,16 @@ static void hash_init(struct hashtable_t *hash)
27{ 27{
28 int i; 28 int i;
29 29
30 for (i = 0 ; i < hash->size; i++) 30 for (i = 0 ; i < hash->size; i++) {
31 INIT_HLIST_HEAD(&hash->table[i]); 31 INIT_HLIST_HEAD(&hash->table[i]);
32 spin_lock_init(&hash->list_locks[i]);
33 }
32} 34}
33 35
34/* free only the hashtable and the hash itself. */ 36/* free only the hashtable and the hash itself. */
35void hash_destroy(struct hashtable_t *hash) 37void hash_destroy(struct hashtable_t *hash)
36{ 38{
39 kfree(hash->list_locks);
37 kfree(hash->table); 40 kfree(hash->table);
38 kfree(hash); 41 kfree(hash);
39} 42}
@@ -43,20 +46,25 @@ struct hashtable_t *hash_new(int size)
43{ 46{
44 struct hashtable_t *hash; 47 struct hashtable_t *hash;
45 48
46 hash = kmalloc(sizeof(struct hashtable_t) , GFP_ATOMIC); 49 hash = kmalloc(sizeof(struct hashtable_t), GFP_ATOMIC);
47
48 if (!hash) 50 if (!hash)
49 return NULL; 51 return NULL;
50 52
51 hash->size = size;
52 hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC); 53 hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC);
54 if (!hash->table)
55 goto free_hash;
53 56
54 if (!hash->table) { 57 hash->list_locks = kmalloc(sizeof(spinlock_t) * size, GFP_ATOMIC);
55 kfree(hash); 58 if (!hash->list_locks)
56 return NULL; 59 goto free_table;
57 }
58 60
61 hash->size = size;
59 hash_init(hash); 62 hash_init(hash);
60
61 return hash; 63 return hash;
64
65free_table:
66 kfree(hash->table);
67free_hash:
68 kfree(hash);
69 return NULL;
62} 70}
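
hash_new() above now allocates a spinlock per bucket and unwinds partial allocations through labelled gotos. A hedged, self-contained sketch of the same allocation shape (structure and names invented, not taken from the patch):

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>

struct demo_hash {
	struct hlist_head *table;	/* bucket heads */
	spinlock_t *list_locks;		/* one lock per bucket */
	int size;
};

static struct demo_hash *demo_hash_new(int size)
{
	struct demo_hash *hash;
	int i;

	hash = kmalloc(sizeof(*hash), GFP_ATOMIC);
	if (!hash)
		return NULL;

	hash->table = kmalloc(sizeof(*hash->table) * size, GFP_ATOMIC);
	if (!hash->table)
		goto free_hash;

	hash->list_locks = kmalloc(sizeof(*hash->list_locks) * size,
				   GFP_ATOMIC);
	if (!hash->list_locks)
		goto free_table;

	hash->size = size;
	for (i = 0; i < size; i++) {
		INIT_HLIST_HEAD(&hash->table[i]);
		spin_lock_init(&hash->list_locks[i]);
	}
	return hash;

free_table:
	kfree(hash->table);
free_hash:
	kfree(hash);
	return NULL;
}
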
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 09216ade16f1..434822b27473 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
@@ -28,32 +28,23 @@
28 * compare 2 element datas for their keys, 28 * compare 2 element datas for their keys,
29 * return 0 if same and not 0 if not 29 * return 0 if same and not 0 if not
30 * same */ 30 * same */
31typedef int (*hashdata_compare_cb)(void *, void *); 31typedef int (*hashdata_compare_cb)(struct hlist_node *, void *);
32 32
33/* the hashfunction, should return an index 33/* the hashfunction, should return an index
34 * based on the key in the data of the first 34 * based on the key in the data of the first
35 * argument and the size the second */ 35 * argument and the size the second */
36typedef int (*hashdata_choose_cb)(void *, int); 36typedef int (*hashdata_choose_cb)(void *, int);
37typedef void (*hashdata_free_cb)(void *, void *); 37typedef void (*hashdata_free_cb)(struct hlist_node *, void *);
38
39struct element_t {
40 void *data; /* pointer to the data */
41 struct hlist_node hlist; /* bucket list pointer */
42};
43 38
44struct hashtable_t { 39struct hashtable_t {
45 struct hlist_head *table; /* the hashtable itself, with the buckets */ 40 struct hlist_head *table; /* the hashtable itself with the buckets */
41 spinlock_t *list_locks; /* spinlock for each hash list entry */
46 int size; /* size of hashtable */ 42 int size; /* size of hashtable */
47}; 43};
48 44
49/* allocates and clears the hash */ 45/* allocates and clears the hash */
50struct hashtable_t *hash_new(int size); 46struct hashtable_t *hash_new(int size);
51 47
52/* remove element if you already found the element you want to delete and don't
53 * need the overhead to find it again with hash_remove(). But usually, you
54 * don't want to use this function, as it fiddles with hash-internals. */
55void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem);
56
57/* free only the hashtable and the hash itself. */ 48/* free only the hashtable and the hash itself. */
58void hash_destroy(struct hashtable_t *hash); 49void hash_destroy(struct hashtable_t *hash);
59 50
@@ -64,21 +55,22 @@ static inline void hash_delete(struct hashtable_t *hash,
64 hashdata_free_cb free_cb, void *arg) 55 hashdata_free_cb free_cb, void *arg)
65{ 56{
66 struct hlist_head *head; 57 struct hlist_head *head;
67 struct hlist_node *walk, *safe; 58 struct hlist_node *node, *node_tmp;
68 struct element_t *bucket; 59 spinlock_t *list_lock; /* spinlock to protect write access */
69 int i; 60 int i;
70 61
71 for (i = 0; i < hash->size; i++) { 62 for (i = 0; i < hash->size; i++) {
72 head = &hash->table[i]; 63 head = &hash->table[i];
64 list_lock = &hash->list_locks[i];
73 65
74 hlist_for_each_safe(walk, safe, head) { 66 spin_lock_bh(list_lock);
75 bucket = hlist_entry(walk, struct element_t, hlist); 67 hlist_for_each_safe(node, node_tmp, head) {
76 if (free_cb) 68 hlist_del_rcu(node);
77 free_cb(bucket->data, arg);
78 69
79 hlist_del(walk); 70 if (free_cb)
80 kfree(bucket); 71 free_cb(node, arg);
81 } 72 }
73 spin_unlock_bh(list_lock);
82 } 74 }
83 75
84 hash_destroy(hash); 76 hash_destroy(hash);
@@ -87,35 +79,41 @@ static inline void hash_delete(struct hashtable_t *hash,
87/* adds data to the hashtable. returns 0 on success, -1 on error */ 79/* adds data to the hashtable. returns 0 on success, -1 on error */
88static inline int hash_add(struct hashtable_t *hash, 80static inline int hash_add(struct hashtable_t *hash,
89 hashdata_compare_cb compare, 81 hashdata_compare_cb compare,
90 hashdata_choose_cb choose, void *data) 82 hashdata_choose_cb choose,
83 void *data, struct hlist_node *data_node)
91{ 84{
92 int index; 85 int index;
93 struct hlist_head *head; 86 struct hlist_head *head;
94 struct hlist_node *walk, *safe; 87 struct hlist_node *node;
95 struct element_t *bucket; 88 spinlock_t *list_lock; /* spinlock to protect write access */
96 89
97 if (!hash) 90 if (!hash)
98 return -1; 91 goto err;
99 92
100 index = choose(data, hash->size); 93 index = choose(data, hash->size);
101 head = &hash->table[index]; 94 head = &hash->table[index];
95 list_lock = &hash->list_locks[index];
96
97 rcu_read_lock();
98 __hlist_for_each_rcu(node, head) {
99 if (!compare(node, data))
100 continue;
102 101
103 hlist_for_each_safe(walk, safe, head) { 102 goto err_unlock;
104 bucket = hlist_entry(walk, struct element_t, hlist);
105 if (compare(bucket->data, data))
106 return -1;
107 } 103 }
104 rcu_read_unlock();
108 105
109 /* no duplicate found in list, add new element */ 106 /* no duplicate found in list, add new element */
110 bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC); 107 spin_lock_bh(list_lock);
111 108 hlist_add_head_rcu(data_node, head);
112 if (!bucket) 109 spin_unlock_bh(list_lock);
113 return -1;
114
115 bucket->data = data;
116 hlist_add_head(&bucket->hlist, head);
117 110
118 return 0; 111 return 0;
112
113err_unlock:
114 rcu_read_unlock();
115err:
116 return -1;
119} 117}
120 118
121/* removes data from hash, if found. returns pointer do data on success, so you 119/* removes data from hash, if found. returns pointer do data on success, so you
@@ -127,50 +125,25 @@ static inline void *hash_remove(struct hashtable_t *hash,
127 hashdata_choose_cb choose, void *data) 125 hashdata_choose_cb choose, void *data)
128{ 126{
129 size_t index; 127 size_t index;
130 struct hlist_node *walk; 128 struct hlist_node *node;
131 struct element_t *bucket;
132 struct hlist_head *head; 129 struct hlist_head *head;
133 void *data_save; 130 void *data_save = NULL;
134 131
135 index = choose(data, hash->size); 132 index = choose(data, hash->size);
136 head = &hash->table[index]; 133 head = &hash->table[index];
137 134
138 hlist_for_each_entry(bucket, walk, head, hlist) { 135 spin_lock_bh(&hash->list_locks[index]);
139 if (compare(bucket->data, data)) { 136 hlist_for_each(node, head) {
140 data_save = bucket->data; 137 if (!compare(node, data))
141 hlist_del(walk); 138 continue;
142 kfree(bucket);
143 return data_save;
144 }
145 }
146
147 return NULL;
148}
149
150/* finds data, based on the key in keydata. returns the found data on success,
151 * or NULL on error */
152static inline void *hash_find(struct hashtable_t *hash,
153 hashdata_compare_cb compare,
154 hashdata_choose_cb choose, void *keydata)
155{
156 int index;
157 struct hlist_head *head;
158 struct hlist_node *walk;
159 struct element_t *bucket;
160
161 if (!hash)
162 return NULL;
163
164 index = choose(keydata , hash->size);
165 head = &hash->table[index];
166 139
167 hlist_for_each(walk, head) { 140 data_save = node;
168 bucket = hlist_entry(walk, struct element_t, hlist); 141 hlist_del_rcu(node);
169 if (compare(bucket->data, keydata)) 142 break;
170 return bucket->data;
171 } 143 }
144 spin_unlock_bh(&hash->list_locks[index]);
172 145
173 return NULL; 146 return data_save;
174} 147}
175 148
176#endif /* _NET_BATMAN_ADV_HASH_H_ */ 149#endif /* _NET_BATMAN_ADV_HASH_H_ */
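
With struct element_t removed, every object stored in the hash embeds its own hlist_node, the compare callback receives that node, duplicate checks run under RCU, and only the actual list insertion takes the per-bucket spinlock. The sketch below mirrors that hash_add() shape with invented demo_* names; it is not part of the patch, and it uses the four-argument hlist RCU iterator of the kernel generation this series targets (later kernels drop the extra node cursor).

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/if_ether.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>

struct demo_entry {
	uint8_t addr[ETH_ALEN];
	struct hlist_node hash_entry;	/* lives inside the object */
};

/* new-style compare callback: first argument is the bucket node */
static int demo_compare(struct hlist_node *node, void *key)
{
	struct demo_entry *entry;

	entry = container_of(node, struct demo_entry, hash_entry);
	return (memcmp(entry->addr, key, ETH_ALEN) == 0);
}

static int demo_add(struct hlist_head *head, spinlock_t *list_lock,
		    struct demo_entry *new_entry)
{
	struct demo_entry *entry;
	struct hlist_node *node;

	/* duplicate check only needs the RCU read side */
	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, node, head, hash_entry) {
		if (demo_compare(&entry->hash_entry, new_entry->addr)) {
			rcu_read_unlock();
			return -1;
		}
	}
	rcu_read_unlock();

	/* the per-bucket spinlock serializes writers */
	spin_lock_bh(list_lock);
	hlist_add_head_rcu(&new_entry->hash_entry, head);
	spin_unlock_bh(list_lock);
	return 0;
}
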
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index ecf6d7ffab2e..fa22ba2bb832 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -24,7 +24,6 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include "icmp_socket.h" 25#include "icmp_socket.h"
26#include "send.h" 26#include "send.h"
27#include "types.h"
28#include "hash.h" 27#include "hash.h"
29#include "originator.h" 28#include "originator.h"
30#include "hard-interface.h" 29#include "hard-interface.h"
@@ -154,13 +153,13 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
154{ 153{
155 struct socket_client *socket_client = file->private_data; 154 struct socket_client *socket_client = file->private_data;
156 struct bat_priv *bat_priv = socket_client->bat_priv; 155 struct bat_priv *bat_priv = socket_client->bat_priv;
156 struct hard_iface *primary_if = NULL;
157 struct sk_buff *skb; 157 struct sk_buff *skb;
158 struct icmp_packet_rr *icmp_packet; 158 struct icmp_packet_rr *icmp_packet;
159 159
160 struct orig_node *orig_node; 160 struct orig_node *orig_node = NULL;
161 struct batman_if *batman_if; 161 struct neigh_node *neigh_node = NULL;
162 size_t packet_len = sizeof(struct icmp_packet); 162 size_t packet_len = sizeof(struct icmp_packet);
163 uint8_t dstaddr[ETH_ALEN];
164 163
165 if (len < sizeof(struct icmp_packet)) { 164 if (len < sizeof(struct icmp_packet)) {
166 bat_dbg(DBG_BATMAN, bat_priv, 165 bat_dbg(DBG_BATMAN, bat_priv,
@@ -169,15 +168,21 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
169 return -EINVAL; 168 return -EINVAL;
170 } 169 }
171 170
172 if (!bat_priv->primary_if) 171 primary_if = primary_if_get_selected(bat_priv);
173 return -EFAULT; 172
173 if (!primary_if) {
174 len = -EFAULT;
175 goto out;
176 }
174 177
175 if (len >= sizeof(struct icmp_packet_rr)) 178 if (len >= sizeof(struct icmp_packet_rr))
176 packet_len = sizeof(struct icmp_packet_rr); 179 packet_len = sizeof(struct icmp_packet_rr);
177 180
178 skb = dev_alloc_skb(packet_len + sizeof(struct ethhdr)); 181 skb = dev_alloc_skb(packet_len + sizeof(struct ethhdr));
179 if (!skb) 182 if (!skb) {
180 return -ENOMEM; 183 len = -ENOMEM;
184 goto out;
185 }
181 186
182 skb_reserve(skb, sizeof(struct ethhdr)); 187 skb_reserve(skb, sizeof(struct ethhdr));
183 icmp_packet = (struct icmp_packet_rr *)skb_put(skb, packet_len); 188 icmp_packet = (struct icmp_packet_rr *)skb_put(skb, packet_len);
@@ -220,47 +225,42 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
220 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) 225 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
221 goto dst_unreach; 226 goto dst_unreach;
222 227
223 spin_lock_bh(&bat_priv->orig_hash_lock); 228 orig_node = orig_hash_find(bat_priv, icmp_packet->dst);
224 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
225 compare_orig, choose_orig,
226 icmp_packet->dst));
227
228 if (!orig_node) 229 if (!orig_node)
229 goto unlock; 230 goto dst_unreach;
230
231 if (!orig_node->router)
232 goto unlock;
233
234 batman_if = orig_node->router->if_incoming;
235 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
236 231
237 spin_unlock_bh(&bat_priv->orig_hash_lock); 232 neigh_node = orig_node_get_router(orig_node);
233 if (!neigh_node)
234 goto dst_unreach;
238 235
239 if (!batman_if) 236 if (!neigh_node->if_incoming)
240 goto dst_unreach; 237 goto dst_unreach;
241 238
242 if (batman_if->if_status != IF_ACTIVE) 239 if (neigh_node->if_incoming->if_status != IF_ACTIVE)
243 goto dst_unreach; 240 goto dst_unreach;
244 241
245 memcpy(icmp_packet->orig, 242 memcpy(icmp_packet->orig,
246 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); 243 primary_if->net_dev->dev_addr, ETH_ALEN);
247 244
248 if (packet_len == sizeof(struct icmp_packet_rr)) 245 if (packet_len == sizeof(struct icmp_packet_rr))
249 memcpy(icmp_packet->rr, batman_if->net_dev->dev_addr, ETH_ALEN); 246 memcpy(icmp_packet->rr,
250 247 neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN);
251
252 send_skb_packet(skb, batman_if, dstaddr);
253 248
249 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
254 goto out; 250 goto out;
255 251
256unlock:
257 spin_unlock_bh(&bat_priv->orig_hash_lock);
258dst_unreach: 252dst_unreach:
259 icmp_packet->msg_type = DESTINATION_UNREACHABLE; 253 icmp_packet->msg_type = DESTINATION_UNREACHABLE;
260 bat_socket_add_packet(socket_client, icmp_packet, packet_len); 254 bat_socket_add_packet(socket_client, icmp_packet, packet_len);
261free_skb: 255free_skb:
262 kfree_skb(skb); 256 kfree_skb(skb);
263out: 257out:
258 if (primary_if)
259 hardif_free_ref(primary_if);
260 if (neigh_node)
261 neigh_node_free_ref(neigh_node);
262 if (orig_node)
263 orig_node_free_ref(orig_node);
264 return len; 264 return len;
265} 265}
266 266
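
The write path above assembles its packet in the usual way: allocate an skb with headroom for the ethernet header, skb_reserve() that headroom, then skb_put() the payload before handing the buffer to send_skb_packet(). A minimal illustration of that allocation pattern follows; the payload layout is invented for the sketch and does not match struct icmp_packet.

#include <linux/types.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

struct demo_payload {
	uint8_t msg_type;
	uint8_t dst[ETH_ALEN];
};

static struct sk_buff *demo_build_skb(void)
{
	struct sk_buff *skb;
	struct demo_payload *payload;

	/* leave room for the ethernet header pushed at transmit time */
	skb = dev_alloc_skb(sizeof(*payload) + ETH_HLEN);
	if (!skb)
		return NULL;

	skb_reserve(skb, ETH_HLEN);
	payload = (struct demo_payload *)skb_put(skb, sizeof(*payload));
	memset(payload, 0, sizeof(*payload));

	return skb;
}
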
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index bf9b348cde27..462b190fa101 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -22,8 +22,6 @@
22#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ 22#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_
23#define _NET_BATMAN_ADV_ICMP_SOCKET_H_ 23#define _NET_BATMAN_ADV_ICMP_SOCKET_H_
24 24
25#include "types.h"
26
27#define ICMP_SOCKET "socket" 25#define ICMP_SOCKET "socket"
28 26
29void bat_socket_init(void); 27void bat_socket_init(void);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index b827f6a158cb..0a7cee0076f4 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -30,11 +30,13 @@
30#include "translation-table.h" 30#include "translation-table.h"
31#include "hard-interface.h" 31#include "hard-interface.h"
32#include "gateway_client.h" 32#include "gateway_client.h"
33#include "types.h"
34#include "vis.h" 33#include "vis.h"
35#include "hash.h" 34#include "hash.h"
36 35
37struct list_head if_list; 36
37/* List manipulations on hardif_list have to be rtnl_lock()'ed,
38 * list traversals just rcu-locked */
39struct list_head hardif_list;
38 40
39unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 41unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
40 42
@@ -42,7 +44,7 @@ struct workqueue_struct *bat_event_workqueue;
42 44
43static int __init batman_init(void) 45static int __init batman_init(void)
44{ 46{
45 INIT_LIST_HEAD(&if_list); 47 INIT_LIST_HEAD(&hardif_list);
46 48
47 /* the name should not be longer than 10 chars - see 49 /* the name should not be longer than 10 chars - see
48 * http://lwn.net/Articles/23634/ */ 50 * http://lwn.net/Articles/23634/ */
@@ -80,31 +82,31 @@ int mesh_init(struct net_device *soft_iface)
80{ 82{
81 struct bat_priv *bat_priv = netdev_priv(soft_iface); 83 struct bat_priv *bat_priv = netdev_priv(soft_iface);
82 84
83 spin_lock_init(&bat_priv->orig_hash_lock);
84 spin_lock_init(&bat_priv->forw_bat_list_lock); 85 spin_lock_init(&bat_priv->forw_bat_list_lock);
85 spin_lock_init(&bat_priv->forw_bcast_list_lock); 86 spin_lock_init(&bat_priv->forw_bcast_list_lock);
86 spin_lock_init(&bat_priv->hna_lhash_lock); 87 spin_lock_init(&bat_priv->tt_lhash_lock);
87 spin_lock_init(&bat_priv->hna_ghash_lock); 88 spin_lock_init(&bat_priv->tt_ghash_lock);
88 spin_lock_init(&bat_priv->gw_list_lock); 89 spin_lock_init(&bat_priv->gw_list_lock);
89 spin_lock_init(&bat_priv->vis_hash_lock); 90 spin_lock_init(&bat_priv->vis_hash_lock);
90 spin_lock_init(&bat_priv->vis_list_lock); 91 spin_lock_init(&bat_priv->vis_list_lock);
91 spin_lock_init(&bat_priv->softif_neigh_lock); 92 spin_lock_init(&bat_priv->softif_neigh_lock);
93 spin_lock_init(&bat_priv->softif_neigh_vid_lock);
92 94
93 INIT_HLIST_HEAD(&bat_priv->forw_bat_list); 95 INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
94 INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); 96 INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
95 INIT_HLIST_HEAD(&bat_priv->gw_list); 97 INIT_HLIST_HEAD(&bat_priv->gw_list);
96 INIT_HLIST_HEAD(&bat_priv->softif_neigh_list); 98 INIT_HLIST_HEAD(&bat_priv->softif_neigh_vids);
97 99
98 if (originator_init(bat_priv) < 1) 100 if (originator_init(bat_priv) < 1)
99 goto err; 101 goto err;
100 102
101 if (hna_local_init(bat_priv) < 1) 103 if (tt_local_init(bat_priv) < 1)
102 goto err; 104 goto err;
103 105
104 if (hna_global_init(bat_priv) < 1) 106 if (tt_global_init(bat_priv) < 1)
105 goto err; 107 goto err;
106 108
107 hna_local_add(soft_iface, soft_iface->dev_addr); 109 tt_local_add(soft_iface, soft_iface->dev_addr);
108 110
109 if (vis_init(bat_priv) < 1) 111 if (vis_init(bat_priv) < 1)
110 goto err; 112 goto err;
@@ -135,8 +137,8 @@ void mesh_free(struct net_device *soft_iface)
135 gw_node_purge(bat_priv); 137 gw_node_purge(bat_priv);
136 originator_free(bat_priv); 138 originator_free(bat_priv);
137 139
138 hna_local_free(bat_priv); 140 tt_local_free(bat_priv);
139 hna_global_free(bat_priv); 141 tt_global_free(bat_priv);
140 142
141 softif_neigh_purge(bat_priv); 143 softif_neigh_purge(bat_priv);
142 144
@@ -155,14 +157,14 @@ void dec_module_count(void)
155 157
156int is_my_mac(uint8_t *addr) 158int is_my_mac(uint8_t *addr)
157{ 159{
158 struct batman_if *batman_if; 160 struct hard_iface *hard_iface;
159 161
160 rcu_read_lock(); 162 rcu_read_lock();
161 list_for_each_entry_rcu(batman_if, &if_list, list) { 163 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
162 if (batman_if->if_status != IF_ACTIVE) 164 if (hard_iface->if_status != IF_ACTIVE)
163 continue; 165 continue;
164 166
165 if (compare_orig(batman_if->net_dev->dev_addr, addr)) { 167 if (compare_eth(hard_iface->net_dev->dev_addr, addr)) {
166 rcu_read_unlock(); 168 rcu_read_unlock();
167 return 1; 169 return 1;
168 } 170 }
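
The new comment in main.c states the rule for hardif_list: additions and removals happen under rtnl_lock(), while lookups such as is_my_mac() only take rcu_read_lock(). A short sketch of a reader and a writer obeying that split (list and element names are placeholders, not from the patch):

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>

struct demo_iface {
	struct list_head list;
	int active;
};

static LIST_HEAD(demo_list);	/* modify under rtnl_lock(), walk under RCU */

static int demo_any_active(void)
{
	struct demo_iface *iface;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(iface, &demo_list, list) {
		if (!iface->active)
			continue;

		found = 1;
		break;
	}
	rcu_read_unlock();

	return found;
}

static void demo_add(struct demo_iface *iface)
{
	ASSERT_RTNL();	/* writers must hold the rtnl lock */
	list_add_tail_rcu(&iface->list, &demo_list);
}
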
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 65106fb61b8f..148b49e02642 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,9 +22,6 @@
22#ifndef _NET_BATMAN_ADV_MAIN_H_ 22#ifndef _NET_BATMAN_ADV_MAIN_H_
23#define _NET_BATMAN_ADV_MAIN_H_ 23#define _NET_BATMAN_ADV_MAIN_H_
24 24
25/* Kernel Programming */
26#define LINUX
27
28#define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \ 25#define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \
29 "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>" 26 "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>"
30#define DRIVER_DESC "B.A.T.M.A.N. advanced" 27#define DRIVER_DESC "B.A.T.M.A.N. advanced"
@@ -37,16 +34,18 @@
37 34
38#define TQ_MAX_VALUE 255 35#define TQ_MAX_VALUE 255
39#define JITTER 20 36#define JITTER 20
40#define TTL 50 /* Time To Live of broadcast messages */
41 37
42#define PURGE_TIMEOUT 200 /* purge originators after time in seconds if no 38 /* Time To Live of broadcast messages */
43 * valid packet comes in -> TODO: check 39#define TTL 50
44 * influence on TQ_LOCAL_WINDOW_SIZE */ 40
45#define LOCAL_HNA_TIMEOUT 3600 /* in seconds */ 41/* purge originators after time in seconds if no valid packet comes in
42 * -> TODO: check influence on TQ_LOCAL_WINDOW_SIZE */
43#define PURGE_TIMEOUT 200
44#define TT_LOCAL_TIMEOUT 3600 /* in seconds */
46 45
47#define TQ_LOCAL_WINDOW_SIZE 64 /* sliding packet range of received originator 46/* sliding packet range of received originator messages in squence numbers
48 * messages in squence numbers (should be a 47 * (should be a multiple of our word size) */
49 * multiple of our word size) */ 48#define TQ_LOCAL_WINDOW_SIZE 64
50#define TQ_GLOBAL_WINDOW_SIZE 5 49#define TQ_GLOBAL_WINDOW_SIZE 5
51#define TQ_LOCAL_BIDRECT_SEND_MINIMUM 1 50#define TQ_LOCAL_BIDRECT_SEND_MINIMUM 1
52#define TQ_LOCAL_BIDRECT_RECV_MINIMUM 1 51#define TQ_LOCAL_BIDRECT_RECV_MINIMUM 1
@@ -54,26 +53,24 @@
54 53
55#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) 54#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE)
56 55
57#define PACKBUFF_SIZE 2000
58#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ 56#define LOG_BUF_LEN 8192 /* has to be a power of 2 */
59 57
60#define VIS_INTERVAL 5000 /* 5 seconds */ 58#define VIS_INTERVAL 5000 /* 5 seconds */
61 59
62/* how much worse secondary interfaces may be to 60/* how much worse secondary interfaces may be to be considered as bonding
63 * to be considered as bonding candidates */ 61 * candidates */
64
65#define BONDING_TQ_THRESHOLD 50 62#define BONDING_TQ_THRESHOLD 50
66 63
67#define MAX_AGGREGATION_BYTES 512 /* should not be bigger than 512 bytes or 64/* should not be bigger than 512 bytes or change the size of
68 * change the size of 65 * forw_packet->direct_link_flags */
69 * forw_packet->direct_link_flags */ 66#define MAX_AGGREGATION_BYTES 512
70#define MAX_AGGREGATION_MS 100 67#define MAX_AGGREGATION_MS 100
71 68
72#define SOFTIF_NEIGH_TIMEOUT 180000 /* 3 minutes */ 69#define SOFTIF_NEIGH_TIMEOUT 180000 /* 3 minutes */
73 70
71/* don't reset again within 30 seconds */
74#define RESET_PROTECTION_MS 30000 72#define RESET_PROTECTION_MS 30000
75#define EXPECTED_SEQNO_RANGE 65536 73#define EXPECTED_SEQNO_RANGE 65536
76/* don't reset again within 30 seconds */
77 74
78#define MESH_INACTIVE 0 75#define MESH_INACTIVE 0
79#define MESH_ACTIVE 1 76#define MESH_ACTIVE 1
@@ -88,23 +85,20 @@
88#ifdef pr_fmt 85#ifdef pr_fmt
89#undef pr_fmt 86#undef pr_fmt
90#endif 87#endif
91#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* Append 'batman-adv: ' before 88/* Append 'batman-adv: ' before kernel messages */
92 * kernel messages */ 89#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93 90
94#define DBG_BATMAN 1 /* all messages related to routing / flooding / 91/* all messages related to routing / flooding / broadcasting / etc */
95 * broadcasting / etc */ 92#define DBG_BATMAN 1
96#define DBG_ROUTES 2 /* route or hna added / changed / deleted */ 93/* route or tt entry added / changed / deleted */
94#define DBG_ROUTES 2
97#define DBG_ALL 3 95#define DBG_ALL 3
98 96
99#define LOG_BUF_LEN 8192 /* has to be a power of 2 */
100
101 97
102/* 98/*
103 * Vis 99 * Vis
104 */ 100 */
105 101
106/* #define VIS_SUBCLUSTERS_DISABLED */
107
108/* 102/*
109 * Kernel headers 103 * Kernel headers
110 */ 104 */
@@ -130,7 +124,7 @@
130#define REVISION_VERSION_STR " "REVISION_VERSION 124#define REVISION_VERSION_STR " "REVISION_VERSION
131#endif 125#endif
132 126
133extern struct list_head if_list; 127extern struct list_head hardif_list;
134 128
135extern unsigned char broadcast_addr[]; 129extern unsigned char broadcast_addr[];
136extern struct workqueue_struct *bat_event_workqueue; 130extern struct workqueue_struct *bat_event_workqueue;
@@ -158,13 +152,6 @@ static inline void bat_dbg(char type __always_unused,
158} 152}
159#endif 153#endif
160 154
161#define bat_warning(net_dev, fmt, arg...) \
162 do { \
163 struct net_device *_netdev = (net_dev); \
164 struct bat_priv *_batpriv = netdev_priv(_netdev); \
165 bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \
166 pr_warning("%s: " fmt, _netdev->name, ## arg); \
167 } while (0)
168#define bat_info(net_dev, fmt, arg...) \ 155#define bat_info(net_dev, fmt, arg...) \
169 do { \ 156 do { \
170 struct net_device *_netdev = (net_dev); \ 157 struct net_device *_netdev = (net_dev); \
@@ -180,4 +167,16 @@ static inline void bat_dbg(char type __always_unused,
180 pr_err("%s: " fmt, _netdev->name, ## arg); \ 167 pr_err("%s: " fmt, _netdev->name, ## arg); \
181 } while (0) 168 } while (0)
182 169
170/**
171 * returns 1 if they are the same ethernet addr
172 *
173 * note: can't use compare_ether_addr() as it requires aligned memory
174 */
175static inline int compare_eth(void *data1, void *data2)
176{
177 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
178}
179
180#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
181
183#endif /* _NET_BATMAN_ADV_MAIN_H_ */ 182#endif /* _NET_BATMAN_ADV_MAIN_H_ */
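
The new compare_eth() helper above falls back to memcmp() because, as its comment notes, compare_ether_addr() requires aligned memory, while addresses embedded in packet payloads may not be u16-aligned. A tiny usage sketch with an invented packed header (not from the patch):

#include <linux/types.h>
#include <linux/string.h>
#include <linux/if_ether.h>

/* packed on-wire layout: the address field is not necessarily aligned */
struct demo_packet {
	uint8_t packet_type;
	uint8_t dst[ETH_ALEN];
} __packed;

static inline int demo_compare_eth(const void *data1, const void *data2)
{
	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
}

static int demo_is_for_me(const struct demo_packet *packet,
			  const uint8_t *my_addr)
{
	return demo_compare_eth(packet->dst, my_addr);
}
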
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 6b7fb6b7e6f9..40a30bbcd147 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -19,8 +19,6 @@
19 * 19 *
20 */ 20 */
21 21
22/* increase the reference counter for this originator */
23
24#include "main.h" 22#include "main.h"
25#include "originator.h" 23#include "originator.h"
26#include "hash.h" 24#include "hash.h"
@@ -44,24 +42,43 @@ int originator_init(struct bat_priv *bat_priv)
44 if (bat_priv->orig_hash) 42 if (bat_priv->orig_hash)
45 return 1; 43 return 1;
46 44
47 spin_lock_bh(&bat_priv->orig_hash_lock);
48 bat_priv->orig_hash = hash_new(1024); 45 bat_priv->orig_hash = hash_new(1024);
49 46
50 if (!bat_priv->orig_hash) 47 if (!bat_priv->orig_hash)
51 goto err; 48 goto err;
52 49
53 spin_unlock_bh(&bat_priv->orig_hash_lock);
54 start_purge_timer(bat_priv); 50 start_purge_timer(bat_priv);
55 return 1; 51 return 1;
56 52
57err: 53err:
58 spin_unlock_bh(&bat_priv->orig_hash_lock);
59 return 0; 54 return 0;
60} 55}
61 56
62struct neigh_node * 57void neigh_node_free_ref(struct neigh_node *neigh_node)
63create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, 58{
64 uint8_t *neigh, struct batman_if *if_incoming) 59 if (atomic_dec_and_test(&neigh_node->refcount))
60 kfree_rcu(neigh_node, rcu);
61}
62
63/* increases the refcounter of a found router */
64struct neigh_node *orig_node_get_router(struct orig_node *orig_node)
65{
66 struct neigh_node *router;
67
68 rcu_read_lock();
69 router = rcu_dereference(orig_node->router);
70
71 if (router && !atomic_inc_not_zero(&router->refcount))
72 router = NULL;
73
74 rcu_read_unlock();
75 return router;
76}
77
78struct neigh_node *create_neighbor(struct orig_node *orig_node,
79 struct orig_node *orig_neigh_node,
80 uint8_t *neigh,
81 struct hard_iface *if_incoming)
65{ 82{
66 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 83 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
67 struct neigh_node *neigh_node; 84 struct neigh_node *neigh_node;
@@ -73,50 +90,95 @@ create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node,
73 if (!neigh_node) 90 if (!neigh_node)
74 return NULL; 91 return NULL;
75 92
76 INIT_LIST_HEAD(&neigh_node->list); 93 INIT_HLIST_NODE(&neigh_node->list);
94 INIT_LIST_HEAD(&neigh_node->bonding_list);
95 spin_lock_init(&neigh_node->tq_lock);
77 96
78 memcpy(neigh_node->addr, neigh, ETH_ALEN); 97 memcpy(neigh_node->addr, neigh, ETH_ALEN);
79 neigh_node->orig_node = orig_neigh_node; 98 neigh_node->orig_node = orig_neigh_node;
80 neigh_node->if_incoming = if_incoming; 99 neigh_node->if_incoming = if_incoming;
81 100
82 list_add_tail(&neigh_node->list, &orig_node->neigh_list); 101 /* extra reference for return */
102 atomic_set(&neigh_node->refcount, 2);
103
104 spin_lock_bh(&orig_node->neigh_list_lock);
105 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
106 spin_unlock_bh(&orig_node->neigh_list_lock);
83 return neigh_node; 107 return neigh_node;
84} 108}
85 109
86static void free_orig_node(void *data, void *arg) 110static void orig_node_free_rcu(struct rcu_head *rcu)
87{ 111{
88 struct list_head *list_pos, *list_pos_tmp; 112 struct hlist_node *node, *node_tmp;
89 struct neigh_node *neigh_node; 113 struct neigh_node *neigh_node, *tmp_neigh_node;
90 struct orig_node *orig_node = (struct orig_node *)data; 114 struct orig_node *orig_node;
91 struct bat_priv *bat_priv = (struct bat_priv *)arg;
92 115
93 /* for all neighbors towards this originator ... */ 116 orig_node = container_of(rcu, struct orig_node, rcu);
94 list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { 117
95 neigh_node = list_entry(list_pos, struct neigh_node, list); 118 spin_lock_bh(&orig_node->neigh_list_lock);
96 119
97 list_del(list_pos); 120 /* for all bonding members ... */
98 kfree(neigh_node); 121 list_for_each_entry_safe(neigh_node, tmp_neigh_node,
122 &orig_node->bond_list, bonding_list) {
123 list_del_rcu(&neigh_node->bonding_list);
124 neigh_node_free_ref(neigh_node);
99 } 125 }
100 126
127 /* for all neighbors towards this originator ... */
128 hlist_for_each_entry_safe(neigh_node, node, node_tmp,
129 &orig_node->neigh_list, list) {
130 hlist_del_rcu(&neigh_node->list);
131 neigh_node_free_ref(neigh_node);
132 }
133
134 spin_unlock_bh(&orig_node->neigh_list_lock);
135
101 frag_list_free(&orig_node->frag_list); 136 frag_list_free(&orig_node->frag_list);
102 hna_global_del_orig(bat_priv, orig_node, "originator timed out"); 137 tt_global_del_orig(orig_node->bat_priv, orig_node,
138 "originator timed out");
103 139
104 kfree(orig_node->bcast_own); 140 kfree(orig_node->bcast_own);
105 kfree(orig_node->bcast_own_sum); 141 kfree(orig_node->bcast_own_sum);
106 kfree(orig_node); 142 kfree(orig_node);
107} 143}
108 144
145void orig_node_free_ref(struct orig_node *orig_node)
146{
147 if (atomic_dec_and_test(&orig_node->refcount))
148 call_rcu(&orig_node->rcu, orig_node_free_rcu);
149}
150
109void originator_free(struct bat_priv *bat_priv) 151void originator_free(struct bat_priv *bat_priv)
110{ 152{
111 if (!bat_priv->orig_hash) 153 struct hashtable_t *hash = bat_priv->orig_hash;
154 struct hlist_node *node, *node_tmp;
155 struct hlist_head *head;
156 spinlock_t *list_lock; /* spinlock to protect write access */
157 struct orig_node *orig_node;
158 int i;
159
160 if (!hash)
112 return; 161 return;
113 162
114 cancel_delayed_work_sync(&bat_priv->orig_work); 163 cancel_delayed_work_sync(&bat_priv->orig_work);
115 164
116 spin_lock_bh(&bat_priv->orig_hash_lock);
117 hash_delete(bat_priv->orig_hash, free_orig_node, bat_priv);
118 bat_priv->orig_hash = NULL; 165 bat_priv->orig_hash = NULL;
119 spin_unlock_bh(&bat_priv->orig_hash_lock); 166
167 for (i = 0; i < hash->size; i++) {
168 head = &hash->table[i];
169 list_lock = &hash->list_locks[i];
170
171 spin_lock_bh(list_lock);
172 hlist_for_each_entry_safe(orig_node, node, node_tmp,
173 head, hash_entry) {
174
175 hlist_del_rcu(node);
176 orig_node_free_ref(orig_node);
177 }
178 spin_unlock_bh(list_lock);
179 }
180
181 hash_destroy(hash);
120} 182}
121 183
122/* this function finds or creates an originator entry for the given 184/* this function finds or creates an originator entry for the given
@@ -127,10 +189,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
127 int size; 189 int size;
128 int hash_added; 190 int hash_added;
129 191
130 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, 192 orig_node = orig_hash_find(bat_priv, addr);
131 compare_orig, choose_orig,
132 addr));
133
134 if (orig_node) 193 if (orig_node)
135 return orig_node; 194 return orig_node;
136 195
@@ -141,16 +200,26 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
141 if (!orig_node) 200 if (!orig_node)
142 return NULL; 201 return NULL;
143 202
144 INIT_LIST_HEAD(&orig_node->neigh_list); 203 INIT_HLIST_HEAD(&orig_node->neigh_list);
204 INIT_LIST_HEAD(&orig_node->bond_list);
205 spin_lock_init(&orig_node->ogm_cnt_lock);
206 spin_lock_init(&orig_node->bcast_seqno_lock);
207 spin_lock_init(&orig_node->neigh_list_lock);
208
209 /* extra reference for return */
210 atomic_set(&orig_node->refcount, 2);
145 211
212 orig_node->bat_priv = bat_priv;
146 memcpy(orig_node->orig, addr, ETH_ALEN); 213 memcpy(orig_node->orig, addr, ETH_ALEN);
147 orig_node->router = NULL; 214 orig_node->router = NULL;
148 orig_node->hna_buff = NULL; 215 orig_node->tt_buff = NULL;
149 orig_node->bcast_seqno_reset = jiffies - 1 216 orig_node->bcast_seqno_reset = jiffies - 1
150 - msecs_to_jiffies(RESET_PROTECTION_MS); 217 - msecs_to_jiffies(RESET_PROTECTION_MS);
151 orig_node->batman_seqno_reset = jiffies - 1 218 orig_node->batman_seqno_reset = jiffies - 1
152 - msecs_to_jiffies(RESET_PROTECTION_MS); 219 - msecs_to_jiffies(RESET_PROTECTION_MS);
153 220
221 atomic_set(&orig_node->bond_candidates, 0);
222
154 size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS; 223 size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS;
155 224
156 orig_node->bcast_own = kzalloc(size, GFP_ATOMIC); 225 orig_node->bcast_own = kzalloc(size, GFP_ATOMIC);
@@ -166,8 +235,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
166 if (!orig_node->bcast_own_sum) 235 if (!orig_node->bcast_own_sum)
167 goto free_bcast_own; 236 goto free_bcast_own;
168 237
169 hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, 238 hash_added = hash_add(bat_priv->orig_hash, compare_orig,
170 orig_node); 239 choose_orig, orig_node, &orig_node->hash_entry);
171 if (hash_added < 0) 240 if (hash_added < 0)
172 goto free_bcast_own_sum; 241 goto free_bcast_own_sum;
173 242
@@ -185,23 +254,30 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv,
185 struct orig_node *orig_node, 254 struct orig_node *orig_node,
186 struct neigh_node **best_neigh_node) 255 struct neigh_node **best_neigh_node)
187{ 256{
188 struct list_head *list_pos, *list_pos_tmp; 257 struct hlist_node *node, *node_tmp;
189 struct neigh_node *neigh_node; 258 struct neigh_node *neigh_node;
190 bool neigh_purged = false; 259 bool neigh_purged = false;
191 260
192 *best_neigh_node = NULL; 261 *best_neigh_node = NULL;
193 262
263 spin_lock_bh(&orig_node->neigh_list_lock);
264
194 /* for all neighbors towards this originator ... */ 265 /* for all neighbors towards this originator ... */
195 list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { 266 hlist_for_each_entry_safe(neigh_node, node, node_tmp,
196 neigh_node = list_entry(list_pos, struct neigh_node, list); 267 &orig_node->neigh_list, list) {
197 268
198 if ((time_after(jiffies, 269 if ((time_after(jiffies,
199 neigh_node->last_valid + PURGE_TIMEOUT * HZ)) || 270 neigh_node->last_valid + PURGE_TIMEOUT * HZ)) ||
200 (neigh_node->if_incoming->if_status == IF_INACTIVE) || 271 (neigh_node->if_incoming->if_status == IF_INACTIVE) ||
272 (neigh_node->if_incoming->if_status == IF_NOT_IN_USE) ||
201 (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) { 273 (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) {
202 274
203 if (neigh_node->if_incoming->if_status == 275 if ((neigh_node->if_incoming->if_status ==
204 IF_TO_BE_REMOVED) 276 IF_INACTIVE) ||
277 (neigh_node->if_incoming->if_status ==
278 IF_NOT_IN_USE) ||
279 (neigh_node->if_incoming->if_status ==
280 IF_TO_BE_REMOVED))
205 bat_dbg(DBG_BATMAN, bat_priv, 281 bat_dbg(DBG_BATMAN, bat_priv,
206 "neighbor purge: originator %pM, " 282 "neighbor purge: originator %pM, "
207 "neighbor: %pM, iface: %s\n", 283 "neighbor: %pM, iface: %s\n",
@@ -215,14 +291,18 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv,
215 (neigh_node->last_valid / HZ)); 291 (neigh_node->last_valid / HZ));
216 292
217 neigh_purged = true; 293 neigh_purged = true;
218 list_del(list_pos); 294
219 kfree(neigh_node); 295 hlist_del_rcu(&neigh_node->list);
296 bonding_candidate_del(orig_node, neigh_node);
297 neigh_node_free_ref(neigh_node);
220 } else { 298 } else {
221 if ((!*best_neigh_node) || 299 if ((!*best_neigh_node) ||
222 (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) 300 (neigh_node->tq_avg > (*best_neigh_node)->tq_avg))
223 *best_neigh_node = neigh_node; 301 *best_neigh_node = neigh_node;
224 } 302 }
225 } 303 }
304
305 spin_unlock_bh(&orig_node->neigh_list_lock);
226 return neigh_purged; 306 return neigh_purged;
227} 307}
228 308
@@ -243,11 +323,8 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
243 &best_neigh_node)) { 323 &best_neigh_node)) {
244 update_routes(bat_priv, orig_node, 324 update_routes(bat_priv, orig_node,
245 best_neigh_node, 325 best_neigh_node,
246 orig_node->hna_buff, 326 orig_node->tt_buff,
247 orig_node->hna_buff_len); 327 orig_node->tt_buff_len);
248 /* update bonding candidates, we could have lost
249 * some candidates. */
250 update_bonding_candidates(bat_priv, orig_node);
251 } 328 }
252 } 329 }
253 330
@@ -257,40 +334,38 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
257static void _purge_orig(struct bat_priv *bat_priv) 334static void _purge_orig(struct bat_priv *bat_priv)
258{ 335{
259 struct hashtable_t *hash = bat_priv->orig_hash; 336 struct hashtable_t *hash = bat_priv->orig_hash;
260 struct hlist_node *walk, *safe; 337 struct hlist_node *node, *node_tmp;
261 struct hlist_head *head; 338 struct hlist_head *head;
262 struct element_t *bucket; 339 spinlock_t *list_lock; /* spinlock to protect write access */
263 struct orig_node *orig_node; 340 struct orig_node *orig_node;
264 int i; 341 int i;
265 342
266 if (!hash) 343 if (!hash)
267 return; 344 return;
268 345
269 spin_lock_bh(&bat_priv->orig_hash_lock);
270
271 /* for all origins... */ 346 /* for all origins... */
272 for (i = 0; i < hash->size; i++) { 347 for (i = 0; i < hash->size; i++) {
273 head = &hash->table[i]; 348 head = &hash->table[i];
349 list_lock = &hash->list_locks[i];
274 350
275 hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { 351 spin_lock_bh(list_lock);
276 orig_node = bucket->data; 352 hlist_for_each_entry_safe(orig_node, node, node_tmp,
277 353 head, hash_entry) {
278 if (purge_orig_node(bat_priv, orig_node)) { 354 if (purge_orig_node(bat_priv, orig_node)) {
279 if (orig_node->gw_flags) 355 if (orig_node->gw_flags)
280 gw_node_delete(bat_priv, orig_node); 356 gw_node_delete(bat_priv, orig_node);
281 hlist_del(walk); 357 hlist_del_rcu(node);
282 kfree(bucket); 358 orig_node_free_ref(orig_node);
283 free_orig_node(orig_node, bat_priv); 359 continue;
284 } 360 }
285 361
286 if (time_after(jiffies, orig_node->last_frag_packet + 362 if (time_after(jiffies, orig_node->last_frag_packet +
287 msecs_to_jiffies(FRAG_TIMEOUT))) 363 msecs_to_jiffies(FRAG_TIMEOUT)))
288 frag_list_free(&orig_node->frag_list); 364 frag_list_free(&orig_node->frag_list);
289 } 365 }
366 spin_unlock_bh(list_lock);
290 } 367 }
291 368
292 spin_unlock_bh(&bat_priv->orig_hash_lock);
293
294 gw_node_purge(bat_priv); 369 gw_node_purge(bat_priv);
295 gw_election(bat_priv); 370 gw_election(bat_priv);
296 371
@@ -318,79 +393,86 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
318 struct net_device *net_dev = (struct net_device *)seq->private; 393 struct net_device *net_dev = (struct net_device *)seq->private;
319 struct bat_priv *bat_priv = netdev_priv(net_dev); 394 struct bat_priv *bat_priv = netdev_priv(net_dev);
320 struct hashtable_t *hash = bat_priv->orig_hash; 395 struct hashtable_t *hash = bat_priv->orig_hash;
321 struct hlist_node *walk; 396 struct hlist_node *node, *node_tmp;
322 struct hlist_head *head; 397 struct hlist_head *head;
323 struct element_t *bucket; 398 struct hard_iface *primary_if;
324 struct orig_node *orig_node; 399 struct orig_node *orig_node;
325 struct neigh_node *neigh_node; 400 struct neigh_node *neigh_node, *neigh_node_tmp;
326 int batman_count = 0; 401 int batman_count = 0;
327 int last_seen_secs; 402 int last_seen_secs;
328 int last_seen_msecs; 403 int last_seen_msecs;
329 int i; 404 int i, ret = 0;
330 405
331 if ((!bat_priv->primary_if) || 406 primary_if = primary_if_get_selected(bat_priv);
332 (bat_priv->primary_if->if_status != IF_ACTIVE)) {
333 if (!bat_priv->primary_if)
334 return seq_printf(seq, "BATMAN mesh %s disabled - "
335 "please specify interfaces to enable it\n",
336 net_dev->name);
337 407
338 return seq_printf(seq, "BATMAN mesh %s " 408 if (!primary_if) {
339 "disabled - primary interface not active\n", 409 ret = seq_printf(seq, "BATMAN mesh %s disabled - "
340 net_dev->name); 410 "please specify interfaces to enable it\n",
411 net_dev->name);
412 goto out;
413 }
414
415 if (primary_if->if_status != IF_ACTIVE) {
416 ret = seq_printf(seq, "BATMAN mesh %s "
417 "disabled - primary interface not active\n",
418 net_dev->name);
419 goto out;
341 } 420 }
342 421
343 seq_printf(seq, "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n", 422 seq_printf(seq, "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n",
344 SOURCE_VERSION, REVISION_VERSION_STR, 423 SOURCE_VERSION, REVISION_VERSION_STR,
345 bat_priv->primary_if->net_dev->name, 424 primary_if->net_dev->name,
346 bat_priv->primary_if->net_dev->dev_addr, net_dev->name); 425 primary_if->net_dev->dev_addr, net_dev->name);
347 seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", 426 seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
348 "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop", 427 "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop",
349 "outgoingIF", "Potential nexthops"); 428 "outgoingIF", "Potential nexthops");
350 429
351 spin_lock_bh(&bat_priv->orig_hash_lock);
352
353 for (i = 0; i < hash->size; i++) { 430 for (i = 0; i < hash->size; i++) {
354 head = &hash->table[i]; 431 head = &hash->table[i];
355 432
356 hlist_for_each_entry(bucket, walk, head, hlist) { 433 rcu_read_lock();
357 orig_node = bucket->data; 434 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
358 435 neigh_node = orig_node_get_router(orig_node);
359 if (!orig_node->router) 436 if (!neigh_node)
360 continue; 437 continue;
361 438
362 if (orig_node->router->tq_avg == 0) 439 if (neigh_node->tq_avg == 0)
363 continue; 440 goto next;
364 441
365 last_seen_secs = jiffies_to_msecs(jiffies - 442 last_seen_secs = jiffies_to_msecs(jiffies -
366 orig_node->last_valid) / 1000; 443 orig_node->last_valid) / 1000;
367 last_seen_msecs = jiffies_to_msecs(jiffies - 444 last_seen_msecs = jiffies_to_msecs(jiffies -
368 orig_node->last_valid) % 1000; 445 orig_node->last_valid) % 1000;
369 446
370 neigh_node = orig_node->router;
371 seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:", 447 seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:",
372 orig_node->orig, last_seen_secs, 448 orig_node->orig, last_seen_secs,
373 last_seen_msecs, neigh_node->tq_avg, 449 last_seen_msecs, neigh_node->tq_avg,
374 neigh_node->addr, 450 neigh_node->addr,
375 neigh_node->if_incoming->net_dev->name); 451 neigh_node->if_incoming->net_dev->name);
376 452
377 list_for_each_entry(neigh_node, &orig_node->neigh_list, 453 hlist_for_each_entry_rcu(neigh_node_tmp, node_tmp,
378 list) { 454 &orig_node->neigh_list, list) {
379 seq_printf(seq, " %pM (%3i)", neigh_node->addr, 455 seq_printf(seq, " %pM (%3i)",
380 neigh_node->tq_avg); 456 neigh_node_tmp->addr,
457 neigh_node_tmp->tq_avg);
381 } 458 }
382 459
383 seq_printf(seq, "\n"); 460 seq_printf(seq, "\n");
384 batman_count++; 461 batman_count++;
462
463next:
464 neigh_node_free_ref(neigh_node);
385 } 465 }
466 rcu_read_unlock();
386 } 467 }
387 468
388 spin_unlock_bh(&bat_priv->orig_hash_lock); 469 if (batman_count == 0)
389
390 if ((batman_count == 0))
391 seq_printf(seq, "No batman nodes in range ...\n"); 470 seq_printf(seq, "No batman nodes in range ...\n");
392 471
393 return 0; 472out:
473 if (primary_if)
474 hardif_free_ref(primary_if);
475 return ret;
394} 476}
395 477
396static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) 478static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
@@ -423,36 +505,36 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
423 return 0; 505 return 0;
424} 506}
425 507
426int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) 508int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num)
427{ 509{
428 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 510 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
429 struct hashtable_t *hash = bat_priv->orig_hash; 511 struct hashtable_t *hash = bat_priv->orig_hash;
430 struct hlist_node *walk; 512 struct hlist_node *node;
431 struct hlist_head *head; 513 struct hlist_head *head;
432 struct element_t *bucket;
433 struct orig_node *orig_node; 514 struct orig_node *orig_node;
434 int i; 515 int i, ret;
435 516
436 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on 517 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on
437 * if_num */ 518 * if_num */
438 spin_lock_bh(&bat_priv->orig_hash_lock);
439
440 for (i = 0; i < hash->size; i++) { 519 for (i = 0; i < hash->size; i++) {
441 head = &hash->table[i]; 520 head = &hash->table[i];
442 521
443 hlist_for_each_entry(bucket, walk, head, hlist) { 522 rcu_read_lock();
444 orig_node = bucket->data; 523 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
524 spin_lock_bh(&orig_node->ogm_cnt_lock);
525 ret = orig_node_add_if(orig_node, max_if_num);
526 spin_unlock_bh(&orig_node->ogm_cnt_lock);
445 527
446 if (orig_node_add_if(orig_node, max_if_num) == -1) 528 if (ret == -1)
447 goto err; 529 goto err;
448 } 530 }
531 rcu_read_unlock();
449 } 532 }
450 533
451 spin_unlock_bh(&bat_priv->orig_hash_lock);
452 return 0; 534 return 0;
453 535
454err: 536err:
455 spin_unlock_bh(&bat_priv->orig_hash_lock); 537 rcu_read_unlock();
456 return -ENOMEM; 538 return -ENOMEM;
457} 539}
458 540
@@ -508,57 +590,55 @@ free_own_sum:
508 return 0; 590 return 0;
509} 591}
510 592
511int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) 593int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num)
512{ 594{
513 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 595 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
514 struct hashtable_t *hash = bat_priv->orig_hash; 596 struct hashtable_t *hash = bat_priv->orig_hash;
515 struct hlist_node *walk; 597 struct hlist_node *node;
516 struct hlist_head *head; 598 struct hlist_head *head;
517 struct element_t *bucket; 599 struct hard_iface *hard_iface_tmp;
518 struct batman_if *batman_if_tmp;
519 struct orig_node *orig_node; 600 struct orig_node *orig_node;
520 int i, ret; 601 int i, ret;
521 602
522 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on 603 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on
523 * if_num */ 604 * if_num */
524 spin_lock_bh(&bat_priv->orig_hash_lock);
525
526 for (i = 0; i < hash->size; i++) { 605 for (i = 0; i < hash->size; i++) {
527 head = &hash->table[i]; 606 head = &hash->table[i];
528 607
529 hlist_for_each_entry(bucket, walk, head, hlist) { 608 rcu_read_lock();
530 orig_node = bucket->data; 609 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
531 610 spin_lock_bh(&orig_node->ogm_cnt_lock);
532 ret = orig_node_del_if(orig_node, max_if_num, 611 ret = orig_node_del_if(orig_node, max_if_num,
533 batman_if->if_num); 612 hard_iface->if_num);
613 spin_unlock_bh(&orig_node->ogm_cnt_lock);
534 614
535 if (ret == -1) 615 if (ret == -1)
536 goto err; 616 goto err;
537 } 617 }
618 rcu_read_unlock();
538 } 619 }
539 620
540 /* renumber remaining batman interfaces _inside_ of orig_hash_lock */ 621 /* renumber remaining batman interfaces _inside_ of orig_hash_lock */
541 rcu_read_lock(); 622 rcu_read_lock();
542 list_for_each_entry_rcu(batman_if_tmp, &if_list, list) { 623 list_for_each_entry_rcu(hard_iface_tmp, &hardif_list, list) {
543 if (batman_if_tmp->if_status == IF_NOT_IN_USE) 624 if (hard_iface_tmp->if_status == IF_NOT_IN_USE)
544 continue; 625 continue;
545 626
546 if (batman_if == batman_if_tmp) 627 if (hard_iface == hard_iface_tmp)
547 continue; 628 continue;
548 629
549 if (batman_if->soft_iface != batman_if_tmp->soft_iface) 630 if (hard_iface->soft_iface != hard_iface_tmp->soft_iface)
550 continue; 631 continue;
551 632
552 if (batman_if_tmp->if_num > batman_if->if_num) 633 if (hard_iface_tmp->if_num > hard_iface->if_num)
553 batman_if_tmp->if_num--; 634 hard_iface_tmp->if_num--;
554 } 635 }
555 rcu_read_unlock(); 636 rcu_read_unlock();
556 637
557 batman_if->if_num = -1; 638 hard_iface->if_num = -1;
558 spin_unlock_bh(&bat_priv->orig_hash_lock);
559 return 0; 639 return 0;
560 640
561err: 641err:
562 spin_unlock_bh(&bat_priv->orig_hash_lock); 642 rcu_read_unlock();
563 return -ENOMEM; 643 return -ENOMEM;
564} 644}
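The conversion visible throughout originator.c, dropping the global bat_priv->orig_hash_lock in favour of an RCU-protected bucket walk plus the per-originator ogm_cnt_lock, follows one shape in orig_hash_add_if() and orig_hash_del_if(). A condensed sketch of that shape (kernel-style fragment only, surrounding declarations and error handling trimmed):

        struct hlist_head *head;
        struct hlist_node *node;
        struct orig_node *orig_node;
        int i;

        for (i = 0; i < hash->size; i++) {
                head = &hash->table[i];

                rcu_read_lock();
                hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
                        /* the narrow per-node lock only guards the OGM counters */
                        spin_lock_bh(&orig_node->ogm_cnt_lock);
                        /* ... resize/update orig_node->bcast_own(_sum) here ... */
                        spin_unlock_bh(&orig_node->ogm_cnt_lock);
                }
                rcu_read_unlock();
        }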
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index d474ceb2a4eb..e1d641f27aa9 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,21 +22,29 @@
22#ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ 22#ifndef _NET_BATMAN_ADV_ORIGINATOR_H_
23#define _NET_BATMAN_ADV_ORIGINATOR_H_ 23#define _NET_BATMAN_ADV_ORIGINATOR_H_
24 24
25#include "hash.h"
26
25int originator_init(struct bat_priv *bat_priv); 27int originator_init(struct bat_priv *bat_priv);
26void originator_free(struct bat_priv *bat_priv); 28void originator_free(struct bat_priv *bat_priv);
27void purge_orig_ref(struct bat_priv *bat_priv); 29void purge_orig_ref(struct bat_priv *bat_priv);
30void orig_node_free_ref(struct orig_node *orig_node);
28struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); 31struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr);
29struct neigh_node * 32struct neigh_node *create_neighbor(struct orig_node *orig_node,
30create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, 33 struct orig_node *orig_neigh_node,
31 uint8_t *neigh, struct batman_if *if_incoming); 34 uint8_t *neigh,
35 struct hard_iface *if_incoming);
36void neigh_node_free_ref(struct neigh_node *neigh_node);
37struct neigh_node *orig_node_get_router(struct orig_node *orig_node);
32int orig_seq_print_text(struct seq_file *seq, void *offset); 38int orig_seq_print_text(struct seq_file *seq, void *offset);
33int orig_hash_add_if(struct batman_if *batman_if, int max_if_num); 39int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num);
34int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); 40int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num);
35 41
36 42
37/* returns 1 if they are the same originator */ 43/* returns 1 if they are the same originator */
38static inline int compare_orig(void *data1, void *data2) 44static inline int compare_orig(struct hlist_node *node, void *data2)
39{ 45{
46 void *data1 = container_of(node, struct orig_node, hash_entry);
47
40 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); 48 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
41} 49}
42 50
@@ -61,4 +69,35 @@ static inline int choose_orig(void *data, int32_t size)
61 return hash % size; 69 return hash % size;
62} 70}
63 71
72static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv,
73 void *data)
74{
75 struct hashtable_t *hash = bat_priv->orig_hash;
76 struct hlist_head *head;
77 struct hlist_node *node;
78 struct orig_node *orig_node, *orig_node_tmp = NULL;
79 int index;
80
81 if (!hash)
82 return NULL;
83
84 index = choose_orig(data, hash->size);
85 head = &hash->table[index];
86
87 rcu_read_lock();
88 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
89 if (!compare_eth(orig_node, data))
90 continue;
91
92 if (!atomic_inc_not_zero(&orig_node->refcount))
93 continue;
94
95 orig_node_tmp = orig_node;
96 break;
97 }
98 rcu_read_unlock();
99
100 return orig_node_tmp;
101}
102
64#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ 103#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
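The new orig_hash_find() helper returns its result with an elevated refcount, taken with atomic_inc_not_zero() inside the RCU read section, so a caller must pair every successful lookup with orig_node_free_ref(). A minimal sketch of that calling convention; example_lookup() is a made-up name and the batman-adv types are assumed to be in scope:

static int example_lookup(struct bat_priv *bat_priv, uint8_t *addr)
{
        struct orig_node *orig_node;

        /* the lookup takes a reference on the returned node */
        orig_node = orig_hash_find(bat_priv, addr);
        if (!orig_node)
                return -1;      /* unknown originator */

        /* ... orig_node may be dereferenced safely here ... */

        /* drop the reference taken by orig_hash_find() */
        orig_node_free_ref(orig_node);
        return 0;
}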
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 2284e8129cb2..eda99650e9f8 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -50,6 +50,7 @@
50 50
51/* fragmentation defines */ 51/* fragmentation defines */
52#define UNI_FRAG_HEAD 0x01 52#define UNI_FRAG_HEAD 0x01
53#define UNI_FRAG_LARGETAIL 0x02
53 54
54struct batman_packet { 55struct batman_packet {
55 uint8_t packet_type; 56 uint8_t packet_type;
@@ -60,7 +61,7 @@ struct batman_packet {
60 uint8_t orig[6]; 61 uint8_t orig[6];
61 uint8_t prev_sender[6]; 62 uint8_t prev_sender[6];
62 uint8_t ttl; 63 uint8_t ttl;
63 uint8_t num_hna; 64 uint8_t num_tt;
64 uint8_t gw_flags; /* flags related to gateway class */ 65 uint8_t gw_flags; /* flags related to gateway class */
65 uint8_t align; 66 uint8_t align;
66} __packed; 67} __packed;
@@ -127,8 +128,7 @@ struct vis_packet {
127 uint8_t entries; /* number of entries behind this struct */ 128 uint8_t entries; /* number of entries behind this struct */
128 uint32_t seqno; /* sequence number */ 129 uint32_t seqno; /* sequence number */
129 uint8_t ttl; /* TTL */ 130 uint8_t ttl; /* TTL */
130 uint8_t vis_orig[6]; /* originator that informs about its 131 uint8_t vis_orig[6]; /* originator that announces its neighbors */
131 * neighbors */
132 uint8_t target_orig[6]; /* who should receive this packet */ 132 uint8_t target_orig[6]; /* who should receive this packet */
133 uint8_t sender_orig[6]; /* who sent or rebroadcasted this packet */ 133 uint8_t sender_orig[6]; /* who sent or rebroadcasted this packet */
134} __packed; 134} __packed;
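The num_hna to num_tt rename earlier in this file keeps the wire format: the field counts the ETH_ALEN-sized client addresses appended after the OGM header, so the translation-table buffer that follows is num_tt * ETH_ALEN bytes (routing.c clamps the received buffer length against exactly this product). A standalone sketch of that arithmetic with an assumed client count:

#include <stdio.h>
#include <stdint.h>

#define ETH_ALEN 6

int main(void)
{
        uint8_t num_tt = 3;     /* assumed: three announced clients */
        size_t tt_buff_len = (size_t)num_tt * ETH_ALEN;

        /* prints "tt buffer length: 18 bytes" */
        printf("tt buffer length: %zu bytes\n", tt_buff_len);
        return 0;
}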
diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c
index defd37c9be1f..5bb6a619afee 100644
--- a/net/batman-adv/ring_buffer.c
+++ b/net/batman-adv/ring_buffer.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h
index 6b0cb9aaeba5..0395b2741864 100644
--- a/net/batman-adv/ring_buffer.h
+++ b/net/batman-adv/ring_buffer.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 8828eddd3f72..bb1c3ec7e3ff 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -28,7 +28,6 @@
28#include "icmp_socket.h" 28#include "icmp_socket.h"
29#include "translation-table.h" 29#include "translation-table.h"
30#include "originator.h" 30#include "originator.h"
31#include "types.h"
32#include "ring_buffer.h" 31#include "ring_buffer.h"
33#include "vis.h" 32#include "vis.h"
34#include "aggregation.h" 33#include "aggregation.h"
@@ -36,176 +35,194 @@
36#include "gateway_client.h" 35#include "gateway_client.h"
37#include "unicast.h" 36#include "unicast.h"
38 37
39void slide_own_bcast_window(struct batman_if *batman_if) 38void slide_own_bcast_window(struct hard_iface *hard_iface)
40{ 39{
41 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 40 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
42 struct hashtable_t *hash = bat_priv->orig_hash; 41 struct hashtable_t *hash = bat_priv->orig_hash;
43 struct hlist_node *walk; 42 struct hlist_node *node;
44 struct hlist_head *head; 43 struct hlist_head *head;
45 struct element_t *bucket;
46 struct orig_node *orig_node; 44 struct orig_node *orig_node;
47 unsigned long *word; 45 unsigned long *word;
48 int i; 46 int i;
49 size_t word_index; 47 size_t word_index;
50 48
51 spin_lock_bh(&bat_priv->orig_hash_lock);
52
53 for (i = 0; i < hash->size; i++) { 49 for (i = 0; i < hash->size; i++) {
54 head = &hash->table[i]; 50 head = &hash->table[i];
55 51
56 hlist_for_each_entry(bucket, walk, head, hlist) { 52 rcu_read_lock();
57 orig_node = bucket->data; 53 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
58 word_index = batman_if->if_num * NUM_WORDS; 54 spin_lock_bh(&orig_node->ogm_cnt_lock);
55 word_index = hard_iface->if_num * NUM_WORDS;
59 word = &(orig_node->bcast_own[word_index]); 56 word = &(orig_node->bcast_own[word_index]);
60 57
61 bit_get_packet(bat_priv, word, 1, 0); 58 bit_get_packet(bat_priv, word, 1, 0);
62 orig_node->bcast_own_sum[batman_if->if_num] = 59 orig_node->bcast_own_sum[hard_iface->if_num] =
63 bit_packet_count(word); 60 bit_packet_count(word);
61 spin_unlock_bh(&orig_node->ogm_cnt_lock);
64 } 62 }
63 rcu_read_unlock();
65 } 64 }
66
67 spin_unlock_bh(&bat_priv->orig_hash_lock);
68} 65}
69 66
70static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node, 67static void update_TT(struct bat_priv *bat_priv, struct orig_node *orig_node,
71 unsigned char *hna_buff, int hna_buff_len) 68 unsigned char *tt_buff, int tt_buff_len)
72{ 69{
73 if ((hna_buff_len != orig_node->hna_buff_len) || 70 if ((tt_buff_len != orig_node->tt_buff_len) ||
74 ((hna_buff_len > 0) && 71 ((tt_buff_len > 0) &&
75 (orig_node->hna_buff_len > 0) && 72 (orig_node->tt_buff_len > 0) &&
76 (memcmp(orig_node->hna_buff, hna_buff, hna_buff_len) != 0))) { 73 (memcmp(orig_node->tt_buff, tt_buff, tt_buff_len) != 0))) {
77 74
78 if (orig_node->hna_buff_len > 0) 75 if (orig_node->tt_buff_len > 0)
79 hna_global_del_orig(bat_priv, orig_node, 76 tt_global_del_orig(bat_priv, orig_node,
80 "originator changed hna"); 77 "originator changed tt");
81 78
82 if ((hna_buff_len > 0) && (hna_buff)) 79 if ((tt_buff_len > 0) && (tt_buff))
83 hna_global_add_orig(bat_priv, orig_node, 80 tt_global_add_orig(bat_priv, orig_node,
84 hna_buff, hna_buff_len); 81 tt_buff, tt_buff_len);
85 } 82 }
86} 83}
87 84
88static void update_route(struct bat_priv *bat_priv, 85static void update_route(struct bat_priv *bat_priv,
89 struct orig_node *orig_node, 86 struct orig_node *orig_node,
90 struct neigh_node *neigh_node, 87 struct neigh_node *neigh_node,
91 unsigned char *hna_buff, int hna_buff_len) 88 unsigned char *tt_buff, int tt_buff_len)
92{ 89{
90 struct neigh_node *curr_router;
91
92 curr_router = orig_node_get_router(orig_node);
93
93 /* route deleted */ 94 /* route deleted */
94 if ((orig_node->router) && (!neigh_node)) { 95 if ((curr_router) && (!neigh_node)) {
95 96
96 bat_dbg(DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", 97 bat_dbg(DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n",
97 orig_node->orig); 98 orig_node->orig);
98 hna_global_del_orig(bat_priv, orig_node, 99 tt_global_del_orig(bat_priv, orig_node,
99 "originator timed out"); 100 "originator timed out");
100 101
101 /* route added */ 102 /* route added */
102 } else if ((!orig_node->router) && (neigh_node)) { 103 } else if ((!curr_router) && (neigh_node)) {
103 104
104 bat_dbg(DBG_ROUTES, bat_priv, 105 bat_dbg(DBG_ROUTES, bat_priv,
105 "Adding route towards: %pM (via %pM)\n", 106 "Adding route towards: %pM (via %pM)\n",
106 orig_node->orig, neigh_node->addr); 107 orig_node->orig, neigh_node->addr);
107 hna_global_add_orig(bat_priv, orig_node, 108 tt_global_add_orig(bat_priv, orig_node,
108 hna_buff, hna_buff_len); 109 tt_buff, tt_buff_len);
109 110
110 /* route changed */ 111 /* route changed */
111 } else { 112 } else {
112 bat_dbg(DBG_ROUTES, bat_priv, 113 bat_dbg(DBG_ROUTES, bat_priv,
113 "Changing route towards: %pM " 114 "Changing route towards: %pM "
114 "(now via %pM - was via %pM)\n", 115 "(now via %pM - was via %pM)\n",
115 orig_node->orig, neigh_node->addr, 116 orig_node->orig, neigh_node->addr,
116 orig_node->router->addr); 117 curr_router->addr);
117 } 118 }
118 119
119 orig_node->router = neigh_node; 120 if (curr_router)
121 neigh_node_free_ref(curr_router);
122
123 /* increase refcount of new best neighbor */
124 if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount))
125 neigh_node = NULL;
126
127 spin_lock_bh(&orig_node->neigh_list_lock);
128 rcu_assign_pointer(orig_node->router, neigh_node);
129 spin_unlock_bh(&orig_node->neigh_list_lock);
130
131 /* decrease refcount of previous best neighbor */
132 if (curr_router)
133 neigh_node_free_ref(curr_router);
120} 134}
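The router swap in update_route() above condenses to one reference-counting rule: take a reference on the new neighbour before publishing it, publish with rcu_assign_pointer() under neigh_list_lock, then drop the reference that belonged to the previous router. A stripped-down fragment of just that sequence (debug output and the surrounding branches omitted):

        /* take a reference for the pointer we are about to publish */
        if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount))
                neigh_node = NULL;

        spin_lock_bh(&orig_node->neigh_list_lock);
        rcu_assign_pointer(orig_node->router, neigh_node);
        spin_unlock_bh(&orig_node->neigh_list_lock);

        /* release the reference held on behalf of the old router */
        if (curr_router)
                neigh_node_free_ref(curr_router);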
121 135
122 136
123void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, 137void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
124 struct neigh_node *neigh_node, unsigned char *hna_buff, 138 struct neigh_node *neigh_node, unsigned char *tt_buff,
125 int hna_buff_len) 139 int tt_buff_len)
126{ 140{
141 struct neigh_node *router = NULL;
127 142
128 if (!orig_node) 143 if (!orig_node)
129 return; 144 goto out;
145
146 router = orig_node_get_router(orig_node);
130 147
131 if (orig_node->router != neigh_node) 148 if (router != neigh_node)
132 update_route(bat_priv, orig_node, neigh_node, 149 update_route(bat_priv, orig_node, neigh_node,
133 hna_buff, hna_buff_len); 150 tt_buff, tt_buff_len);
134 /* may be just HNA changed */ 151 /* may be just TT changed */
135 else 152 else
136 update_HNA(bat_priv, orig_node, hna_buff, hna_buff_len); 153 update_TT(bat_priv, orig_node, tt_buff, tt_buff_len);
154
155out:
156 if (router)
157 neigh_node_free_ref(router);
137} 158}
138 159
139static int is_bidirectional_neigh(struct orig_node *orig_node, 160static int is_bidirectional_neigh(struct orig_node *orig_node,
140 struct orig_node *orig_neigh_node, 161 struct orig_node *orig_neigh_node,
141 struct batman_packet *batman_packet, 162 struct batman_packet *batman_packet,
142 struct batman_if *if_incoming) 163 struct hard_iface *if_incoming)
143{ 164{
144 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 165 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
145 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; 166 struct neigh_node *neigh_node = NULL, *tmp_neigh_node;
167 struct hlist_node *node;
146 unsigned char total_count; 168 unsigned char total_count;
169 uint8_t orig_eq_count, neigh_rq_count, tq_own;
170 int tq_asym_penalty, ret = 0;
147 171
148 if (orig_node == orig_neigh_node) { 172 /* find corresponding one hop neighbor */
149 list_for_each_entry(tmp_neigh_node, 173 rcu_read_lock();
150 &orig_node->neigh_list, 174 hlist_for_each_entry_rcu(tmp_neigh_node, node,
151 list) { 175 &orig_neigh_node->neigh_list, list) {
152 176
153 if (compare_orig(tmp_neigh_node->addr, 177 if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig))
154 orig_neigh_node->orig) && 178 continue;
155 (tmp_neigh_node->if_incoming == if_incoming))
156 neigh_node = tmp_neigh_node;
157 }
158 179
159 if (!neigh_node) 180 if (tmp_neigh_node->if_incoming != if_incoming)
160 neigh_node = create_neighbor(orig_node, 181 continue;
161 orig_neigh_node,
162 orig_neigh_node->orig,
163 if_incoming);
164 /* create_neighbor failed, return 0 */
165 if (!neigh_node)
166 return 0;
167 182
168 neigh_node->last_valid = jiffies; 183 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
169 } else { 184 continue;
170 /* find packet count of corresponding one hop neighbor */
171 list_for_each_entry(tmp_neigh_node,
172 &orig_neigh_node->neigh_list, list) {
173
174 if (compare_orig(tmp_neigh_node->addr,
175 orig_neigh_node->orig) &&
176 (tmp_neigh_node->if_incoming == if_incoming))
177 neigh_node = tmp_neigh_node;
178 }
179 185
180 if (!neigh_node) 186 neigh_node = tmp_neigh_node;
181 neigh_node = create_neighbor(orig_neigh_node, 187 break;
182 orig_neigh_node,
183 orig_neigh_node->orig,
184 if_incoming);
185 /* create_neighbor failed, return 0 */
186 if (!neigh_node)
187 return 0;
188 } 188 }
189 rcu_read_unlock();
190
191 if (!neigh_node)
192 neigh_node = create_neighbor(orig_neigh_node,
193 orig_neigh_node,
194 orig_neigh_node->orig,
195 if_incoming);
196
197 if (!neigh_node)
198 goto out;
199
200 /* if orig_node is direct neighbour update neigh_node last_valid */
201 if (orig_node == orig_neigh_node)
202 neigh_node->last_valid = jiffies;
189 203
190 orig_node->last_valid = jiffies; 204 orig_node->last_valid = jiffies;
191 205
206 /* find packet count of corresponding one hop neighbor */
207 spin_lock_bh(&orig_node->ogm_cnt_lock);
208 orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
209 neigh_rq_count = neigh_node->real_packet_count;
210 spin_unlock_bh(&orig_node->ogm_cnt_lock);
211
192 /* pay attention to not get a value bigger than 100 % */ 212 /* pay attention to not get a value bigger than 100 % */
193 total_count = (orig_neigh_node->bcast_own_sum[if_incoming->if_num] > 213 total_count = (orig_eq_count > neigh_rq_count ?
194 neigh_node->real_packet_count ? 214 neigh_rq_count : orig_eq_count);
195 neigh_node->real_packet_count :
196 orig_neigh_node->bcast_own_sum[if_incoming->if_num]);
197 215
198 /* if we have too few packets (too little data) we set tq_own to zero */ 216 /* if we have too few packets (too little data) we set tq_own to zero */
199 /* if we receive too few packets it is not considered bidirectional */ 217 /* if we receive too few packets it is not considered bidirectional */
200 if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || 218 if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
201 (neigh_node->real_packet_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) 219 (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
202 orig_neigh_node->tq_own = 0; 220 tq_own = 0;
203 else 221 else
204 /* neigh_node->real_packet_count is never zero as we 222 /* neigh_node->real_packet_count is never zero as we
205 * only purge old information when getting new 223 * only purge old information when getting new
206 * information */ 224 * information */
207 orig_neigh_node->tq_own = (TQ_MAX_VALUE * total_count) / 225 tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;
208 neigh_node->real_packet_count;
209 226
210 /* 227 /*
211 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does 228 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
@@ -213,20 +230,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node,
213 * punishes asymmetric links more. This will give a value 230 * punishes asymmetric links more. This will give a value
214 * between 0 and TQ_MAX_VALUE 231 * between 0 and TQ_MAX_VALUE
215 */ 232 */
216 orig_neigh_node->tq_asym_penalty = 233 tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
217 TQ_MAX_VALUE - 234 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
218 (TQ_MAX_VALUE * 235 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
219 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * 236 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
220 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * 237 (TQ_LOCAL_WINDOW_SIZE *
221 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count)) / 238 TQ_LOCAL_WINDOW_SIZE *
222 (TQ_LOCAL_WINDOW_SIZE * 239 TQ_LOCAL_WINDOW_SIZE);
223 TQ_LOCAL_WINDOW_SIZE * 240
224 TQ_LOCAL_WINDOW_SIZE); 241 batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) /
225 242 (TQ_MAX_VALUE * TQ_MAX_VALUE));
226 batman_packet->tq = ((batman_packet->tq *
227 orig_neigh_node->tq_own *
228 orig_neigh_node->tq_asym_penalty) /
229 (TQ_MAX_VALUE * TQ_MAX_VALUE));
230 243
231 bat_dbg(DBG_BATMAN, bat_priv, 244 bat_dbg(DBG_BATMAN, bat_priv,
232 "bidirectional: " 245 "bidirectional: "
@@ -234,34 +247,143 @@ static int is_bidirectional_neigh(struct orig_node *orig_node,
234 "real recv = %2i, local tq: %3i, asym_penalty: %3i, " 247 "real recv = %2i, local tq: %3i, asym_penalty: %3i, "
235 "total tq: %3i\n", 248 "total tq: %3i\n",
236 orig_node->orig, orig_neigh_node->orig, total_count, 249 orig_node->orig, orig_neigh_node->orig, total_count,
237 neigh_node->real_packet_count, orig_neigh_node->tq_own, 250 neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq);
238 orig_neigh_node->tq_asym_penalty, batman_packet->tq);
239 251
240 /* if link has the minimum required transmission quality 252 /* if link has the minimum required transmission quality
241 * consider it bidirectional */ 253 * consider it bidirectional */
242 if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) 254 if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
243 return 1; 255 ret = 1;
244 256
245 return 0; 257out:
258 if (neigh_node)
259 neigh_node_free_ref(neigh_node);
260 return ret;
261}
262
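To make the reworked TQ arithmetic concrete: assuming the usual main.h defaults of TQ_MAX_VALUE = 255 and TQ_LOCAL_WINDOW_SIZE = 64 (both assumptions here, neither is shown in this patch), a link on which 48 of the neighbour's last 64 OGMs were received and 40 of our own came back echoed yields tq_own = 212 and an asymmetry penalty of 252. A standalone program running the same integer math as is_bidirectional_neigh():

#include <stdio.h>

#define TQ_MAX_VALUE            255     /* assumed, from main.h */
#define TQ_LOCAL_WINDOW_SIZE    64      /* assumed, from main.h */

int main(void)
{
        /* assumed sample: 48 of the neighbour's OGMs received,
         * 40 of our own OGMs echoed back on this interface */
        int neigh_rq_count = 48;
        int orig_eq_count = 40;
        int total_count, tq_own, tq_asym_penalty;
        int tq_in = TQ_MAX_VALUE, tq_out;

        total_count = (orig_eq_count > neigh_rq_count ?
                       neigh_rq_count : orig_eq_count);
        tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;

        tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
                          (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
                          (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
                          (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
                          (TQ_LOCAL_WINDOW_SIZE * TQ_LOCAL_WINDOW_SIZE *
                           TQ_LOCAL_WINDOW_SIZE);

        tq_out = (tq_in * tq_own * tq_asym_penalty) /
                 (TQ_MAX_VALUE * TQ_MAX_VALUE);

        /* prints: tq_own=212 asym_penalty=252 forwarded tq=209 */
        printf("tq_own=%d asym_penalty=%d forwarded tq=%d\n",
               tq_own, tq_asym_penalty, tq_out);
        return 0;
}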
263/* caller must hold the neigh_list_lock */
264void bonding_candidate_del(struct orig_node *orig_node,
265 struct neigh_node *neigh_node)
266{
267 /* this neighbor is not part of our candidate list */
268 if (list_empty(&neigh_node->bonding_list))
269 goto out;
270
271 list_del_rcu(&neigh_node->bonding_list);
272 INIT_LIST_HEAD(&neigh_node->bonding_list);
273 neigh_node_free_ref(neigh_node);
274 atomic_dec(&orig_node->bond_candidates);
275
276out:
277 return;
278}
279
280static void bonding_candidate_add(struct orig_node *orig_node,
281 struct neigh_node *neigh_node)
282{
283 struct hlist_node *node;
284 struct neigh_node *tmp_neigh_node, *router = NULL;
285 uint8_t interference_candidate = 0;
286
287 spin_lock_bh(&orig_node->neigh_list_lock);
288
289 /* only consider if it has the same primary address ... */
290 if (!compare_eth(orig_node->orig,
291 neigh_node->orig_node->primary_addr))
292 goto candidate_del;
293
294 router = orig_node_get_router(orig_node);
295 if (!router)
296 goto candidate_del;
297
298 /* ... and is good enough to be considered */
299 if (neigh_node->tq_avg < router->tq_avg - BONDING_TQ_THRESHOLD)
300 goto candidate_del;
301
302 /**
303 * check if we have another candidate with the same mac address or
304 * interface. If we do, we won't select this candidate because of
305 * possible interference.
306 */
307 hlist_for_each_entry_rcu(tmp_neigh_node, node,
308 &orig_node->neigh_list, list) {
309
310 if (tmp_neigh_node == neigh_node)
311 continue;
312
313 /* we only care if the other candidate is even
314 * considered as candidate. */
315 if (list_empty(&tmp_neigh_node->bonding_list))
316 continue;
317
318 if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) ||
319 (compare_eth(neigh_node->addr, tmp_neigh_node->addr))) {
320 interference_candidate = 1;
321 break;
322 }
323 }
324
325 /* don't care further if it is an interference candidate */
326 if (interference_candidate)
327 goto candidate_del;
328
329 /* this neighbor already is part of our candidate list */
330 if (!list_empty(&neigh_node->bonding_list))
331 goto out;
332
333 if (!atomic_inc_not_zero(&neigh_node->refcount))
334 goto out;
335
336 list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list);
337 atomic_inc(&orig_node->bond_candidates);
338 goto out;
339
340candidate_del:
341 bonding_candidate_del(orig_node, neigh_node);
342
343out:
344 spin_unlock_bh(&orig_node->neigh_list_lock);
345
346 if (router)
347 neigh_node_free_ref(router);
348}
349
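bonding_candidate_add() and bonding_candidate_del() above keep the per-originator bond_list consistent with the same ownership rule as the router pointer: a list entry holds its own neighbour reference. Condensed, the two halves pair up like this (both run with neigh_list_lock held; the interference and TQ checks are left out):

        /* add: only once, and only if we can still get a reference */
        if (list_empty(&neigh_node->bonding_list) &&
            atomic_inc_not_zero(&neigh_node->refcount)) {
                list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list);
                atomic_inc(&orig_node->bond_candidates);
        }

        /* del: give back the reference the list entry was holding */
        if (!list_empty(&neigh_node->bonding_list)) {
                list_del_rcu(&neigh_node->bonding_list);
                INIT_LIST_HEAD(&neigh_node->bonding_list);
                neigh_node_free_ref(neigh_node);
                atomic_dec(&orig_node->bond_candidates);
        }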
350/* copy primary address for bonding */
351static void bonding_save_primary(struct orig_node *orig_node,
352 struct orig_node *orig_neigh_node,
353 struct batman_packet *batman_packet)
354{
355 if (!(batman_packet->flags & PRIMARIES_FIRST_HOP))
356 return;
357
358 memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN);
246} 359}
247 360
248static void update_orig(struct bat_priv *bat_priv, 361static void update_orig(struct bat_priv *bat_priv,
249 struct orig_node *orig_node, 362 struct orig_node *orig_node,
250 struct ethhdr *ethhdr, 363 struct ethhdr *ethhdr,
251 struct batman_packet *batman_packet, 364 struct batman_packet *batman_packet,
252 struct batman_if *if_incoming, 365 struct hard_iface *if_incoming,
253 unsigned char *hna_buff, int hna_buff_len, 366 unsigned char *tt_buff, int tt_buff_len,
254 char is_duplicate) 367 char is_duplicate)
255{ 368{
256 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; 369 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
257 int tmp_hna_buff_len; 370 struct neigh_node *router = NULL;
371 struct orig_node *orig_node_tmp;
372 struct hlist_node *node;
373 int tmp_tt_buff_len;
374 uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
258 375
259 bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " 376 bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
260 "Searching and updating originator entry of received packet\n"); 377 "Searching and updating originator entry of received packet\n");
261 378
262 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { 379 rcu_read_lock();
263 if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && 380 hlist_for_each_entry_rcu(tmp_neigh_node, node,
264 (tmp_neigh_node->if_incoming == if_incoming)) { 381 &orig_node->neigh_list, list) {
382 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
383 (tmp_neigh_node->if_incoming == if_incoming) &&
384 atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
385 if (neigh_node)
386 neigh_node_free_ref(neigh_node);
265 neigh_node = tmp_neigh_node; 387 neigh_node = tmp_neigh_node;
266 continue; 388 continue;
267 } 389 }
@@ -269,10 +391,12 @@ static void update_orig(struct bat_priv *bat_priv,
269 if (is_duplicate) 391 if (is_duplicate)
270 continue; 392 continue;
271 393
394 spin_lock_bh(&tmp_neigh_node->tq_lock);
272 ring_buffer_set(tmp_neigh_node->tq_recv, 395 ring_buffer_set(tmp_neigh_node->tq_recv,
273 &tmp_neigh_node->tq_index, 0); 396 &tmp_neigh_node->tq_index, 0);
274 tmp_neigh_node->tq_avg = 397 tmp_neigh_node->tq_avg =
275 ring_buffer_avg(tmp_neigh_node->tq_recv); 398 ring_buffer_avg(tmp_neigh_node->tq_recv);
399 spin_unlock_bh(&tmp_neigh_node->tq_lock);
276 } 400 }
277 401
278 if (!neigh_node) { 402 if (!neigh_node) {
@@ -280,57 +404,76 @@ static void update_orig(struct bat_priv *bat_priv,
280 404
281 orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); 405 orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
282 if (!orig_tmp) 406 if (!orig_tmp)
283 return; 407 goto unlock;
284 408
285 neigh_node = create_neighbor(orig_node, orig_tmp, 409 neigh_node = create_neighbor(orig_node, orig_tmp,
286 ethhdr->h_source, if_incoming); 410 ethhdr->h_source, if_incoming);
411
412 orig_node_free_ref(orig_tmp);
287 if (!neigh_node) 413 if (!neigh_node)
288 return; 414 goto unlock;
289 } else 415 } else
290 bat_dbg(DBG_BATMAN, bat_priv, 416 bat_dbg(DBG_BATMAN, bat_priv,
291 "Updating existing last-hop neighbor of originator\n"); 417 "Updating existing last-hop neighbor of originator\n");
292 418
419 rcu_read_unlock();
420
293 orig_node->flags = batman_packet->flags; 421 orig_node->flags = batman_packet->flags;
294 neigh_node->last_valid = jiffies; 422 neigh_node->last_valid = jiffies;
295 423
424 spin_lock_bh(&neigh_node->tq_lock);
296 ring_buffer_set(neigh_node->tq_recv, 425 ring_buffer_set(neigh_node->tq_recv,
297 &neigh_node->tq_index, 426 &neigh_node->tq_index,
298 batman_packet->tq); 427 batman_packet->tq);
299 neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv); 428 neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv);
429 spin_unlock_bh(&neigh_node->tq_lock);
300 430
301 if (!is_duplicate) { 431 if (!is_duplicate) {
302 orig_node->last_ttl = batman_packet->ttl; 432 orig_node->last_ttl = batman_packet->ttl;
303 neigh_node->last_ttl = batman_packet->ttl; 433 neigh_node->last_ttl = batman_packet->ttl;
304 } 434 }
305 435
306 tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ? 436 bonding_candidate_add(orig_node, neigh_node);
307 batman_packet->num_hna * ETH_ALEN : hna_buff_len); 437
438 tmp_tt_buff_len = (tt_buff_len > batman_packet->num_tt * ETH_ALEN ?
439 batman_packet->num_tt * ETH_ALEN : tt_buff_len);
308 440
309 /* if this neighbor already is our next hop there is nothing 441 /* if this neighbor already is our next hop there is nothing
310 * to change */ 442 * to change */
311 if (orig_node->router == neigh_node) 443 router = orig_node_get_router(orig_node);
312 goto update_hna; 444 if (router == neigh_node)
445 goto update_tt;
313 446
314 /* if this neighbor does not offer a better TQ we won't consider it */ 447 /* if this neighbor does not offer a better TQ we won't consider it */
315 if ((orig_node->router) && 448 if (router && (router->tq_avg > neigh_node->tq_avg))
316 (orig_node->router->tq_avg > neigh_node->tq_avg)) 449 goto update_tt;
317 goto update_hna;
318 450
319 /* if the TQ is the same and the link not more symmetric we 451 /* if the TQ is the same and the link not more symmetric we
320 * won't consider it either */ 452 * won't consider it either */
321 if ((orig_node->router) && 453 if (router && (neigh_node->tq_avg == router->tq_avg)) {
322 ((neigh_node->tq_avg == orig_node->router->tq_avg) && 454 orig_node_tmp = router->orig_node;
323 (orig_node->router->orig_node->bcast_own_sum[if_incoming->if_num] 455 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
324 >= neigh_node->orig_node->bcast_own_sum[if_incoming->if_num]))) 456 bcast_own_sum_orig =
325 goto update_hna; 457 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
458 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
459
460 orig_node_tmp = neigh_node->orig_node;
461 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
462 bcast_own_sum_neigh =
463 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
464 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
465
466 if (bcast_own_sum_orig >= bcast_own_sum_neigh)
467 goto update_tt;
468 }
326 469
327 update_routes(bat_priv, orig_node, neigh_node, 470 update_routes(bat_priv, orig_node, neigh_node,
328 hna_buff, tmp_hna_buff_len); 471 tt_buff, tmp_tt_buff_len);
329 goto update_gw; 472 goto update_gw;
330 473
331update_hna: 474update_tt:
332 update_routes(bat_priv, orig_node, orig_node->router, 475 update_routes(bat_priv, orig_node, router,
333 hna_buff, tmp_hna_buff_len); 476 tt_buff, tmp_tt_buff_len);
334 477
335update_gw: 478update_gw:
336 if (orig_node->gw_flags != batman_packet->gw_flags) 479 if (orig_node->gw_flags != batman_packet->gw_flags)
@@ -343,6 +486,16 @@ update_gw:
343 (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && 486 (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
344 (atomic_read(&bat_priv->gw_sel_class) > 2)) 487 (atomic_read(&bat_priv->gw_sel_class) > 2))
345 gw_check_election(bat_priv, orig_node); 488 gw_check_election(bat_priv, orig_node);
489
490 goto out;
491
492unlock:
493 rcu_read_unlock();
494out:
495 if (neigh_node)
496 neigh_node_free_ref(neigh_node);
497 if (router)
498 neigh_node_free_ref(router);
346} 499}
347 500
348/* checks whether the host restarted and is in the protection time. 501/* checks whether the host restarted and is in the protection time.
@@ -380,34 +533,38 @@ static int window_protected(struct bat_priv *bat_priv,
380 */ 533 */
381static char count_real_packets(struct ethhdr *ethhdr, 534static char count_real_packets(struct ethhdr *ethhdr,
382 struct batman_packet *batman_packet, 535 struct batman_packet *batman_packet,
383 struct batman_if *if_incoming) 536 struct hard_iface *if_incoming)
384{ 537{
385 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 538 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
386 struct orig_node *orig_node; 539 struct orig_node *orig_node;
387 struct neigh_node *tmp_neigh_node; 540 struct neigh_node *tmp_neigh_node;
541 struct hlist_node *node;
388 char is_duplicate = 0; 542 char is_duplicate = 0;
389 int32_t seq_diff; 543 int32_t seq_diff;
390 int need_update = 0; 544 int need_update = 0;
391 int set_mark; 545 int set_mark, ret = -1;
392 546
393 orig_node = get_orig_node(bat_priv, batman_packet->orig); 547 orig_node = get_orig_node(bat_priv, batman_packet->orig);
394 if (!orig_node) 548 if (!orig_node)
395 return 0; 549 return 0;
396 550
551 spin_lock_bh(&orig_node->ogm_cnt_lock);
397 seq_diff = batman_packet->seqno - orig_node->last_real_seqno; 552 seq_diff = batman_packet->seqno - orig_node->last_real_seqno;
398 553
399 /* signalize caller that the packet is to be dropped. */ 554 /* signalize caller that the packet is to be dropped. */
400 if (window_protected(bat_priv, seq_diff, 555 if (window_protected(bat_priv, seq_diff,
401 &orig_node->batman_seqno_reset)) 556 &orig_node->batman_seqno_reset))
402 return -1; 557 goto out;
403 558
404 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { 559 rcu_read_lock();
560 hlist_for_each_entry_rcu(tmp_neigh_node, node,
561 &orig_node->neigh_list, list) {
405 562
406 is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, 563 is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
407 orig_node->last_real_seqno, 564 orig_node->last_real_seqno,
408 batman_packet->seqno); 565 batman_packet->seqno);
409 566
410 if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && 567 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
411 (tmp_neigh_node->if_incoming == if_incoming)) 568 (tmp_neigh_node->if_incoming == if_incoming))
412 set_mark = 1; 569 set_mark = 1;
413 else 570 else
@@ -421,6 +578,7 @@ static char count_real_packets(struct ethhdr *ethhdr,
421 tmp_neigh_node->real_packet_count = 578 tmp_neigh_node->real_packet_count =
422 bit_packet_count(tmp_neigh_node->real_bits); 579 bit_packet_count(tmp_neigh_node->real_bits);
423 } 580 }
581 rcu_read_unlock();
424 582
425 if (need_update) { 583 if (need_update) {
426 bat_dbg(DBG_BATMAN, bat_priv, 584 bat_dbg(DBG_BATMAN, bat_priv,
@@ -429,124 +587,24 @@ static char count_real_packets(struct ethhdr *ethhdr,
429 orig_node->last_real_seqno = batman_packet->seqno; 587 orig_node->last_real_seqno = batman_packet->seqno;
430 } 588 }
431 589
432 return is_duplicate; 590 ret = is_duplicate;
433}
434
435/* copy primary address for bonding */
436static void mark_bonding_address(struct bat_priv *bat_priv,
437 struct orig_node *orig_node,
438 struct orig_node *orig_neigh_node,
439 struct batman_packet *batman_packet)
440
441{
442 if (batman_packet->flags & PRIMARIES_FIRST_HOP)
443 memcpy(orig_neigh_node->primary_addr,
444 orig_node->orig, ETH_ALEN);
445
446 return;
447}
448
449/* mark possible bond.candidates in the neighbor list */
450void update_bonding_candidates(struct bat_priv *bat_priv,
451 struct orig_node *orig_node)
452{
453 int candidates;
454 int interference_candidate;
455 int best_tq;
456 struct neigh_node *tmp_neigh_node, *tmp_neigh_node2;
457 struct neigh_node *first_candidate, *last_candidate;
458
459 /* update the candidates for this originator */
460 if (!orig_node->router) {
461 orig_node->bond.candidates = 0;
462 return;
463 }
464
465 best_tq = orig_node->router->tq_avg;
466
467 /* update bond.candidates */
468
469 candidates = 0;
470
471 /* mark other nodes which also received "PRIMARIES FIRST HOP" packets
472 * as "bonding partner" */
473
474 /* first, zero the list */
475 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
476 tmp_neigh_node->next_bond_candidate = NULL;
477 }
478
479 first_candidate = NULL;
480 last_candidate = NULL;
481 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
482
483 /* only consider if it has the same primary address ... */
484 if (memcmp(orig_node->orig,
485 tmp_neigh_node->orig_node->primary_addr,
486 ETH_ALEN) != 0)
487 continue;
488
489 /* ... and is good enough to be considered */
490 if (tmp_neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD)
491 continue;
492
493 /* check if we have another candidate with the same
494 * mac address or interface. If we do, we won't
495 * select this candidate because of possible interference. */
496
497 interference_candidate = 0;
498 list_for_each_entry(tmp_neigh_node2,
499 &orig_node->neigh_list, list) {
500
501 if (tmp_neigh_node2 == tmp_neigh_node)
502 continue;
503
504 /* we only care if the other candidate is even
505 * considered as candidate. */
506 if (!tmp_neigh_node2->next_bond_candidate)
507 continue;
508
509
510 if ((tmp_neigh_node->if_incoming ==
511 tmp_neigh_node2->if_incoming)
512 || (memcmp(tmp_neigh_node->addr,
513 tmp_neigh_node2->addr, ETH_ALEN) == 0)) {
514
515 interference_candidate = 1;
516 break;
517 }
518 }
519 /* don't care further if it is an interference candidate */
520 if (interference_candidate)
521 continue;
522
523 if (!first_candidate) {
524 first_candidate = tmp_neigh_node;
525 tmp_neigh_node->next_bond_candidate = first_candidate;
526 } else
527 tmp_neigh_node->next_bond_candidate = last_candidate;
528 591
529 last_candidate = tmp_neigh_node; 592out:
530 593 spin_unlock_bh(&orig_node->ogm_cnt_lock);
531 candidates++; 594 orig_node_free_ref(orig_node);
532 } 595 return ret;
533
534 if (candidates > 0) {
535 first_candidate->next_bond_candidate = last_candidate;
536 orig_node->bond.selected = first_candidate;
537 }
538
539 orig_node->bond.candidates = candidates;
540} 596}
541 597
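count_real_packets() above computes the sequence-number delta into a signed 32-bit variable, so a wrapped OGM seqno (assumed here to be an unsigned 32-bit counter) still produces a small positive difference for window_protected() and the bit-window updates. A tiny standalone illustration of that wraparound behaviour:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t last_real_seqno = 0xfffffffeu; /* just before the wrap */
        uint32_t seqno = 2;                     /* four OGMs later */
        int32_t seq_diff = seqno - last_real_seqno;

        /* prints "seq_diff = 4", not a huge positive number */
        printf("seq_diff = %d\n", seq_diff);
        return 0;
}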
542void receive_bat_packet(struct ethhdr *ethhdr, 598void receive_bat_packet(struct ethhdr *ethhdr,
543 struct batman_packet *batman_packet, 599 struct batman_packet *batman_packet,
544 unsigned char *hna_buff, int hna_buff_len, 600 unsigned char *tt_buff, int tt_buff_len,
545 struct batman_if *if_incoming) 601 struct hard_iface *if_incoming)
546{ 602{
547 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 603 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
548 struct batman_if *batman_if; 604 struct hard_iface *hard_iface;
549 struct orig_node *orig_neigh_node, *orig_node; 605 struct orig_node *orig_neigh_node, *orig_node;
606 struct neigh_node *router = NULL, *router_router = NULL;
607 struct neigh_node *orig_neigh_router = NULL;
550 char has_directlink_flag; 608 char has_directlink_flag;
551 char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; 609 char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
552 char is_broadcast = 0, is_bidirectional, is_single_hop_neigh; 610 char is_broadcast = 0, is_bidirectional, is_single_hop_neigh;
@@ -573,8 +631,8 @@ void receive_bat_packet(struct ethhdr *ethhdr,
573 631
574 has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0); 632 has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0);
575 633
576 is_single_hop_neigh = (compare_orig(ethhdr->h_source, 634 is_single_hop_neigh = (compare_eth(ethhdr->h_source,
577 batman_packet->orig) ? 1 : 0); 635 batman_packet->orig) ? 1 : 0);
578 636
579 bat_dbg(DBG_BATMAN, bat_priv, 637 bat_dbg(DBG_BATMAN, bat_priv,
580 "Received BATMAN packet via NB: %pM, IF: %s [%pM] " 638 "Received BATMAN packet via NB: %pM, IF: %s [%pM] "
@@ -587,26 +645,26 @@ void receive_bat_packet(struct ethhdr *ethhdr,
587 has_directlink_flag); 645 has_directlink_flag);
588 646
589 rcu_read_lock(); 647 rcu_read_lock();
590 list_for_each_entry_rcu(batman_if, &if_list, list) { 648 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
591 if (batman_if->if_status != IF_ACTIVE) 649 if (hard_iface->if_status != IF_ACTIVE)
592 continue; 650 continue;
593 651
594 if (batman_if->soft_iface != if_incoming->soft_iface) 652 if (hard_iface->soft_iface != if_incoming->soft_iface)
595 continue; 653 continue;
596 654
597 if (compare_orig(ethhdr->h_source, 655 if (compare_eth(ethhdr->h_source,
598 batman_if->net_dev->dev_addr)) 656 hard_iface->net_dev->dev_addr))
599 is_my_addr = 1; 657 is_my_addr = 1;
600 658
601 if (compare_orig(batman_packet->orig, 659 if (compare_eth(batman_packet->orig,
602 batman_if->net_dev->dev_addr)) 660 hard_iface->net_dev->dev_addr))
603 is_my_orig = 1; 661 is_my_orig = 1;
604 662
605 if (compare_orig(batman_packet->prev_sender, 663 if (compare_eth(batman_packet->prev_sender,
606 batman_if->net_dev->dev_addr)) 664 hard_iface->net_dev->dev_addr))
607 is_my_oldorig = 1; 665 is_my_oldorig = 1;
608 666
609 if (compare_orig(ethhdr->h_source, broadcast_addr)) 667 if (compare_eth(ethhdr->h_source, broadcast_addr))
610 is_broadcast = 1; 668 is_broadcast = 1;
611 } 669 }
612 rcu_read_unlock(); 670 rcu_read_unlock();
@@ -638,7 +696,6 @@ void receive_bat_packet(struct ethhdr *ethhdr,
638 int offset; 696 int offset;
639 697
640 orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); 698 orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
641
642 if (!orig_neigh_node) 699 if (!orig_neigh_node)
643 return; 700 return;
644 701
@@ -647,18 +704,22 @@ void receive_bat_packet(struct ethhdr *ethhdr,
647 /* if received seqno equals last send seqno save new 704 /* if received seqno equals last send seqno save new
648 * seqno for bidirectional check */ 705 * seqno for bidirectional check */
649 if (has_directlink_flag && 706 if (has_directlink_flag &&
650 compare_orig(if_incoming->net_dev->dev_addr, 707 compare_eth(if_incoming->net_dev->dev_addr,
651 batman_packet->orig) && 708 batman_packet->orig) &&
652 (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { 709 (batman_packet->seqno - if_incoming_seqno + 2 == 0)) {
653 offset = if_incoming->if_num * NUM_WORDS; 710 offset = if_incoming->if_num * NUM_WORDS;
711
712 spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
654 word = &(orig_neigh_node->bcast_own[offset]); 713 word = &(orig_neigh_node->bcast_own[offset]);
655 bit_mark(word, 0); 714 bit_mark(word, 0);
656 orig_neigh_node->bcast_own_sum[if_incoming->if_num] = 715 orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
657 bit_packet_count(word); 716 bit_packet_count(word);
717 spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
658 } 718 }
659 719
660 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " 720 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
661 "originator packet from myself (via neighbor)\n"); 721 "originator packet from myself (via neighbor)\n");
722 orig_node_free_ref(orig_neigh_node);
662 return; 723 return;
663 } 724 }
664 725
@@ -679,27 +740,28 @@ void receive_bat_packet(struct ethhdr *ethhdr,
679 bat_dbg(DBG_BATMAN, bat_priv, 740 bat_dbg(DBG_BATMAN, bat_priv,
680 "Drop packet: packet within seqno protection time " 741 "Drop packet: packet within seqno protection time "
681 "(sender: %pM)\n", ethhdr->h_source); 742 "(sender: %pM)\n", ethhdr->h_source);
682 return; 743 goto out;
683 } 744 }
684 745
685 if (batman_packet->tq == 0) { 746 if (batman_packet->tq == 0) {
686 bat_dbg(DBG_BATMAN, bat_priv, 747 bat_dbg(DBG_BATMAN, bat_priv,
687 "Drop packet: originator packet with tq equal 0\n"); 748 "Drop packet: originator packet with tq equal 0\n");
688 return; 749 goto out;
689 } 750 }
690 751
752 router = orig_node_get_router(orig_node);
753 if (router)
754 router_router = orig_node_get_router(router->orig_node);
755
691 /* avoid temporary routing loops */ 756 /* avoid temporary routing loops */
692 if ((orig_node->router) && 757 if (router && router_router &&
693 (orig_node->router->orig_node->router) && 758 (compare_eth(router->addr, batman_packet->prev_sender)) &&
694 (compare_orig(orig_node->router->addr, 759 !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) &&
695 batman_packet->prev_sender)) && 760 (compare_eth(router->addr, router_router->addr))) {
696 !(compare_orig(batman_packet->orig, batman_packet->prev_sender)) &&
697 (compare_orig(orig_node->router->addr,
698 orig_node->router->orig_node->router->addr))) {
699 bat_dbg(DBG_BATMAN, bat_priv, 761 bat_dbg(DBG_BATMAN, bat_priv,
700 "Drop packet: ignoring all rebroadcast packets that " 762 "Drop packet: ignoring all rebroadcast packets that "
701 "may make me loop (sender: %pM)\n", ethhdr->h_source); 763 "may make me loop (sender: %pM)\n", ethhdr->h_source);
702 return; 764 goto out;
703 } 765 }
704 766
705 /* if sender is a direct neighbor the sender mac equals 767 /* if sender is a direct neighbor the sender mac equals
@@ -708,19 +770,23 @@ void receive_bat_packet(struct ethhdr *ethhdr,
708 orig_node : 770 orig_node :
709 get_orig_node(bat_priv, ethhdr->h_source)); 771 get_orig_node(bat_priv, ethhdr->h_source));
710 if (!orig_neigh_node) 772 if (!orig_neigh_node)
711 return; 773 goto out;
774
775 orig_neigh_router = orig_node_get_router(orig_neigh_node);
712 776
713 /* drop packet if sender is not a direct neighbor and if we 777 /* drop packet if sender is not a direct neighbor and if we
714 * don't route towards it */ 778 * don't route towards it */
715 if (!is_single_hop_neigh && (!orig_neigh_node->router)) { 779 if (!is_single_hop_neigh && (!orig_neigh_router)) {
716 bat_dbg(DBG_BATMAN, bat_priv, 780 bat_dbg(DBG_BATMAN, bat_priv,
717 "Drop packet: OGM via unknown neighbor!\n"); 781 "Drop packet: OGM via unknown neighbor!\n");
718 return; 782 goto out_neigh;
719 } 783 }
720 784
721 is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, 785 is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node,
722 batman_packet, if_incoming); 786 batman_packet, if_incoming);
723 787
788 bonding_save_primary(orig_node, orig_neigh_node, batman_packet);
789
724 /* update ranking if it is not a duplicate or has the same 790 /* update ranking if it is not a duplicate or has the same
725 * seqno and similar ttl as the non-duplicate */ 791 * seqno and similar ttl as the non-duplicate */
726 if (is_bidirectional && 792 if (is_bidirectional &&
@@ -728,46 +794,54 @@ void receive_bat_packet(struct ethhdr *ethhdr,
728 ((orig_node->last_real_seqno == batman_packet->seqno) && 794 ((orig_node->last_real_seqno == batman_packet->seqno) &&
729 (orig_node->last_ttl - 3 <= batman_packet->ttl)))) 795 (orig_node->last_ttl - 3 <= batman_packet->ttl))))
730 update_orig(bat_priv, orig_node, ethhdr, batman_packet, 796 update_orig(bat_priv, orig_node, ethhdr, batman_packet,
731 if_incoming, hna_buff, hna_buff_len, is_duplicate); 797 if_incoming, tt_buff, tt_buff_len, is_duplicate);
732
733 mark_bonding_address(bat_priv, orig_node,
734 orig_neigh_node, batman_packet);
735 update_bonding_candidates(bat_priv, orig_node);
736 798
737 /* is single hop (direct) neighbor */ 799 /* is single hop (direct) neighbor */
738 if (is_single_hop_neigh) { 800 if (is_single_hop_neigh) {
739 801
740 /* mark direct link on incoming interface */ 802 /* mark direct link on incoming interface */
741 schedule_forward_packet(orig_node, ethhdr, batman_packet, 803 schedule_forward_packet(orig_node, ethhdr, batman_packet,
742 1, hna_buff_len, if_incoming); 804 1, tt_buff_len, if_incoming);
743 805
744 bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " 806 bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
745 "rebroadcast neighbor packet with direct link flag\n"); 807 "rebroadcast neighbor packet with direct link flag\n");
746 return; 808 goto out_neigh;
747 } 809 }
748 810
749 /* multihop originator */ 811 /* multihop originator */
750 if (!is_bidirectional) { 812 if (!is_bidirectional) {
751 bat_dbg(DBG_BATMAN, bat_priv, 813 bat_dbg(DBG_BATMAN, bat_priv,
752 "Drop packet: not received via bidirectional link\n"); 814 "Drop packet: not received via bidirectional link\n");
753 return; 815 goto out_neigh;
754 } 816 }
755 817
756 if (is_duplicate) { 818 if (is_duplicate) {
757 bat_dbg(DBG_BATMAN, bat_priv, 819 bat_dbg(DBG_BATMAN, bat_priv,
758 "Drop packet: duplicate packet received\n"); 820 "Drop packet: duplicate packet received\n");
759 return; 821 goto out_neigh;
760 } 822 }
761 823
762 bat_dbg(DBG_BATMAN, bat_priv, 824 bat_dbg(DBG_BATMAN, bat_priv,
763 "Forwarding packet: rebroadcast originator packet\n"); 825 "Forwarding packet: rebroadcast originator packet\n");
764 schedule_forward_packet(orig_node, ethhdr, batman_packet, 826 schedule_forward_packet(orig_node, ethhdr, batman_packet,
765 0, hna_buff_len, if_incoming); 827 0, tt_buff_len, if_incoming);
828
829out_neigh:
830 if ((orig_neigh_node) && (!is_single_hop_neigh))
831 orig_node_free_ref(orig_neigh_node);
832out:
833 if (router)
834 neigh_node_free_ref(router);
835 if (router_router)
836 neigh_node_free_ref(router_router);
837 if (orig_neigh_router)
838 neigh_node_free_ref(orig_neigh_router);
839
840 orig_node_free_ref(orig_node);
766} 841}
767 842
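receive_bat_packet() now accumulates references (orig_node, router, router_router, orig_neigh_router and possibly orig_neigh_node) and releases them all through the out_neigh:/out: labels, which is the cleanup shape this patch gives to most receive paths. A generic sketch of the idiom; example_rx_path() is a hypothetical function and the batman-adv symbols are assumed to be in scope:

static int example_rx_path(struct bat_priv *bat_priv, uint8_t *addr)
{
        struct orig_node *orig_node = NULL;
        struct neigh_node *router = NULL;
        int ret = NET_RX_DROP;

        orig_node = orig_hash_find(bat_priv, addr);
        if (!orig_node)
                goto out;

        router = orig_node_get_router(orig_node);
        if (!router)
                goto out;

        /* ... the actual forwarding work would happen here ... */
        ret = NET_RX_SUCCESS;

out:
        /* every reference taken above is dropped exactly once */
        if (router)
                neigh_node_free_ref(router);
        if (orig_node)
                orig_node_free_ref(orig_node);
        return ret;
}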
768int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) 843int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
769{ 844{
770 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
771 struct ethhdr *ethhdr; 845 struct ethhdr *ethhdr;
772 846
773 /* drop packet if it has not necessary minimum size */ 847 /* drop packet if it has not necessary minimum size */
@@ -794,12 +868,10 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
794 868
795 ethhdr = (struct ethhdr *)skb_mac_header(skb); 869 ethhdr = (struct ethhdr *)skb_mac_header(skb);
796 870
797 spin_lock_bh(&bat_priv->orig_hash_lock);
798 receive_aggr_bat_packet(ethhdr, 871 receive_aggr_bat_packet(ethhdr,
799 skb->data, 872 skb->data,
800 skb_headlen(skb), 873 skb_headlen(skb),
801 batman_if); 874 hard_iface);
802 spin_unlock_bh(&bat_priv->orig_hash_lock);
803 875
804 kfree_skb(skb); 876 kfree_skb(skb);
805 return NET_RX_SUCCESS; 877 return NET_RX_SUCCESS;
@@ -808,135 +880,124 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
808static int recv_my_icmp_packet(struct bat_priv *bat_priv, 880static int recv_my_icmp_packet(struct bat_priv *bat_priv,
809 struct sk_buff *skb, size_t icmp_len) 881 struct sk_buff *skb, size_t icmp_len)
810{ 882{
811 struct orig_node *orig_node; 883 struct hard_iface *primary_if = NULL;
884 struct orig_node *orig_node = NULL;
885 struct neigh_node *router = NULL;
812 struct icmp_packet_rr *icmp_packet; 886 struct icmp_packet_rr *icmp_packet;
813 struct ethhdr *ethhdr; 887 int ret = NET_RX_DROP;
814 struct batman_if *batman_if;
815 int ret;
816 uint8_t dstaddr[ETH_ALEN];
817 888
818 icmp_packet = (struct icmp_packet_rr *)skb->data; 889 icmp_packet = (struct icmp_packet_rr *)skb->data;
819 ethhdr = (struct ethhdr *)skb_mac_header(skb);
820 890
821 /* add data to device queue */ 891 /* add data to device queue */
822 if (icmp_packet->msg_type != ECHO_REQUEST) { 892 if (icmp_packet->msg_type != ECHO_REQUEST) {
823 bat_socket_receive_packet(icmp_packet, icmp_len); 893 bat_socket_receive_packet(icmp_packet, icmp_len);
824 return NET_RX_DROP; 894 goto out;
825 } 895 }
826 896
827 if (!bat_priv->primary_if) 897 primary_if = primary_if_get_selected(bat_priv);
828 return NET_RX_DROP; 898 if (!primary_if)
899 goto out;
829 900
830 /* answer echo request (ping) */ 901 /* answer echo request (ping) */
831 /* get routing information */ 902 /* get routing information */
832 spin_lock_bh(&bat_priv->orig_hash_lock); 903 orig_node = orig_hash_find(bat_priv, icmp_packet->orig);
833 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, 904 if (!orig_node)
834 compare_orig, choose_orig, 905 goto out;
835 icmp_packet->orig));
836 ret = NET_RX_DROP;
837
838 if ((orig_node) && (orig_node->router)) {
839
840 /* don't lock while sending the packets ... we therefore
841 * copy the required data before sending */
842 batman_if = orig_node->router->if_incoming;
843 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
844 spin_unlock_bh(&bat_priv->orig_hash_lock);
845
846 /* create a copy of the skb, if needed, to modify it. */
847 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
848 return NET_RX_DROP;
849
850 icmp_packet = (struct icmp_packet_rr *)skb->data;
851 ethhdr = (struct ethhdr *)skb_mac_header(skb);
852 906
853 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); 907 router = orig_node_get_router(orig_node);
854 memcpy(icmp_packet->orig, 908 if (!router)
855 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); 909 goto out;
856 icmp_packet->msg_type = ECHO_REPLY;
857 icmp_packet->ttl = TTL;
858 910
859 send_skb_packet(skb, batman_if, dstaddr); 911 /* create a copy of the skb, if needed, to modify it. */
860 ret = NET_RX_SUCCESS; 912 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
913 goto out;
861 914
862 } else 915 icmp_packet = (struct icmp_packet_rr *)skb->data;
863 spin_unlock_bh(&bat_priv->orig_hash_lock);
864 916
917 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
918 memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
919 icmp_packet->msg_type = ECHO_REPLY;
920 icmp_packet->ttl = TTL;
921
922 send_skb_packet(skb, router->if_incoming, router->addr);
923 ret = NET_RX_SUCCESS;
924
925out:
926 if (primary_if)
927 hardif_free_ref(primary_if);
928 if (router)
929 neigh_node_free_ref(router);
930 if (orig_node)
931 orig_node_free_ref(orig_node);
865 return ret; 932 return ret;
866} 933}
867 934
868static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, 935static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv,
869 struct sk_buff *skb, size_t icmp_len) 936 struct sk_buff *skb)
870{ 937{
871 struct orig_node *orig_node; 938 struct hard_iface *primary_if = NULL;
939 struct orig_node *orig_node = NULL;
940 struct neigh_node *router = NULL;
872 struct icmp_packet *icmp_packet; 941 struct icmp_packet *icmp_packet;
873 struct ethhdr *ethhdr; 942 int ret = NET_RX_DROP;
874 struct batman_if *batman_if;
875 int ret;
876 uint8_t dstaddr[ETH_ALEN];
877 943
878 icmp_packet = (struct icmp_packet *)skb->data; 944 icmp_packet = (struct icmp_packet *)skb->data;
879 ethhdr = (struct ethhdr *)skb_mac_header(skb);
880 945
881 /* send TTL exceeded if packet is an echo request (traceroute) */ 946 /* send TTL exceeded if packet is an echo request (traceroute) */
882 if (icmp_packet->msg_type != ECHO_REQUEST) { 947 if (icmp_packet->msg_type != ECHO_REQUEST) {
883 pr_debug("Warning - can't forward icmp packet from %pM to " 948 pr_debug("Warning - can't forward icmp packet from %pM to "
884 "%pM: ttl exceeded\n", icmp_packet->orig, 949 "%pM: ttl exceeded\n", icmp_packet->orig,
885 icmp_packet->dst); 950 icmp_packet->dst);
886 return NET_RX_DROP; 951 goto out;
887 } 952 }
888 953
889 if (!bat_priv->primary_if) 954 primary_if = primary_if_get_selected(bat_priv);
890 return NET_RX_DROP; 955 if (!primary_if)
956 goto out;
891 957
892 /* get routing information */ 958 /* get routing information */
893 spin_lock_bh(&bat_priv->orig_hash_lock); 959 orig_node = orig_hash_find(bat_priv, icmp_packet->orig);
894 orig_node = ((struct orig_node *) 960 if (!orig_node)
895 hash_find(bat_priv->orig_hash, compare_orig, choose_orig, 961 goto out;
896 icmp_packet->orig));
897 ret = NET_RX_DROP;
898
899 if ((orig_node) && (orig_node->router)) {
900
901 /* don't lock while sending the packets ... we therefore
902 * copy the required data before sending */
903 batman_if = orig_node->router->if_incoming;
904 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
905 spin_unlock_bh(&bat_priv->orig_hash_lock);
906
907 /* create a copy of the skb, if needed, to modify it. */
908 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
909 return NET_RX_DROP;
910
911 icmp_packet = (struct icmp_packet *) skb->data;
912 ethhdr = (struct ethhdr *)skb_mac_header(skb);
913 962
914 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); 963 router = orig_node_get_router(orig_node);
915 memcpy(icmp_packet->orig, 964 if (!router)
916 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); 965 goto out;
917 icmp_packet->msg_type = TTL_EXCEEDED;
918 icmp_packet->ttl = TTL;
919 966
920 send_skb_packet(skb, batman_if, dstaddr); 967 /* create a copy of the skb, if needed, to modify it. */
921 ret = NET_RX_SUCCESS; 968 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
969 goto out;
922 970
923 } else 971 icmp_packet = (struct icmp_packet *)skb->data;
924 spin_unlock_bh(&bat_priv->orig_hash_lock);
925 972
973 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
974 memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
975 icmp_packet->msg_type = TTL_EXCEEDED;
976 icmp_packet->ttl = TTL;
977
978 send_skb_packet(skb, router->if_incoming, router->addr);
979 ret = NET_RX_SUCCESS;
980
981out:
982 if (primary_if)
983 hardif_free_ref(primary_if);
984 if (router)
985 neigh_node_free_ref(router);
986 if (orig_node)
987 orig_node_free_ref(orig_node);
926 return ret; 988 return ret;
927} 989}
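
Both ICMP handlers above now funnel every exit through a single out: label that drops whichever references (primary_if, orig_node, router) were actually taken. Below is a minimal userspace sketch of that acquire / single-exit / cleanup shape; the types and helpers are illustrative stand-ins, not the batman-adv structures.

/* simplified sketch of the goto-out reference cleanup pattern */
#include <stdio.h>
#include <stdlib.h>

struct ref_obj { int refcount; };

static struct ref_obj *obj_get(int ok)
{
	if (!ok)
		return NULL;
	struct ref_obj *o = malloc(sizeof(*o));
	if (o)
		o->refcount = 1;
	return o;
}

static void obj_put(struct ref_obj *o)
{
	if (--o->refcount == 0)
		free(o);
}

static int handle(int have_primary, int have_orig, int have_router)
{
	struct ref_obj *primary_if = NULL, *orig_node = NULL, *router = NULL;
	int ret = -1;				/* NET_RX_DROP in the kernel code */

	primary_if = obj_get(have_primary);
	if (!primary_if)
		goto out;

	orig_node = obj_get(have_orig);
	if (!orig_node)
		goto out;

	router = obj_get(have_router);
	if (!router)
		goto out;

	/* ... modify and send the packet here ... */
	ret = 0;				/* NET_RX_SUCCESS */

out:
	if (router)
		obj_put(router);
	if (orig_node)
		obj_put(orig_node);
	if (primary_if)
		obj_put(primary_if);
	return ret;
}

int main(void)
{
	printf("%d %d\n", handle(1, 1, 1), handle(1, 0, 1));
	return 0;
}

Whatever the exit path, every non-NULL reference is released exactly once, which is what lets the handlers above bail out early without leaking.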
928 990
929 991
930int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) 992int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if)
931{ 993{
932 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 994 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
933 struct icmp_packet_rr *icmp_packet; 995 struct icmp_packet_rr *icmp_packet;
934 struct ethhdr *ethhdr; 996 struct ethhdr *ethhdr;
935 struct orig_node *orig_node; 997 struct orig_node *orig_node = NULL;
936 struct batman_if *batman_if; 998 struct neigh_node *router = NULL;
937 int hdr_size = sizeof(struct icmp_packet); 999 int hdr_size = sizeof(struct icmp_packet);
938 int ret; 1000 int ret = NET_RX_DROP;
939 uint8_t dstaddr[ETH_ALEN];
940 1001
941 /** 1002 /**
942 * we truncate all incoming icmp packets if they don't match our size 1003 * we truncate all incoming icmp packets if they don't match our size
@@ -946,21 +1007,21 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
946 1007
947 /* drop packet if it has not necessary minimum size */ 1008 /* drop packet if it has not necessary minimum size */
948 if (unlikely(!pskb_may_pull(skb, hdr_size))) 1009 if (unlikely(!pskb_may_pull(skb, hdr_size)))
949 return NET_RX_DROP; 1010 goto out;
950 1011
951 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1012 ethhdr = (struct ethhdr *)skb_mac_header(skb);
952 1013
953 /* packet with unicast indication but broadcast recipient */ 1014 /* packet with unicast indication but broadcast recipient */
954 if (is_broadcast_ether_addr(ethhdr->h_dest)) 1015 if (is_broadcast_ether_addr(ethhdr->h_dest))
955 return NET_RX_DROP; 1016 goto out;
956 1017
957 /* packet with broadcast sender address */ 1018 /* packet with broadcast sender address */
958 if (is_broadcast_ether_addr(ethhdr->h_source)) 1019 if (is_broadcast_ether_addr(ethhdr->h_source))
959 return NET_RX_DROP; 1020 goto out;
960 1021
961 /* not for me */ 1022 /* not for me */
962 if (!is_my_mac(ethhdr->h_dest)) 1023 if (!is_my_mac(ethhdr->h_dest))
963 return NET_RX_DROP; 1024 goto out;
964 1025
965 icmp_packet = (struct icmp_packet_rr *)skb->data; 1026 icmp_packet = (struct icmp_packet_rr *)skb->data;
966 1027
@@ -978,137 +1039,215 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
978 1039
979 /* TTL exceeded */ 1040 /* TTL exceeded */
980 if (icmp_packet->ttl < 2) 1041 if (icmp_packet->ttl < 2)
981 return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size); 1042 return recv_icmp_ttl_exceeded(bat_priv, skb);
982
983 ret = NET_RX_DROP;
984 1043
985 /* get routing information */ 1044 /* get routing information */
986 spin_lock_bh(&bat_priv->orig_hash_lock); 1045 orig_node = orig_hash_find(bat_priv, icmp_packet->dst);
987 orig_node = ((struct orig_node *) 1046 if (!orig_node)
988 hash_find(bat_priv->orig_hash, compare_orig, choose_orig, 1047 goto out;
989 icmp_packet->dst));
990 1048
991 if ((orig_node) && (orig_node->router)) { 1049 router = orig_node_get_router(orig_node);
1050 if (!router)
1051 goto out;
992 1052
993 /* don't lock while sending the packets ... we therefore 1053 /* create a copy of the skb, if needed, to modify it. */
994 * copy the required data before sending */ 1054 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
995 batman_if = orig_node->router->if_incoming; 1055 goto out;
996 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
997 spin_unlock_bh(&bat_priv->orig_hash_lock);
998 1056
999 /* create a copy of the skb, if needed, to modify it. */ 1057 icmp_packet = (struct icmp_packet_rr *)skb->data;
1000 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
1001 return NET_RX_DROP;
1002 1058
1003 icmp_packet = (struct icmp_packet_rr *)skb->data; 1059 /* decrement ttl */
1004 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1060 icmp_packet->ttl--;
1005 1061
1006 /* decrement ttl */ 1062 /* route it */
1007 icmp_packet->ttl--; 1063 send_skb_packet(skb, router->if_incoming, router->addr);
1064 ret = NET_RX_SUCCESS;
1065
1066out:
1067 if (router)
1068 neigh_node_free_ref(router);
1069 if (orig_node)
1070 orig_node_free_ref(orig_node);
1071 return ret;
1072}
1008 1073
1009 /* route it */ 1074/* In the bonding case, send the packets in a round
1010 send_skb_packet(skb, batman_if, dstaddr); 1075 * robin fashion over the remaining interfaces.
1011 ret = NET_RX_SUCCESS; 1076 *
1077 * This method rotates the bonding list and increases the
1078 * returned router's refcount. */
1079static struct neigh_node *find_bond_router(struct orig_node *primary_orig,
1080 struct hard_iface *recv_if)
1081{
1082 struct neigh_node *tmp_neigh_node;
1083 struct neigh_node *router = NULL, *first_candidate = NULL;
1012 1084
1013 } else 1085 rcu_read_lock();
1014 spin_unlock_bh(&bat_priv->orig_hash_lock); 1086 list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list,
1087 bonding_list) {
1088 if (!first_candidate)
1089 first_candidate = tmp_neigh_node;
1015 1090
1016 return ret; 1091 /* recv_if == NULL on the first node. */
1092 if (tmp_neigh_node->if_incoming == recv_if)
1093 continue;
1094
1095 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
1096 continue;
1097
1098 router = tmp_neigh_node;
1099 break;
1100 }
1101
1102 /* use the first candidate if nothing was found. */
1103 if (!router && first_candidate &&
1104 atomic_inc_not_zero(&first_candidate->refcount))
1105 router = first_candidate;
1106
1107 if (!router)
1108 goto out;
1109
1110 /* selected should point to the next element
1111 * after the current router */
1112 spin_lock_bh(&primary_orig->neigh_list_lock);
1113 /* this is a list_move(), which unfortunately
1114 * does not exist as rcu version */
1115 list_del_rcu(&primary_orig->bond_list);
1116 list_add_rcu(&primary_orig->bond_list,
1117 &router->bonding_list);
1118 spin_unlock_bh(&primary_orig->neigh_list_lock);
1119
1120out:
1121 rcu_read_unlock();
1122 return router;
1123}
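
find_bond_router() walks the bonding candidates, skips the one sitting on the interface the packet arrived on, and then rotates the list so the next call starts behind the chosen neighbor. The following is a simplified, single-threaded sketch of that round-robin selection; the RCU list and refcounting are omitted and the rotation is modelled with an index.

/* illustrative round-robin candidate selection */
#include <stdio.h>

struct candidate { int ifindex; const char *name; };

static int pick_round_robin(struct candidate *c, int n, int *start,
			    int recv_ifindex)
{
	int chosen = -1;

	for (int i = 0; i < n; i++) {
		int idx = (*start + i) % n;
		if (c[idx].ifindex == recv_ifindex)
			continue;	/* don't send back out the receiving interface */
		chosen = idx;
		break;
	}

	if (chosen < 0)
		chosen = *start;	/* use the first candidate if nothing else fits */

	*start = (chosen + 1) % n;	/* rotate: next call starts after the choice */
	return chosen;
}

int main(void)
{
	struct candidate c[] = { {1, "neigh-A"}, {2, "neigh-B"}, {3, "neigh-C"} };
	int start = 0;

	for (int i = 0; i < 4; i++)
		printf("%s\n", c[pick_round_robin(c, 3, &start, 2)].name);
	return 0;
}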
1124
1125/* Interface Alternating: Use the best of the
1126 * remaining candidates which are not using
1127 * this interface.
1128 *
1129 * Increases the returned router's refcount */
1130static struct neigh_node *find_ifalter_router(struct orig_node *primary_orig,
1131 struct hard_iface *recv_if)
1132{
1133 struct neigh_node *tmp_neigh_node;
1134 struct neigh_node *router = NULL, *first_candidate = NULL;
1135
1136 rcu_read_lock();
1137 list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list,
1138 bonding_list) {
1139 if (!first_candidate)
1140 first_candidate = tmp_neigh_node;
1141
1142 /* recv_if == NULL on the first node. */
1143 if (tmp_neigh_node->if_incoming == recv_if)
1144 continue;
1145
1146 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
1147 continue;
1148
1149 /* if we don't have a router yet
1150 * or this one is better, choose it. */
1151 if ((!router) ||
1152 (tmp_neigh_node->tq_avg > router->tq_avg)) {
1153 /* decrement refcount of
1154 * previously selected router */
1155 if (router)
1156 neigh_node_free_ref(router);
1157
1158 router = tmp_neigh_node;
1159 atomic_inc_not_zero(&router->refcount);
1160 }
1161
1162 neigh_node_free_ref(tmp_neigh_node);
1163 }
1164
1165 /* use the first candidate if nothing was found. */
1166 if (!router && first_candidate &&
1167 atomic_inc_not_zero(&first_candidate->refcount))
1168 router = first_candidate;
1169
1170 rcu_read_unlock();
1171 return router;
1017} 1172}
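
find_ifalter_router() instead keeps the candidate with the best tq_avg among those not using the receiving interface, falling back to the first candidate if none qualifies. A compact sketch of that selection rule, with illustrative types and no locking:

/* illustrative interface-alternating candidate selection */
#include <stdio.h>

struct candidate { int ifindex; int tq_avg; };

static int pick_best_alternate(const struct candidate *c, int n,
			       int recv_ifindex)
{
	int best = -1;

	for (int i = 0; i < n; i++) {
		if (c[i].ifindex == recv_ifindex)
			continue;
		if (best < 0 || c[i].tq_avg > c[best].tq_avg)
			best = i;
	}
	return best < 0 ? 0 : best;	/* first candidate as last resort */
}

int main(void)
{
	struct candidate c[] = { {1, 200}, {2, 250}, {3, 180} };

	printf("chose candidate %d\n", pick_best_alternate(c, 3, 2));
	return 0;
}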
1018 1173
1019/* find a suitable router for this originator, and use 1174/* find a suitable router for this originator, and use
1020 * bonding if possible. */ 1175 * bonding if possible. increases the found neighbors
1176 * refcount.*/
1021struct neigh_node *find_router(struct bat_priv *bat_priv, 1177struct neigh_node *find_router(struct bat_priv *bat_priv,
1022 struct orig_node *orig_node, 1178 struct orig_node *orig_node,
1023 struct batman_if *recv_if) 1179 struct hard_iface *recv_if)
1024{ 1180{
1025 struct orig_node *primary_orig_node; 1181 struct orig_node *primary_orig_node;
1026 struct orig_node *router_orig; 1182 struct orig_node *router_orig;
1027 struct neigh_node *router, *first_candidate, *best_router; 1183 struct neigh_node *router;
1028 static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; 1184 static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
1029 int bonding_enabled; 1185 int bonding_enabled;
1030 1186
1031 if (!orig_node) 1187 if (!orig_node)
1032 return NULL; 1188 return NULL;
1033 1189
1034 if (!orig_node->router) 1190 router = orig_node_get_router(orig_node);
1035 return NULL; 1191 if (!router)
1192 goto err;
1036 1193
1037 /* without bonding, the first node should 1194 /* without bonding, the first node should
1038 * always choose the default router. */ 1195 * always choose the default router. */
1039
1040 bonding_enabled = atomic_read(&bat_priv->bonding); 1196 bonding_enabled = atomic_read(&bat_priv->bonding);
1041 1197
1042 if ((!recv_if) && (!bonding_enabled)) 1198 rcu_read_lock();
1043 return orig_node->router; 1199 /* select default router to output */
1200 router_orig = router->orig_node;
1201 if (!router_orig)
1202 goto err_unlock;
1044 1203
1045 router_orig = orig_node->router->orig_node; 1204 if ((!recv_if) && (!bonding_enabled))
1205 goto return_router;
1046 1206
1047 /* if we have something in the primary_addr, we can search 1207 /* if we have something in the primary_addr, we can search
1048 * for a potential bonding candidate. */ 1208 * for a potential bonding candidate. */
1049 if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0) 1209 if (compare_eth(router_orig->primary_addr, zero_mac))
1050 return orig_node->router; 1210 goto return_router;
1051 1211
1052 /* find the orig_node which has the primary interface. might 1212 /* find the orig_node which has the primary interface. might
1053 * even be the same as our router_orig in many cases */ 1213 * even be the same as our router_orig in many cases */
1054 1214
1055 if (memcmp(router_orig->primary_addr, 1215 if (compare_eth(router_orig->primary_addr, router_orig->orig)) {
1056 router_orig->orig, ETH_ALEN) == 0) {
1057 primary_orig_node = router_orig; 1216 primary_orig_node = router_orig;
1058 } else { 1217 } else {
1059 primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, 1218 primary_orig_node = orig_hash_find(bat_priv,
1060 choose_orig, 1219 router_orig->primary_addr);
1061 router_orig->primary_addr);
1062
1063 if (!primary_orig_node) 1220 if (!primary_orig_node)
1064 return orig_node->router; 1221 goto return_router;
1222
1223 orig_node_free_ref(primary_orig_node);
1065 } 1224 }
1066 1225
1067 /* with less than 2 candidates, we can't do any 1226 /* with less than 2 candidates, we can't do any
1068 * bonding and prefer the original router. */ 1227 * bonding and prefer the original router. */
1069 1228 if (atomic_read(&primary_orig_node->bond_candidates) < 2)
1070 if (primary_orig_node->bond.candidates < 2) 1229 goto return_router;
1071 return orig_node->router;
1072
1073 1230
1074 /* all nodes between should choose a candidate which 1231 /* all nodes between should choose a candidate which
1075 * is is not on the interface where the packet came 1232 * is is not on the interface where the packet came
1076 * in. */ 1233 * in. */
1077 first_candidate = primary_orig_node->bond.selected;
1078 router = first_candidate;
1079 1234
1080 if (bonding_enabled) { 1235 neigh_node_free_ref(router);
1081 /* in the bonding case, send the packets in a round
1082 * robin fashion over the remaining interfaces. */
1083 do {
1084 /* recv_if == NULL on the first node. */
1085 if (router->if_incoming != recv_if)
1086 break;
1087
1088 router = router->next_bond_candidate;
1089 } while (router != first_candidate);
1090
1091 primary_orig_node->bond.selected = router->next_bond_candidate;
1092
1093 } else {
1094 /* if bonding is disabled, use the best of the
1095 * remaining candidates which are not using
1096 * this interface. */
1097 best_router = first_candidate;
1098 1236
1099 do { 1237 if (bonding_enabled)
1100 /* recv_if == NULL on the first node. */ 1238 router = find_bond_router(primary_orig_node, recv_if);
1101 if ((router->if_incoming != recv_if) && 1239 else
1102 (router->tq_avg > best_router->tq_avg)) 1240 router = find_ifalter_router(primary_orig_node, recv_if);
1103 best_router = router;
1104
1105 router = router->next_bond_candidate;
1106 } while (router != first_candidate);
1107
1108 router = best_router;
1109 }
1110 1241
1242return_router:
1243 rcu_read_unlock();
1111 return router; 1244 return router;
1245err_unlock:
1246 rcu_read_unlock();
1247err:
1248 if (router)
1249 neigh_node_free_ref(router);
1250 return NULL;
1112} 1251}
1113 1252
1114static int check_unicast_packet(struct sk_buff *skb, int hdr_size) 1253static int check_unicast_packet(struct sk_buff *skb, int hdr_size)
@@ -1136,17 +1275,14 @@ static int check_unicast_packet(struct sk_buff *skb, int hdr_size)
1136 return 0; 1275 return 0;
1137} 1276}
1138 1277
1139int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, 1278int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1140 int hdr_size)
1141{ 1279{
1142 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 1280 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1143 struct orig_node *orig_node; 1281 struct orig_node *orig_node = NULL;
1144 struct neigh_node *router; 1282 struct neigh_node *neigh_node = NULL;
1145 struct batman_if *batman_if;
1146 uint8_t dstaddr[ETH_ALEN];
1147 struct unicast_packet *unicast_packet; 1283 struct unicast_packet *unicast_packet;
1148 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); 1284 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
1149 int ret; 1285 int ret = NET_RX_DROP;
1150 struct sk_buff *new_skb; 1286 struct sk_buff *new_skb;
1151 1287
1152 unicast_packet = (struct unicast_packet *)skb->data; 1288 unicast_packet = (struct unicast_packet *)skb->data;
@@ -1156,53 +1292,48 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
1156 pr_debug("Warning - can't forward unicast packet from %pM to " 1292 pr_debug("Warning - can't forward unicast packet from %pM to "
1157 "%pM: ttl exceeded\n", ethhdr->h_source, 1293 "%pM: ttl exceeded\n", ethhdr->h_source,
1158 unicast_packet->dest); 1294 unicast_packet->dest);
1159 return NET_RX_DROP; 1295 goto out;
1160 } 1296 }
1161 1297
1162 /* get routing information */ 1298 /* get routing information */
1163 spin_lock_bh(&bat_priv->orig_hash_lock); 1299 orig_node = orig_hash_find(bat_priv, unicast_packet->dest);
1164 orig_node = ((struct orig_node *)
1165 hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
1166 unicast_packet->dest));
1167
1168 router = find_router(bat_priv, orig_node, recv_if);
1169 1300
1170 if (!router) { 1301 if (!orig_node)
1171 spin_unlock_bh(&bat_priv->orig_hash_lock); 1302 goto out;
1172 return NET_RX_DROP;
1173 }
1174
1175 /* don't lock while sending the packets ... we therefore
1176 * copy the required data before sending */
1177 1303
1178 batman_if = router->if_incoming; 1304 /* find_router() increases neigh_nodes refcount if found. */
1179 memcpy(dstaddr, router->addr, ETH_ALEN); 1305 neigh_node = find_router(bat_priv, orig_node, recv_if);
1180 1306
1181 spin_unlock_bh(&bat_priv->orig_hash_lock); 1307 if (!neigh_node)
1308 goto out;
1182 1309
1183 /* create a copy of the skb, if needed, to modify it. */ 1310 /* create a copy of the skb, if needed, to modify it. */
1184 if (skb_cow(skb, sizeof(struct ethhdr)) < 0) 1311 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
1185 return NET_RX_DROP; 1312 goto out;
1186 1313
1187 unicast_packet = (struct unicast_packet *)skb->data; 1314 unicast_packet = (struct unicast_packet *)skb->data;
1188 1315
1189 if (unicast_packet->packet_type == BAT_UNICAST && 1316 if (unicast_packet->packet_type == BAT_UNICAST &&
1190 atomic_read(&bat_priv->fragmentation) && 1317 atomic_read(&bat_priv->fragmentation) &&
1191 skb->len > batman_if->net_dev->mtu) 1318 skb->len > neigh_node->if_incoming->net_dev->mtu) {
1192 return frag_send_skb(skb, bat_priv, batman_if, 1319 ret = frag_send_skb(skb, bat_priv,
1193 dstaddr); 1320 neigh_node->if_incoming, neigh_node->addr);
1321 goto out;
1322 }
1194 1323
1195 if (unicast_packet->packet_type == BAT_UNICAST_FRAG && 1324 if (unicast_packet->packet_type == BAT_UNICAST_FRAG &&
1196 2 * skb->len - hdr_size <= batman_if->net_dev->mtu) { 1325 frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) {
1197 1326
1198 ret = frag_reassemble_skb(skb, bat_priv, &new_skb); 1327 ret = frag_reassemble_skb(skb, bat_priv, &new_skb);
1199 1328
1200 if (ret == NET_RX_DROP) 1329 if (ret == NET_RX_DROP)
1201 return NET_RX_DROP; 1330 goto out;
1202 1331
1203 /* packet was buffered for late merge */ 1332 /* packet was buffered for late merge */
1204 if (!new_skb) 1333 if (!new_skb) {
1205 return NET_RX_SUCCESS; 1334 ret = NET_RX_SUCCESS;
1335 goto out;
1336 }
1206 1337
1207 skb = new_skb; 1338 skb = new_skb;
1208 unicast_packet = (struct unicast_packet *)skb->data; 1339 unicast_packet = (struct unicast_packet *)skb->data;
@@ -1212,12 +1343,18 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
1212 unicast_packet->ttl--; 1343 unicast_packet->ttl--;
1213 1344
1214 /* route it */ 1345 /* route it */
1215 send_skb_packet(skb, batman_if, dstaddr); 1346 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
1216 1347 ret = NET_RX_SUCCESS;
1217 return NET_RX_SUCCESS; 1348
1349out:
1350 if (neigh_node)
1351 neigh_node_free_ref(neigh_node);
1352 if (orig_node)
1353 orig_node_free_ref(orig_node);
1354 return ret;
1218} 1355}
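
route_unicast_packet() now hides the reassembly size test behind frag_can_reassemble(); the sketch below assumes that helper keeps the old open-coded check (two fragments merged back together must still fit the MTU) - treat that as an assumption about its internals, not a quote of it. It only illustrates the three-way decision the function makes.

/* sketch of the fragment / reassemble / forward decision */
#include <stdio.h>
#include <stdbool.h>

enum action { FORWARD, FRAGMENT, REASSEMBLE };

static bool can_reassemble(int frag_len, int hdr_size, int mtu)
{
	/* assumed equivalent of the old 2 * skb->len - hdr_size <= mtu test */
	return 2 * frag_len - hdr_size <= mtu;
}

static enum action decide(bool is_fragment, int len, int hdr_size, int mtu,
			  bool fragmentation_enabled)
{
	if (!is_fragment && fragmentation_enabled && len > mtu)
		return FRAGMENT;
	if (is_fragment && can_reassemble(len, hdr_size, mtu))
		return REASSEMBLE;
	return FORWARD;
}

int main(void)
{
	printf("%d %d %d\n",
	       decide(false, 1600, 10, 1500, true),	/* too big -> FRAGMENT */
	       decide(true, 700, 10, 1500, true),	/* halves fit -> REASSEMBLE */
	       decide(false, 1200, 10, 1500, true));	/* fits -> FORWARD */
	return 0;
}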
1219 1356
1220int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) 1357int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1221{ 1358{
1222 struct unicast_packet *unicast_packet; 1359 struct unicast_packet *unicast_packet;
1223 int hdr_size = sizeof(struct unicast_packet); 1360 int hdr_size = sizeof(struct unicast_packet);
@@ -1233,10 +1370,10 @@ int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if)
1233 return NET_RX_SUCCESS; 1370 return NET_RX_SUCCESS;
1234 } 1371 }
1235 1372
1236 return route_unicast_packet(skb, recv_if, hdr_size); 1373 return route_unicast_packet(skb, recv_if);
1237} 1374}
1238 1375
1239int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if) 1376int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1240{ 1377{
1241 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 1378 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1242 struct unicast_frag_packet *unicast_packet; 1379 struct unicast_frag_packet *unicast_packet;
@@ -1266,89 +1403,90 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if)
1266 return NET_RX_SUCCESS; 1403 return NET_RX_SUCCESS;
1267 } 1404 }
1268 1405
1269 return route_unicast_packet(skb, recv_if, hdr_size); 1406 return route_unicast_packet(skb, recv_if);
1270} 1407}
1271 1408
1272 1409
1273int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) 1410int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1274{ 1411{
1275 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 1412 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1276 struct orig_node *orig_node; 1413 struct orig_node *orig_node = NULL;
1277 struct bcast_packet *bcast_packet; 1414 struct bcast_packet *bcast_packet;
1278 struct ethhdr *ethhdr; 1415 struct ethhdr *ethhdr;
1279 int hdr_size = sizeof(struct bcast_packet); 1416 int hdr_size = sizeof(struct bcast_packet);
1417 int ret = NET_RX_DROP;
1280 int32_t seq_diff; 1418 int32_t seq_diff;
1281 1419
1282 /* drop packet if it has not necessary minimum size */ 1420 /* drop packet if it has not necessary minimum size */
1283 if (unlikely(!pskb_may_pull(skb, hdr_size))) 1421 if (unlikely(!pskb_may_pull(skb, hdr_size)))
1284 return NET_RX_DROP; 1422 goto out;
1285 1423
1286 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1424 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1287 1425
1288 /* packet with broadcast indication but unicast recipient */ 1426 /* packet with broadcast indication but unicast recipient */
1289 if (!is_broadcast_ether_addr(ethhdr->h_dest)) 1427 if (!is_broadcast_ether_addr(ethhdr->h_dest))
1290 return NET_RX_DROP; 1428 goto out;
1291 1429
1292 /* packet with broadcast sender address */ 1430 /* packet with broadcast sender address */
1293 if (is_broadcast_ether_addr(ethhdr->h_source)) 1431 if (is_broadcast_ether_addr(ethhdr->h_source))
1294 return NET_RX_DROP; 1432 goto out;
1295 1433
1296 /* ignore broadcasts sent by myself */ 1434 /* ignore broadcasts sent by myself */
1297 if (is_my_mac(ethhdr->h_source)) 1435 if (is_my_mac(ethhdr->h_source))
1298 return NET_RX_DROP; 1436 goto out;
1299 1437
1300 bcast_packet = (struct bcast_packet *)skb->data; 1438 bcast_packet = (struct bcast_packet *)skb->data;
1301 1439
1302 /* ignore broadcasts originated by myself */ 1440 /* ignore broadcasts originated by myself */
1303 if (is_my_mac(bcast_packet->orig)) 1441 if (is_my_mac(bcast_packet->orig))
1304 return NET_RX_DROP; 1442 goto out;
1305 1443
1306 if (bcast_packet->ttl < 2) 1444 if (bcast_packet->ttl < 2)
1307 return NET_RX_DROP; 1445 goto out;
1308 1446
1309 spin_lock_bh(&bat_priv->orig_hash_lock); 1447 orig_node = orig_hash_find(bat_priv, bcast_packet->orig);
1310 orig_node = ((struct orig_node *)
1311 hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
1312 bcast_packet->orig));
1313 1448
1314 if (!orig_node) { 1449 if (!orig_node)
1315 spin_unlock_bh(&bat_priv->orig_hash_lock); 1450 goto out;
1316 return NET_RX_DROP; 1451
1317 } 1452 spin_lock_bh(&orig_node->bcast_seqno_lock);
1318 1453
1319 /* check whether the packet is a duplicate */ 1454 /* check whether the packet is a duplicate */
1320 if (get_bit_status(orig_node->bcast_bits, 1455 if (get_bit_status(orig_node->bcast_bits, orig_node->last_bcast_seqno,
1321 orig_node->last_bcast_seqno, 1456 ntohl(bcast_packet->seqno)))
1322 ntohl(bcast_packet->seqno))) { 1457 goto spin_unlock;
1323 spin_unlock_bh(&bat_priv->orig_hash_lock);
1324 return NET_RX_DROP;
1325 }
1326 1458
1327 seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno; 1459 seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno;
1328 1460
1329 /* check whether the packet is old and the host just restarted. */ 1461 /* check whether the packet is old and the host just restarted. */
1330 if (window_protected(bat_priv, seq_diff, 1462 if (window_protected(bat_priv, seq_diff,
1331 &orig_node->bcast_seqno_reset)) { 1463 &orig_node->bcast_seqno_reset))
1332 spin_unlock_bh(&bat_priv->orig_hash_lock); 1464 goto spin_unlock;
1333 return NET_RX_DROP;
1334 }
1335 1465
1336 /* mark broadcast in flood history, update window position 1466 /* mark broadcast in flood history, update window position
1337 * if required. */ 1467 * if required. */
1338 if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1)) 1468 if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1))
1339 orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); 1469 orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno);
1340 1470
1341 spin_unlock_bh(&bat_priv->orig_hash_lock); 1471 spin_unlock_bh(&orig_node->bcast_seqno_lock);
1472
1342 /* rebroadcast packet */ 1473 /* rebroadcast packet */
1343 add_bcast_packet_to_list(bat_priv, skb); 1474 add_bcast_packet_to_list(bat_priv, skb);
1344 1475
1345 /* broadcast for me */ 1476 /* broadcast for me */
1346 interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); 1477 interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size);
1347 1478 ret = NET_RX_SUCCESS;
1348 return NET_RX_SUCCESS; 1479 goto out;
1480
1481spin_unlock:
1482 spin_unlock_bh(&orig_node->bcast_seqno_lock);
1483out:
1484 if (orig_node)
1485 orig_node_free_ref(orig_node);
1486 return ret;
1349} 1487}
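
The broadcast path drops duplicates by checking the packet's sequence number against a per-originator bitmap window (get_bit_status / bit_get_packet), now protected by the originator's own bcast_seqno_lock rather than the global hash lock. The userspace sketch below models only that idea with a fixed 64-bit window; the real bitarray.c window size and edge handling differ.

/* illustrative sliding seqno window for duplicate detection */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define WINDOW_SIZE 64

struct bcast_state {
	uint64_t bits;		/* bit i set => (last_seqno - i) already seen */
	uint32_t last_seqno;
};

static bool is_duplicate(const struct bcast_state *s, uint32_t seqno)
{
	int32_t diff = (int32_t)(s->last_seqno - seqno);

	if (diff < 0 || diff >= WINDOW_SIZE)
		return false;			/* outside the window: treat as new */
	return (s->bits >> diff) & 1;
}

static void mark_seen(struct bcast_state *s, uint32_t seqno)
{
	int32_t diff = (int32_t)(seqno - s->last_seqno);

	if (diff > 0) {				/* newer: slide the window forward */
		s->bits = (diff >= WINDOW_SIZE) ? 0 : s->bits << diff;
		s->bits |= 1;
		s->last_seqno = seqno;
	} else if (-diff < WINDOW_SIZE) {	/* older but inside the window */
		s->bits |= 1ULL << -diff;
	}
}

int main(void)
{
	struct bcast_state s = { .bits = 1, .last_seqno = 100 };

	printf("%d ", is_duplicate(&s, 100));	/* 1: already seen    */
	mark_seen(&s, 103);
	printf("%d ", is_duplicate(&s, 100));	/* 1: still in window */
	printf("%d\n", is_duplicate(&s, 101));	/* 0: never marked    */
	return 0;
}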
1350 1488
1351int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if) 1489int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1352{ 1490{
1353 struct vis_packet *vis_packet; 1491 struct vis_packet *vis_packet;
1354 struct ethhdr *ethhdr; 1492 struct ethhdr *ethhdr;
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index f108f230bfdb..870f29842b28 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,27 +22,25 @@
22#ifndef _NET_BATMAN_ADV_ROUTING_H_ 22#ifndef _NET_BATMAN_ADV_ROUTING_H_
23#define _NET_BATMAN_ADV_ROUTING_H_ 23#define _NET_BATMAN_ADV_ROUTING_H_
24 24
25#include "types.h" 25void slide_own_bcast_window(struct hard_iface *hard_iface);
26
27void slide_own_bcast_window(struct batman_if *batman_if);
28void receive_bat_packet(struct ethhdr *ethhdr, 26void receive_bat_packet(struct ethhdr *ethhdr,
29 struct batman_packet *batman_packet, 27 struct batman_packet *batman_packet,
30 unsigned char *hna_buff, int hna_buff_len, 28 unsigned char *tt_buff, int tt_buff_len,
31 struct batman_if *if_incoming); 29 struct hard_iface *if_incoming);
32void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, 30void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
33 struct neigh_node *neigh_node, unsigned char *hna_buff, 31 struct neigh_node *neigh_node, unsigned char *tt_buff,
34 int hna_buff_len); 32 int tt_buff_len);
35int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, 33int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
36 int hdr_size); 34int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if);
37int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if); 35int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
38int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if); 36int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if);
39int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if); 37int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
40int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if); 38int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if);
41int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); 39int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if);
42int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if);
43struct neigh_node *find_router(struct bat_priv *bat_priv, 40struct neigh_node *find_router(struct bat_priv *bat_priv,
44 struct orig_node *orig_node, struct batman_if *recv_if); 41 struct orig_node *orig_node,
45void update_bonding_candidates(struct bat_priv *bat_priv, 42 struct hard_iface *recv_if);
46 struct orig_node *orig_node); 43void bonding_candidate_del(struct orig_node *orig_node,
44 struct neigh_node *neigh_node);
47 45
48#endif /* _NET_BATMAN_ADV_ROUTING_H_ */ 46#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index b89b9f7709ae..33779278f1b2 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -25,7 +25,6 @@
25#include "translation-table.h" 25#include "translation-table.h"
26#include "soft-interface.h" 26#include "soft-interface.h"
27#include "hard-interface.h" 27#include "hard-interface.h"
28#include "types.h"
29#include "vis.h" 28#include "vis.h"
30#include "aggregation.h" 29#include "aggregation.h"
31#include "gateway_common.h" 30#include "gateway_common.h"
@@ -49,7 +48,7 @@ static unsigned long own_send_time(struct bat_priv *bat_priv)
49} 48}
50 49
51/* when do we schedule a forwarded packet to be sent */ 50/* when do we schedule a forwarded packet to be sent */
52static unsigned long forward_send_time(struct bat_priv *bat_priv) 51static unsigned long forward_send_time(void)
53{ 52{
54 return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); 53 return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
55} 54}
@@ -57,20 +56,20 @@ static unsigned long forward_send_time(struct bat_priv *bat_priv)
57/* send out an already prepared packet to the given address via the 56/* send out an already prepared packet to the given address via the
58 * specified batman interface */ 57 * specified batman interface */
59int send_skb_packet(struct sk_buff *skb, 58int send_skb_packet(struct sk_buff *skb,
60 struct batman_if *batman_if, 59 struct hard_iface *hard_iface,
61 uint8_t *dst_addr) 60 uint8_t *dst_addr)
62{ 61{
63 struct ethhdr *ethhdr; 62 struct ethhdr *ethhdr;
64 63
65 if (batman_if->if_status != IF_ACTIVE) 64 if (hard_iface->if_status != IF_ACTIVE)
66 goto send_skb_err; 65 goto send_skb_err;
67 66
68 if (unlikely(!batman_if->net_dev)) 67 if (unlikely(!hard_iface->net_dev))
69 goto send_skb_err; 68 goto send_skb_err;
70 69
71 if (!(batman_if->net_dev->flags & IFF_UP)) { 70 if (!(hard_iface->net_dev->flags & IFF_UP)) {
72 pr_warning("Interface %s is not up - can't send packet via " 71 pr_warning("Interface %s is not up - can't send packet via "
73 "that interface!\n", batman_if->net_dev->name); 72 "that interface!\n", hard_iface->net_dev->name);
74 goto send_skb_err; 73 goto send_skb_err;
75 } 74 }
76 75
@@ -81,7 +80,7 @@ int send_skb_packet(struct sk_buff *skb,
81 skb_reset_mac_header(skb); 80 skb_reset_mac_header(skb);
82 81
83 ethhdr = (struct ethhdr *) skb_mac_header(skb); 82 ethhdr = (struct ethhdr *) skb_mac_header(skb);
84 memcpy(ethhdr->h_source, batman_if->net_dev->dev_addr, ETH_ALEN); 83 memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
85 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); 84 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
86 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); 85 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
87 86
@@ -89,7 +88,7 @@ int send_skb_packet(struct sk_buff *skb,
89 skb->priority = TC_PRIO_CONTROL; 88 skb->priority = TC_PRIO_CONTROL;
90 skb->protocol = __constant_htons(ETH_P_BATMAN); 89 skb->protocol = __constant_htons(ETH_P_BATMAN);
91 90
92 skb->dev = batman_if->net_dev; 91 skb->dev = hard_iface->net_dev;
93 92
94 /* dev_queue_xmit() returns a negative result on error. However on 93 /* dev_queue_xmit() returns a negative result on error. However on
95 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP 94 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
@@ -103,16 +102,16 @@ send_skb_err:
103 102
104/* Send a packet to a given interface */ 103/* Send a packet to a given interface */
105static void send_packet_to_if(struct forw_packet *forw_packet, 104static void send_packet_to_if(struct forw_packet *forw_packet,
106 struct batman_if *batman_if) 105 struct hard_iface *hard_iface)
107{ 106{
108 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 107 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
109 char *fwd_str; 108 char *fwd_str;
110 uint8_t packet_num; 109 uint8_t packet_num;
111 int16_t buff_pos; 110 int16_t buff_pos;
112 struct batman_packet *batman_packet; 111 struct batman_packet *batman_packet;
113 struct sk_buff *skb; 112 struct sk_buff *skb;
114 113
115 if (batman_if->if_status != IF_ACTIVE) 114 if (hard_iface->if_status != IF_ACTIVE)
116 return; 115 return;
117 116
118 packet_num = 0; 117 packet_num = 0;
@@ -122,12 +121,12 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
122 /* adjust all flags and log packets */ 121 /* adjust all flags and log packets */
123 while (aggregated_packet(buff_pos, 122 while (aggregated_packet(buff_pos,
124 forw_packet->packet_len, 123 forw_packet->packet_len,
125 batman_packet->num_hna)) { 124 batman_packet->num_tt)) {
126 125
127 /* we might have aggregated direct link packets with an 126 /* we might have aggregated direct link packets with an
128 * ordinary base packet */ 127 * ordinary base packet */
129 if ((forw_packet->direct_link_flags & (1 << packet_num)) && 128 if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
130 (forw_packet->if_incoming == batman_if)) 129 (forw_packet->if_incoming == hard_iface))
131 batman_packet->flags |= DIRECTLINK; 130 batman_packet->flags |= DIRECTLINK;
132 else 131 else
133 batman_packet->flags &= ~DIRECTLINK; 132 batman_packet->flags &= ~DIRECTLINK;
@@ -143,10 +142,11 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
143 batman_packet->tq, batman_packet->ttl, 142 batman_packet->tq, batman_packet->ttl,
144 (batman_packet->flags & DIRECTLINK ? 143 (batman_packet->flags & DIRECTLINK ?
145 "on" : "off"), 144 "on" : "off"),
146 batman_if->net_dev->name, batman_if->net_dev->dev_addr); 145 hard_iface->net_dev->name,
146 hard_iface->net_dev->dev_addr);
147 147
148 buff_pos += sizeof(struct batman_packet) + 148 buff_pos += sizeof(struct batman_packet) +
149 (batman_packet->num_hna * ETH_ALEN); 149 (batman_packet->num_tt * ETH_ALEN);
150 packet_num++; 150 packet_num++;
151 batman_packet = (struct batman_packet *) 151 batman_packet = (struct batman_packet *)
152 (forw_packet->skb->data + buff_pos); 152 (forw_packet->skb->data + buff_pos);
@@ -155,13 +155,13 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
155 /* create clone because function is called more than once */ 155 /* create clone because function is called more than once */
156 skb = skb_clone(forw_packet->skb, GFP_ATOMIC); 156 skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
157 if (skb) 157 if (skb)
158 send_skb_packet(skb, batman_if, broadcast_addr); 158 send_skb_packet(skb, hard_iface, broadcast_addr);
159} 159}
160 160
161/* send a batman packet */ 161/* send a batman packet */
162static void send_packet(struct forw_packet *forw_packet) 162static void send_packet(struct forw_packet *forw_packet)
163{ 163{
164 struct batman_if *batman_if; 164 struct hard_iface *hard_iface;
165 struct net_device *soft_iface; 165 struct net_device *soft_iface;
166 struct bat_priv *bat_priv; 166 struct bat_priv *bat_priv;
167 struct batman_packet *batman_packet = 167 struct batman_packet *batman_packet =
@@ -205,54 +205,56 @@ static void send_packet(struct forw_packet *forw_packet)
205 205
206 /* broadcast on every interface */ 206 /* broadcast on every interface */
207 rcu_read_lock(); 207 rcu_read_lock();
208 list_for_each_entry_rcu(batman_if, &if_list, list) { 208 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
209 if (batman_if->soft_iface != soft_iface) 209 if (hard_iface->soft_iface != soft_iface)
210 continue; 210 continue;
211 211
212 send_packet_to_if(forw_packet, batman_if); 212 send_packet_to_if(forw_packet, hard_iface);
213 } 213 }
214 rcu_read_unlock(); 214 rcu_read_unlock();
215} 215}
216 216
217static void rebuild_batman_packet(struct bat_priv *bat_priv, 217static void rebuild_batman_packet(struct bat_priv *bat_priv,
218 struct batman_if *batman_if) 218 struct hard_iface *hard_iface)
219{ 219{
220 int new_len; 220 int new_len;
221 unsigned char *new_buff; 221 unsigned char *new_buff;
222 struct batman_packet *batman_packet; 222 struct batman_packet *batman_packet;
223 223
224 new_len = sizeof(struct batman_packet) + 224 new_len = sizeof(struct batman_packet) +
225 (bat_priv->num_local_hna * ETH_ALEN); 225 (bat_priv->num_local_tt * ETH_ALEN);
226 new_buff = kmalloc(new_len, GFP_ATOMIC); 226 new_buff = kmalloc(new_len, GFP_ATOMIC);
227 227
228 /* keep old buffer if kmalloc should fail */ 228 /* keep old buffer if kmalloc should fail */
229 if (new_buff) { 229 if (new_buff) {
230 memcpy(new_buff, batman_if->packet_buff, 230 memcpy(new_buff, hard_iface->packet_buff,
231 sizeof(struct batman_packet)); 231 sizeof(struct batman_packet));
232 batman_packet = (struct batman_packet *)new_buff; 232 batman_packet = (struct batman_packet *)new_buff;
233 233
234 batman_packet->num_hna = hna_local_fill_buffer(bat_priv, 234 batman_packet->num_tt = tt_local_fill_buffer(bat_priv,
235 new_buff + sizeof(struct batman_packet), 235 new_buff + sizeof(struct batman_packet),
236 new_len - sizeof(struct batman_packet)); 236 new_len - sizeof(struct batman_packet));
237 237
238 kfree(batman_if->packet_buff); 238 kfree(hard_iface->packet_buff);
239 batman_if->packet_buff = new_buff; 239 hard_iface->packet_buff = new_buff;
240 batman_if->packet_len = new_len; 240 hard_iface->packet_len = new_len;
241 } 241 }
242} 242}
243 243
244void schedule_own_packet(struct batman_if *batman_if) 244void schedule_own_packet(struct hard_iface *hard_iface)
245{ 245{
246 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 246 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
247 struct hard_iface *primary_if;
247 unsigned long send_time; 248 unsigned long send_time;
248 struct batman_packet *batman_packet; 249 struct batman_packet *batman_packet;
249 int vis_server; 250 int vis_server;
250 251
251 if ((batman_if->if_status == IF_NOT_IN_USE) || 252 if ((hard_iface->if_status == IF_NOT_IN_USE) ||
252 (batman_if->if_status == IF_TO_BE_REMOVED)) 253 (hard_iface->if_status == IF_TO_BE_REMOVED))
253 return; 254 return;
254 255
255 vis_server = atomic_read(&bat_priv->vis_mode); 256 vis_server = atomic_read(&bat_priv->vis_mode);
257 primary_if = primary_if_get_selected(bat_priv);
256 258
257 /** 259 /**
258 * the interface gets activated here to avoid race conditions between 260 * the interface gets activated here to avoid race conditions between
@@ -261,53 +263,57 @@ void schedule_own_packet(struct batman_if *batman_if)
261 * outdated packets (especially uninitialized mac addresses) in the 263 * outdated packets (especially uninitialized mac addresses) in the
262 * packet queue 264 * packet queue
263 */ 265 */
264 if (batman_if->if_status == IF_TO_BE_ACTIVATED) 266 if (hard_iface->if_status == IF_TO_BE_ACTIVATED)
265 batman_if->if_status = IF_ACTIVE; 267 hard_iface->if_status = IF_ACTIVE;
266 268
267 /* if local hna has changed and interface is a primary interface */ 269 /* if local tt has changed and interface is a primary interface */
268 if ((atomic_read(&bat_priv->hna_local_changed)) && 270 if ((atomic_read(&bat_priv->tt_local_changed)) &&
269 (batman_if == bat_priv->primary_if)) 271 (hard_iface == primary_if))
270 rebuild_batman_packet(bat_priv, batman_if); 272 rebuild_batman_packet(bat_priv, hard_iface);
271 273
272 /** 274 /**
273 * NOTE: packet_buff might just have been re-allocated in 275 * NOTE: packet_buff might just have been re-allocated in
274 * rebuild_batman_packet() 276 * rebuild_batman_packet()
275 */ 277 */
276 batman_packet = (struct batman_packet *)batman_if->packet_buff; 278 batman_packet = (struct batman_packet *)hard_iface->packet_buff;
277 279
278 /* change sequence number to network order */ 280 /* change sequence number to network order */
279 batman_packet->seqno = 281 batman_packet->seqno =
280 htonl((uint32_t)atomic_read(&batman_if->seqno)); 282 htonl((uint32_t)atomic_read(&hard_iface->seqno));
281 283
282 if (vis_server == VIS_TYPE_SERVER_SYNC) 284 if (vis_server == VIS_TYPE_SERVER_SYNC)
283 batman_packet->flags |= VIS_SERVER; 285 batman_packet->flags |= VIS_SERVER;
284 else 286 else
285 batman_packet->flags &= ~VIS_SERVER; 287 batman_packet->flags &= ~VIS_SERVER;
286 288
287 if ((batman_if == bat_priv->primary_if) && 289 if ((hard_iface == primary_if) &&
288 (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) 290 (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
289 batman_packet->gw_flags = 291 batman_packet->gw_flags =
290 (uint8_t)atomic_read(&bat_priv->gw_bandwidth); 292 (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
291 else 293 else
292 batman_packet->gw_flags = 0; 294 batman_packet->gw_flags = 0;
293 295
294 atomic_inc(&batman_if->seqno); 296 atomic_inc(&hard_iface->seqno);
295 297
296 slide_own_bcast_window(batman_if); 298 slide_own_bcast_window(hard_iface);
297 send_time = own_send_time(bat_priv); 299 send_time = own_send_time(bat_priv);
298 add_bat_packet_to_list(bat_priv, 300 add_bat_packet_to_list(bat_priv,
299 batman_if->packet_buff, 301 hard_iface->packet_buff,
300 batman_if->packet_len, 302 hard_iface->packet_len,
301 batman_if, 1, send_time); 303 hard_iface, 1, send_time);
304
305 if (primary_if)
306 hardif_free_ref(primary_if);
302} 307}
303 308
304void schedule_forward_packet(struct orig_node *orig_node, 309void schedule_forward_packet(struct orig_node *orig_node,
305 struct ethhdr *ethhdr, 310 struct ethhdr *ethhdr,
306 struct batman_packet *batman_packet, 311 struct batman_packet *batman_packet,
307 uint8_t directlink, int hna_buff_len, 312 uint8_t directlink, int tt_buff_len,
308 struct batman_if *if_incoming) 313 struct hard_iface *if_incoming)
309{ 314{
310 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 315 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
316 struct neigh_node *router;
311 unsigned char in_tq, in_ttl, tq_avg = 0; 317 unsigned char in_tq, in_ttl, tq_avg = 0;
312 unsigned long send_time; 318 unsigned long send_time;
313 319
@@ -316,6 +322,8 @@ void schedule_forward_packet(struct orig_node *orig_node,
316 return; 322 return;
317 } 323 }
318 324
325 router = orig_node_get_router(orig_node);
326
319 in_tq = batman_packet->tq; 327 in_tq = batman_packet->tq;
320 in_ttl = batman_packet->ttl; 328 in_ttl = batman_packet->ttl;
321 329
@@ -324,20 +332,22 @@ void schedule_forward_packet(struct orig_node *orig_node,
324 332
325 /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast 333 /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast
326 * of our best tq value */ 334 * of our best tq value */
327 if ((orig_node->router) && (orig_node->router->tq_avg != 0)) { 335 if (router && router->tq_avg != 0) {
328 336
329 /* rebroadcast ogm of best ranking neighbor as is */ 337 /* rebroadcast ogm of best ranking neighbor as is */
330 if (!compare_orig(orig_node->router->addr, ethhdr->h_source)) { 338 if (!compare_eth(router->addr, ethhdr->h_source)) {
331 batman_packet->tq = orig_node->router->tq_avg; 339 batman_packet->tq = router->tq_avg;
332 340
333 if (orig_node->router->last_ttl) 341 if (router->last_ttl)
334 batman_packet->ttl = orig_node->router->last_ttl 342 batman_packet->ttl = router->last_ttl - 1;
335 - 1;
336 } 343 }
337 344
338 tq_avg = orig_node->router->tq_avg; 345 tq_avg = router->tq_avg;
339 } 346 }
340 347
348 if (router)
349 neigh_node_free_ref(router);
350
341 /* apply hop penalty */ 351 /* apply hop penalty */
342 batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv); 352 batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv);
343 353
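
When forwarding an OGM, the code rebroadcasts the best ranking neighbor's tq_avg (unless the OGM came from that neighbor itself) and then applies a hop penalty. The sketch below uses the common tq * (TQ_MAX - penalty) / TQ_MAX scaling as an assumed stand-in for hop_penalty(); only the control flow is taken from the patch.

/* illustrative TQ handling for a forwarded OGM */
#include <stdio.h>
#include <stdint.h>

#define TQ_MAX_VALUE 255

static uint8_t apply_hop_penalty(uint8_t tq, uint8_t penalty)
{
	return (uint8_t)(tq * (TQ_MAX_VALUE - penalty) / TQ_MAX_VALUE);
}

static uint8_t forwarded_tq(uint8_t packet_tq, uint8_t router_tq_avg,
			    int from_best_router, uint8_t penalty)
{
	uint8_t tq = packet_tq;

	/* rebroadcast the tq of our best ranking neighbor, unless the
	 * OGM already came from that neighbor itself */
	if (router_tq_avg && !from_best_router)
		tq = router_tq_avg;

	return apply_hop_penalty(tq, penalty);
}

int main(void)
{
	printf("%u\n", forwarded_tq(200, 230, 0, 10));	/* take router's 230, then penalize */
	printf("%u\n", forwarded_tq(200, 230, 1, 10));	/* keep packet's own 200 */
	return 0;
}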
@@ -356,10 +366,10 @@ void schedule_forward_packet(struct orig_node *orig_node,
356 else 366 else
357 batman_packet->flags &= ~DIRECTLINK; 367 batman_packet->flags &= ~DIRECTLINK;
358 368
359 send_time = forward_send_time(bat_priv); 369 send_time = forward_send_time();
360 add_bat_packet_to_list(bat_priv, 370 add_bat_packet_to_list(bat_priv,
361 (unsigned char *)batman_packet, 371 (unsigned char *)batman_packet,
362 sizeof(struct batman_packet) + hna_buff_len, 372 sizeof(struct batman_packet) + tt_buff_len,
363 if_incoming, 0, send_time); 373 if_incoming, 0, send_time);
364} 374}
365 375
@@ -367,6 +377,8 @@ static void forw_packet_free(struct forw_packet *forw_packet)
367{ 377{
368 if (forw_packet->skb) 378 if (forw_packet->skb)
369 kfree_skb(forw_packet->skb); 379 kfree_skb(forw_packet->skb);
380 if (forw_packet->if_incoming)
381 hardif_free_ref(forw_packet->if_incoming);
370 kfree(forw_packet); 382 kfree(forw_packet);
371} 383}
372 384
@@ -388,7 +400,6 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv,
388 send_time); 400 send_time);
389} 401}
390 402
391#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
392/* add a broadcast packet to the queue and setup timers. broadcast packets 403/* add a broadcast packet to the queue and setup timers. broadcast packets
393 * are sent multiple times to increase probability for beeing received. 404 * are sent multiple times to increase probability for beeing received.

394 * 405 *
@@ -399,6 +410,7 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv,
399 * skb is freed. */ 410 * skb is freed. */
400int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb) 411int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb)
401{ 412{
413 struct hard_iface *primary_if = NULL;
402 struct forw_packet *forw_packet; 414 struct forw_packet *forw_packet;
403 struct bcast_packet *bcast_packet; 415 struct bcast_packet *bcast_packet;
404 416
@@ -407,8 +419,9 @@ int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb)
407 goto out; 419 goto out;
408 } 420 }
409 421
410 if (!bat_priv->primary_if) 422 primary_if = primary_if_get_selected(bat_priv);
411 goto out; 423 if (!primary_if)
424 goto out_and_inc;
412 425
413 forw_packet = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC); 426 forw_packet = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC);
414 427
@@ -426,7 +439,7 @@ int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb)
426 skb_reset_mac_header(skb); 439 skb_reset_mac_header(skb);
427 440
428 forw_packet->skb = skb; 441 forw_packet->skb = skb;
429 forw_packet->if_incoming = bat_priv->primary_if; 442 forw_packet->if_incoming = primary_if;
430 443
431 /* how often did we send the bcast packet ? */ 444 /* how often did we send the bcast packet ? */
432 forw_packet->num_packets = 0; 445 forw_packet->num_packets = 0;
@@ -439,12 +452,14 @@ packet_free:
439out_and_inc: 452out_and_inc:
440 atomic_inc(&bat_priv->bcast_queue_left); 453 atomic_inc(&bat_priv->bcast_queue_left);
441out: 454out:
455 if (primary_if)
456 hardif_free_ref(primary_if);
442 return NETDEV_TX_BUSY; 457 return NETDEV_TX_BUSY;
443} 458}
444 459
445static void send_outstanding_bcast_packet(struct work_struct *work) 460static void send_outstanding_bcast_packet(struct work_struct *work)
446{ 461{
447 struct batman_if *batman_if; 462 struct hard_iface *hard_iface;
448 struct delayed_work *delayed_work = 463 struct delayed_work *delayed_work =
449 container_of(work, struct delayed_work, work); 464 container_of(work, struct delayed_work, work);
450 struct forw_packet *forw_packet = 465 struct forw_packet *forw_packet =
@@ -462,14 +477,14 @@ static void send_outstanding_bcast_packet(struct work_struct *work)
462 477
463 /* rebroadcast packet */ 478 /* rebroadcast packet */
464 rcu_read_lock(); 479 rcu_read_lock();
465 list_for_each_entry_rcu(batman_if, &if_list, list) { 480 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
466 if (batman_if->soft_iface != soft_iface) 481 if (hard_iface->soft_iface != soft_iface)
467 continue; 482 continue;
468 483
469 /* send a copy of the saved skb */ 484 /* send a copy of the saved skb */
470 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); 485 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
471 if (skb1) 486 if (skb1)
472 send_skb_packet(skb1, batman_if, broadcast_addr); 487 send_skb_packet(skb1, hard_iface, broadcast_addr);
473 } 488 }
474 rcu_read_unlock(); 489 rcu_read_unlock();
475 490
@@ -522,15 +537,16 @@ out:
522} 537}
523 538
524void purge_outstanding_packets(struct bat_priv *bat_priv, 539void purge_outstanding_packets(struct bat_priv *bat_priv,
525 struct batman_if *batman_if) 540 struct hard_iface *hard_iface)
526{ 541{
527 struct forw_packet *forw_packet; 542 struct forw_packet *forw_packet;
528 struct hlist_node *tmp_node, *safe_tmp_node; 543 struct hlist_node *tmp_node, *safe_tmp_node;
544 bool pending;
529 545
530 if (batman_if) 546 if (hard_iface)
531 bat_dbg(DBG_BATMAN, bat_priv, 547 bat_dbg(DBG_BATMAN, bat_priv,
532 "purge_outstanding_packets(): %s\n", 548 "purge_outstanding_packets(): %s\n",
533 batman_if->net_dev->name); 549 hard_iface->net_dev->name);
534 else 550 else
535 bat_dbg(DBG_BATMAN, bat_priv, 551 bat_dbg(DBG_BATMAN, bat_priv,
536 "purge_outstanding_packets()\n"); 552 "purge_outstanding_packets()\n");
@@ -544,8 +560,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
544 * if purge_outstanding_packets() was called with an argmument 560 * if purge_outstanding_packets() was called with an argmument
545 * we delete only packets belonging to the given interface 561 * we delete only packets belonging to the given interface
546 */ 562 */
547 if ((batman_if) && 563 if ((hard_iface) &&
548 (forw_packet->if_incoming != batman_if)) 564 (forw_packet->if_incoming != hard_iface))
549 continue; 565 continue;
550 566
551 spin_unlock_bh(&bat_priv->forw_bcast_list_lock); 567 spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
@@ -554,8 +570,13 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
554 * send_outstanding_bcast_packet() will lock the list to 570 * send_outstanding_bcast_packet() will lock the list to
555 * delete the item from the list 571 * delete the item from the list
556 */ 572 */
557 cancel_delayed_work_sync(&forw_packet->delayed_work); 573 pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
558 spin_lock_bh(&bat_priv->forw_bcast_list_lock); 574 spin_lock_bh(&bat_priv->forw_bcast_list_lock);
575
576 if (pending) {
577 hlist_del(&forw_packet->list);
578 forw_packet_free(forw_packet);
579 }
559 } 580 }
560 spin_unlock_bh(&bat_priv->forw_bcast_list_lock); 581 spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
561 582
@@ -568,8 +589,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
568 * if purge_outstanding_packets() was called with an argmument 589 * if purge_outstanding_packets() was called with an argmument
569 * we delete only packets belonging to the given interface 590 * we delete only packets belonging to the given interface
570 */ 591 */
571 if ((batman_if) && 592 if ((hard_iface) &&
572 (forw_packet->if_incoming != batman_if)) 593 (forw_packet->if_incoming != hard_iface))
573 continue; 594 continue;
574 595
575 spin_unlock_bh(&bat_priv->forw_bat_list_lock); 596 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
@@ -578,8 +599,13 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
578 * send_outstanding_bat_packet() will lock the list to 599 * send_outstanding_bat_packet() will lock the list to
579 * delete the item from the list 600 * delete the item from the list
580 */ 601 */
581 cancel_delayed_work_sync(&forw_packet->delayed_work); 602 pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
582 spin_lock_bh(&bat_priv->forw_bat_list_lock); 603 spin_lock_bh(&bat_priv->forw_bat_list_lock);
604
605 if (pending) {
606 hlist_del(&forw_packet->list);
607 forw_packet_free(forw_packet);
608 }
583 } 609 }
584 spin_unlock_bh(&bat_priv->forw_bat_list_lock); 610 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
585} 611}
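
purge_outstanding_packets() now looks at what cancel_delayed_work_sync() returns: true means the delayed work was still pending, so its handler will never run and the purger has to unlink and free the packet itself; false means the handler already ran (or is running) and owns that cleanup. A small illustration of that ownership rule, with no real workqueue behind it:

/* sketch: who frees a queued packet after cancellation */
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>

struct forw_packet { int id; bool queued; };

/* stand-in for cancel_delayed_work_sync(): true if the work never ran */
static bool cancel_pending_work(struct forw_packet *fp)
{
	bool was_pending = fp->queued;

	fp->queued = false;
	return was_pending;
}

static void purge(struct forw_packet **list, int n)
{
	for (int i = 0; i < n; i++) {
		bool pending = cancel_pending_work(list[i]);

		if (pending) {		/* handler won't run: free it here */
			printf("freeing packet %d ourselves\n", list[i]->id);
			free(list[i]);
			list[i] = NULL;
		}			/* else: the handler owns the cleanup */
	}
}

int main(void)
{
	struct forw_packet *a = malloc(sizeof(*a));
	struct forw_packet *b = malloc(sizeof(*b));

	*a = (struct forw_packet){ .id = 1, .queued = true };
	*b = (struct forw_packet){ .id = 2, .queued = false };

	struct forw_packet *list[] = { a, b };
	purge(list, 2);
	free(b);	/* packet 2 would have been freed by its work handler */
	return 0;
}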
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index c4cefa8e4f85..247172d71e4b 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,20 +22,18 @@
22#ifndef _NET_BATMAN_ADV_SEND_H_ 22#ifndef _NET_BATMAN_ADV_SEND_H_
23#define _NET_BATMAN_ADV_SEND_H_ 23#define _NET_BATMAN_ADV_SEND_H_
24 24
25#include "types.h"
26
27int send_skb_packet(struct sk_buff *skb, 25int send_skb_packet(struct sk_buff *skb,
28 struct batman_if *batman_if, 26 struct hard_iface *hard_iface,
29 uint8_t *dst_addr); 27 uint8_t *dst_addr);
30void schedule_own_packet(struct batman_if *batman_if); 28void schedule_own_packet(struct hard_iface *hard_iface);
31void schedule_forward_packet(struct orig_node *orig_node, 29void schedule_forward_packet(struct orig_node *orig_node,
32 struct ethhdr *ethhdr, 30 struct ethhdr *ethhdr,
33 struct batman_packet *batman_packet, 31 struct batman_packet *batman_packet,
34 uint8_t directlink, int hna_buff_len, 32 uint8_t directlink, int tt_buff_len,
35 struct batman_if *if_outgoing); 33 struct hard_iface *if_outgoing);
36int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb); 34int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb);
37void send_outstanding_bat_packet(struct work_struct *work); 35void send_outstanding_bat_packet(struct work_struct *work);
38void purge_outstanding_packets(struct bat_priv *bat_priv, 36void purge_outstanding_packets(struct bat_priv *bat_priv,
39 struct batman_if *batman_if); 37 struct hard_iface *hard_iface);
40 38
41#endif /* _NET_BATMAN_ADV_SEND_H_ */ 39#endif /* _NET_BATMAN_ADV_SEND_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e89ede192ed0..d5aa60999e83 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -26,18 +26,15 @@
26#include "send.h" 26#include "send.h"
27#include "bat_debugfs.h" 27#include "bat_debugfs.h"
28#include "translation-table.h" 28#include "translation-table.h"
29#include "types.h"
30#include "hash.h" 29#include "hash.h"
31#include "gateway_common.h" 30#include "gateway_common.h"
32#include "gateway_client.h" 31#include "gateway_client.h"
33#include "send.h"
34#include "bat_sysfs.h" 32#include "bat_sysfs.h"
35#include <linux/slab.h> 33#include <linux/slab.h>
36#include <linux/ethtool.h> 34#include <linux/ethtool.h>
37#include <linux/etherdevice.h> 35#include <linux/etherdevice.h>
38#include <linux/if_vlan.h> 36#include <linux/if_vlan.h>
39#include "unicast.h" 37#include "unicast.h"
40#include "routing.h"
41 38
42 39
43static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); 40static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
@@ -46,8 +43,6 @@ static void bat_get_drvinfo(struct net_device *dev,
46static u32 bat_get_msglevel(struct net_device *dev); 43static u32 bat_get_msglevel(struct net_device *dev);
47static void bat_set_msglevel(struct net_device *dev, u32 value); 44static void bat_set_msglevel(struct net_device *dev, u32 value);
48static u32 bat_get_link(struct net_device *dev); 45static u32 bat_get_link(struct net_device *dev);
49static u32 bat_get_rx_csum(struct net_device *dev);
50static int bat_set_rx_csum(struct net_device *dev, u32 data);
51 46
52static const struct ethtool_ops bat_ethtool_ops = { 47static const struct ethtool_ops bat_ethtool_ops = {
53 .get_settings = bat_get_settings, 48 .get_settings = bat_get_settings,
@@ -55,8 +50,6 @@ static const struct ethtool_ops bat_ethtool_ops = {
55 .get_msglevel = bat_get_msglevel, 50 .get_msglevel = bat_get_msglevel,
56 .set_msglevel = bat_set_msglevel, 51 .set_msglevel = bat_set_msglevel,
57 .get_link = bat_get_link, 52 .get_link = bat_get_link,
58 .get_rx_csum = bat_get_rx_csum,
59 .set_rx_csum = bat_set_rx_csum
60}; 53};
61 54
62int my_skb_head_push(struct sk_buff *skb, unsigned int len) 55int my_skb_head_push(struct sk_buff *skb, unsigned int len)
@@ -79,141 +72,371 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len)
79 return 0; 72 return 0;
80} 73}
81 74
82static void softif_neigh_free_ref(struct kref *refcount) 75static void softif_neigh_free_ref(struct softif_neigh *softif_neigh)
83{ 76{
84 struct softif_neigh *softif_neigh; 77 if (atomic_dec_and_test(&softif_neigh->refcount))
85 78 kfree_rcu(softif_neigh, rcu);
86 softif_neigh = container_of(refcount, struct softif_neigh, refcount);
87 kfree(softif_neigh);
88} 79}
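
The softif_neigh code drops its kref in favour of a plain atomic refcount taken with atomic_inc_not_zero() and released with a free on the final put (kfree_rcu() in the kernel). A userspace sketch of the same scheme using C11 atomics; the RCU grace period is not modelled here.

/* illustrative refcount scheme: inc-not-zero get, free on final put */
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>
#include <stdbool.h>

struct neigh {
	atomic_int refcount;
	char addr[18];
};

static bool neigh_get(struct neigh *n)
{
	int cur = atomic_load(&n->refcount);

	/* equivalent of atomic_inc_not_zero(): never resurrect a dying object */
	while (cur != 0) {
		if (atomic_compare_exchange_weak(&n->refcount, &cur, cur + 1))
			return true;
	}
	return false;
}

static void neigh_put(struct neigh *n)
{
	if (atomic_fetch_sub(&n->refcount, 1) == 1)
		free(n);		/* kfree_rcu() in the kernel version */
}

int main(void)
{
	struct neigh *n = malloc(sizeof(*n));

	atomic_init(&n->refcount, 1);	/* the list's reference */
	if (neigh_get(n))		/* a second user takes a reference */
		printf("got reference\n");
	neigh_put(n);			/* list drops its reference */
	neigh_put(n);			/* last put frees the object */
	return 0;
}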
89 80
90static void softif_neigh_free_rcu(struct rcu_head *rcu) 81static void softif_neigh_vid_free_rcu(struct rcu_head *rcu)
91{ 82{
83 struct softif_neigh_vid *softif_neigh_vid;
92 struct softif_neigh *softif_neigh; 84 struct softif_neigh *softif_neigh;
85 struct hlist_node *node, *node_tmp;
86 struct bat_priv *bat_priv;
87
88 softif_neigh_vid = container_of(rcu, struct softif_neigh_vid, rcu);
89 bat_priv = softif_neigh_vid->bat_priv;
90
91 spin_lock_bh(&bat_priv->softif_neigh_lock);
92 hlist_for_each_entry_safe(softif_neigh, node, node_tmp,
93 &softif_neigh_vid->softif_neigh_list, list) {
94 hlist_del_rcu(&softif_neigh->list);
95 softif_neigh_free_ref(softif_neigh);
96 }
97 spin_unlock_bh(&bat_priv->softif_neigh_lock);
93 98
94 softif_neigh = container_of(rcu, struct softif_neigh, rcu); 99 kfree(softif_neigh_vid);
95 kref_put(&softif_neigh->refcount, softif_neigh_free_ref);
96} 100}
97 101
98void softif_neigh_purge(struct bat_priv *bat_priv) 102static void softif_neigh_vid_free_ref(struct softif_neigh_vid *softif_neigh_vid)
99{ 103{
100 struct softif_neigh *softif_neigh, *softif_neigh_tmp; 104 if (atomic_dec_and_test(&softif_neigh_vid->refcount))
101 struct hlist_node *node, *node_tmp; 105 call_rcu(&softif_neigh_vid->rcu, softif_neigh_vid_free_rcu);
106}
102 107
103 spin_lock_bh(&bat_priv->softif_neigh_lock); 108static struct softif_neigh_vid *softif_neigh_vid_get(struct bat_priv *bat_priv,
109 short vid)
110{
111 struct softif_neigh_vid *softif_neigh_vid;
112 struct hlist_node *node;
104 113
105 hlist_for_each_entry_safe(softif_neigh, node, node_tmp, 114 rcu_read_lock();
106 &bat_priv->softif_neigh_list, list) { 115 hlist_for_each_entry_rcu(softif_neigh_vid, node,
116 &bat_priv->softif_neigh_vids, list) {
117 if (softif_neigh_vid->vid != vid)
118 continue;
107 119
108 if ((!time_after(jiffies, softif_neigh->last_seen + 120 if (!atomic_inc_not_zero(&softif_neigh_vid->refcount))
109 msecs_to_jiffies(SOFTIF_NEIGH_TIMEOUT))) &&
110 (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE))
111 continue; 121 continue;
112 122
113 hlist_del_rcu(&softif_neigh->list); 123 goto out;
124 }
114 125
115 if (bat_priv->softif_neigh == softif_neigh) { 126 softif_neigh_vid = kzalloc(sizeof(struct softif_neigh_vid),
116 bat_dbg(DBG_ROUTES, bat_priv, 127 GFP_ATOMIC);
117 "Current mesh exit point '%pM' vanished " 128 if (!softif_neigh_vid)
118 "(vid: %d).\n", 129 goto out;
119 softif_neigh->addr, softif_neigh->vid);
120 softif_neigh_tmp = bat_priv->softif_neigh;
121 bat_priv->softif_neigh = NULL;
122 kref_put(&softif_neigh_tmp->refcount,
123 softif_neigh_free_ref);
124 }
125 130
126 call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); 131 softif_neigh_vid->vid = vid;
127 } 132 softif_neigh_vid->bat_priv = bat_priv;
128 133
129 spin_unlock_bh(&bat_priv->softif_neigh_lock); 134 /* initialize with 2 - caller decrements counter by one */
135 atomic_set(&softif_neigh_vid->refcount, 2);
136 INIT_HLIST_HEAD(&softif_neigh_vid->softif_neigh_list);
137 INIT_HLIST_NODE(&softif_neigh_vid->list);
138 spin_lock_bh(&bat_priv->softif_neigh_vid_lock);
139 hlist_add_head_rcu(&softif_neigh_vid->list,
140 &bat_priv->softif_neigh_vids);
141 spin_unlock_bh(&bat_priv->softif_neigh_vid_lock);
142
143out:
144 rcu_read_unlock();
145 return softif_neigh_vid;
130} 146}
131 147
132static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, 148static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv,
133 uint8_t *addr, short vid) 149 uint8_t *addr, short vid)
134{ 150{
135 struct softif_neigh *softif_neigh; 151 struct softif_neigh_vid *softif_neigh_vid;
152 struct softif_neigh *softif_neigh = NULL;
136 struct hlist_node *node; 153 struct hlist_node *node;
137 154
155 softif_neigh_vid = softif_neigh_vid_get(bat_priv, vid);
156 if (!softif_neigh_vid)
157 goto out;
158
138 rcu_read_lock(); 159 rcu_read_lock();
139 hlist_for_each_entry_rcu(softif_neigh, node, 160 hlist_for_each_entry_rcu(softif_neigh, node,
140 &bat_priv->softif_neigh_list, list) { 161 &softif_neigh_vid->softif_neigh_list,
141 if (memcmp(softif_neigh->addr, addr, ETH_ALEN) != 0) 162 list) {
163 if (!compare_eth(softif_neigh->addr, addr))
142 continue; 164 continue;
143 165
144 if (softif_neigh->vid != vid) 166 if (!atomic_inc_not_zero(&softif_neigh->refcount))
145 continue; 167 continue;
146 168
147 softif_neigh->last_seen = jiffies; 169 softif_neigh->last_seen = jiffies;
148 goto found; 170 goto unlock;
149 } 171 }
150 172
151 softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC); 173 softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC);
152 if (!softif_neigh) 174 if (!softif_neigh)
153 goto out; 175 goto unlock;
154 176
155 memcpy(softif_neigh->addr, addr, ETH_ALEN); 177 memcpy(softif_neigh->addr, addr, ETH_ALEN);
156 softif_neigh->vid = vid;
157 softif_neigh->last_seen = jiffies; 178 softif_neigh->last_seen = jiffies;
158 kref_init(&softif_neigh->refcount); 179 /* initialize with 2 - caller decrements counter by one */
180 atomic_set(&softif_neigh->refcount, 2);
159 181
160 INIT_HLIST_NODE(&softif_neigh->list); 182 INIT_HLIST_NODE(&softif_neigh->list);
161 spin_lock_bh(&bat_priv->softif_neigh_lock); 183 spin_lock_bh(&bat_priv->softif_neigh_lock);
162 hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list); 184 hlist_add_head_rcu(&softif_neigh->list,
185 &softif_neigh_vid->softif_neigh_list);
163 spin_unlock_bh(&bat_priv->softif_neigh_lock); 186 spin_unlock_bh(&bat_priv->softif_neigh_lock);
164 187
165found: 188unlock:
166 kref_get(&softif_neigh->refcount); 189 rcu_read_unlock();
167out: 190out:
191 if (softif_neigh_vid)
192 softif_neigh_vid_free_ref(softif_neigh_vid);
193 return softif_neigh;
194}
195
196static struct softif_neigh *softif_neigh_get_selected(
197 struct softif_neigh_vid *softif_neigh_vid)
198{
199 struct softif_neigh *softif_neigh;
200
201 rcu_read_lock();
202 softif_neigh = rcu_dereference(softif_neigh_vid->softif_neigh);
203
204 if (softif_neigh && !atomic_inc_not_zero(&softif_neigh->refcount))
205 softif_neigh = NULL;
206
168 rcu_read_unlock(); 207 rcu_read_unlock();
169 return softif_neigh; 208 return softif_neigh;
170} 209}
171 210
211static struct softif_neigh *softif_neigh_vid_get_selected(
212 struct bat_priv *bat_priv,
213 short vid)
214{
215 struct softif_neigh_vid *softif_neigh_vid;
216 struct softif_neigh *softif_neigh = NULL;
217
218 softif_neigh_vid = softif_neigh_vid_get(bat_priv, vid);
219 if (!softif_neigh_vid)
220 goto out;
221
222 softif_neigh = softif_neigh_get_selected(softif_neigh_vid);
223out:
224 if (softif_neigh_vid)
225 softif_neigh_vid_free_ref(softif_neigh_vid);
226 return softif_neigh;
227}
228
229static void softif_neigh_vid_select(struct bat_priv *bat_priv,
230 struct softif_neigh *new_neigh,
231 short vid)
232{
233 struct softif_neigh_vid *softif_neigh_vid;
234 struct softif_neigh *curr_neigh;
235
236 softif_neigh_vid = softif_neigh_vid_get(bat_priv, vid);
237 if (!softif_neigh_vid)
238 goto out;
239
240 spin_lock_bh(&bat_priv->softif_neigh_lock);
241
242 if (new_neigh && !atomic_inc_not_zero(&new_neigh->refcount))
243 new_neigh = NULL;
244
245 curr_neigh = softif_neigh_vid->softif_neigh;
246 rcu_assign_pointer(softif_neigh_vid->softif_neigh, new_neigh);
247
248 if ((curr_neigh) && (!new_neigh))
249 bat_dbg(DBG_ROUTES, bat_priv,
250 "Removing mesh exit point on vid: %d (prev: %pM).\n",
251 vid, curr_neigh->addr);
252 else if ((curr_neigh) && (new_neigh))
253 bat_dbg(DBG_ROUTES, bat_priv,
254 "Changing mesh exit point on vid: %d from %pM "
255 "to %pM.\n", vid, curr_neigh->addr, new_neigh->addr);
256 else if ((!curr_neigh) && (new_neigh))
257 bat_dbg(DBG_ROUTES, bat_priv,
258 "Setting mesh exit point on vid: %d to %pM.\n",
259 vid, new_neigh->addr);
260
261 if (curr_neigh)
262 softif_neigh_free_ref(curr_neigh);
263
264 spin_unlock_bh(&bat_priv->softif_neigh_lock);
265
266out:
267 if (softif_neigh_vid)
268 softif_neigh_vid_free_ref(softif_neigh_vid);
269}
270
271static void softif_neigh_vid_deselect(struct bat_priv *bat_priv,
272 struct softif_neigh_vid *softif_neigh_vid)
273{
274 struct softif_neigh *curr_neigh;
275 struct softif_neigh *softif_neigh = NULL, *softif_neigh_tmp;
276 struct hard_iface *primary_if = NULL;
277 struct hlist_node *node;
278
279 primary_if = primary_if_get_selected(bat_priv);
280 if (!primary_if)
281 goto out;
282
283 /* find new softif_neigh immediately to avoid temporary loops */
284 rcu_read_lock();
285 curr_neigh = rcu_dereference(softif_neigh_vid->softif_neigh);
286
287 hlist_for_each_entry_rcu(softif_neigh_tmp, node,
288 &softif_neigh_vid->softif_neigh_list,
289 list) {
290 if (softif_neigh_tmp == curr_neigh)
291 continue;
292
293 /* we got a neighbor but its mac is 'bigger' than ours */
294 if (memcmp(primary_if->net_dev->dev_addr,
295 softif_neigh_tmp->addr, ETH_ALEN) < 0)
296 continue;
297
298 if (!atomic_inc_not_zero(&softif_neigh_tmp->refcount))
299 continue;
300
301 softif_neigh = softif_neigh_tmp;
302 goto unlock;
303 }
304
305unlock:
306 rcu_read_unlock();
307out:
308 softif_neigh_vid_select(bat_priv, softif_neigh, softif_neigh_vid->vid);
309
310 if (primary_if)
311 hardif_free_ref(primary_if);
312 if (softif_neigh)
313 softif_neigh_free_ref(softif_neigh);
314}
315
172int softif_neigh_seq_print_text(struct seq_file *seq, void *offset) 316int softif_neigh_seq_print_text(struct seq_file *seq, void *offset)
173{ 317{
174 struct net_device *net_dev = (struct net_device *)seq->private; 318 struct net_device *net_dev = (struct net_device *)seq->private;
175 struct bat_priv *bat_priv = netdev_priv(net_dev); 319 struct bat_priv *bat_priv = netdev_priv(net_dev);
320 struct softif_neigh_vid *softif_neigh_vid;
176 struct softif_neigh *softif_neigh; 321 struct softif_neigh *softif_neigh;
177 struct hlist_node *node; 322 struct hard_iface *primary_if;
178 size_t buf_size, pos; 323 struct hlist_node *node, *node_tmp;
179 char *buff; 324 struct softif_neigh *curr_softif_neigh;
325 int ret = 0, last_seen_secs, last_seen_msecs;
326
327 primary_if = primary_if_get_selected(bat_priv);
328 if (!primary_if) {
329 ret = seq_printf(seq, "BATMAN mesh %s disabled - "
330 "please specify interfaces to enable it\n",
331 net_dev->name);
332 goto out;
333 }
180 334
181 if (!bat_priv->primary_if) { 335 if (primary_if->if_status != IF_ACTIVE) {
182 return seq_printf(seq, "BATMAN mesh %s disabled - " 336 ret = seq_printf(seq, "BATMAN mesh %s "
183 "please specify interfaces to enable it\n", 337 "disabled - primary interface not active\n",
184 net_dev->name); 338 net_dev->name);
339 goto out;
185 } 340 }
186 341
187 seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name); 342 seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name);
188 343
189 buf_size = 1;
190 /* Estimate length for: " xx:xx:xx:xx:xx:xx\n" */
191 rcu_read_lock(); 344 rcu_read_lock();
192 hlist_for_each_entry_rcu(softif_neigh, node, 345 hlist_for_each_entry_rcu(softif_neigh_vid, node,
193 &bat_priv->softif_neigh_list, list) 346 &bat_priv->softif_neigh_vids, list) {
194 buf_size += 30; 347 seq_printf(seq, " %-15s %s on vid: %d\n",
348 "Originator", "last-seen", softif_neigh_vid->vid);
349
350 curr_softif_neigh = softif_neigh_get_selected(softif_neigh_vid);
351
352 hlist_for_each_entry_rcu(softif_neigh, node_tmp,
353 &softif_neigh_vid->softif_neigh_list,
354 list) {
355 last_seen_secs = jiffies_to_msecs(jiffies -
356 softif_neigh->last_seen) / 1000;
357 last_seen_msecs = jiffies_to_msecs(jiffies -
358 softif_neigh->last_seen) % 1000;
359 seq_printf(seq, "%s %pM %3i.%03is\n",
360 curr_softif_neigh == softif_neigh
361 ? "=>" : " ", softif_neigh->addr,
362 last_seen_secs, last_seen_msecs);
363 }
364
365 if (curr_softif_neigh)
366 softif_neigh_free_ref(curr_softif_neigh);
367
368 seq_printf(seq, "\n");
369 }
195 rcu_read_unlock(); 370 rcu_read_unlock();
196 371
197 buff = kmalloc(buf_size, GFP_ATOMIC); 372out:
198 if (!buff) 373 if (primary_if)
199 return -ENOMEM; 374 hardif_free_ref(primary_if);
375 return ret;
376}
200 377
201 buff[0] = '\0'; 378void softif_neigh_purge(struct bat_priv *bat_priv)
202 pos = 0; 379{
380 struct softif_neigh *softif_neigh, *curr_softif_neigh;
381 struct softif_neigh_vid *softif_neigh_vid;
382 struct hlist_node *node, *node_tmp, *node_tmp2;
383 char do_deselect;
203 384
204 rcu_read_lock(); 385 rcu_read_lock();
205 hlist_for_each_entry_rcu(softif_neigh, node, 386 hlist_for_each_entry_rcu(softif_neigh_vid, node,
206 &bat_priv->softif_neigh_list, list) { 387 &bat_priv->softif_neigh_vids, list) {
207 pos += snprintf(buff + pos, 31, "%s %pM (vid: %d)\n", 388 if (!atomic_inc_not_zero(&softif_neigh_vid->refcount))
208 bat_priv->softif_neigh == softif_neigh 389 continue;
209 ? "=>" : " ", softif_neigh->addr, 390
210 softif_neigh->vid); 391 curr_softif_neigh = softif_neigh_get_selected(softif_neigh_vid);
392 do_deselect = 0;
393
394 spin_lock_bh(&bat_priv->softif_neigh_lock);
395 hlist_for_each_entry_safe(softif_neigh, node_tmp, node_tmp2,
396 &softif_neigh_vid->softif_neigh_list,
397 list) {
398 if ((!time_after(jiffies, softif_neigh->last_seen +
399 msecs_to_jiffies(SOFTIF_NEIGH_TIMEOUT))) &&
400 (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE))
401 continue;
402
403 if (curr_softif_neigh == softif_neigh) {
404 bat_dbg(DBG_ROUTES, bat_priv,
405 "Current mesh exit point on vid: %d "
406 "'%pM' vanished.\n",
407 softif_neigh_vid->vid,
408 softif_neigh->addr);
409 do_deselect = 1;
410 }
411
412 hlist_del_rcu(&softif_neigh->list);
413 softif_neigh_free_ref(softif_neigh);
414 }
415 spin_unlock_bh(&bat_priv->softif_neigh_lock);
416
417 /* soft_neigh_vid_deselect() needs to acquire the
418 * softif_neigh_lock */
419 if (do_deselect)
420 softif_neigh_vid_deselect(bat_priv, softif_neigh_vid);
421
422 if (curr_softif_neigh)
423 softif_neigh_free_ref(curr_softif_neigh);
424
425 softif_neigh_vid_free_ref(softif_neigh_vid);
211 } 426 }
212 rcu_read_unlock(); 427 rcu_read_unlock();
213 428
214 seq_printf(seq, "%s", buff); 429 spin_lock_bh(&bat_priv->softif_neigh_vid_lock);
215 kfree(buff); 430 hlist_for_each_entry_safe(softif_neigh_vid, node, node_tmp,
216 return 0; 431 &bat_priv->softif_neigh_vids, list) {
432 if (!hlist_empty(&softif_neigh_vid->softif_neigh_list))
433 continue;
434
435 hlist_del_rcu(&softif_neigh_vid->list);
436 softif_neigh_vid_free_ref(softif_neigh_vid);
437 }
438 spin_unlock_bh(&bat_priv->softif_neigh_vid_lock);
439
217} 440}
218 441
219static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, 442static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
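
The hunk above converts softif_neigh from kref-based reference counting to a bare atomic_t released through kfree_rcu(), and hangs the neighbours off per-VLAN softif_neigh_vid lists. The recurring "initialize with 2 - caller decrements counter by one" comment describes a handover: the list owns one reference, the caller that just created the object owns the other and drops it when done. A minimal userspace sketch of that discipline, using C11 atomics (the types and helper names below are illustrative, not taken from the kernel tree):

	#include <stdatomic.h>
	#include <stdlib.h>
	#include <string.h>

	#define ETH_ALEN 6

	struct neigh {
		unsigned char addr[ETH_ALEN];
		atomic_int refcount;
		struct neigh *next;		/* stand-in for the kernel hlist */
	};

	static struct neigh *neigh_list;

	/* userspace analogue of atomic_inc_not_zero(): only take a reference
	 * if the count has not already dropped to zero */
	static int inc_not_zero(atomic_int *v)
	{
		int old = atomic_load(v);

		while (old > 0)
			if (atomic_compare_exchange_weak(v, &old, old + 1))
				return 1;

		return 0;
	}

	/* lookup-or-create: the list keeps one reference, the caller gets one,
	 * which is why a fresh entry starts at 2 (list locking/RCU omitted) */
	static struct neigh *neigh_get(const unsigned char *addr)
	{
		struct neigh *n;

		for (n = neigh_list; n; n = n->next) {
			if (memcmp(n->addr, addr, ETH_ALEN) != 0)
				continue;
			if (!inc_not_zero(&n->refcount))
				continue;
			return n;
		}

		n = calloc(1, sizeof(*n));
		if (!n)
			return NULL;

		memcpy(n->addr, addr, ETH_ALEN);
		atomic_init(&n->refcount, 2);	/* one for the list, one for the caller */
		n->next = neigh_list;
		neigh_list = n;
		return n;
	}

	/* drop a reference; the last one frees (the kernel defers the free
	 * with kfree_rcu() so concurrent RCU readers stay safe) */
	static void neigh_put(struct neigh *n)
	{
		if (atomic_fetch_sub(&n->refcount, 1) == 1)
			free(n);
	}

The kernel variant additionally relies on RCU so that list readers never see freed memory; the sketch omits that and the list locking entirely.
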
@@ -222,7 +445,9 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
222 struct bat_priv *bat_priv = netdev_priv(dev); 445 struct bat_priv *bat_priv = netdev_priv(dev);
223 struct ethhdr *ethhdr = (struct ethhdr *)skb->data; 446 struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
224 struct batman_packet *batman_packet; 447 struct batman_packet *batman_packet;
225 struct softif_neigh *softif_neigh, *softif_neigh_tmp; 448 struct softif_neigh *softif_neigh = NULL;
449 struct hard_iface *primary_if = NULL;
450 struct softif_neigh *curr_softif_neigh = NULL;
226 451
227 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) 452 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
228 batman_packet = (struct batman_packet *) 453 batman_packet = (struct batman_packet *)
@@ -231,63 +456,52 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
231 batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN); 456 batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN);
232 457
233 if (batman_packet->version != COMPAT_VERSION) 458 if (batman_packet->version != COMPAT_VERSION)
234 goto err; 459 goto out;
235 460
236 if (batman_packet->packet_type != BAT_PACKET) 461 if (batman_packet->packet_type != BAT_PACKET)
237 goto err; 462 goto out;
238 463
239 if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) 464 if (!(batman_packet->flags & PRIMARIES_FIRST_HOP))
240 goto err; 465 goto out;
241 466
242 if (is_my_mac(batman_packet->orig)) 467 if (is_my_mac(batman_packet->orig))
243 goto err; 468 goto out;
244 469
245 softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid); 470 softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid);
246
247 if (!softif_neigh) 471 if (!softif_neigh)
248 goto err; 472 goto out;
249 473
250 if (bat_priv->softif_neigh == softif_neigh) 474 curr_softif_neigh = softif_neigh_vid_get_selected(bat_priv, vid);
475 if (curr_softif_neigh == softif_neigh)
476 goto out;
477
478 primary_if = primary_if_get_selected(bat_priv);
479 if (!primary_if)
251 goto out; 480 goto out;
252 481
253 /* we got a neighbor but its mac is 'bigger' than ours */ 482 /* we got a neighbor but its mac is 'bigger' than ours */
254 if (memcmp(bat_priv->primary_if->net_dev->dev_addr, 483 if (memcmp(primary_if->net_dev->dev_addr,
255 softif_neigh->addr, ETH_ALEN) < 0) 484 softif_neigh->addr, ETH_ALEN) < 0)
256 goto out; 485 goto out;
257 486
258 /* switch to new 'smallest neighbor' */
259 if ((bat_priv->softif_neigh) &&
260 (memcmp(softif_neigh->addr, bat_priv->softif_neigh->addr,
261 ETH_ALEN) < 0)) {
262 bat_dbg(DBG_ROUTES, bat_priv,
263 "Changing mesh exit point from %pM (vid: %d) "
264 "to %pM (vid: %d).\n",
265 bat_priv->softif_neigh->addr,
266 bat_priv->softif_neigh->vid,
267 softif_neigh->addr, softif_neigh->vid);
268 softif_neigh_tmp = bat_priv->softif_neigh;
269 bat_priv->softif_neigh = softif_neigh;
270 kref_put(&softif_neigh_tmp->refcount, softif_neigh_free_ref);
271 /* we need to hold the additional reference */
272 goto err;
273 }
274
275 /* close own batX device and use softif_neigh as exit node */ 487 /* close own batX device and use softif_neigh as exit node */
276 if ((!bat_priv->softif_neigh) && 488 if (!curr_softif_neigh) {
277 (memcmp(softif_neigh->addr, 489 softif_neigh_vid_select(bat_priv, softif_neigh, vid);
278 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN) < 0)) { 490 goto out;
279 bat_dbg(DBG_ROUTES, bat_priv,
280 "Setting mesh exit point to %pM (vid: %d).\n",
281 softif_neigh->addr, softif_neigh->vid);
282 bat_priv->softif_neigh = softif_neigh;
283 /* we need to hold the additional reference */
284 goto err;
285 } 491 }
286 492
493 /* switch to new 'smallest neighbor' */
494 if (memcmp(softif_neigh->addr, curr_softif_neigh->addr, ETH_ALEN) < 0)
495 softif_neigh_vid_select(bat_priv, softif_neigh, vid);
496
287out: 497out:
288 kref_put(&softif_neigh->refcount, softif_neigh_free_ref);
289err:
290 kfree_skb(skb); 498 kfree_skb(skb);
499 if (softif_neigh)
500 softif_neigh_free_ref(softif_neigh);
501 if (curr_softif_neigh)
502 softif_neigh_free_ref(curr_softif_neigh);
503 if (primary_if)
504 hardif_free_ref(primary_if);
291 return; 505 return;
292} 506}
293 507
@@ -317,11 +531,11 @@ static int interface_set_mac_addr(struct net_device *dev, void *p)
317 if (!is_valid_ether_addr(addr->sa_data)) 531 if (!is_valid_ether_addr(addr->sa_data))
318 return -EADDRNOTAVAIL; 532 return -EADDRNOTAVAIL;
319 533
320 /* only modify hna-table if it has been initialised before */ 534 /* only modify transtable if it has been initialised before */
321 if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { 535 if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) {
322 hna_local_remove(bat_priv, dev->dev_addr, 536 tt_local_remove(bat_priv, dev->dev_addr,
323 "mac address changed"); 537 "mac address changed");
324 hna_local_add(dev, addr->sa_data); 538 tt_local_add(dev, addr->sa_data);
325 } 539 }
326 540
327 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); 541 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -343,8 +557,10 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
343{ 557{
344 struct ethhdr *ethhdr = (struct ethhdr *)skb->data; 558 struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
345 struct bat_priv *bat_priv = netdev_priv(soft_iface); 559 struct bat_priv *bat_priv = netdev_priv(soft_iface);
560 struct hard_iface *primary_if = NULL;
346 struct bcast_packet *bcast_packet; 561 struct bcast_packet *bcast_packet;
347 struct vlan_ethhdr *vhdr; 562 struct vlan_ethhdr *vhdr;
563 struct softif_neigh *curr_softif_neigh = NULL;
348 int data_len = skb->len, ret; 564 int data_len = skb->len, ret;
349 short vid = -1; 565 short vid = -1;
350 bool do_bcast = false; 566 bool do_bcast = false;
@@ -372,11 +588,12 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
372 * if we have a another chosen mesh exit node in range 588 * if we have a another chosen mesh exit node in range
373 * it will transport the packets to the mesh 589 * it will transport the packets to the mesh
374 */ 590 */
375 if ((bat_priv->softif_neigh) && (bat_priv->softif_neigh->vid == vid)) 591 curr_softif_neigh = softif_neigh_vid_get_selected(bat_priv, vid);
592 if (curr_softif_neigh)
376 goto dropped; 593 goto dropped;
377 594
378 /* TODO: check this for locks */ 595 /* TODO: check this for locks */
379 hna_local_add(soft_iface, ethhdr->h_source); 596 tt_local_add(soft_iface, ethhdr->h_source);
380 597
381 if (is_multicast_ether_addr(ethhdr->h_dest)) { 598 if (is_multicast_ether_addr(ethhdr->h_dest)) {
382 ret = gw_is_target(bat_priv, skb); 599 ret = gw_is_target(bat_priv, skb);
@@ -390,7 +607,8 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
390 607
391 /* ethernet packet should be broadcasted */ 608 /* ethernet packet should be broadcasted */
392 if (do_bcast) { 609 if (do_bcast) {
393 if (!bat_priv->primary_if) 610 primary_if = primary_if_get_selected(bat_priv);
611 if (!primary_if)
394 goto dropped; 612 goto dropped;
395 613
396 if (my_skb_head_push(skb, sizeof(struct bcast_packet)) < 0) 614 if (my_skb_head_push(skb, sizeof(struct bcast_packet)) < 0)
@@ -406,7 +624,7 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
406 /* hw address of first interface is the orig mac because only 624 /* hw address of first interface is the orig mac because only
407 * this mac is known throughout the mesh */ 625 * this mac is known throughout the mesh */
408 memcpy(bcast_packet->orig, 626 memcpy(bcast_packet->orig,
409 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); 627 primary_if->net_dev->dev_addr, ETH_ALEN);
410 628
411 /* set broadcast sequence number */ 629 /* set broadcast sequence number */
412 bcast_packet->seqno = 630 bcast_packet->seqno =
@@ -434,17 +652,22 @@ dropped:
434dropped_freed: 652dropped_freed:
435 bat_priv->stats.tx_dropped++; 653 bat_priv->stats.tx_dropped++;
436end: 654end:
655 if (curr_softif_neigh)
656 softif_neigh_free_ref(curr_softif_neigh);
657 if (primary_if)
658 hardif_free_ref(primary_if);
437 return NETDEV_TX_OK; 659 return NETDEV_TX_OK;
438} 660}
439 661
440void interface_rx(struct net_device *soft_iface, 662void interface_rx(struct net_device *soft_iface,
441 struct sk_buff *skb, struct batman_if *recv_if, 663 struct sk_buff *skb, struct hard_iface *recv_if,
442 int hdr_size) 664 int hdr_size)
443{ 665{
444 struct bat_priv *bat_priv = netdev_priv(soft_iface); 666 struct bat_priv *bat_priv = netdev_priv(soft_iface);
445 struct unicast_packet *unicast_packet; 667 struct unicast_packet *unicast_packet;
446 struct ethhdr *ethhdr; 668 struct ethhdr *ethhdr;
447 struct vlan_ethhdr *vhdr; 669 struct vlan_ethhdr *vhdr;
670 struct softif_neigh *curr_softif_neigh = NULL;
448 short vid = -1; 671 short vid = -1;
449 int ret; 672 int ret;
450 673
@@ -474,7 +697,8 @@ void interface_rx(struct net_device *soft_iface,
474 * if we have a another chosen mesh exit node in range 697 * if we have a another chosen mesh exit node in range
475 * it will transport the packets to the non-mesh network 698 * it will transport the packets to the non-mesh network
476 */ 699 */
477 if ((bat_priv->softif_neigh) && (bat_priv->softif_neigh->vid == vid)) { 700 curr_softif_neigh = softif_neigh_vid_get_selected(bat_priv, vid);
701 if (curr_softif_neigh) {
478 skb_push(skb, hdr_size); 702 skb_push(skb, hdr_size);
479 unicast_packet = (struct unicast_packet *)skb->data; 703 unicast_packet = (struct unicast_packet *)skb->data;
480 704
@@ -485,8 +709,8 @@ void interface_rx(struct net_device *soft_iface,
485 skb_reset_mac_header(skb); 709 skb_reset_mac_header(skb);
486 710
487 memcpy(unicast_packet->dest, 711 memcpy(unicast_packet->dest,
488 bat_priv->softif_neigh->addr, ETH_ALEN); 712 curr_softif_neigh->addr, ETH_ALEN);
489 ret = route_unicast_packet(skb, recv_if, hdr_size); 713 ret = route_unicast_packet(skb, recv_if);
490 if (ret == NET_RX_DROP) 714 if (ret == NET_RX_DROP)
491 goto dropped; 715 goto dropped;
492 716
@@ -498,7 +722,7 @@ void interface_rx(struct net_device *soft_iface,
498 goto dropped; 722 goto dropped;
499 skb->protocol = eth_type_trans(skb, soft_iface); 723 skb->protocol = eth_type_trans(skb, soft_iface);
500 724
501 /* should not be neccesary anymore as we use skb_pull_rcsum() 725 /* should not be necessary anymore as we use skb_pull_rcsum()
502 * TODO: please verify this and remove this TODO 726 * TODO: please verify this and remove this TODO
503 * -- Dec 21st 2009, Simon Wunderlich */ 727 * -- Dec 21st 2009, Simon Wunderlich */
504 728
@@ -510,11 +734,13 @@ void interface_rx(struct net_device *soft_iface,
510 soft_iface->last_rx = jiffies; 734 soft_iface->last_rx = jiffies;
511 735
512 netif_rx(skb); 736 netif_rx(skb);
513 return; 737 goto out;
514 738
515dropped: 739dropped:
516 kfree_skb(skb); 740 kfree_skb(skb);
517out: 741out:
742 if (curr_softif_neigh)
743 softif_neigh_free_ref(curr_softif_neigh);
518 return; 744 return;
519} 745}
520 746
@@ -548,14 +774,15 @@ static void interface_setup(struct net_device *dev)
548 dev->hard_start_xmit = interface_tx; 774 dev->hard_start_xmit = interface_tx;
549#endif 775#endif
550 dev->destructor = free_netdev; 776 dev->destructor = free_netdev;
777 dev->tx_queue_len = 0;
551 778
552 /** 779 /**
553 * can't call min_mtu, because the needed variables 780 * can't call min_mtu, because the needed variables
554 * have not been initialized yet 781 * have not been initialized yet
555 */ 782 */
556 dev->mtu = ETH_DATA_LEN; 783 dev->mtu = ETH_DATA_LEN;
557 dev->hard_header_len = BAT_HEADER_LEN; /* reserve more space in the 784 /* reserve more space in the skbuff for our header */
558 * skbuff for our header */ 785 dev->hard_header_len = BAT_HEADER_LEN;
559 786
560 /* generate random address */ 787 /* generate random address */
561 random_ether_addr(dev_addr); 788 random_ether_addr(dev_addr);
@@ -580,7 +807,7 @@ struct net_device *softif_create(char *name)
580 goto out; 807 goto out;
581 } 808 }
582 809
583 ret = register_netdev(soft_iface); 810 ret = register_netdevice(soft_iface);
584 if (ret < 0) { 811 if (ret < 0) {
585 pr_err("Unable to register the batman interface '%s': %i\n", 812 pr_err("Unable to register the batman interface '%s': %i\n",
586 name, ret); 813 name, ret);
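
softif_create() switches from register_netdev() to register_netdevice(). The two differ only in locking: register_netdev() takes the RTNL lock itself, whereas register_netdevice() requires the caller to already hold it, which suggests softif_create() is now invoked from a context that holds rtnl_lock (the hard-interface code elsewhere in this series). A short reminder of the two calling conventions (sketch only):

	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>

	/* variant 1: the helper takes and releases RTNL for us */
	static int register_with_lock(struct net_device *dev)
	{
		return register_netdev(dev);
	}

	/* variant 2: the caller is already under rtnl_lock(), as
	 * softif_create() appears to expect after this change */
	static int register_locked(struct net_device *dev)
	{
		ASSERT_RTNL();
		return register_netdevice(dev);
	}
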
@@ -604,11 +831,10 @@ struct net_device *softif_create(char *name)
604 831
605 atomic_set(&bat_priv->mesh_state, MESH_INACTIVE); 832 atomic_set(&bat_priv->mesh_state, MESH_INACTIVE);
606 atomic_set(&bat_priv->bcast_seqno, 1); 833 atomic_set(&bat_priv->bcast_seqno, 1);
607 atomic_set(&bat_priv->hna_local_changed, 0); 834 atomic_set(&bat_priv->tt_local_changed, 0);
608 835
609 bat_priv->primary_if = NULL; 836 bat_priv->primary_if = NULL;
610 bat_priv->num_ifaces = 0; 837 bat_priv->num_ifaces = 0;
611 bat_priv->softif_neigh = NULL;
612 838
613 ret = sysfs_add_meshif(soft_iface); 839 ret = sysfs_add_meshif(soft_iface);
614 if (ret < 0) 840 if (ret < 0)
@@ -646,12 +872,25 @@ void softif_destroy(struct net_device *soft_iface)
646 unregister_netdevice(soft_iface); 872 unregister_netdevice(soft_iface);
647} 873}
648 874
875int softif_is_valid(struct net_device *net_dev)
876{
877#ifdef HAVE_NET_DEVICE_OPS
878 if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
879 return 1;
880#else
881 if (net_dev->hard_start_xmit == interface_tx)
882 return 1;
883#endif
884
885 return 0;
886}
887
649/* ethtool */ 888/* ethtool */
650static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 889static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
651{ 890{
652 cmd->supported = 0; 891 cmd->supported = 0;
653 cmd->advertising = 0; 892 cmd->advertising = 0;
654 cmd->speed = SPEED_10; 893 ethtool_cmd_speed_set(cmd, SPEED_10);
655 cmd->duplex = DUPLEX_FULL; 894 cmd->duplex = DUPLEX_FULL;
656 cmd->port = PORT_TP; 895 cmd->port = PORT_TP;
657 cmd->phy_address = 0; 896 cmd->phy_address = 0;
@@ -686,12 +925,3 @@ static u32 bat_get_link(struct net_device *dev)
686 return 1; 925 return 1;
687} 926}
688 927
689static u32 bat_get_rx_csum(struct net_device *dev)
690{
691 return 0;
692}
693
694static int bat_set_rx_csum(struct net_device *dev, u32 data)
695{
696 return -EOPNOTSUPP;
697}
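
The tail of soft-interface.c drops the no-op get/set_rx_csum ethtool hooks and stops writing cmd->speed directly, using ethtool_cmd_speed_set() instead. The helper also fills the speed_hi field, so the same code stays correct for link speeds that no longer fit in 16 bits; for the fixed 10 Mb/s reported here the result is identical. A hedged sketch of the replacement pattern:

	#include <linux/ethtool.h>

	/* sketch: report a fixed 10 Mb/s full-duplex link, as bat_get_settings()
	 * does above; ethtool_cmd_speed_set() also fills cmd->speed_hi, so the
	 * same code keeps working for speeds above 65535 Mb/s */
	static void fill_fixed_link(struct ethtool_cmd *cmd)
	{
		ethtool_cmd_speed_set(cmd, SPEED_10);
		cmd->duplex = DUPLEX_FULL;
		cmd->port = PORT_TP;
		cmd->autoneg = AUTONEG_DISABLE;
	}
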
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 02b77334d10d..4789b6f2a0b3 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -27,9 +27,10 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset);
27void softif_neigh_purge(struct bat_priv *bat_priv); 27void softif_neigh_purge(struct bat_priv *bat_priv);
28int interface_tx(struct sk_buff *skb, struct net_device *soft_iface); 28int interface_tx(struct sk_buff *skb, struct net_device *soft_iface);
29void interface_rx(struct net_device *soft_iface, 29void interface_rx(struct net_device *soft_iface,
30 struct sk_buff *skb, struct batman_if *recv_if, 30 struct sk_buff *skb, struct hard_iface *recv_if,
31 int hdr_size); 31 int hdr_size);
32struct net_device *softif_create(char *name); 32struct net_device *softif_create(char *name);
33void softif_destroy(struct net_device *soft_iface); 33void softif_destroy(struct net_device *soft_iface);
34int softif_is_valid(struct net_device *net_dev);
34 35
35#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ 36#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
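
The header now exports softif_is_valid(), which recognises a batman soft interface by comparing the device's transmit handler against interface_tx. A hypothetical caller-side sketch (the guard below is an illustration, not code from this series):

	#include <linux/netdevice.h>
	#include "soft-interface.h"

	/* hypothetical guard: refuse to enslave a batX device under another
	 * batman soft interface; only an illustration of how softif_is_valid()
	 * might be used */
	static int can_enslave(struct net_device *hard_dev)
	{
		if (softif_is_valid(hard_dev))
			return 0;	/* would stack batman on top of batman */

		return 1;
	}
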
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a633b5a435e2..7b729660cbfd 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,416 +22,511 @@
22#include "main.h" 22#include "main.h"
23#include "translation-table.h" 23#include "translation-table.h"
24#include "soft-interface.h" 24#include "soft-interface.h"
25#include "types.h" 25#include "hard-interface.h"
26#include "hash.h" 26#include "hash.h"
27#include "originator.h" 27#include "originator.h"
28 28
29static void hna_local_purge(struct work_struct *work); 29static void tt_local_purge(struct work_struct *work);
30static void _hna_global_del_orig(struct bat_priv *bat_priv, 30static void _tt_global_del_orig(struct bat_priv *bat_priv,
31 struct hna_global_entry *hna_global_entry, 31 struct tt_global_entry *tt_global_entry,
32 char *message); 32 char *message);
33 33
34static void hna_local_start_timer(struct bat_priv *bat_priv) 34/* returns 1 if they are the same mac addr */
35static int compare_ltt(struct hlist_node *node, void *data2)
35{ 36{
36 INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge); 37 void *data1 = container_of(node, struct tt_local_entry, hash_entry);
37 queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ); 38
39 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
40}
41
42/* returns 1 if they are the same mac addr */
43static int compare_gtt(struct hlist_node *node, void *data2)
44{
45 void *data1 = container_of(node, struct tt_global_entry, hash_entry);
46
47 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
48}
49
50static void tt_local_start_timer(struct bat_priv *bat_priv)
51{
52 INIT_DELAYED_WORK(&bat_priv->tt_work, tt_local_purge);
53 queue_delayed_work(bat_event_workqueue, &bat_priv->tt_work, 10 * HZ);
54}
55
56static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv,
57 void *data)
58{
59 struct hashtable_t *hash = bat_priv->tt_local_hash;
60 struct hlist_head *head;
61 struct hlist_node *node;
62 struct tt_local_entry *tt_local_entry, *tt_local_entry_tmp = NULL;
63 int index;
64
65 if (!hash)
66 return NULL;
67
68 index = choose_orig(data, hash->size);
69 head = &hash->table[index];
70
71 rcu_read_lock();
72 hlist_for_each_entry_rcu(tt_local_entry, node, head, hash_entry) {
73 if (!compare_eth(tt_local_entry, data))
74 continue;
75
76 tt_local_entry_tmp = tt_local_entry;
77 break;
78 }
79 rcu_read_unlock();
80
81 return tt_local_entry_tmp;
82}
83
84static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv,
85 void *data)
86{
87 struct hashtable_t *hash = bat_priv->tt_global_hash;
88 struct hlist_head *head;
89 struct hlist_node *node;
90 struct tt_global_entry *tt_global_entry;
91 struct tt_global_entry *tt_global_entry_tmp = NULL;
92 int index;
93
94 if (!hash)
95 return NULL;
96
97 index = choose_orig(data, hash->size);
98 head = &hash->table[index];
99
100 rcu_read_lock();
101 hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) {
102 if (!compare_eth(tt_global_entry, data))
103 continue;
104
105 tt_global_entry_tmp = tt_global_entry;
106 break;
107 }
108 rcu_read_unlock();
109
110 return tt_global_entry_tmp;
38} 111}
39 112
40int hna_local_init(struct bat_priv *bat_priv) 113int tt_local_init(struct bat_priv *bat_priv)
41{ 114{
42 if (bat_priv->hna_local_hash) 115 if (bat_priv->tt_local_hash)
43 return 1; 116 return 1;
44 117
45 bat_priv->hna_local_hash = hash_new(1024); 118 bat_priv->tt_local_hash = hash_new(1024);
46 119
47 if (!bat_priv->hna_local_hash) 120 if (!bat_priv->tt_local_hash)
48 return 0; 121 return 0;
49 122
50 atomic_set(&bat_priv->hna_local_changed, 0); 123 atomic_set(&bat_priv->tt_local_changed, 0);
51 hna_local_start_timer(bat_priv); 124 tt_local_start_timer(bat_priv);
52 125
53 return 1; 126 return 1;
54} 127}
55 128
56void hna_local_add(struct net_device *soft_iface, uint8_t *addr) 129void tt_local_add(struct net_device *soft_iface, uint8_t *addr)
57{ 130{
58 struct bat_priv *bat_priv = netdev_priv(soft_iface); 131 struct bat_priv *bat_priv = netdev_priv(soft_iface);
59 struct hna_local_entry *hna_local_entry; 132 struct tt_local_entry *tt_local_entry;
60 struct hna_global_entry *hna_global_entry; 133 struct tt_global_entry *tt_global_entry;
61 int required_bytes; 134 int required_bytes;
62 135
63 spin_lock_bh(&bat_priv->hna_lhash_lock); 136 spin_lock_bh(&bat_priv->tt_lhash_lock);
64 hna_local_entry = 137 tt_local_entry = tt_local_hash_find(bat_priv, addr);
65 ((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash, 138 spin_unlock_bh(&bat_priv->tt_lhash_lock);
66 compare_orig, choose_orig,
67 addr));
68 spin_unlock_bh(&bat_priv->hna_lhash_lock);
69 139
70 if (hna_local_entry) { 140 if (tt_local_entry) {
71 hna_local_entry->last_seen = jiffies; 141 tt_local_entry->last_seen = jiffies;
72 return; 142 return;
73 } 143 }
74 144
75 /* only announce as many hosts as possible in the batman-packet and 145 /* only announce as many hosts as possible in the batman-packet and
76 space in batman_packet->num_hna That also should give a limit to 146 space in batman_packet->num_tt That also should give a limit to
77 MAC-flooding. */ 147 MAC-flooding. */
78 required_bytes = (bat_priv->num_local_hna + 1) * ETH_ALEN; 148 required_bytes = (bat_priv->num_local_tt + 1) * ETH_ALEN;
79 required_bytes += BAT_PACKET_LEN; 149 required_bytes += BAT_PACKET_LEN;
80 150
81 if ((required_bytes > ETH_DATA_LEN) || 151 if ((required_bytes > ETH_DATA_LEN) ||
82 (atomic_read(&bat_priv->aggregated_ogms) && 152 (atomic_read(&bat_priv->aggregated_ogms) &&
83 required_bytes > MAX_AGGREGATION_BYTES) || 153 required_bytes > MAX_AGGREGATION_BYTES) ||
84 (bat_priv->num_local_hna + 1 > 255)) { 154 (bat_priv->num_local_tt + 1 > 255)) {
85 bat_dbg(DBG_ROUTES, bat_priv, 155 bat_dbg(DBG_ROUTES, bat_priv,
86 "Can't add new local hna entry (%pM): " 156 "Can't add new local tt entry (%pM): "
87 "number of local hna entries exceeds packet size\n", 157 "number of local tt entries exceeds packet size\n",
88 addr); 158 addr);
89 return; 159 return;
90 } 160 }
91 161
92 bat_dbg(DBG_ROUTES, bat_priv, 162 bat_dbg(DBG_ROUTES, bat_priv,
93 "Creating new local hna entry: %pM\n", addr); 163 "Creating new local tt entry: %pM\n", addr);
94 164
95 hna_local_entry = kmalloc(sizeof(struct hna_local_entry), GFP_ATOMIC); 165 tt_local_entry = kmalloc(sizeof(struct tt_local_entry), GFP_ATOMIC);
96 if (!hna_local_entry) 166 if (!tt_local_entry)
97 return; 167 return;
98 168
99 memcpy(hna_local_entry->addr, addr, ETH_ALEN); 169 memcpy(tt_local_entry->addr, addr, ETH_ALEN);
100 hna_local_entry->last_seen = jiffies; 170 tt_local_entry->last_seen = jiffies;
101 171
102 /* the batman interface mac address should never be purged */ 172 /* the batman interface mac address should never be purged */
103 if (compare_orig(addr, soft_iface->dev_addr)) 173 if (compare_eth(addr, soft_iface->dev_addr))
104 hna_local_entry->never_purge = 1; 174 tt_local_entry->never_purge = 1;
105 else 175 else
106 hna_local_entry->never_purge = 0; 176 tt_local_entry->never_purge = 0;
107 177
108 spin_lock_bh(&bat_priv->hna_lhash_lock); 178 spin_lock_bh(&bat_priv->tt_lhash_lock);
109 179
110 hash_add(bat_priv->hna_local_hash, compare_orig, choose_orig, 180 hash_add(bat_priv->tt_local_hash, compare_ltt, choose_orig,
111 hna_local_entry); 181 tt_local_entry, &tt_local_entry->hash_entry);
112 bat_priv->num_local_hna++; 182 bat_priv->num_local_tt++;
113 atomic_set(&bat_priv->hna_local_changed, 1); 183 atomic_set(&bat_priv->tt_local_changed, 1);
114 184
115 spin_unlock_bh(&bat_priv->hna_lhash_lock); 185 spin_unlock_bh(&bat_priv->tt_lhash_lock);
116 186
117 /* remove address from global hash if present */ 187 /* remove address from global hash if present */
118 spin_lock_bh(&bat_priv->hna_ghash_lock); 188 spin_lock_bh(&bat_priv->tt_ghash_lock);
119 189
120 hna_global_entry = ((struct hna_global_entry *) 190 tt_global_entry = tt_global_hash_find(bat_priv, addr);
121 hash_find(bat_priv->hna_global_hash,
122 compare_orig, choose_orig, addr));
123 191
124 if (hna_global_entry) 192 if (tt_global_entry)
125 _hna_global_del_orig(bat_priv, hna_global_entry, 193 _tt_global_del_orig(bat_priv, tt_global_entry,
126 "local hna received"); 194 "local tt received");
127 195
128 spin_unlock_bh(&bat_priv->hna_ghash_lock); 196 spin_unlock_bh(&bat_priv->tt_ghash_lock);
129} 197}
130 198
131int hna_local_fill_buffer(struct bat_priv *bat_priv, 199int tt_local_fill_buffer(struct bat_priv *bat_priv,
132 unsigned char *buff, int buff_len) 200 unsigned char *buff, int buff_len)
133{ 201{
134 struct hashtable_t *hash = bat_priv->hna_local_hash; 202 struct hashtable_t *hash = bat_priv->tt_local_hash;
135 struct hna_local_entry *hna_local_entry; 203 struct tt_local_entry *tt_local_entry;
136 struct element_t *bucket; 204 struct hlist_node *node;
137 int i;
138 struct hlist_node *walk;
139 struct hlist_head *head; 205 struct hlist_head *head;
140 int count = 0; 206 int i, count = 0;
141 207
142 spin_lock_bh(&bat_priv->hna_lhash_lock); 208 spin_lock_bh(&bat_priv->tt_lhash_lock);
143 209
144 for (i = 0; i < hash->size; i++) { 210 for (i = 0; i < hash->size; i++) {
145 head = &hash->table[i]; 211 head = &hash->table[i];
146 212
147 hlist_for_each_entry(bucket, walk, head, hlist) { 213 rcu_read_lock();
148 214 hlist_for_each_entry_rcu(tt_local_entry, node,
215 head, hash_entry) {
149 if (buff_len < (count + 1) * ETH_ALEN) 216 if (buff_len < (count + 1) * ETH_ALEN)
150 break; 217 break;
151 218
152 hna_local_entry = bucket->data; 219 memcpy(buff + (count * ETH_ALEN), tt_local_entry->addr,
153 memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr,
154 ETH_ALEN); 220 ETH_ALEN);
155 221
156 count++; 222 count++;
157 } 223 }
224 rcu_read_unlock();
158 } 225 }
159 226
160 /* if we did not get all new local hnas see you next time ;-) */ 227 /* if we did not get all new local tts see you next time ;-) */
161 if (count == bat_priv->num_local_hna) 228 if (count == bat_priv->num_local_tt)
162 atomic_set(&bat_priv->hna_local_changed, 0); 229 atomic_set(&bat_priv->tt_local_changed, 0);
163 230
164 spin_unlock_bh(&bat_priv->hna_lhash_lock); 231 spin_unlock_bh(&bat_priv->tt_lhash_lock);
165 return count; 232 return count;
166} 233}
167 234
168int hna_local_seq_print_text(struct seq_file *seq, void *offset) 235int tt_local_seq_print_text(struct seq_file *seq, void *offset)
169{ 236{
170 struct net_device *net_dev = (struct net_device *)seq->private; 237 struct net_device *net_dev = (struct net_device *)seq->private;
171 struct bat_priv *bat_priv = netdev_priv(net_dev); 238 struct bat_priv *bat_priv = netdev_priv(net_dev);
172 struct hashtable_t *hash = bat_priv->hna_local_hash; 239 struct hashtable_t *hash = bat_priv->tt_local_hash;
173 struct hna_local_entry *hna_local_entry; 240 struct tt_local_entry *tt_local_entry;
174 int i; 241 struct hard_iface *primary_if;
175 struct hlist_node *walk; 242 struct hlist_node *node;
176 struct hlist_head *head; 243 struct hlist_head *head;
177 struct element_t *bucket;
178 size_t buf_size, pos; 244 size_t buf_size, pos;
179 char *buff; 245 char *buff;
246 int i, ret = 0;
247
248 primary_if = primary_if_get_selected(bat_priv);
249 if (!primary_if) {
250 ret = seq_printf(seq, "BATMAN mesh %s disabled - "
251 "please specify interfaces to enable it\n",
252 net_dev->name);
253 goto out;
254 }
180 255
181 if (!bat_priv->primary_if) { 256 if (primary_if->if_status != IF_ACTIVE) {
182 return seq_printf(seq, "BATMAN mesh %s disabled - " 257 ret = seq_printf(seq, "BATMAN mesh %s disabled - "
183 "please specify interfaces to enable it\n", 258 "primary interface not active\n",
184 net_dev->name); 259 net_dev->name);
260 goto out;
185 } 261 }
186 262
187 seq_printf(seq, "Locally retrieved addresses (from %s) " 263 seq_printf(seq, "Locally retrieved addresses (from %s) "
188 "announced via HNA:\n", 264 "announced via TT:\n",
189 net_dev->name); 265 net_dev->name);
190 266
191 spin_lock_bh(&bat_priv->hna_lhash_lock); 267 spin_lock_bh(&bat_priv->tt_lhash_lock);
192 268
193 buf_size = 1; 269 buf_size = 1;
194 /* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */ 270 /* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */
195 for (i = 0; i < hash->size; i++) { 271 for (i = 0; i < hash->size; i++) {
196 head = &hash->table[i]; 272 head = &hash->table[i];
197 273
198 hlist_for_each(walk, head) 274 rcu_read_lock();
275 __hlist_for_each_rcu(node, head)
199 buf_size += 21; 276 buf_size += 21;
277 rcu_read_unlock();
200 } 278 }
201 279
202 buff = kmalloc(buf_size, GFP_ATOMIC); 280 buff = kmalloc(buf_size, GFP_ATOMIC);
203 if (!buff) { 281 if (!buff) {
204 spin_unlock_bh(&bat_priv->hna_lhash_lock); 282 spin_unlock_bh(&bat_priv->tt_lhash_lock);
205 return -ENOMEM; 283 ret = -ENOMEM;
284 goto out;
206 } 285 }
286
207 buff[0] = '\0'; 287 buff[0] = '\0';
208 pos = 0; 288 pos = 0;
209 289
210 for (i = 0; i < hash->size; i++) { 290 for (i = 0; i < hash->size; i++) {
211 head = &hash->table[i]; 291 head = &hash->table[i];
212 292
213 hlist_for_each_entry(bucket, walk, head, hlist) { 293 rcu_read_lock();
214 hna_local_entry = bucket->data; 294 hlist_for_each_entry_rcu(tt_local_entry, node,
215 295 head, hash_entry) {
216 pos += snprintf(buff + pos, 22, " * %pM\n", 296 pos += snprintf(buff + pos, 22, " * %pM\n",
217 hna_local_entry->addr); 297 tt_local_entry->addr);
218 } 298 }
299 rcu_read_unlock();
219 } 300 }
220 301
221 spin_unlock_bh(&bat_priv->hna_lhash_lock); 302 spin_unlock_bh(&bat_priv->tt_lhash_lock);
222 303
223 seq_printf(seq, "%s", buff); 304 seq_printf(seq, "%s", buff);
224 kfree(buff); 305 kfree(buff);
225 return 0; 306out:
307 if (primary_if)
308 hardif_free_ref(primary_if);
309 return ret;
226} 310}
227 311
228static void _hna_local_del(void *data, void *arg) 312static void _tt_local_del(struct hlist_node *node, void *arg)
229{ 313{
230 struct bat_priv *bat_priv = (struct bat_priv *)arg; 314 struct bat_priv *bat_priv = (struct bat_priv *)arg;
315 void *data = container_of(node, struct tt_local_entry, hash_entry);
231 316
232 kfree(data); 317 kfree(data);
233 bat_priv->num_local_hna--; 318 bat_priv->num_local_tt--;
234 atomic_set(&bat_priv->hna_local_changed, 1); 319 atomic_set(&bat_priv->tt_local_changed, 1);
235} 320}
236 321
237static void hna_local_del(struct bat_priv *bat_priv, 322static void tt_local_del(struct bat_priv *bat_priv,
238 struct hna_local_entry *hna_local_entry, 323 struct tt_local_entry *tt_local_entry,
239 char *message) 324 char *message)
240{ 325{
241 bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n", 326 bat_dbg(DBG_ROUTES, bat_priv, "Deleting local tt entry (%pM): %s\n",
242 hna_local_entry->addr, message); 327 tt_local_entry->addr, message);
243 328
244 hash_remove(bat_priv->hna_local_hash, compare_orig, choose_orig, 329 hash_remove(bat_priv->tt_local_hash, compare_ltt, choose_orig,
245 hna_local_entry->addr); 330 tt_local_entry->addr);
246 _hna_local_del(hna_local_entry, bat_priv); 331 _tt_local_del(&tt_local_entry->hash_entry, bat_priv);
247} 332}
248 333
249void hna_local_remove(struct bat_priv *bat_priv, 334void tt_local_remove(struct bat_priv *bat_priv,
250 uint8_t *addr, char *message) 335 uint8_t *addr, char *message)
251{ 336{
252 struct hna_local_entry *hna_local_entry; 337 struct tt_local_entry *tt_local_entry;
253 338
254 spin_lock_bh(&bat_priv->hna_lhash_lock); 339 spin_lock_bh(&bat_priv->tt_lhash_lock);
255 340
256 hna_local_entry = (struct hna_local_entry *) 341 tt_local_entry = tt_local_hash_find(bat_priv, addr);
257 hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig,
258 addr);
259 342
260 if (hna_local_entry) 343 if (tt_local_entry)
261 hna_local_del(bat_priv, hna_local_entry, message); 344 tt_local_del(bat_priv, tt_local_entry, message);
262 345
263 spin_unlock_bh(&bat_priv->hna_lhash_lock); 346 spin_unlock_bh(&bat_priv->tt_lhash_lock);
264} 347}
265 348
266static void hna_local_purge(struct work_struct *work) 349static void tt_local_purge(struct work_struct *work)
267{ 350{
268 struct delayed_work *delayed_work = 351 struct delayed_work *delayed_work =
269 container_of(work, struct delayed_work, work); 352 container_of(work, struct delayed_work, work);
270 struct bat_priv *bat_priv = 353 struct bat_priv *bat_priv =
271 container_of(delayed_work, struct bat_priv, hna_work); 354 container_of(delayed_work, struct bat_priv, tt_work);
272 struct hashtable_t *hash = bat_priv->hna_local_hash; 355 struct hashtable_t *hash = bat_priv->tt_local_hash;
273 struct hna_local_entry *hna_local_entry; 356 struct tt_local_entry *tt_local_entry;
274 int i; 357 struct hlist_node *node, *node_tmp;
275 struct hlist_node *walk, *safe;
276 struct hlist_head *head; 358 struct hlist_head *head;
277 struct element_t *bucket;
278 unsigned long timeout; 359 unsigned long timeout;
360 int i;
279 361
280 spin_lock_bh(&bat_priv->hna_lhash_lock); 362 spin_lock_bh(&bat_priv->tt_lhash_lock);
281 363
282 for (i = 0; i < hash->size; i++) { 364 for (i = 0; i < hash->size; i++) {
283 head = &hash->table[i]; 365 head = &hash->table[i];
284 366
285 hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { 367 hlist_for_each_entry_safe(tt_local_entry, node, node_tmp,
286 hna_local_entry = bucket->data; 368 head, hash_entry) {
369 if (tt_local_entry->never_purge)
370 continue;
287 371
288 timeout = hna_local_entry->last_seen; 372 timeout = tt_local_entry->last_seen;
289 timeout += LOCAL_HNA_TIMEOUT * HZ; 373 timeout += TT_LOCAL_TIMEOUT * HZ;
290 374
291 if ((!hna_local_entry->never_purge) && 375 if (time_before(jiffies, timeout))
292 time_after(jiffies, timeout)) 376 continue;
293 hna_local_del(bat_priv, hna_local_entry, 377
294 "address timed out"); 378 tt_local_del(bat_priv, tt_local_entry,
379 "address timed out");
295 } 380 }
296 } 381 }
297 382
298 spin_unlock_bh(&bat_priv->hna_lhash_lock); 383 spin_unlock_bh(&bat_priv->tt_lhash_lock);
299 hna_local_start_timer(bat_priv); 384 tt_local_start_timer(bat_priv);
300} 385}
301 386
302void hna_local_free(struct bat_priv *bat_priv) 387void tt_local_free(struct bat_priv *bat_priv)
303{ 388{
304 if (!bat_priv->hna_local_hash) 389 if (!bat_priv->tt_local_hash)
305 return; 390 return;
306 391
307 cancel_delayed_work_sync(&bat_priv->hna_work); 392 cancel_delayed_work_sync(&bat_priv->tt_work);
308 hash_delete(bat_priv->hna_local_hash, _hna_local_del, bat_priv); 393 hash_delete(bat_priv->tt_local_hash, _tt_local_del, bat_priv);
309 bat_priv->hna_local_hash = NULL; 394 bat_priv->tt_local_hash = NULL;
310} 395}
311 396
312int hna_global_init(struct bat_priv *bat_priv) 397int tt_global_init(struct bat_priv *bat_priv)
313{ 398{
314 if (bat_priv->hna_global_hash) 399 if (bat_priv->tt_global_hash)
315 return 1; 400 return 1;
316 401
317 bat_priv->hna_global_hash = hash_new(1024); 402 bat_priv->tt_global_hash = hash_new(1024);
318 403
319 if (!bat_priv->hna_global_hash) 404 if (!bat_priv->tt_global_hash)
320 return 0; 405 return 0;
321 406
322 return 1; 407 return 1;
323} 408}
324 409
325void hna_global_add_orig(struct bat_priv *bat_priv, 410void tt_global_add_orig(struct bat_priv *bat_priv,
326 struct orig_node *orig_node, 411 struct orig_node *orig_node,
327 unsigned char *hna_buff, int hna_buff_len) 412 unsigned char *tt_buff, int tt_buff_len)
328{ 413{
329 struct hna_global_entry *hna_global_entry; 414 struct tt_global_entry *tt_global_entry;
330 struct hna_local_entry *hna_local_entry; 415 struct tt_local_entry *tt_local_entry;
331 int hna_buff_count = 0; 416 int tt_buff_count = 0;
332 unsigned char *hna_ptr; 417 unsigned char *tt_ptr;
333 418
334 while ((hna_buff_count + 1) * ETH_ALEN <= hna_buff_len) { 419 while ((tt_buff_count + 1) * ETH_ALEN <= tt_buff_len) {
335 spin_lock_bh(&bat_priv->hna_ghash_lock); 420 spin_lock_bh(&bat_priv->tt_ghash_lock);
336 421
337 hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); 422 tt_ptr = tt_buff + (tt_buff_count * ETH_ALEN);
338 hna_global_entry = (struct hna_global_entry *) 423 tt_global_entry = tt_global_hash_find(bat_priv, tt_ptr);
339 hash_find(bat_priv->hna_global_hash, compare_orig,
340 choose_orig, hna_ptr);
341 424
342 if (!hna_global_entry) { 425 if (!tt_global_entry) {
343 spin_unlock_bh(&bat_priv->hna_ghash_lock); 426 spin_unlock_bh(&bat_priv->tt_ghash_lock);
344 427
345 hna_global_entry = 428 tt_global_entry =
346 kmalloc(sizeof(struct hna_global_entry), 429 kmalloc(sizeof(struct tt_global_entry),
347 GFP_ATOMIC); 430 GFP_ATOMIC);
348 431
349 if (!hna_global_entry) 432 if (!tt_global_entry)
350 break; 433 break;
351 434
352 memcpy(hna_global_entry->addr, hna_ptr, ETH_ALEN); 435 memcpy(tt_global_entry->addr, tt_ptr, ETH_ALEN);
353 436
354 bat_dbg(DBG_ROUTES, bat_priv, 437 bat_dbg(DBG_ROUTES, bat_priv,
355 "Creating new global hna entry: " 438 "Creating new global tt entry: "
356 "%pM (via %pM)\n", 439 "%pM (via %pM)\n",
357 hna_global_entry->addr, orig_node->orig); 440 tt_global_entry->addr, orig_node->orig);
358 441
359 spin_lock_bh(&bat_priv->hna_ghash_lock); 442 spin_lock_bh(&bat_priv->tt_ghash_lock);
360 hash_add(bat_priv->hna_global_hash, compare_orig, 443 hash_add(bat_priv->tt_global_hash, compare_gtt,
361 choose_orig, hna_global_entry); 444 choose_orig, tt_global_entry,
445 &tt_global_entry->hash_entry);
362 446
363 } 447 }
364 448
365 hna_global_entry->orig_node = orig_node; 449 tt_global_entry->orig_node = orig_node;
366 spin_unlock_bh(&bat_priv->hna_ghash_lock); 450 spin_unlock_bh(&bat_priv->tt_ghash_lock);
367 451
368 /* remove address from local hash if present */ 452 /* remove address from local hash if present */
369 spin_lock_bh(&bat_priv->hna_lhash_lock); 453 spin_lock_bh(&bat_priv->tt_lhash_lock);
370 454
371 hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); 455 tt_ptr = tt_buff + (tt_buff_count * ETH_ALEN);
372 hna_local_entry = (struct hna_local_entry *) 456 tt_local_entry = tt_local_hash_find(bat_priv, tt_ptr);
373 hash_find(bat_priv->hna_local_hash, compare_orig,
374 choose_orig, hna_ptr);
375 457
376 if (hna_local_entry) 458 if (tt_local_entry)
377 hna_local_del(bat_priv, hna_local_entry, 459 tt_local_del(bat_priv, tt_local_entry,
378 "global hna received"); 460 "global tt received");
379 461
380 spin_unlock_bh(&bat_priv->hna_lhash_lock); 462 spin_unlock_bh(&bat_priv->tt_lhash_lock);
381 463
382 hna_buff_count++; 464 tt_buff_count++;
383 } 465 }
384 466
385 /* initialize, and overwrite if malloc succeeds */ 467 /* initialize, and overwrite if malloc succeeds */
386 orig_node->hna_buff = NULL; 468 orig_node->tt_buff = NULL;
387 orig_node->hna_buff_len = 0; 469 orig_node->tt_buff_len = 0;
388 470
389 if (hna_buff_len > 0) { 471 if (tt_buff_len > 0) {
390 orig_node->hna_buff = kmalloc(hna_buff_len, GFP_ATOMIC); 472 orig_node->tt_buff = kmalloc(tt_buff_len, GFP_ATOMIC);
391 if (orig_node->hna_buff) { 473 if (orig_node->tt_buff) {
392 memcpy(orig_node->hna_buff, hna_buff, hna_buff_len); 474 memcpy(orig_node->tt_buff, tt_buff, tt_buff_len);
393 orig_node->hna_buff_len = hna_buff_len; 475 orig_node->tt_buff_len = tt_buff_len;
394 } 476 }
395 } 477 }
396} 478}
397 479
398int hna_global_seq_print_text(struct seq_file *seq, void *offset) 480int tt_global_seq_print_text(struct seq_file *seq, void *offset)
399{ 481{
400 struct net_device *net_dev = (struct net_device *)seq->private; 482 struct net_device *net_dev = (struct net_device *)seq->private;
401 struct bat_priv *bat_priv = netdev_priv(net_dev); 483 struct bat_priv *bat_priv = netdev_priv(net_dev);
402 struct hashtable_t *hash = bat_priv->hna_global_hash; 484 struct hashtable_t *hash = bat_priv->tt_global_hash;
403 struct hna_global_entry *hna_global_entry; 485 struct tt_global_entry *tt_global_entry;
404 int i; 486 struct hard_iface *primary_if;
405 struct hlist_node *walk; 487 struct hlist_node *node;
406 struct hlist_head *head; 488 struct hlist_head *head;
407 struct element_t *bucket;
408 size_t buf_size, pos; 489 size_t buf_size, pos;
409 char *buff; 490 char *buff;
491 int i, ret = 0;
492
493 primary_if = primary_if_get_selected(bat_priv);
494 if (!primary_if) {
495 ret = seq_printf(seq, "BATMAN mesh %s disabled - please "
496 "specify interfaces to enable it\n",
497 net_dev->name);
498 goto out;
499 }
410 500
411 if (!bat_priv->primary_if) { 501 if (primary_if->if_status != IF_ACTIVE) {
412 return seq_printf(seq, "BATMAN mesh %s disabled - " 502 ret = seq_printf(seq, "BATMAN mesh %s disabled - "
413 "please specify interfaces to enable it\n", 503 "primary interface not active\n",
414 net_dev->name); 504 net_dev->name);
505 goto out;
415 } 506 }
416 507
417 seq_printf(seq, "Globally announced HNAs received via the mesh %s\n", 508 seq_printf(seq,
509 "Globally announced TT entries received via the mesh %s\n",
418 net_dev->name); 510 net_dev->name);
419 511
420 spin_lock_bh(&bat_priv->hna_ghash_lock); 512 spin_lock_bh(&bat_priv->tt_ghash_lock);
421 513
422 buf_size = 1; 514 buf_size = 1;
423 /* Estimate length for: " * xx:xx:xx:xx:xx:xx via xx:xx:xx:xx:xx:xx\n"*/ 515 /* Estimate length for: " * xx:xx:xx:xx:xx:xx via xx:xx:xx:xx:xx:xx\n"*/
424 for (i = 0; i < hash->size; i++) { 516 for (i = 0; i < hash->size; i++) {
425 head = &hash->table[i]; 517 head = &hash->table[i];
426 518
427 hlist_for_each(walk, head) 519 rcu_read_lock();
520 __hlist_for_each_rcu(node, head)
428 buf_size += 43; 521 buf_size += 43;
522 rcu_read_unlock();
429 } 523 }
430 524
431 buff = kmalloc(buf_size, GFP_ATOMIC); 525 buff = kmalloc(buf_size, GFP_ATOMIC);
432 if (!buff) { 526 if (!buff) {
433 spin_unlock_bh(&bat_priv->hna_ghash_lock); 527 spin_unlock_bh(&bat_priv->tt_ghash_lock);
434 return -ENOMEM; 528 ret = -ENOMEM;
529 goto out;
435 } 530 }
436 buff[0] = '\0'; 531 buff[0] = '\0';
437 pos = 0; 532 pos = 0;
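
This hunk carries two changes at once: the HNA ("host network announcement") naming becomes TT ("translation table") throughout, and the generic hash_find(..., compare_orig, choose_orig, ...) lookups are replaced by open-coded helpers (tt_local_hash_find() / tt_global_hash_find()) that hash the MAC with choose_orig(), walk only that one bucket under rcu_read_lock(), and match entries with compare_eth(). A compact userspace sketch of the same bucket-walk idea (hash function, types and sizes are simplified stand-ins):

	#include <string.h>
	#include <stdint.h>
	#include <stddef.h>

	#define ETH_ALEN	6
	#define HASH_SIZE	1024

	struct tt_entry {
		unsigned char addr[ETH_ALEN];
		struct tt_entry *next;		/* bucket chain */
	};

	static struct tt_entry *table[HASH_SIZE];

	/* toy stand-in for choose_orig(): fold the MAC into a bucket index */
	static size_t choose_bucket(const unsigned char *addr)
	{
		uint32_t h = 0;
		size_t i;

		for (i = 0; i < ETH_ALEN; i++)
			h = h * 31 + addr[i];

		return h % HASH_SIZE;
	}

	/* walk only the bucket the address hashes to, like tt_local_hash_find();
	 * the kernel version does this under rcu_read_lock() */
	static struct tt_entry *tt_hash_find(const unsigned char *addr)
	{
		struct tt_entry *e;

		for (e = table[choose_bucket(addr)]; e; e = e->next)
			if (memcmp(e->addr, addr, ETH_ALEN) == 0)
				return e;

		return NULL;
	}
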
@@ -439,96 +534,105 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset)
439 for (i = 0; i < hash->size; i++) { 534 for (i = 0; i < hash->size; i++) {
440 head = &hash->table[i]; 535 head = &hash->table[i];
441 536
442 hlist_for_each_entry(bucket, walk, head, hlist) { 537 rcu_read_lock();
443 hna_global_entry = bucket->data; 538 hlist_for_each_entry_rcu(tt_global_entry, node,
444 539 head, hash_entry) {
445 pos += snprintf(buff + pos, 44, 540 pos += snprintf(buff + pos, 44,
446 " * %pM via %pM\n", 541 " * %pM via %pM\n",
447 hna_global_entry->addr, 542 tt_global_entry->addr,
448 hna_global_entry->orig_node->orig); 543 tt_global_entry->orig_node->orig);
449 } 544 }
545 rcu_read_unlock();
450 } 546 }
451 547
452 spin_unlock_bh(&bat_priv->hna_ghash_lock); 548 spin_unlock_bh(&bat_priv->tt_ghash_lock);
453 549
454 seq_printf(seq, "%s", buff); 550 seq_printf(seq, "%s", buff);
455 kfree(buff); 551 kfree(buff);
456 return 0; 552out:
553 if (primary_if)
554 hardif_free_ref(primary_if);
555 return ret;
457} 556}
458 557
459static void _hna_global_del_orig(struct bat_priv *bat_priv, 558static void _tt_global_del_orig(struct bat_priv *bat_priv,
460 struct hna_global_entry *hna_global_entry, 559 struct tt_global_entry *tt_global_entry,
461 char *message) 560 char *message)
462{ 561{
463 bat_dbg(DBG_ROUTES, bat_priv, 562 bat_dbg(DBG_ROUTES, bat_priv,
464 "Deleting global hna entry %pM (via %pM): %s\n", 563 "Deleting global tt entry %pM (via %pM): %s\n",
465 hna_global_entry->addr, hna_global_entry->orig_node->orig, 564 tt_global_entry->addr, tt_global_entry->orig_node->orig,
466 message); 565 message);
467 566
468 hash_remove(bat_priv->hna_global_hash, compare_orig, choose_orig, 567 hash_remove(bat_priv->tt_global_hash, compare_gtt, choose_orig,
469 hna_global_entry->addr); 568 tt_global_entry->addr);
470 kfree(hna_global_entry); 569 kfree(tt_global_entry);
471} 570}
472 571
473void hna_global_del_orig(struct bat_priv *bat_priv, 572void tt_global_del_orig(struct bat_priv *bat_priv,
474 struct orig_node *orig_node, char *message) 573 struct orig_node *orig_node, char *message)
475{ 574{
476 struct hna_global_entry *hna_global_entry; 575 struct tt_global_entry *tt_global_entry;
477 int hna_buff_count = 0; 576 int tt_buff_count = 0;
478 unsigned char *hna_ptr; 577 unsigned char *tt_ptr;
479 578
480 if (orig_node->hna_buff_len == 0) 579 if (orig_node->tt_buff_len == 0)
481 return; 580 return;
482 581
483 spin_lock_bh(&bat_priv->hna_ghash_lock); 582 spin_lock_bh(&bat_priv->tt_ghash_lock);
484 583
485 while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) { 584 while ((tt_buff_count + 1) * ETH_ALEN <= orig_node->tt_buff_len) {
486 hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN); 585 tt_ptr = orig_node->tt_buff + (tt_buff_count * ETH_ALEN);
487 hna_global_entry = (struct hna_global_entry *) 586 tt_global_entry = tt_global_hash_find(bat_priv, tt_ptr);
488 hash_find(bat_priv->hna_global_hash, compare_orig,
489 choose_orig, hna_ptr);
490 587
491 if ((hna_global_entry) && 588 if ((tt_global_entry) &&
492 (hna_global_entry->orig_node == orig_node)) 589 (tt_global_entry->orig_node == orig_node))
493 _hna_global_del_orig(bat_priv, hna_global_entry, 590 _tt_global_del_orig(bat_priv, tt_global_entry,
494 message); 591 message);
495 592
496 hna_buff_count++; 593 tt_buff_count++;
497 } 594 }
498 595
499 spin_unlock_bh(&bat_priv->hna_ghash_lock); 596 spin_unlock_bh(&bat_priv->tt_ghash_lock);
500 597
501 orig_node->hna_buff_len = 0; 598 orig_node->tt_buff_len = 0;
502 kfree(orig_node->hna_buff); 599 kfree(orig_node->tt_buff);
503 orig_node->hna_buff = NULL; 600 orig_node->tt_buff = NULL;
504} 601}
505 602
506static void hna_global_del(void *data, void *arg) 603static void tt_global_del(struct hlist_node *node, void *arg)
507{ 604{
605 void *data = container_of(node, struct tt_global_entry, hash_entry);
606
508 kfree(data); 607 kfree(data);
509} 608}
510 609
511void hna_global_free(struct bat_priv *bat_priv) 610void tt_global_free(struct bat_priv *bat_priv)
512{ 611{
513 if (!bat_priv->hna_global_hash) 612 if (!bat_priv->tt_global_hash)
514 return; 613 return;
515 614
516 hash_delete(bat_priv->hna_global_hash, hna_global_del, NULL); 615 hash_delete(bat_priv->tt_global_hash, tt_global_del, NULL);
517 bat_priv->hna_global_hash = NULL; 616 bat_priv->tt_global_hash = NULL;
518} 617}
519 618
520struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) 619struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr)
521{ 620{
522 struct hna_global_entry *hna_global_entry; 621 struct tt_global_entry *tt_global_entry;
622 struct orig_node *orig_node = NULL;
523 623
524 spin_lock_bh(&bat_priv->hna_ghash_lock); 624 spin_lock_bh(&bat_priv->tt_ghash_lock);
525 hna_global_entry = (struct hna_global_entry *) 625 tt_global_entry = tt_global_hash_find(bat_priv, addr);
526 hash_find(bat_priv->hna_global_hash,
527 compare_orig, choose_orig, addr);
528 spin_unlock_bh(&bat_priv->hna_ghash_lock);
529 626
530 if (!hna_global_entry) 627 if (!tt_global_entry)
531 return NULL; 628 goto out;
629
630 if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount))
631 goto out;
632
633 orig_node = tt_global_entry->orig_node;
532 634
533 return hna_global_entry->orig_node; 635out:
636 spin_unlock_bh(&bat_priv->tt_ghash_lock);
637 return orig_node;
534} 638}
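
The lookups above now funnel through tt_global_hash_find() instead of open-coding hash_find() with compare_orig/choose_orig. That helper is declared elsewhere in the patch and is not part of this hunk; a minimal sketch of what it presumably looks like, modeled on the vis_hash_find() helper added to vis.c further below:

/* Sketch only: the real tt_global_hash_find() is outside this excerpt. It is
 * assumed to follow the same pattern as vis_hash_find() below: hash the key
 * with choose_orig(), then walk the bucket under RCU via the hlist_node that
 * is now embedded in the entry itself (hash_entry). */
static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv,
						   void *data)
{
	struct hashtable_t *hash = bat_priv->tt_global_hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct tt_global_entry *tt_global_entry, *entry_tmp = NULL;
	int index;

	if (!hash)
		return NULL;

	index = choose_orig(data, hash->size);
	head = &hash->table[index];

	rcu_read_lock();
	hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) {
		if (!compare_eth(tt_global_entry->addr, data))
			continue;

		entry_tmp = tt_global_entry;
		break;
	}
	rcu_read_unlock();

	return entry_tmp;
}

The visible difference from the old code is that there is no separate struct element_t bucket allocation any more; the hlist_node lives in the entry, which is what makes the RCU bucket walk possible.
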
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 10c4c5c319b6..46152c38cc95 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,24 +22,22 @@
22#ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ 22#ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
23#define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ 23#define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
24 24
25#include "types.h" 25int tt_local_init(struct bat_priv *bat_priv);
26 26void tt_local_add(struct net_device *soft_iface, uint8_t *addr);
27int hna_local_init(struct bat_priv *bat_priv); 27void tt_local_remove(struct bat_priv *bat_priv,
28void hna_local_add(struct net_device *soft_iface, uint8_t *addr);
29void hna_local_remove(struct bat_priv *bat_priv,
30 uint8_t *addr, char *message); 28 uint8_t *addr, char *message);
31int hna_local_fill_buffer(struct bat_priv *bat_priv, 29int tt_local_fill_buffer(struct bat_priv *bat_priv,
32 unsigned char *buff, int buff_len); 30 unsigned char *buff, int buff_len);
33int hna_local_seq_print_text(struct seq_file *seq, void *offset); 31int tt_local_seq_print_text(struct seq_file *seq, void *offset);
34void hna_local_free(struct bat_priv *bat_priv); 32void tt_local_free(struct bat_priv *bat_priv);
35int hna_global_init(struct bat_priv *bat_priv); 33int tt_global_init(struct bat_priv *bat_priv);
36void hna_global_add_orig(struct bat_priv *bat_priv, 34void tt_global_add_orig(struct bat_priv *bat_priv,
37 struct orig_node *orig_node, 35 struct orig_node *orig_node,
38 unsigned char *hna_buff, int hna_buff_len); 36 unsigned char *tt_buff, int tt_buff_len);
39int hna_global_seq_print_text(struct seq_file *seq, void *offset); 37int tt_global_seq_print_text(struct seq_file *seq, void *offset);
40void hna_global_del_orig(struct bat_priv *bat_priv, 38void tt_global_del_orig(struct bat_priv *bat_priv,
41 struct orig_node *orig_node, char *message); 39 struct orig_node *orig_node, char *message);
42void hna_global_free(struct bat_priv *bat_priv); 40void tt_global_free(struct bat_priv *bat_priv);
43struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr); 41struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr);
44 42
45#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ 43#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
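
One behavioural detail hidden behind the renamed prototypes: transtable_search() now returns its orig_node with a reference already taken (atomic_inc_not_zero() under tt_ghash_lock in the .c hunk above), so every caller has to drop it again. A minimal caller sketch, matching the way unicast_send_skb() further below uses it:

	struct orig_node *orig_node;

	orig_node = transtable_search(bat_priv, ethhdr->h_dest);
	if (orig_node) {
		/* ... route towards orig_node ... */
		orig_node_free_ref(orig_node);	/* drop the search reference */
	}
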
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index bf3f6f5a12c4..fab70e8b16ee 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -33,7 +33,7 @@
33 sizeof(struct bcast_packet)))) 33 sizeof(struct bcast_packet))))
34 34
35 35
36struct batman_if { 36struct hard_iface {
37 struct list_head list; 37 struct list_head list;
38 int16_t if_num; 38 int16_t if_num;
39 char if_status; 39 char if_status;
@@ -43,7 +43,7 @@ struct batman_if {
43 unsigned char *packet_buff; 43 unsigned char *packet_buff;
44 int packet_len; 44 int packet_len;
45 struct kobject *hardif_obj; 45 struct kobject *hardif_obj;
46 struct kref refcount; 46 atomic_t refcount;
47 struct packet_type batman_adv_ptype; 47 struct packet_type batman_adv_ptype;
48 struct net_device *soft_iface; 48 struct net_device *soft_iface;
49 struct rcu_head rcu; 49 struct rcu_head rcu;
@@ -67,36 +67,42 @@ struct batman_if {
67struct orig_node { 67struct orig_node {
68 uint8_t orig[ETH_ALEN]; 68 uint8_t orig[ETH_ALEN];
69 uint8_t primary_addr[ETH_ALEN]; 69 uint8_t primary_addr[ETH_ALEN];
70 struct neigh_node *router; 70 struct neigh_node __rcu *router; /* rcu protected pointer */
71 unsigned long *bcast_own; 71 unsigned long *bcast_own;
72 uint8_t *bcast_own_sum; 72 uint8_t *bcast_own_sum;
73 uint8_t tq_own;
74 int tq_asym_penalty;
75 unsigned long last_valid; 73 unsigned long last_valid;
76 unsigned long bcast_seqno_reset; 74 unsigned long bcast_seqno_reset;
77 unsigned long batman_seqno_reset; 75 unsigned long batman_seqno_reset;
78 uint8_t gw_flags; 76 uint8_t gw_flags;
79 uint8_t flags; 77 uint8_t flags;
80 unsigned char *hna_buff; 78 unsigned char *tt_buff;
81 int16_t hna_buff_len; 79 int16_t tt_buff_len;
82 uint32_t last_real_seqno; 80 uint32_t last_real_seqno;
83 uint8_t last_ttl; 81 uint8_t last_ttl;
84 unsigned long bcast_bits[NUM_WORDS]; 82 unsigned long bcast_bits[NUM_WORDS];
85 uint32_t last_bcast_seqno; 83 uint32_t last_bcast_seqno;
86 struct list_head neigh_list; 84 struct hlist_head neigh_list;
87 struct list_head frag_list; 85 struct list_head frag_list;
86 spinlock_t neigh_list_lock; /* protects neigh_list and router */
87 atomic_t refcount;
88 struct rcu_head rcu;
89 struct hlist_node hash_entry;
90 struct bat_priv *bat_priv;
88 unsigned long last_frag_packet; 91 unsigned long last_frag_packet;
89 struct { 92 /* ogm_cnt_lock protects: bcast_own, bcast_own_sum,
90 uint8_t candidates; 93 * neigh_node->real_bits, neigh_node->real_packet_count */
91 struct neigh_node *selected; 94 spinlock_t ogm_cnt_lock;
92 } bond; 95 /* bcast_seqno_lock protects bcast_bits, last_bcast_seqno */
96 spinlock_t bcast_seqno_lock;
97 atomic_t bond_candidates;
98 struct list_head bond_list;
93}; 99};
94 100
95struct gw_node { 101struct gw_node {
96 struct hlist_node list; 102 struct hlist_node list;
97 struct orig_node *orig_node; 103 struct orig_node *orig_node;
98 unsigned long deleted; 104 unsigned long deleted;
99 struct kref refcount; 105 atomic_t refcount;
100 struct rcu_head rcu; 106 struct rcu_head rcu;
101}; 107};
102 108
@@ -105,18 +111,21 @@ struct gw_node {
105 * @last_valid: when last packet via this neighbor was received 111 * @last_valid: when last packet via this neighbor was received
106 */ 112 */
107struct neigh_node { 113struct neigh_node {
108 struct list_head list; 114 struct hlist_node list;
109 uint8_t addr[ETH_ALEN]; 115 uint8_t addr[ETH_ALEN];
110 uint8_t real_packet_count; 116 uint8_t real_packet_count;
111 uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE]; 117 uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE];
112 uint8_t tq_index; 118 uint8_t tq_index;
113 uint8_t tq_avg; 119 uint8_t tq_avg;
114 uint8_t last_ttl; 120 uint8_t last_ttl;
115 struct neigh_node *next_bond_candidate; 121 struct list_head bonding_list;
116 unsigned long last_valid; 122 unsigned long last_valid;
117 unsigned long real_bits[NUM_WORDS]; 123 unsigned long real_bits[NUM_WORDS];
124 atomic_t refcount;
125 struct rcu_head rcu;
118 struct orig_node *orig_node; 126 struct orig_node *orig_node;
119 struct batman_if *if_incoming; 127 struct hard_iface *if_incoming;
128 spinlock_t tq_lock; /* protects: tq_recv, tq_index */
120}; 129};
121 130
122 131
@@ -137,35 +146,34 @@ struct bat_priv {
137 atomic_t bcast_queue_left; 146 atomic_t bcast_queue_left;
138 atomic_t batman_queue_left; 147 atomic_t batman_queue_left;
139 char num_ifaces; 148 char num_ifaces;
140 struct hlist_head softif_neigh_list;
141 struct softif_neigh *softif_neigh;
142 struct debug_log *debug_log; 149 struct debug_log *debug_log;
143 struct batman_if *primary_if;
144 struct kobject *mesh_obj; 150 struct kobject *mesh_obj;
145 struct dentry *debug_dir; 151 struct dentry *debug_dir;
146 struct hlist_head forw_bat_list; 152 struct hlist_head forw_bat_list;
147 struct hlist_head forw_bcast_list; 153 struct hlist_head forw_bcast_list;
148 struct hlist_head gw_list; 154 struct hlist_head gw_list;
155 struct hlist_head softif_neigh_vids;
149 struct list_head vis_send_list; 156 struct list_head vis_send_list;
150 struct hashtable_t *orig_hash; 157 struct hashtable_t *orig_hash;
151 struct hashtable_t *hna_local_hash; 158 struct hashtable_t *tt_local_hash;
152 struct hashtable_t *hna_global_hash; 159 struct hashtable_t *tt_global_hash;
153 struct hashtable_t *vis_hash; 160 struct hashtable_t *vis_hash;
154 spinlock_t orig_hash_lock; /* protects orig_hash */
155 spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ 161 spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
156 spinlock_t forw_bcast_list_lock; /* protects */ 162 spinlock_t forw_bcast_list_lock; /* protects */
157 spinlock_t hna_lhash_lock; /* protects hna_local_hash */ 163 spinlock_t tt_lhash_lock; /* protects tt_local_hash */
158 spinlock_t hna_ghash_lock; /* protects hna_global_hash */ 164 spinlock_t tt_ghash_lock; /* protects tt_global_hash */
159 spinlock_t gw_list_lock; /* protects gw_list */ 165 spinlock_t gw_list_lock; /* protects gw_list and curr_gw */
160 spinlock_t vis_hash_lock; /* protects vis_hash */ 166 spinlock_t vis_hash_lock; /* protects vis_hash */
161 spinlock_t vis_list_lock; /* protects vis_info::recv_list */ 167 spinlock_t vis_list_lock; /* protects vis_info::recv_list */
162 spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */ 168 spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */
163 int16_t num_local_hna; 169 spinlock_t softif_neigh_vid_lock; /* protects soft-interface vid list */
164 atomic_t hna_local_changed; 170 int16_t num_local_tt;
165 struct delayed_work hna_work; 171 atomic_t tt_local_changed;
172 struct delayed_work tt_work;
166 struct delayed_work orig_work; 173 struct delayed_work orig_work;
167 struct delayed_work vis_work; 174 struct delayed_work vis_work;
168 struct gw_node *curr_gw; 175 struct gw_node __rcu *curr_gw; /* rcu protected pointer */
176 struct hard_iface __rcu *primary_if; /* rcu protected pointer */
169 struct vis_info *my_vis_info; 177 struct vis_info *my_vis_info;
170}; 178};
171 179
@@ -184,15 +192,17 @@ struct socket_packet {
184 struct icmp_packet_rr icmp_packet; 192 struct icmp_packet_rr icmp_packet;
185}; 193};
186 194
187struct hna_local_entry { 195struct tt_local_entry {
188 uint8_t addr[ETH_ALEN]; 196 uint8_t addr[ETH_ALEN];
189 unsigned long last_seen; 197 unsigned long last_seen;
190 char never_purge; 198 char never_purge;
199 struct hlist_node hash_entry;
191}; 200};
192 201
193struct hna_global_entry { 202struct tt_global_entry {
194 uint8_t addr[ETH_ALEN]; 203 uint8_t addr[ETH_ALEN];
195 struct orig_node *orig_node; 204 struct orig_node *orig_node;
205 struct hlist_node hash_entry;
196}; 206};
197 207
198/** 208/**
@@ -208,7 +218,7 @@ struct forw_packet {
208 uint32_t direct_link_flags; 218 uint32_t direct_link_flags;
209 uint8_t num_packets; 219 uint8_t num_packets;
210 struct delayed_work delayed_work; 220 struct delayed_work delayed_work;
211 struct batman_if *if_incoming; 221 struct hard_iface *if_incoming;
212}; 222};
213 223
214/* While scanning for vis-entries of a particular vis-originator 224/* While scanning for vis-entries of a particular vis-originator
@@ -242,6 +252,7 @@ struct vis_info {
242 * from. we should not reply to them. */ 252 * from. we should not reply to them. */
243 struct list_head send_list; 253 struct list_head send_list;
244 struct kref refcount; 254 struct kref refcount;
255 struct hlist_node hash_entry;
245 struct bat_priv *bat_priv; 256 struct bat_priv *bat_priv;
246 /* this packet might be part of the vis send queue. */ 257 /* this packet might be part of the vis send queue. */
247 struct sk_buff *skb_packet; 258 struct sk_buff *skb_packet;
@@ -251,7 +262,7 @@ struct vis_info {
251struct vis_info_entry { 262struct vis_info_entry {
252 uint8_t src[ETH_ALEN]; 263 uint8_t src[ETH_ALEN];
253 uint8_t dest[ETH_ALEN]; 264 uint8_t dest[ETH_ALEN];
254 uint8_t quality; /* quality = 0 means HNA */ 265 uint8_t quality; /* quality = 0 client */
255} __packed; 266} __packed;
256 267
257struct recvlist_node { 268struct recvlist_node {
@@ -259,12 +270,21 @@ struct recvlist_node {
259 uint8_t mac[ETH_ALEN]; 270 uint8_t mac[ETH_ALEN];
260}; 271};
261 272
273struct softif_neigh_vid {
274 struct hlist_node list;
275 struct bat_priv *bat_priv;
276 short vid;
277 atomic_t refcount;
278 struct softif_neigh __rcu *softif_neigh;
279 struct rcu_head rcu;
280 struct hlist_head softif_neigh_list;
281};
282
262struct softif_neigh { 283struct softif_neigh {
263 struct hlist_node list; 284 struct hlist_node list;
264 uint8_t addr[ETH_ALEN]; 285 uint8_t addr[ETH_ALEN];
265 unsigned long last_seen; 286 unsigned long last_seen;
266 short vid; 287 atomic_t refcount;
267 struct kref refcount;
268 struct rcu_head rcu; 288 struct rcu_head rcu;
269}; 289};
270 290
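
types.h is where the new locking model is most visible: every struct kref becomes a bare atomic_t refcount next to a struct rcu_head, and the hot pointers (orig_node->router, bat_priv->curr_gw, bat_priv->primary_if) gain __rcu annotations. The accessors and release helpers used throughout this diff (orig_node_get_router(), primary_if_get_selected(), the *_free_ref() calls) live in originator.c and hard-interface.c, outside this excerpt; the idiom they presumably implement is the standard RCU one:

/* Sketch of the assumed idiom behind the atomic_t/rcu_head pairs above; the
 * real helpers are in originator.c / hard-interface.c, not in this excerpt. */

/* Take a reference on the current router under RCU; may return NULL. */
struct neigh_node *orig_node_get_router(struct orig_node *orig_node)
{
	struct neigh_node *router;

	rcu_read_lock();
	router = rcu_dereference(orig_node->router);
	if (router && !atomic_inc_not_zero(&router->refcount))
		router = NULL;
	rcu_read_unlock();

	return router;
}

/* Drop a reference; free after a grace period once the count hits zero. */
static void neigh_node_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct neigh_node, rcu));
}

void neigh_node_free_ref(struct neigh_node *neigh_node)
{
	if (atomic_dec_and_test(&neigh_node->refcount))
		call_rcu(&neigh_node->rcu, neigh_node_free_rcu);
}

This is also why orig_hash_lock disappears from bat_priv: readers walk the originator hash under rcu_read_lock() and pin individual objects by refcount instead of holding one big spinlock across the send path.
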
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index d1a611322549..19c3daf34ac6 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Andreas Langer 4 * Andreas Langer
5 * 5 *
@@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head,
39 (struct unicast_frag_packet *)skb->data; 39 (struct unicast_frag_packet *)skb->data;
40 struct sk_buff *tmp_skb; 40 struct sk_buff *tmp_skb;
41 struct unicast_packet *unicast_packet; 41 struct unicast_packet *unicast_packet;
42 int hdr_len = sizeof(struct unicast_packet), 42 int hdr_len = sizeof(struct unicast_packet);
43 uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; 43 int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len;
44 44
45 /* set skb to the first part and tmp_skb to the second part */ 45 /* set skb to the first part and tmp_skb to the second part */
46 if (up->flags & UNI_FRAG_HEAD) { 46 if (up->flags & UNI_FRAG_HEAD) {
@@ -183,15 +183,10 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
183 (struct unicast_frag_packet *)skb->data; 183 (struct unicast_frag_packet *)skb->data;
184 184
185 *new_skb = NULL; 185 *new_skb = NULL;
186 spin_lock_bh(&bat_priv->orig_hash_lock);
187 orig_node = ((struct orig_node *)
188 hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
189 unicast_packet->orig));
190 186
191 if (!orig_node) { 187 orig_node = orig_hash_find(bat_priv, unicast_packet->orig);
192 pr_debug("couldn't find originator in orig_hash\n"); 188 if (!orig_node)
193 goto out; 189 goto out;
194 }
195 190
196 orig_node->last_frag_packet = jiffies; 191 orig_node->last_frag_packet = jiffies;
197 192
@@ -215,32 +210,38 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
215 /* if not, merge failed */ 210 /* if not, merge failed */
216 if (*new_skb) 211 if (*new_skb)
217 ret = NET_RX_SUCCESS; 212 ret = NET_RX_SUCCESS;
218out:
219 spin_unlock_bh(&bat_priv->orig_hash_lock);
220 213
214out:
215 if (orig_node)
216 orig_node_free_ref(orig_node);
221 return ret; 217 return ret;
222} 218}
223 219
224int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, 220int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
225 struct batman_if *batman_if, uint8_t dstaddr[]) 221 struct hard_iface *hard_iface, uint8_t dstaddr[])
226{ 222{
227 struct unicast_packet tmp_uc, *unicast_packet; 223 struct unicast_packet tmp_uc, *unicast_packet;
224 struct hard_iface *primary_if;
228 struct sk_buff *frag_skb; 225 struct sk_buff *frag_skb;
229 struct unicast_frag_packet *frag1, *frag2; 226 struct unicast_frag_packet *frag1, *frag2;
230 int uc_hdr_len = sizeof(struct unicast_packet); 227 int uc_hdr_len = sizeof(struct unicast_packet);
231 int ucf_hdr_len = sizeof(struct unicast_frag_packet); 228 int ucf_hdr_len = sizeof(struct unicast_frag_packet);
232 int data_len = skb->len; 229 int data_len = skb->len - uc_hdr_len;
230 int large_tail = 0, ret = NET_RX_DROP;
231 uint16_t seqno;
233 232
234 if (!bat_priv->primary_if) 233 primary_if = primary_if_get_selected(bat_priv);
234 if (!primary_if)
235 goto dropped; 235 goto dropped;
236 236
237 frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); 237 frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
238 if (!frag_skb) 238 if (!frag_skb)
239 goto dropped; 239 goto dropped;
240 skb_reserve(frag_skb, ucf_hdr_len);
240 241
241 unicast_packet = (struct unicast_packet *) skb->data; 242 unicast_packet = (struct unicast_packet *) skb->data;
242 memcpy(&tmp_uc, unicast_packet, uc_hdr_len); 243 memcpy(&tmp_uc, unicast_packet, uc_hdr_len);
243 skb_split(skb, frag_skb, data_len / 2); 244 skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len);
244 245
245 if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || 246 if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 ||
246 my_skb_head_push(frag_skb, ucf_hdr_len) < 0) 247 my_skb_head_push(frag_skb, ucf_hdr_len) < 0)
@@ -255,26 +256,32 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
255 frag1->version = COMPAT_VERSION; 256 frag1->version = COMPAT_VERSION;
256 frag1->packet_type = BAT_UNICAST_FRAG; 257 frag1->packet_type = BAT_UNICAST_FRAG;
257 258
258 memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); 259 memcpy(frag1->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
259 memcpy(frag2, frag1, sizeof(struct unicast_frag_packet)); 260 memcpy(frag2, frag1, sizeof(struct unicast_frag_packet));
260 261
261 frag1->flags |= UNI_FRAG_HEAD; 262 if (data_len & 1)
262 frag2->flags &= ~UNI_FRAG_HEAD; 263 large_tail = UNI_FRAG_LARGETAIL;
264
265 frag1->flags = UNI_FRAG_HEAD | large_tail;
266 frag2->flags = large_tail;
263 267
264 frag1->seqno = htons((uint16_t)atomic_inc_return( 268 seqno = atomic_add_return(2, &hard_iface->frag_seqno);
265 &batman_if->frag_seqno)); 269 frag1->seqno = htons(seqno - 1);
266 frag2->seqno = htons((uint16_t)atomic_inc_return( 270 frag2->seqno = htons(seqno);
267 &batman_if->frag_seqno));
268 271
269 send_skb_packet(skb, batman_if, dstaddr); 272 send_skb_packet(skb, hard_iface, dstaddr);
270 send_skb_packet(frag_skb, batman_if, dstaddr); 273 send_skb_packet(frag_skb, hard_iface, dstaddr);
271 return NET_RX_SUCCESS; 274 ret = NET_RX_SUCCESS;
275 goto out;
272 276
273drop_frag: 277drop_frag:
274 kfree_skb(frag_skb); 278 kfree_skb(frag_skb);
275dropped: 279dropped:
276 kfree_skb(skb); 280 kfree_skb(skb);
277 return NET_RX_DROP; 281out:
282 if (primary_if)
283 hardif_free_ref(primary_if);
284 return ret;
278} 285}
279 286
280int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) 287int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
@@ -282,44 +289,36 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
282 struct ethhdr *ethhdr = (struct ethhdr *)skb->data; 289 struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
283 struct unicast_packet *unicast_packet; 290 struct unicast_packet *unicast_packet;
284 struct orig_node *orig_node; 291 struct orig_node *orig_node;
285 struct batman_if *batman_if; 292 struct neigh_node *neigh_node;
286 struct neigh_node *router;
287 int data_len = skb->len; 293 int data_len = skb->len;
288 uint8_t dstaddr[6]; 294 int ret = 1;
289
290 spin_lock_bh(&bat_priv->orig_hash_lock);
291 295
292 /* get routing information */ 296 /* get routing information */
293 if (is_multicast_ether_addr(ethhdr->h_dest)) 297 if (is_multicast_ether_addr(ethhdr->h_dest)) {
294 orig_node = (struct orig_node *)gw_get_selected(bat_priv); 298 orig_node = (struct orig_node *)gw_get_selected_orig(bat_priv);
295 else 299 if (orig_node)
296 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, 300 goto find_router;
297 compare_orig, 301 }
298 choose_orig,
299 ethhdr->h_dest));
300
301 /* check for hna host */
302 if (!orig_node)
303 orig_node = transtable_search(bat_priv, ethhdr->h_dest);
304
305 router = find_router(bat_priv, orig_node, NULL);
306
307 if (!router)
308 goto unlock;
309 302
310 /* don't lock while sending the packets ... we therefore 303 /* check for tt host - increases orig_node refcount */
311 * copy the required data before sending */ 304 orig_node = transtable_search(bat_priv, ethhdr->h_dest);
312 305
313 batman_if = router->if_incoming; 306find_router:
314 memcpy(dstaddr, router->addr, ETH_ALEN); 307 /**
308 * find_router():
309 * - if orig_node is NULL it returns NULL
310 * - increases neigh_nodes refcount if found.
311 */
312 neigh_node = find_router(bat_priv, orig_node, NULL);
315 313
316 spin_unlock_bh(&bat_priv->orig_hash_lock); 314 if (!neigh_node)
315 goto out;
317 316
318 if (batman_if->if_status != IF_ACTIVE) 317 if (neigh_node->if_incoming->if_status != IF_ACTIVE)
319 goto dropped; 318 goto out;
320 319
321 if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0) 320 if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0)
322 goto dropped; 321 goto out;
323 322
324 unicast_packet = (struct unicast_packet *)skb->data; 323 unicast_packet = (struct unicast_packet *)skb->data;
325 324
@@ -333,18 +332,24 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
333 332
334 if (atomic_read(&bat_priv->fragmentation) && 333 if (atomic_read(&bat_priv->fragmentation) &&
335 data_len + sizeof(struct unicast_packet) > 334 data_len + sizeof(struct unicast_packet) >
336 batman_if->net_dev->mtu) { 335 neigh_node->if_incoming->net_dev->mtu) {
337 /* send frag skb decreases ttl */ 336 /* send frag skb decreases ttl */
338 unicast_packet->ttl++; 337 unicast_packet->ttl++;
339 return frag_send_skb(skb, bat_priv, batman_if, 338 ret = frag_send_skb(skb, bat_priv,
340 dstaddr); 339 neigh_node->if_incoming, neigh_node->addr);
340 goto out;
341 } 341 }
342 send_skb_packet(skb, batman_if, dstaddr);
343 return 0;
344 342
345unlock: 343 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
346 spin_unlock_bh(&bat_priv->orig_hash_lock); 344 ret = 0;
347dropped: 345 goto out;
348 kfree_skb(skb); 346
349 return 1; 347out:
348 if (neigh_node)
349 neigh_node_free_ref(neigh_node);
350 if (orig_node)
351 orig_node_free_ref(orig_node);
352 if (ret == 1)
353 kfree_skb(skb);
354 return ret;
350} 355}
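
Two details in the reworked frag_send_skb() are easy to miss. The fragment counter is advanced by two in a single atomic step, so the head/tail pair always gets adjacent sequence numbers even with concurrent senders, and an odd payload length is recorded in UNI_FRAG_LARGETAIL because skb_split() leaves the extra byte in the tail. Worked numbers for a hypothetical 1401-byte payload:

	/* Hypothetical example: 1401 payload bytes after the unicast header,
	 * frag_seqno previously at 40.
	 *
	 *   data_len = skb->len - uc_hdr_len          = 1401 (odd -> LARGETAIL)
	 *   skb_split() at data_len / 2 + uc_hdr_len:
	 *       head keeps 700 payload bytes
	 *       tail gets  701 payload bytes
	 *
	 *   seqno = atomic_add_return(2, &hard_iface->frag_seqno);  now 42
	 *   frag1->seqno = htons(seqno - 1);   head carries 41
	 *   frag2->seqno = htons(seqno);       tail carries 42
	 */
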
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index e32b7867a9a4..16ad7a9242b5 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Andreas Langer 4 * Andreas Langer
5 * 5 *
@@ -22,6 +22,8 @@
22#ifndef _NET_BATMAN_ADV_UNICAST_H_ 22#ifndef _NET_BATMAN_ADV_UNICAST_H_
23#define _NET_BATMAN_ADV_UNICAST_H_ 23#define _NET_BATMAN_ADV_UNICAST_H_
24 24
25#include "packet.h"
26
25#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ 27#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */
26#define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ 28#define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */
27 29
@@ -30,6 +32,27 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
30void frag_list_free(struct list_head *head); 32void frag_list_free(struct list_head *head);
31int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); 33int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv);
32int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, 34int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
33 struct batman_if *batman_if, uint8_t dstaddr[]); 35 struct hard_iface *hard_iface, uint8_t dstaddr[]);
36
37static inline int frag_can_reassemble(struct sk_buff *skb, int mtu)
38{
39 struct unicast_frag_packet *unicast_packet;
40 int uneven_correction = 0;
41 unsigned int merged_size;
42
43 unicast_packet = (struct unicast_frag_packet *)skb->data;
44
45 if (unicast_packet->flags & UNI_FRAG_LARGETAIL) {
46 if (unicast_packet->flags & UNI_FRAG_HEAD)
47 uneven_correction = 1;
48 else
49 uneven_correction = -1;
50 }
51
52 merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2;
53 merged_size += sizeof(struct unicast_packet) + uneven_correction;
54
55 return merged_size <= mtu;
56}
34 57
35#endif /* _NET_BATMAN_ADV_UNICAST_H_ */ 58#endif /* _NET_BATMAN_ADV_UNICAST_H_ */
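
frag_can_reassemble() is the receive-side counterpart of that LARGETAIL bookkeeping: it doubles the payload of whichever fragment is at hand and corrects by one byte depending on whether that fragment is the head or the larger tail, so both fragments predict the same merged size. Continuing the hypothetical 1401-byte example:

	/* Continuing the hypothetical 1401-byte example from unicast.c:
	 *
	 *   head fragment: skb->len = sizeof(struct unicast_frag_packet) + 700
	 *       merged_size = 700 * 2 + sizeof(struct unicast_packet) + 1
	 *                   = 1401 + sizeof(struct unicast_packet)
	 *
	 *   tail fragment: skb->len = sizeof(struct unicast_frag_packet) + 701
	 *       merged_size = 701 * 2 + sizeof(struct unicast_packet) - 1
	 *                   = 1401 + sizeof(struct unicast_packet)
	 *
	 * Reassembly is allowed only if that value fits the interface MTU.
	 */
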
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index de1022cacaf7..c39f20cc1ba6 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich 4 * Simon Wunderlich
5 * 5 *
@@ -68,15 +68,16 @@ static void free_info(struct kref *ref)
68} 68}
69 69
70/* Compare two vis packets, used by the hashing algorithm */ 70/* Compare two vis packets, used by the hashing algorithm */
71static int vis_info_cmp(void *data1, void *data2) 71static int vis_info_cmp(struct hlist_node *node, void *data2)
72{ 72{
73 struct vis_info *d1, *d2; 73 struct vis_info *d1, *d2;
74 struct vis_packet *p1, *p2; 74 struct vis_packet *p1, *p2;
75 d1 = data1; 75
76 d1 = container_of(node, struct vis_info, hash_entry);
76 d2 = data2; 77 d2 = data2;
77 p1 = (struct vis_packet *)d1->skb_packet->data; 78 p1 = (struct vis_packet *)d1->skb_packet->data;
78 p2 = (struct vis_packet *)d2->skb_packet->data; 79 p2 = (struct vis_packet *)d2->skb_packet->data;
79 return compare_orig(p1->vis_orig, p2->vis_orig); 80 return compare_eth(p1->vis_orig, p2->vis_orig);
80} 81}
81 82
82/* hash function to choose an entry in a hash table of given size */ 83/* hash function to choose an entry in a hash table of given size */
@@ -104,6 +105,34 @@ static int vis_info_choose(void *data, int size)
104 return hash % size; 105 return hash % size;
105} 106}
106 107
108static struct vis_info *vis_hash_find(struct bat_priv *bat_priv,
109 void *data)
110{
111 struct hashtable_t *hash = bat_priv->vis_hash;
112 struct hlist_head *head;
113 struct hlist_node *node;
114 struct vis_info *vis_info, *vis_info_tmp = NULL;
115 int index;
116
117 if (!hash)
118 return NULL;
119
120 index = vis_info_choose(data, hash->size);
121 head = &hash->table[index];
122
123 rcu_read_lock();
124 hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) {
125 if (!vis_info_cmp(node, data))
126 continue;
127
128 vis_info_tmp = vis_info;
129 break;
130 }
131 rcu_read_unlock();
132
133 return vis_info_tmp;
134}
135
107/* insert interface to the list of interfaces of one originator, if it 136/* insert interface to the list of interfaces of one originator, if it
108 * does not already exist in the list */ 137 * does not already exist in the list */
109static void vis_data_insert_interface(const uint8_t *interface, 138static void vis_data_insert_interface(const uint8_t *interface,
@@ -114,7 +143,7 @@ static void vis_data_insert_interface(const uint8_t *interface,
114 struct hlist_node *pos; 143 struct hlist_node *pos;
115 144
116 hlist_for_each_entry(entry, pos, if_list, list) { 145 hlist_for_each_entry(entry, pos, if_list, list) {
117 if (compare_orig(entry->addr, (void *)interface)) 146 if (compare_eth(entry->addr, (void *)interface))
118 return; 147 return;
119 } 148 }
120 149
@@ -165,8 +194,8 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry,
165{ 194{
166 /* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */ 195 /* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */
167 if (primary && entry->quality == 0) 196 if (primary && entry->quality == 0)
168 return sprintf(buff, "HNA %pM, ", entry->dest); 197 return sprintf(buff, "TT %pM, ", entry->dest);
169 else if (compare_orig(entry->src, src)) 198 else if (compare_eth(entry->src, src))
170 return sprintf(buff, "TQ %pM %d, ", entry->dest, 199 return sprintf(buff, "TQ %pM %d, ", entry->dest,
171 entry->quality); 200 entry->quality);
172 201
@@ -175,9 +204,9 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry,
175 204
176int vis_seq_print_text(struct seq_file *seq, void *offset) 205int vis_seq_print_text(struct seq_file *seq, void *offset)
177{ 206{
178 struct hlist_node *walk; 207 struct hard_iface *primary_if;
208 struct hlist_node *node;
179 struct hlist_head *head; 209 struct hlist_head *head;
180 struct element_t *bucket;
181 struct vis_info *info; 210 struct vis_info *info;
182 struct vis_packet *packet; 211 struct vis_packet *packet;
183 struct vis_info_entry *entries; 212 struct vis_info_entry *entries;
@@ -187,15 +216,18 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
187 HLIST_HEAD(vis_if_list); 216 HLIST_HEAD(vis_if_list);
188 struct if_list_entry *entry; 217 struct if_list_entry *entry;
189 struct hlist_node *pos, *n; 218 struct hlist_node *pos, *n;
190 int i, j; 219 int i, j, ret = 0;
191 int vis_server = atomic_read(&bat_priv->vis_mode); 220 int vis_server = atomic_read(&bat_priv->vis_mode);
192 size_t buff_pos, buf_size; 221 size_t buff_pos, buf_size;
193 char *buff; 222 char *buff;
194 int compare; 223 int compare;
195 224
196 if ((!bat_priv->primary_if) || 225 primary_if = primary_if_get_selected(bat_priv);
197 (vis_server == VIS_TYPE_CLIENT_UPDATE)) 226 if (!primary_if)
198 return 0; 227 goto out;
228
229 if (vis_server == VIS_TYPE_CLIENT_UPDATE)
230 goto out;
199 231
200 buf_size = 1; 232 buf_size = 1;
201 /* Estimate length */ 233 /* Estimate length */
@@ -203,8 +235,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
203 for (i = 0; i < hash->size; i++) { 235 for (i = 0; i < hash->size; i++) {
204 head = &hash->table[i]; 236 head = &hash->table[i];
205 237
206 hlist_for_each_entry(bucket, walk, head, hlist) { 238 rcu_read_lock();
207 info = bucket->data; 239 hlist_for_each_entry_rcu(info, node, head, hash_entry) {
208 packet = (struct vis_packet *)info->skb_packet->data; 240 packet = (struct vis_packet *)info->skb_packet->data;
209 entries = (struct vis_info_entry *) 241 entries = (struct vis_info_entry *)
210 ((char *)packet + sizeof(struct vis_packet)); 242 ((char *)packet + sizeof(struct vis_packet));
@@ -213,7 +245,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
213 if (entries[j].quality == 0) 245 if (entries[j].quality == 0)
214 continue; 246 continue;
215 compare = 247 compare =
216 compare_orig(entries[j].src, packet->vis_orig); 248 compare_eth(entries[j].src, packet->vis_orig);
217 vis_data_insert_interface(entries[j].src, 249 vis_data_insert_interface(entries[j].src,
218 &vis_if_list, 250 &vis_if_list,
219 compare); 251 compare);
@@ -223,7 +255,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
223 buf_size += 18 + 26 * packet->entries; 255 buf_size += 18 + 26 * packet->entries;
224 256
225 /* add primary/secondary records */ 257 /* add primary/secondary records */
226 if (compare_orig(entry->addr, packet->vis_orig)) 258 if (compare_eth(entry->addr, packet->vis_orig))
227 buf_size += 259 buf_size +=
228 vis_data_count_prim_sec(&vis_if_list); 260 vis_data_count_prim_sec(&vis_if_list);
229 261
@@ -236,12 +268,14 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
236 kfree(entry); 268 kfree(entry);
237 } 269 }
238 } 270 }
271 rcu_read_unlock();
239 } 272 }
240 273
241 buff = kmalloc(buf_size, GFP_ATOMIC); 274 buff = kmalloc(buf_size, GFP_ATOMIC);
242 if (!buff) { 275 if (!buff) {
243 spin_unlock_bh(&bat_priv->vis_hash_lock); 276 spin_unlock_bh(&bat_priv->vis_hash_lock);
244 return -ENOMEM; 277 ret = -ENOMEM;
278 goto out;
245 } 279 }
246 buff[0] = '\0'; 280 buff[0] = '\0';
247 buff_pos = 0; 281 buff_pos = 0;
@@ -249,8 +283,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
249 for (i = 0; i < hash->size; i++) { 283 for (i = 0; i < hash->size; i++) {
250 head = &hash->table[i]; 284 head = &hash->table[i];
251 285
252 hlist_for_each_entry(bucket, walk, head, hlist) { 286 rcu_read_lock();
253 info = bucket->data; 287 hlist_for_each_entry_rcu(info, node, head, hash_entry) {
254 packet = (struct vis_packet *)info->skb_packet->data; 288 packet = (struct vis_packet *)info->skb_packet->data;
255 entries = (struct vis_info_entry *) 289 entries = (struct vis_info_entry *)
256 ((char *)packet + sizeof(struct vis_packet)); 290 ((char *)packet + sizeof(struct vis_packet));
@@ -259,7 +293,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
259 if (entries[j].quality == 0) 293 if (entries[j].quality == 0)
260 continue; 294 continue;
261 compare = 295 compare =
262 compare_orig(entries[j].src, packet->vis_orig); 296 compare_eth(entries[j].src, packet->vis_orig);
263 vis_data_insert_interface(entries[j].src, 297 vis_data_insert_interface(entries[j].src,
264 &vis_if_list, 298 &vis_if_list,
265 compare); 299 compare);
@@ -277,7 +311,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
277 entry->primary); 311 entry->primary);
278 312
279 /* add primary/secondary records */ 313 /* add primary/secondary records */
280 if (compare_orig(entry->addr, packet->vis_orig)) 314 if (compare_eth(entry->addr, packet->vis_orig))
281 buff_pos += 315 buff_pos +=
282 vis_data_read_prim_sec(buff + buff_pos, 316 vis_data_read_prim_sec(buff + buff_pos,
283 &vis_if_list); 317 &vis_if_list);
@@ -291,6 +325,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
291 kfree(entry); 325 kfree(entry);
292 } 326 }
293 } 327 }
328 rcu_read_unlock();
294 } 329 }
295 330
296 spin_unlock_bh(&bat_priv->vis_hash_lock); 331 spin_unlock_bh(&bat_priv->vis_hash_lock);
@@ -298,7 +333,10 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
298 seq_printf(seq, "%s", buff); 333 seq_printf(seq, "%s", buff);
299 kfree(buff); 334 kfree(buff);
300 335
301 return 0; 336out:
337 if (primary_if)
338 hardif_free_ref(primary_if);
339 return ret;
302} 340}
303 341
304/* add the info packet to the send list, if it was not 342/* add the info packet to the send list, if it was not
@@ -345,7 +383,7 @@ static int recv_list_is_in(struct bat_priv *bat_priv,
345 383
346 spin_lock_bh(&bat_priv->vis_list_lock); 384 spin_lock_bh(&bat_priv->vis_list_lock);
347 list_for_each_entry(entry, recv_list, list) { 385 list_for_each_entry(entry, recv_list, list) {
348 if (memcmp(entry->mac, mac, ETH_ALEN) == 0) { 386 if (compare_eth(entry->mac, mac)) {
349 spin_unlock_bh(&bat_priv->vis_list_lock); 387 spin_unlock_bh(&bat_priv->vis_list_lock);
350 return 1; 388 return 1;
351 } 389 }
@@ -381,8 +419,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
381 sizeof(struct vis_packet)); 419 sizeof(struct vis_packet));
382 420
383 memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); 421 memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN);
384 old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, 422 old_info = vis_hash_find(bat_priv, &search_elem);
385 &search_elem);
386 kfree_skb(search_elem.skb_packet); 423 kfree_skb(search_elem.skb_packet);
387 424
388 if (old_info) { 425 if (old_info) {
@@ -442,7 +479,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
442 479
443 /* try to add it */ 480 /* try to add it */
444 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, 481 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
445 info); 482 info, &info->hash_entry);
446 if (hash_added < 0) { 483 if (hash_added < 0) {
447 /* did not work (for some reason) */ 484 /* did not work (for some reason) */
448 kref_put(&info->refcount, free_info); 485 kref_put(&info->refcount, free_info);
@@ -529,9 +566,9 @@ static int find_best_vis_server(struct bat_priv *bat_priv,
529 struct vis_info *info) 566 struct vis_info *info)
530{ 567{
531 struct hashtable_t *hash = bat_priv->orig_hash; 568 struct hashtable_t *hash = bat_priv->orig_hash;
532 struct hlist_node *walk; 569 struct neigh_node *router;
570 struct hlist_node *node;
533 struct hlist_head *head; 571 struct hlist_head *head;
534 struct element_t *bucket;
535 struct orig_node *orig_node; 572 struct orig_node *orig_node;
536 struct vis_packet *packet; 573 struct vis_packet *packet;
537 int best_tq = -1, i; 574 int best_tq = -1, i;
@@ -541,16 +578,21 @@ static int find_best_vis_server(struct bat_priv *bat_priv,
541 for (i = 0; i < hash->size; i++) { 578 for (i = 0; i < hash->size; i++) {
542 head = &hash->table[i]; 579 head = &hash->table[i];
543 580
544 hlist_for_each_entry(bucket, walk, head, hlist) { 581 rcu_read_lock();
545 orig_node = bucket->data; 582 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
546 if ((orig_node) && (orig_node->router) && 583 router = orig_node_get_router(orig_node);
547 (orig_node->flags & VIS_SERVER) && 584 if (!router)
548 (orig_node->router->tq_avg > best_tq)) { 585 continue;
549 best_tq = orig_node->router->tq_avg; 586
587 if ((orig_node->flags & VIS_SERVER) &&
588 (router->tq_avg > best_tq)) {
589 best_tq = router->tq_avg;
550 memcpy(packet->target_orig, orig_node->orig, 590 memcpy(packet->target_orig, orig_node->orig,
551 ETH_ALEN); 591 ETH_ALEN);
552 } 592 }
593 neigh_node_free_ref(router);
553 } 594 }
595 rcu_read_unlock();
554 } 596 }
555 597
556 return best_tq; 598 return best_tq;
@@ -573,21 +615,19 @@ static bool vis_packet_full(struct vis_info *info)
573static int generate_vis_packet(struct bat_priv *bat_priv) 615static int generate_vis_packet(struct bat_priv *bat_priv)
574{ 616{
575 struct hashtable_t *hash = bat_priv->orig_hash; 617 struct hashtable_t *hash = bat_priv->orig_hash;
576 struct hlist_node *walk; 618 struct hlist_node *node;
577 struct hlist_head *head; 619 struct hlist_head *head;
578 struct element_t *bucket;
579 struct orig_node *orig_node; 620 struct orig_node *orig_node;
580 struct neigh_node *neigh_node; 621 struct neigh_node *router;
581 struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info; 622 struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info;
582 struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data; 623 struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data;
583 struct vis_info_entry *entry; 624 struct vis_info_entry *entry;
584 struct hna_local_entry *hna_local_entry; 625 struct tt_local_entry *tt_local_entry;
585 int best_tq = -1, i; 626 int best_tq = -1, i;
586 627
587 info->first_seen = jiffies; 628 info->first_seen = jiffies;
588 packet->vis_type = atomic_read(&bat_priv->vis_mode); 629 packet->vis_type = atomic_read(&bat_priv->vis_mode);
589 630
590 spin_lock_bh(&bat_priv->orig_hash_lock);
591 memcpy(packet->target_orig, broadcast_addr, ETH_ALEN); 631 memcpy(packet->target_orig, broadcast_addr, ETH_ALEN);
592 packet->ttl = TTL; 632 packet->ttl = TTL;
593 packet->seqno = htonl(ntohl(packet->seqno) + 1); 633 packet->seqno = htonl(ntohl(packet->seqno) + 1);
@@ -597,74 +637,74 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
597 if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) { 637 if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) {
598 best_tq = find_best_vis_server(bat_priv, info); 638 best_tq = find_best_vis_server(bat_priv, info);
599 639
600 if (best_tq < 0) { 640 if (best_tq < 0)
601 spin_unlock_bh(&bat_priv->orig_hash_lock);
602 return -1; 641 return -1;
603 }
604 } 642 }
605 643
606 for (i = 0; i < hash->size; i++) { 644 for (i = 0; i < hash->size; i++) {
607 head = &hash->table[i]; 645 head = &hash->table[i];
608 646
609 hlist_for_each_entry(bucket, walk, head, hlist) { 647 rcu_read_lock();
610 orig_node = bucket->data; 648 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
611 neigh_node = orig_node->router; 649 router = orig_node_get_router(orig_node);
612 650 if (!router)
613 if (!neigh_node)
614 continue; 651 continue;
615 652
616 if (!compare_orig(neigh_node->addr, orig_node->orig)) 653 if (!compare_eth(router->addr, orig_node->orig))
617 continue; 654 goto next;
618 655
619 if (neigh_node->if_incoming->if_status != IF_ACTIVE) 656 if (router->if_incoming->if_status != IF_ACTIVE)
620 continue; 657 goto next;
621 658
622 if (neigh_node->tq_avg < 1) 659 if (router->tq_avg < 1)
623 continue; 660 goto next;
624 661
625 /* fill one entry into buffer. */ 662 /* fill one entry into buffer. */
626 entry = (struct vis_info_entry *) 663 entry = (struct vis_info_entry *)
627 skb_put(info->skb_packet, sizeof(*entry)); 664 skb_put(info->skb_packet, sizeof(*entry));
628 memcpy(entry->src, 665 memcpy(entry->src,
629 neigh_node->if_incoming->net_dev->dev_addr, 666 router->if_incoming->net_dev->dev_addr,
630 ETH_ALEN); 667 ETH_ALEN);
631 memcpy(entry->dest, orig_node->orig, ETH_ALEN); 668 memcpy(entry->dest, orig_node->orig, ETH_ALEN);
632 entry->quality = neigh_node->tq_avg; 669 entry->quality = router->tq_avg;
633 packet->entries++; 670 packet->entries++;
634 671
635 if (vis_packet_full(info)) { 672next:
636 spin_unlock_bh(&bat_priv->orig_hash_lock); 673 neigh_node_free_ref(router);
637 return 0; 674
638 } 675 if (vis_packet_full(info))
676 goto unlock;
639 } 677 }
678 rcu_read_unlock();
640 } 679 }
641 680
642 spin_unlock_bh(&bat_priv->orig_hash_lock); 681 hash = bat_priv->tt_local_hash;
643
644 hash = bat_priv->hna_local_hash;
645 682
646 spin_lock_bh(&bat_priv->hna_lhash_lock); 683 spin_lock_bh(&bat_priv->tt_lhash_lock);
647 for (i = 0; i < hash->size; i++) { 684 for (i = 0; i < hash->size; i++) {
648 head = &hash->table[i]; 685 head = &hash->table[i];
649 686
650 hlist_for_each_entry(bucket, walk, head, hlist) { 687 hlist_for_each_entry(tt_local_entry, node, head, hash_entry) {
651 hna_local_entry = bucket->data;
652 entry = (struct vis_info_entry *) 688 entry = (struct vis_info_entry *)
653 skb_put(info->skb_packet, 689 skb_put(info->skb_packet,
654 sizeof(*entry)); 690 sizeof(*entry));
655 memset(entry->src, 0, ETH_ALEN); 691 memset(entry->src, 0, ETH_ALEN);
656 memcpy(entry->dest, hna_local_entry->addr, ETH_ALEN); 692 memcpy(entry->dest, tt_local_entry->addr, ETH_ALEN);
657 entry->quality = 0; /* 0 means HNA */ 693 entry->quality = 0; /* 0 means TT */
658 packet->entries++; 694 packet->entries++;
659 695
660 if (vis_packet_full(info)) { 696 if (vis_packet_full(info)) {
661 spin_unlock_bh(&bat_priv->hna_lhash_lock); 697 spin_unlock_bh(&bat_priv->tt_lhash_lock);
662 return 0; 698 return 0;
663 } 699 }
664 } 700 }
665 } 701 }
666 702
667 spin_unlock_bh(&bat_priv->hna_lhash_lock); 703 spin_unlock_bh(&bat_priv->tt_lhash_lock);
704 return 0;
705
706unlock:
707 rcu_read_unlock();
668 return 0; 708 return 0;
669} 709}
670 710
@@ -674,25 +714,22 @@ static void purge_vis_packets(struct bat_priv *bat_priv)
674{ 714{
675 int i; 715 int i;
676 struct hashtable_t *hash = bat_priv->vis_hash; 716 struct hashtable_t *hash = bat_priv->vis_hash;
677 struct hlist_node *walk, *safe; 717 struct hlist_node *node, *node_tmp;
678 struct hlist_head *head; 718 struct hlist_head *head;
679 struct element_t *bucket;
680 struct vis_info *info; 719 struct vis_info *info;
681 720
682 for (i = 0; i < hash->size; i++) { 721 for (i = 0; i < hash->size; i++) {
683 head = &hash->table[i]; 722 head = &hash->table[i];
684 723
685 hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { 724 hlist_for_each_entry_safe(info, node, node_tmp,
686 info = bucket->data; 725 head, hash_entry) {
687
688 /* never purge own data. */ 726 /* never purge own data. */
689 if (info == bat_priv->my_vis_info) 727 if (info == bat_priv->my_vis_info)
690 continue; 728 continue;
691 729
692 if (time_after(jiffies, 730 if (time_after(jiffies,
693 info->first_seen + VIS_TIMEOUT * HZ)) { 731 info->first_seen + VIS_TIMEOUT * HZ)) {
694 hlist_del(walk); 732 hlist_del(node);
695 kfree(bucket);
696 send_list_del(info); 733 send_list_del(info);
697 kref_put(&info->refcount, free_info); 734 kref_put(&info->refcount, free_info);
698 } 735 }
@@ -703,103 +740,103 @@ static void purge_vis_packets(struct bat_priv *bat_priv)
703static void broadcast_vis_packet(struct bat_priv *bat_priv, 740static void broadcast_vis_packet(struct bat_priv *bat_priv,
704 struct vis_info *info) 741 struct vis_info *info)
705{ 742{
743 struct neigh_node *router;
706 struct hashtable_t *hash = bat_priv->orig_hash; 744 struct hashtable_t *hash = bat_priv->orig_hash;
707 struct hlist_node *walk; 745 struct hlist_node *node;
708 struct hlist_head *head; 746 struct hlist_head *head;
709 struct element_t *bucket;
710 struct orig_node *orig_node; 747 struct orig_node *orig_node;
711 struct vis_packet *packet; 748 struct vis_packet *packet;
712 struct sk_buff *skb; 749 struct sk_buff *skb;
713 struct batman_if *batman_if; 750 struct hard_iface *hard_iface;
714 uint8_t dstaddr[ETH_ALEN]; 751 uint8_t dstaddr[ETH_ALEN];
715 int i; 752 int i;
716 753
717 754
718 spin_lock_bh(&bat_priv->orig_hash_lock);
719 packet = (struct vis_packet *)info->skb_packet->data; 755 packet = (struct vis_packet *)info->skb_packet->data;
720 756
721 /* send to all routers in range. */ 757 /* send to all routers in range. */
722 for (i = 0; i < hash->size; i++) { 758 for (i = 0; i < hash->size; i++) {
723 head = &hash->table[i]; 759 head = &hash->table[i];
724 760
725 hlist_for_each_entry(bucket, walk, head, hlist) { 761 rcu_read_lock();
726 orig_node = bucket->data; 762 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
727
728 /* if it's a vis server and reachable, send it. */ 763 /* if it's a vis server and reachable, send it. */
729 if ((!orig_node) || (!orig_node->router))
730 continue;
731 if (!(orig_node->flags & VIS_SERVER)) 764 if (!(orig_node->flags & VIS_SERVER))
732 continue; 765 continue;
766
767 router = orig_node_get_router(orig_node);
768 if (!router)
769 continue;
770
733 /* don't send it if we already received the packet from 771 /* don't send it if we already received the packet from
734 * this node. */ 772 * this node. */
735 if (recv_list_is_in(bat_priv, &info->recv_list, 773 if (recv_list_is_in(bat_priv, &info->recv_list,
736 orig_node->orig)) 774 orig_node->orig)) {
775 neigh_node_free_ref(router);
737 continue; 776 continue;
777 }
738 778
739 memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); 779 memcpy(packet->target_orig, orig_node->orig, ETH_ALEN);
740 batman_if = orig_node->router->if_incoming; 780 hard_iface = router->if_incoming;
741 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); 781 memcpy(dstaddr, router->addr, ETH_ALEN);
742 spin_unlock_bh(&bat_priv->orig_hash_lock); 782
783 neigh_node_free_ref(router);
743 784
744 skb = skb_clone(info->skb_packet, GFP_ATOMIC); 785 skb = skb_clone(info->skb_packet, GFP_ATOMIC);
745 if (skb) 786 if (skb)
746 send_skb_packet(skb, batman_if, dstaddr); 787 send_skb_packet(skb, hard_iface, dstaddr);
747 788
748 spin_lock_bh(&bat_priv->orig_hash_lock);
749 } 789 }
750 790 rcu_read_unlock();
751 } 791 }
752
753 spin_unlock_bh(&bat_priv->orig_hash_lock);
754} 792}
755 793
756static void unicast_vis_packet(struct bat_priv *bat_priv, 794static void unicast_vis_packet(struct bat_priv *bat_priv,
757 struct vis_info *info) 795 struct vis_info *info)
758{ 796{
759 struct orig_node *orig_node; 797 struct orig_node *orig_node;
798 struct neigh_node *router = NULL;
760 struct sk_buff *skb; 799 struct sk_buff *skb;
761 struct vis_packet *packet; 800 struct vis_packet *packet;
762 struct batman_if *batman_if;
763 uint8_t dstaddr[ETH_ALEN];
764 801
765 spin_lock_bh(&bat_priv->orig_hash_lock);
766 packet = (struct vis_packet *)info->skb_packet->data; 802 packet = (struct vis_packet *)info->skb_packet->data;
767 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
768 compare_orig, choose_orig,
769 packet->target_orig));
770 803
771 if ((!orig_node) || (!orig_node->router)) 804 orig_node = orig_hash_find(bat_priv, packet->target_orig);
805 if (!orig_node)
772 goto out; 806 goto out;
773 807
774 /* don't lock while sending the packets ... we therefore 808 router = orig_node_get_router(orig_node);
775 * copy the required data before sending */ 809 if (!router)
776 batman_if = orig_node->router->if_incoming; 810 goto out;
777 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
778 spin_unlock_bh(&bat_priv->orig_hash_lock);
779 811
780 skb = skb_clone(info->skb_packet, GFP_ATOMIC); 812 skb = skb_clone(info->skb_packet, GFP_ATOMIC);
781 if (skb) 813 if (skb)
782 send_skb_packet(skb, batman_if, dstaddr); 814 send_skb_packet(skb, router->if_incoming, router->addr);
783
784 return;
785 815
786out: 816out:
787 spin_unlock_bh(&bat_priv->orig_hash_lock); 817 if (router)
818 neigh_node_free_ref(router);
819 if (orig_node)
820 orig_node_free_ref(orig_node);
788} 821}
789 822
790/* only send one vis packet. called from send_vis_packets() */ 823/* only send one vis packet. called from send_vis_packets() */
791static void send_vis_packet(struct bat_priv *bat_priv, struct vis_info *info) 824static void send_vis_packet(struct bat_priv *bat_priv, struct vis_info *info)
792{ 825{
826 struct hard_iface *primary_if;
793 struct vis_packet *packet; 827 struct vis_packet *packet;
794 828
829 primary_if = primary_if_get_selected(bat_priv);
830 if (!primary_if)
831 goto out;
832
795 packet = (struct vis_packet *)info->skb_packet->data; 833 packet = (struct vis_packet *)info->skb_packet->data;
796 if (packet->ttl < 2) { 834 if (packet->ttl < 2) {
797 pr_debug("Error - can't send vis packet: ttl exceeded\n"); 835 pr_debug("Error - can't send vis packet: ttl exceeded\n");
798 return; 836 goto out;
799 } 837 }
800 838
801 memcpy(packet->sender_orig, bat_priv->primary_if->net_dev->dev_addr, 839 memcpy(packet->sender_orig, primary_if->net_dev->dev_addr, ETH_ALEN);
802 ETH_ALEN);
803 packet->ttl--; 840 packet->ttl--;
804 841
805 if (is_broadcast_ether_addr(packet->target_orig)) 842 if (is_broadcast_ether_addr(packet->target_orig))
@@ -807,6 +844,10 @@ static void send_vis_packet(struct bat_priv *bat_priv, struct vis_info *info)
807 else 844 else
808 unicast_vis_packet(bat_priv, info); 845 unicast_vis_packet(bat_priv, info);
809 packet->ttl++; /* restore TTL */ 846 packet->ttl++; /* restore TTL */
847
848out:
849 if (primary_if)
850 hardif_free_ref(primary_if);
810} 851}
811 852
812/* called from timer; send (and maybe generate) vis packet. */ 853/* called from timer; send (and maybe generate) vis packet. */
@@ -833,8 +874,7 @@ static void send_vis_packets(struct work_struct *work)
833 kref_get(&info->refcount); 874 kref_get(&info->refcount);
834 spin_unlock_bh(&bat_priv->vis_hash_lock); 875 spin_unlock_bh(&bat_priv->vis_hash_lock);
835 876
836 if (bat_priv->primary_if) 877 send_vis_packet(bat_priv, info);
837 send_vis_packet(bat_priv, info);
838 878
839 spin_lock_bh(&bat_priv->vis_hash_lock); 879 spin_lock_bh(&bat_priv->vis_hash_lock);
840 send_list_del(info); 880 send_list_del(info);
@@ -896,7 +936,8 @@ int vis_init(struct bat_priv *bat_priv)
896 INIT_LIST_HEAD(&bat_priv->vis_send_list); 936 INIT_LIST_HEAD(&bat_priv->vis_send_list);
897 937
898 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, 938 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
899 bat_priv->my_vis_info); 939 bat_priv->my_vis_info,
940 &bat_priv->my_vis_info->hash_entry);
900 if (hash_added < 0) { 941 if (hash_added < 0) {
901 pr_err("Can't add own vis packet into hash\n"); 942 pr_err("Can't add own vis packet into hash\n");
902 /* not in hash, need to remove it manually. */ 943 /* not in hash, need to remove it manually. */
@@ -918,10 +959,11 @@ err:
918} 959}
919 960
920/* Decrease the reference count on a hash item info */ 961/* Decrease the reference count on a hash item info */
921static void free_info_ref(void *data, void *arg) 962static void free_info_ref(struct hlist_node *node, void *arg)
922{ 963{
923 struct vis_info *info = data; 964 struct vis_info *info;
924 965
966 info = container_of(node, struct vis_info, hash_entry);
925 send_list_del(info); 967 send_list_del(info);
926 kref_put(&info->refcount, free_info); 968 kref_put(&info->refcount, free_info);
927} 969}
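
vis.c shows the full shape of the element_t removal: entries embed their own hlist_node (hash_entry), compare and free callbacks receive that node instead of a bare payload pointer, and hash_add() is handed the node it should splice in. The matching hash.h changes are listed in the diffstat but not included in this excerpt; inferred from the call sites here, the new interface looks roughly like:

/* Inferred from the call sites in this patch; the real declarations are in
 * hash.h, outside this excerpt, and the typedef names are illustrative. */
typedef int (*hashdata_compare_cb)(struct hlist_node *, void *);
typedef int (*hashdata_choose_cb)(void *, int);
typedef void (*hashdata_free_cb)(struct hlist_node *, void *);

int hash_add(struct hashtable_t *hash,
	     hashdata_compare_cb compare, hashdata_choose_cb choose,
	     void *data, struct hlist_node *data_node);

void hash_delete(struct hashtable_t *hash, hashdata_free_cb free_cb, void *arg);
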
diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h
index 2c3b33089a9b..31b820d07f23 100644
--- a/net/batman-adv/vis.h
+++ b/net/batman-adv/vis.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index ed371684c133..6ae5ec508587 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -27,31 +27,27 @@ menuconfig BT
27 compile it as module (bluetooth). 27 compile it as module (bluetooth).
28 28
29 To use Linux Bluetooth subsystem, you will need several user-space 29 To use Linux Bluetooth subsystem, you will need several user-space
30 utilities like hciconfig and hcid. These utilities and updates to 30 utilities like hciconfig and bluetoothd. These utilities and updates
31 Bluetooth kernel modules are provided in the BlueZ packages. 31 to Bluetooth kernel modules are provided in the BlueZ packages. For
32 For more information, see <http://www.bluez.org/>. 32 more information, see <http://www.bluez.org/>.
33
34if BT != n
33 35
34config BT_L2CAP 36config BT_L2CAP
35 tristate "L2CAP protocol support" 37 bool "L2CAP protocol support"
36 depends on BT
37 select CRC16 38 select CRC16
38 help 39 help
39 L2CAP (Logical Link Control and Adaptation Protocol) provides 40 L2CAP (Logical Link Control and Adaptation Protocol) provides
40 connection oriented and connection-less data transport. L2CAP 41 connection oriented and connection-less data transport. L2CAP
41 support is required for most Bluetooth applications. 42 support is required for most Bluetooth applications.
42 43
43 Say Y here to compile L2CAP support into the kernel or say M to
44 compile it as module (l2cap).
45
46config BT_SCO 44config BT_SCO
47 tristate "SCO links support" 45 bool "SCO links support"
48 depends on BT
49 help 46 help
50 SCO link provides voice transport over Bluetooth. SCO support is 47 SCO link provides voice transport over Bluetooth. SCO support is
51 required for voice applications like Headset and Audio. 48 required for voice applications like Headset and Audio.
52 49
53 Say Y here to compile SCO support into the kernel or say M to 50endif
54 compile it as module (sco).
55 51
56source "net/bluetooth/rfcomm/Kconfig" 52source "net/bluetooth/rfcomm/Kconfig"
57 53
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 250f954f0213..f04fe9a9d634 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -3,11 +3,11 @@
3# 3#
4 4
5obj-$(CONFIG_BT) += bluetooth.o 5obj-$(CONFIG_BT) += bluetooth.o
6obj-$(CONFIG_BT_L2CAP) += l2cap.o
7obj-$(CONFIG_BT_SCO) += sco.o
8obj-$(CONFIG_BT_RFCOMM) += rfcomm/ 6obj-$(CONFIG_BT_RFCOMM) += rfcomm/
9obj-$(CONFIG_BT_BNEP) += bnep/ 7obj-$(CONFIG_BT_BNEP) += bnep/
10obj-$(CONFIG_BT_CMTP) += cmtp/ 8obj-$(CONFIG_BT_CMTP) += cmtp/
11obj-$(CONFIG_BT_HIDP) += hidp/ 9obj-$(CONFIG_BT_HIDP) += hidp/
12 10
13bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o 11bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o
12bluetooth-$(CONFIG_BT_L2CAP) += l2cap_core.o l2cap_sock.o
13bluetooth-$(CONFIG_BT_SCO) += sco.o
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c4cf3f595004..8add9b499912 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -40,7 +40,7 @@
40 40
41#include <net/bluetooth/bluetooth.h> 41#include <net/bluetooth/bluetooth.h>
42 42
43#define VERSION "2.15" 43#define VERSION "2.16"
44 44
45/* Bluetooth sockets */ 45/* Bluetooth sockets */
46#define BT_MAX_PROTO 8 46#define BT_MAX_PROTO 8
@@ -199,14 +199,15 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
199 199
200 BT_DBG("parent %p", parent); 200 BT_DBG("parent %p", parent);
201 201
202 local_bh_disable();
202 list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { 203 list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
203 sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); 204 sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
204 205
205 lock_sock(sk); 206 bh_lock_sock(sk);
206 207
207 /* FIXME: Is this check still needed */ 208 /* FIXME: Is this check still needed */
208 if (sk->sk_state == BT_CLOSED) { 209 if (sk->sk_state == BT_CLOSED) {
209 release_sock(sk); 210 bh_unlock_sock(sk);
210 bt_accept_unlink(sk); 211 bt_accept_unlink(sk);
211 continue; 212 continue;
212 } 213 }
@@ -216,12 +217,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
216 bt_accept_unlink(sk); 217 bt_accept_unlink(sk);
217 if (newsock) 218 if (newsock)
218 sock_graft(sk, newsock); 219 sock_graft(sk, newsock);
219 release_sock(sk); 220
221 bh_unlock_sock(sk);
222 local_bh_enable();
220 return sk; 223 return sk;
221 } 224 }
222 225
223 release_sock(sk); 226 bh_unlock_sock(sk);
224 } 227 }
228 local_bh_enable();
229
225 return NULL; 230 return NULL;
226} 231}
227EXPORT_SYMBOL(bt_accept_dequeue); 232EXPORT_SYMBOL(bt_accept_dequeue);
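bt_accept_dequeue() now walks the accept queue with bottom halves disabled and takes bh_lock_sock(), the non-sleeping spinlock half of the socket lock, instead of lock_sock(). A hedged sketch of that locking idiom (helper name is illustrative):

	#include <net/sock.h>
	#include <net/bluetooth/bluetooth.h>

	/* Sketch: peek at a socket's state from process context without sleeping,
	 * while keeping softirq handlers that also touch the socket off this CPU. */
	static int sk_is_connected(struct sock *sk)
	{
		int connected;

		local_bh_disable();		/* no softirqs on this CPU for the moment */
		bh_lock_sock(sk);		/* spinlock half of the socket lock */

		connected = (sk->sk_state == BT_CONNECTED);

		bh_unlock_sock(sk);
		local_bh_enable();

		return connected;
	}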
@@ -240,7 +245,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
240 if (flags & (MSG_OOB)) 245 if (flags & (MSG_OOB))
241 return -EOPNOTSUPP; 246 return -EOPNOTSUPP;
242 247
243 if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) { 248 skb = skb_recv_datagram(sk, flags, noblock, &err);
249 if (!skb) {
244 if (sk->sk_shutdown & RCV_SHUTDOWN) 250 if (sk->sk_shutdown & RCV_SHUTDOWN)
245 return 0; 251 return 0;
246 return err; 252 return err;
@@ -323,7 +329,8 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
323 if (copied >= target) 329 if (copied >= target)
324 break; 330 break;
325 331
326 if ((err = sock_error(sk)) != 0) 332 err = sock_error(sk);
333 if (err)
327 break; 334 break;
328 if (sk->sk_shutdown & RCV_SHUTDOWN) 335 if (sk->sk_shutdown & RCV_SHUTDOWN)
329 break; 336 break;
@@ -390,7 +397,7 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
390 return 0; 397 return 0;
391} 398}
392 399
393unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait) 400unsigned int bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
394{ 401{
395 struct sock *sk = sock->sk; 402 struct sock *sk = sock->sk;
396 unsigned int mask = 0; 403 unsigned int mask = 0;
@@ -538,13 +545,39 @@ static int __init bt_init(void)
538 545
539 BT_INFO("HCI device and connection manager initialized"); 546 BT_INFO("HCI device and connection manager initialized");
540 547
541 hci_sock_init(); 548 err = hci_sock_init();
549 if (err < 0)
550 goto error;
551
552 err = l2cap_init();
553 if (err < 0)
554 goto sock_err;
555
556 err = sco_init();
557 if (err < 0) {
558 l2cap_exit();
559 goto sock_err;
560 }
542 561
543 return 0; 562 return 0;
563
564sock_err:
565 hci_sock_cleanup();
566
567error:
568 sock_unregister(PF_BLUETOOTH);
569 bt_sysfs_cleanup();
570
571 return err;
544} 572}
545 573
546static void __exit bt_exit(void) 574static void __exit bt_exit(void)
547{ 575{
576
577 sco_exit();
578
579 l2cap_exit();
580
548 hci_sock_cleanup(); 581 hci_sock_cleanup();
549 582
550 sock_unregister(PF_BLUETOOTH); 583 sock_unregister(PF_BLUETOOTH);
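bt_init() now checks the return values of hci_sock_init(), l2cap_init() and sco_init() and unwinds in reverse order through goto labels, with bt_exit() tearing the same pieces down. A condensed sketch of that init/cleanup ladder with hypothetical sub-components (the subsys_* names are illustrative, not Bluetooth APIs):

	#include <linux/init.h>

	/* Hypothetical sub-components; each *_init returns 0 or a negative errno. */
	extern int  subsys_a_init(void);
	extern void subsys_a_exit(void);
	extern int  subsys_b_init(void);
	extern void subsys_b_exit(void);
	extern int  subsys_c_init(void);

	static int __init example_init(void)
	{
		int err;

		err = subsys_a_init();
		if (err < 0)
			return err;

		err = subsys_b_init();
		if (err < 0)
			goto err_a;

		err = subsys_c_init();
		if (err < 0)
			goto err_b;

		return 0;

	err_b:
		subsys_b_exit();
	err_a:
		subsys_a_exit();
		return err;
	}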
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index 70672544db86..8e6c06158f8e 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -23,88 +23,88 @@
23#include <linux/crc32.h> 23#include <linux/crc32.h>
24#include <net/bluetooth/bluetooth.h> 24#include <net/bluetooth/bluetooth.h>
25 25
26// Limits 26/* Limits */
27#define BNEP_MAX_PROTO_FILTERS 5 27#define BNEP_MAX_PROTO_FILTERS 5
28#define BNEP_MAX_MULTICAST_FILTERS 20 28#define BNEP_MAX_MULTICAST_FILTERS 20
29 29
30// UUIDs 30/* UUIDs */
31#define BNEP_BASE_UUID 0x0000000000001000800000805F9B34FB 31#define BNEP_BASE_UUID 0x0000000000001000800000805F9B34FB
32#define BNEP_UUID16 0x02 32#define BNEP_UUID16 0x02
33#define BNEP_UUID32 0x04 33#define BNEP_UUID32 0x04
34#define BNEP_UUID128 0x16 34#define BNEP_UUID128 0x16
35 35
36#define BNEP_SVC_PANU 0x1115 36#define BNEP_SVC_PANU 0x1115
37#define BNEP_SVC_NAP 0x1116 37#define BNEP_SVC_NAP 0x1116
38#define BNEP_SVC_GN 0x1117 38#define BNEP_SVC_GN 0x1117
39 39
40// Packet types 40/* Packet types */
41#define BNEP_GENERAL 0x00 41#define BNEP_GENERAL 0x00
42#define BNEP_CONTROL 0x01 42#define BNEP_CONTROL 0x01
43#define BNEP_COMPRESSED 0x02 43#define BNEP_COMPRESSED 0x02
44#define BNEP_COMPRESSED_SRC_ONLY 0x03 44#define BNEP_COMPRESSED_SRC_ONLY 0x03
45#define BNEP_COMPRESSED_DST_ONLY 0x04 45#define BNEP_COMPRESSED_DST_ONLY 0x04
46 46
47// Control types 47/* Control types */
48#define BNEP_CMD_NOT_UNDERSTOOD 0x00 48#define BNEP_CMD_NOT_UNDERSTOOD 0x00
49#define BNEP_SETUP_CONN_REQ 0x01 49#define BNEP_SETUP_CONN_REQ 0x01
50#define BNEP_SETUP_CONN_RSP 0x02 50#define BNEP_SETUP_CONN_RSP 0x02
51#define BNEP_FILTER_NET_TYPE_SET 0x03 51#define BNEP_FILTER_NET_TYPE_SET 0x03
52#define BNEP_FILTER_NET_TYPE_RSP 0x04 52#define BNEP_FILTER_NET_TYPE_RSP 0x04
53#define BNEP_FILTER_MULTI_ADDR_SET 0x05 53#define BNEP_FILTER_MULTI_ADDR_SET 0x05
54#define BNEP_FILTER_MULTI_ADDR_RSP 0x06 54#define BNEP_FILTER_MULTI_ADDR_RSP 0x06
55 55
56// Extension types 56/* Extension types */
57#define BNEP_EXT_CONTROL 0x00 57#define BNEP_EXT_CONTROL 0x00
58 58
59// Response messages 59/* Response messages */
60#define BNEP_SUCCESS 0x00 60#define BNEP_SUCCESS 0x00
61 61
62#define BNEP_CONN_INVALID_DST 0x01 62#define BNEP_CONN_INVALID_DST 0x01
63#define BNEP_CONN_INVALID_SRC 0x02 63#define BNEP_CONN_INVALID_SRC 0x02
64#define BNEP_CONN_INVALID_SVC 0x03 64#define BNEP_CONN_INVALID_SVC 0x03
65#define BNEP_CONN_NOT_ALLOWED 0x04 65#define BNEP_CONN_NOT_ALLOWED 0x04
66 66
67#define BNEP_FILTER_UNSUPPORTED_REQ 0x01 67#define BNEP_FILTER_UNSUPPORTED_REQ 0x01
68#define BNEP_FILTER_INVALID_RANGE 0x02 68#define BNEP_FILTER_INVALID_RANGE 0x02
69#define BNEP_FILTER_INVALID_MCADDR 0x02 69#define BNEP_FILTER_INVALID_MCADDR 0x02
70#define BNEP_FILTER_LIMIT_REACHED 0x03 70#define BNEP_FILTER_LIMIT_REACHED 0x03
71#define BNEP_FILTER_DENIED_SECURITY 0x04 71#define BNEP_FILTER_DENIED_SECURITY 0x04
72 72
73// L2CAP settings 73/* L2CAP settings */
74#define BNEP_MTU 1691 74#define BNEP_MTU 1691
75#define BNEP_PSM 0x0f 75#define BNEP_PSM 0x0f
76#define BNEP_FLUSH_TO 0xffff 76#define BNEP_FLUSH_TO 0xffff
77#define BNEP_CONNECT_TO 15 77#define BNEP_CONNECT_TO 15
78#define BNEP_FILTER_TO 15 78#define BNEP_FILTER_TO 15
79 79
80// Headers 80/* Headers */
81#define BNEP_TYPE_MASK 0x7f 81#define BNEP_TYPE_MASK 0x7f
82#define BNEP_EXT_HEADER 0x80 82#define BNEP_EXT_HEADER 0x80
83 83
84struct bnep_setup_conn_req { 84struct bnep_setup_conn_req {
85 __u8 type; 85 __u8 type;
86 __u8 ctrl; 86 __u8 ctrl;
87 __u8 uuid_size; 87 __u8 uuid_size;
88 __u8 service[0]; 88 __u8 service[0];
89} __packed; 89} __packed;
90 90
91struct bnep_set_filter_req { 91struct bnep_set_filter_req {
92 __u8 type; 92 __u8 type;
93 __u8 ctrl; 93 __u8 ctrl;
94 __be16 len; 94 __be16 len;
95 __u8 list[0]; 95 __u8 list[0];
96} __packed; 96} __packed;
97 97
98struct bnep_control_rsp { 98struct bnep_control_rsp {
99 __u8 type; 99 __u8 type;
100 __u8 ctrl; 100 __u8 ctrl;
101 __be16 resp; 101 __be16 resp;
102} __packed; 102} __packed;
103 103
104struct bnep_ext_hdr { 104struct bnep_ext_hdr {
105 __u8 type; 105 __u8 type;
106 __u8 len; 106 __u8 len;
107 __u8 data[0]; 107 __u8 data[0];
108} __packed; 108} __packed;
109 109
110/* BNEP ioctl defines */ 110/* BNEP ioctl defines */
@@ -114,10 +114,10 @@ struct bnep_ext_hdr {
114#define BNEPGETCONNINFO _IOR('B', 211, int) 114#define BNEPGETCONNINFO _IOR('B', 211, int)
115 115
116struct bnep_connadd_req { 116struct bnep_connadd_req {
117 int sock; // Connected socket 117 int sock; /* Connected socket */
118 __u32 flags; 118 __u32 flags;
119 __u16 role; 119 __u16 role;
120 char device[16]; // Name of the Ethernet device 120 char device[16]; /* Name of the Ethernet device */
121}; 121};
122 122
123struct bnep_conndel_req { 123struct bnep_conndel_req {
@@ -148,14 +148,14 @@ int bnep_del_connection(struct bnep_conndel_req *req);
148int bnep_get_connlist(struct bnep_connlist_req *req); 148int bnep_get_connlist(struct bnep_connlist_req *req);
149int bnep_get_conninfo(struct bnep_conninfo *ci); 149int bnep_get_conninfo(struct bnep_conninfo *ci);
150 150
151// BNEP sessions 151/* BNEP sessions */
152struct bnep_session { 152struct bnep_session {
153 struct list_head list; 153 struct list_head list;
154 154
155 unsigned int role; 155 unsigned int role;
156 unsigned long state; 156 unsigned long state;
157 unsigned long flags; 157 unsigned long flags;
158 atomic_t killed; 158 struct task_struct *task;
159 159
160 struct ethhdr eh; 160 struct ethhdr eh;
161 struct msghdr msg; 161 struct msghdr msg;
@@ -173,7 +173,7 @@ void bnep_sock_cleanup(void);
173 173
174static inline int bnep_mc_hash(__u8 *addr) 174static inline int bnep_mc_hash(__u8 *addr)
175{ 175{
176 return (crc32_be(~0, addr, ETH_ALEN) >> 26); 176 return crc32_be(~0, addr, ETH_ALEN) >> 26;
177} 177}
178 178
179#endif 179#endif
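bnep_mc_hash() maps an Ethernet address to the top six bits of its big-endian CRC-32, i.e. a value in 0..63 that indexes a 64-bit multicast filter bitmap. A small sketch of how such a filter might be populated and tested (helper names and the local u64 filter are illustrative, not the session field):

	#include <linux/types.h>
	#include <linux/bitops.h>
	#include <linux/crc32.h>
	#include <linux/if_ether.h>

	static inline int mc_hash(const u8 *addr)
	{
		return crc32_be(~0, addr, ETH_ALEN) >> 26;	/* top 6 bits -> 0..63 */
	}

	static void mc_filter_add(u64 *filter, const u8 *addr)
	{
		set_bit(mc_hash(addr), (unsigned long *) filter);
	}

	static bool mc_filter_match(const u64 *filter, const u8 *addr)
	{
		return test_bit(mc_hash(addr), (const unsigned long *) filter);
	}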
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 5868597534e5..ca39fcf010ce 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -36,6 +36,7 @@
36#include <linux/errno.h> 36#include <linux/errno.h>
37#include <linux/net.h> 37#include <linux/net.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/kthread.h>
39#include <net/sock.h> 40#include <net/sock.h>
40 41
41#include <linux/socket.h> 42#include <linux/socket.h>
@@ -131,7 +132,8 @@ static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len
131 return -EILSEQ; 132 return -EILSEQ;
132 133
133 n = get_unaligned_be16(data); 134 n = get_unaligned_be16(data);
134 data++; len -= 2; 135 data++;
136 len -= 2;
135 137
136 if (len < n) 138 if (len < n)
137 return -EILSEQ; 139 return -EILSEQ;
@@ -176,7 +178,8 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len)
176 return -EILSEQ; 178 return -EILSEQ;
177 179
178 n = get_unaligned_be16(data); 180 n = get_unaligned_be16(data);
179 data += 2; len -= 2; 181 data += 2;
182 len -= 2;
180 183
181 if (len < n) 184 if (len < n)
182 return -EILSEQ; 185 return -EILSEQ;
@@ -187,6 +190,8 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len)
187 n /= (ETH_ALEN * 2); 190 n /= (ETH_ALEN * 2);
188 191
189 if (n > 0) { 192 if (n > 0) {
193 int i;
194
190 s->mc_filter = 0; 195 s->mc_filter = 0;
191 196
192 /* Always send broadcast */ 197 /* Always send broadcast */
@@ -196,18 +201,22 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len)
196 for (; n > 0; n--) { 201 for (; n > 0; n--) {
197 u8 a1[6], *a2; 202 u8 a1[6], *a2;
198 203
199 memcpy(a1, data, ETH_ALEN); data += ETH_ALEN; 204 memcpy(a1, data, ETH_ALEN);
200 a2 = data; data += ETH_ALEN; 205 data += ETH_ALEN;
206 a2 = data;
207 data += ETH_ALEN;
201 208
202 BT_DBG("mc filter %s -> %s", 209 BT_DBG("mc filter %s -> %s",
203 batostr((void *) a1), batostr((void *) a2)); 210 batostr((void *) a1), batostr((void *) a2));
204 211
205 #define INCA(a) { int i = 5; while (i >=0 && ++a[i--] == 0); }
206
207 /* Iterate from a1 to a2 */ 212 /* Iterate from a1 to a2 */
208 set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter); 213 set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter);
209 while (memcmp(a1, a2, 6) < 0 && s->mc_filter != ~0LL) { 214 while (memcmp(a1, a2, 6) < 0 && s->mc_filter != ~0LL) {
210 INCA(a1); 215 /* Increment a1 */
216 i = 5;
217 while (i >= 0 && ++a1[i--] == 0)
218 ;
219
211 set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter); 220 set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter);
212 } 221 }
213 } 222 }
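The removed INCA() macro treated the six-byte address as a big-endian counter; the open-coded loop above does the same byte-wise increment with carry. The same helper written stand-alone (name is illustrative):

	#include <linux/types.h>

	/* Increment a 6-byte address in place, carrying from the last byte upward. */
	static void eth_addr_inc(u8 addr[6])
	{
		int i = 5;

		while (i >= 0 && ++addr[i--] == 0)
			;	/* byte wrapped to 0, keep carrying into the next byte */
	}

Walking from a1 to a2 then amounts to calling this repeatedly until memcmp(a1, a2, 6) is no longer negative, which is exactly what the multicast filter loop above does.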
@@ -227,7 +236,8 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len)
227 u8 cmd = *(u8 *)data; 236 u8 cmd = *(u8 *)data;
228 int err = 0; 237 int err = 0;
229 238
230 data++; len--; 239 data++;
240 len--;
231 241
232 switch (cmd) { 242 switch (cmd) {
233 case BNEP_CMD_NOT_UNDERSTOOD: 243 case BNEP_CMD_NOT_UNDERSTOOD:
@@ -302,7 +312,6 @@ static u8 __bnep_rx_hlen[] = {
302 ETH_ALEN + 2, /* BNEP_COMPRESSED_SRC_ONLY */ 312 ETH_ALEN + 2, /* BNEP_COMPRESSED_SRC_ONLY */
303 ETH_ALEN + 2 /* BNEP_COMPRESSED_DST_ONLY */ 313 ETH_ALEN + 2 /* BNEP_COMPRESSED_DST_ONLY */
304}; 314};
305#define BNEP_RX_TYPES (sizeof(__bnep_rx_hlen) - 1)
306 315
307static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) 316static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
308{ 317{
@@ -312,9 +321,10 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
312 321
313 dev->stats.rx_bytes += skb->len; 322 dev->stats.rx_bytes += skb->len;
314 323
315 type = *(u8 *) skb->data; skb_pull(skb, 1); 324 type = *(u8 *) skb->data;
325 skb_pull(skb, 1);
316 326
317 if ((type & BNEP_TYPE_MASK) > BNEP_RX_TYPES) 327 if ((type & BNEP_TYPE_MASK) >= sizeof(__bnep_rx_hlen))
318 goto badframe; 328 goto badframe;
319 329
320 if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) { 330 if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) {
@@ -367,14 +377,14 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
367 377
368 case BNEP_COMPRESSED_DST_ONLY: 378 case BNEP_COMPRESSED_DST_ONLY:
369 memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), 379 memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
370 ETH_ALEN); 380 ETH_ALEN);
371 memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, 381 memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
372 ETH_ALEN + 2); 382 ETH_ALEN + 2);
373 break; 383 break;
374 384
375 case BNEP_GENERAL: 385 case BNEP_GENERAL:
376 memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb), 386 memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
377 ETH_ALEN * 2); 387 ETH_ALEN * 2);
378 put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); 388 put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
379 break; 389 break;
380 } 390 }
@@ -470,15 +480,14 @@ static int bnep_session(void *arg)
470 480
471 BT_DBG(""); 481 BT_DBG("");
472 482
473 daemonize("kbnepd %s", dev->name);
474 set_user_nice(current, -15); 483 set_user_nice(current, -15);
475 484
476 init_waitqueue_entry(&wait, current); 485 init_waitqueue_entry(&wait, current);
477 add_wait_queue(sk_sleep(sk), &wait); 486 add_wait_queue(sk_sleep(sk), &wait);
478 while (!atomic_read(&s->killed)) { 487 while (!kthread_should_stop()) {
479 set_current_state(TASK_INTERRUPTIBLE); 488 set_current_state(TASK_INTERRUPTIBLE);
480 489
481 // RX 490 /* RX */
482 while ((skb = skb_dequeue(&sk->sk_receive_queue))) { 491 while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
483 skb_orphan(skb); 492 skb_orphan(skb);
484 bnep_rx_frame(s, skb); 493 bnep_rx_frame(s, skb);
@@ -487,7 +496,7 @@ static int bnep_session(void *arg)
487 if (sk->sk_state != BT_CONNECTED) 496 if (sk->sk_state != BT_CONNECTED)
488 break; 497 break;
489 498
490 // TX 499 /* TX */
491 while ((skb = skb_dequeue(&sk->sk_write_queue))) 500 while ((skb = skb_dequeue(&sk->sk_write_queue)))
492 if (bnep_tx_frame(s, skb)) 501 if (bnep_tx_frame(s, skb))
493 break; 502 break;
@@ -555,8 +564,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
555 564
556 /* session struct allocated as private part of net_device */ 565 /* session struct allocated as private part of net_device */
557 dev = alloc_netdev(sizeof(struct bnep_session), 566 dev = alloc_netdev(sizeof(struct bnep_session),
558 (*req->device) ? req->device : "bnep%d", 567 (*req->device) ? req->device : "bnep%d",
559 bnep_net_setup); 568 bnep_net_setup);
560 if (!dev) 569 if (!dev)
561 return -ENOMEM; 570 return -ENOMEM;
562 571
@@ -571,7 +580,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
571 s = netdev_priv(dev); 580 s = netdev_priv(dev);
572 581
573 /* This is rx header therefore addresses are swapped. 582 /* This is rx header therefore addresses are swapped.
574 * ie eh.h_dest is our local address. */ 583 * ie. eh.h_dest is our local address. */
575 memcpy(s->eh.h_dest, &src, ETH_ALEN); 584 memcpy(s->eh.h_dest, &src, ETH_ALEN);
576 memcpy(s->eh.h_source, &dst, ETH_ALEN); 585 memcpy(s->eh.h_source, &dst, ETH_ALEN);
577 memcpy(dev->dev_addr, s->eh.h_dest, ETH_ALEN); 586 memcpy(dev->dev_addr, s->eh.h_dest, ETH_ALEN);
@@ -597,17 +606,17 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
597 SET_NETDEV_DEVTYPE(dev, &bnep_type); 606 SET_NETDEV_DEVTYPE(dev, &bnep_type);
598 607
599 err = register_netdev(dev); 608 err = register_netdev(dev);
600 if (err) { 609 if (err)
601 goto failed; 610 goto failed;
602 }
603 611
604 __bnep_link_session(s); 612 __bnep_link_session(s);
605 613
606 err = kernel_thread(bnep_session, s, CLONE_KERNEL); 614 s->task = kthread_run(bnep_session, s, "kbnepd %s", dev->name);
607 if (err < 0) { 615 if (IS_ERR(s->task)) {
608 /* Session thread start failed, gotta cleanup. */ 616 /* Session thread start failed, gotta cleanup. */
609 unregister_netdev(dev); 617 unregister_netdev(dev);
610 __bnep_unlink_session(s); 618 __bnep_unlink_session(s);
619 err = PTR_ERR(s->task);
611 goto failed; 620 goto failed;
612 } 621 }
613 622
@@ -631,15 +640,9 @@ int bnep_del_connection(struct bnep_conndel_req *req)
631 down_read(&bnep_session_sem); 640 down_read(&bnep_session_sem);
632 641
633 s = __bnep_get_session(req->dst); 642 s = __bnep_get_session(req->dst);
634 if (s) { 643 if (s)
635 /* Wakeup user-space which is polling for socket errors. 644 kthread_stop(s->task);
636 * This is temporary hack until we have shutdown in L2CAP */ 645 else
637 s->sock->sk->sk_err = EUNATCH;
638
639 /* Kill session thread */
640 atomic_inc(&s->killed);
641 wake_up_interruptible(sk_sleep(s->sock->sk));
642 } else
643 err = -ENOENT; 646 err = -ENOENT;
644 647
645 up_read(&bnep_session_sem); 648 up_read(&bnep_session_sem);
@@ -708,8 +711,6 @@ static int __init bnep_init(void)
708{ 711{
709 char flt[50] = ""; 712 char flt[50] = "";
710 713
711 l2cap_load();
712
713#ifdef CONFIG_BT_BNEP_PROTO_FILTER 714#ifdef CONFIG_BT_BNEP_PROTO_FILTER
714 strcat(flt, "protocol "); 715 strcat(flt, "protocol ");
715#endif 716#endif
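The recurring theme in this file (and in cmtp below) is replacing kernel_thread()/daemonize() plus an atomic kill flag with the kthread API: the session thread is created with kthread_run(), loops on kthread_should_stop(), and is terminated with kthread_stop(). A minimal sketch of that lifecycle, with illustrative names rather than the bnep ones:

	#include <linux/kthread.h>
	#include <linux/delay.h>
	#include <linux/err.h>

	static struct task_struct *worker;

	static int worker_fn(void *arg)
	{
		while (!kthread_should_stop()) {
			/* ... do one unit of work, or sleep until woken ... */
			msleep_interruptible(100);
		}
		return 0;
	}

	static int start_worker(void)
	{
		worker = kthread_run(worker_fn, NULL, "kexampled");
		if (IS_ERR(worker))
			return PTR_ERR(worker);	/* thread never started */
		return 0;
	}

	static void stop_worker(void)
	{
		kthread_stop(worker);	/* wakes the thread and waits for it to return */
	}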
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 2862f53b66b1..17800b1d28ea 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -39,10 +39,10 @@
39#include <linux/init.h> 39#include <linux/init.h>
40#include <linux/compat.h> 40#include <linux/compat.h>
41#include <linux/gfp.h> 41#include <linux/gfp.h>
42#include <linux/uaccess.h>
42#include <net/sock.h> 43#include <net/sock.h>
43 44
44#include <asm/system.h> 45#include <asm/system.h>
45#include <asm/uaccess.h>
46 46
47#include "bnep.h" 47#include "bnep.h"
48 48
@@ -88,6 +88,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
88 sockfd_put(nsock); 88 sockfd_put(nsock);
89 return -EBADFD; 89 return -EBADFD;
90 } 90 }
91 ca.device[sizeof(ca.device)-1] = 0;
91 92
92 err = bnep_add_connection(&ca, nsock); 93 err = bnep_add_connection(&ca, nsock);
93 if (!err) { 94 if (!err) {
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 3487cfe74aec..744233cba244 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -35,6 +35,7 @@
35#include <linux/ioctl.h> 35#include <linux/ioctl.h>
36#include <linux/file.h> 36#include <linux/file.h>
37#include <linux/wait.h> 37#include <linux/wait.h>
38#include <linux/kthread.h>
38#include <net/sock.h> 39#include <net/sock.h>
39 40
40#include <linux/isdn/capilli.h> 41#include <linux/isdn/capilli.h>
@@ -143,7 +144,7 @@ static void cmtp_send_capimsg(struct cmtp_session *session, struct sk_buff *skb)
143 144
144 skb_queue_tail(&session->transmit, skb); 145 skb_queue_tail(&session->transmit, skb);
145 146
146 cmtp_schedule(session); 147 wake_up_interruptible(sk_sleep(session->sock->sk));
147} 148}
148 149
149static void cmtp_send_interopmsg(struct cmtp_session *session, 150static void cmtp_send_interopmsg(struct cmtp_session *session,
@@ -155,7 +156,8 @@ static void cmtp_send_interopmsg(struct cmtp_session *session,
155 156
156 BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum); 157 BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum);
157 158
158 if (!(skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC))) { 159 skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC);
160 if (!skb) {
159 BT_ERR("Can't allocate memory for interoperability packet"); 161 BT_ERR("Can't allocate memory for interoperability packet");
160 return; 162 return;
161 } 163 }
@@ -385,8 +387,7 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl)
385 387
386 capi_ctr_down(ctrl); 388 capi_ctr_down(ctrl);
387 389
388 atomic_inc(&session->terminate); 390 kthread_stop(session->task);
389 cmtp_schedule(session);
390} 391}
391 392
392static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp) 393static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp)
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
index 785e79e953c5..db43b54ac9af 100644
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -37,7 +37,7 @@
37#define CMTP_LOOPBACK 0 37#define CMTP_LOOPBACK 0
38 38
39struct cmtp_connadd_req { 39struct cmtp_connadd_req {
40 int sock; // Connected socket 40 int sock; /* Connected socket */
41 __u32 flags; 41 __u32 flags;
42}; 42};
43 43
@@ -81,7 +81,7 @@ struct cmtp_session {
81 81
82 char name[BTNAMSIZ]; 82 char name[BTNAMSIZ];
83 83
84 atomic_t terminate; 84 struct task_struct *task;
85 85
86 wait_queue_head_t wait; 86 wait_queue_head_t wait;
87 87
@@ -121,13 +121,6 @@ void cmtp_detach_device(struct cmtp_session *session);
121 121
122void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb); 122void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb);
123 123
124static inline void cmtp_schedule(struct cmtp_session *session)
125{
126 struct sock *sk = session->sock->sk;
127
128 wake_up_interruptible(sk_sleep(sk));
129}
130
131/* CMTP init defines */ 124/* CMTP init defines */
132int cmtp_init_sockets(void); 125int cmtp_init_sockets(void);
133void cmtp_cleanup_sockets(void); 126void cmtp_cleanup_sockets(void);
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 8e5f292529ac..c5b11af908be 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -35,6 +35,7 @@
35#include <linux/ioctl.h> 35#include <linux/ioctl.h>
36#include <linux/file.h> 36#include <linux/file.h>
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/kthread.h>
38#include <net/sock.h> 39#include <net/sock.h>
39 40
40#include <linux/isdn/capilli.h> 41#include <linux/isdn/capilli.h>
@@ -115,7 +116,8 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
115 116
116 size = (skb) ? skb->len + count : count; 117 size = (skb) ? skb->len + count : count;
117 118
118 if (!(nskb = alloc_skb(size, GFP_ATOMIC))) { 119 nskb = alloc_skb(size, GFP_ATOMIC);
120 if (!nskb) {
119 BT_ERR("Can't allocate memory for CAPI message"); 121 BT_ERR("Can't allocate memory for CAPI message");
120 return; 122 return;
121 } 123 }
@@ -216,7 +218,8 @@ static void cmtp_process_transmit(struct cmtp_session *session)
216 218
217 BT_DBG("session %p", session); 219 BT_DBG("session %p", session);
218 220
219 if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) { 221 nskb = alloc_skb(session->mtu, GFP_ATOMIC);
222 if (!nskb) {
220 BT_ERR("Can't allocate memory for new frame"); 223 BT_ERR("Can't allocate memory for new frame");
221 return; 224 return;
222 } 225 }
@@ -224,7 +227,8 @@ static void cmtp_process_transmit(struct cmtp_session *session)
224 while ((skb = skb_dequeue(&session->transmit))) { 227 while ((skb = skb_dequeue(&session->transmit))) {
225 struct cmtp_scb *scb = (void *) skb->cb; 228 struct cmtp_scb *scb = (void *) skb->cb;
226 229
227 if ((tail = (session->mtu - nskb->len)) < 5) { 230 tail = session->mtu - nskb->len;
231 if (tail < 5) {
228 cmtp_send_frame(session, nskb->data, nskb->len); 232 cmtp_send_frame(session, nskb->data, nskb->len);
229 skb_trim(nskb, 0); 233 skb_trim(nskb, 0);
230 tail = session->mtu; 234 tail = session->mtu;
@@ -232,9 +236,12 @@ static void cmtp_process_transmit(struct cmtp_session *session)
232 236
233 size = min_t(uint, ((tail < 258) ? (tail - 2) : (tail - 3)), skb->len); 237 size = min_t(uint, ((tail < 258) ? (tail - 2) : (tail - 3)), skb->len);
234 238
235 if ((scb->id < 0) && ((scb->id = cmtp_alloc_block_id(session)) < 0)) { 239 if (scb->id < 0) {
236 skb_queue_head(&session->transmit, skb); 240 scb->id = cmtp_alloc_block_id(session);
237 break; 241 if (scb->id < 0) {
242 skb_queue_head(&session->transmit, skb);
243 break;
244 }
238 } 245 }
239 246
240 if (size < 256) { 247 if (size < 256) {
@@ -281,12 +288,11 @@ static int cmtp_session(void *arg)
281 288
282 BT_DBG("session %p", session); 289 BT_DBG("session %p", session);
283 290
284 daemonize("kcmtpd_ctr_%d", session->num);
285 set_user_nice(current, -15); 291 set_user_nice(current, -15);
286 292
287 init_waitqueue_entry(&wait, current); 293 init_waitqueue_entry(&wait, current);
288 add_wait_queue(sk_sleep(sk), &wait); 294 add_wait_queue(sk_sleep(sk), &wait);
289 while (!atomic_read(&session->terminate)) { 295 while (!kthread_should_stop()) {
290 set_current_state(TASK_INTERRUPTIBLE); 296 set_current_state(TASK_INTERRUPTIBLE);
291 297
292 if (sk->sk_state != BT_CONNECTED) 298 if (sk->sk_state != BT_CONNECTED)
@@ -340,7 +346,8 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
340 346
341 bacpy(&session->bdaddr, &bt_sk(sock->sk)->dst); 347 bacpy(&session->bdaddr, &bt_sk(sock->sk)->dst);
342 348
343 session->mtu = min_t(uint, l2cap_pi(sock->sk)->omtu, l2cap_pi(sock->sk)->imtu); 349 session->mtu = min_t(uint, l2cap_pi(sock->sk)->chan->omtu,
350 l2cap_pi(sock->sk)->chan->imtu);
344 351
345 BT_DBG("mtu %d", session->mtu); 352 BT_DBG("mtu %d", session->mtu);
346 353
@@ -364,9 +371,12 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
364 371
365 __cmtp_link_session(session); 372 __cmtp_link_session(session);
366 373
367 err = kernel_thread(cmtp_session, session, CLONE_KERNEL); 374 session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d",
368 if (err < 0) 375 session->num);
376 if (IS_ERR(session->task)) {
377 err = PTR_ERR(session->task);
369 goto unlink; 378 goto unlink;
379 }
370 380
371 if (!(session->flags & (1 << CMTP_LOOPBACK))) { 381 if (!(session->flags & (1 << CMTP_LOOPBACK))) {
372 err = cmtp_attach_device(session); 382 err = cmtp_attach_device(session);
@@ -403,9 +413,8 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
403 /* Flush the transmit queue */ 413 /* Flush the transmit queue */
404 skb_queue_purge(&session->transmit); 414 skb_queue_purge(&session->transmit);
405 415
406 /* Kill session thread */ 416 /* Stop session thread */
407 atomic_inc(&session->terminate); 417 kthread_stop(session->task);
408 cmtp_schedule(session);
409 } else 418 } else
410 err = -ENOENT; 419 err = -ENOENT;
411 420
@@ -466,8 +475,6 @@ int cmtp_get_conninfo(struct cmtp_conninfo *ci)
466 475
467static int __init cmtp_init(void) 476static int __init cmtp_init(void)
468{ 477{
469 l2cap_load();
470
471 BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION); 478 BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
472 479
473 cmtp_init_sockets(); 480 cmtp_init_sockets();
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 7ea1979a8e4f..3f2dd5c25ae5 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -34,12 +34,12 @@
34#include <linux/file.h> 34#include <linux/file.h>
35#include <linux/compat.h> 35#include <linux/compat.h>
36#include <linux/gfp.h> 36#include <linux/gfp.h>
37#include <linux/uaccess.h>
37#include <net/sock.h> 38#include <net/sock.h>
38 39
39#include <linux/isdn/capilli.h> 40#include <linux/isdn/capilli.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h>
43 43
44#include "cmtp.h" 44#include "cmtp.h"
45 45
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 99cd8d9d891b..3163330cd4f1 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -45,6 +45,33 @@
45#include <net/bluetooth/bluetooth.h> 45#include <net/bluetooth/bluetooth.h>
46#include <net/bluetooth/hci_core.h> 46#include <net/bluetooth/hci_core.h>
47 47
48static void hci_le_connect(struct hci_conn *conn)
49{
50 struct hci_dev *hdev = conn->hdev;
51 struct hci_cp_le_create_conn cp;
52
53 conn->state = BT_CONNECT;
54 conn->out = 1;
55 conn->link_mode |= HCI_LM_MASTER;
56
57 memset(&cp, 0, sizeof(cp));
58 cp.scan_interval = cpu_to_le16(0x0004);
59 cp.scan_window = cpu_to_le16(0x0004);
60 bacpy(&cp.peer_addr, &conn->dst);
61 cp.conn_interval_min = cpu_to_le16(0x0008);
62 cp.conn_interval_max = cpu_to_le16(0x0100);
63 cp.supervision_timeout = cpu_to_le16(0x0064);
64 cp.min_ce_len = cpu_to_le16(0x0001);
65 cp.max_ce_len = cpu_to_le16(0x0001);
66
67 hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
68}
69
70static void hci_le_connect_cancel(struct hci_conn *conn)
71{
72 hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
73}
74
48void hci_acl_connect(struct hci_conn *conn) 75void hci_acl_connect(struct hci_conn *conn)
49{ 76{
50 struct hci_dev *hdev = conn->hdev; 77 struct hci_dev *hdev = conn->hdev;
@@ -156,6 +183,26 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
156 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); 183 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp);
157} 184}
158 185
186void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
187 u16 latency, u16 to_multiplier)
188{
189 struct hci_cp_le_conn_update cp;
190 struct hci_dev *hdev = conn->hdev;
191
192 memset(&cp, 0, sizeof(cp));
193
194 cp.handle = cpu_to_le16(conn->handle);
195 cp.conn_interval_min = cpu_to_le16(min);
196 cp.conn_interval_max = cpu_to_le16(max);
197 cp.conn_latency = cpu_to_le16(latency);
198 cp.supervision_timeout = cpu_to_le16(to_multiplier);
199 cp.min_ce_len = cpu_to_le16(0x0001);
200 cp.max_ce_len = cpu_to_le16(0x0001);
201
202 hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp);
203}
204EXPORT_SYMBOL(hci_le_conn_update);
205
159/* Device _must_ be locked */ 206/* Device _must_ be locked */
160void hci_sco_setup(struct hci_conn *conn, __u8 status) 207void hci_sco_setup(struct hci_conn *conn, __u8 status)
161{ 208{
@@ -193,8 +240,12 @@ static void hci_conn_timeout(unsigned long arg)
193 switch (conn->state) { 240 switch (conn->state) {
194 case BT_CONNECT: 241 case BT_CONNECT:
195 case BT_CONNECT2: 242 case BT_CONNECT2:
196 if (conn->type == ACL_LINK && conn->out) 243 if (conn->out) {
197 hci_acl_connect_cancel(conn); 244 if (conn->type == ACL_LINK)
245 hci_acl_connect_cancel(conn);
246 else if (conn->type == LE_LINK)
247 hci_le_connect_cancel(conn);
248 }
198 break; 249 break;
199 case BT_CONFIG: 250 case BT_CONFIG:
200 case BT_CONNECTED: 251 case BT_CONNECTED:
@@ -218,6 +269,19 @@ static void hci_conn_idle(unsigned long arg)
218 hci_conn_enter_sniff_mode(conn); 269 hci_conn_enter_sniff_mode(conn);
219} 270}
220 271
272static void hci_conn_auto_accept(unsigned long arg)
273{
274 struct hci_conn *conn = (void *) arg;
275 struct hci_dev *hdev = conn->hdev;
276
277 hci_dev_lock(hdev);
278
279 hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_REPLY, sizeof(conn->dst),
280 &conn->dst);
281
282 hci_dev_unlock(hdev);
283}
284
221struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) 285struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
222{ 286{
223 struct hci_conn *conn; 287 struct hci_conn *conn;
@@ -234,6 +298,9 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
234 conn->mode = HCI_CM_ACTIVE; 298 conn->mode = HCI_CM_ACTIVE;
235 conn->state = BT_OPEN; 299 conn->state = BT_OPEN;
236 conn->auth_type = HCI_AT_GENERAL_BONDING; 300 conn->auth_type = HCI_AT_GENERAL_BONDING;
301 conn->io_capability = hdev->io_capability;
302 conn->remote_auth = 0xff;
303 conn->key_type = 0xff;
237 304
238 conn->power_save = 1; 305 conn->power_save = 1;
239 conn->disc_timeout = HCI_DISCONN_TIMEOUT; 306 conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -258,6 +325,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
258 325
259 setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn); 326 setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn);
260 setup_timer(&conn->idle_timer, hci_conn_idle, (unsigned long)conn); 327 setup_timer(&conn->idle_timer, hci_conn_idle, (unsigned long)conn);
328 setup_timer(&conn->auto_accept_timer, hci_conn_auto_accept,
329 (unsigned long) conn);
261 330
262 atomic_set(&conn->refcnt, 0); 331 atomic_set(&conn->refcnt, 0);
263 332
@@ -288,6 +357,8 @@ int hci_conn_del(struct hci_conn *conn)
288 357
289 del_timer(&conn->disc_timer); 358 del_timer(&conn->disc_timer);
290 359
360 del_timer(&conn->auto_accept_timer);
361
291 if (conn->type == ACL_LINK) { 362 if (conn->type == ACL_LINK) {
292 struct hci_conn *sco = conn->link; 363 struct hci_conn *sco = conn->link;
293 if (sco) 364 if (sco)
@@ -295,6 +366,11 @@ int hci_conn_del(struct hci_conn *conn)
295 366
296 /* Unacked frames */ 367 /* Unacked frames */
297 hdev->acl_cnt += conn->sent; 368 hdev->acl_cnt += conn->sent;
369 } else if (conn->type == LE_LINK) {
370 if (hdev->le_pkts)
371 hdev->le_cnt += conn->sent;
372 else
373 hdev->acl_cnt += conn->sent;
298 } else { 374 } else {
299 struct hci_conn *acl = conn->link; 375 struct hci_conn *acl = conn->link;
300 if (acl) { 376 if (acl) {
@@ -360,15 +436,31 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
360} 436}
361EXPORT_SYMBOL(hci_get_route); 437EXPORT_SYMBOL(hci_get_route);
362 438
363/* Create SCO or ACL connection. 439/* Create SCO, ACL or LE connection.
364 * Device _must_ be locked */ 440 * Device _must_ be locked */
365struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type) 441struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type)
366{ 442{
367 struct hci_conn *acl; 443 struct hci_conn *acl;
368 struct hci_conn *sco; 444 struct hci_conn *sco;
445 struct hci_conn *le;
369 446
370 BT_DBG("%s dst %s", hdev->name, batostr(dst)); 447 BT_DBG("%s dst %s", hdev->name, batostr(dst));
371 448
449 if (type == LE_LINK) {
450 le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
451 if (le)
452 return ERR_PTR(-EBUSY);
453 le = hci_conn_add(hdev, LE_LINK, dst);
454 if (!le)
455 return ERR_PTR(-ENOMEM);
456 if (le->state == BT_OPEN)
457 hci_le_connect(le);
458
459 hci_conn_hold(le);
460
461 return le;
462 }
463
372 acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); 464 acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
373 if (!acl) { 465 if (!acl) {
374 acl = hci_conn_add(hdev, ACL_LINK, dst); 466 acl = hci_conn_add(hdev, ACL_LINK, dst);
@@ -461,36 +553,93 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
461 return 0; 553 return 0;
462} 554}
463 555
 556/* Encrypt the link */
557static void hci_conn_encrypt(struct hci_conn *conn)
558{
559 BT_DBG("conn %p", conn);
560
561 if (!test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
562 struct hci_cp_set_conn_encrypt cp;
563 cp.handle = cpu_to_le16(conn->handle);
564 cp.encrypt = 0x01;
565 hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp),
566 &cp);
567 }
568}
569
464/* Enable security */ 570/* Enable security */
465int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) 571int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
466{ 572{
467 BT_DBG("conn %p", conn); 573 BT_DBG("conn %p", conn);
468 574
575 /* For sdp we don't need the link key. */
469 if (sec_level == BT_SECURITY_SDP) 576 if (sec_level == BT_SECURITY_SDP)
470 return 1; 577 return 1;
471 578
579 /* For non 2.1 devices and low security level we don't need the link
580 key. */
472 if (sec_level == BT_SECURITY_LOW && 581 if (sec_level == BT_SECURITY_LOW &&
473 (!conn->ssp_mode || !conn->hdev->ssp_mode)) 582 (!conn->ssp_mode || !conn->hdev->ssp_mode))
474 return 1; 583 return 1;
475 584
476 if (conn->link_mode & HCI_LM_ENCRYPT) 585 /* For other security levels we need the link key. */
477 return hci_conn_auth(conn, sec_level, auth_type); 586 if (!(conn->link_mode & HCI_LM_AUTH))
478 587 goto auth;
588
589 /* An authenticated combination key has sufficient security for any
590 security level. */
591 if (conn->key_type == HCI_LK_AUTH_COMBINATION)
592 goto encrypt;
593
594 /* An unauthenticated combination key has sufficient security for
595 security level 1 and 2. */
596 if (conn->key_type == HCI_LK_UNAUTH_COMBINATION &&
597 (sec_level == BT_SECURITY_MEDIUM ||
598 sec_level == BT_SECURITY_LOW))
599 goto encrypt;
600
601 /* A combination key has always sufficient security for the security
602 levels 1 or 2. High security level requires the combination key
603 is generated using maximum PIN code length (16).
604 For pre 2.1 units. */
605 if (conn->key_type == HCI_LK_COMBINATION &&
606 (sec_level != BT_SECURITY_HIGH ||
607 conn->pin_length == 16))
608 goto encrypt;
609
610auth:
479 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) 611 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))
480 return 0; 612 return 0;
481 613
482 if (hci_conn_auth(conn, sec_level, auth_type)) { 614 hci_conn_auth(conn, sec_level, auth_type);
483 struct hci_cp_set_conn_encrypt cp; 615 return 0;
484 cp.handle = cpu_to_le16(conn->handle);
485 cp.encrypt = 1;
486 hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT,
487 sizeof(cp), &cp);
488 }
489 616
617encrypt:
618 if (conn->link_mode & HCI_LM_ENCRYPT)
619 return 1;
620
621 hci_conn_encrypt(conn);
490 return 0; 622 return 0;
491} 623}
492EXPORT_SYMBOL(hci_conn_security); 624EXPORT_SYMBOL(hci_conn_security);
493 625
626/* Check secure link requirement */
627int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level)
628{
629 BT_DBG("conn %p", conn);
630
631 if (sec_level != BT_SECURITY_HIGH)
632 return 1; /* Accept if non-secure is required */
633
634 if (conn->key_type == HCI_LK_AUTH_COMBINATION ||
635 (conn->key_type == HCI_LK_COMBINATION &&
636 conn->pin_length == 16))
637 return 1;
638
639 return 0; /* Reject not secure link */
640}
641EXPORT_SYMBOL(hci_conn_check_secure);
642
494/* Change link key */ 643/* Change link key */
495int hci_conn_change_link_key(struct hci_conn *conn) 644int hci_conn_change_link_key(struct hci_conn *conn)
496{ 645{
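For LE links the reworked hci_connect() above returns ERR_PTR(-EBUSY) or ERR_PTR(-ENOMEM) rather than NULL, so callers follow the IS_ERR()/PTR_ERR() convention. A hedged sketch of such a caller (the wrapper name is hypothetical, the sec_level/auth_type values are only illustrative, and the wait-for-connect and hci_conn_put() pairing are elided):

	#include <linux/err.h>
	#include <net/bluetooth/bluetooth.h>
	#include <net/bluetooth/hci_core.h>

	static int connect_le(struct hci_dev *hdev, bdaddr_t *dst)
	{
		struct hci_conn *conn;

		hci_dev_lock(hdev);	/* hci_connect() requires the device locked */
		conn = hci_connect(hdev, LE_LINK, dst, BT_SECURITY_LOW,
							HCI_AT_NO_BONDING);
		hci_dev_unlock(hdev);

		if (IS_ERR(conn))
			return PTR_ERR(conn);	/* -EBUSY: connect pending, -ENOMEM: no conn */

		/* ... wait for BT_CONNECTED, use the link, then hci_conn_put(conn) ... */
		return 0;
	}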
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 9c4541bc488a..815269b07f20 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -41,6 +41,7 @@
41#include <linux/interrupt.h> 41#include <linux/interrupt.h>
42#include <linux/notifier.h> 42#include <linux/notifier.h>
43#include <linux/rfkill.h> 43#include <linux/rfkill.h>
44#include <linux/timer.h>
44#include <net/sock.h> 45#include <net/sock.h>
45 46
46#include <asm/system.h> 47#include <asm/system.h>
@@ -50,10 +51,11 @@
50#include <net/bluetooth/bluetooth.h> 51#include <net/bluetooth/bluetooth.h>
51#include <net/bluetooth/hci_core.h> 52#include <net/bluetooth/hci_core.h>
52 53
54#define AUTO_OFF_TIMEOUT 2000
55
53static void hci_cmd_task(unsigned long arg); 56static void hci_cmd_task(unsigned long arg);
54static void hci_rx_task(unsigned long arg); 57static void hci_rx_task(unsigned long arg);
55static void hci_tx_task(unsigned long arg); 58static void hci_tx_task(unsigned long arg);
56static void hci_notify(struct hci_dev *hdev, int event);
57 59
58static DEFINE_RWLOCK(hci_task_lock); 60static DEFINE_RWLOCK(hci_task_lock);
59 61
@@ -95,11 +97,10 @@ void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result)
95{ 97{
96 BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result); 98 BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result);
97 99
98 /* If the request has set req_last_cmd (typical for multi-HCI 100 /* If this is the init phase check if the completed command matches
99 * command requests) check if the completed command matches 101 * the last init command, and if not just return.
100 * this, and if not just return. Single HCI command requests 102 */
101 * typically leave req_last_cmd as 0 */ 103 if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd)
102 if (hdev->req_last_cmd && cmd != hdev->req_last_cmd)
103 return; 104 return;
104 105
105 if (hdev->req_status == HCI_REQ_PEND) { 106 if (hdev->req_status == HCI_REQ_PEND) {
@@ -122,7 +123,7 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
122 123
123/* Execute request and wait for completion. */ 124/* Execute request and wait for completion. */
124static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), 125static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt),
125 unsigned long opt, __u32 timeout) 126 unsigned long opt, __u32 timeout)
126{ 127{
127 DECLARE_WAITQUEUE(wait, current); 128 DECLARE_WAITQUEUE(wait, current);
128 int err = 0; 129 int err = 0;
@@ -156,7 +157,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
156 break; 157 break;
157 } 158 }
158 159
159 hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0; 160 hdev->req_status = hdev->req_result = 0;
160 161
161 BT_DBG("%s end: err %d", hdev->name, err); 162 BT_DBG("%s end: err %d", hdev->name, err);
162 163
@@ -164,7 +165,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
164} 165}
165 166
166static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), 167static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt),
167 unsigned long opt, __u32 timeout) 168 unsigned long opt, __u32 timeout)
168{ 169{
169 int ret; 170 int ret;
170 171
@@ -184,11 +185,13 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
184 BT_DBG("%s %ld", hdev->name, opt); 185 BT_DBG("%s %ld", hdev->name, opt);
185 186
186 /* Reset device */ 187 /* Reset device */
188 set_bit(HCI_RESET, &hdev->flags);
187 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); 189 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
188} 190}
189 191
190static void hci_init_req(struct hci_dev *hdev, unsigned long opt) 192static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
191{ 193{
194 struct hci_cp_delete_stored_link_key cp;
192 struct sk_buff *skb; 195 struct sk_buff *skb;
193 __le16 param; 196 __le16 param;
194 __u8 flt_type; 197 __u8 flt_type;
@@ -210,8 +213,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
210 /* Mandatory initialization */ 213 /* Mandatory initialization */
211 214
212 /* Reset */ 215 /* Reset */
213 if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) 216 if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) {
217 set_bit(HCI_RESET, &hdev->flags);
214 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); 218 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
219 }
215 220
216 /* Read Local Supported Features */ 221 /* Read Local Supported Features */
217 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); 222 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
@@ -252,15 +257,21 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
252 flt_type = HCI_FLT_CLEAR_ALL; 257 flt_type = HCI_FLT_CLEAR_ALL;
253 hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); 258 hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
254 259
255 /* Page timeout ~20 secs */
256 param = cpu_to_le16(0x8000);
257 hci_send_cmd(hdev, HCI_OP_WRITE_PG_TIMEOUT, 2, &param);
258
259 /* Connection accept timeout ~20 secs */ 260 /* Connection accept timeout ~20 secs */
260 param = cpu_to_le16(0x7d00); 261 param = cpu_to_le16(0x7d00);
261 hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); 262 hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
262 263
263 hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT; 264 bacpy(&cp.bdaddr, BDADDR_ANY);
265 cp.delete_all = 1;
266 hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
267}
268
269static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt)
270{
271 BT_DBG("%s", hdev->name);
272
273 /* Read LE buffer size */
274 hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
264} 275}
265 276
266static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) 277static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
@@ -429,7 +440,8 @@ int hci_inquiry(void __user *arg)
429 if (copy_from_user(&ir, ptr, sizeof(ir))) 440 if (copy_from_user(&ir, ptr, sizeof(ir)))
430 return -EFAULT; 441 return -EFAULT;
431 442
432 if (!(hdev = hci_dev_get(ir.dev_id))) 443 hdev = hci_dev_get(ir.dev_id);
444 if (!hdev)
433 return -ENODEV; 445 return -ENODEV;
434 446
435 hci_dev_lock_bh(hdev); 447 hci_dev_lock_bh(hdev);
@@ -455,7 +467,7 @@ int hci_inquiry(void __user *arg)
455 /* cache_dump can't sleep. Therefore we allocate temp buffer and then 467 /* cache_dump can't sleep. Therefore we allocate temp buffer and then
456 * copy it to the user space. 468 * copy it to the user space.
457 */ 469 */
458 buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL); 470 buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL);
459 if (!buf) { 471 if (!buf) {
460 err = -ENOMEM; 472 err = -ENOMEM;
461 goto done; 473 goto done;
@@ -489,7 +501,8 @@ int hci_dev_open(__u16 dev)
489 struct hci_dev *hdev; 501 struct hci_dev *hdev;
490 int ret = 0; 502 int ret = 0;
491 503
492 if (!(hdev = hci_dev_get(dev))) 504 hdev = hci_dev_get(dev);
505 if (!hdev)
493 return -ENODEV; 506 return -ENODEV;
494 507
495 BT_DBG("%s %p", hdev->name, hdev); 508 BT_DBG("%s %p", hdev->name, hdev);
@@ -521,11 +534,15 @@ int hci_dev_open(__u16 dev)
521 if (!test_bit(HCI_RAW, &hdev->flags)) { 534 if (!test_bit(HCI_RAW, &hdev->flags)) {
522 atomic_set(&hdev->cmd_cnt, 1); 535 atomic_set(&hdev->cmd_cnt, 1);
523 set_bit(HCI_INIT, &hdev->flags); 536 set_bit(HCI_INIT, &hdev->flags);
537 hdev->init_last_cmd = 0;
524 538
525 //__hci_request(hdev, hci_reset_req, 0, HZ);
526 ret = __hci_request(hdev, hci_init_req, 0, 539 ret = __hci_request(hdev, hci_init_req, 0,
527 msecs_to_jiffies(HCI_INIT_TIMEOUT)); 540 msecs_to_jiffies(HCI_INIT_TIMEOUT));
528 541
542 if (lmp_le_capable(hdev))
543 ret = __hci_request(hdev, hci_le_init_req, 0,
544 msecs_to_jiffies(HCI_INIT_TIMEOUT));
545
529 clear_bit(HCI_INIT, &hdev->flags); 546 clear_bit(HCI_INIT, &hdev->flags);
530 } 547 }
531 548
@@ -533,6 +550,8 @@ int hci_dev_open(__u16 dev)
533 hci_dev_hold(hdev); 550 hci_dev_hold(hdev);
534 set_bit(HCI_UP, &hdev->flags); 551 set_bit(HCI_UP, &hdev->flags);
535 hci_notify(hdev, HCI_DEV_UP); 552 hci_notify(hdev, HCI_DEV_UP);
553 if (!test_bit(HCI_SETUP, &hdev->flags))
554 mgmt_powered(hdev->id, 1);
536 } else { 555 } else {
537 /* Init failed, cleanup */ 556 /* Init failed, cleanup */
538 tasklet_kill(&hdev->rx_task); 557 tasklet_kill(&hdev->rx_task);
@@ -568,6 +587,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
568 hci_req_lock(hdev); 587 hci_req_lock(hdev);
569 588
570 if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { 589 if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
590 del_timer_sync(&hdev->cmd_timer);
571 hci_req_unlock(hdev); 591 hci_req_unlock(hdev);
572 return 0; 592 return 0;
573 } 593 }
@@ -606,6 +626,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
606 626
607 /* Drop last sent command */ 627 /* Drop last sent command */
608 if (hdev->sent_cmd) { 628 if (hdev->sent_cmd) {
629 del_timer_sync(&hdev->cmd_timer);
609 kfree_skb(hdev->sent_cmd); 630 kfree_skb(hdev->sent_cmd);
610 hdev->sent_cmd = NULL; 631 hdev->sent_cmd = NULL;
611 } 632 }
@@ -614,6 +635,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
614 * and no tasks are scheduled. */ 635 * and no tasks are scheduled. */
615 hdev->close(hdev); 636 hdev->close(hdev);
616 637
638 mgmt_powered(hdev->id, 0);
639
617 /* Clear flags */ 640 /* Clear flags */
618 hdev->flags = 0; 641 hdev->flags = 0;
619 642
@@ -664,7 +687,7 @@ int hci_dev_reset(__u16 dev)
664 hdev->flush(hdev); 687 hdev->flush(hdev);
665 688
666 atomic_set(&hdev->cmd_cnt, 1); 689 atomic_set(&hdev->cmd_cnt, 1);
667 hdev->acl_cnt = 0; hdev->sco_cnt = 0; 690 hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
668 691
669 if (!test_bit(HCI_RAW, &hdev->flags)) 692 if (!test_bit(HCI_RAW, &hdev->flags))
670 ret = __hci_request(hdev, hci_reset_req, 0, 693 ret = __hci_request(hdev, hci_reset_req, 0,
@@ -793,9 +816,17 @@ int hci_get_dev_list(void __user *arg)
793 read_lock_bh(&hci_dev_list_lock); 816 read_lock_bh(&hci_dev_list_lock);
794 list_for_each(p, &hci_dev_list) { 817 list_for_each(p, &hci_dev_list) {
795 struct hci_dev *hdev; 818 struct hci_dev *hdev;
819
796 hdev = list_entry(p, struct hci_dev, list); 820 hdev = list_entry(p, struct hci_dev, list);
821
822 hci_del_off_timer(hdev);
823
824 if (!test_bit(HCI_MGMT, &hdev->flags))
825 set_bit(HCI_PAIRABLE, &hdev->flags);
826
797 (dr + n)->dev_id = hdev->id; 827 (dr + n)->dev_id = hdev->id;
798 (dr + n)->dev_opt = hdev->flags; 828 (dr + n)->dev_opt = hdev->flags;
829
799 if (++n >= dev_num) 830 if (++n >= dev_num)
800 break; 831 break;
801 } 832 }
@@ -823,6 +854,11 @@ int hci_get_dev_info(void __user *arg)
823 if (!hdev) 854 if (!hdev)
824 return -ENODEV; 855 return -ENODEV;
825 856
857 hci_del_off_timer(hdev);
858
859 if (!test_bit(HCI_MGMT, &hdev->flags))
860 set_bit(HCI_PAIRABLE, &hdev->flags);
861
826 strcpy(di.name, hdev->name); 862 strcpy(di.name, hdev->name);
827 di.bdaddr = hdev->bdaddr; 863 di.bdaddr = hdev->bdaddr;
828 di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); 864 di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4);
@@ -891,6 +927,281 @@ void hci_free_dev(struct hci_dev *hdev)
891} 927}
892EXPORT_SYMBOL(hci_free_dev); 928EXPORT_SYMBOL(hci_free_dev);
893 929
930static void hci_power_on(struct work_struct *work)
931{
932 struct hci_dev *hdev = container_of(work, struct hci_dev, power_on);
933
934 BT_DBG("%s", hdev->name);
935
936 if (hci_dev_open(hdev->id) < 0)
937 return;
938
939 if (test_bit(HCI_AUTO_OFF, &hdev->flags))
940 mod_timer(&hdev->off_timer,
941 jiffies + msecs_to_jiffies(AUTO_OFF_TIMEOUT));
942
943 if (test_and_clear_bit(HCI_SETUP, &hdev->flags))
944 mgmt_index_added(hdev->id);
945}
946
947static void hci_power_off(struct work_struct *work)
948{
949 struct hci_dev *hdev = container_of(work, struct hci_dev, power_off);
950
951 BT_DBG("%s", hdev->name);
952
953 hci_dev_close(hdev->id);
954}
955
956static void hci_auto_off(unsigned long data)
957{
958 struct hci_dev *hdev = (struct hci_dev *) data;
959
960 BT_DBG("%s", hdev->name);
961
962 clear_bit(HCI_AUTO_OFF, &hdev->flags);
963
964 queue_work(hdev->workqueue, &hdev->power_off);
965}
966
967void hci_del_off_timer(struct hci_dev *hdev)
968{
969 BT_DBG("%s", hdev->name);
970
971 clear_bit(HCI_AUTO_OFF, &hdev->flags);
972 del_timer(&hdev->off_timer);
973}
974
975int hci_uuids_clear(struct hci_dev *hdev)
976{
977 struct list_head *p, *n;
978
979 list_for_each_safe(p, n, &hdev->uuids) {
980 struct bt_uuid *uuid;
981
982 uuid = list_entry(p, struct bt_uuid, list);
983
984 list_del(p);
985 kfree(uuid);
986 }
987
988 return 0;
989}
990
991int hci_link_keys_clear(struct hci_dev *hdev)
992{
993 struct list_head *p, *n;
994
995 list_for_each_safe(p, n, &hdev->link_keys) {
996 struct link_key *key;
997
998 key = list_entry(p, struct link_key, list);
999
1000 list_del(p);
1001 kfree(key);
1002 }
1003
1004 return 0;
1005}
1006
1007struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
1008{
1009 struct list_head *p;
1010
1011 list_for_each(p, &hdev->link_keys) {
1012 struct link_key *k;
1013
1014 k = list_entry(p, struct link_key, list);
1015
1016 if (bacmp(bdaddr, &k->bdaddr) == 0)
1017 return k;
1018 }
1019
1020 return NULL;
1021}
1022
1023static int hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn,
1024 u8 key_type, u8 old_key_type)
1025{
1026 /* Legacy key */
1027 if (key_type < 0x03)
1028 return 1;
1029
1030 /* Debug keys are insecure so don't store them persistently */
1031 if (key_type == HCI_LK_DEBUG_COMBINATION)
1032 return 0;
1033
1034 /* Changed combination key and there's no previous one */
1035 if (key_type == HCI_LK_CHANGED_COMBINATION && old_key_type == 0xff)
1036 return 0;
1037
1038 /* Security mode 3 case */
1039 if (!conn)
1040 return 1;
1041
1042 /* Neither local nor remote side had no-bonding as requirement */
1043 if (conn->auth_type > 0x01 && conn->remote_auth > 0x01)
1044 return 1;
1045
1046 /* Local side had dedicated bonding as requirement */
1047 if (conn->auth_type == 0x02 || conn->auth_type == 0x03)
1048 return 1;
1049
1050 /* Remote side had dedicated bonding as requirement */
1051 if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03)
1052 return 1;
1053
1054 /* If none of the above criteria match, then don't store the key
1055 * persistently */
1056 return 0;
1057}
1058
1059int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
1060 bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len)
1061{
1062 struct link_key *key, *old_key;
1063 u8 old_key_type, persistent;
1064
1065 old_key = hci_find_link_key(hdev, bdaddr);
1066 if (old_key) {
1067 old_key_type = old_key->type;
1068 key = old_key;
1069 } else {
1070 old_key_type = conn ? conn->key_type : 0xff;
1071 key = kzalloc(sizeof(*key), GFP_ATOMIC);
1072 if (!key)
1073 return -ENOMEM;
1074 list_add(&key->list, &hdev->link_keys);
1075 }
1076
1077 BT_DBG("%s key for %s type %u", hdev->name, batostr(bdaddr), type);
1078
1079 /* Some buggy controller combinations generate a changed
1080 * combination key for legacy pairing even when there's no
1081 * previous key */
1082 if (type == HCI_LK_CHANGED_COMBINATION &&
1083 (!conn || conn->remote_auth == 0xff) &&
1084 old_key_type == 0xff) {
1085 type = HCI_LK_COMBINATION;
1086 if (conn)
1087 conn->key_type = type;
1088 }
1089
1090 bacpy(&key->bdaddr, bdaddr);
1091 memcpy(key->val, val, 16);
1092 key->pin_len = pin_len;
1093
1094 if (type == HCI_LK_CHANGED_COMBINATION)
1095 key->type = old_key_type;
1096 else
1097 key->type = type;
1098
1099 if (!new_key)
1100 return 0;
1101
1102 persistent = hci_persistent_key(hdev, conn, type, old_key_type);
1103
1104 mgmt_new_key(hdev->id, key, persistent);
1105
1106 if (!persistent) {
1107 list_del(&key->list);
1108 kfree(key);
1109 }
1110
1111 return 0;
1112}
1113
1114int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
1115{
1116 struct link_key *key;
1117
1118 key = hci_find_link_key(hdev, bdaddr);
1119 if (!key)
1120 return -ENOENT;
1121
1122 BT_DBG("%s removing %s", hdev->name, batostr(bdaddr));
1123
1124 list_del(&key->list);
1125 kfree(key);
1126
1127 return 0;
1128}
1129
1130/* HCI command timer function */
1131static void hci_cmd_timer(unsigned long arg)
1132{
1133 struct hci_dev *hdev = (void *) arg;
1134
1135 BT_ERR("%s command tx timeout", hdev->name);
1136 atomic_set(&hdev->cmd_cnt, 1);
1137 clear_bit(HCI_RESET, &hdev->flags);
1138 tasklet_schedule(&hdev->cmd_task);
1139}
1140
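Editor's note: hci_cmd_timer() is one half of the command flow control added by this patch: the host may only have cmd_cnt outstanding commands, the timer is (re)armed each time a command goes out, deleted when the controller answers, and on expiry it restores one credit so the queue can make progress again. The snippet below is a minimal user-space model of that credit-plus-watchdog pattern (plain C, wall-clock time instead of jiffies and no tasklet; names such as cmd_watchdog are invented for the sketch).

	#include <stdio.h>
	#include <time.h>

	#define CMD_TIMEOUT_SEC 1	/* stands in for HCI_CMD_TIMEOUT */

	struct cmd_state {
		int cmd_cnt;		/* credits: commands we may still send */
		time_t deadline;	/* 0 = watchdog disarmed */
	};

	static void cmd_sent(struct cmd_state *s)
	{
		s->cmd_cnt--;					/* consume one credit */
		s->deadline = time(NULL) + CMD_TIMEOUT_SEC;	/* arm watchdog */
	}

	static void cmd_complete(struct cmd_state *s, int ncmd)
	{
		s->deadline = 0;		/* controller answered: disarm */
		if (ncmd)
			s->cmd_cnt = 1;		/* controller grants a new credit */
	}

	static void cmd_watchdog(struct cmd_state *s)
	{
		if (s->deadline && time(NULL) >= s->deadline) {
			fprintf(stderr, "command tx timeout\n");
			s->cmd_cnt = 1;		/* unblock the queue */
			s->deadline = 0;
		}
	}

	int main(void)
	{
		struct cmd_state s = { .cmd_cnt = 1, .deadline = 0 };

		if (s.cmd_cnt)
			cmd_sent(&s);	/* queue runner sends one command */
		cmd_complete(&s, 1);	/* Command Complete, Num_HCI_Command_Packets = 1 */
		cmd_watchdog(&s);	/* nothing to do: watchdog was disarmed */
		printf("credits: %d\n", s.cmd_cnt);
		return 0;
	}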
1141struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev,
1142 bdaddr_t *bdaddr)
1143{
1144 struct oob_data *data;
1145
1146 list_for_each_entry(data, &hdev->remote_oob_data, list)
1147 if (bacmp(bdaddr, &data->bdaddr) == 0)
1148 return data;
1149
1150 return NULL;
1151}
1152
1153int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr)
1154{
1155 struct oob_data *data;
1156
1157 data = hci_find_remote_oob_data(hdev, bdaddr);
1158 if (!data)
1159 return -ENOENT;
1160
1161 BT_DBG("%s removing %s", hdev->name, batostr(bdaddr));
1162
1163 list_del(&data->list);
1164 kfree(data);
1165
1166 return 0;
1167}
1168
1169int hci_remote_oob_data_clear(struct hci_dev *hdev)
1170{
1171 struct oob_data *data, *n;
1172
1173 list_for_each_entry_safe(data, n, &hdev->remote_oob_data, list) {
1174 list_del(&data->list);
1175 kfree(data);
1176 }
1177
1178 return 0;
1179}
1180
1181int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *hash,
1182 u8 *randomizer)
1183{
1184 struct oob_data *data;
1185
1186 data = hci_find_remote_oob_data(hdev, bdaddr);
1187
1188 if (!data) {
1189 data = kmalloc(sizeof(*data), GFP_ATOMIC);
1190 if (!data)
1191 return -ENOMEM;
1192
1193 bacpy(&data->bdaddr, bdaddr);
1194 list_add(&data->list, &hdev->remote_oob_data);
1195 }
1196
1197 memcpy(data->hash, hash, sizeof(data->hash));
1198 memcpy(data->randomizer, randomizer, sizeof(data->randomizer));
1199
1200 BT_DBG("%s for %s", hdev->name, batostr(bdaddr));
1201
1202 return 0;
1203}
1204
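Editor's note: hci_add_remote_oob_data() together with the find/remove/clear helpers above forms a small cache of out-of-band pairing data keyed by the remote address. A standalone sketch of the same add-or-update pattern on a singly linked list is shown below (plain C; the 6-byte addr array stands in for bdaddr_t and the 16-byte hash/randomizer sizes match the values used by Secure Simple Pairing).

	#include <stdlib.h>
	#include <string.h>

	struct oob_entry {
		struct oob_entry *next;
		unsigned char addr[6];
		unsigned char hash[16];
		unsigned char randomizer[16];
	};

	static struct oob_entry *oob_find(struct oob_entry *head,
					  const unsigned char addr[6])
	{
		for (; head; head = head->next)
			if (memcmp(head->addr, addr, 6) == 0)
				return head;
		return NULL;
	}

	/* Add new data for addr, or overwrite the existing entry in place. */
	static int oob_add(struct oob_entry **head, const unsigned char addr[6],
			   const unsigned char hash[16],
			   const unsigned char randomizer[16])
	{
		struct oob_entry *e = oob_find(*head, addr);

		if (!e) {
			e = calloc(1, sizeof(*e));
			if (!e)
				return -1;
			memcpy(e->addr, addr, 6);
			e->next = *head;
			*head = e;
		}

		memcpy(e->hash, hash, 16);
		memcpy(e->randomizer, randomizer, 16);
		return 0;
	}

	static void oob_clear(struct oob_entry **head)
	{
		while (*head) {
			struct oob_entry *e = *head;
			*head = e->next;
			free(e);
		}
	}

	int main(void)
	{
		struct oob_entry *cache = NULL;
		unsigned char addr[6] = { 0x00, 0x1b, 0xdc, 0x01, 0x02, 0x03 };
		unsigned char h[16] = { 0 }, r[16] = { 0 };

		oob_add(&cache, addr, h, r);
		oob_add(&cache, addr, h, r);	/* second add just refreshes the entry */
		oob_clear(&cache);
		return 0;
	}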
894/* Register HCI device */ 1205/* Register HCI device */
895int hci_register_dev(struct hci_dev *hdev) 1206int hci_register_dev(struct hci_dev *hdev)
896{ 1207{
@@ -923,6 +1234,7 @@ int hci_register_dev(struct hci_dev *hdev)
923 hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); 1234 hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1);
924 hdev->esco_type = (ESCO_HV1); 1235 hdev->esco_type = (ESCO_HV1);
925 hdev->link_mode = (HCI_LM_ACCEPT); 1236 hdev->link_mode = (HCI_LM_ACCEPT);
1237 hdev->io_capability = 0x03; /* No Input No Output */
926 1238
927 hdev->idle_timeout = 0; 1239 hdev->idle_timeout = 0;
928 hdev->sniff_max_interval = 800; 1240 hdev->sniff_max_interval = 800;
@@ -936,6 +1248,8 @@ int hci_register_dev(struct hci_dev *hdev)
936 skb_queue_head_init(&hdev->cmd_q); 1248 skb_queue_head_init(&hdev->cmd_q);
937 skb_queue_head_init(&hdev->raw_q); 1249 skb_queue_head_init(&hdev->raw_q);
938 1250
1251 setup_timer(&hdev->cmd_timer, hci_cmd_timer, (unsigned long) hdev);
1252
939 for (i = 0; i < NUM_REASSEMBLY; i++) 1253 for (i = 0; i < NUM_REASSEMBLY; i++)
940 hdev->reassembly[i] = NULL; 1254 hdev->reassembly[i] = NULL;
941 1255
@@ -948,6 +1262,16 @@ int hci_register_dev(struct hci_dev *hdev)
948 1262
949 INIT_LIST_HEAD(&hdev->blacklist); 1263 INIT_LIST_HEAD(&hdev->blacklist);
950 1264
1265 INIT_LIST_HEAD(&hdev->uuids);
1266
1267 INIT_LIST_HEAD(&hdev->link_keys);
1268
1269 INIT_LIST_HEAD(&hdev->remote_oob_data);
1270
1271 INIT_WORK(&hdev->power_on, hci_power_on);
1272 INIT_WORK(&hdev->power_off, hci_power_off);
1273 setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev);
1274
951 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); 1275 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
952 1276
953 atomic_set(&hdev->promisc, 0); 1277 atomic_set(&hdev->promisc, 0);
@@ -969,7 +1293,10 @@ int hci_register_dev(struct hci_dev *hdev)
969 } 1293 }
970 } 1294 }
971 1295
972 mgmt_index_added(hdev->id); 1296 set_bit(HCI_AUTO_OFF, &hdev->flags);
1297 set_bit(HCI_SETUP, &hdev->flags);
1298 queue_work(hdev->workqueue, &hdev->power_on);
1299
973 hci_notify(hdev, HCI_DEV_REG); 1300 hci_notify(hdev, HCI_DEV_REG);
974 1301
975 return id; 1302 return id;
@@ -999,7 +1326,10 @@ int hci_unregister_dev(struct hci_dev *hdev)
999 for (i = 0; i < NUM_REASSEMBLY; i++) 1326 for (i = 0; i < NUM_REASSEMBLY; i++)
1000 kfree_skb(hdev->reassembly[i]); 1327 kfree_skb(hdev->reassembly[i]);
1001 1328
1002 mgmt_index_removed(hdev->id); 1329 if (!test_bit(HCI_INIT, &hdev->flags) &&
1330 !test_bit(HCI_SETUP, &hdev->flags))
1331 mgmt_index_removed(hdev->id);
1332
1003 hci_notify(hdev, HCI_DEV_UNREG); 1333 hci_notify(hdev, HCI_DEV_UNREG);
1004 1334
1005 if (hdev->rfkill) { 1335 if (hdev->rfkill) {
@@ -1009,10 +1339,15 @@ int hci_unregister_dev(struct hci_dev *hdev)
1009 1339
1010 hci_unregister_sysfs(hdev); 1340 hci_unregister_sysfs(hdev);
1011 1341
1342 hci_del_off_timer(hdev);
1343
1012 destroy_workqueue(hdev->workqueue); 1344 destroy_workqueue(hdev->workqueue);
1013 1345
1014 hci_dev_lock_bh(hdev); 1346 hci_dev_lock_bh(hdev);
1015 hci_blacklist_clear(hdev); 1347 hci_blacklist_clear(hdev);
1348 hci_uuids_clear(hdev);
1349 hci_link_keys_clear(hdev);
1350 hci_remote_oob_data_clear(hdev);
1016 hci_dev_unlock_bh(hdev); 1351 hci_dev_unlock_bh(hdev);
1017 1352
1018 __hci_dev_put(hdev); 1353 __hci_dev_put(hdev);
@@ -1062,7 +1397,7 @@ int hci_recv_frame(struct sk_buff *skb)
1062EXPORT_SYMBOL(hci_recv_frame); 1397EXPORT_SYMBOL(hci_recv_frame);
1063 1398
1064static int hci_reassembly(struct hci_dev *hdev, int type, void *data, 1399static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
1065 int count, __u8 index, gfp_t gfp_mask) 1400 int count, __u8 index)
1066{ 1401{
1067 int len = 0; 1402 int len = 0;
1068 int hlen = 0; 1403 int hlen = 0;
@@ -1092,7 +1427,7 @@ static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
1092 break; 1427 break;
1093 } 1428 }
1094 1429
1095 skb = bt_skb_alloc(len, gfp_mask); 1430 skb = bt_skb_alloc(len, GFP_ATOMIC);
1096 if (!skb) 1431 if (!skb)
1097 return -ENOMEM; 1432 return -ENOMEM;
1098 1433
@@ -1178,8 +1513,7 @@ int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
1178 return -EILSEQ; 1513 return -EILSEQ;
1179 1514
1180 while (count) { 1515 while (count) {
1181 rem = hci_reassembly(hdev, type, data, count, 1516 rem = hci_reassembly(hdev, type, data, count, type - 1);
1182 type - 1, GFP_ATOMIC);
1183 if (rem < 0) 1517 if (rem < 0)
1184 return rem; 1518 return rem;
1185 1519
@@ -1213,8 +1547,8 @@ int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
1213 } else 1547 } else
1214 type = bt_cb(skb)->pkt_type; 1548 type = bt_cb(skb)->pkt_type;
1215 1549
1216 rem = hci_reassembly(hdev, type, data, 1550 rem = hci_reassembly(hdev, type, data, count,
1217 count, STREAM_REASSEMBLY, GFP_ATOMIC); 1551 STREAM_REASSEMBLY);
1218 if (rem < 0) 1552 if (rem < 0)
1219 return rem; 1553 return rem;
1220 1554
@@ -1313,7 +1647,7 @@ static int hci_send_frame(struct sk_buff *skb)
1313 /* Time stamp */ 1647 /* Time stamp */
1314 __net_timestamp(skb); 1648 __net_timestamp(skb);
1315 1649
1316 hci_send_to_sock(hdev, skb); 1650 hci_send_to_sock(hdev, skb, NULL);
1317 } 1651 }
1318 1652
1319 /* Get rid of skb owner, prior to sending to the driver. */ 1653 /* Get rid of skb owner, prior to sending to the driver. */
@@ -1349,6 +1683,9 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
1349 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; 1683 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
1350 skb->dev = (void *) hdev; 1684 skb->dev = (void *) hdev;
1351 1685
1686 if (test_bit(HCI_INIT, &hdev->flags))
1687 hdev->init_last_cmd = opcode;
1688
1352 skb_queue_tail(&hdev->cmd_q, skb); 1689 skb_queue_tail(&hdev->cmd_q, skb);
1353 tasklet_schedule(&hdev->cmd_task); 1690 tasklet_schedule(&hdev->cmd_task);
1354 1691
@@ -1395,7 +1732,7 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1395 1732
1396 skb->dev = (void *) hdev; 1733 skb->dev = (void *) hdev;
1397 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; 1734 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
1398 hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); 1735 hci_add_acl_hdr(skb, conn->handle, flags);
1399 1736
1400 list = skb_shinfo(skb)->frag_list; 1737 list = skb_shinfo(skb)->frag_list;
1401 if (!list) { 1738 if (!list) {
@@ -1413,12 +1750,15 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1413 spin_lock_bh(&conn->data_q.lock); 1750 spin_lock_bh(&conn->data_q.lock);
1414 1751
1415 __skb_queue_tail(&conn->data_q, skb); 1752 __skb_queue_tail(&conn->data_q, skb);
1753
1754 flags &= ~ACL_START;
1755 flags |= ACL_CONT;
1416 do { 1756 do {
1417 skb = list; list = list->next; 1757 skb = list; list = list->next;
1418 1758
1419 skb->dev = (void *) hdev; 1759 skb->dev = (void *) hdev;
1420 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; 1760 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
1421 hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); 1761 hci_add_acl_hdr(skb, conn->handle, flags);
1422 1762
1423 BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); 1763 BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
1424 1764
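Editor's note: this hunk stops forcing ACL_START onto the first fragment. The caller's flags (which may already request a non-flushable start packet) go out unchanged on the head skb, and only the continuation fragments on the frag_list are rewritten to ACL_CONT. A hedged sketch of the packet-boundary handling, detached from sk_buffs, is below; the PB_* names and values are illustrative placeholders for the 2-bit boundary field of the HCI ACL header, not the kernel constants.

	#include <stdio.h>

	/* Packet boundary flags for the sketch (values are illustrative,
	 * not copied from the kernel headers). */
	enum pb_flag { PB_START_NO_FLUSH, PB_CONT, PB_START };

	/* Emit one ACL packet per fragment: the first keeps the caller's
	 * boundary flag, every later fragment becomes a continuation. */
	static void send_fragments(const char **frags, int n,
				   enum pb_flag first_flag)
	{
		int i;

		for (i = 0; i < n; i++) {
			enum pb_flag pb = (i == 0) ? first_flag : PB_CONT;

			printf("frag %d: pb=%d payload=%s\n", i, pb, frags[i]);
		}
	}

	int main(void)
	{
		const char *frags[] = { "hdr+data0", "data1", "data2" };

		/* an L2CAP caller may ask for a non-flushable start packet */
		send_fragments(frags, 3, PB_START_NO_FLUSH);
		return 0;
	}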
@@ -1486,8 +1826,25 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int
1486 } 1826 }
1487 1827
1488 if (conn) { 1828 if (conn) {
1489 int cnt = (type == ACL_LINK ? hdev->acl_cnt : hdev->sco_cnt); 1829 int cnt, q;
1490 int q = cnt / num; 1830
1831 switch (conn->type) {
1832 case ACL_LINK:
1833 cnt = hdev->acl_cnt;
1834 break;
1835 case SCO_LINK:
1836 case ESCO_LINK:
1837 cnt = hdev->sco_cnt;
1838 break;
1839 case LE_LINK:
1840 cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
1841 break;
1842 default:
1843 cnt = 0;
1844 BT_ERR("Unknown link type");
1845 }
1846
1847 q = cnt / num;
1491 *quote = q ? q : 1; 1848 *quote = q ? q : 1;
1492 } else 1849 } else
1493 *quote = 0; 1850 *quote = 0;
@@ -1496,19 +1853,19 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int
1496 return conn; 1853 return conn;
1497} 1854}
1498 1855
1499static inline void hci_acl_tx_to(struct hci_dev *hdev) 1856static inline void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
1500{ 1857{
1501 struct hci_conn_hash *h = &hdev->conn_hash; 1858 struct hci_conn_hash *h = &hdev->conn_hash;
1502 struct list_head *p; 1859 struct list_head *p;
1503 struct hci_conn *c; 1860 struct hci_conn *c;
1504 1861
1505 BT_ERR("%s ACL tx timeout", hdev->name); 1862 BT_ERR("%s link tx timeout", hdev->name);
1506 1863
1507 /* Kill stalled connections */ 1864 /* Kill stalled connections */
1508 list_for_each(p, &h->list) { 1865 list_for_each(p, &h->list) {
1509 c = list_entry(p, struct hci_conn, list); 1866 c = list_entry(p, struct hci_conn, list);
1510 if (c->type == ACL_LINK && c->sent) { 1867 if (c->type == type && c->sent) {
1511 BT_ERR("%s killing stalled ACL connection %s", 1868 BT_ERR("%s killing stalled connection %s",
1512 hdev->name, batostr(&c->dst)); 1869 hdev->name, batostr(&c->dst));
1513 hci_acl_disconn(c, 0x13); 1870 hci_acl_disconn(c, 0x13);
1514 } 1871 }
@@ -1527,7 +1884,7 @@ static inline void hci_sched_acl(struct hci_dev *hdev)
1527 /* ACL tx timeout must be longer than maximum 1884 /* ACL tx timeout must be longer than maximum
1528 * link supervision timeout (40.9 seconds) */ 1885 * link supervision timeout (40.9 seconds) */
1529 if (!hdev->acl_cnt && time_after(jiffies, hdev->acl_last_tx + HZ * 45)) 1886 if (!hdev->acl_cnt && time_after(jiffies, hdev->acl_last_tx + HZ * 45))
1530 hci_acl_tx_to(hdev); 1887 hci_link_tx_to(hdev, ACL_LINK);
1531 } 1888 }
1532 1889
1533 while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) { 1890 while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) {
@@ -1586,6 +1943,40 @@ static inline void hci_sched_esco(struct hci_dev *hdev)
1586 } 1943 }
1587} 1944}
1588 1945
1946static inline void hci_sched_le(struct hci_dev *hdev)
1947{
1948 struct hci_conn *conn;
1949 struct sk_buff *skb;
1950 int quote, cnt;
1951
1952 BT_DBG("%s", hdev->name);
1953
1954 if (!test_bit(HCI_RAW, &hdev->flags)) {
1955 /* LE tx timeout must be longer than maximum
1956 * link supervision timeout (40.9 seconds) */
1957 if (!hdev->le_cnt && hdev->le_pkts &&
1958 time_after(jiffies, hdev->le_last_tx + HZ * 45))
1959 hci_link_tx_to(hdev, LE_LINK);
1960 }
1961
1962 cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
1963 while (cnt && (conn = hci_low_sent(hdev, LE_LINK, &quote))) {
1964 while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
1965 BT_DBG("skb %p len %d", skb, skb->len);
1966
1967 hci_send_frame(skb);
1968 hdev->le_last_tx = jiffies;
1969
1970 cnt--;
1971 conn->sent++;
1972 }
1973 }
1974 if (hdev->le_pkts)
1975 hdev->le_cnt = cnt;
1976 else
1977 hdev->acl_cnt = cnt;
1978}
1979
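Editor's note: hci_sched_le() follows the same pattern as the ACL and SCO schedulers, with one twist: if LE Read Buffer Size reported no dedicated LE buffers (le_pkts == 0), LE traffic is accounted against the shared ACL pool instead. The sketch below is a standalone model of that pool selection and per-connection quota (plain C; the struct and helper names are invented for the example).

	#include <stdio.h>

	struct dev_credits {
		int acl_cnt, acl_pkts;
		int le_cnt, le_pkts;	/* le_pkts == 0: controller has no LE pool */
	};

	/* Pick the credit pool an LE connection draws from. */
	static int *le_pool(struct dev_credits *d)
	{
		return d->le_pkts ? &d->le_cnt : &d->acl_cnt;
	}

	/* Split the available credits between num connections with data
	 * queued, guaranteeing every connection at least one packet. */
	static int le_quote(struct dev_credits *d, int num)
	{
		int q = *le_pool(d) / num;

		return q ? q : 1;
	}

	int main(void)
	{
		struct dev_credits shared = { .acl_cnt = 5, .acl_pkts = 5,
					      .le_cnt = 0, .le_pkts = 0 };
		struct dev_credits split  = { .acl_cnt = 5, .acl_pkts = 5,
					      .le_cnt = 3, .le_pkts = 3 };

		printf("shared pool quote: %d\n", le_quote(&shared, 2)); /* from ACL credits */
		printf("LE pool quote:     %d\n", le_quote(&split, 2));  /* from LE credits */
		return 0;
	}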
1589static void hci_tx_task(unsigned long arg) 1980static void hci_tx_task(unsigned long arg)
1590{ 1981{
1591 struct hci_dev *hdev = (struct hci_dev *) arg; 1982 struct hci_dev *hdev = (struct hci_dev *) arg;
@@ -1593,7 +1984,8 @@ static void hci_tx_task(unsigned long arg)
1593 1984
1594 read_lock(&hci_task_lock); 1985 read_lock(&hci_task_lock);
1595 1986
1596 BT_DBG("%s acl %d sco %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt); 1987 BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt,
1988 hdev->sco_cnt, hdev->le_cnt);
1597 1989
1598 /* Schedule queues and send stuff to HCI driver */ 1990 /* Schedule queues and send stuff to HCI driver */
1599 1991
@@ -1603,6 +1995,8 @@ static void hci_tx_task(unsigned long arg)
1603 1995
1604 hci_sched_esco(hdev); 1996 hci_sched_esco(hdev);
1605 1997
1998 hci_sched_le(hdev);
1999
1606 /* Send next queued raw (unknown type) packet */ 2000 /* Send next queued raw (unknown type) packet */
1607 while ((skb = skb_dequeue(&hdev->raw_q))) 2001 while ((skb = skb_dequeue(&hdev->raw_q)))
1608 hci_send_frame(skb); 2002 hci_send_frame(skb);
@@ -1610,7 +2004,7 @@ static void hci_tx_task(unsigned long arg)
1610 read_unlock(&hci_task_lock); 2004 read_unlock(&hci_task_lock);
1611} 2005}
1612 2006
1613/* ----- HCI RX task (incoming data proccessing) ----- */ 2007/* ----- HCI RX task (incoming data processing) ----- */
1614 2008
1615/* ACL data packet */ 2009/* ACL data packet */
1616static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) 2010static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1700,7 +2094,7 @@ static void hci_rx_task(unsigned long arg)
1700 while ((skb = skb_dequeue(&hdev->rx_q))) { 2094 while ((skb = skb_dequeue(&hdev->rx_q))) {
1701 if (atomic_read(&hdev->promisc)) { 2095 if (atomic_read(&hdev->promisc)) {
1702 /* Send copy to the sockets */ 2096 /* Send copy to the sockets */
1703 hci_send_to_sock(hdev, skb); 2097 hci_send_to_sock(hdev, skb, NULL);
1704 } 2098 }
1705 2099
1706 if (test_bit(HCI_RAW, &hdev->flags)) { 2100 if (test_bit(HCI_RAW, &hdev->flags)) {
@@ -1750,20 +2144,20 @@ static void hci_cmd_task(unsigned long arg)
1750 2144
1751 BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt)); 2145 BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt));
1752 2146
1753 if (!atomic_read(&hdev->cmd_cnt) && time_after(jiffies, hdev->cmd_last_tx + HZ)) {
1754 BT_ERR("%s command tx timeout", hdev->name);
1755 atomic_set(&hdev->cmd_cnt, 1);
1756 }
1757
1758 /* Send queued commands */ 2147 /* Send queued commands */
1759 if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { 2148 if (atomic_read(&hdev->cmd_cnt)) {
2149 skb = skb_dequeue(&hdev->cmd_q);
2150 if (!skb)
2151 return;
2152
1760 kfree_skb(hdev->sent_cmd); 2153 kfree_skb(hdev->sent_cmd);
1761 2154
1762 hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC); 2155 hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC);
1763 if (hdev->sent_cmd) { 2156 if (hdev->sent_cmd) {
1764 atomic_dec(&hdev->cmd_cnt); 2157 atomic_dec(&hdev->cmd_cnt);
1765 hci_send_frame(skb); 2158 hci_send_frame(skb);
1766 hdev->cmd_last_tx = jiffies; 2159 mod_timer(&hdev->cmd_timer,
2160 jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT));
1767 } else { 2161 } else {
1768 skb_queue_head(&hdev->cmd_q, skb); 2162 skb_queue_head(&hdev->cmd_q, skb);
1769 tasklet_schedule(&hdev->cmd_task); 2163 tasklet_schedule(&hdev->cmd_task);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a290854fdaa6..f13ddbf858ba 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -56,7 +56,9 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
56 if (status) 56 if (status)
57 return; 57 return;
58 58
59 clear_bit(HCI_INQUIRY, &hdev->flags); 59 if (test_bit(HCI_MGMT, &hdev->flags) &&
60 test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
61 mgmt_discovering(hdev->id, 0);
60 62
61 hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); 63 hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status);
62 64
@@ -72,7 +74,9 @@ static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb)
72 if (status) 74 if (status)
73 return; 75 return;
74 76
75 clear_bit(HCI_INQUIRY, &hdev->flags); 77 if (test_bit(HCI_MGMT, &hdev->flags) &&
78 test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
79 mgmt_discovering(hdev->id, 0);
76 80
77 hci_conn_check_pending(hdev); 81 hci_conn_check_pending(hdev);
78} 82}
@@ -183,6 +187,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
183 187
184 BT_DBG("%s status 0x%x", hdev->name, status); 188 BT_DBG("%s status 0x%x", hdev->name, status);
185 189
190 clear_bit(HCI_RESET, &hdev->flags);
191
186 hci_req_complete(hdev, HCI_OP_RESET, status); 192 hci_req_complete(hdev, HCI_OP_RESET, status);
187} 193}
188 194
@@ -193,14 +199,17 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
193 199
194 BT_DBG("%s status 0x%x", hdev->name, status); 200 BT_DBG("%s status 0x%x", hdev->name, status);
195 201
196 if (status)
197 return;
198
199 sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME); 202 sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME);
200 if (!sent) 203 if (!sent)
201 return; 204 return;
202 205
203 memcpy(hdev->dev_name, sent, 248); 206 if (test_bit(HCI_MGMT, &hdev->flags))
207 mgmt_set_local_name_complete(hdev->id, sent, status);
208
209 if (status)
210 return;
211
212 memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH);
204} 213}
205 214
206static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) 215static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
@@ -212,7 +221,7 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
212 if (rp->status) 221 if (rp->status)
213 return; 222 return;
214 223
215 memcpy(hdev->dev_name, rp->name, 248); 224 memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH);
216} 225}
217 226
218static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) 227static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
@@ -274,15 +283,24 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
274 283
275 if (!status) { 284 if (!status) {
276 __u8 param = *((__u8 *) sent); 285 __u8 param = *((__u8 *) sent);
286 int old_pscan, old_iscan;
277 287
278 clear_bit(HCI_PSCAN, &hdev->flags); 288 old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags);
279 clear_bit(HCI_ISCAN, &hdev->flags); 289 old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags);
280 290
281 if (param & SCAN_INQUIRY) 291 if (param & SCAN_INQUIRY) {
282 set_bit(HCI_ISCAN, &hdev->flags); 292 set_bit(HCI_ISCAN, &hdev->flags);
293 if (!old_iscan)
294 mgmt_discoverable(hdev->id, 1);
295 } else if (old_iscan)
296 mgmt_discoverable(hdev->id, 0);
283 297
284 if (param & SCAN_PAGE) 298 if (param & SCAN_PAGE) {
285 set_bit(HCI_PSCAN, &hdev->flags); 299 set_bit(HCI_PSCAN, &hdev->flags);
300 if (!old_pscan)
301 mgmt_connectable(hdev->id, 1);
302 } else if (old_pscan)
303 mgmt_connectable(hdev->id, 0);
286 } 304 }
287 305
288 hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); 306 hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
@@ -415,6 +433,115 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
415 hdev->ssp_mode = *((__u8 *) sent); 433 hdev->ssp_mode = *((__u8 *) sent);
416} 434}
417 435
436static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
437{
438 if (hdev->features[6] & LMP_EXT_INQ)
439 return 2;
440
441 if (hdev->features[3] & LMP_RSSI_INQ)
442 return 1;
443
444 if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
445 hdev->lmp_subver == 0x0757)
446 return 1;
447
448 if (hdev->manufacturer == 15) {
449 if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
450 return 1;
451 if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
452 return 1;
453 if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
454 return 1;
455 }
456
457 if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
458 hdev->lmp_subver == 0x1805)
459 return 1;
460
461 return 0;
462}
463
464static void hci_setup_inquiry_mode(struct hci_dev *hdev)
465{
466 u8 mode;
467
468 mode = hci_get_inquiry_mode(hdev);
469
470 hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
471}
472
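Editor's note: hci_get_inquiry_mode() picks the richest inquiry result format the controller can deliver, and hci_setup_inquiry_mode() writes it with HCI_OP_WRITE_INQUIRY_MODE: 0x02 (extended inquiry result) when LMP_EXT_INQ is set, 0x01 (inquiry result with RSSI) when LMP_RSSI_INQ is set or the controller is one of the listed quirky revisions, and 0x00 (standard) otherwise. A small standalone illustration of the mode values is below (plain C; the helper names are invented and the quirk table is omitted).

	#include <stdio.h>

	/* Write Inquiry Mode parameter values */
	#define INQ_MODE_STANDARD	0x00
	#define INQ_MODE_RSSI		0x01
	#define INQ_MODE_EXTENDED	0x02

	static const char *inquiry_mode_name(unsigned char mode)
	{
		switch (mode) {
		case INQ_MODE_STANDARD:	return "standard inquiry result";
		case INQ_MODE_RSSI:	return "inquiry result with RSSI";
		case INQ_MODE_EXTENDED:	return "extended inquiry result";
		default:		return "reserved";
		}
	}

	static unsigned char pick_inquiry_mode(int has_ext_inq, int has_rssi_inq)
	{
		if (has_ext_inq)
			return INQ_MODE_EXTENDED;
		if (has_rssi_inq)
			return INQ_MODE_RSSI;
		return INQ_MODE_STANDARD;
	}

	int main(void)
	{
		unsigned char mode = pick_inquiry_mode(1, 1);

		printf("mode 0x%02x: %s\n", mode, inquiry_mode_name(mode));
		return 0;
	}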
473static void hci_setup_event_mask(struct hci_dev *hdev)
474{
475 /* The second byte is 0xff instead of 0x9f (two reserved bits
476 * disabled) since a Broadcom 1.2 dongle doesn't respond to the
477 * command otherwise */
478 u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
479
480 /* Events for 1.2 and newer controllers */
481 if (hdev->lmp_ver > 1) {
482 events[4] |= 0x01; /* Flow Specification Complete */
483 events[4] |= 0x02; /* Inquiry Result with RSSI */
484 events[4] |= 0x04; /* Read Remote Extended Features Complete */
485 events[5] |= 0x08; /* Synchronous Connection Complete */
486 events[5] |= 0x10; /* Synchronous Connection Changed */
487 }
488
489 if (hdev->features[3] & LMP_RSSI_INQ)
490 events[4] |= 0x04; /* Inquiry Result with RSSI */
491
492 if (hdev->features[5] & LMP_SNIFF_SUBR)
493 events[5] |= 0x20; /* Sniff Subrating */
494
495 if (hdev->features[5] & LMP_PAUSE_ENC)
496 events[5] |= 0x80; /* Encryption Key Refresh Complete */
497
498 if (hdev->features[6] & LMP_EXT_INQ)
499 events[5] |= 0x40; /* Extended Inquiry Result */
500
501 if (hdev->features[6] & LMP_NO_FLUSH)
502 events[7] |= 0x01; /* Enhanced Flush Complete */
503
504 if (hdev->features[7] & LMP_LSTO)
505 events[6] |= 0x80; /* Link Supervision Timeout Changed */
506
507 if (hdev->features[6] & LMP_SIMPLE_PAIR) {
508 events[6] |= 0x01; /* IO Capability Request */
509 events[6] |= 0x02; /* IO Capability Response */
510 events[6] |= 0x04; /* User Confirmation Request */
511 events[6] |= 0x08; /* User Passkey Request */
512 events[6] |= 0x10; /* Remote OOB Data Request */
513 events[6] |= 0x20; /* Simple Pairing Complete */
514 events[7] |= 0x04; /* User Passkey Notification */
515 events[7] |= 0x08; /* Keypress Notification */
516 events[7] |= 0x10; /* Remote Host Supported
517 * Features Notification */
518 }
519
520 if (hdev->features[4] & LMP_LE)
521 events[7] |= 0x20; /* LE Meta-Event */
522
523 hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
524}
525
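Editor's note: the hard-coded bytes in hci_setup_event_mask() select which HCI events the controller may report. For the events configured here, bit n of the 8-byte mask corresponds to the event with code n + 1, so e.g. the LE Meta event (code 0x3E) lands in byte 7, bit 5 (0x20), which is exactly what the LMP_LE branch sets. A small helper deriving the byte/bit position from an event code is sketched below (standalone C; treat the code-to-bit mapping as an illustration of the spec layout for these events, not as a drop-in kernel helper).

	#include <stdio.h>
	#include <string.h>

	/* For the events configured in hci_setup_event_mask(), bit (code - 1)
	 * of the 64-bit mask selects the event with that HCI event code. */
	static void event_mask_set(unsigned char events[8], unsigned char evt_code)
	{
		unsigned int bit = evt_code - 1;

		events[bit / 8] |= 1u << (bit % 8);
	}

	int main(void)
	{
		unsigned char events[8];

		memset(events, 0, sizeof(events));
		event_mask_set(events, 0x22);	/* Inquiry Result with RSSI */
		event_mask_set(events, 0x3e);	/* LE Meta event */

		printf("byte4=0x%02x byte7=0x%02x\n", events[4], events[7]);
		/* prints byte4=0x02 byte7=0x20, matching the values above */
		return 0;
	}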
526static void hci_setup(struct hci_dev *hdev)
527{
528 hci_setup_event_mask(hdev);
529
530 if (hdev->lmp_ver > 1)
531 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
532
533 if (hdev->features[6] & LMP_SIMPLE_PAIR) {
534 u8 mode = 0x01;
535 hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode);
536 }
537
538 if (hdev->features[3] & LMP_RSSI_INQ)
539 hci_setup_inquiry_mode(hdev);
540
541 if (hdev->features[7] & LMP_INQ_TX_PWR)
542 hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
543}
544
418static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) 545static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
419{ 546{
420 struct hci_rp_read_local_version *rp = (void *) skb->data; 547 struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -426,11 +553,34 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
426 553
427 hdev->hci_ver = rp->hci_ver; 554 hdev->hci_ver = rp->hci_ver;
428 hdev->hci_rev = __le16_to_cpu(rp->hci_rev); 555 hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
556 hdev->lmp_ver = rp->lmp_ver;
429 hdev->manufacturer = __le16_to_cpu(rp->manufacturer); 557 hdev->manufacturer = __le16_to_cpu(rp->manufacturer);
558 hdev->lmp_subver = __le16_to_cpu(rp->lmp_subver);
430 559
431 BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, 560 BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name,
432 hdev->manufacturer, 561 hdev->manufacturer,
433 hdev->hci_ver, hdev->hci_rev); 562 hdev->hci_ver, hdev->hci_rev);
563
564 if (test_bit(HCI_INIT, &hdev->flags))
565 hci_setup(hdev);
566}
567
568static void hci_setup_link_policy(struct hci_dev *hdev)
569{
570 u16 link_policy = 0;
571
572 if (hdev->features[0] & LMP_RSWITCH)
573 link_policy |= HCI_LP_RSWITCH;
574 if (hdev->features[0] & LMP_HOLD)
575 link_policy |= HCI_LP_HOLD;
576 if (hdev->features[0] & LMP_SNIFF)
577 link_policy |= HCI_LP_SNIFF;
578 if (hdev->features[1] & LMP_PARK)
579 link_policy |= HCI_LP_PARK;
580
581 link_policy = cpu_to_le16(link_policy);
582 hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
583 sizeof(link_policy), &link_policy);
434} 584}
435 585
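Editor's note: hci_setup_link_policy() turns the supported LMP features into the default link policy word written with HCI_OP_WRITE_DEF_LINK_POLICY; per the HCI specification the bits are 0x0001 role switch, 0x0002 hold, 0x0004 sniff and 0x0008 park. A standalone sketch of that mapping is below (plain C; the feature flags are passed as booleans rather than as the kernel's feature byte masks).

	#include <stdio.h>
	#include <stdint.h>

	/* Default link policy bits (Write Default Link Policy Settings) */
	#define LP_RSWITCH	0x0001
	#define LP_HOLD		0x0002
	#define LP_SNIFF	0x0004
	#define LP_PARK		0x0008

	static uint16_t build_link_policy(int rswitch, int hold, int sniff, int park)
	{
		uint16_t policy = 0;

		if (rswitch)
			policy |= LP_RSWITCH;
		if (hold)
			policy |= LP_HOLD;
		if (sniff)
			policy |= LP_SNIFF;
		if (park)
			policy |= LP_PARK;

		return policy;
	}

	int main(void)
	{
		/* typical controller: role switch + sniff, no hold/park */
		printf("0x%04x\n", build_link_policy(1, 0, 1, 0)); /* 0x0005 */
		return 0;
	}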
436static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb) 586static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb)
@@ -440,9 +590,15 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb
440 BT_DBG("%s status 0x%x", hdev->name, rp->status); 590 BT_DBG("%s status 0x%x", hdev->name, rp->status);
441 591
442 if (rp->status) 592 if (rp->status)
443 return; 593 goto done;
444 594
445 memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); 595 memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
596
597 if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10))
598 hci_setup_link_policy(hdev);
599
600done:
601 hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status);
446} 602}
447 603
448static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb) 604static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb)
@@ -548,16 +704,155 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
548 hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); 704 hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
549} 705}
550 706
707static void hci_cc_delete_stored_link_key(struct hci_dev *hdev,
708 struct sk_buff *skb)
709{
710 __u8 status = *((__u8 *) skb->data);
711
712 BT_DBG("%s status 0x%x", hdev->name, status);
713
714 hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status);
715}
716
717static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
718{
719 __u8 status = *((__u8 *) skb->data);
720
721 BT_DBG("%s status 0x%x", hdev->name, status);
722
723 hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status);
724}
725
726static void hci_cc_write_inquiry_mode(struct hci_dev *hdev,
727 struct sk_buff *skb)
728{
729 __u8 status = *((__u8 *) skb->data);
730
731 BT_DBG("%s status 0x%x", hdev->name, status);
732
733 hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status);
734}
735
736static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
737 struct sk_buff *skb)
738{
739 __u8 status = *((__u8 *) skb->data);
740
741 BT_DBG("%s status 0x%x", hdev->name, status);
742
743 hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, status);
744}
745
746static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb)
747{
748 __u8 status = *((__u8 *) skb->data);
749
750 BT_DBG("%s status 0x%x", hdev->name, status);
751
752 hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);
753}
754
755static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
756{
757 struct hci_rp_pin_code_reply *rp = (void *) skb->data;
758 struct hci_cp_pin_code_reply *cp;
759 struct hci_conn *conn;
760
761 BT_DBG("%s status 0x%x", hdev->name, rp->status);
762
763 if (test_bit(HCI_MGMT, &hdev->flags))
764 mgmt_pin_code_reply_complete(hdev->id, &rp->bdaddr, rp->status);
765
766 if (rp->status != 0)
767 return;
768
769 cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY);
770 if (!cp)
771 return;
772
773 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
774 if (conn)
775 conn->pin_length = cp->pin_len;
776}
777
778static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
779{
780 struct hci_rp_pin_code_neg_reply *rp = (void *) skb->data;
781
782 BT_DBG("%s status 0x%x", hdev->name, rp->status);
783
784 if (test_bit(HCI_MGMT, &hdev->flags))
785 mgmt_pin_code_neg_reply_complete(hdev->id, &rp->bdaddr,
786 rp->status);
787}
788static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,
789 struct sk_buff *skb)
790{
791 struct hci_rp_le_read_buffer_size *rp = (void *) skb->data;
792
793 BT_DBG("%s status 0x%x", hdev->name, rp->status);
794
795 if (rp->status)
796 return;
797
798 hdev->le_mtu = __le16_to_cpu(rp->le_mtu);
799 hdev->le_pkts = rp->le_max_pkt;
800
801 hdev->le_cnt = hdev->le_pkts;
802
803 BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
804
805 hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);
806}
807
808static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
809{
810 struct hci_rp_user_confirm_reply *rp = (void *) skb->data;
811
812 BT_DBG("%s status 0x%x", hdev->name, rp->status);
813
814 if (test_bit(HCI_MGMT, &hdev->flags))
815 mgmt_user_confirm_reply_complete(hdev->id, &rp->bdaddr,
816 rp->status);
817}
818
819static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev,
820 struct sk_buff *skb)
821{
822 struct hci_rp_user_confirm_reply *rp = (void *) skb->data;
823
824 BT_DBG("%s status 0x%x", hdev->name, rp->status);
825
826 if (test_bit(HCI_MGMT, &hdev->flags))
827 mgmt_user_confirm_neg_reply_complete(hdev->id, &rp->bdaddr,
828 rp->status);
829}
830
831static void hci_cc_read_local_oob_data_reply(struct hci_dev *hdev,
832 struct sk_buff *skb)
833{
834 struct hci_rp_read_local_oob_data *rp = (void *) skb->data;
835
836 BT_DBG("%s status 0x%x", hdev->name, rp->status);
837
838 mgmt_read_local_oob_data_reply_complete(hdev->id, rp->hash,
839 rp->randomizer, rp->status);
840}
841
551static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) 842static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
552{ 843{
553 BT_DBG("%s status 0x%x", hdev->name, status); 844 BT_DBG("%s status 0x%x", hdev->name, status);
554 845
555 if (status) { 846 if (status) {
556 hci_req_complete(hdev, HCI_OP_INQUIRY, status); 847 hci_req_complete(hdev, HCI_OP_INQUIRY, status);
557
558 hci_conn_check_pending(hdev); 848 hci_conn_check_pending(hdev);
559 } else 849 return;
560 set_bit(HCI_INQUIRY, &hdev->flags); 850 }
851
852 if (test_bit(HCI_MGMT, &hdev->flags) &&
853 !test_and_set_bit(HCI_INQUIRY,
854 &hdev->flags))
855 mgmt_discovering(hdev->id, 1);
561} 856}
562 857
563static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) 858static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
@@ -622,11 +917,14 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status)
622 hci_dev_lock(hdev); 917 hci_dev_lock(hdev);
623 918
624 acl = hci_conn_hash_lookup_handle(hdev, handle); 919 acl = hci_conn_hash_lookup_handle(hdev, handle);
625 if (acl && (sco = acl->link)) { 920 if (acl) {
626 sco->state = BT_CLOSED; 921 sco = acl->link;
922 if (sco) {
923 sco->state = BT_CLOSED;
627 924
628 hci_proto_connect_cfm(sco, status); 925 hci_proto_connect_cfm(sco, status);
629 hci_conn_del(sco); 926 hci_conn_del(sco);
927 }
630 } 928 }
631 929
632 hci_dev_unlock(hdev); 930 hci_dev_unlock(hdev);
@@ -687,7 +985,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
687} 985}
688 986
689static int hci_outgoing_auth_needed(struct hci_dev *hdev, 987static int hci_outgoing_auth_needed(struct hci_dev *hdev,
690 struct hci_conn *conn) 988 struct hci_conn *conn)
691{ 989{
692 if (conn->state != BT_CONFIG || !conn->out) 990 if (conn->state != BT_CONFIG || !conn->out)
693 return 0; 991 return 0;
@@ -723,12 +1021,19 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status)
723 hci_dev_lock(hdev); 1021 hci_dev_lock(hdev);
724 1022
725 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); 1023 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
726 if (conn && hci_outgoing_auth_needed(hdev, conn)) { 1024 if (!conn)
1025 goto unlock;
1026
1027 if (!hci_outgoing_auth_needed(hdev, conn))
1028 goto unlock;
1029
1030 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
727 struct hci_cp_auth_requested cp; 1031 struct hci_cp_auth_requested cp;
728 cp.handle = __cpu_to_le16(conn->handle); 1032 cp.handle = __cpu_to_le16(conn->handle);
729 hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); 1033 hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
730 } 1034 }
731 1035
1036unlock:
732 hci_dev_unlock(hdev); 1037 hci_dev_unlock(hdev);
733} 1038}
734 1039
@@ -808,11 +1113,14 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status)
808 hci_dev_lock(hdev); 1113 hci_dev_lock(hdev);
809 1114
810 acl = hci_conn_hash_lookup_handle(hdev, handle); 1115 acl = hci_conn_hash_lookup_handle(hdev, handle);
811 if (acl && (sco = acl->link)) { 1116 if (acl) {
812 sco->state = BT_CLOSED; 1117 sco = acl->link;
1118 if (sco) {
1119 sco->state = BT_CLOSED;
813 1120
814 hci_proto_connect_cfm(sco, status); 1121 hci_proto_connect_cfm(sco, status);
815 hci_conn_del(sco); 1122 hci_conn_del(sco);
1123 }
816 } 1124 }
817 1125
818 hci_dev_unlock(hdev); 1126 hci_dev_unlock(hdev);
@@ -872,13 +1180,52 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status)
872 hci_dev_unlock(hdev); 1180 hci_dev_unlock(hdev);
873} 1181}
874 1182
1183static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
1184{
1185 struct hci_cp_le_create_conn *cp;
1186 struct hci_conn *conn;
1187
1188 BT_DBG("%s status 0x%x", hdev->name, status);
1189
1190 cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN);
1191 if (!cp)
1192 return;
1193
1194 hci_dev_lock(hdev);
1195
1196 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
1197
1198 BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr),
1199 conn);
1200
1201 if (status) {
1202 if (conn && conn->state == BT_CONNECT) {
1203 conn->state = BT_CLOSED;
1204 hci_proto_connect_cfm(conn, status);
1205 hci_conn_del(conn);
1206 }
1207 } else {
1208 if (!conn) {
1209 conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr);
1210 if (conn)
1211 conn->out = 1;
1212 else
1213 BT_ERR("No memory for new connection");
1214 }
1215 }
1216
1217 hci_dev_unlock(hdev);
1218}
1219
875static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) 1220static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
876{ 1221{
877 __u8 status = *((__u8 *) skb->data); 1222 __u8 status = *((__u8 *) skb->data);
878 1223
879 BT_DBG("%s status %d", hdev->name, status); 1224 BT_DBG("%s status %d", hdev->name, status);
880 1225
881 clear_bit(HCI_INQUIRY, &hdev->flags); 1226 if (test_bit(HCI_MGMT, &hdev->flags) &&
1227 test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
1228 mgmt_discovering(hdev->id, 0);
882 1229
883 hci_req_complete(hdev, HCI_OP_INQUIRY, status); 1230 hci_req_complete(hdev, HCI_OP_INQUIRY, status);
884 1231
@@ -898,7 +1245,13 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *
898 1245
899 hci_dev_lock(hdev); 1246 hci_dev_lock(hdev);
900 1247
901 for (; num_rsp; num_rsp--) { 1248 if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags)) {
1249
1250 if (test_bit(HCI_MGMT, &hdev->flags))
1251 mgmt_discovering(hdev->id, 1);
1252 }
1253
1254 for (; num_rsp; num_rsp--, info++) {
902 bacpy(&data.bdaddr, &info->bdaddr); 1255 bacpy(&data.bdaddr, &info->bdaddr);
903 data.pscan_rep_mode = info->pscan_rep_mode; 1256 data.pscan_rep_mode = info->pscan_rep_mode;
904 data.pscan_period_mode = info->pscan_period_mode; 1257 data.pscan_period_mode = info->pscan_period_mode;
@@ -907,8 +1260,9 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *
907 data.clock_offset = info->clock_offset; 1260 data.clock_offset = info->clock_offset;
908 data.rssi = 0x00; 1261 data.rssi = 0x00;
909 data.ssp_mode = 0x00; 1262 data.ssp_mode = 0x00;
910 info++;
911 hci_inquiry_cache_update(hdev, &data); 1263 hci_inquiry_cache_update(hdev, &data);
1264 mgmt_device_found(hdev->id, &info->bdaddr, info->dev_class, 0,
1265 NULL);
912 } 1266 }
913 1267
914 hci_dev_unlock(hdev); 1268 hci_dev_unlock(hdev);
@@ -942,6 +1296,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
942 conn->state = BT_CONFIG; 1296 conn->state = BT_CONFIG;
943 hci_conn_hold(conn); 1297 hci_conn_hold(conn);
944 conn->disc_timeout = HCI_DISCONN_TIMEOUT; 1298 conn->disc_timeout = HCI_DISCONN_TIMEOUT;
1299 mgmt_connected(hdev->id, &ev->bdaddr);
945 } else 1300 } else
946 conn->state = BT_CONNECTED; 1301 conn->state = BT_CONNECTED;
947 1302
@@ -970,8 +1325,11 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
970 hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, 1325 hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE,
971 sizeof(cp), &cp); 1326 sizeof(cp), &cp);
972 } 1327 }
973 } else 1328 } else {
974 conn->state = BT_CLOSED; 1329 conn->state = BT_CLOSED;
1330 if (conn->type == ACL_LINK)
1331 mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status);
1332 }
975 1333
976 if (conn->type == ACL_LINK) 1334 if (conn->type == ACL_LINK)
977 hci_sco_setup(conn, ev->status); 1335 hci_sco_setup(conn, ev->status);
@@ -998,7 +1356,8 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
998 1356
999 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); 1357 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
1000 1358
1001 if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) { 1359 if ((mask & HCI_LM_ACCEPT) &&
1360 !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
1002 /* Connection accepted */ 1361 /* Connection accepted */
1003 struct inquiry_entry *ie; 1362 struct inquiry_entry *ie;
1004 struct hci_conn *conn; 1363 struct hci_conn *conn;
@@ -1068,19 +1427,26 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
1068 1427
1069 BT_DBG("%s status %d", hdev->name, ev->status); 1428 BT_DBG("%s status %d", hdev->name, ev->status);
1070 1429
1071 if (ev->status) 1430 if (ev->status) {
1431 mgmt_disconnect_failed(hdev->id);
1072 return; 1432 return;
1433 }
1073 1434
1074 hci_dev_lock(hdev); 1435 hci_dev_lock(hdev);
1075 1436
1076 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); 1437 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
1077 if (conn) { 1438 if (!conn)
1078 conn->state = BT_CLOSED; 1439 goto unlock;
1079 1440
1080 hci_proto_disconn_cfm(conn, ev->reason); 1441 conn->state = BT_CLOSED;
1081 hci_conn_del(conn); 1442
1082 } 1443 if (conn->type == ACL_LINK || conn->type == LE_LINK)
1444 mgmt_disconnected(hdev->id, &conn->dst);
1445
1446 hci_proto_disconn_cfm(conn, ev->reason);
1447 hci_conn_del(conn);
1083 1448
1449unlock:
1084 hci_dev_unlock(hdev); 1450 hci_dev_unlock(hdev);
1085} 1451}
1086 1452
@@ -1098,8 +1464,9 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
1098 if (!ev->status) { 1464 if (!ev->status) {
1099 conn->link_mode |= HCI_LM_AUTH; 1465 conn->link_mode |= HCI_LM_AUTH;
1100 conn->sec_level = conn->pending_sec_level; 1466 conn->sec_level = conn->pending_sec_level;
1101 } else 1467 } else {
1102 conn->sec_level = BT_SECURITY_LOW; 1468 mgmt_auth_failed(hdev->id, &conn->dst, ev->status);
1469 }
1103 1470
1104 clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); 1471 clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
1105 1472
@@ -1152,13 +1519,23 @@ static inline void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb
1152 1519
1153 hci_dev_lock(hdev); 1520 hci_dev_lock(hdev);
1154 1521
1522 if (ev->status == 0 && test_bit(HCI_MGMT, &hdev->flags))
1523 mgmt_remote_name(hdev->id, &ev->bdaddr, ev->name);
1524
1155 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); 1525 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
1156 if (conn && hci_outgoing_auth_needed(hdev, conn)) { 1526 if (!conn)
1527 goto unlock;
1528
1529 if (!hci_outgoing_auth_needed(hdev, conn))
1530 goto unlock;
1531
1532 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
1157 struct hci_cp_auth_requested cp; 1533 struct hci_cp_auth_requested cp;
1158 cp.handle = __cpu_to_le16(conn->handle); 1534 cp.handle = __cpu_to_le16(conn->handle);
1159 hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); 1535 hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
1160 } 1536 }
1161 1537
1538unlock:
1162 hci_dev_unlock(hdev); 1539 hci_dev_unlock(hdev);
1163} 1540}
1164 1541
@@ -1393,11 +1770,58 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk
1393 hci_cc_write_ca_timeout(hdev, skb); 1770 hci_cc_write_ca_timeout(hdev, skb);
1394 break; 1771 break;
1395 1772
1773 case HCI_OP_DELETE_STORED_LINK_KEY:
1774 hci_cc_delete_stored_link_key(hdev, skb);
1775 break;
1776
1777 case HCI_OP_SET_EVENT_MASK:
1778 hci_cc_set_event_mask(hdev, skb);
1779 break;
1780
1781 case HCI_OP_WRITE_INQUIRY_MODE:
1782 hci_cc_write_inquiry_mode(hdev, skb);
1783 break;
1784
1785 case HCI_OP_READ_INQ_RSP_TX_POWER:
1786 hci_cc_read_inq_rsp_tx_power(hdev, skb);
1787 break;
1788
1789 case HCI_OP_SET_EVENT_FLT:
1790 hci_cc_set_event_flt(hdev, skb);
1791 break;
1792
1793 case HCI_OP_PIN_CODE_REPLY:
1794 hci_cc_pin_code_reply(hdev, skb);
1795 break;
1796
1797 case HCI_OP_PIN_CODE_NEG_REPLY:
1798 hci_cc_pin_code_neg_reply(hdev, skb);
1799 break;
1800
1801 case HCI_OP_READ_LOCAL_OOB_DATA:
1802 hci_cc_read_local_oob_data_reply(hdev, skb);
1803 break;
1804
1805 case HCI_OP_LE_READ_BUFFER_SIZE:
1806 hci_cc_le_read_buffer_size(hdev, skb);
1807 break;
1808
1809 case HCI_OP_USER_CONFIRM_REPLY:
1810 hci_cc_user_confirm_reply(hdev, skb);
1811 break;
1812
1813 case HCI_OP_USER_CONFIRM_NEG_REPLY:
1814 hci_cc_user_confirm_neg_reply(hdev, skb);
1815 break;
1816
1396 default: 1817 default:
1397 BT_DBG("%s opcode 0x%x", hdev->name, opcode); 1818 BT_DBG("%s opcode 0x%x", hdev->name, opcode);
1398 break; 1819 break;
1399 } 1820 }
1400 1821
1822 if (ev->opcode != HCI_OP_NOP)
1823 del_timer(&hdev->cmd_timer);
1824
1401 if (ev->ncmd) { 1825 if (ev->ncmd) {
1402 atomic_set(&hdev->cmd_cnt, 1); 1826 atomic_set(&hdev->cmd_cnt, 1);
1403 if (!skb_queue_empty(&hdev->cmd_q)) 1827 if (!skb_queue_empty(&hdev->cmd_q))
@@ -1459,12 +1883,24 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
1459 hci_cs_exit_sniff_mode(hdev, ev->status); 1883 hci_cs_exit_sniff_mode(hdev, ev->status);
1460 break; 1884 break;
1461 1885
1886 case HCI_OP_DISCONNECT:
1887 if (ev->status != 0)
1888 mgmt_disconnect_failed(hdev->id);
1889 break;
1890
1891 case HCI_OP_LE_CREATE_CONN:
1892 hci_cs_le_create_conn(hdev, ev->status);
1893 break;
1894
1462 default: 1895 default:
1463 BT_DBG("%s opcode 0x%x", hdev->name, opcode); 1896 BT_DBG("%s opcode 0x%x", hdev->name, opcode);
1464 break; 1897 break;
1465 } 1898 }
1466 1899
1467 if (ev->ncmd) { 1900 if (ev->opcode != HCI_OP_NOP)
1901 del_timer(&hdev->cmd_timer);
1902
1903 if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
1468 atomic_set(&hdev->cmd_cnt, 1); 1904 atomic_set(&hdev->cmd_cnt, 1);
1469 if (!skb_queue_empty(&hdev->cmd_q)) 1905 if (!skb_queue_empty(&hdev->cmd_q))
1470 tasklet_schedule(&hdev->cmd_task); 1906 tasklet_schedule(&hdev->cmd_task);
@@ -1529,6 +1965,16 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s
1529 hdev->acl_cnt += count; 1965 hdev->acl_cnt += count;
1530 if (hdev->acl_cnt > hdev->acl_pkts) 1966 if (hdev->acl_cnt > hdev->acl_pkts)
1531 hdev->acl_cnt = hdev->acl_pkts; 1967 hdev->acl_cnt = hdev->acl_pkts;
1968 } else if (conn->type == LE_LINK) {
1969 if (hdev->le_pkts) {
1970 hdev->le_cnt += count;
1971 if (hdev->le_cnt > hdev->le_pkts)
1972 hdev->le_cnt = hdev->le_pkts;
1973 } else {
1974 hdev->acl_cnt += count;
1975 if (hdev->acl_cnt > hdev->acl_pkts)
1976 hdev->acl_cnt = hdev->acl_pkts;
1977 }
1532 } else { 1978 } else {
1533 hdev->sco_cnt += count; 1979 hdev->sco_cnt += count;
1534 if (hdev->sco_cnt > hdev->sco_pkts) 1980 if (hdev->sco_cnt > hdev->sco_pkts)
@@ -1586,18 +2032,92 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff
1586 hci_conn_put(conn); 2032 hci_conn_put(conn);
1587 } 2033 }
1588 2034
2035 if (!test_bit(HCI_PAIRABLE, &hdev->flags))
2036 hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY,
2037 sizeof(ev->bdaddr), &ev->bdaddr);
2038 else if (test_bit(HCI_MGMT, &hdev->flags)) {
2039 u8 secure;
2040
2041 if (conn->pending_sec_level == BT_SECURITY_HIGH)
2042 secure = 1;
2043 else
2044 secure = 0;
2045
2046 mgmt_pin_code_request(hdev->id, &ev->bdaddr, secure);
2047 }
2048
1589 hci_dev_unlock(hdev); 2049 hci_dev_unlock(hdev);
1590} 2050}
1591 2051
1592static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) 2052static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
1593{ 2053{
2054 struct hci_ev_link_key_req *ev = (void *) skb->data;
2055 struct hci_cp_link_key_reply cp;
2056 struct hci_conn *conn;
2057 struct link_key *key;
2058
1594 BT_DBG("%s", hdev->name); 2059 BT_DBG("%s", hdev->name);
2060
2061 if (!test_bit(HCI_LINK_KEYS, &hdev->flags))
2062 return;
2063
2064 hci_dev_lock(hdev);
2065
2066 key = hci_find_link_key(hdev, &ev->bdaddr);
2067 if (!key) {
2068 BT_DBG("%s link key not found for %s", hdev->name,
2069 batostr(&ev->bdaddr));
2070 goto not_found;
2071 }
2072
2073 BT_DBG("%s found key type %u for %s", hdev->name, key->type,
2074 batostr(&ev->bdaddr));
2075
2076 if (!test_bit(HCI_DEBUG_KEYS, &hdev->flags) &&
2077 key->type == HCI_LK_DEBUG_COMBINATION) {
2078 BT_DBG("%s ignoring debug key", hdev->name);
2079 goto not_found;
2080 }
2081
2082 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
2083 if (conn) {
2084 if (key->type == HCI_LK_UNAUTH_COMBINATION &&
2085 conn->auth_type != 0xff &&
2086 (conn->auth_type & 0x01)) {
2087 BT_DBG("%s ignoring unauthenticated key", hdev->name);
2088 goto not_found;
2089 }
2090
2091 if (key->type == HCI_LK_COMBINATION && key->pin_len < 16 &&
2092 conn->pending_sec_level == BT_SECURITY_HIGH) {
 2093 BT_DBG("%s ignoring key unauthenticated for high "
 2094 "security", hdev->name);
2095 goto not_found;
2096 }
2097
2098 conn->key_type = key->type;
2099 conn->pin_length = key->pin_len;
2100 }
2101
2102 bacpy(&cp.bdaddr, &ev->bdaddr);
2103 memcpy(cp.link_key, key->val, 16);
2104
2105 hci_send_cmd(hdev, HCI_OP_LINK_KEY_REPLY, sizeof(cp), &cp);
2106
2107 hci_dev_unlock(hdev);
2108
2109 return;
2110
2111not_found:
2112 hci_send_cmd(hdev, HCI_OP_LINK_KEY_NEG_REPLY, 6, &ev->bdaddr);
2113 hci_dev_unlock(hdev);
1595} 2114}
1596 2115
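Editor's note: the key lookup above also enforces a policy before replying: debug keys are only used when HCI_DEBUG_KEYS is set, unauthenticated combination keys are refused when the pending connection asked for MITM, and legacy combination keys derived from a short PIN are refused for BT_SECURITY_HIGH. A standalone restatement of that acceptance check is sketched below (plain C; the parameters are simplified stand-ins for the hci_conn and link_key members).

	#include <stdio.h>

	#define LK_COMBINATION		0x00
	#define LK_DEBUG_COMBINATION	0x03
	#define LK_UNAUTH_COMBINATION	0x04
	#define SEC_HIGH		3	/* stands in for BT_SECURITY_HIGH */

	static int link_key_usable(unsigned char key_type, unsigned char pin_len,
				   int debug_keys_allowed, unsigned char auth_type,
				   int pending_sec_level)
	{
		if (key_type == LK_DEBUG_COMBINATION && !debug_keys_allowed)
			return 0;	/* ignore debug key */

		/* unauthenticated key but the connection asked for MITM */
		if (key_type == LK_UNAUTH_COMBINATION &&
		    auth_type != 0xff && (auth_type & 0x01))
			return 0;

		/* legacy key from a short PIN is too weak for high security */
		if (key_type == LK_COMBINATION && pin_len < 16 &&
		    pending_sec_level == SEC_HIGH)
			return 0;

		return 1;
	}

	int main(void)
	{
		printf("%d\n", link_key_usable(LK_UNAUTH_COMBINATION, 0, 0,
					       0x03, SEC_HIGH));	/* 0: MITM required */
		printf("%d\n", link_key_usable(0x05, 16, 0, 0x03, SEC_HIGH)); /* 1: usable */
		return 0;
	}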
1597static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) 2116static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
1598{ 2117{
1599 struct hci_ev_link_key_notify *ev = (void *) skb->data; 2118 struct hci_ev_link_key_notify *ev = (void *) skb->data;
1600 struct hci_conn *conn; 2119 struct hci_conn *conn;
2120 u8 pin_len = 0;
1601 2121
1602 BT_DBG("%s", hdev->name); 2122 BT_DBG("%s", hdev->name);
1603 2123
@@ -1607,9 +2127,18 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff
1607 if (conn) { 2127 if (conn) {
1608 hci_conn_hold(conn); 2128 hci_conn_hold(conn);
1609 conn->disc_timeout = HCI_DISCONN_TIMEOUT; 2129 conn->disc_timeout = HCI_DISCONN_TIMEOUT;
2130 pin_len = conn->pin_length;
2131
2132 if (ev->key_type != HCI_LK_CHANGED_COMBINATION)
2133 conn->key_type = ev->key_type;
2134
1610 hci_conn_put(conn); 2135 hci_conn_put(conn);
1611 } 2136 }
1612 2137
2138 if (test_bit(HCI_LINK_KEYS, &hdev->flags))
2139 hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key,
2140 ev->key_type, pin_len);
2141
1613 hci_dev_unlock(hdev); 2142 hci_dev_unlock(hdev);
1614} 2143}
1615 2144
@@ -1682,10 +2211,17 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
1682 2211
1683 hci_dev_lock(hdev); 2212 hci_dev_lock(hdev);
1684 2213
2214 if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags)) {
2215
2216 if (test_bit(HCI_MGMT, &hdev->flags))
2217 mgmt_discovering(hdev->id, 1);
2218 }
2219
1685 if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { 2220 if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) {
1686 struct inquiry_info_with_rssi_and_pscan_mode *info = (void *) (skb->data + 1); 2221 struct inquiry_info_with_rssi_and_pscan_mode *info;
2222 info = (void *) (skb->data + 1);
1687 2223
1688 for (; num_rsp; num_rsp--) { 2224 for (; num_rsp; num_rsp--, info++) {
1689 bacpy(&data.bdaddr, &info->bdaddr); 2225 bacpy(&data.bdaddr, &info->bdaddr);
1690 data.pscan_rep_mode = info->pscan_rep_mode; 2226 data.pscan_rep_mode = info->pscan_rep_mode;
1691 data.pscan_period_mode = info->pscan_period_mode; 2227 data.pscan_period_mode = info->pscan_period_mode;
@@ -1694,13 +2230,15 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
1694 data.clock_offset = info->clock_offset; 2230 data.clock_offset = info->clock_offset;
1695 data.rssi = info->rssi; 2231 data.rssi = info->rssi;
1696 data.ssp_mode = 0x00; 2232 data.ssp_mode = 0x00;
1697 info++;
1698 hci_inquiry_cache_update(hdev, &data); 2233 hci_inquiry_cache_update(hdev, &data);
2234 mgmt_device_found(hdev->id, &info->bdaddr,
2235 info->dev_class, info->rssi,
2236 NULL);
1699 } 2237 }
1700 } else { 2238 } else {
1701 struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); 2239 struct inquiry_info_with_rssi *info = (void *) (skb->data + 1);
1702 2240
1703 for (; num_rsp; num_rsp--) { 2241 for (; num_rsp; num_rsp--, info++) {
1704 bacpy(&data.bdaddr, &info->bdaddr); 2242 bacpy(&data.bdaddr, &info->bdaddr);
1705 data.pscan_rep_mode = info->pscan_rep_mode; 2243 data.pscan_rep_mode = info->pscan_rep_mode;
1706 data.pscan_period_mode = info->pscan_period_mode; 2244 data.pscan_period_mode = info->pscan_period_mode;
@@ -1709,8 +2247,10 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
1709 data.clock_offset = info->clock_offset; 2247 data.clock_offset = info->clock_offset;
1710 data.rssi = info->rssi; 2248 data.rssi = info->rssi;
1711 data.ssp_mode = 0x00; 2249 data.ssp_mode = 0x00;
1712 info++;
1713 hci_inquiry_cache_update(hdev, &data); 2250 hci_inquiry_cache_update(hdev, &data);
2251 mgmt_device_found(hdev->id, &info->bdaddr,
2252 info->dev_class, info->rssi,
2253 NULL);
1714 } 2254 }
1715 } 2255 }
1716 2256
@@ -1824,17 +2364,8 @@ static inline void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buf
1824static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) 2364static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
1825{ 2365{
1826 struct hci_ev_sniff_subrate *ev = (void *) skb->data; 2366 struct hci_ev_sniff_subrate *ev = (void *) skb->data;
1827 struct hci_conn *conn;
1828 2367
1829 BT_DBG("%s status %d", hdev->name, ev->status); 2368 BT_DBG("%s status %d", hdev->name, ev->status);
1830
1831 hci_dev_lock(hdev);
1832
1833 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
1834 if (conn) {
1835 }
1836
1837 hci_dev_unlock(hdev);
1838} 2369}
1839 2370
1840static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) 2371static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1848,24 +2379,50 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct
1848 if (!num_rsp) 2379 if (!num_rsp)
1849 return; 2380 return;
1850 2381
2382 if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags)) {
2383
2384 if (test_bit(HCI_MGMT, &hdev->flags))
2385 mgmt_discovering(hdev->id, 1);
2386 }
2387
1851 hci_dev_lock(hdev); 2388 hci_dev_lock(hdev);
1852 2389
1853 for (; num_rsp; num_rsp--) { 2390 for (; num_rsp; num_rsp--, info++) {
1854 bacpy(&data.bdaddr, &info->bdaddr); 2391 bacpy(&data.bdaddr, &info->bdaddr);
1855 data.pscan_rep_mode = info->pscan_rep_mode; 2392 data.pscan_rep_mode = info->pscan_rep_mode;
1856 data.pscan_period_mode = info->pscan_period_mode; 2393 data.pscan_period_mode = info->pscan_period_mode;
1857 data.pscan_mode = 0x00; 2394 data.pscan_mode = 0x00;
1858 memcpy(data.dev_class, info->dev_class, 3); 2395 memcpy(data.dev_class, info->dev_class, 3);
1859 data.clock_offset = info->clock_offset; 2396 data.clock_offset = info->clock_offset;
1860 data.rssi = info->rssi; 2397 data.rssi = info->rssi;
1861 data.ssp_mode = 0x01; 2398 data.ssp_mode = 0x01;
1862 info++;
1863 hci_inquiry_cache_update(hdev, &data); 2399 hci_inquiry_cache_update(hdev, &data);
2400 mgmt_device_found(hdev->id, &info->bdaddr, info->dev_class,
2401 info->rssi, info->data);
1864 } 2402 }
1865 2403
1866 hci_dev_unlock(hdev); 2404 hci_dev_unlock(hdev);
1867} 2405}
1868 2406
2407static inline u8 hci_get_auth_req(struct hci_conn *conn)
2408{
2409 /* If remote requests dedicated bonding follow that lead */
2410 if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) {
2411 /* If both remote and local IO capabilities allow MITM
2412 * protection then require it, otherwise don't */
2413 if (conn->remote_cap == 0x03 || conn->io_capability == 0x03)
2414 return 0x02;
2415 else
2416 return 0x03;
2417 }
2418
2419 /* If remote requests no-bonding follow that lead */
2420 if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01)
2421 return conn->remote_auth | (conn->auth_type & 0x01);
2422
2423 return conn->auth_type;
2424}
2425
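Editor's note: hci_get_auth_req() maps the peer's stated requirements onto ours using the Secure Simple Pairing encoding: 0x00/0x01 no bonding, 0x02/0x03 dedicated bonding, 0x04/0x05 general bonding, with the low bit meaning MITM protection, and IO capability 0x03 meaning NoInputNoOutput. A standalone restatement of that negotiation is sketched below (plain C; the *_auth and *_cap parameters stand in for the hci_conn fields).

	#include <stdio.h>

	#define IO_NO_INPUT_NO_OUTPUT	0x03	/* SSP IO capability */

	/* local_auth/remote_auth use the SSP Authentication_Requirements
	 * encoding: bit 0 = MITM, 0x02/0x03 = dedicated bonding. */
	static unsigned char negotiate_auth_req(unsigned char local_auth,
						unsigned char local_cap,
						unsigned char remote_auth,
						unsigned char remote_cap)
	{
		/* Remote wants dedicated bonding: follow, but require MITM
		 * only if both sides are actually able to provide it. */
		if (remote_auth == 0x02 || remote_auth == 0x03) {
			if (remote_cap == IO_NO_INPUT_NO_OUTPUT ||
			    local_cap == IO_NO_INPUT_NO_OUTPUT)
				return 0x02;	/* dedicated bonding, no MITM */
			return 0x03;		/* dedicated bonding, MITM */
		}

		/* Remote wants no bonding: follow, but keep our MITM bit. */
		if (remote_auth == 0x00 || remote_auth == 0x01)
			return remote_auth | (local_auth & 0x01);

		return local_auth;		/* otherwise keep what we asked for */
	}

	int main(void)
	{
		/* NoInputNoOutput peer asking for dedicated bonding */
		printf("0x%02x\n", negotiate_auth_req(0x05, 0x01, 0x02,
						      IO_NO_INPUT_NO_OUTPUT));
		return 0;
	}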
1869static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) 2426static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
1870{ 2427{
1871 struct hci_ev_io_capa_request *ev = (void *) skb->data; 2428 struct hci_ev_io_capa_request *ev = (void *) skb->data;
@@ -1876,9 +2433,131 @@ static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff
1876 hci_dev_lock(hdev); 2433 hci_dev_lock(hdev);
1877 2434
1878 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); 2435 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
1879 if (conn) 2436 if (!conn)
1880 hci_conn_hold(conn); 2437 goto unlock;
2438
2439 hci_conn_hold(conn);
2440
2441 if (!test_bit(HCI_MGMT, &hdev->flags))
2442 goto unlock;
2443
2444 if (test_bit(HCI_PAIRABLE, &hdev->flags) ||
2445 (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) {
2446 struct hci_cp_io_capability_reply cp;
2447
2448 bacpy(&cp.bdaddr, &ev->bdaddr);
2449 cp.capability = conn->io_capability;
2450 conn->auth_type = hci_get_auth_req(conn);
2451 cp.authentication = conn->auth_type;
2452
2453 if ((conn->out == 0x01 || conn->remote_oob == 0x01) &&
2454 hci_find_remote_oob_data(hdev, &conn->dst))
2455 cp.oob_data = 0x01;
2456 else
2457 cp.oob_data = 0x00;
2458
2459 hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY,
2460 sizeof(cp), &cp);
2461 } else {
2462 struct hci_cp_io_capability_neg_reply cp;
2463
2464 bacpy(&cp.bdaddr, &ev->bdaddr);
2465 cp.reason = 0x18; /* Pairing not allowed */
2466
2467 hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_NEG_REPLY,
2468 sizeof(cp), &cp);
2469 }
2470
2471unlock:
2472 hci_dev_unlock(hdev);
2473}
2474
2475static inline void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb)
2476{
2477 struct hci_ev_io_capa_reply *ev = (void *) skb->data;
2478 struct hci_conn *conn;
2479
2480 BT_DBG("%s", hdev->name);
2481
2482 hci_dev_lock(hdev);
2483
2484 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
2485 if (!conn)
2486 goto unlock;
2487
2488 conn->remote_cap = ev->capability;
2489 conn->remote_oob = ev->oob_data;
2490 conn->remote_auth = ev->authentication;
2491
2492unlock:
2493 hci_dev_unlock(hdev);
2494}
2495
2496static inline void hci_user_confirm_request_evt(struct hci_dev *hdev,
2497 struct sk_buff *skb)
2498{
2499 struct hci_ev_user_confirm_req *ev = (void *) skb->data;
2500 int loc_mitm, rem_mitm, confirm_hint = 0;
2501 struct hci_conn *conn;
2502
2503 BT_DBG("%s", hdev->name);
2504
2505 hci_dev_lock(hdev);
2506
2507 if (!test_bit(HCI_MGMT, &hdev->flags))
2508 goto unlock;
2509
2510 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
2511 if (!conn)
2512 goto unlock;
2513
2514 loc_mitm = (conn->auth_type & 0x01);
2515 rem_mitm = (conn->remote_auth & 0x01);
2516
2517 /* If we require MITM but the remote device can't provide that
2518 * (it has NoInputNoOutput) then reject the confirmation
2519 * request. The only exception is when we're dedicated bonding
2520	 * initiators (connect_cfm_cb set), since then we always have the MITM
2521 * bit set. */
2522 if (!conn->connect_cfm_cb && loc_mitm && conn->remote_cap == 0x03) {
2523 BT_DBG("Rejecting request: remote device can't provide MITM");
2524 hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY,
2525 sizeof(ev->bdaddr), &ev->bdaddr);
2526 goto unlock;
2527 }
2528
2529	/* If no side requires MITM protection, auto-accept */
2530 if ((!loc_mitm || conn->remote_cap == 0x03) &&
2531 (!rem_mitm || conn->io_capability == 0x03)) {
2532
2533		/* If we're not the initiators, request authorization to
2534 * proceed from user space (mgmt_user_confirm with
2535 * confirm_hint set to 1). */
2536 if (!test_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
2537 BT_DBG("Confirming auto-accept as acceptor");
2538 confirm_hint = 1;
2539 goto confirm;
2540 }
2541
2542 BT_DBG("Auto-accept of user confirmation with %ums delay",
2543 hdev->auto_accept_delay);
1881 2544
2545 if (hdev->auto_accept_delay > 0) {
2546 int delay = msecs_to_jiffies(hdev->auto_accept_delay);
2547 mod_timer(&conn->auto_accept_timer, jiffies + delay);
2548 goto unlock;
2549 }
2550
2551 hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_REPLY,
2552 sizeof(ev->bdaddr), &ev->bdaddr);
2553 goto unlock;
2554 }
2555
2556confirm:
2557 mgmt_user_confirm_request(hdev->id, &ev->bdaddr, ev->passkey,
2558 confirm_hint);
2559
2560unlock:
1882 hci_dev_unlock(hdev); 2561 hci_dev_unlock(hdev);
1883} 2562}
1884 2563
@@ -1892,9 +2571,20 @@ static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_
1892 hci_dev_lock(hdev); 2571 hci_dev_lock(hdev);
1893 2572
1894 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); 2573 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
1895 if (conn) 2574 if (!conn)
1896 hci_conn_put(conn); 2575 goto unlock;
2576
2577 /* To avoid duplicate auth_failed events to user space we check
2578	/* To avoid duplicate auth_failed events to user space we check
2579	 * the HCI_CONN_AUTH_PEND flag, which will be set if we
2580	 * initiated the authentication. A traditional auth_complete
2581 * the mgmt_auth_failed event */
2582 if (!test_bit(HCI_CONN_AUTH_PEND, &conn->pend) && ev->status != 0)
2583 mgmt_auth_failed(hdev->id, &conn->dst, ev->status);
2584
2585 hci_conn_put(conn);
1897 2586
2587unlock:
1898 hci_dev_unlock(hdev); 2588 hci_dev_unlock(hdev);
1899} 2589}
1900 2590
@@ -1914,6 +2604,98 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_
1914 hci_dev_unlock(hdev); 2604 hci_dev_unlock(hdev);
1915} 2605}
1916 2606
2607static inline void hci_remote_oob_data_request_evt(struct hci_dev *hdev,
2608 struct sk_buff *skb)
2609{
2610 struct hci_ev_remote_oob_data_request *ev = (void *) skb->data;
2611 struct oob_data *data;
2612
2613 BT_DBG("%s", hdev->name);
2614
2615 hci_dev_lock(hdev);
2616
2617 if (!test_bit(HCI_MGMT, &hdev->flags))
2618 goto unlock;
2619
2620 data = hci_find_remote_oob_data(hdev, &ev->bdaddr);
2621 if (data) {
2622 struct hci_cp_remote_oob_data_reply cp;
2623
2624 bacpy(&cp.bdaddr, &ev->bdaddr);
2625 memcpy(cp.hash, data->hash, sizeof(cp.hash));
2626 memcpy(cp.randomizer, data->randomizer, sizeof(cp.randomizer));
2627
2628 hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, sizeof(cp),
2629 &cp);
2630 } else {
2631 struct hci_cp_remote_oob_data_neg_reply cp;
2632
2633 bacpy(&cp.bdaddr, &ev->bdaddr);
2634 hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, sizeof(cp),
2635 &cp);
2636 }
2637
2638unlock:
2639 hci_dev_unlock(hdev);
2640}
2641
2642static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2643{
2644 struct hci_ev_le_conn_complete *ev = (void *) skb->data;
2645 struct hci_conn *conn;
2646
2647 BT_DBG("%s status %d", hdev->name, ev->status);
2648
2649 hci_dev_lock(hdev);
2650
2651 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr);
2652 if (!conn) {
2653 conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
2654 if (!conn) {
2655 BT_ERR("No memory for new connection");
2656 hci_dev_unlock(hdev);
2657 return;
2658 }
2659 }
2660
2661 if (ev->status) {
2662 mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status);
2663 hci_proto_connect_cfm(conn, ev->status);
2664 conn->state = BT_CLOSED;
2665 hci_conn_del(conn);
2666 goto unlock;
2667 }
2668
2669 mgmt_connected(hdev->id, &ev->bdaddr);
2670
2671 conn->handle = __le16_to_cpu(ev->handle);
2672 conn->state = BT_CONNECTED;
2673
2674 hci_conn_hold_device(conn);
2675 hci_conn_add_sysfs(conn);
2676
2677 hci_proto_connect_cfm(conn, ev->status);
2678
2679unlock:
2680 hci_dev_unlock(hdev);
2681}
2682
2683static inline void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
2684{
2685 struct hci_ev_le_meta *le_ev = (void *) skb->data;
2686
2687 skb_pull(skb, sizeof(*le_ev));
2688
2689 switch (le_ev->subevent) {
2690 case HCI_EV_LE_CONN_COMPLETE:
2691 hci_le_conn_complete_evt(hdev, skb);
2692 break;
2693
2694 default:
2695 break;
2696 }
2697}
2698
1917void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) 2699void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
1918{ 2700{
1919 struct hci_event_hdr *hdr = (void *) skb->data; 2701 struct hci_event_hdr *hdr = (void *) skb->data;
@@ -2042,6 +2824,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
2042 hci_io_capa_request_evt(hdev, skb); 2824 hci_io_capa_request_evt(hdev, skb);
2043 break; 2825 break;
2044 2826
2827 case HCI_EV_IO_CAPA_REPLY:
2828 hci_io_capa_reply_evt(hdev, skb);
2829 break;
2830
2831 case HCI_EV_USER_CONFIRM_REQUEST:
2832 hci_user_confirm_request_evt(hdev, skb);
2833 break;
2834
2045 case HCI_EV_SIMPLE_PAIR_COMPLETE: 2835 case HCI_EV_SIMPLE_PAIR_COMPLETE:
2046 hci_simple_pair_complete_evt(hdev, skb); 2836 hci_simple_pair_complete_evt(hdev, skb);
2047 break; 2837 break;
@@ -2050,6 +2840,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
2050 hci_remote_host_features_evt(hdev, skb); 2840 hci_remote_host_features_evt(hdev, skb);
2051 break; 2841 break;
2052 2842
2843 case HCI_EV_LE_META:
2844 hci_le_meta_evt(hdev, skb);
2845 break;
2846
2847 case HCI_EV_REMOTE_OOB_DATA_REQUEST:
2848 hci_remote_oob_data_request_evt(hdev, skb);
2849 break;
2850
2053 default: 2851 default:
2054 BT_DBG("%s event 0x%x", hdev->name, event); 2852 BT_DBG("%s event 0x%x", hdev->name, event);
2055 break; 2853 break;
@@ -2083,6 +2881,6 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
2083 2881
2084 bt_cb(skb)->pkt_type = HCI_EVENT_PKT; 2882 bt_cb(skb)->pkt_type = HCI_EVENT_PKT;
2085 skb->dev = (void *) hdev; 2883 skb->dev = (void *) hdev;
2086 hci_send_to_sock(hdev, skb); 2884 hci_send_to_sock(hdev, skb, NULL);
2087 kfree_skb(skb); 2885 kfree_skb(skb);
2088} 2886}
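
For reference, a minimal standalone sketch of the authentication-requirement selection that hci_get_auth_req() above performs. The struct and function names here are illustrative stand-ins (not the kernel's struct hci_conn); the numeric values follow the SSP conventions visible in the patch: 0x02/0x03 for dedicated bonding without/with MITM, and 0x03 for a NoInputNoOutput IO capability.

	/* Illustrative sketch only -- simplified stand-in types, not kernel code. */
	struct conn_caps {
		unsigned char remote_auth;	/* peer's authentication requirements */
		unsigned char remote_cap;	/* peer's IO capability */
		unsigned char io_capability;	/* our IO capability */
		unsigned char auth_type;	/* our current requirements */
	};

	static unsigned char pick_auth_req(const struct conn_caps *c)
	{
		/* Peer asked for dedicated bonding (0x02/0x03): honour it, and
		 * add MITM protection only if neither side is NoInputNoOutput. */
		if (c->remote_auth == 0x02 || c->remote_auth == 0x03) {
			if (c->remote_cap == 0x03 || c->io_capability == 0x03)
				return 0x02;	/* dedicated bonding, no MITM */
			return 0x03;		/* dedicated bonding with MITM */
		}

		/* Peer asked for no-bonding (0x00/0x01): follow that, but keep
		 * our own MITM bit if we already required it. */
		if (c->remote_auth == 0x00 || c->remote_auth == 0x01)
			return c->remote_auth | (c->auth_type & 0x01);

		/* Otherwise keep whatever we were already asking for. */
		return c->auth_type;
	}

The notable design choice is that the responder never escalates beyond what the remote side requested, apart from preserving its own MITM requirement in the no-bonding case.
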
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 29827c77f6ce..295e4a88fff8 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -85,7 +85,8 @@ static struct bt_sock_list hci_sk_list = {
85}; 85};
86 86
87/* Send frame to RAW socket */ 87/* Send frame to RAW socket */
88void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) 88void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb,
89 struct sock *skip_sk)
89{ 90{
90 struct sock *sk; 91 struct sock *sk;
91 struct hlist_node *node; 92 struct hlist_node *node;
@@ -97,6 +98,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
97 struct hci_filter *flt; 98 struct hci_filter *flt;
98 struct sk_buff *nskb; 99 struct sk_buff *nskb;
99 100
101 if (sk == skip_sk)
102 continue;
103
100 if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) 104 if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev)
101 continue; 105 continue;
102 106
@@ -857,7 +861,7 @@ error:
857 return err; 861 return err;
858} 862}
859 863
860void __exit hci_sock_cleanup(void) 864void hci_sock_cleanup(void)
861{ 865{
862 if (bt_sock_unregister(BTPROTO_HCI) < 0) 866 if (bt_sock_unregister(BTPROTO_HCI) < 0)
863 BT_ERR("HCI socket unregistration failed"); 867 BT_ERR("HCI socket unregistration failed");
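
The new skip_sk argument lets hci_send_to_sock() withhold a frame from one particular bound socket (for example, the socket the frame originated from), while existing callers such as hci_si_event() above simply pass NULL. A hedged, self-contained sketch of that skip-the-sender pattern, using simplified stand-in types rather than the kernel's socket list:

	/* Illustrative sketch of the "deliver to everyone but one listener"
	 * pattern enabled by skip_sk; 'listener' is a stand-in type. */
	struct listener {
		struct listener *next;
		void (*deliver)(struct listener *l, const void *frame,
				unsigned int len);
	};

	static void broadcast_except(struct listener *head, struct listener *skip,
				     const void *frame, unsigned int len)
	{
		struct listener *l;

		for (l = head; l; l = l->next) {
			if (l == skip)
				continue;	/* withhold the frame from this one */
			l->deliver(l, frame, len);
		}
	}
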
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 5fce3d6d07b4..a6c3aa8be1f7 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -11,7 +11,7 @@
11 11
12static struct class *bt_class; 12static struct class *bt_class;
13 13
14struct dentry *bt_debugfs = NULL; 14struct dentry *bt_debugfs;
15EXPORT_SYMBOL_GPL(bt_debugfs); 15EXPORT_SYMBOL_GPL(bt_debugfs);
16 16
17static inline char *link_typetostr(int type) 17static inline char *link_typetostr(int type)
@@ -51,8 +51,8 @@ static ssize_t show_link_features(struct device *dev, struct device_attribute *a
51 conn->features[6], conn->features[7]); 51 conn->features[6], conn->features[7]);
52} 52}
53 53
54#define LINK_ATTR(_name,_mode,_show,_store) \ 54#define LINK_ATTR(_name, _mode, _show, _store) \
55struct device_attribute link_attr_##_name = __ATTR(_name,_mode,_show,_store) 55struct device_attribute link_attr_##_name = __ATTR(_name, _mode, _show, _store)
56 56
57static LINK_ATTR(type, S_IRUGO, show_link_type, NULL); 57static LINK_ATTR(type, S_IRUGO, show_link_type, NULL);
58static LINK_ATTR(address, S_IRUGO, show_link_address, NULL); 58static LINK_ATTR(address, S_IRUGO, show_link_address, NULL);
@@ -216,13 +216,13 @@ static ssize_t show_type(struct device *dev, struct device_attribute *attr, char
216static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) 216static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf)
217{ 217{
218 struct hci_dev *hdev = dev_get_drvdata(dev); 218 struct hci_dev *hdev = dev_get_drvdata(dev);
219 char name[249]; 219 char name[HCI_MAX_NAME_LENGTH + 1];
220 int i; 220 int i;
221 221
222 for (i = 0; i < 248; i++) 222 for (i = 0; i < HCI_MAX_NAME_LENGTH; i++)
223 name[i] = hdev->dev_name[i]; 223 name[i] = hdev->dev_name[i];
224 224
225 name[248] = '\0'; 225 name[HCI_MAX_NAME_LENGTH] = '\0';
226 return sprintf(buf, "%s\n", name); 226 return sprintf(buf, "%s\n", name);
227} 227}
228 228
@@ -277,10 +277,12 @@ static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *at
277static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) 277static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
278{ 278{
279 struct hci_dev *hdev = dev_get_drvdata(dev); 279 struct hci_dev *hdev = dev_get_drvdata(dev);
280 unsigned long val; 280 unsigned int val;
281 int rv;
281 282
282 if (strict_strtoul(buf, 0, &val) < 0) 283 rv = kstrtouint(buf, 0, &val);
283 return -EINVAL; 284 if (rv < 0)
285 return rv;
284 286
285 if (val != 0 && (val < 500 || val > 3600000)) 287 if (val != 0 && (val < 500 || val > 3600000))
286 return -EINVAL; 288 return -EINVAL;
@@ -299,15 +301,14 @@ static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribu
299static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) 301static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
300{ 302{
301 struct hci_dev *hdev = dev_get_drvdata(dev); 303 struct hci_dev *hdev = dev_get_drvdata(dev);
302 unsigned long val; 304 u16 val;
303 305 int rv;
304 if (strict_strtoul(buf, 0, &val) < 0)
305 return -EINVAL;
306 306
307 if (val < 0x0002 || val > 0xFFFE || val % 2) 307 rv = kstrtou16(buf, 0, &val);
308 return -EINVAL; 308 if (rv < 0)
309 return rv;
309 310
310 if (val < hdev->sniff_min_interval) 311 if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
311 return -EINVAL; 312 return -EINVAL;
312 313
313 hdev->sniff_max_interval = val; 314 hdev->sniff_max_interval = val;
@@ -324,15 +325,14 @@ static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribu
324static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) 325static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
325{ 326{
326 struct hci_dev *hdev = dev_get_drvdata(dev); 327 struct hci_dev *hdev = dev_get_drvdata(dev);
327 unsigned long val; 328 u16 val;
329 int rv;
328 330
329 if (strict_strtoul(buf, 0, &val) < 0) 331 rv = kstrtou16(buf, 0, &val);
330 return -EINVAL; 332 if (rv < 0)
331 333 return rv;
332 if (val < 0x0002 || val > 0xFFFE || val % 2)
333 return -EINVAL;
334 334
335 if (val > hdev->sniff_max_interval) 335 if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
336 return -EINVAL; 336 return -EINVAL;
337 337
338 hdev->sniff_min_interval = val; 338 hdev->sniff_min_interval = val;
@@ -461,6 +461,85 @@ static const struct file_operations blacklist_fops = {
461 .llseek = seq_lseek, 461 .llseek = seq_lseek,
462 .release = single_release, 462 .release = single_release,
463}; 463};
464
465static void print_bt_uuid(struct seq_file *f, u8 *uuid)
466{
467 u32 data0, data4;
468 u16 data1, data2, data3, data5;
469
470 memcpy(&data0, &uuid[0], 4);
471 memcpy(&data1, &uuid[4], 2);
472 memcpy(&data2, &uuid[6], 2);
473 memcpy(&data3, &uuid[8], 2);
474 memcpy(&data4, &uuid[10], 4);
475 memcpy(&data5, &uuid[14], 2);
476
477 seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n",
478 ntohl(data0), ntohs(data1), ntohs(data2),
479 ntohs(data3), ntohl(data4), ntohs(data5));
480}
481
482static int uuids_show(struct seq_file *f, void *p)
483{
484 struct hci_dev *hdev = f->private;
485 struct list_head *l;
486
487 hci_dev_lock_bh(hdev);
488
489 list_for_each(l, &hdev->uuids) {
490 struct bt_uuid *uuid;
491
492 uuid = list_entry(l, struct bt_uuid, list);
493
494 print_bt_uuid(f, uuid->uuid);
495 }
496
497 hci_dev_unlock_bh(hdev);
498
499 return 0;
500}
501
502static int uuids_open(struct inode *inode, struct file *file)
503{
504 return single_open(file, uuids_show, inode->i_private);
505}
506
507static const struct file_operations uuids_fops = {
508 .open = uuids_open,
509 .read = seq_read,
510 .llseek = seq_lseek,
511 .release = single_release,
512};
513
514static int auto_accept_delay_set(void *data, u64 val)
515{
516 struct hci_dev *hdev = data;
517
518 hci_dev_lock_bh(hdev);
519
520 hdev->auto_accept_delay = val;
521
522 hci_dev_unlock_bh(hdev);
523
524 return 0;
525}
526
527static int auto_accept_delay_get(void *data, u64 *val)
528{
529 struct hci_dev *hdev = data;
530
531 hci_dev_lock_bh(hdev);
532
533 *val = hdev->auto_accept_delay;
534
535 hci_dev_unlock_bh(hdev);
536
537 return 0;
538}
539
540DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get,
541 auto_accept_delay_set, "%llu\n");
542
464int hci_register_sysfs(struct hci_dev *hdev) 543int hci_register_sysfs(struct hci_dev *hdev)
465{ 544{
466 struct device *dev = &hdev->dev; 545 struct device *dev = &hdev->dev;
@@ -493,6 +572,10 @@ int hci_register_sysfs(struct hci_dev *hdev)
493 debugfs_create_file("blacklist", 0444, hdev->debugfs, 572 debugfs_create_file("blacklist", 0444, hdev->debugfs,
494 hdev, &blacklist_fops); 573 hdev, &blacklist_fops);
495 574
575 debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
576
577 debugfs_create_file("auto_accept_delay", 0444, hdev->debugfs, hdev,
578 &auto_accept_delay_fops);
496 return 0; 579 return 0;
497} 580}
498 581
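
Once a controller is registered, the two debugfs files added above appear under the adapter's debugfs directory. As a hedged usage sketch, assuming debugfs is mounted at /sys/kernel/debug and the adapter is named hci0 (both assumptions, not guaranteed by the patch), the auto-accept delay can be read back from userspace with plain file I/O:

	/* Hypothetical userspace helper: read the SSP auto-accept delay (ms)
	 * from the debugfs attribute registered above.  The path below is an
	 * assumption; the debugfs mount point and adapter name may differ. */
	#include <stdio.h>

	static int get_auto_accept_delay(unsigned long long *ms)
	{
		FILE *f = fopen("/sys/kernel/debug/bluetooth/hci0/auto_accept_delay", "r");
		int ret = -1;

		if (!f)
			return -1;
		if (fscanf(f, "%llu", ms) == 1)
			ret = 0;
		fclose(f);
		return ret;
	}

Note that the attribute is registered with mode 0444 here, so it is exposed read-only through the filesystem even though a set operation is wired up.
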
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 29544c21f4b5..c405a954a603 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -36,6 +36,8 @@
36#include <linux/file.h> 36#include <linux/file.h>
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/wait.h> 38#include <linux/wait.h>
39#include <linux/mutex.h>
40#include <linux/kthread.h>
39#include <net/sock.h> 41#include <net/sock.h>
40 42
41#include <linux/input.h> 43#include <linux/input.h>
@@ -54,22 +56,24 @@ static DECLARE_RWSEM(hidp_session_sem);
54static LIST_HEAD(hidp_session_list); 56static LIST_HEAD(hidp_session_list);
55 57
56static unsigned char hidp_keycode[256] = { 58static unsigned char hidp_keycode[256] = {
57 0, 0, 0, 0, 30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 59 0, 0, 0, 0, 30, 48, 46, 32, 18, 33, 34, 35, 23, 36,
58 50, 49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, 2, 3, 60 37, 38, 50, 49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45,
59 4, 5, 6, 7, 8, 9, 10, 11, 28, 1, 14, 15, 57, 12, 13, 26, 61 21, 44, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 1,
60 27, 43, 43, 39, 40, 41, 51, 52, 53, 58, 59, 60, 61, 62, 63, 64, 62 14, 15, 57, 12, 13, 26, 27, 43, 43, 39, 40, 41, 51, 52,
61 65, 66, 67, 68, 87, 88, 99, 70,119,110,102,104,111,107,109,106, 63 53, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 87, 88,
62 105,108,103, 69, 98, 55, 74, 78, 96, 79, 80, 81, 75, 76, 77, 71, 64 99, 70, 119, 110, 102, 104, 111, 107, 109, 106, 105, 108, 103, 69,
63 72, 73, 82, 83, 86,127,116,117,183,184,185,186,187,188,189,190, 65 98, 55, 74, 78, 96, 79, 80, 81, 75, 76, 77, 71, 72, 73,
64 191,192,193,194,134,138,130,132,128,129,131,137,133,135,136,113, 66 82, 83, 86, 127, 116, 117, 183, 184, 185, 186, 187, 188, 189, 190,
65 115,114, 0, 0, 0,121, 0, 89, 93,124, 92, 94, 95, 0, 0, 0, 67 191, 192, 193, 194, 134, 138, 130, 132, 128, 129, 131, 137, 133, 135,
66 122,123, 90, 91, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68 136, 113, 115, 114, 0, 0, 0, 121, 0, 89, 93, 124, 92, 94,
67 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69 95, 0, 0, 0, 122, 123, 90, 91, 85, 0, 0, 0, 0, 0,
68 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 71 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71 29, 42, 56,125, 97, 54,100,126,164,166,165,163,161,115,114,113, 73 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72 150,158,159,128,136,177,178,176,142,152,173,140 74 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75 29, 42, 56, 125, 97, 54, 100, 126, 164, 166, 165, 163, 161, 115,
76 114, 113, 150, 158, 159, 128, 136, 177, 178, 176, 142, 152, 173, 140
73}; 77};
74 78
75static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 }; 79static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 };
@@ -157,7 +161,8 @@ static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev,
157 161
158 session->leds = newleds; 162 session->leds = newleds;
159 163
160 if (!(skb = alloc_skb(3, GFP_ATOMIC))) { 164 skb = alloc_skb(3, GFP_ATOMIC);
165 if (!skb) {
161 BT_ERR("Can't allocate memory for new frame"); 166 BT_ERR("Can't allocate memory for new frame");
162 return -ENOMEM; 167 return -ENOMEM;
163 } 168 }
@@ -250,7 +255,8 @@ static int __hidp_send_ctrl_message(struct hidp_session *session,
250 255
251 BT_DBG("session %p data %p size %d", session, data, size); 256 BT_DBG("session %p data %p size %d", session, data, size);
252 257
253 if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { 258 skb = alloc_skb(size + 1, GFP_ATOMIC);
259 if (!skb) {
254 BT_ERR("Can't allocate memory for new frame"); 260 BT_ERR("Can't allocate memory for new frame");
255 return -ENOMEM; 261 return -ENOMEM;
256 } 262 }
@@ -283,7 +289,8 @@ static int hidp_queue_report(struct hidp_session *session,
283 289
284 BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size); 290 BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size);
285 291
286 if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { 292 skb = alloc_skb(size + 1, GFP_ATOMIC);
293 if (!skb) {
287 BT_ERR("Can't allocate memory for new frame"); 294 BT_ERR("Can't allocate memory for new frame");
288 return -ENOMEM; 295 return -ENOMEM;
289 } 296 }
@@ -313,32 +320,151 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep
313 return hidp_queue_report(session, buf, rsize); 320 return hidp_queue_report(session, buf, rsize);
314} 321}
315 322
323static int hidp_get_raw_report(struct hid_device *hid,
324 unsigned char report_number,
325 unsigned char *data, size_t count,
326 unsigned char report_type)
327{
328 struct hidp_session *session = hid->driver_data;
329 struct sk_buff *skb;
330 size_t len;
331 int numbered_reports = hid->report_enum[report_type].numbered;
332
333 switch (report_type) {
334 case HID_FEATURE_REPORT:
335 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE;
336 break;
337 case HID_INPUT_REPORT:
338 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_INPUT;
339 break;
340 case HID_OUTPUT_REPORT:
341 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_OUPUT;
342 break;
343 default:
344 return -EINVAL;
345 }
346
347 if (mutex_lock_interruptible(&session->report_mutex))
348 return -ERESTARTSYS;
349
350 /* Set up our wait, and send the report request to the device. */
351 session->waiting_report_type = report_type & HIDP_DATA_RTYPE_MASK;
352 session->waiting_report_number = numbered_reports ? report_number : -1;
353 set_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
354 data[0] = report_number;
355 if (hidp_send_ctrl_message(hid->driver_data, report_type, data, 1))
356 goto err_eio;
357
358 /* Wait for the return of the report. The returned report
359 gets put in session->report_return. */
360 while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) {
361 int res;
362
363 res = wait_event_interruptible_timeout(session->report_queue,
364 !test_bit(HIDP_WAITING_FOR_RETURN, &session->flags),
365 5*HZ);
366 if (res == 0) {
367 /* timeout */
368 goto err_eio;
369 }
370 if (res < 0) {
371 /* signal */
372 goto err_restartsys;
373 }
374 }
375
376 skb = session->report_return;
377 if (skb) {
378 len = skb->len < count ? skb->len : count;
379 memcpy(data, skb->data, len);
380
381 kfree_skb(skb);
382 session->report_return = NULL;
383 } else {
384 /* Device returned a HANDSHAKE, indicating protocol error. */
385 len = -EIO;
386 }
387
388 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
389 mutex_unlock(&session->report_mutex);
390
391 return len;
392
393err_restartsys:
394 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
395 mutex_unlock(&session->report_mutex);
396 return -ERESTARTSYS;
397err_eio:
398 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
399 mutex_unlock(&session->report_mutex);
400 return -EIO;
401}
402
316static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, 403static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count,
317 unsigned char report_type) 404 unsigned char report_type)
318{ 405{
406 struct hidp_session *session = hid->driver_data;
407 int ret;
408
319 switch (report_type) { 409 switch (report_type) {
320 case HID_FEATURE_REPORT: 410 case HID_FEATURE_REPORT:
321 report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; 411 report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
322 break; 412 break;
323 case HID_OUTPUT_REPORT: 413 case HID_OUTPUT_REPORT:
324 report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; 414 report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT;
325 break; 415 break;
326 default: 416 default:
327 return -EINVAL; 417 return -EINVAL;
328 } 418 }
329 419
420 if (mutex_lock_interruptible(&session->report_mutex))
421 return -ERESTARTSYS;
422
423 /* Set up our wait, and send the report request to the device. */
424 set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
330 if (hidp_send_ctrl_message(hid->driver_data, report_type, 425 if (hidp_send_ctrl_message(hid->driver_data, report_type,
331 data, count)) 426 data, count)) {
332 return -ENOMEM; 427 ret = -ENOMEM;
333 return count; 428 goto err;
429 }
430
431 /* Wait for the ACK from the device. */
432 while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) {
433 int res;
434
435 res = wait_event_interruptible_timeout(session->report_queue,
436 !test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags),
437 10*HZ);
438 if (res == 0) {
439 /* timeout */
440 ret = -EIO;
441 goto err;
442 }
443 if (res < 0) {
444 /* signal */
445 ret = -ERESTARTSYS;
446 goto err;
447 }
448 }
449
450 if (!session->output_report_success) {
451 ret = -EIO;
452 goto err;
453 }
454
455 ret = count;
456
457err:
458 clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
459 mutex_unlock(&session->report_mutex);
460 return ret;
334} 461}
335 462
336static void hidp_idle_timeout(unsigned long arg) 463static void hidp_idle_timeout(unsigned long arg)
337{ 464{
338 struct hidp_session *session = (struct hidp_session *) arg; 465 struct hidp_session *session = (struct hidp_session *) arg;
339 466
340 atomic_inc(&session->terminate); 467 kthread_stop(session->task);
341 hidp_schedule(session);
342} 468}
343 469
344static void hidp_set_timer(struct hidp_session *session) 470static void hidp_set_timer(struct hidp_session *session)
@@ -357,16 +483,22 @@ static void hidp_process_handshake(struct hidp_session *session,
357 unsigned char param) 483 unsigned char param)
358{ 484{
359 BT_DBG("session %p param 0x%02x", session, param); 485 BT_DBG("session %p param 0x%02x", session, param);
486 session->output_report_success = 0; /* default condition */
360 487
361 switch (param) { 488 switch (param) {
362 case HIDP_HSHK_SUCCESSFUL: 489 case HIDP_HSHK_SUCCESSFUL:
363 /* FIXME: Call into SET_ GET_ handlers here */ 490 /* FIXME: Call into SET_ GET_ handlers here */
491 session->output_report_success = 1;
364 break; 492 break;
365 493
366 case HIDP_HSHK_NOT_READY: 494 case HIDP_HSHK_NOT_READY:
367 case HIDP_HSHK_ERR_INVALID_REPORT_ID: 495 case HIDP_HSHK_ERR_INVALID_REPORT_ID:
368 case HIDP_HSHK_ERR_UNSUPPORTED_REQUEST: 496 case HIDP_HSHK_ERR_UNSUPPORTED_REQUEST:
369 case HIDP_HSHK_ERR_INVALID_PARAMETER: 497 case HIDP_HSHK_ERR_INVALID_PARAMETER:
498 if (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) {
499 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
500 wake_up_interruptible(&session->report_queue);
501 }
370 /* FIXME: Call into SET_ GET_ handlers here */ 502 /* FIXME: Call into SET_ GET_ handlers here */
371 break; 503 break;
372 504
@@ -385,6 +517,12 @@ static void hidp_process_handshake(struct hidp_session *session,
385 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); 517 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
386 break; 518 break;
387 } 519 }
520
521 /* Wake up the waiting thread. */
522 if (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) {
523 clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
524 wake_up_interruptible(&session->report_queue);
525 }
388} 526}
389 527
390static void hidp_process_hid_control(struct hidp_session *session, 528static void hidp_process_hid_control(struct hidp_session *session,
@@ -397,15 +535,15 @@ static void hidp_process_hid_control(struct hidp_session *session,
397 skb_queue_purge(&session->ctrl_transmit); 535 skb_queue_purge(&session->ctrl_transmit);
398 skb_queue_purge(&session->intr_transmit); 536 skb_queue_purge(&session->intr_transmit);
399 537
400 /* Kill session thread */ 538 kthread_stop(session->task);
401 atomic_inc(&session->terminate);
402 hidp_schedule(session);
403 } 539 }
404} 540}
405 541
406static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, 542/* Returns true if the passed-in skb should be freed by the caller. */
543static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
407 unsigned char param) 544 unsigned char param)
408{ 545{
546 int done_with_skb = 1;
409 BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param); 547 BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param);
410 548
411 switch (param) { 549 switch (param) {
@@ -417,7 +555,6 @@ static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
417 555
418 if (session->hid) 556 if (session->hid)
419 hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0); 557 hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0);
420
421 break; 558 break;
422 559
423 case HIDP_DATA_RTYPE_OTHER: 560 case HIDP_DATA_RTYPE_OTHER:
@@ -429,12 +566,27 @@ static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
429 __hidp_send_ctrl_message(session, 566 __hidp_send_ctrl_message(session,
430 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); 567 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
431 } 568 }
569
570 if (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) &&
571 param == session->waiting_report_type) {
572 if (session->waiting_report_number < 0 ||
573 session->waiting_report_number == skb->data[0]) {
574 /* hidp_get_raw_report() is waiting on this report. */
575 session->report_return = skb;
576 done_with_skb = 0;
577 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
578 wake_up_interruptible(&session->report_queue);
579 }
580 }
581
582 return done_with_skb;
432} 583}
433 584
434static void hidp_recv_ctrl_frame(struct hidp_session *session, 585static void hidp_recv_ctrl_frame(struct hidp_session *session,
435 struct sk_buff *skb) 586 struct sk_buff *skb)
436{ 587{
437 unsigned char hdr, type, param; 588 unsigned char hdr, type, param;
589 int free_skb = 1;
438 590
439 BT_DBG("session %p skb %p len %d", session, skb, skb->len); 591 BT_DBG("session %p skb %p len %d", session, skb, skb->len);
440 592
@@ -454,7 +606,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
454 break; 606 break;
455 607
456 case HIDP_TRANS_DATA: 608 case HIDP_TRANS_DATA:
457 hidp_process_data(session, skb, param); 609 free_skb = hidp_process_data(session, skb, param);
458 break; 610 break;
459 611
460 default: 612 default:
@@ -463,7 +615,8 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
463 break; 615 break;
464 } 616 }
465 617
466 kfree_skb(skb); 618 if (free_skb)
619 kfree_skb(skb);
467} 620}
468 621
469static void hidp_recv_intr_frame(struct hidp_session *session, 622static void hidp_recv_intr_frame(struct hidp_session *session,
@@ -541,32 +694,23 @@ static int hidp_session(void *arg)
541 struct sock *ctrl_sk = session->ctrl_sock->sk; 694 struct sock *ctrl_sk = session->ctrl_sock->sk;
542 struct sock *intr_sk = session->intr_sock->sk; 695 struct sock *intr_sk = session->intr_sock->sk;
543 struct sk_buff *skb; 696 struct sk_buff *skb;
544 int vendor = 0x0000, product = 0x0000;
545 wait_queue_t ctrl_wait, intr_wait; 697 wait_queue_t ctrl_wait, intr_wait;
546 698
547 BT_DBG("session %p", session); 699 BT_DBG("session %p", session);
548 700
549 if (session->input) {
550 vendor = session->input->id.vendor;
551 product = session->input->id.product;
552 }
553
554 if (session->hid) {
555 vendor = session->hid->vendor;
556 product = session->hid->product;
557 }
558
559 daemonize("khidpd_%04x%04x", vendor, product);
560 set_user_nice(current, -15); 701 set_user_nice(current, -15);
561 702
562 init_waitqueue_entry(&ctrl_wait, current); 703 init_waitqueue_entry(&ctrl_wait, current);
563 init_waitqueue_entry(&intr_wait, current); 704 init_waitqueue_entry(&intr_wait, current);
564 add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); 705 add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
565 add_wait_queue(sk_sleep(intr_sk), &intr_wait); 706 add_wait_queue(sk_sleep(intr_sk), &intr_wait);
566 while (!atomic_read(&session->terminate)) { 707 session->waiting_for_startup = 0;
708 wake_up_interruptible(&session->startup_queue);
709 while (!kthread_should_stop()) {
567 set_current_state(TASK_INTERRUPTIBLE); 710 set_current_state(TASK_INTERRUPTIBLE);
568 711
569 if (ctrl_sk->sk_state != BT_CONNECTED || intr_sk->sk_state != BT_CONNECTED) 712 if (ctrl_sk->sk_state != BT_CONNECTED ||
713 intr_sk->sk_state != BT_CONNECTED)
570 break; 714 break;
571 715
572 while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) { 716 while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) {
@@ -754,6 +898,8 @@ static struct hid_ll_driver hidp_hid_driver = {
754 .hidinput_input_event = hidp_hidinput_event, 898 .hidinput_input_event = hidp_hidinput_event,
755}; 899};
756 900
901/* This function sets up the hid device. It does not add it
902 to the HID system. That is done in hidp_add_connection(). */
757static int hidp_setup_hid(struct hidp_session *session, 903static int hidp_setup_hid(struct hidp_session *session,
758 struct hidp_connadd_req *req) 904 struct hidp_connadd_req *req)
759{ 905{
@@ -793,18 +939,11 @@ static int hidp_setup_hid(struct hidp_session *session,
793 hid->dev.parent = hidp_get_device(session); 939 hid->dev.parent = hidp_get_device(session);
794 hid->ll_driver = &hidp_hid_driver; 940 hid->ll_driver = &hidp_hid_driver;
795 941
942 hid->hid_get_raw_report = hidp_get_raw_report;
796 hid->hid_output_raw_report = hidp_output_raw_report; 943 hid->hid_output_raw_report = hidp_output_raw_report;
797 944
798 err = hid_add_device(hid);
799 if (err < 0)
800 goto failed;
801
802 return 0; 945 return 0;
803 946
804failed:
805 hid_destroy_device(hid);
806 session->hid = NULL;
807
808fault: 947fault:
809 kfree(session->rd_data); 948 kfree(session->rd_data);
810 session->rd_data = NULL; 949 session->rd_data = NULL;
@@ -815,6 +954,7 @@ fault:
815int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) 954int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
816{ 955{
817 struct hidp_session *session, *s; 956 struct hidp_session *session, *s;
957 int vendor, product;
818 int err; 958 int err;
819 959
820 BT_DBG(""); 960 BT_DBG("");
@@ -839,8 +979,10 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
839 979
840 bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst); 980 bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst);
841 981
842 session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl_sock->sk)->omtu, l2cap_pi(ctrl_sock->sk)->imtu); 982 session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl_sock->sk)->chan->omtu,
843 session->intr_mtu = min_t(uint, l2cap_pi(intr_sock->sk)->omtu, l2cap_pi(intr_sock->sk)->imtu); 983 l2cap_pi(ctrl_sock->sk)->chan->imtu);
984 session->intr_mtu = min_t(uint, l2cap_pi(intr_sock->sk)->chan->omtu,
985 l2cap_pi(intr_sock->sk)->chan->imtu);
844 986
845 BT_DBG("ctrl mtu %d intr mtu %d", session->ctrl_mtu, session->intr_mtu); 987 BT_DBG("ctrl mtu %d intr mtu %d", session->ctrl_mtu, session->intr_mtu);
846 988
@@ -853,6 +995,10 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
853 skb_queue_head_init(&session->ctrl_transmit); 995 skb_queue_head_init(&session->ctrl_transmit);
854 skb_queue_head_init(&session->intr_transmit); 996 skb_queue_head_init(&session->intr_transmit);
855 997
998 mutex_init(&session->report_mutex);
999 init_waitqueue_head(&session->report_queue);
1000 init_waitqueue_head(&session->startup_queue);
1001 session->waiting_for_startup = 1;
856 session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); 1002 session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
857 session->idle_to = req->idle_to; 1003 session->idle_to = req->idle_to;
858 1004
@@ -872,9 +1018,32 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
872 1018
873 hidp_set_timer(session); 1019 hidp_set_timer(session);
874 1020
875 err = kernel_thread(hidp_session, session, CLONE_KERNEL); 1021 if (session->hid) {
876 if (err < 0) 1022 vendor = session->hid->vendor;
1023 product = session->hid->product;
1024 } else if (session->input) {
1025 vendor = session->input->id.vendor;
1026 product = session->input->id.product;
1027 } else {
1028 vendor = 0x0000;
1029 product = 0x0000;
1030 }
1031
1032 session->task = kthread_run(hidp_session, session, "khidpd_%04x%04x",
1033 vendor, product);
1034 if (IS_ERR(session->task)) {
1035 err = PTR_ERR(session->task);
877 goto unlink; 1036 goto unlink;
1037 }
1038
1039 while (session->waiting_for_startup) {
1040 wait_event_interruptible(session->startup_queue,
1041 !session->waiting_for_startup);
1042 }
1043
1044 err = hid_add_device(session->hid);
1045 if (err < 0)
1046 goto err_add_device;
878 1047
879 if (session->input) { 1048 if (session->input) {
880 hidp_send_ctrl_message(session, 1049 hidp_send_ctrl_message(session,
@@ -888,6 +1057,11 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
888 up_write(&hidp_session_sem); 1057 up_write(&hidp_session_sem);
889 return 0; 1058 return 0;
890 1059
1060err_add_device:
1061 hid_destroy_device(session->hid);
1062 session->hid = NULL;
1063 kthread_stop(session->task);
1064
891unlink: 1065unlink:
892 hidp_del_timer(session); 1066 hidp_del_timer(session);
893 1067
@@ -937,13 +1111,7 @@ int hidp_del_connection(struct hidp_conndel_req *req)
937 skb_queue_purge(&session->ctrl_transmit); 1111 skb_queue_purge(&session->ctrl_transmit);
938 skb_queue_purge(&session->intr_transmit); 1112 skb_queue_purge(&session->intr_transmit);
939 1113
940 /* Wakeup user-space polling for socket errors */ 1114 kthread_stop(session->task);
941 session->intr_sock->sk->sk_err = EUNATCH;
942 session->ctrl_sock->sk->sk_err = EUNATCH;
943
944 /* Kill session thread */
945 atomic_inc(&session->terminate);
946 hidp_schedule(session);
947 } 1115 }
948 } else 1116 } else
949 err = -ENOENT; 1117 err = -ENOENT;
@@ -1016,8 +1184,6 @@ static int __init hidp_init(void)
1016{ 1184{
1017 int ret; 1185 int ret;
1018 1186
1019 l2cap_load();
1020
1021 BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); 1187 BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
1022 1188
1023 ret = hid_register_driver(&hidp_driver); 1189 ret = hid_register_driver(&hidp_driver);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 8d934a19da0a..12822cde4b49 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -80,10 +80,12 @@
80#define HIDP_VIRTUAL_CABLE_UNPLUG 0 80#define HIDP_VIRTUAL_CABLE_UNPLUG 0
81#define HIDP_BOOT_PROTOCOL_MODE 1 81#define HIDP_BOOT_PROTOCOL_MODE 1
82#define HIDP_BLUETOOTH_VENDOR_ID 9 82#define HIDP_BLUETOOTH_VENDOR_ID 9
83#define HIDP_WAITING_FOR_RETURN 10
84#define HIDP_WAITING_FOR_SEND_ACK 11
83 85
84struct hidp_connadd_req { 86struct hidp_connadd_req {
85 int ctrl_sock; // Connected control socket 87 int ctrl_sock; /* Connected control socket */
86 int intr_sock; // Connteted interrupt socket 88 int intr_sock; /* Connected interrupt socket */
87 __u16 parser; 89 __u16 parser;
88 __u16 rd_size; 90 __u16 rd_size;
89 __u8 __user *rd_data; 91 __u8 __user *rd_data;
@@ -140,7 +142,7 @@ struct hidp_session {
140 uint ctrl_mtu; 142 uint ctrl_mtu;
141 uint intr_mtu; 143 uint intr_mtu;
142 144
143 atomic_t terminate; 145 struct task_struct *task;
144 146
145 unsigned char keys[8]; 147 unsigned char keys[8];
146 unsigned char leds; 148 unsigned char leds;
@@ -154,9 +156,22 @@ struct hidp_session {
154 struct sk_buff_head ctrl_transmit; 156 struct sk_buff_head ctrl_transmit;
155 struct sk_buff_head intr_transmit; 157 struct sk_buff_head intr_transmit;
156 158
159 /* Used in hidp_get_raw_report() */
160 int waiting_report_type; /* HIDP_DATA_RTYPE_* */
161 int waiting_report_number; /* -1 for not numbered */
162 struct mutex report_mutex;
163 struct sk_buff *report_return;
164 wait_queue_head_t report_queue;
165
166 /* Used in hidp_output_raw_report() */
167 int output_report_success; /* boolean */
168
157 /* Report descriptor */ 169 /* Report descriptor */
158 __u8 *rd_data; 170 __u8 *rd_data;
159 uint rd_size; 171 uint rd_size;
172
173 wait_queue_head_t startup_queue;
174 int waiting_for_startup;
160}; 175};
161 176
162static inline void hidp_schedule(struct hidp_session *session) 177static inline void hidp_schedule(struct hidp_session *session)
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 250dfd46237d..178ac7f127ad 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -85,7 +85,8 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
85 return err; 85 return err;
86 } 86 }
87 87
88 if (csock->sk->sk_state != BT_CONNECTED || isock->sk->sk_state != BT_CONNECTED) { 88 if (csock->sk->sk_state != BT_CONNECTED ||
89 isock->sk->sk_state != BT_CONNECTED) {
89 sockfd_put(csock); 90 sockfd_put(csock);
90 sockfd_put(isock); 91 sockfd_put(isock);
91 return -EBADFD; 92 return -EBADFD;
@@ -140,8 +141,8 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
140 141
141#ifdef CONFIG_COMPAT 142#ifdef CONFIG_COMPAT
142struct compat_hidp_connadd_req { 143struct compat_hidp_connadd_req {
143 int ctrl_sock; // Connected control socket 144 int ctrl_sock; /* Connected control socket */
144 int intr_sock; // Connteted interrupt socket 145 int intr_sock; /* Connected interrupt socket */
145 __u16 parser; 146 __u16 parser;
146 __u16 rd_size; 147 __u16 rd_size;
147 compat_uptr_t rd_data; 148 compat_uptr_t rd_data;
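
The HIDP changes above (hidp_get_raw_report() and the reworked hidp_output_raw_report()) both follow the same request/response handshake: set a "waiting" bit, send the control message, sleep on a wait queue until the receive path clears the bit, and treat a timeout or signal as an error. A condensed sketch of that pattern with stand-in names (not the hidp_session fields):

	/* Condensed sketch of the wait/wake handshake used above.  'pending_req'
	 * and its fields are illustrative stand-ins; the wait queue must be
	 * initialized with init_waitqueue_head() before use. */
	#include <linux/wait.h>
	#include <linux/bitops.h>
	#include <linux/errno.h>
	#include <linux/jiffies.h>

	#define REQ_PENDING	0

	struct pending_req {
		unsigned long flags;		/* bit REQ_PENDING: reply outstanding */
		wait_queue_head_t waitq;
		void *reply;			/* filled in by the receive path */
	};

	static int wait_for_reply(struct pending_req *req)
	{
		long res;

		set_bit(REQ_PENDING, &req->flags);
		/* ... send the request to the device here ... */

		res = wait_event_interruptible_timeout(req->waitq,
				!test_bit(REQ_PENDING, &req->flags), 5 * HZ);
		if (res == 0)
			return -EIO;		/* timed out waiting for the reply */
		if (res < 0)
			return -ERESTARTSYS;	/* interrupted by a signal */

		return 0;			/* req->reply is now valid */
	}

	/* Called from the receive path when the matching reply arrives. */
	static void reply_arrived(struct pending_req *req, void *reply)
	{
		req->reply = reply;
		clear_bit(REQ_PENDING, &req->flags);
		wake_up_interruptible(&req->waitq);
	}

In the real code, session->report_mutex additionally serialises these requests so that at most one reply is outstanding per session.
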
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
deleted file mode 100644
index 675614e38e14..000000000000
--- a/net/bluetooth/l2cap.c
+++ /dev/null
@@ -1,4930 +0,0 @@
1/*
2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated
4 Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
5 Copyright (C) 2010 Google Inc.
6
7 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License version 2 as
11 published by the Free Software Foundation;
12
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
16 IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
17 CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
18 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
19 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
20 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21
22 ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
23 COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
24 SOFTWARE IS DISCLAIMED.
25*/
26
27/* Bluetooth L2CAP core and sockets. */
28
29#include <linux/module.h>
30
31#include <linux/types.h>
32#include <linux/capability.h>
33#include <linux/errno.h>
34#include <linux/kernel.h>
35#include <linux/sched.h>
36#include <linux/slab.h>
37#include <linux/poll.h>
38#include <linux/fcntl.h>
39#include <linux/init.h>
40#include <linux/interrupt.h>
41#include <linux/socket.h>
42#include <linux/skbuff.h>
43#include <linux/list.h>
44#include <linux/device.h>
45#include <linux/debugfs.h>
46#include <linux/seq_file.h>
47#include <linux/uaccess.h>
48#include <linux/crc16.h>
49#include <net/sock.h>
50
51#include <asm/system.h>
52#include <asm/unaligned.h>
53
54#include <net/bluetooth/bluetooth.h>
55#include <net/bluetooth/hci_core.h>
56#include <net/bluetooth/l2cap.h>
57
58#define VERSION "2.15"
59
60static int disable_ertm;
61
62static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
63static u8 l2cap_fixed_chan[8] = { 0x02, };
64
65static const struct proto_ops l2cap_sock_ops;
66
67static struct workqueue_struct *_busy_wq;
68
69static struct bt_sock_list l2cap_sk_list = {
70 .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
71};
72
73static void l2cap_busy_work(struct work_struct *work);
74
75static void __l2cap_sock_close(struct sock *sk, int reason);
76static void l2cap_sock_close(struct sock *sk);
77static void l2cap_sock_kill(struct sock *sk);
78
79static int l2cap_build_conf_req(struct sock *sk, void *data);
80static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
81 u8 code, u8 ident, u16 dlen, void *data);
82
83static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
84
85/* ---- L2CAP timers ---- */
86static void l2cap_sock_set_timer(struct sock *sk, long timeout)
87{
88 BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
89 sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
90}
91
92static void l2cap_sock_clear_timer(struct sock *sk)
93{
94 BT_DBG("sock %p state %d", sk, sk->sk_state);
95 sk_stop_timer(sk, &sk->sk_timer);
96}
97
98static void l2cap_sock_timeout(unsigned long arg)
99{
100 struct sock *sk = (struct sock *) arg;
101 int reason;
102
103 BT_DBG("sock %p state %d", sk, sk->sk_state);
104
105 bh_lock_sock(sk);
106
107 if (sock_owned_by_user(sk)) {
108 /* sk is owned by user. Try again later */
109 l2cap_sock_set_timer(sk, HZ / 5);
110 bh_unlock_sock(sk);
111 sock_put(sk);
112 return;
113 }
114
115 if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
116 reason = ECONNREFUSED;
117 else if (sk->sk_state == BT_CONNECT &&
118 l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
119 reason = ECONNREFUSED;
120 else
121 reason = ETIMEDOUT;
122
123 __l2cap_sock_close(sk, reason);
124
125 bh_unlock_sock(sk);
126
127 l2cap_sock_kill(sk);
128 sock_put(sk);
129}
130
131/* ---- L2CAP channels ---- */
132static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
133{
134 struct sock *s;
135 for (s = l->head; s; s = l2cap_pi(s)->next_c) {
136 if (l2cap_pi(s)->dcid == cid)
137 break;
138 }
139 return s;
140}
141
142static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
143{
144 struct sock *s;
145 for (s = l->head; s; s = l2cap_pi(s)->next_c) {
146 if (l2cap_pi(s)->scid == cid)
147 break;
148 }
149 return s;
150}
151
152/* Find channel with given SCID.
153 * Returns locked socket */
154static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
155{
156 struct sock *s;
157 read_lock(&l->lock);
158 s = __l2cap_get_chan_by_scid(l, cid);
159 if (s)
160 bh_lock_sock(s);
161 read_unlock(&l->lock);
162 return s;
163}
164
165static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
166{
167 struct sock *s;
168 for (s = l->head; s; s = l2cap_pi(s)->next_c) {
169 if (l2cap_pi(s)->ident == ident)
170 break;
171 }
172 return s;
173}
174
175static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
176{
177 struct sock *s;
178 read_lock(&l->lock);
179 s = __l2cap_get_chan_by_ident(l, ident);
180 if (s)
181 bh_lock_sock(s);
182 read_unlock(&l->lock);
183 return s;
184}
185
186static u16 l2cap_alloc_cid(struct l2cap_chan_list *l)
187{
188 u16 cid = L2CAP_CID_DYN_START;
189
190 for (; cid < L2CAP_CID_DYN_END; cid++) {
191 if (!__l2cap_get_chan_by_scid(l, cid))
192 return cid;
193 }
194
195 return 0;
196}
197
198static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk)
199{
200 sock_hold(sk);
201
202 if (l->head)
203 l2cap_pi(l->head)->prev_c = sk;
204
205 l2cap_pi(sk)->next_c = l->head;
206 l2cap_pi(sk)->prev_c = NULL;
207 l->head = sk;
208}
209
210static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk)
211{
212 struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c;
213
214 write_lock_bh(&l->lock);
215 if (sk == l->head)
216 l->head = next;
217
218 if (next)
219 l2cap_pi(next)->prev_c = prev;
220 if (prev)
221 l2cap_pi(prev)->next_c = next;
222 write_unlock_bh(&l->lock);
223
224 __sock_put(sk);
225}
226
227static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
228{
229 struct l2cap_chan_list *l = &conn->chan_list;
230
231 BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn,
232 l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
233
234 conn->disc_reason = 0x13;
235
236 l2cap_pi(sk)->conn = conn;
237
238 if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) {
239 /* Alloc CID for connection-oriented socket */
240 l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
241 } else if (sk->sk_type == SOCK_DGRAM) {
242 /* Connectionless socket */
243 l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS;
244 l2cap_pi(sk)->dcid = L2CAP_CID_CONN_LESS;
245 l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
246 } else {
247 /* Raw socket can send/recv signalling messages only */
248 l2cap_pi(sk)->scid = L2CAP_CID_SIGNALING;
249 l2cap_pi(sk)->dcid = L2CAP_CID_SIGNALING;
250 l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
251 }
252
253 __l2cap_chan_link(l, sk);
254
255 if (parent)
256 bt_accept_enqueue(parent, sk);
257}
258
259/* Delete channel.
260 * Must be called on the locked socket. */
261static void l2cap_chan_del(struct sock *sk, int err)
262{
263 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
264 struct sock *parent = bt_sk(sk)->parent;
265
266 l2cap_sock_clear_timer(sk);
267
268 BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
269
270 if (conn) {
271 /* Unlink from channel list */
272 l2cap_chan_unlink(&conn->chan_list, sk);
273 l2cap_pi(sk)->conn = NULL;
274 hci_conn_put(conn->hcon);
275 }
276
277 sk->sk_state = BT_CLOSED;
278 sock_set_flag(sk, SOCK_ZAPPED);
279
280 if (err)
281 sk->sk_err = err;
282
283 if (parent) {
284 bt_accept_unlink(sk);
285 parent->sk_data_ready(parent, 0);
286 } else
287 sk->sk_state_change(sk);
288
289 skb_queue_purge(TX_QUEUE(sk));
290
291 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
292 struct srej_list *l, *tmp;
293
294 del_timer(&l2cap_pi(sk)->retrans_timer);
295 del_timer(&l2cap_pi(sk)->monitor_timer);
296 del_timer(&l2cap_pi(sk)->ack_timer);
297
298 skb_queue_purge(SREJ_QUEUE(sk));
299 skb_queue_purge(BUSY_QUEUE(sk));
300
301 list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) {
302 list_del(&l->list);
303 kfree(l);
304 }
305 }
306}
307
308static inline u8 l2cap_get_auth_type(struct sock *sk)
309{
310 if (sk->sk_type == SOCK_RAW) {
311 switch (l2cap_pi(sk)->sec_level) {
312 case BT_SECURITY_HIGH:
313 return HCI_AT_DEDICATED_BONDING_MITM;
314 case BT_SECURITY_MEDIUM:
315 return HCI_AT_DEDICATED_BONDING;
316 default:
317 return HCI_AT_NO_BONDING;
318 }
319 } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
320 if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
321 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
322
323 if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
324 return HCI_AT_NO_BONDING_MITM;
325 else
326 return HCI_AT_NO_BONDING;
327 } else {
328 switch (l2cap_pi(sk)->sec_level) {
329 case BT_SECURITY_HIGH:
330 return HCI_AT_GENERAL_BONDING_MITM;
331 case BT_SECURITY_MEDIUM:
332 return HCI_AT_GENERAL_BONDING;
333 default:
334 return HCI_AT_NO_BONDING;
335 }
336 }
337}
338
339/* Service level security */
340static inline int l2cap_check_security(struct sock *sk)
341{
342 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
343 __u8 auth_type;
344
345 auth_type = l2cap_get_auth_type(sk);
346
347 return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level,
348 auth_type);
349}
350
351static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
352{
353 u8 id;
354
355	/* Get next available identifier.
356 * 1 - 128 are used by kernel.
357 * 129 - 199 are reserved.
358 * 200 - 254 are used by utilities like l2ping, etc.
359 */
360
361 spin_lock_bh(&conn->lock);
362
363 if (++conn->tx_ident > 128)
364 conn->tx_ident = 1;
365
366 id = conn->tx_ident;
367
368 spin_unlock_bh(&conn->lock);
369
370 return id;
371}
372
373static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
374{
375 struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data);
376
377 BT_DBG("code 0x%2.2x", code);
378
379 if (!skb)
380 return;
381
382 hci_send_acl(conn->hcon, skb, 0);
383}
384
385static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
386{
387 struct sk_buff *skb;
388 struct l2cap_hdr *lh;
389 struct l2cap_conn *conn = pi->conn;
390 struct sock *sk = (struct sock *)pi;
391 int count, hlen = L2CAP_HDR_SIZE + 2;
392
393 if (sk->sk_state != BT_CONNECTED)
394 return;
395
396 if (pi->fcs == L2CAP_FCS_CRC16)
397 hlen += 2;
398
399 BT_DBG("pi %p, control 0x%2.2x", pi, control);
400
401 count = min_t(unsigned int, conn->mtu, hlen);
402 control |= L2CAP_CTRL_FRAME_TYPE;
403
404 if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
405 control |= L2CAP_CTRL_FINAL;
406 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
407 }
408
409 if (pi->conn_state & L2CAP_CONN_SEND_PBIT) {
410 control |= L2CAP_CTRL_POLL;
411 pi->conn_state &= ~L2CAP_CONN_SEND_PBIT;
412 }
413
414 skb = bt_skb_alloc(count, GFP_ATOMIC);
415 if (!skb)
416 return;
417
418 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
419 lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE);
420 lh->cid = cpu_to_le16(pi->dcid);
421 put_unaligned_le16(control, skb_put(skb, 2));
422
423 if (pi->fcs == L2CAP_FCS_CRC16) {
424 u16 fcs = crc16(0, (u8 *)lh, count - 2);
425 put_unaligned_le16(fcs, skb_put(skb, 2));
426 }
427
428 hci_send_acl(pi->conn->hcon, skb, 0);
429}
430
431static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
432{
433 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
434 control |= L2CAP_SUPER_RCV_NOT_READY;
435 pi->conn_state |= L2CAP_CONN_RNR_SENT;
436 } else
437 control |= L2CAP_SUPER_RCV_READY;
438
439 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
440
441 l2cap_send_sframe(pi, control);
442}
443
444static inline int __l2cap_no_conn_pending(struct sock *sk)
445{
446 return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND);
447}
448
449static void l2cap_do_start(struct sock *sk)
450{
451 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
452
453 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
454 if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
455 return;
456
457 if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) {
458 struct l2cap_conn_req req;
459 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
460 req.psm = l2cap_pi(sk)->psm;
461
462 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
463 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
464
465 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
466 L2CAP_CONN_REQ, sizeof(req), &req);
467 }
468 } else {
469 struct l2cap_info_req req;
470 req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
471
472 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
473 conn->info_ident = l2cap_get_ident(conn);
474
475 mod_timer(&conn->info_timer, jiffies +
476 msecs_to_jiffies(L2CAP_INFO_TIMEOUT));
477
478 l2cap_send_cmd(conn, conn->info_ident,
479 L2CAP_INFO_REQ, sizeof(req), &req);
480 }
481}
482
483static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
484{
485 u32 local_feat_mask = l2cap_feat_mask;
486 if (!disable_ertm)
487 local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
488
489 switch (mode) {
490 case L2CAP_MODE_ERTM:
491 return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
492 case L2CAP_MODE_STREAMING:
493 return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
494 default:
495 return 0x00;
496 }
497}
498
499static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
500{
501 struct l2cap_disconn_req req;
502
503 if (!conn)
504 return;
505
506 skb_queue_purge(TX_QUEUE(sk));
507
508 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
509 del_timer(&l2cap_pi(sk)->retrans_timer);
510 del_timer(&l2cap_pi(sk)->monitor_timer);
511 del_timer(&l2cap_pi(sk)->ack_timer);
512 }
513
514 req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
515 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
516 l2cap_send_cmd(conn, l2cap_get_ident(conn),
517 L2CAP_DISCONN_REQ, sizeof(req), &req);
518
519 sk->sk_state = BT_DISCONN;
520 sk->sk_err = err;
521}
522
523/* ---- L2CAP connections ---- */
524static void l2cap_conn_start(struct l2cap_conn *conn)
525{
526 struct l2cap_chan_list *l = &conn->chan_list;
527 struct sock_del_list del, *tmp1, *tmp2;
528 struct sock *sk;
529
530 BT_DBG("conn %p", conn);
531
532 INIT_LIST_HEAD(&del.list);
533
534 read_lock(&l->lock);
535
536 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
537 bh_lock_sock(sk);
538
539 if (sk->sk_type != SOCK_SEQPACKET &&
540 sk->sk_type != SOCK_STREAM) {
541 bh_unlock_sock(sk);
542 continue;
543 }
544
545 if (sk->sk_state == BT_CONNECT) {
546 struct l2cap_conn_req req;
547
548 if (!l2cap_check_security(sk) ||
549 !__l2cap_no_conn_pending(sk)) {
550 bh_unlock_sock(sk);
551 continue;
552 }
553
554 if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
555 conn->feat_mask)
556 && l2cap_pi(sk)->conf_state &
557 L2CAP_CONF_STATE2_DEVICE) {
558 tmp1 = kzalloc(sizeof(struct sock_del_list),
559 GFP_ATOMIC);
560 tmp1->sk = sk;
561 list_add_tail(&tmp1->list, &del.list);
562 bh_unlock_sock(sk);
563 continue;
564 }
565
566 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
567 req.psm = l2cap_pi(sk)->psm;
568
569 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
570 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
571
572 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
573 L2CAP_CONN_REQ, sizeof(req), &req);
574
575 } else if (sk->sk_state == BT_CONNECT2) {
576 struct l2cap_conn_rsp rsp;
577 char buf[128];
578 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
579 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
580
581 if (l2cap_check_security(sk)) {
582 if (bt_sk(sk)->defer_setup) {
583 struct sock *parent = bt_sk(sk)->parent;
584 rsp.result = cpu_to_le16(L2CAP_CR_PEND);
585 rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
586 parent->sk_data_ready(parent, 0);
587
588 } else {
589 sk->sk_state = BT_CONFIG;
590 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
591 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
592 }
593 } else {
594 rsp.result = cpu_to_le16(L2CAP_CR_PEND);
595 rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
596 }
597
598 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
599 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
600
601 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT ||
602 rsp.result != L2CAP_CR_SUCCESS) {
603 bh_unlock_sock(sk);
604 continue;
605 }
606
607 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
608 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
609 l2cap_build_conf_req(sk, buf), buf);
610 l2cap_pi(sk)->num_conf_req++;
611 }
612
613 bh_unlock_sock(sk);
614 }
615
616 read_unlock(&l->lock);
617
618 list_for_each_entry_safe(tmp1, tmp2, &del.list, list) {
619 bh_lock_sock(tmp1->sk);
620 __l2cap_sock_close(tmp1->sk, ECONNRESET);
621 bh_unlock_sock(tmp1->sk);
622 list_del(&tmp1->list);
623 kfree(tmp1);
624 }
625}
626
627static void l2cap_conn_ready(struct l2cap_conn *conn)
628{
629 struct l2cap_chan_list *l = &conn->chan_list;
630 struct sock *sk;
631
632 BT_DBG("conn %p", conn);
633
634 read_lock(&l->lock);
635
636 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
637 bh_lock_sock(sk);
638
639 if (sk->sk_type != SOCK_SEQPACKET &&
640 sk->sk_type != SOCK_STREAM) {
641 l2cap_sock_clear_timer(sk);
642 sk->sk_state = BT_CONNECTED;
643 sk->sk_state_change(sk);
644 } else if (sk->sk_state == BT_CONNECT)
645 l2cap_do_start(sk);
646
647 bh_unlock_sock(sk);
648 }
649
650 read_unlock(&l->lock);
651}
652
 653/* Notify sockets that we cannot guarantee reliability anymore */
654static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err)
655{
656 struct l2cap_chan_list *l = &conn->chan_list;
657 struct sock *sk;
658
659 BT_DBG("conn %p", conn);
660
661 read_lock(&l->lock);
662
663 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
664 if (l2cap_pi(sk)->force_reliable)
665 sk->sk_err = err;
666 }
667
668 read_unlock(&l->lock);
669}
670
671static void l2cap_info_timeout(unsigned long arg)
672{
673 struct l2cap_conn *conn = (void *) arg;
674
675 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
676 conn->info_ident = 0;
677
678 l2cap_conn_start(conn);
679}
680
681static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
682{
683 struct l2cap_conn *conn = hcon->l2cap_data;
684
685 if (conn || status)
686 return conn;
687
688 conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC);
689 if (!conn)
690 return NULL;
691
692 hcon->l2cap_data = conn;
693 conn->hcon = hcon;
694
695 BT_DBG("hcon %p conn %p", hcon, conn);
696
697 conn->mtu = hcon->hdev->acl_mtu;
698 conn->src = &hcon->hdev->bdaddr;
699 conn->dst = &hcon->dst;
700
701 conn->feat_mask = 0;
702
703 spin_lock_init(&conn->lock);
704 rwlock_init(&conn->chan_list.lock);
705
706 setup_timer(&conn->info_timer, l2cap_info_timeout,
707 (unsigned long) conn);
708
709 conn->disc_reason = 0x13;
710
711 return conn;
712}
713
714static void l2cap_conn_del(struct hci_conn *hcon, int err)
715{
716 struct l2cap_conn *conn = hcon->l2cap_data;
717 struct sock *sk;
718
719 if (!conn)
720 return;
721
722 BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
723
724 kfree_skb(conn->rx_skb);
725
726 /* Kill channels */
727 while ((sk = conn->chan_list.head)) {
728 bh_lock_sock(sk);
729 l2cap_chan_del(sk, err);
730 bh_unlock_sock(sk);
731 l2cap_sock_kill(sk);
732 }
733
734 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
735 del_timer_sync(&conn->info_timer);
736
737 hcon->l2cap_data = NULL;
738 kfree(conn);
739}
740
741static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
742{
743 struct l2cap_chan_list *l = &conn->chan_list;
744 write_lock_bh(&l->lock);
745 __l2cap_chan_add(conn, sk, parent);
746 write_unlock_bh(&l->lock);
747}
748
749/* ---- Socket interface ---- */
750static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src)
751{
752 struct sock *sk;
753 struct hlist_node *node;
754 sk_for_each(sk, node, &l2cap_sk_list.head)
755 if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src))
756 goto found;
757 sk = NULL;
758found:
759 return sk;
760}
761
762/* Find socket with psm and source bdaddr.
763 * Returns closest match.
764 */
765static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
766{
767 struct sock *sk = NULL, *sk1 = NULL;
768 struct hlist_node *node;
769
770 read_lock(&l2cap_sk_list.lock);
771
772 sk_for_each(sk, node, &l2cap_sk_list.head) {
773 if (state && sk->sk_state != state)
774 continue;
775
776 if (l2cap_pi(sk)->psm == psm) {
777 /* Exact match. */
778 if (!bacmp(&bt_sk(sk)->src, src))
779 break;
780
781 /* Closest match */
782 if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
783 sk1 = sk;
784 }
785 }
786
787 read_unlock(&l2cap_sk_list.lock);
788
789 return node ? sk : sk1;
790}
791
792static void l2cap_sock_destruct(struct sock *sk)
793{
794 BT_DBG("sk %p", sk);
795
796 skb_queue_purge(&sk->sk_receive_queue);
797 skb_queue_purge(&sk->sk_write_queue);
798}
799
800static void l2cap_sock_cleanup_listen(struct sock *parent)
801{
802 struct sock *sk;
803
804 BT_DBG("parent %p", parent);
805
806 /* Close not yet accepted channels */
807 while ((sk = bt_accept_dequeue(parent, NULL)))
808 l2cap_sock_close(sk);
809
810 parent->sk_state = BT_CLOSED;
811 sock_set_flag(parent, SOCK_ZAPPED);
812}
813
814/* Kill socket (only if zapped and orphan)
815 * Must be called on unlocked socket.
816 */
817static void l2cap_sock_kill(struct sock *sk)
818{
819 if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
820 return;
821
822 BT_DBG("sk %p state %d", sk, sk->sk_state);
823
824 /* Kill poor orphan */
825 bt_sock_unlink(&l2cap_sk_list, sk);
826 sock_set_flag(sk, SOCK_DEAD);
827 sock_put(sk);
828}
829
830static void __l2cap_sock_close(struct sock *sk, int reason)
831{
832 BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
833
834 switch (sk->sk_state) {
835 case BT_LISTEN:
836 l2cap_sock_cleanup_listen(sk);
837 break;
838
839 case BT_CONNECTED:
840 case BT_CONFIG:
841 if (sk->sk_type == SOCK_SEQPACKET ||
842 sk->sk_type == SOCK_STREAM) {
843 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
844
845 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
846 l2cap_send_disconn_req(conn, sk, reason);
847 } else
848 l2cap_chan_del(sk, reason);
849 break;
850
851 case BT_CONNECT2:
852 if (sk->sk_type == SOCK_SEQPACKET ||
853 sk->sk_type == SOCK_STREAM) {
854 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
855 struct l2cap_conn_rsp rsp;
856 __u16 result;
857
858 if (bt_sk(sk)->defer_setup)
859 result = L2CAP_CR_SEC_BLOCK;
860 else
861 result = L2CAP_CR_BAD_PSM;
862 sk->sk_state = BT_DISCONN;
863
864 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
865 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
866 rsp.result = cpu_to_le16(result);
867 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
868 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
869 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
870 } else
871 l2cap_chan_del(sk, reason);
872 break;
873
874 case BT_CONNECT:
875 case BT_DISCONN:
876 l2cap_chan_del(sk, reason);
877 break;
878
879 default:
880 sock_set_flag(sk, SOCK_ZAPPED);
881 break;
882 }
883}
884
885/* Must be called on unlocked socket. */
886static void l2cap_sock_close(struct sock *sk)
887{
888 l2cap_sock_clear_timer(sk);
889 lock_sock(sk);
890 __l2cap_sock_close(sk, ECONNRESET);
891 release_sock(sk);
892 l2cap_sock_kill(sk);
893}
894
895static void l2cap_sock_init(struct sock *sk, struct sock *parent)
896{
897 struct l2cap_pinfo *pi = l2cap_pi(sk);
898
899 BT_DBG("sk %p", sk);
900
901 if (parent) {
902 sk->sk_type = parent->sk_type;
903 bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
904
905 pi->imtu = l2cap_pi(parent)->imtu;
906 pi->omtu = l2cap_pi(parent)->omtu;
907 pi->conf_state = l2cap_pi(parent)->conf_state;
908 pi->mode = l2cap_pi(parent)->mode;
909 pi->fcs = l2cap_pi(parent)->fcs;
910 pi->max_tx = l2cap_pi(parent)->max_tx;
911 pi->tx_win = l2cap_pi(parent)->tx_win;
912 pi->sec_level = l2cap_pi(parent)->sec_level;
913 pi->role_switch = l2cap_pi(parent)->role_switch;
914 pi->force_reliable = l2cap_pi(parent)->force_reliable;
915 } else {
916 pi->imtu = L2CAP_DEFAULT_MTU;
917 pi->omtu = 0;
918 if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
919 pi->mode = L2CAP_MODE_ERTM;
920 pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
921 } else {
922 pi->mode = L2CAP_MODE_BASIC;
923 }
924 pi->max_tx = L2CAP_DEFAULT_MAX_TX;
925 pi->fcs = L2CAP_FCS_CRC16;
926 pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
927 pi->sec_level = BT_SECURITY_LOW;
928 pi->role_switch = 0;
929 pi->force_reliable = 0;
930 }
931
932 /* Default config options */
933 pi->conf_len = 0;
934 pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
935 skb_queue_head_init(TX_QUEUE(sk));
936 skb_queue_head_init(SREJ_QUEUE(sk));
937 skb_queue_head_init(BUSY_QUEUE(sk));
938 INIT_LIST_HEAD(SREJ_LIST(sk));
939}
940
941static struct proto l2cap_proto = {
942 .name = "L2CAP",
943 .owner = THIS_MODULE,
944 .obj_size = sizeof(struct l2cap_pinfo)
945};
946
947static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
948{
949 struct sock *sk;
950
951 sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
952 if (!sk)
953 return NULL;
954
955 sock_init_data(sock, sk);
956 INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
957
958 sk->sk_destruct = l2cap_sock_destruct;
959 sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
960
961 sock_reset_flag(sk, SOCK_ZAPPED);
962
963 sk->sk_protocol = proto;
964 sk->sk_state = BT_OPEN;
965
966 setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
967
968 bt_sock_link(&l2cap_sk_list, sk);
969 return sk;
970}
971
972static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
973 int kern)
974{
975 struct sock *sk;
976
977 BT_DBG("sock %p", sock);
978
979 sock->state = SS_UNCONNECTED;
980
981 if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
982 sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
983 return -ESOCKTNOSUPPORT;
984
985 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
986 return -EPERM;
987
988 sock->ops = &l2cap_sock_ops;
989
990 sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
991 if (!sk)
992 return -ENOMEM;
993
994 l2cap_sock_init(sk, NULL);
995 return 0;
996}
997
998static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
999{
1000 struct sock *sk = sock->sk;
1001 struct sockaddr_l2 la;
1002 int len, err = 0;
1003
1004 BT_DBG("sk %p", sk);
1005
1006 if (!addr || addr->sa_family != AF_BLUETOOTH)
1007 return -EINVAL;
1008
1009 memset(&la, 0, sizeof(la));
1010 len = min_t(unsigned int, sizeof(la), alen);
1011 memcpy(&la, addr, len);
1012
1013 if (la.l2_cid)
1014 return -EINVAL;
1015
1016 lock_sock(sk);
1017
1018 if (sk->sk_state != BT_OPEN) {
1019 err = -EBADFD;
1020 goto done;
1021 }
1022
1023 if (la.l2_psm) {
1024 __u16 psm = __le16_to_cpu(la.l2_psm);
1025
1026 /* PSM must be odd and lsb of upper byte must be 0 */
1027 if ((psm & 0x0101) != 0x0001) {
1028 err = -EINVAL;
1029 goto done;
1030 }
1031
1032 /* Restrict usage of well-known PSMs */
1033 if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
1034 err = -EACCES;
1035 goto done;
1036 }
1037 }
1038
1039 write_lock_bh(&l2cap_sk_list.lock);
1040
1041 if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
1042 err = -EADDRINUSE;
1043 } else {
1044 /* Save source address */
1045 bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
1046 l2cap_pi(sk)->psm = la.l2_psm;
1047 l2cap_pi(sk)->sport = la.l2_psm;
1048 sk->sk_state = BT_BOUND;
1049
1050 if (__le16_to_cpu(la.l2_psm) == 0x0001 ||
1051 __le16_to_cpu(la.l2_psm) == 0x0003)
1052 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
1053 }
1054
1055 write_unlock_bh(&l2cap_sk_list.lock);
1056
1057done:
1058 release_sock(sk);
1059 return err;
1060}
1061
1062static int l2cap_do_connect(struct sock *sk)
1063{
1064 bdaddr_t *src = &bt_sk(sk)->src;
1065 bdaddr_t *dst = &bt_sk(sk)->dst;
1066 struct l2cap_conn *conn;
1067 struct hci_conn *hcon;
1068 struct hci_dev *hdev;
1069 __u8 auth_type;
1070 int err;
1071
1072 BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst),
1073 l2cap_pi(sk)->psm);
1074
1075 hdev = hci_get_route(dst, src);
1076 if (!hdev)
1077 return -EHOSTUNREACH;
1078
1079 hci_dev_lock_bh(hdev);
1080
1081 err = -ENOMEM;
1082
1083 auth_type = l2cap_get_auth_type(sk);
1084
1085 hcon = hci_connect(hdev, ACL_LINK, dst,
1086 l2cap_pi(sk)->sec_level, auth_type);
1087 if (!hcon)
1088 goto done;
1089
1090 conn = l2cap_conn_add(hcon, 0);
1091 if (!conn) {
1092 hci_conn_put(hcon);
1093 goto done;
1094 }
1095
1096 err = 0;
1097
1098 /* Update source addr of the socket */
1099 bacpy(src, conn->src);
1100
1101 l2cap_chan_add(conn, sk, NULL);
1102
1103 sk->sk_state = BT_CONNECT;
1104 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
1105
1106 if (hcon->state == BT_CONNECTED) {
1107 if (sk->sk_type != SOCK_SEQPACKET &&
1108 sk->sk_type != SOCK_STREAM) {
1109 l2cap_sock_clear_timer(sk);
1110 if (l2cap_check_security(sk))
1111 sk->sk_state = BT_CONNECTED;
1112 } else
1113 l2cap_do_start(sk);
1114 }
1115
1116done:
1117 hci_dev_unlock_bh(hdev);
1118 hci_dev_put(hdev);
1119 return err;
1120}
1121
1122static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
1123{
1124 struct sock *sk = sock->sk;
1125 struct sockaddr_l2 la;
1126 int len, err = 0;
1127
1128 BT_DBG("sk %p", sk);
1129
1130 if (!addr || alen < sizeof(addr->sa_family) ||
1131 addr->sa_family != AF_BLUETOOTH)
1132 return -EINVAL;
1133
1134 memset(&la, 0, sizeof(la));
1135 len = min_t(unsigned int, sizeof(la), alen);
1136 memcpy(&la, addr, len);
1137
1138 if (la.l2_cid)
1139 return -EINVAL;
1140
1141 lock_sock(sk);
1142
1143 if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
1144 && !la.l2_psm) {
1145 err = -EINVAL;
1146 goto done;
1147 }
1148
1149 switch (l2cap_pi(sk)->mode) {
1150 case L2CAP_MODE_BASIC:
1151 break;
1152 case L2CAP_MODE_ERTM:
1153 case L2CAP_MODE_STREAMING:
1154 if (!disable_ertm)
1155 break;
1156 /* fall through */
1157 default:
1158 err = -ENOTSUPP;
1159 goto done;
1160 }
1161
1162 switch (sk->sk_state) {
1163 case BT_CONNECT:
1164 case BT_CONNECT2:
1165 case BT_CONFIG:
1166 /* Already connecting */
1167 goto wait;
1168
1169 case BT_CONNECTED:
1170 /* Already connected */
1171 err = -EISCONN;
1172 goto done;
1173
1174 case BT_OPEN:
1175 case BT_BOUND:
1176 /* Can connect */
1177 break;
1178
1179 default:
1180 err = -EBADFD;
1181 goto done;
1182 }
1183
1184 /* PSM must be odd and lsb of upper byte must be 0 */
1185 if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
1186 sk->sk_type != SOCK_RAW) {
1187 err = -EINVAL;
1188 goto done;
1189 }
1190
1191 /* Set destination address and psm */
1192 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
1193 l2cap_pi(sk)->psm = la.l2_psm;
1194
1195 err = l2cap_do_connect(sk);
1196 if (err)
1197 goto done;
1198
1199wait:
1200 err = bt_sock_wait_state(sk, BT_CONNECTED,
1201 sock_sndtimeo(sk, flags & O_NONBLOCK));
1202done:
1203 release_sock(sk);
1204 return err;
1205}
1206
1207static int l2cap_sock_listen(struct socket *sock, int backlog)
1208{
1209 struct sock *sk = sock->sk;
1210 int err = 0;
1211
1212 BT_DBG("sk %p backlog %d", sk, backlog);
1213
1214 lock_sock(sk);
1215
1216 if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
1217 || sk->sk_state != BT_BOUND) {
1218 err = -EBADFD;
1219 goto done;
1220 }
1221
1222 switch (l2cap_pi(sk)->mode) {
1223 case L2CAP_MODE_BASIC:
1224 break;
1225 case L2CAP_MODE_ERTM:
1226 case L2CAP_MODE_STREAMING:
1227 if (!disable_ertm)
1228 break;
1229 /* fall through */
1230 default:
1231 err = -ENOTSUPP;
1232 goto done;
1233 }
1234
1235 if (!l2cap_pi(sk)->psm) {
1236 bdaddr_t *src = &bt_sk(sk)->src;
1237 u16 psm;
1238
1239 err = -EINVAL;
1240
1241 write_lock_bh(&l2cap_sk_list.lock);
1242
1243 for (psm = 0x1001; psm < 0x1100; psm += 2)
1244 if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) {
1245 l2cap_pi(sk)->psm = cpu_to_le16(psm);
1246 l2cap_pi(sk)->sport = cpu_to_le16(psm);
1247 err = 0;
1248 break;
1249 }
1250
1251 write_unlock_bh(&l2cap_sk_list.lock);
1252
1253 if (err < 0)
1254 goto done;
1255 }
1256
1257 sk->sk_max_ack_backlog = backlog;
1258 sk->sk_ack_backlog = 0;
1259 sk->sk_state = BT_LISTEN;
1260
1261done:
1262 release_sock(sk);
1263 return err;
1264}
1265
1266static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags)
1267{
1268 DECLARE_WAITQUEUE(wait, current);
1269 struct sock *sk = sock->sk, *nsk;
1270 long timeo;
1271 int err = 0;
1272
1273 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
1274
1275 if (sk->sk_state != BT_LISTEN) {
1276 err = -EBADFD;
1277 goto done;
1278 }
1279
1280 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1281
1282 BT_DBG("sk %p timeo %ld", sk, timeo);
1283
1284 /* Wait for an incoming connection. (wake-one). */
1285 add_wait_queue_exclusive(sk_sleep(sk), &wait);
1286 while (!(nsk = bt_accept_dequeue(sk, newsock))) {
1287 set_current_state(TASK_INTERRUPTIBLE);
1288 if (!timeo) {
1289 err = -EAGAIN;
1290 break;
1291 }
1292
1293 release_sock(sk);
1294 timeo = schedule_timeout(timeo);
1295 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
1296
1297 if (sk->sk_state != BT_LISTEN) {
1298 err = -EBADFD;
1299 break;
1300 }
1301
1302 if (signal_pending(current)) {
1303 err = sock_intr_errno(timeo);
1304 break;
1305 }
1306 }
1307 set_current_state(TASK_RUNNING);
1308 remove_wait_queue(sk_sleep(sk), &wait);
1309
1310 if (err)
1311 goto done;
1312
1313 newsock->state = SS_CONNECTED;
1314
1315 BT_DBG("new socket %p", nsk);
1316
1317done:
1318 release_sock(sk);
1319 return err;
1320}
1321
1322static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
1323{
1324 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
1325 struct sock *sk = sock->sk;
1326
1327 BT_DBG("sock %p, sk %p", sock, sk);
1328
1329 addr->sa_family = AF_BLUETOOTH;
1330 *len = sizeof(struct sockaddr_l2);
1331
1332 if (peer) {
1333 la->l2_psm = l2cap_pi(sk)->psm;
1334 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
1335 la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid);
1336 } else {
1337 la->l2_psm = l2cap_pi(sk)->sport;
1338 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
1339 la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid);
1340 }
1341
1342 return 0;
1343}
1344
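/* Sleep (interruptibly, in HZ/5 slices) until every transmitted ERTM
 * I-frame has been acknowledged or the socket reports an error.
 */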
1345static int __l2cap_wait_ack(struct sock *sk)
1346{
1347 DECLARE_WAITQUEUE(wait, current);
1348 int err = 0;
1349 int timeo = HZ/5;
1350
1351 add_wait_queue(sk_sleep(sk), &wait);
 1352 	while (l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn) {
1353 set_current_state(TASK_INTERRUPTIBLE);
1354
1355 if (!timeo)
1356 timeo = HZ/5;
1357
1358 if (signal_pending(current)) {
1359 err = sock_intr_errno(timeo);
1360 break;
1361 }
1362
1363 release_sock(sk);
1364 timeo = schedule_timeout(timeo);
1365 lock_sock(sk);
1366
1367 err = sock_error(sk);
1368 if (err)
1369 break;
1370 }
1371 set_current_state(TASK_RUNNING);
1372 remove_wait_queue(sk_sleep(sk), &wait);
1373 return err;
1374}
1375
1376static void l2cap_monitor_timeout(unsigned long arg)
1377{
1378 struct sock *sk = (void *) arg;
1379
1380 BT_DBG("sk %p", sk);
1381
1382 bh_lock_sock(sk);
1383 if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) {
1384 l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED);
1385 bh_unlock_sock(sk);
1386 return;
1387 }
1388
1389 l2cap_pi(sk)->retry_count++;
1390 __mod_monitor_timer();
1391
1392 l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL);
1393 bh_unlock_sock(sk);
1394}
1395
1396static void l2cap_retrans_timeout(unsigned long arg)
1397{
1398 struct sock *sk = (void *) arg;
1399
1400 BT_DBG("sk %p", sk);
1401
1402 bh_lock_sock(sk);
1403 l2cap_pi(sk)->retry_count = 1;
1404 __mod_monitor_timer();
1405
1406 l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
1407
1408 l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL);
1409 bh_unlock_sock(sk);
1410}
1411
1412static void l2cap_drop_acked_frames(struct sock *sk)
1413{
1414 struct sk_buff *skb;
1415
1416 while ((skb = skb_peek(TX_QUEUE(sk))) &&
1417 l2cap_pi(sk)->unacked_frames) {
1418 if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq)
1419 break;
1420
1421 skb = skb_dequeue(TX_QUEUE(sk));
1422 kfree_skb(skb);
1423
1424 l2cap_pi(sk)->unacked_frames--;
1425 }
1426
1427 if (!l2cap_pi(sk)->unacked_frames)
1428 del_timer(&l2cap_pi(sk)->retrans_timer);
1429}
1430
1431static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
1432{
1433 struct l2cap_pinfo *pi = l2cap_pi(sk);
1434
1435 BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len);
1436
1437 hci_send_acl(pi->conn->hcon, skb, 0);
1438}
1439
1440static void l2cap_streaming_send(struct sock *sk)
1441{
1442 struct sk_buff *skb;
1443 struct l2cap_pinfo *pi = l2cap_pi(sk);
1444 u16 control, fcs;
1445
1446 while ((skb = skb_dequeue(TX_QUEUE(sk)))) {
1447 control = get_unaligned_le16(skb->data + L2CAP_HDR_SIZE);
1448 control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT;
1449 put_unaligned_le16(control, skb->data + L2CAP_HDR_SIZE);
1450
1451 if (pi->fcs == L2CAP_FCS_CRC16) {
1452 fcs = crc16(0, (u8 *)skb->data, skb->len - 2);
1453 put_unaligned_le16(fcs, skb->data + skb->len - 2);
1454 }
1455
1456 l2cap_do_send(sk, skb);
1457
1458 pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
1459 }
1460}
1461
1462static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
1463{
1464 struct l2cap_pinfo *pi = l2cap_pi(sk);
1465 struct sk_buff *skb, *tx_skb;
1466 u16 control, fcs;
1467
1468 skb = skb_peek(TX_QUEUE(sk));
1469 if (!skb)
1470 return;
1471
1472 do {
1473 if (bt_cb(skb)->tx_seq == tx_seq)
1474 break;
1475
1476 if (skb_queue_is_last(TX_QUEUE(sk), skb))
1477 return;
1478
1479 } while ((skb = skb_queue_next(TX_QUEUE(sk), skb)));
1480
1481 if (pi->remote_max_tx &&
1482 bt_cb(skb)->retries == pi->remote_max_tx) {
1483 l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
1484 return;
1485 }
1486
1487 tx_skb = skb_clone(skb, GFP_ATOMIC);
1488 bt_cb(skb)->retries++;
1489 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
1490
1491 if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
1492 control |= L2CAP_CTRL_FINAL;
1493 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
1494 }
1495
1496 control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
1497 | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
1498
1499 put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
1500
1501 if (pi->fcs == L2CAP_FCS_CRC16) {
1502 fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
1503 put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
1504 }
1505
1506 l2cap_do_send(sk, tx_skb);
1507}
1508
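/* Transmit pending I-frames in ERTM mode: clone each skb on the transmit
 * queue, fill in the ReqSeq/TxSeq (and Final) control bits, append the
 * CRC16 FCS when enabled, arm the retransmission timer and count the
 * frame as unacked. Stops when the transmit window fills up or the queue
 * runs out; returns the number of frames sent.
 */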
1509static int l2cap_ertm_send(struct sock *sk)
1510{
1511 struct sk_buff *skb, *tx_skb;
1512 struct l2cap_pinfo *pi = l2cap_pi(sk);
1513 u16 control, fcs;
1514 int nsent = 0;
1515
1516 if (sk->sk_state != BT_CONNECTED)
1517 return -ENOTCONN;
1518
1519 while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) {
1520
1521 if (pi->remote_max_tx &&
1522 bt_cb(skb)->retries == pi->remote_max_tx) {
1523 l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
1524 break;
1525 }
1526
1527 tx_skb = skb_clone(skb, GFP_ATOMIC);
1528
1529 bt_cb(skb)->retries++;
1530
1531 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
1532 control &= L2CAP_CTRL_SAR;
1533
1534 if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
1535 control |= L2CAP_CTRL_FINAL;
1536 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
1537 }
1538 control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
1539 | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
1540 put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
1541
1542
1543 if (pi->fcs == L2CAP_FCS_CRC16) {
1544 fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2);
1545 put_unaligned_le16(fcs, skb->data + tx_skb->len - 2);
1546 }
1547
1548 l2cap_do_send(sk, tx_skb);
1549
1550 __mod_retrans_timer();
1551
1552 bt_cb(skb)->tx_seq = pi->next_tx_seq;
1553 pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
1554
1555 pi->unacked_frames++;
1556 pi->frames_sent++;
1557
1558 if (skb_queue_is_last(TX_QUEUE(sk), skb))
1559 sk->sk_send_head = NULL;
1560 else
1561 sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb);
1562
1563 nsent++;
1564 }
1565
1566 return nsent;
1567}
1568
1569static int l2cap_retransmit_frames(struct sock *sk)
1570{
1571 struct l2cap_pinfo *pi = l2cap_pi(sk);
1572 int ret;
1573
1574 if (!skb_queue_empty(TX_QUEUE(sk)))
1575 sk->sk_send_head = TX_QUEUE(sk)->next;
1576
1577 pi->next_tx_seq = pi->expected_ack_seq;
1578 ret = l2cap_ertm_send(sk);
1579 return ret;
1580}
1581
1582static void l2cap_send_ack(struct l2cap_pinfo *pi)
1583{
1584 struct sock *sk = (struct sock *)pi;
1585 u16 control = 0;
1586
1587 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
1588
1589 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
1590 control |= L2CAP_SUPER_RCV_NOT_READY;
1591 pi->conn_state |= L2CAP_CONN_RNR_SENT;
1592 l2cap_send_sframe(pi, control);
1593 return;
1594 }
1595
1596 if (l2cap_ertm_send(sk) > 0)
1597 return;
1598
1599 control |= L2CAP_SUPER_RCV_READY;
1600 l2cap_send_sframe(pi, control);
1601}
1602
1603static void l2cap_send_srejtail(struct sock *sk)
1604{
1605 struct srej_list *tail;
1606 u16 control;
1607
1608 control = L2CAP_SUPER_SELECT_REJECT;
1609 control |= L2CAP_CTRL_FINAL;
1610
1611 tail = list_entry(SREJ_LIST(sk)->prev, struct srej_list, list);
1612 control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
1613
1614 l2cap_send_sframe(l2cap_pi(sk), control);
1615}
1616
1617static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb)
1618{
1619 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1620 struct sk_buff **frag;
1621 int err, sent = 0;
1622
1623 if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count))
1624 return -EFAULT;
1625
1626 sent += count;
1627 len -= count;
1628
1629 /* Continuation fragments (no L2CAP header) */
1630 frag = &skb_shinfo(skb)->frag_list;
1631 while (len) {
1632 count = min_t(unsigned int, conn->mtu, len);
1633
1634 *frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err);
1635 if (!*frag)
1636 return err;
1637 if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count))
1638 return -EFAULT;
1639
1640 sent += count;
1641 len -= count;
1642
1643 frag = &(*frag)->next;
1644 }
1645
1646 return sent;
1647}
1648
1649static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len)
1650{
1651 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1652 struct sk_buff *skb;
1653 int err, count, hlen = L2CAP_HDR_SIZE + 2;
1654 struct l2cap_hdr *lh;
1655
1656 BT_DBG("sk %p len %d", sk, (int)len);
1657
1658 count = min_t(unsigned int, (conn->mtu - hlen), len);
1659 skb = bt_skb_send_alloc(sk, count + hlen,
1660 msg->msg_flags & MSG_DONTWAIT, &err);
1661 if (!skb)
1662 return ERR_PTR(err);
1663
1664 /* Create L2CAP header */
1665 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1666 lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
1667 lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
1668 put_unaligned_le16(l2cap_pi(sk)->psm, skb_put(skb, 2));
1669
1670 err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb);
1671 if (unlikely(err < 0)) {
1672 kfree_skb(skb);
1673 return ERR_PTR(err);
1674 }
1675 return skb;
1676}
1677
1678static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len)
1679{
1680 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1681 struct sk_buff *skb;
1682 int err, count, hlen = L2CAP_HDR_SIZE;
1683 struct l2cap_hdr *lh;
1684
1685 BT_DBG("sk %p len %d", sk, (int)len);
1686
1687 count = min_t(unsigned int, (conn->mtu - hlen), len);
1688 skb = bt_skb_send_alloc(sk, count + hlen,
1689 msg->msg_flags & MSG_DONTWAIT, &err);
1690 if (!skb)
1691 return ERR_PTR(err);
1692
1693 /* Create L2CAP header */
1694 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1695 lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
1696 lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
1697
1698 err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb);
1699 if (unlikely(err < 0)) {
1700 kfree_skb(skb);
1701 return ERR_PTR(err);
1702 }
1703 return skb;
1704}
1705
1706static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen)
1707{
1708 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1709 struct sk_buff *skb;
1710 int err, count, hlen = L2CAP_HDR_SIZE + 2;
1711 struct l2cap_hdr *lh;
1712
1713 BT_DBG("sk %p len %d", sk, (int)len);
1714
1715 if (!conn)
1716 return ERR_PTR(-ENOTCONN);
1717
1718 if (sdulen)
1719 hlen += 2;
1720
1721 if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16)
1722 hlen += 2;
1723
1724 count = min_t(unsigned int, (conn->mtu - hlen), len);
1725 skb = bt_skb_send_alloc(sk, count + hlen,
1726 msg->msg_flags & MSG_DONTWAIT, &err);
1727 if (!skb)
1728 return ERR_PTR(err);
1729
1730 /* Create L2CAP header */
1731 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1732 lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
1733 lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
1734 put_unaligned_le16(control, skb_put(skb, 2));
1735 if (sdulen)
1736 put_unaligned_le16(sdulen, skb_put(skb, 2));
1737
1738 err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb);
1739 if (unlikely(err < 0)) {
1740 kfree_skb(skb);
1741 return ERR_PTR(err);
1742 }
1743
1744 if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16)
1745 put_unaligned_le16(0, skb_put(skb, 2));
1746
1747 bt_cb(skb)->retries = 0;
1748 return skb;
1749}
1750
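/* Segment an SDU larger than the remote MPS into a Start frame carrying
 * the SDU length followed by Continuation/End frames, then splice the
 * chain onto the transmit queue. Returns the number of bytes queued or
 * a negative error.
 */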
1751static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len)
1752{
1753 struct l2cap_pinfo *pi = l2cap_pi(sk);
1754 struct sk_buff *skb;
1755 struct sk_buff_head sar_queue;
1756 u16 control;
1757 size_t size = 0;
1758
1759 skb_queue_head_init(&sar_queue);
1760 control = L2CAP_SDU_START;
1761 skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len);
1762 if (IS_ERR(skb))
1763 return PTR_ERR(skb);
1764
1765 __skb_queue_tail(&sar_queue, skb);
1766 len -= pi->remote_mps;
1767 size += pi->remote_mps;
1768
1769 while (len > 0) {
1770 size_t buflen;
1771
1772 if (len > pi->remote_mps) {
1773 control = L2CAP_SDU_CONTINUE;
1774 buflen = pi->remote_mps;
1775 } else {
1776 control = L2CAP_SDU_END;
1777 buflen = len;
1778 }
1779
1780 skb = l2cap_create_iframe_pdu(sk, msg, buflen, control, 0);
1781 if (IS_ERR(skb)) {
1782 skb_queue_purge(&sar_queue);
1783 return PTR_ERR(skb);
1784 }
1785
1786 __skb_queue_tail(&sar_queue, skb);
1787 len -= buflen;
1788 size += buflen;
1789 }
1790 skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
1791 if (sk->sk_send_head == NULL)
1792 sk->sk_send_head = sar_queue.next;
1793
1794 return size;
1795}
1796
1797static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
1798{
1799 struct sock *sk = sock->sk;
1800 struct l2cap_pinfo *pi = l2cap_pi(sk);
1801 struct sk_buff *skb;
1802 u16 control;
1803 int err;
1804
1805 BT_DBG("sock %p, sk %p", sock, sk);
1806
1807 err = sock_error(sk);
1808 if (err)
1809 return err;
1810
1811 if (msg->msg_flags & MSG_OOB)
1812 return -EOPNOTSUPP;
1813
1814 lock_sock(sk);
1815
1816 if (sk->sk_state != BT_CONNECTED) {
1817 err = -ENOTCONN;
1818 goto done;
1819 }
1820
1821 /* Connectionless channel */
1822 if (sk->sk_type == SOCK_DGRAM) {
1823 skb = l2cap_create_connless_pdu(sk, msg, len);
1824 if (IS_ERR(skb)) {
1825 err = PTR_ERR(skb);
1826 } else {
1827 l2cap_do_send(sk, skb);
1828 err = len;
1829 }
1830 goto done;
1831 }
1832
1833 switch (pi->mode) {
1834 case L2CAP_MODE_BASIC:
1835 /* Check outgoing MTU */
1836 if (len > pi->omtu) {
1837 err = -EMSGSIZE;
1838 goto done;
1839 }
1840
1841 /* Create a basic PDU */
1842 skb = l2cap_create_basic_pdu(sk, msg, len);
1843 if (IS_ERR(skb)) {
1844 err = PTR_ERR(skb);
1845 goto done;
1846 }
1847
1848 l2cap_do_send(sk, skb);
1849 err = len;
1850 break;
1851
1852 case L2CAP_MODE_ERTM:
1853 case L2CAP_MODE_STREAMING:
1854 /* Entire SDU fits into one PDU */
1855 if (len <= pi->remote_mps) {
1856 control = L2CAP_SDU_UNSEGMENTED;
1857 skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0);
1858 if (IS_ERR(skb)) {
1859 err = PTR_ERR(skb);
1860 goto done;
1861 }
1862 __skb_queue_tail(TX_QUEUE(sk), skb);
1863
1864 if (sk->sk_send_head == NULL)
1865 sk->sk_send_head = skb;
1866
1867 } else {
 1868 			/* Segment SDU into multiple PDUs */
1869 err = l2cap_sar_segment_sdu(sk, msg, len);
1870 if (err < 0)
1871 goto done;
1872 }
1873
1874 if (pi->mode == L2CAP_MODE_STREAMING) {
1875 l2cap_streaming_send(sk);
1876 } else {
1877 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
1878 (pi->conn_state & L2CAP_CONN_WAIT_F)) {
1879 err = len;
1880 break;
1881 }
1882 err = l2cap_ertm_send(sk);
1883 }
1884
1885 if (err >= 0)
1886 err = len;
1887 break;
1888
1889 default:
1890 BT_DBG("bad state %1.1x", pi->mode);
1891 err = -EBADFD;
1892 }
1893
1894done:
1895 release_sock(sk);
1896 return err;
1897}
1898
1899static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
1900{
1901 struct sock *sk = sock->sk;
1902
1903 lock_sock(sk);
1904
1905 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
1906 struct l2cap_conn_rsp rsp;
1907 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1908 u8 buf[128];
1909
1910 sk->sk_state = BT_CONFIG;
1911
1912 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
1913 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
1914 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
1915 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
1916 l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
1917 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
1918
1919 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
1920 release_sock(sk);
1921 return 0;
1922 }
1923
1924 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
1925 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
1926 l2cap_build_conf_req(sk, buf), buf);
1927 l2cap_pi(sk)->num_conf_req++;
1928
1929 release_sock(sk);
1930 return 0;
1931 }
1932
1933 release_sock(sk);
1934
1935 if (sock->type == SOCK_STREAM)
1936 return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
1937
1938 return bt_sock_recvmsg(iocb, sock, msg, len, flags);
1939}
1940
1941static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
1942{
1943 struct sock *sk = sock->sk;
1944 struct l2cap_options opts;
1945 int len, err = 0;
1946 u32 opt;
1947
1948 BT_DBG("sk %p", sk);
1949
1950 lock_sock(sk);
1951
1952 switch (optname) {
1953 case L2CAP_OPTIONS:
1954 if (sk->sk_state == BT_CONNECTED) {
1955 err = -EINVAL;
1956 break;
1957 }
1958
1959 opts.imtu = l2cap_pi(sk)->imtu;
1960 opts.omtu = l2cap_pi(sk)->omtu;
1961 opts.flush_to = l2cap_pi(sk)->flush_to;
1962 opts.mode = l2cap_pi(sk)->mode;
1963 opts.fcs = l2cap_pi(sk)->fcs;
1964 opts.max_tx = l2cap_pi(sk)->max_tx;
1965 opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
1966
1967 len = min_t(unsigned int, sizeof(opts), optlen);
1968 if (copy_from_user((char *) &opts, optval, len)) {
1969 err = -EFAULT;
1970 break;
1971 }
1972
1973 if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
1974 err = -EINVAL;
1975 break;
1976 }
1977
1978 l2cap_pi(sk)->mode = opts.mode;
1979 switch (l2cap_pi(sk)->mode) {
1980 case L2CAP_MODE_BASIC:
1981 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
1982 break;
1983 case L2CAP_MODE_ERTM:
1984 case L2CAP_MODE_STREAMING:
1985 if (!disable_ertm)
1986 break;
1987 /* fall through */
1988 default:
1989 err = -EINVAL;
1990 break;
1991 }
1992
1993 l2cap_pi(sk)->imtu = opts.imtu;
1994 l2cap_pi(sk)->omtu = opts.omtu;
1995 l2cap_pi(sk)->fcs = opts.fcs;
1996 l2cap_pi(sk)->max_tx = opts.max_tx;
1997 l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
1998 break;
1999
2000 case L2CAP_LM:
2001 if (get_user(opt, (u32 __user *) optval)) {
2002 err = -EFAULT;
2003 break;
2004 }
2005
2006 if (opt & L2CAP_LM_AUTH)
2007 l2cap_pi(sk)->sec_level = BT_SECURITY_LOW;
2008 if (opt & L2CAP_LM_ENCRYPT)
2009 l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
2010 if (opt & L2CAP_LM_SECURE)
2011 l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH;
2012
2013 l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER);
2014 l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE);
2015 break;
2016
2017 default:
2018 err = -ENOPROTOOPT;
2019 break;
2020 }
2021
2022 release_sock(sk);
2023 return err;
2024}
2025
2026static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
2027{
2028 struct sock *sk = sock->sk;
2029 struct bt_security sec;
2030 int len, err = 0;
2031 u32 opt;
2032
2033 BT_DBG("sk %p", sk);
2034
2035 if (level == SOL_L2CAP)
2036 return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
2037
2038 if (level != SOL_BLUETOOTH)
2039 return -ENOPROTOOPT;
2040
2041 lock_sock(sk);
2042
2043 switch (optname) {
2044 case BT_SECURITY:
2045 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
2046 && sk->sk_type != SOCK_RAW) {
2047 err = -EINVAL;
2048 break;
2049 }
2050
2051 sec.level = BT_SECURITY_LOW;
2052
2053 len = min_t(unsigned int, sizeof(sec), optlen);
2054 if (copy_from_user((char *) &sec, optval, len)) {
2055 err = -EFAULT;
2056 break;
2057 }
2058
2059 if (sec.level < BT_SECURITY_LOW ||
2060 sec.level > BT_SECURITY_HIGH) {
2061 err = -EINVAL;
2062 break;
2063 }
2064
2065 l2cap_pi(sk)->sec_level = sec.level;
2066 break;
2067
2068 case BT_DEFER_SETUP:
2069 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
2070 err = -EINVAL;
2071 break;
2072 }
2073
2074 if (get_user(opt, (u32 __user *) optval)) {
2075 err = -EFAULT;
2076 break;
2077 }
2078
2079 bt_sk(sk)->defer_setup = opt;
2080 break;
2081
2082 default:
2083 err = -ENOPROTOOPT;
2084 break;
2085 }
2086
2087 release_sock(sk);
2088 return err;
2089}
2090
2091static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
2092{
2093 struct sock *sk = sock->sk;
2094 struct l2cap_options opts;
2095 struct l2cap_conninfo cinfo;
2096 int len, err = 0;
2097 u32 opt;
2098
2099 BT_DBG("sk %p", sk);
2100
2101 if (get_user(len, optlen))
2102 return -EFAULT;
2103
2104 lock_sock(sk);
2105
2106 switch (optname) {
2107 case L2CAP_OPTIONS:
2108 opts.imtu = l2cap_pi(sk)->imtu;
2109 opts.omtu = l2cap_pi(sk)->omtu;
2110 opts.flush_to = l2cap_pi(sk)->flush_to;
2111 opts.mode = l2cap_pi(sk)->mode;
2112 opts.fcs = l2cap_pi(sk)->fcs;
2113 opts.max_tx = l2cap_pi(sk)->max_tx;
2114 opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
2115
2116 len = min_t(unsigned int, len, sizeof(opts));
2117 if (copy_to_user(optval, (char *) &opts, len))
2118 err = -EFAULT;
2119
2120 break;
2121
2122 case L2CAP_LM:
2123 switch (l2cap_pi(sk)->sec_level) {
2124 case BT_SECURITY_LOW:
2125 opt = L2CAP_LM_AUTH;
2126 break;
2127 case BT_SECURITY_MEDIUM:
2128 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
2129 break;
2130 case BT_SECURITY_HIGH:
2131 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
2132 L2CAP_LM_SECURE;
2133 break;
2134 default:
2135 opt = 0;
2136 break;
2137 }
2138
2139 if (l2cap_pi(sk)->role_switch)
2140 opt |= L2CAP_LM_MASTER;
2141
2142 if (l2cap_pi(sk)->force_reliable)
2143 opt |= L2CAP_LM_RELIABLE;
2144
2145 if (put_user(opt, (u32 __user *) optval))
2146 err = -EFAULT;
2147 break;
2148
2149 case L2CAP_CONNINFO:
2150 if (sk->sk_state != BT_CONNECTED &&
2151 !(sk->sk_state == BT_CONNECT2 &&
2152 bt_sk(sk)->defer_setup)) {
2153 err = -ENOTCONN;
2154 break;
2155 }
2156
2157 cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle;
2158 memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3);
2159
2160 len = min_t(unsigned int, len, sizeof(cinfo));
2161 if (copy_to_user(optval, (char *) &cinfo, len))
2162 err = -EFAULT;
2163
2164 break;
2165
2166 default:
2167 err = -ENOPROTOOPT;
2168 break;
2169 }
2170
2171 release_sock(sk);
2172 return err;
2173}
2174
2175static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
2176{
2177 struct sock *sk = sock->sk;
2178 struct bt_security sec;
2179 int len, err = 0;
2180
2181 BT_DBG("sk %p", sk);
2182
2183 if (level == SOL_L2CAP)
2184 return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
2185
2186 if (level != SOL_BLUETOOTH)
2187 return -ENOPROTOOPT;
2188
2189 if (get_user(len, optlen))
2190 return -EFAULT;
2191
2192 lock_sock(sk);
2193
2194 switch (optname) {
2195 case BT_SECURITY:
2196 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
2197 && sk->sk_type != SOCK_RAW) {
2198 err = -EINVAL;
2199 break;
2200 }
2201
2202 sec.level = l2cap_pi(sk)->sec_level;
2203
2204 len = min_t(unsigned int, len, sizeof(sec));
2205 if (copy_to_user(optval, (char *) &sec, len))
2206 err = -EFAULT;
2207
2208 break;
2209
2210 case BT_DEFER_SETUP:
2211 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
2212 err = -EINVAL;
2213 break;
2214 }
2215
2216 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
2217 err = -EFAULT;
2218
2219 break;
2220
2221 default:
2222 err = -ENOPROTOOPT;
2223 break;
2224 }
2225
2226 release_sock(sk);
2227 return err;
2228}
2229
2230static int l2cap_sock_shutdown(struct socket *sock, int how)
2231{
2232 struct sock *sk = sock->sk;
2233 int err = 0;
2234
2235 BT_DBG("sock %p, sk %p", sock, sk);
2236
2237 if (!sk)
2238 return 0;
2239
2240 lock_sock(sk);
2241 if (!sk->sk_shutdown) {
2242 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
2243 err = __l2cap_wait_ack(sk);
2244
2245 sk->sk_shutdown = SHUTDOWN_MASK;
2246 l2cap_sock_clear_timer(sk);
2247 __l2cap_sock_close(sk, 0);
2248
2249 if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
2250 err = bt_sock_wait_state(sk, BT_CLOSED,
2251 sk->sk_lingertime);
2252 }
2253
2254 if (!err && sk->sk_err)
2255 err = -sk->sk_err;
2256
2257 release_sock(sk);
2258 return err;
2259}
2260
2261static int l2cap_sock_release(struct socket *sock)
2262{
2263 struct sock *sk = sock->sk;
2264 int err;
2265
2266 BT_DBG("sock %p, sk %p", sock, sk);
2267
2268 if (!sk)
2269 return 0;
2270
2271 err = l2cap_sock_shutdown(sock, 2);
2272
2273 sock_orphan(sk);
2274 l2cap_sock_kill(sk);
2275 return err;
2276}
2277
2278static void l2cap_chan_ready(struct sock *sk)
2279{
2280 struct sock *parent = bt_sk(sk)->parent;
2281
2282 BT_DBG("sk %p, parent %p", sk, parent);
2283
2284 l2cap_pi(sk)->conf_state = 0;
2285 l2cap_sock_clear_timer(sk);
2286
2287 if (!parent) {
2288 /* Outgoing channel.
2289 * Wake up socket sleeping on connect.
2290 */
2291 sk->sk_state = BT_CONNECTED;
2292 sk->sk_state_change(sk);
2293 } else {
2294 /* Incoming channel.
2295 * Wake up socket sleeping on accept.
2296 */
2297 parent->sk_data_ready(parent, 0);
2298 }
2299}
2300
2301/* Copy frame to all raw sockets on that connection */
2302static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb)
2303{
2304 struct l2cap_chan_list *l = &conn->chan_list;
2305 struct sk_buff *nskb;
2306 struct sock *sk;
2307
2308 BT_DBG("conn %p", conn);
2309
2310 read_lock(&l->lock);
2311 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
2312 if (sk->sk_type != SOCK_RAW)
2313 continue;
2314
2315 /* Don't send frame to the socket it came from */
2316 if (skb->sk == sk)
2317 continue;
2318 nskb = skb_clone(skb, GFP_ATOMIC);
2319 if (!nskb)
2320 continue;
2321
2322 if (sock_queue_rcv_skb(sk, nskb))
2323 kfree_skb(nskb);
2324 }
2325 read_unlock(&l->lock);
2326}
2327
2328/* ---- L2CAP signalling commands ---- */
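/* Build a signalling PDU: L2CAP header on the signalling CID, command
 * header (code, ident, length) and payload, fragmented into continuation
 * skbs whenever it does not fit into the connection MTU.
 */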
2329static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
2330 u8 code, u8 ident, u16 dlen, void *data)
2331{
2332 struct sk_buff *skb, **frag;
2333 struct l2cap_cmd_hdr *cmd;
2334 struct l2cap_hdr *lh;
2335 int len, count;
2336
2337 BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %d",
2338 conn, code, ident, dlen);
2339
2340 len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen;
2341 count = min_t(unsigned int, conn->mtu, len);
2342
2343 skb = bt_skb_alloc(count, GFP_ATOMIC);
2344 if (!skb)
2345 return NULL;
2346
2347 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
2348 lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
2349 lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
2350
2351 cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
2352 cmd->code = code;
2353 cmd->ident = ident;
2354 cmd->len = cpu_to_le16(dlen);
2355
2356 if (dlen) {
2357 count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
2358 memcpy(skb_put(skb, count), data, count);
2359 data += count;
2360 }
2361
2362 len -= skb->len;
2363
2364 /* Continuation fragments (no L2CAP header) */
2365 frag = &skb_shinfo(skb)->frag_list;
2366 while (len) {
2367 count = min_t(unsigned int, conn->mtu, len);
2368
2369 *frag = bt_skb_alloc(count, GFP_ATOMIC);
2370 if (!*frag)
2371 goto fail;
2372
2373 memcpy(skb_put(*frag, count), data, count);
2374
2375 len -= count;
2376 data += count;
2377
2378 frag = &(*frag)->next;
2379 }
2380
2381 return skb;
2382
2383fail:
2384 kfree_skb(skb);
2385 return NULL;
2386}
2387
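/* Helpers for walking and appending the type/length/value options that
 * make up the payload of Configuration Requests and Responses.
 */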
2388static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned long *val)
2389{
2390 struct l2cap_conf_opt *opt = *ptr;
2391 int len;
2392
2393 len = L2CAP_CONF_OPT_SIZE + opt->len;
2394 *ptr += len;
2395
2396 *type = opt->type;
2397 *olen = opt->len;
2398
2399 switch (opt->len) {
2400 case 1:
2401 *val = *((u8 *) opt->val);
2402 break;
2403
2404 case 2:
2405 *val = get_unaligned_le16(opt->val);
2406 break;
2407
2408 case 4:
2409 *val = get_unaligned_le32(opt->val);
2410 break;
2411
2412 default:
2413 *val = (unsigned long) opt->val;
2414 break;
2415 }
2416
2417 BT_DBG("type 0x%2.2x len %d val 0x%lx", *type, opt->len, *val);
2418 return len;
2419}
2420
2421static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
2422{
2423 struct l2cap_conf_opt *opt = *ptr;
2424
2425 BT_DBG("type 0x%2.2x len %d val 0x%lx", type, len, val);
2426
2427 opt->type = type;
2428 opt->len = len;
2429
2430 switch (len) {
2431 case 1:
2432 *((u8 *) opt->val) = val;
2433 break;
2434
2435 case 2:
2436 put_unaligned_le16(val, opt->val);
2437 break;
2438
2439 case 4:
2440 put_unaligned_le32(val, opt->val);
2441 break;
2442
2443 default:
2444 memcpy(opt->val, (void *) val, len);
2445 break;
2446 }
2447
2448 *ptr += L2CAP_CONF_OPT_SIZE + len;
2449}
2450
2451static void l2cap_ack_timeout(unsigned long arg)
2452{
2453 struct sock *sk = (void *) arg;
2454
2455 bh_lock_sock(sk);
2456 l2cap_send_ack(l2cap_pi(sk));
2457 bh_unlock_sock(sk);
2458}
2459
2460static inline void l2cap_ertm_init(struct sock *sk)
2461{
2462 l2cap_pi(sk)->expected_ack_seq = 0;
2463 l2cap_pi(sk)->unacked_frames = 0;
2464 l2cap_pi(sk)->buffer_seq = 0;
2465 l2cap_pi(sk)->num_acked = 0;
2466 l2cap_pi(sk)->frames_sent = 0;
2467
2468 setup_timer(&l2cap_pi(sk)->retrans_timer,
2469 l2cap_retrans_timeout, (unsigned long) sk);
2470 setup_timer(&l2cap_pi(sk)->monitor_timer,
2471 l2cap_monitor_timeout, (unsigned long) sk);
2472 setup_timer(&l2cap_pi(sk)->ack_timer,
2473 l2cap_ack_timeout, (unsigned long) sk);
2474
2475 __skb_queue_head_init(SREJ_QUEUE(sk));
2476 __skb_queue_head_init(BUSY_QUEUE(sk));
2477
2478 INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
2479
2480 sk->sk_backlog_rcv = l2cap_ertm_data_rcv;
2481}
2482
2483static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
2484{
2485 switch (mode) {
2486 case L2CAP_MODE_STREAMING:
2487 case L2CAP_MODE_ERTM:
2488 if (l2cap_mode_supported(mode, remote_feat_mask))
2489 return mode;
2490 /* fall through */
2491 default:
2492 return L2CAP_MODE_BASIC;
2493 }
2494}
2495
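/* Build our Configuration Request: pick the channel mode based on the
 * remote feature mask (unless it was fixed locally) and add the MTU,
 * RFC and FCS options appropriate for that mode.
 */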
2496static int l2cap_build_conf_req(struct sock *sk, void *data)
2497{
2498 struct l2cap_pinfo *pi = l2cap_pi(sk);
2499 struct l2cap_conf_req *req = data;
2500 struct l2cap_conf_rfc rfc = { .mode = pi->mode };
2501 void *ptr = req->data;
2502
2503 BT_DBG("sk %p", sk);
2504
2505 if (pi->num_conf_req || pi->num_conf_rsp)
2506 goto done;
2507
2508 switch (pi->mode) {
2509 case L2CAP_MODE_STREAMING:
2510 case L2CAP_MODE_ERTM:
2511 if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE)
2512 break;
2513
2514 /* fall through */
2515 default:
2516 pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
2517 break;
2518 }
2519
2520done:
2521 switch (pi->mode) {
2522 case L2CAP_MODE_BASIC:
2523 if (pi->imtu != L2CAP_DEFAULT_MTU)
2524 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
2525
2526 if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) &&
2527 !(pi->conn->feat_mask & L2CAP_FEAT_STREAMING))
2528 break;
2529
2530 rfc.mode = L2CAP_MODE_BASIC;
2531 rfc.txwin_size = 0;
2532 rfc.max_transmit = 0;
2533 rfc.retrans_timeout = 0;
2534 rfc.monitor_timeout = 0;
2535 rfc.max_pdu_size = 0;
2536
2537 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
2538 (unsigned long) &rfc);
2539 break;
2540
2541 case L2CAP_MODE_ERTM:
2542 rfc.mode = L2CAP_MODE_ERTM;
2543 rfc.txwin_size = pi->tx_win;
2544 rfc.max_transmit = pi->max_tx;
2545 rfc.retrans_timeout = 0;
2546 rfc.monitor_timeout = 0;
2547 rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
2548 if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
2549 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
2550
2551 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
2552 (unsigned long) &rfc);
2553
2554 if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
2555 break;
2556
2557 if (pi->fcs == L2CAP_FCS_NONE ||
2558 pi->conf_state & L2CAP_CONF_NO_FCS_RECV) {
2559 pi->fcs = L2CAP_FCS_NONE;
2560 l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs);
2561 }
2562 break;
2563
2564 case L2CAP_MODE_STREAMING:
2565 rfc.mode = L2CAP_MODE_STREAMING;
2566 rfc.txwin_size = 0;
2567 rfc.max_transmit = 0;
2568 rfc.retrans_timeout = 0;
2569 rfc.monitor_timeout = 0;
2570 rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
2571 if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
2572 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
2573
2574 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
2575 (unsigned long) &rfc);
2576
2577 if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
2578 break;
2579
2580 if (pi->fcs == L2CAP_FCS_NONE ||
2581 pi->conf_state & L2CAP_CONF_NO_FCS_RECV) {
2582 pi->fcs = L2CAP_FCS_NONE;
2583 l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs);
2584 }
2585 break;
2586 }
2587
2588 /* FIXME: Need actual value of the flush timeout */
2589 //if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
2590 // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
2591
2592 req->dcid = cpu_to_le16(pi->dcid);
2593 req->flags = cpu_to_le16(0);
2594
2595 return ptr - data;
2596}
2597
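/* Parse the peer's buffered Configuration Request and build our response,
 * accepting or adjusting the requested MTU, mode, FCS and RFC parameters.
 */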
2598static int l2cap_parse_conf_req(struct sock *sk, void *data)
2599{
2600 struct l2cap_pinfo *pi = l2cap_pi(sk);
2601 struct l2cap_conf_rsp *rsp = data;
2602 void *ptr = rsp->data;
2603 void *req = pi->conf_req;
2604 int len = pi->conf_len;
2605 int type, hint, olen;
2606 unsigned long val;
2607 struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
2608 u16 mtu = L2CAP_DEFAULT_MTU;
2609 u16 result = L2CAP_CONF_SUCCESS;
2610
2611 BT_DBG("sk %p", sk);
2612
2613 while (len >= L2CAP_CONF_OPT_SIZE) {
2614 len -= l2cap_get_conf_opt(&req, &type, &olen, &val);
2615
2616 hint = type & L2CAP_CONF_HINT;
2617 type &= L2CAP_CONF_MASK;
2618
2619 switch (type) {
2620 case L2CAP_CONF_MTU:
2621 mtu = val;
2622 break;
2623
2624 case L2CAP_CONF_FLUSH_TO:
2625 pi->flush_to = val;
2626 break;
2627
2628 case L2CAP_CONF_QOS:
2629 break;
2630
2631 case L2CAP_CONF_RFC:
2632 if (olen == sizeof(rfc))
2633 memcpy(&rfc, (void *) val, olen);
2634 break;
2635
2636 case L2CAP_CONF_FCS:
2637 if (val == L2CAP_FCS_NONE)
2638 pi->conf_state |= L2CAP_CONF_NO_FCS_RECV;
2639
2640 break;
2641
2642 default:
2643 if (hint)
2644 break;
2645
2646 result = L2CAP_CONF_UNKNOWN;
2647 *((u8 *) ptr++) = type;
2648 break;
2649 }
2650 }
2651
2652 if (pi->num_conf_rsp || pi->num_conf_req > 1)
2653 goto done;
2654
2655 switch (pi->mode) {
2656 case L2CAP_MODE_STREAMING:
2657 case L2CAP_MODE_ERTM:
2658 if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) {
2659 pi->mode = l2cap_select_mode(rfc.mode,
2660 pi->conn->feat_mask);
2661 break;
2662 }
2663
2664 if (pi->mode != rfc.mode)
2665 return -ECONNREFUSED;
2666
2667 break;
2668 }
2669
2670done:
2671 if (pi->mode != rfc.mode) {
2672 result = L2CAP_CONF_UNACCEPT;
2673 rfc.mode = pi->mode;
2674
2675 if (pi->num_conf_rsp == 1)
2676 return -ECONNREFUSED;
2677
2678 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
2679 sizeof(rfc), (unsigned long) &rfc);
2680 }
2681
2682
2683 if (result == L2CAP_CONF_SUCCESS) {
2684 /* Configure output options and let the other side know
2685 * which ones we don't like. */
2686
2687 if (mtu < L2CAP_DEFAULT_MIN_MTU)
2688 result = L2CAP_CONF_UNACCEPT;
2689 else {
2690 pi->omtu = mtu;
2691 pi->conf_state |= L2CAP_CONF_MTU_DONE;
2692 }
2693 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu);
2694
2695 switch (rfc.mode) {
2696 case L2CAP_MODE_BASIC:
2697 pi->fcs = L2CAP_FCS_NONE;
2698 pi->conf_state |= L2CAP_CONF_MODE_DONE;
2699 break;
2700
2701 case L2CAP_MODE_ERTM:
2702 pi->remote_tx_win = rfc.txwin_size;
2703 pi->remote_max_tx = rfc.max_transmit;
2704
2705 if (le16_to_cpu(rfc.max_pdu_size) > pi->conn->mtu - 10)
2706 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
2707
2708 pi->remote_mps = le16_to_cpu(rfc.max_pdu_size);
2709
 2710 			rfc.retrans_timeout =
 2711 				cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO);
 2712 			rfc.monitor_timeout =
 2713 				cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO);
2714
2715 pi->conf_state |= L2CAP_CONF_MODE_DONE;
2716
2717 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
2718 sizeof(rfc), (unsigned long) &rfc);
2719
2720 break;
2721
2722 case L2CAP_MODE_STREAMING:
2723 if (le16_to_cpu(rfc.max_pdu_size) > pi->conn->mtu - 10)
2724 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
2725
2726 pi->remote_mps = le16_to_cpu(rfc.max_pdu_size);
2727
2728 pi->conf_state |= L2CAP_CONF_MODE_DONE;
2729
2730 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
2731 sizeof(rfc), (unsigned long) &rfc);
2732
2733 break;
2734
2735 default:
2736 result = L2CAP_CONF_UNACCEPT;
2737
2738 memset(&rfc, 0, sizeof(rfc));
2739 rfc.mode = pi->mode;
2740 }
2741
2742 if (result == L2CAP_CONF_SUCCESS)
2743 pi->conf_state |= L2CAP_CONF_OUTPUT_DONE;
2744 }
2745 rsp->scid = cpu_to_le16(pi->dcid);
2746 rsp->result = cpu_to_le16(result);
2747 rsp->flags = cpu_to_le16(0x0000);
2748
2749 return ptr - data;
2750}
2751
2752static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, u16 *result)
2753{
2754 struct l2cap_pinfo *pi = l2cap_pi(sk);
2755 struct l2cap_conf_req *req = data;
2756 void *ptr = req->data;
2757 int type, olen;
2758 unsigned long val;
 2759 	struct l2cap_conf_rfc rfc = { .mode = pi->mode };
2760
2761 BT_DBG("sk %p, rsp %p, len %d, req %p", sk, rsp, len, data);
2762
2763 while (len >= L2CAP_CONF_OPT_SIZE) {
2764 len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
2765
2766 switch (type) {
2767 case L2CAP_CONF_MTU:
2768 if (val < L2CAP_DEFAULT_MIN_MTU) {
2769 *result = L2CAP_CONF_UNACCEPT;
2770 pi->imtu = L2CAP_DEFAULT_MIN_MTU;
2771 } else
2772 pi->imtu = val;
2773 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
2774 break;
2775
2776 case L2CAP_CONF_FLUSH_TO:
2777 pi->flush_to = val;
2778 l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO,
2779 2, pi->flush_to);
2780 break;
2781
2782 case L2CAP_CONF_RFC:
2783 if (olen == sizeof(rfc))
2784 memcpy(&rfc, (void *)val, olen);
2785
2786 if ((pi->conf_state & L2CAP_CONF_STATE2_DEVICE) &&
2787 rfc.mode != pi->mode)
2788 return -ECONNREFUSED;
2789
2790 pi->fcs = 0;
2791
2792 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
2793 sizeof(rfc), (unsigned long) &rfc);
2794 break;
2795 }
2796 }
2797
2798 if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode)
2799 return -ECONNREFUSED;
2800
2801 pi->mode = rfc.mode;
2802
2803 if (*result == L2CAP_CONF_SUCCESS) {
2804 switch (rfc.mode) {
2805 case L2CAP_MODE_ERTM:
2806 pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
2807 pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
2808 pi->mps = le16_to_cpu(rfc.max_pdu_size);
2809 break;
2810 case L2CAP_MODE_STREAMING:
2811 pi->mps = le16_to_cpu(rfc.max_pdu_size);
2812 }
2813 }
2814
2815 req->dcid = cpu_to_le16(pi->dcid);
2816 req->flags = cpu_to_le16(0x0000);
2817
2818 return ptr - data;
2819}
2820
2821static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 flags)
2822{
2823 struct l2cap_conf_rsp *rsp = data;
2824 void *ptr = rsp->data;
2825
2826 BT_DBG("sk %p", sk);
2827
2828 rsp->scid = cpu_to_le16(l2cap_pi(sk)->dcid);
2829 rsp->result = cpu_to_le16(result);
2830 rsp->flags = cpu_to_le16(flags);
2831
2832 return ptr - data;
2833}
2834
2835static void l2cap_conf_rfc_get(struct sock *sk, void *rsp, int len)
2836{
2837 struct l2cap_pinfo *pi = l2cap_pi(sk);
2838 int type, olen;
2839 unsigned long val;
 2840 	struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
2841
2842 BT_DBG("sk %p, rsp %p, len %d", sk, rsp, len);
2843
2844 if ((pi->mode != L2CAP_MODE_ERTM) && (pi->mode != L2CAP_MODE_STREAMING))
2845 return;
2846
2847 while (len >= L2CAP_CONF_OPT_SIZE) {
2848 len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
2849
2850 switch (type) {
2851 case L2CAP_CONF_RFC:
2852 if (olen == sizeof(rfc))
2853 memcpy(&rfc, (void *)val, olen);
2854 goto done;
2855 }
2856 }
2857
2858done:
2859 switch (rfc.mode) {
2860 case L2CAP_MODE_ERTM:
2861 pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
2862 pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
2863 pi->mps = le16_to_cpu(rfc.max_pdu_size);
2864 break;
2865 case L2CAP_MODE_STREAMING:
2866 pi->mps = le16_to_cpu(rfc.max_pdu_size);
2867 }
2868}
2869
2870static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2871{
2872 struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data;
2873
2874 if (rej->reason != 0x0000)
2875 return 0;
2876
2877 if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) &&
2878 cmd->ident == conn->info_ident) {
2879 del_timer(&conn->info_timer);
2880
2881 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2882 conn->info_ident = 0;
2883
2884 l2cap_conn_start(conn);
2885 }
2886
2887 return 0;
2888}
2889
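/* Handle an incoming Connection Request: look up a listening socket for
 * the PSM, create and initialise a child channel, and reply with a
 * success, pending or error result. */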
2890static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2891{
2892 struct l2cap_chan_list *list = &conn->chan_list;
2893 struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
2894 struct l2cap_conn_rsp rsp;
2895 struct sock *parent, *sk = NULL;
2896 int result, status = L2CAP_CS_NO_INFO;
2897
2898 u16 dcid = 0, scid = __le16_to_cpu(req->scid);
2899 __le16 psm = req->psm;
2900
2901 BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid);
2902
2903	/* Check if we have a socket listening on this PSM */
2904 parent = l2cap_get_sock_by_psm(BT_LISTEN, psm, conn->src);
2905 if (!parent) {
2906 result = L2CAP_CR_BAD_PSM;
2907 goto sendresp;
2908 }
2909
2910 bh_lock_sock(parent);
2911
2912 /* Check if the ACL is secure enough (if not SDP) */
2913 if (psm != cpu_to_le16(0x0001) &&
2914 !hci_conn_check_link_mode(conn->hcon)) {
2915 conn->disc_reason = 0x05;
2916 result = L2CAP_CR_SEC_BLOCK;
2917 goto response;
2918 }
2919
2920 result = L2CAP_CR_NO_MEM;
2921
2922 /* Check for backlog size */
2923 if (sk_acceptq_is_full(parent)) {
2924 BT_DBG("backlog full %d", parent->sk_ack_backlog);
2925 goto response;
2926 }
2927
2928 sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC);
2929 if (!sk)
2930 goto response;
2931
2932 write_lock_bh(&list->lock);
2933
2934	/* Check if we already have a channel with that dcid */
2935 if (__l2cap_get_chan_by_dcid(list, scid)) {
2936 write_unlock_bh(&list->lock);
2937 sock_set_flag(sk, SOCK_ZAPPED);
2938 l2cap_sock_kill(sk);
2939 goto response;
2940 }
2941
2942 hci_conn_hold(conn->hcon);
2943
2944 l2cap_sock_init(sk, parent);
2945 bacpy(&bt_sk(sk)->src, conn->src);
2946 bacpy(&bt_sk(sk)->dst, conn->dst);
2947 l2cap_pi(sk)->psm = psm;
2948 l2cap_pi(sk)->dcid = scid;
2949
2950 __l2cap_chan_add(conn, sk, parent);
2951 dcid = l2cap_pi(sk)->scid;
2952
2953 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
2954
2955 l2cap_pi(sk)->ident = cmd->ident;
2956
2957 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) {
2958 if (l2cap_check_security(sk)) {
2959 if (bt_sk(sk)->defer_setup) {
2960 sk->sk_state = BT_CONNECT2;
2961 result = L2CAP_CR_PEND;
2962 status = L2CAP_CS_AUTHOR_PEND;
2963 parent->sk_data_ready(parent, 0);
2964 } else {
2965 sk->sk_state = BT_CONFIG;
2966 result = L2CAP_CR_SUCCESS;
2967 status = L2CAP_CS_NO_INFO;
2968 }
2969 } else {
2970 sk->sk_state = BT_CONNECT2;
2971 result = L2CAP_CR_PEND;
2972 status = L2CAP_CS_AUTHEN_PEND;
2973 }
2974 } else {
2975 sk->sk_state = BT_CONNECT2;
2976 result = L2CAP_CR_PEND;
2977 status = L2CAP_CS_NO_INFO;
2978 }
2979
2980 write_unlock_bh(&list->lock);
2981
2982response:
2983 bh_unlock_sock(parent);
2984
2985sendresp:
2986 rsp.scid = cpu_to_le16(scid);
2987 rsp.dcid = cpu_to_le16(dcid);
2988 rsp.result = cpu_to_le16(result);
2989 rsp.status = cpu_to_le16(status);
2990 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
2991
2992 if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) {
2993 struct l2cap_info_req info;
2994 info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
2995
2996 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
2997 conn->info_ident = l2cap_get_ident(conn);
2998
2999 mod_timer(&conn->info_timer, jiffies +
3000 msecs_to_jiffies(L2CAP_INFO_TIMEOUT));
3001
3002 l2cap_send_cmd(conn, conn->info_ident,
3003 L2CAP_INFO_REQ, sizeof(info), &info);
3004 }
3005
3006 if (sk && !(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
3007 result == L2CAP_CR_SUCCESS) {
3008 u8 buf[128];
3009 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
3010 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
3011 l2cap_build_conf_req(sk, buf), buf);
3012 l2cap_pi(sk)->num_conf_req++;
3013 }
3014
3015 return 0;
3016}
3017
3018static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
3019{
3020 struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data;
3021 u16 scid, dcid, result, status;
3022 struct sock *sk;
3023 u8 req[128];
3024
3025 scid = __le16_to_cpu(rsp->scid);
3026 dcid = __le16_to_cpu(rsp->dcid);
3027 result = __le16_to_cpu(rsp->result);
3028 status = __le16_to_cpu(rsp->status);
3029
3030 BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status);
3031
3032 if (scid) {
3033 sk = l2cap_get_chan_by_scid(&conn->chan_list, scid);
3034 if (!sk)
3035 return -EFAULT;
3036 } else {
3037 sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident);
3038 if (!sk)
3039 return -EFAULT;
3040 }
3041
3042 switch (result) {
3043 case L2CAP_CR_SUCCESS:
3044 sk->sk_state = BT_CONFIG;
3045 l2cap_pi(sk)->ident = 0;
3046 l2cap_pi(sk)->dcid = dcid;
3047 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
3048
3049 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)
3050 break;
3051
3052 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
3053
3054 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
3055 l2cap_build_conf_req(sk, req), req);
3056 l2cap_pi(sk)->num_conf_req++;
3057 break;
3058
3059 case L2CAP_CR_PEND:
3060 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
3061 break;
3062
3063 default:
3064 /* don't delete l2cap channel if sk is owned by user */
3065 if (sock_owned_by_user(sk)) {
3066 sk->sk_state = BT_DISCONN;
3067 l2cap_sock_clear_timer(sk);
3068 l2cap_sock_set_timer(sk, HZ / 5);
3069 break;
3070 }
3071
3072 l2cap_chan_del(sk, ECONNREFUSED);
3073 break;
3074 }
3075
3076 bh_unlock_sock(sk);
3077 return 0;
3078}
3079
3080static inline void set_default_fcs(struct l2cap_pinfo *pi)
3081{
3082 /* FCS is enabled only in ERTM or streaming mode, if one or both
3083 * sides request it.
3084 */
3085 if (pi->mode != L2CAP_MODE_ERTM && pi->mode != L2CAP_MODE_STREAMING)
3086 pi->fcs = L2CAP_FCS_NONE;
3087 else if (!(pi->conf_state & L2CAP_CONF_NO_FCS_RECV))
3088 pi->fcs = L2CAP_FCS_CRC16;
3089}
3090
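/* Handle a Configuration Request: accumulate option data across
 * continuation fragments, parse it once complete, and move the channel
 * to BT_CONNECTED when both directions are configured. */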
3091static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
3092{
3093 struct l2cap_conf_req *req = (struct l2cap_conf_req *) data;
3094 u16 dcid, flags;
3095 u8 rsp[64];
3096 struct sock *sk;
3097 int len;
3098
3099 dcid = __le16_to_cpu(req->dcid);
3100 flags = __le16_to_cpu(req->flags);
3101
3102 BT_DBG("dcid 0x%4.4x flags 0x%2.2x", dcid, flags);
3103
3104 sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid);
3105 if (!sk)
3106 return -ENOENT;
3107
3108 if (sk->sk_state != BT_CONFIG) {
3109 struct l2cap_cmd_rej rej;
3110
3111 rej.reason = cpu_to_le16(0x0002);
3112 l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
3113 sizeof(rej), &rej);
3114 goto unlock;
3115 }
3116
3117 /* Reject if config buffer is too small. */
3118 len = cmd_len - sizeof(*req);
3119 if (l2cap_pi(sk)->conf_len + len > sizeof(l2cap_pi(sk)->conf_req)) {
3120 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
3121 l2cap_build_conf_rsp(sk, rsp,
3122 L2CAP_CONF_REJECT, flags), rsp);
3123 goto unlock;
3124 }
3125
3126 /* Store config. */
3127 memcpy(l2cap_pi(sk)->conf_req + l2cap_pi(sk)->conf_len, req->data, len);
3128 l2cap_pi(sk)->conf_len += len;
3129
3130 if (flags & 0x0001) {
3131 /* Incomplete config. Send empty response. */
3132 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
3133 l2cap_build_conf_rsp(sk, rsp,
3134 L2CAP_CONF_SUCCESS, 0x0001), rsp);
3135 goto unlock;
3136 }
3137
3138 /* Complete config. */
3139 len = l2cap_parse_conf_req(sk, rsp);
3140 if (len < 0) {
3141 l2cap_send_disconn_req(conn, sk, ECONNRESET);
3142 goto unlock;
3143 }
3144
3145 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp);
3146 l2cap_pi(sk)->num_conf_rsp++;
3147
3148 /* Reset config buffer. */
3149 l2cap_pi(sk)->conf_len = 0;
3150
3151 if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE))
3152 goto unlock;
3153
3154 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) {
3155 set_default_fcs(l2cap_pi(sk));
3156
3157 sk->sk_state = BT_CONNECTED;
3158
3159 l2cap_pi(sk)->next_tx_seq = 0;
3160 l2cap_pi(sk)->expected_tx_seq = 0;
3161 __skb_queue_head_init(TX_QUEUE(sk));
3162 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
3163 l2cap_ertm_init(sk);
3164
3165 l2cap_chan_ready(sk);
3166 goto unlock;
3167 }
3168
3169 if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) {
3170 u8 buf[64];
3171 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
3172 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
3173 l2cap_build_conf_req(sk, buf), buf);
3174 l2cap_pi(sk)->num_conf_req++;
3175 }
3176
3177unlock:
3178 bh_unlock_sock(sk);
3179 return 0;
3180}
3181
3182static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
3183{
3184 struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data;
3185 u16 scid, flags, result;
3186 struct sock *sk;
3187 int len = cmd->len - sizeof(*rsp);
3188
3189 scid = __le16_to_cpu(rsp->scid);
3190 flags = __le16_to_cpu(rsp->flags);
3191 result = __le16_to_cpu(rsp->result);
3192
3193 BT_DBG("scid 0x%4.4x flags 0x%2.2x result 0x%2.2x",
3194 scid, flags, result);
3195
3196 sk = l2cap_get_chan_by_scid(&conn->chan_list, scid);
3197 if (!sk)
3198 return 0;
3199
3200 switch (result) {
3201 case L2CAP_CONF_SUCCESS:
3202 l2cap_conf_rfc_get(sk, rsp->data, len);
3203 break;
3204
3205 case L2CAP_CONF_UNACCEPT:
3206 if (l2cap_pi(sk)->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) {
3207 char req[64];
3208
3209 if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) {
3210 l2cap_send_disconn_req(conn, sk, ECONNRESET);
3211 goto done;
3212 }
3213
3214 /* throw out any old stored conf requests */
3215 result = L2CAP_CONF_SUCCESS;
3216 len = l2cap_parse_conf_rsp(sk, rsp->data,
3217 len, req, &result);
3218 if (len < 0) {
3219 l2cap_send_disconn_req(conn, sk, ECONNRESET);
3220 goto done;
3221 }
3222
3223 l2cap_send_cmd(conn, l2cap_get_ident(conn),
3224 L2CAP_CONF_REQ, len, req);
3225 l2cap_pi(sk)->num_conf_req++;
3226 if (result != L2CAP_CONF_SUCCESS)
3227 goto done;
3228 break;
3229 }
3230
3231 default:
3232 sk->sk_err = ECONNRESET;
3233 l2cap_sock_set_timer(sk, HZ * 5);
3234 l2cap_send_disconn_req(conn, sk, ECONNRESET);
3235 goto done;
3236 }
3237
3238 if (flags & 0x01)
3239 goto done;
3240
3241 l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE;
3242
3243 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) {
3244 set_default_fcs(l2cap_pi(sk));
3245
3246 sk->sk_state = BT_CONNECTED;
3247 l2cap_pi(sk)->next_tx_seq = 0;
3248 l2cap_pi(sk)->expected_tx_seq = 0;
3249 __skb_queue_head_init(TX_QUEUE(sk));
3250 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
3251 l2cap_ertm_init(sk);
3252
3253 l2cap_chan_ready(sk);
3254 }
3255
3256done:
3257 bh_unlock_sock(sk);
3258 return 0;
3259}
3260
3261static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
3262{
3263 struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data;
3264 struct l2cap_disconn_rsp rsp;
3265 u16 dcid, scid;
3266 struct sock *sk;
3267
3268 scid = __le16_to_cpu(req->scid);
3269 dcid = __le16_to_cpu(req->dcid);
3270
3271 BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid);
3272
3273 sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid);
3274 if (!sk)
3275 return 0;
3276
3277 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
3278 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
3279 l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
3280
3281 sk->sk_shutdown = SHUTDOWN_MASK;
3282
3283 /* don't delete l2cap channel if sk is owned by user */
3284 if (sock_owned_by_user(sk)) {
3285 sk->sk_state = BT_DISCONN;
3286 l2cap_sock_clear_timer(sk);
3287 l2cap_sock_set_timer(sk, HZ / 5);
3288 bh_unlock_sock(sk);
3289 return 0;
3290 }
3291
3292 l2cap_chan_del(sk, ECONNRESET);
3293 bh_unlock_sock(sk);
3294
3295 l2cap_sock_kill(sk);
3296 return 0;
3297}
3298
3299static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
3300{
3301 struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data;
3302 u16 dcid, scid;
3303 struct sock *sk;
3304
3305 scid = __le16_to_cpu(rsp->scid);
3306 dcid = __le16_to_cpu(rsp->dcid);
3307
3308 BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid);
3309
3310 sk = l2cap_get_chan_by_scid(&conn->chan_list, scid);
3311 if (!sk)
3312 return 0;
3313
3314 /* don't delete l2cap channel if sk is owned by user */
3315 if (sock_owned_by_user(sk)) {
3316 sk->sk_state = BT_DISCONN;
3317 l2cap_sock_clear_timer(sk);
3318 l2cap_sock_set_timer(sk, HZ / 5);
3319 bh_unlock_sock(sk);
3320 return 0;
3321 }
3322
3323 l2cap_chan_del(sk, 0);
3324 bh_unlock_sock(sk);
3325
3326 l2cap_sock_kill(sk);
3327 return 0;
3328}
3329
3330static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
3331{
3332 struct l2cap_info_req *req = (struct l2cap_info_req *) data;
3333 u16 type;
3334
3335 type = __le16_to_cpu(req->type);
3336
3337 BT_DBG("type 0x%4.4x", type);
3338
3339 if (type == L2CAP_IT_FEAT_MASK) {
3340 u8 buf[8];
3341 u32 feat_mask = l2cap_feat_mask;
3342 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
3343 rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
3344 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
3345 if (!disable_ertm)
3346 feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING
3347 | L2CAP_FEAT_FCS;
3348 put_unaligned_le32(feat_mask, rsp->data);
3349 l2cap_send_cmd(conn, cmd->ident,
3350 L2CAP_INFO_RSP, sizeof(buf), buf);
3351 } else if (type == L2CAP_IT_FIXED_CHAN) {
3352 u8 buf[12];
3353 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
3354 rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
3355 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
3356 memcpy(buf + 4, l2cap_fixed_chan, 8);
3357 l2cap_send_cmd(conn, cmd->ident,
3358 L2CAP_INFO_RSP, sizeof(buf), buf);
3359 } else {
3360 struct l2cap_info_rsp rsp;
3361 rsp.type = cpu_to_le16(type);
3362 rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
3363 l2cap_send_cmd(conn, cmd->ident,
3364 L2CAP_INFO_RSP, sizeof(rsp), &rsp);
3365 }
3366
3367 return 0;
3368}
3369
3370static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
3371{
3372 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data;
3373 u16 type, result;
3374
3375 type = __le16_to_cpu(rsp->type);
3376 result = __le16_to_cpu(rsp->result);
3377
3378 BT_DBG("type 0x%4.4x result 0x%2.2x", type, result);
3379
3380 del_timer(&conn->info_timer);
3381
3382 if (result != L2CAP_IR_SUCCESS) {
3383 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
3384 conn->info_ident = 0;
3385
3386 l2cap_conn_start(conn);
3387
3388 return 0;
3389 }
3390
3391 if (type == L2CAP_IT_FEAT_MASK) {
3392 conn->feat_mask = get_unaligned_le32(rsp->data);
3393
3394 if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) {
3395 struct l2cap_info_req req;
3396 req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
3397
3398 conn->info_ident = l2cap_get_ident(conn);
3399
3400 l2cap_send_cmd(conn, conn->info_ident,
3401 L2CAP_INFO_REQ, sizeof(req), &req);
3402 } else {
3403 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
3404 conn->info_ident = 0;
3405
3406 l2cap_conn_start(conn);
3407 }
3408 } else if (type == L2CAP_IT_FIXED_CHAN) {
3409 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
3410 conn->info_ident = 0;
3411
3412 l2cap_conn_start(conn);
3413 }
3414
3415 return 0;
3416}
3417
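/* Walk every command in a signalling channel PDU, dispatch it to the
 * matching handler and answer failures with a Command Reject. */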
3418static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
3419{
3420 u8 *data = skb->data;
3421 int len = skb->len;
3422 struct l2cap_cmd_hdr cmd;
3423 int err = 0;
3424
3425 l2cap_raw_recv(conn, skb);
3426
3427 while (len >= L2CAP_CMD_HDR_SIZE) {
3428 u16 cmd_len;
3429 memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE);
3430 data += L2CAP_CMD_HDR_SIZE;
3431 len -= L2CAP_CMD_HDR_SIZE;
3432
3433 cmd_len = le16_to_cpu(cmd.len);
3434
3435 BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, cmd.ident);
3436
3437 if (cmd_len > len || !cmd.ident) {
3438 BT_DBG("corrupted command");
3439 break;
3440 }
3441
3442 switch (cmd.code) {
3443 case L2CAP_COMMAND_REJ:
3444 l2cap_command_rej(conn, &cmd, data);
3445 break;
3446
3447 case L2CAP_CONN_REQ:
3448 err = l2cap_connect_req(conn, &cmd, data);
3449 break;
3450
3451 case L2CAP_CONN_RSP:
3452 err = l2cap_connect_rsp(conn, &cmd, data);
3453 break;
3454
3455 case L2CAP_CONF_REQ:
3456 err = l2cap_config_req(conn, &cmd, cmd_len, data);
3457 break;
3458
3459 case L2CAP_CONF_RSP:
3460 err = l2cap_config_rsp(conn, &cmd, data);
3461 break;
3462
3463 case L2CAP_DISCONN_REQ:
3464 err = l2cap_disconnect_req(conn, &cmd, data);
3465 break;
3466
3467 case L2CAP_DISCONN_RSP:
3468 err = l2cap_disconnect_rsp(conn, &cmd, data);
3469 break;
3470
3471 case L2CAP_ECHO_REQ:
3472 l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data);
3473 break;
3474
3475 case L2CAP_ECHO_RSP:
3476 break;
3477
3478 case L2CAP_INFO_REQ:
3479 err = l2cap_information_req(conn, &cmd, data);
3480 break;
3481
3482 case L2CAP_INFO_RSP:
3483 err = l2cap_information_rsp(conn, &cmd, data);
3484 break;
3485
3486 default:
3487 BT_ERR("Unknown signaling command 0x%2.2x", cmd.code);
3488 err = -EINVAL;
3489 break;
3490 }
3491
3492 if (err) {
3493 struct l2cap_cmd_rej rej;
3494 BT_DBG("error %d", err);
3495
3496 /* FIXME: Map err to a valid reason */
3497 rej.reason = cpu_to_le16(0);
3498 l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
3499 }
3500
3501 data += cmd_len;
3502 len -= cmd_len;
3503 }
3504
3505 kfree_skb(skb);
3506}
3507
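/* Verify the CRC-16 FCS of an ERTM/streaming PDU: trim the two trailing
 * FCS octets and compare them against a CRC computed over the basic
 * header, the control field and the payload. */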
3508static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb)
3509{
3510 u16 our_fcs, rcv_fcs;
3511 int hdr_size = L2CAP_HDR_SIZE + 2;
3512
3513 if (pi->fcs == L2CAP_FCS_CRC16) {
3514 skb_trim(skb, skb->len - 2);
3515 rcv_fcs = get_unaligned_le16(skb->data + skb->len);
3516 our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size);
3517
3518 if (our_fcs != rcv_fcs)
3519 return -EBADMSG;
3520 }
3521 return 0;
3522}
3523
3524static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
3525{
3526 struct l2cap_pinfo *pi = l2cap_pi(sk);
3527 u16 control = 0;
3528
3529 pi->frames_sent = 0;
3530
3531 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3532
3533 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
3534 control |= L2CAP_SUPER_RCV_NOT_READY;
3535 l2cap_send_sframe(pi, control);
3536 pi->conn_state |= L2CAP_CONN_RNR_SENT;
3537 }
3538
3539 if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
3540 l2cap_retransmit_frames(sk);
3541
3542 l2cap_ertm_send(sk);
3543
3544 if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
3545 pi->frames_sent == 0) {
3546 control |= L2CAP_SUPER_RCV_READY;
3547 l2cap_send_sframe(pi, control);
3548 }
3549}
3550
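/* Insert an out-of-sequence I-frame into the SREJ queue, keeping the
 * queue ordered by tx_seq (modulo 64, relative to buffer_seq) and
 * rejecting duplicates. */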
3551static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
3552{
3553 struct sk_buff *next_skb;
3554 struct l2cap_pinfo *pi = l2cap_pi(sk);
3555 int tx_seq_offset, next_tx_seq_offset;
3556
3557 bt_cb(skb)->tx_seq = tx_seq;
3558 bt_cb(skb)->sar = sar;
3559
3560 next_skb = skb_peek(SREJ_QUEUE(sk));
3561 if (!next_skb) {
3562 __skb_queue_tail(SREJ_QUEUE(sk), skb);
3563 return 0;
3564 }
3565
3566 tx_seq_offset = (tx_seq - pi->buffer_seq) % 64;
3567 if (tx_seq_offset < 0)
3568 tx_seq_offset += 64;
3569
3570 do {
3571 if (bt_cb(next_skb)->tx_seq == tx_seq)
3572 return -EINVAL;
3573
3574 next_tx_seq_offset = (bt_cb(next_skb)->tx_seq -
3575 pi->buffer_seq) % 64;
3576 if (next_tx_seq_offset < 0)
3577 next_tx_seq_offset += 64;
3578
3579 if (next_tx_seq_offset > tx_seq_offset) {
3580 __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb);
3581 return 0;
3582 }
3583
3584 if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb))
3585 break;
3586
3587 } while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb)));
3588
3589 __skb_queue_tail(SREJ_QUEUE(sk), skb);
3590
3591 return 0;
3592}
3593
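/* Reassemble ERTM SDUs from SAR-tagged I-frames: unsegmented frames are
 * queued directly, while start/continuation/end frames are accumulated
 * in pi->sdu and delivered once the SDU is complete. */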
3594static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
3595{
3596 struct l2cap_pinfo *pi = l2cap_pi(sk);
3597 struct sk_buff *_skb;
3598 int err;
3599
3600 switch (control & L2CAP_CTRL_SAR) {
3601 case L2CAP_SDU_UNSEGMENTED:
3602 if (pi->conn_state & L2CAP_CONN_SAR_SDU)
3603 goto drop;
3604
3605 err = sock_queue_rcv_skb(sk, skb);
3606 if (!err)
3607 return err;
3608
3609 break;
3610
3611 case L2CAP_SDU_START:
3612 if (pi->conn_state & L2CAP_CONN_SAR_SDU)
3613 goto drop;
3614
3615 pi->sdu_len = get_unaligned_le16(skb->data);
3616
3617 if (pi->sdu_len > pi->imtu)
3618 goto disconnect;
3619
3620 pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC);
3621 if (!pi->sdu)
3622 return -ENOMEM;
3623
3624		/* Pull the 2-byte SDU length field only after the alloc; because
3625		 * of the Local Busy condition we have to be sure this is executed
3626		 * only once, i.e. only when the alloc does not fail */
3627 skb_pull(skb, 2);
3628
3629 memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
3630
3631 pi->conn_state |= L2CAP_CONN_SAR_SDU;
3632 pi->partial_sdu_len = skb->len;
3633 break;
3634
3635 case L2CAP_SDU_CONTINUE:
3636 if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
3637 goto disconnect;
3638
3639 if (!pi->sdu)
3640 goto disconnect;
3641
3642 pi->partial_sdu_len += skb->len;
3643 if (pi->partial_sdu_len > pi->sdu_len)
3644 goto drop;
3645
3646 memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
3647
3648 break;
3649
3650 case L2CAP_SDU_END:
3651 if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
3652 goto disconnect;
3653
3654 if (!pi->sdu)
3655 goto disconnect;
3656
3657 if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) {
3658 pi->partial_sdu_len += skb->len;
3659
3660 if (pi->partial_sdu_len > pi->imtu)
3661 goto drop;
3662
3663 if (pi->partial_sdu_len != pi->sdu_len)
3664 goto drop;
3665
3666 memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
3667 }
3668
3669 _skb = skb_clone(pi->sdu, GFP_ATOMIC);
3670 if (!_skb) {
3671 pi->conn_state |= L2CAP_CONN_SAR_RETRY;
3672 return -ENOMEM;
3673 }
3674
3675 err = sock_queue_rcv_skb(sk, _skb);
3676 if (err < 0) {
3677 kfree_skb(_skb);
3678 pi->conn_state |= L2CAP_CONN_SAR_RETRY;
3679 return err;
3680 }
3681
3682 pi->conn_state &= ~L2CAP_CONN_SAR_RETRY;
3683 pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
3684
3685 kfree_skb(pi->sdu);
3686 break;
3687 }
3688
3689 kfree_skb(skb);
3690 return 0;
3691
3692drop:
3693 kfree_skb(pi->sdu);
3694 pi->sdu = NULL;
3695
3696disconnect:
3697 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
3698 kfree_skb(skb);
3699 return 0;
3700}
3701
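/* Drain the local-busy queue into the socket; once it is empty, clear
 * the local busy condition and, if an RNR was sent, poll the remote
 * side with an RR carrying the P bit. */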
3702static int l2cap_try_push_rx_skb(struct sock *sk)
3703{
3704 struct l2cap_pinfo *pi = l2cap_pi(sk);
3705 struct sk_buff *skb;
3706 u16 control;
3707 int err;
3708
3709 while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) {
3710 control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
3711 err = l2cap_ertm_reassembly_sdu(sk, skb, control);
3712 if (err < 0) {
3713 skb_queue_head(BUSY_QUEUE(sk), skb);
3714 return -EBUSY;
3715 }
3716
3717 pi->buffer_seq = (pi->buffer_seq + 1) % 64;
3718 }
3719
3720 if (!(pi->conn_state & L2CAP_CONN_RNR_SENT))
3721 goto done;
3722
3723 control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3724 control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
3725 l2cap_send_sframe(pi, control);
3726 l2cap_pi(sk)->retry_count = 1;
3727
3728 del_timer(&pi->retrans_timer);
3729 __mod_monitor_timer();
3730
3731 l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
3732
3733done:
3734 pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
3735 pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
3736
3737 BT_DBG("sk %p, Exit local busy", sk);
3738
3739 return 0;
3740}
3741
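/* Work item scheduled while in the local busy condition: periodically
 * retry pushing queued frames to the socket, disconnecting after
 * L2CAP_LOCAL_BUSY_TRIES failed attempts. */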
3742static void l2cap_busy_work(struct work_struct *work)
3743{
3744 DECLARE_WAITQUEUE(wait, current);
3745 struct l2cap_pinfo *pi =
3746 container_of(work, struct l2cap_pinfo, busy_work);
3747 struct sock *sk = (struct sock *)pi;
3748 int n_tries = 0, timeo = HZ/5, err;
3749 struct sk_buff *skb;
3750
3751 lock_sock(sk);
3752
3753 add_wait_queue(sk_sleep(sk), &wait);
3754 while ((skb = skb_peek(BUSY_QUEUE(sk)))) {
3755 set_current_state(TASK_INTERRUPTIBLE);
3756
3757 if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) {
3758 err = -EBUSY;
3759 l2cap_send_disconn_req(pi->conn, sk, EBUSY);
3760 break;
3761 }
3762
3763 if (!timeo)
3764 timeo = HZ/5;
3765
3766 if (signal_pending(current)) {
3767 err = sock_intr_errno(timeo);
3768 break;
3769 }
3770
3771 release_sock(sk);
3772 timeo = schedule_timeout(timeo);
3773 lock_sock(sk);
3774
3775 err = sock_error(sk);
3776 if (err)
3777 break;
3778
3779 if (l2cap_try_push_rx_skb(sk) == 0)
3780 break;
3781 }
3782
3783 set_current_state(TASK_RUNNING);
3784 remove_wait_queue(sk_sleep(sk), &wait);
3785
3786 release_sock(sk);
3787}
3788
3789static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
3790{
3791 struct l2cap_pinfo *pi = l2cap_pi(sk);
3792 int sctrl, err;
3793
3794 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
3795 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
3796 __skb_queue_tail(BUSY_QUEUE(sk), skb);
3797 return l2cap_try_push_rx_skb(sk);
3798
3799
3800 }
3801
3802 err = l2cap_ertm_reassembly_sdu(sk, skb, control);
3803 if (err >= 0) {
3804 pi->buffer_seq = (pi->buffer_seq + 1) % 64;
3805 return err;
3806 }
3807
3808 /* Busy Condition */
3809 BT_DBG("sk %p, Enter local busy", sk);
3810
3811 pi->conn_state |= L2CAP_CONN_LOCAL_BUSY;
3812 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
3813 __skb_queue_tail(BUSY_QUEUE(sk), skb);
3814
3815 sctrl = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3816 sctrl |= L2CAP_SUPER_RCV_NOT_READY;
3817 l2cap_send_sframe(pi, sctrl);
3818
3819 pi->conn_state |= L2CAP_CONN_RNR_SENT;
3820
3821 del_timer(&pi->ack_timer);
3822
3823 queue_work(_busy_wq, &pi->busy_work);
3824
3825 return err;
3826}
3827
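/* Streaming-mode counterpart of the ERTM reassembly: oversized or
 * inconsistent partial SDUs are silently discarded instead of
 * triggering a disconnect. */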
3828static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
3829{
3830 struct l2cap_pinfo *pi = l2cap_pi(sk);
3831 struct sk_buff *_skb;
3832 int err = -EINVAL;
3833
3834 /*
3835	 * TODO: We have to notify userland if some data is lost in
3836	 * Streaming Mode.
3837 */
3838
3839 switch (control & L2CAP_CTRL_SAR) {
3840 case L2CAP_SDU_UNSEGMENTED:
3841 if (pi->conn_state & L2CAP_CONN_SAR_SDU) {
3842 kfree_skb(pi->sdu);
3843 break;
3844 }
3845
3846 err = sock_queue_rcv_skb(sk, skb);
3847 if (!err)
3848 return 0;
3849
3850 break;
3851
3852 case L2CAP_SDU_START:
3853 if (pi->conn_state & L2CAP_CONN_SAR_SDU) {
3854 kfree_skb(pi->sdu);
3855 break;
3856 }
3857
3858 pi->sdu_len = get_unaligned_le16(skb->data);
3859 skb_pull(skb, 2);
3860
3861 if (pi->sdu_len > pi->imtu) {
3862 err = -EMSGSIZE;
3863 break;
3864 }
3865
3866 pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC);
3867 if (!pi->sdu) {
3868 err = -ENOMEM;
3869 break;
3870 }
3871
3872 memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
3873
3874 pi->conn_state |= L2CAP_CONN_SAR_SDU;
3875 pi->partial_sdu_len = skb->len;
3876 err = 0;
3877 break;
3878
3879 case L2CAP_SDU_CONTINUE:
3880 if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
3881 break;
3882
3883 memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
3884
3885 pi->partial_sdu_len += skb->len;
3886 if (pi->partial_sdu_len > pi->sdu_len)
3887 kfree_skb(pi->sdu);
3888 else
3889 err = 0;
3890
3891 break;
3892
3893 case L2CAP_SDU_END:
3894 if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
3895 break;
3896
3897 memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
3898
3899 pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
3900 pi->partial_sdu_len += skb->len;
3901
3902 if (pi->partial_sdu_len > pi->imtu)
3903 goto drop;
3904
3905 if (pi->partial_sdu_len == pi->sdu_len) {
3906 _skb = skb_clone(pi->sdu, GFP_ATOMIC);
3907 err = sock_queue_rcv_skb(sk, _skb);
3908 if (err < 0)
3909 kfree_skb(_skb);
3910 }
3911 err = 0;
3912
3913drop:
3914 kfree_skb(pi->sdu);
3915 break;
3916 }
3917
3918 kfree_skb(skb);
3919 return err;
3920}
3921
3922static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq)
3923{
3924 struct sk_buff *skb;
3925 u16 control;
3926
3927 while ((skb = skb_peek(SREJ_QUEUE(sk)))) {
3928 if (bt_cb(skb)->tx_seq != tx_seq)
3929 break;
3930
3931 skb = skb_dequeue(SREJ_QUEUE(sk));
3932 control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
3933 l2cap_ertm_reassembly_sdu(sk, skb, control);
3934 l2cap_pi(sk)->buffer_seq_srej =
3935 (l2cap_pi(sk)->buffer_seq_srej + 1) % 64;
3936 tx_seq = (tx_seq + 1) % 64;
3937 }
3938}
3939
3940static void l2cap_resend_srejframe(struct sock *sk, u8 tx_seq)
3941{
3942 struct l2cap_pinfo *pi = l2cap_pi(sk);
3943 struct srej_list *l, *tmp;
3944 u16 control;
3945
3946 list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) {
3947 if (l->tx_seq == tx_seq) {
3948 list_del(&l->list);
3949 kfree(l);
3950 return;
3951 }
3952 control = L2CAP_SUPER_SELECT_REJECT;
3953 control |= l->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3954 l2cap_send_sframe(pi, control);
3955 list_del(&l->list);
3956 list_add_tail(&l->list, SREJ_LIST(sk));
3957 }
3958}
3959
3960static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq)
3961{
3962 struct l2cap_pinfo *pi = l2cap_pi(sk);
3963 struct srej_list *new;
3964 u16 control;
3965
3966 while (tx_seq != pi->expected_tx_seq) {
3967 control = L2CAP_SUPER_SELECT_REJECT;
3968 control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3969 l2cap_send_sframe(pi, control);
3970
3971 new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC);
3972 new->tx_seq = pi->expected_tx_seq;
3973 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3974 list_add_tail(&new->list, SREJ_LIST(sk));
3975 }
3976 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3977}
3978
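/* Handle an incoming ERTM I-frame: acknowledge frames up to req_seq,
 * deliver in-sequence data, and enter or extend SREJ recovery for
 * out-of-sequence tx_seq values. */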
3979static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
3980{
3981 struct l2cap_pinfo *pi = l2cap_pi(sk);
3982 u8 tx_seq = __get_txseq(rx_control);
3983 u8 req_seq = __get_reqseq(rx_control);
3984 u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
3985 int tx_seq_offset, expected_tx_seq_offset;
3986 int num_to_ack = (pi->tx_win/6) + 1;
3987 int err = 0;
3988
3989 BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq,
3990 rx_control);
3991
3992 if (L2CAP_CTRL_FINAL & rx_control &&
3993 l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
3994 del_timer(&pi->monitor_timer);
3995 if (pi->unacked_frames > 0)
3996 __mod_retrans_timer();
3997 pi->conn_state &= ~L2CAP_CONN_WAIT_F;
3998 }
3999
4000 pi->expected_ack_seq = req_seq;
4001 l2cap_drop_acked_frames(sk);
4002
4003 if (tx_seq == pi->expected_tx_seq)
4004 goto expected;
4005
4006 tx_seq_offset = (tx_seq - pi->buffer_seq) % 64;
4007 if (tx_seq_offset < 0)
4008 tx_seq_offset += 64;
4009
4010 /* invalid tx_seq */
4011 if (tx_seq_offset >= pi->tx_win) {
4012 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4013 goto drop;
4014 }
4015
4016 if (pi->conn_state == L2CAP_CONN_LOCAL_BUSY)
4017 goto drop;
4018
4019 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
4020 struct srej_list *first;
4021
4022 first = list_first_entry(SREJ_LIST(sk),
4023 struct srej_list, list);
4024 if (tx_seq == first->tx_seq) {
4025 l2cap_add_to_srej_queue(sk, skb, tx_seq, sar);
4026 l2cap_check_srej_gap(sk, tx_seq);
4027
4028 list_del(&first->list);
4029 kfree(first);
4030
4031 if (list_empty(SREJ_LIST(sk))) {
4032 pi->buffer_seq = pi->buffer_seq_srej;
4033 pi->conn_state &= ~L2CAP_CONN_SREJ_SENT;
4034 l2cap_send_ack(pi);
4035 BT_DBG("sk %p, Exit SREJ_SENT", sk);
4036 }
4037 } else {
4038 struct srej_list *l;
4039
4040 /* duplicated tx_seq */
4041 if (l2cap_add_to_srej_queue(sk, skb, tx_seq, sar) < 0)
4042 goto drop;
4043
4044 list_for_each_entry(l, SREJ_LIST(sk), list) {
4045 if (l->tx_seq == tx_seq) {
4046 l2cap_resend_srejframe(sk, tx_seq);
4047 return 0;
4048 }
4049 }
4050 l2cap_send_srejframe(sk, tx_seq);
4051 }
4052 } else {
4053 expected_tx_seq_offset =
4054 (pi->expected_tx_seq - pi->buffer_seq) % 64;
4055 if (expected_tx_seq_offset < 0)
4056 expected_tx_seq_offset += 64;
4057
4058 /* duplicated tx_seq */
4059 if (tx_seq_offset < expected_tx_seq_offset)
4060 goto drop;
4061
4062 pi->conn_state |= L2CAP_CONN_SREJ_SENT;
4063
4064 BT_DBG("sk %p, Enter SREJ", sk);
4065
4066 INIT_LIST_HEAD(SREJ_LIST(sk));
4067 pi->buffer_seq_srej = pi->buffer_seq;
4068
4069 __skb_queue_head_init(SREJ_QUEUE(sk));
4070 __skb_queue_head_init(BUSY_QUEUE(sk));
4071 l2cap_add_to_srej_queue(sk, skb, tx_seq, sar);
4072
4073 pi->conn_state |= L2CAP_CONN_SEND_PBIT;
4074
4075 l2cap_send_srejframe(sk, tx_seq);
4076
4077 del_timer(&pi->ack_timer);
4078 }
4079 return 0;
4080
4081expected:
4082 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
4083
4084 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
4085 bt_cb(skb)->tx_seq = tx_seq;
4086 bt_cb(skb)->sar = sar;
4087 __skb_queue_tail(SREJ_QUEUE(sk), skb);
4088 return 0;
4089 }
4090
4091 err = l2cap_push_rx_skb(sk, skb, rx_control);
4092 if (err < 0)
4093 return 0;
4094
4095 if (rx_control & L2CAP_CTRL_FINAL) {
4096 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
4097 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
4098 else
4099 l2cap_retransmit_frames(sk);
4100 }
4101
4102 __mod_ack_timer();
4103
4104 pi->num_acked = (pi->num_acked + 1) % num_to_ack;
4105 if (pi->num_acked == num_to_ack - 1)
4106 l2cap_send_ack(pi);
4107
4108 return 0;
4109
4110drop:
4111 kfree_skb(skb);
4112 return 0;
4113}
4114
4115static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
4116{
4117 struct l2cap_pinfo *pi = l2cap_pi(sk);
4118
4119 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control),
4120 rx_control);
4121
4122 pi->expected_ack_seq = __get_reqseq(rx_control);
4123 l2cap_drop_acked_frames(sk);
4124
4125 if (rx_control & L2CAP_CTRL_POLL) {
4126 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
4127 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
4128 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
4129 (pi->unacked_frames > 0))
4130 __mod_retrans_timer();
4131
4132 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
4133 l2cap_send_srejtail(sk);
4134 } else {
4135 l2cap_send_i_or_rr_or_rnr(sk);
4136 }
4137
4138 } else if (rx_control & L2CAP_CTRL_FINAL) {
4139 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
4140
4141 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
4142 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
4143 else
4144 l2cap_retransmit_frames(sk);
4145
4146 } else {
4147 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
4148 (pi->unacked_frames > 0))
4149 __mod_retrans_timer();
4150
4151 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
4152 if (pi->conn_state & L2CAP_CONN_SREJ_SENT)
4153 l2cap_send_ack(pi);
4154 else
4155 l2cap_ertm_send(sk);
4156 }
4157}
4158
4159static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
4160{
4161 struct l2cap_pinfo *pi = l2cap_pi(sk);
4162 u8 tx_seq = __get_reqseq(rx_control);
4163
4164 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4165
4166 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
4167
4168 pi->expected_ack_seq = tx_seq;
4169 l2cap_drop_acked_frames(sk);
4170
4171 if (rx_control & L2CAP_CTRL_FINAL) {
4172 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
4173 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
4174 else
4175 l2cap_retransmit_frames(sk);
4176 } else {
4177 l2cap_retransmit_frames(sk);
4178
4179 if (pi->conn_state & L2CAP_CONN_WAIT_F)
4180 pi->conn_state |= L2CAP_CONN_REJ_ACT;
4181 }
4182}
4183static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
4184{
4185 struct l2cap_pinfo *pi = l2cap_pi(sk);
4186 u8 tx_seq = __get_reqseq(rx_control);
4187
4188 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4189
4190 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
4191
4192 if (rx_control & L2CAP_CTRL_POLL) {
4193 pi->expected_ack_seq = tx_seq;
4194 l2cap_drop_acked_frames(sk);
4195
4196 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
4197 l2cap_retransmit_one_frame(sk, tx_seq);
4198
4199 l2cap_ertm_send(sk);
4200
4201 if (pi->conn_state & L2CAP_CONN_WAIT_F) {
4202 pi->srej_save_reqseq = tx_seq;
4203 pi->conn_state |= L2CAP_CONN_SREJ_ACT;
4204 }
4205 } else if (rx_control & L2CAP_CTRL_FINAL) {
4206 if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) &&
4207 pi->srej_save_reqseq == tx_seq)
4208 pi->conn_state &= ~L2CAP_CONN_SREJ_ACT;
4209 else
4210 l2cap_retransmit_one_frame(sk, tx_seq);
4211 } else {
4212 l2cap_retransmit_one_frame(sk, tx_seq);
4213 if (pi->conn_state & L2CAP_CONN_WAIT_F) {
4214 pi->srej_save_reqseq = tx_seq;
4215 pi->conn_state |= L2CAP_CONN_SREJ_ACT;
4216 }
4217 }
4218}
4219
4220static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
4221{
4222 struct l2cap_pinfo *pi = l2cap_pi(sk);
4223 u8 tx_seq = __get_reqseq(rx_control);
4224
4225 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4226
4227 pi->conn_state |= L2CAP_CONN_REMOTE_BUSY;
4228 pi->expected_ack_seq = tx_seq;
4229 l2cap_drop_acked_frames(sk);
4230
4231 if (rx_control & L2CAP_CTRL_POLL)
4232 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
4233
4234 if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) {
4235 del_timer(&pi->retrans_timer);
4236 if (rx_control & L2CAP_CTRL_POLL)
4237 l2cap_send_rr_or_rnr(pi, L2CAP_CTRL_FINAL);
4238 return;
4239 }
4240
4241 if (rx_control & L2CAP_CTRL_POLL)
4242 l2cap_send_srejtail(sk);
4243 else
4244 l2cap_send_sframe(pi, L2CAP_SUPER_RCV_READY);
4245}
4246
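/* Dispatch an incoming S-frame (RR, REJ, SREJ or RNR) to the matching
 * handler after processing a final (F bit) acknowledgement. */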
4247static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
4248{
4249 BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
4250
4251 if (L2CAP_CTRL_FINAL & rx_control &&
4252 l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
4253 del_timer(&l2cap_pi(sk)->monitor_timer);
4254 if (l2cap_pi(sk)->unacked_frames > 0)
4255 __mod_retrans_timer();
4256 l2cap_pi(sk)->conn_state &= ~L2CAP_CONN_WAIT_F;
4257 }
4258
4259 switch (rx_control & L2CAP_CTRL_SUPERVISE) {
4260 case L2CAP_SUPER_RCV_READY:
4261 l2cap_data_channel_rrframe(sk, rx_control);
4262 break;
4263
4264 case L2CAP_SUPER_REJECT:
4265 l2cap_data_channel_rejframe(sk, rx_control);
4266 break;
4267
4268 case L2CAP_SUPER_SELECT_REJECT:
4269 l2cap_data_channel_srejframe(sk, rx_control);
4270 break;
4271
4272 case L2CAP_SUPER_RCV_NOT_READY:
4273 l2cap_data_channel_rnrframe(sk, rx_control);
4274 break;
4275 }
4276
4277 kfree_skb(skb);
4278 return 0;
4279}
4280
4281static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb)
4282{
4283 struct l2cap_pinfo *pi = l2cap_pi(sk);
4284 u16 control;
4285 u8 req_seq;
4286 int len, next_tx_seq_offset, req_seq_offset;
4287
4288 control = get_unaligned_le16(skb->data);
4289 skb_pull(skb, 2);
4290 len = skb->len;
4291
4292 /*
4293	 * We can just drop a corrupted I-frame here.
4294	 * The receive side will notice the missing frame and start
4295	 * the proper recovery procedure by requesting retransmission.
4296 */
4297 if (l2cap_check_fcs(pi, skb))
4298 goto drop;
4299
4300 if (__is_sar_start(control) && __is_iframe(control))
4301 len -= 2;
4302
4303 if (pi->fcs == L2CAP_FCS_CRC16)
4304 len -= 2;
4305
4306 if (len > pi->mps) {
4307 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4308 goto drop;
4309 }
4310
4311 req_seq = __get_reqseq(control);
4312 req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
4313 if (req_seq_offset < 0)
4314 req_seq_offset += 64;
4315
4316 next_tx_seq_offset =
4317 (pi->next_tx_seq - pi->expected_ack_seq) % 64;
4318 if (next_tx_seq_offset < 0)
4319 next_tx_seq_offset += 64;
4320
4321 /* check for invalid req-seq */
4322 if (req_seq_offset > next_tx_seq_offset) {
4323 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4324 goto drop;
4325 }
4326
4327 if (__is_iframe(control)) {
4328 if (len < 0) {
4329 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4330 goto drop;
4331 }
4332
4333 l2cap_data_channel_iframe(sk, control, skb);
4334 } else {
4335 if (len != 0) {
4336 BT_ERR("%d", len);
4337 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4338 goto drop;
4339 }
4340
4341 l2cap_data_channel_sframe(sk, control, skb);
4342 }
4343
4344 return 0;
4345
4346drop:
4347 kfree_skb(skb);
4348 return 0;
4349}
4350
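/* Per-channel data path: Basic mode queues straight to the socket, ERTM
 * runs the full state machine (or defers to the socket backlog), and
 * streaming mode tolerates gaps in the sequence. */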
4351static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb)
4352{
4353 struct sock *sk;
4354 struct l2cap_pinfo *pi;
4355 u16 control;
4356 u8 tx_seq;
4357 int len;
4358
4359 sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
4360 if (!sk) {
4361 BT_DBG("unknown cid 0x%4.4x", cid);
4362 goto drop;
4363 }
4364
4365 pi = l2cap_pi(sk);
4366
4367 BT_DBG("sk %p, len %d", sk, skb->len);
4368
4369 if (sk->sk_state != BT_CONNECTED)
4370 goto drop;
4371
4372 switch (pi->mode) {
4373 case L2CAP_MODE_BASIC:
4374		/* If the socket receive buffer overflows we drop data here,
4375		 * which is *bad* because L2CAP has to be reliable.
4376		 * But we don't have any other choice: L2CAP doesn't
4377		 * provide a flow control mechanism. */
4378
4379 if (pi->imtu < skb->len)
4380 goto drop;
4381
4382 if (!sock_queue_rcv_skb(sk, skb))
4383 goto done;
4384 break;
4385
4386 case L2CAP_MODE_ERTM:
4387 if (!sock_owned_by_user(sk)) {
4388 l2cap_ertm_data_rcv(sk, skb);
4389 } else {
4390 if (sk_add_backlog(sk, skb))
4391 goto drop;
4392 }
4393
4394 goto done;
4395
4396 case L2CAP_MODE_STREAMING:
4397 control = get_unaligned_le16(skb->data);
4398 skb_pull(skb, 2);
4399 len = skb->len;
4400
4401 if (l2cap_check_fcs(pi, skb))
4402 goto drop;
4403
4404 if (__is_sar_start(control))
4405 len -= 2;
4406
4407 if (pi->fcs == L2CAP_FCS_CRC16)
4408 len -= 2;
4409
4410 if (len > pi->mps || len < 0 || __is_sframe(control))
4411 goto drop;
4412
4413 tx_seq = __get_txseq(control);
4414
4415 if (pi->expected_tx_seq == tx_seq)
4416 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
4417 else
4418 pi->expected_tx_seq = (tx_seq + 1) % 64;
4419
4420 l2cap_streaming_reassembly_sdu(sk, skb, control);
4421
4422 goto done;
4423
4424 default:
4425 BT_DBG("sk %p: bad mode 0x%2.2x", sk, pi->mode);
4426 break;
4427 }
4428
4429drop:
4430 kfree_skb(skb);
4431
4432done:
4433 if (sk)
4434 bh_unlock_sock(sk);
4435
4436 return 0;
4437}
4438
4439static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb)
4440{
4441 struct sock *sk;
4442
4443 sk = l2cap_get_sock_by_psm(0, psm, conn->src);
4444 if (!sk)
4445 goto drop;
4446
4447 bh_lock_sock(sk);
4448
4449 BT_DBG("sk %p, len %d", sk, skb->len);
4450
4451 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED)
4452 goto drop;
4453
4454 if (l2cap_pi(sk)->imtu < skb->len)
4455 goto drop;
4456
4457 if (!sock_queue_rcv_skb(sk, skb))
4458 goto done;
4459
4460drop:
4461 kfree_skb(skb);
4462
4463done:
4464 if (sk)
4465 bh_unlock_sock(sk);
4466 return 0;
4467}
4468
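/* Demultiplex a complete L2CAP frame by CID: signalling channel,
 * connectionless channel or a connection-oriented data channel. */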
4469static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
4470{
4471 struct l2cap_hdr *lh = (void *) skb->data;
4472 u16 cid, len;
4473 __le16 psm;
4474
4475 skb_pull(skb, L2CAP_HDR_SIZE);
4476 cid = __le16_to_cpu(lh->cid);
4477 len = __le16_to_cpu(lh->len);
4478
4479 if (len != skb->len) {
4480 kfree_skb(skb);
4481 return;
4482 }
4483
4484 BT_DBG("len %d, cid 0x%4.4x", len, cid);
4485
4486 switch (cid) {
4487 case L2CAP_CID_SIGNALING:
4488 l2cap_sig_channel(conn, skb);
4489 break;
4490
4491 case L2CAP_CID_CONN_LESS:
4492 psm = get_unaligned_le16(skb->data);
4493 skb_pull(skb, 2);
4494 l2cap_conless_channel(conn, psm, skb);
4495 break;
4496
4497 default:
4498 l2cap_data_channel(conn, cid, skb);
4499 break;
4500 }
4501}
4502
4503/* ---- L2CAP interface with lower layer (HCI) ---- */
4504
4505static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
4506{
4507 int exact = 0, lm1 = 0, lm2 = 0;
4508 register struct sock *sk;
4509 struct hlist_node *node;
4510
4511 if (type != ACL_LINK)
4512 return -EINVAL;
4513
4514 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
4515
4516 /* Find listening sockets and check their link_mode */
4517 read_lock(&l2cap_sk_list.lock);
4518 sk_for_each(sk, node, &l2cap_sk_list.head) {
4519 if (sk->sk_state != BT_LISTEN)
4520 continue;
4521
4522 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) {
4523 lm1 |= HCI_LM_ACCEPT;
4524 if (l2cap_pi(sk)->role_switch)
4525 lm1 |= HCI_LM_MASTER;
4526 exact++;
4527 } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) {
4528 lm2 |= HCI_LM_ACCEPT;
4529 if (l2cap_pi(sk)->role_switch)
4530 lm2 |= HCI_LM_MASTER;
4531 }
4532 }
4533 read_unlock(&l2cap_sk_list.lock);
4534
4535 return exact ? lm1 : lm2;
4536}
4537
4538static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
4539{
4540 struct l2cap_conn *conn;
4541
4542 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
4543
4544 if (hcon->type != ACL_LINK)
4545 return -EINVAL;
4546
4547 if (!status) {
4548 conn = l2cap_conn_add(hcon, status);
4549 if (conn)
4550 l2cap_conn_ready(conn);
4551 } else
4552 l2cap_conn_del(hcon, bt_err(status));
4553
4554 return 0;
4555}
4556
4557static int l2cap_disconn_ind(struct hci_conn *hcon)
4558{
4559 struct l2cap_conn *conn = hcon->l2cap_data;
4560
4561 BT_DBG("hcon %p", hcon);
4562
4563 if (hcon->type != ACL_LINK || !conn)
4564 return 0x13;
4565
4566 return conn->disc_reason;
4567}
4568
4569static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
4570{
4571 BT_DBG("hcon %p reason %d", hcon, reason);
4572
4573 if (hcon->type != ACL_LINK)
4574 return -EINVAL;
4575
4576 l2cap_conn_del(hcon, bt_err(reason));
4577
4578 return 0;
4579}
4580
4581static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt)
4582{
4583 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM)
4584 return;
4585
4586 if (encrypt == 0x00) {
4587 if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) {
4588 l2cap_sock_clear_timer(sk);
4589 l2cap_sock_set_timer(sk, HZ * 5);
4590 } else if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
4591 __l2cap_sock_close(sk, ECONNREFUSED);
4592 } else {
4593 if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM)
4594 l2cap_sock_clear_timer(sk);
4595 }
4596}
4597
4598static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
4599{
4600 struct l2cap_chan_list *l;
4601 struct l2cap_conn *conn = hcon->l2cap_data;
4602 struct sock *sk;
4603
4604 if (!conn)
4605 return 0;
4606
4607 l = &conn->chan_list;
4608
4609 BT_DBG("conn %p", conn);
4610
4611 read_lock(&l->lock);
4612
4613 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
4614 bh_lock_sock(sk);
4615
4616 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND) {
4617 bh_unlock_sock(sk);
4618 continue;
4619 }
4620
4621 if (!status && (sk->sk_state == BT_CONNECTED ||
4622 sk->sk_state == BT_CONFIG)) {
4623 l2cap_check_encryption(sk, encrypt);
4624 bh_unlock_sock(sk);
4625 continue;
4626 }
4627
4628 if (sk->sk_state == BT_CONNECT) {
4629 if (!status) {
4630 struct l2cap_conn_req req;
4631 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
4632 req.psm = l2cap_pi(sk)->psm;
4633
4634 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
4635 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
4636
4637 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
4638 L2CAP_CONN_REQ, sizeof(req), &req);
4639 } else {
4640 l2cap_sock_clear_timer(sk);
4641 l2cap_sock_set_timer(sk, HZ / 10);
4642 }
4643 } else if (sk->sk_state == BT_CONNECT2) {
4644 struct l2cap_conn_rsp rsp;
4645 __u16 result;
4646
4647 if (!status) {
4648 sk->sk_state = BT_CONFIG;
4649 result = L2CAP_CR_SUCCESS;
4650 } else {
4651 sk->sk_state = BT_DISCONN;
4652 l2cap_sock_set_timer(sk, HZ / 10);
4653 result = L2CAP_CR_SEC_BLOCK;
4654 }
4655
4656 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
4657 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
4658 rsp.result = cpu_to_le16(result);
4659 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
4660 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
4661 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
4662 }
4663
4664 bh_unlock_sock(sk);
4665 }
4666
4667 read_unlock(&l->lock);
4668
4669 return 0;
4670}
4671
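/* Reassemble L2CAP frames from ACL fragments: a start fragment carries
 * the basic header and total length, continuation fragments are
 * appended until rx_len drops to zero. */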
4672static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
4673{
4674 struct l2cap_conn *conn = hcon->l2cap_data;
4675
4676 if (!conn && !(conn = l2cap_conn_add(hcon, 0)))
4677 goto drop;
4678
4679 BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags);
4680
4681 if (flags & ACL_START) {
4682 struct l2cap_hdr *hdr;
4683 struct sock *sk;
4684 u16 cid;
4685 int len;
4686
4687 if (conn->rx_len) {
4688 BT_ERR("Unexpected start frame (len %d)", skb->len);
4689 kfree_skb(conn->rx_skb);
4690 conn->rx_skb = NULL;
4691 conn->rx_len = 0;
4692 l2cap_conn_unreliable(conn, ECOMM);
4693 }
4694
4695		/* A start fragment always begins with the Basic L2CAP header */
4696 if (skb->len < L2CAP_HDR_SIZE) {
4697 BT_ERR("Frame is too short (len %d)", skb->len);
4698 l2cap_conn_unreliable(conn, ECOMM);
4699 goto drop;
4700 }
4701
4702 hdr = (struct l2cap_hdr *) skb->data;
4703 len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE;
4704 cid = __le16_to_cpu(hdr->cid);
4705
4706 if (len == skb->len) {
4707 /* Complete frame received */
4708 l2cap_recv_frame(conn, skb);
4709 return 0;
4710 }
4711
4712 BT_DBG("Start: total len %d, frag len %d", len, skb->len);
4713
4714 if (skb->len > len) {
4715 BT_ERR("Frame is too long (len %d, expected len %d)",
4716 skb->len, len);
4717 l2cap_conn_unreliable(conn, ECOMM);
4718 goto drop;
4719 }
4720
4721 sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
4722
4723 if (sk && l2cap_pi(sk)->imtu < len - L2CAP_HDR_SIZE) {
4724 BT_ERR("Frame exceeding recv MTU (len %d, MTU %d)",
4725 len, l2cap_pi(sk)->imtu);
4726 bh_unlock_sock(sk);
4727 l2cap_conn_unreliable(conn, ECOMM);
4728 goto drop;
4729 }
4730
4731 if (sk)
4732 bh_unlock_sock(sk);
4733
4734 /* Allocate skb for the complete frame (with header) */
4735 conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC);
4736 if (!conn->rx_skb)
4737 goto drop;
4738
4739 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
4740 skb->len);
4741 conn->rx_len = len - skb->len;
4742 } else {
4743 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
4744
4745 if (!conn->rx_len) {
4746 BT_ERR("Unexpected continuation frame (len %d)", skb->len);
4747 l2cap_conn_unreliable(conn, ECOMM);
4748 goto drop;
4749 }
4750
4751 if (skb->len > conn->rx_len) {
4752 BT_ERR("Fragment is too long (len %d, expected %d)",
4753 skb->len, conn->rx_len);
4754 kfree_skb(conn->rx_skb);
4755 conn->rx_skb = NULL;
4756 conn->rx_len = 0;
4757 l2cap_conn_unreliable(conn, ECOMM);
4758 goto drop;
4759 }
4760
4761 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
4762 skb->len);
4763 conn->rx_len -= skb->len;
4764
4765 if (!conn->rx_len) {
4766 /* Complete frame received */
4767 l2cap_recv_frame(conn, conn->rx_skb);
4768 conn->rx_skb = NULL;
4769 }
4770 }
4771
4772drop:
4773 kfree_skb(skb);
4774 return 0;
4775}
4776
4777static int l2cap_debugfs_show(struct seq_file *f, void *p)
4778{
4779 struct sock *sk;
4780 struct hlist_node *node;
4781
4782 read_lock_bh(&l2cap_sk_list.lock);
4783
4784 sk_for_each(sk, node, &l2cap_sk_list.head) {
4785 struct l2cap_pinfo *pi = l2cap_pi(sk);
4786
4787 seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n",
4788 batostr(&bt_sk(sk)->src),
4789 batostr(&bt_sk(sk)->dst),
4790 sk->sk_state, __le16_to_cpu(pi->psm),
4791 pi->scid, pi->dcid,
4792 pi->imtu, pi->omtu, pi->sec_level);
4793 }
4794
4795 read_unlock_bh(&l2cap_sk_list.lock);
4796
4797 return 0;
4798}
4799
4800static int l2cap_debugfs_open(struct inode *inode, struct file *file)
4801{
4802 return single_open(file, l2cap_debugfs_show, inode->i_private);
4803}
4804
4805static const struct file_operations l2cap_debugfs_fops = {
4806 .open = l2cap_debugfs_open,
4807 .read = seq_read,
4808 .llseek = seq_lseek,
4809 .release = single_release,
4810};
4811
4812static struct dentry *l2cap_debugfs;
4813
4814static const struct proto_ops l2cap_sock_ops = {
4815 .family = PF_BLUETOOTH,
4816 .owner = THIS_MODULE,
4817 .release = l2cap_sock_release,
4818 .bind = l2cap_sock_bind,
4819 .connect = l2cap_sock_connect,
4820 .listen = l2cap_sock_listen,
4821 .accept = l2cap_sock_accept,
4822 .getname = l2cap_sock_getname,
4823 .sendmsg = l2cap_sock_sendmsg,
4824 .recvmsg = l2cap_sock_recvmsg,
4825 .poll = bt_sock_poll,
4826 .ioctl = bt_sock_ioctl,
4827 .mmap = sock_no_mmap,
4828 .socketpair = sock_no_socketpair,
4829 .shutdown = l2cap_sock_shutdown,
4830 .setsockopt = l2cap_sock_setsockopt,
4831 .getsockopt = l2cap_sock_getsockopt
4832};
4833
4834static const struct net_proto_family l2cap_sock_family_ops = {
4835 .family = PF_BLUETOOTH,
4836 .owner = THIS_MODULE,
4837 .create = l2cap_sock_create,
4838};
4839
4840static struct hci_proto l2cap_hci_proto = {
4841 .name = "L2CAP",
4842 .id = HCI_PROTO_L2CAP,
4843 .connect_ind = l2cap_connect_ind,
4844 .connect_cfm = l2cap_connect_cfm,
4845 .disconn_ind = l2cap_disconn_ind,
4846 .disconn_cfm = l2cap_disconn_cfm,
4847 .security_cfm = l2cap_security_cfm,
4848 .recv_acldata = l2cap_recv_acldata
4849};
4850
4851static int __init l2cap_init(void)
4852{
4853 int err;
4854
4855 err = proto_register(&l2cap_proto, 0);
4856 if (err < 0)
4857 return err;
4858
4859 _busy_wq = create_singlethread_workqueue("l2cap");
4860 if (!_busy_wq) {
4861 proto_unregister(&l2cap_proto);
4862 return -ENOMEM;
4863 }
4864
4865 err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
4866 if (err < 0) {
4867 BT_ERR("L2CAP socket registration failed");
4868 goto error;
4869 }
4870
4871 err = hci_register_proto(&l2cap_hci_proto);
4872 if (err < 0) {
4873 BT_ERR("L2CAP protocol registration failed");
4874 bt_sock_unregister(BTPROTO_L2CAP);
4875 goto error;
4876 }
4877
4878 if (bt_debugfs) {
4879 l2cap_debugfs = debugfs_create_file("l2cap", 0444,
4880 bt_debugfs, NULL, &l2cap_debugfs_fops);
4881 if (!l2cap_debugfs)
4882 BT_ERR("Failed to create L2CAP debug file");
4883 }
4884
4885 BT_INFO("L2CAP ver %s", VERSION);
4886 BT_INFO("L2CAP socket layer initialized");
4887
4888 return 0;
4889
4890error:
4891 destroy_workqueue(_busy_wq);
4892 proto_unregister(&l2cap_proto);
4893 return err;
4894}
4895
4896static void __exit l2cap_exit(void)
4897{
4898 debugfs_remove(l2cap_debugfs);
4899
4900 flush_workqueue(_busy_wq);
4901 destroy_workqueue(_busy_wq);
4902
4903 if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
4904 BT_ERR("L2CAP socket unregistration failed");
4905
4906 if (hci_unregister_proto(&l2cap_hci_proto) < 0)
4907 BT_ERR("L2CAP protocol unregistration failed");
4908
4909 proto_unregister(&l2cap_proto);
4910}
4911
4912void l2cap_load(void)
4913{
4914 /* Dummy function to trigger automatic L2CAP module loading by
4915 * other modules that use L2CAP sockets but don't use any other
4916 * symbols from it. */
4917}
4918EXPORT_SYMBOL(l2cap_load);
4919
4920module_init(l2cap_init);
4921module_exit(l2cap_exit);
4922
4923module_param(disable_ertm, bool, 0644);
4924MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode");
4925
4926MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
4927MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
4928MODULE_VERSION(VERSION);
4929MODULE_LICENSE("GPL");
4930MODULE_ALIAS("bt-proto-0");
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
new file mode 100644
index 000000000000..a86f9ba4f05c
--- /dev/null
+++ b/net/bluetooth/l2cap_core.c
@@ -0,0 +1,4240 @@
1/*
2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated
4 Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
5 Copyright (C) 2010 Google Inc.
6
7 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License version 2 as
11 published by the Free Software Foundation;
12
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
16 IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
17 CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
18 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
19 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
20 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21
22 ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
23 COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
24 SOFTWARE IS DISCLAIMED.
25*/
26
27/* Bluetooth L2CAP core. */
28
29#include <linux/module.h>
30
31#include <linux/types.h>
32#include <linux/capability.h>
33#include <linux/errno.h>
34#include <linux/kernel.h>
35#include <linux/sched.h>
36#include <linux/slab.h>
37#include <linux/poll.h>
38#include <linux/fcntl.h>
39#include <linux/init.h>
40#include <linux/interrupt.h>
41#include <linux/socket.h>
42#include <linux/skbuff.h>
43#include <linux/list.h>
44#include <linux/device.h>
45#include <linux/debugfs.h>
46#include <linux/seq_file.h>
47#include <linux/uaccess.h>
48#include <linux/crc16.h>
49#include <net/sock.h>
50
51#include <asm/system.h>
52#include <asm/unaligned.h>
53
54#include <net/bluetooth/bluetooth.h>
55#include <net/bluetooth/hci_core.h>
56#include <net/bluetooth/l2cap.h>
57
58int disable_ertm;
59
60static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
61static u8 l2cap_fixed_chan[8] = { 0x02, };
62
63static struct workqueue_struct *_busy_wq;
64
65LIST_HEAD(chan_list);
66DEFINE_RWLOCK(chan_list_lock);
67
68static void l2cap_busy_work(struct work_struct *work);
69
70static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
71 u8 code, u8 ident, u16 dlen, void *data);
72static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data);
73
74static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
75
76/* ---- L2CAP channels ---- */
77static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid)
78{
79 struct l2cap_chan *c;
80
81 list_for_each_entry(c, &conn->chan_l, list) {
82 if (c->dcid == cid)
83 return c;
84 }
85 return NULL;
86
87}
88
89static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid)
90{
91 struct l2cap_chan *c;
92
93 list_for_each_entry(c, &conn->chan_l, list) {
94 if (c->scid == cid)
95 return c;
96 }
97 return NULL;
98}
99
100/* Find channel with given SCID.
101 * Returns the channel with its socket locked */
102static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid)
103{
104 struct l2cap_chan *c;
105
106 read_lock(&conn->chan_lock);
107 c = __l2cap_get_chan_by_scid(conn, cid);
108 if (c)
109 bh_lock_sock(c->sk);
110 read_unlock(&conn->chan_lock);
111 return c;
112}
113
114static struct l2cap_chan *__l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident)
115{
116 struct l2cap_chan *c;
117
118 list_for_each_entry(c, &conn->chan_l, list) {
119 if (c->ident == ident)
120 return c;
121 }
122 return NULL;
123}
124
125static inline struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident)
126{
127 struct l2cap_chan *c;
128
129 read_lock(&conn->chan_lock);
130 c = __l2cap_get_chan_by_ident(conn, ident);
131 if (c)
132 bh_lock_sock(c->sk);
133 read_unlock(&conn->chan_lock);
134 return c;
135}
136
137static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src)
138{
139 struct l2cap_chan *c;
140
141 list_for_each_entry(c, &chan_list, global_l) {
142 if (c->sport == psm && !bacmp(&bt_sk(c->sk)->src, src))
143 goto found;
144 }
145
146 c = NULL;
147found:
148 return c;
149}
150
151int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm)
152{
153 int err;
154
155 write_lock_bh(&chan_list_lock);
156
157 if (psm && __l2cap_global_chan_by_addr(psm, src)) {
158 err = -EADDRINUSE;
159 goto done;
160 }
161
162 if (psm) {
163 chan->psm = psm;
164 chan->sport = psm;
165 err = 0;
166 } else {
167 u16 p;
168
169 err = -EINVAL;
170 for (p = 0x1001; p < 0x1100; p += 2)
171 if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
172 chan->psm = cpu_to_le16(p);
173 chan->sport = cpu_to_le16(p);
174 err = 0;
175 break;
176 }
177 }
178
179done:
180 write_unlock_bh(&chan_list_lock);
181 return err;
182}
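
The allocator above hands out dynamic PSMs by stepping through the odd values 0x1001, 0x1003, ..., 0x10FF and binding the first one not already taken for this source address. A minimal user-space sketch of the validity rule implied by that stride (an assumption drawn from the L2CAP spec rather than from this file: bit 0 of the low octet of a PSM must be set, bit 0 of the high octet must be clear):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical helper: checks the "odd low octet, even high octet"
 * rule for a 16-bit PSM. */
static bool psm_is_valid(unsigned short psm)
{
	return (psm & 0x0001) && !(psm & 0x0100);
}

int main(void)
{
	printf("%d %d %d\n",
	       psm_is_valid(0x1001),	/* 1: first dynamic PSM tried above */
	       psm_is_valid(0x1002),	/* 0: low octet is even */
	       psm_is_valid(0x0101));	/* 0: bit 0 of the high octet is set */
	return 0;
}
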
183
184int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid)
185{
186 write_lock_bh(&chan_list_lock);
187
188 chan->scid = scid;
189
190 write_unlock_bh(&chan_list_lock);
191
192 return 0;
193}
194
195static u16 l2cap_alloc_cid(struct l2cap_conn *conn)
196{
197 u16 cid = L2CAP_CID_DYN_START;
198
199 for (; cid < L2CAP_CID_DYN_END; cid++) {
200 if (!__l2cap_get_chan_by_scid(conn, cid))
201 return cid;
202 }
203
204 return 0;
205}
206
207struct l2cap_chan *l2cap_chan_create(struct sock *sk)
208{
209 struct l2cap_chan *chan;
210
211 chan = kzalloc(sizeof(*chan), GFP_ATOMIC);
212 if (!chan)
213 return NULL;
214
215 chan->sk = sk;
216
217 write_lock_bh(&chan_list_lock);
218 list_add(&chan->global_l, &chan_list);
219 write_unlock_bh(&chan_list_lock);
220
221 return chan;
222}
223
224void l2cap_chan_destroy(struct l2cap_chan *chan)
225{
226 write_lock_bh(&chan_list_lock);
227 list_del(&chan->global_l);
228 write_unlock_bh(&chan_list_lock);
229
230 kfree(chan);
231}
232
233static void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
234{
235 struct sock *sk = chan->sk;
236
237 BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn,
238 chan->psm, chan->dcid);
239
240 conn->disc_reason = 0x13;
241
242 chan->conn = conn;
243
244 if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) {
245 if (conn->hcon->type == LE_LINK) {
246 /* LE connection */
247 chan->omtu = L2CAP_LE_DEFAULT_MTU;
248 chan->scid = L2CAP_CID_LE_DATA;
249 chan->dcid = L2CAP_CID_LE_DATA;
250 } else {
251 /* Alloc CID for connection-oriented socket */
252 chan->scid = l2cap_alloc_cid(conn);
253 chan->omtu = L2CAP_DEFAULT_MTU;
254 }
255 } else if (sk->sk_type == SOCK_DGRAM) {
256 /* Connectionless socket */
257 chan->scid = L2CAP_CID_CONN_LESS;
258 chan->dcid = L2CAP_CID_CONN_LESS;
259 chan->omtu = L2CAP_DEFAULT_MTU;
260 } else {
261 /* Raw socket can send/recv signalling messages only */
262 chan->scid = L2CAP_CID_SIGNALING;
263 chan->dcid = L2CAP_CID_SIGNALING;
264 chan->omtu = L2CAP_DEFAULT_MTU;
265 }
266
267 sock_hold(sk);
268
269 list_add(&chan->list, &conn->chan_l);
270}
271
272/* Delete channel.
273 * Must be called on the locked socket. */
274void l2cap_chan_del(struct l2cap_chan *chan, int err)
275{
276 struct sock *sk = chan->sk;
277 struct l2cap_conn *conn = chan->conn;
278 struct sock *parent = bt_sk(sk)->parent;
279
280 l2cap_sock_clear_timer(sk);
281
282 BT_DBG("chan %p, conn %p, err %d", chan, conn, err);
283
284 if (conn) {
285 /* Delete from channel list */
286 write_lock_bh(&conn->chan_lock);
287 list_del(&chan->list);
288 write_unlock_bh(&conn->chan_lock);
289 __sock_put(sk);
290
291 chan->conn = NULL;
292 hci_conn_put(conn->hcon);
293 }
294
295 sk->sk_state = BT_CLOSED;
296 sock_set_flag(sk, SOCK_ZAPPED);
297
298 if (err)
299 sk->sk_err = err;
300
301 if (parent) {
302 bt_accept_unlink(sk);
303 parent->sk_data_ready(parent, 0);
304 } else
305 sk->sk_state_change(sk);
306
307 if (!(chan->conf_state & L2CAP_CONF_OUTPUT_DONE &&
308 chan->conf_state & L2CAP_CONF_INPUT_DONE))
309 return;
310
311 skb_queue_purge(&chan->tx_q);
312
313 if (chan->mode == L2CAP_MODE_ERTM) {
314 struct srej_list *l, *tmp;
315
316 del_timer(&chan->retrans_timer);
317 del_timer(&chan->monitor_timer);
318 del_timer(&chan->ack_timer);
319
320 skb_queue_purge(&chan->srej_q);
321 skb_queue_purge(&chan->busy_q);
322
323 list_for_each_entry_safe(l, tmp, &chan->srej_l, list) {
324 list_del(&l->list);
325 kfree(l);
326 }
327 }
328}
329
330static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan)
331{
332 struct sock *sk = chan->sk;
333
334 if (sk->sk_type == SOCK_RAW) {
335 switch (chan->sec_level) {
336 case BT_SECURITY_HIGH:
337 return HCI_AT_DEDICATED_BONDING_MITM;
338 case BT_SECURITY_MEDIUM:
339 return HCI_AT_DEDICATED_BONDING;
340 default:
341 return HCI_AT_NO_BONDING;
342 }
343 } else if (chan->psm == cpu_to_le16(0x0001)) {
344 if (chan->sec_level == BT_SECURITY_LOW)
345 chan->sec_level = BT_SECURITY_SDP;
346
347 if (chan->sec_level == BT_SECURITY_HIGH)
348 return HCI_AT_NO_BONDING_MITM;
349 else
350 return HCI_AT_NO_BONDING;
351 } else {
352 switch (chan->sec_level) {
353 case BT_SECURITY_HIGH:
354 return HCI_AT_GENERAL_BONDING_MITM;
355 case BT_SECURITY_MEDIUM:
356 return HCI_AT_GENERAL_BONDING;
357 default:
358 return HCI_AT_NO_BONDING;
359 }
360 }
361}
362
363/* Service level security */
364static inline int l2cap_check_security(struct l2cap_chan *chan)
365{
366 struct l2cap_conn *conn = chan->conn;
367 __u8 auth_type;
368
369 auth_type = l2cap_get_auth_type(chan);
370
371 return hci_conn_security(conn->hcon, chan->sec_level, auth_type);
372}
373
374u8 l2cap_get_ident(struct l2cap_conn *conn)
375{
376 u8 id;
377
378	/* Get next available identifier.
379	 * 1 - 128 are used by the kernel.
380 * 129 - 199 are reserved.
381 * 200 - 254 are used by utilities like l2ping, etc.
382 */
383
384 spin_lock_bh(&conn->lock);
385
386 if (++conn->tx_ident > 128)
387 conn->tx_ident = 1;
388
389 id = conn->tx_ident;
390
391 spin_unlock_bh(&conn->lock);
392
393 return id;
394}
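
l2cap_get_ident() keeps the identifiers it hands out inside the kernel's 1-128 slice of the number space, wrapping back to 1 instead of running into the reserved (129-199) and utility (200-254) ranges. A standalone sketch of that wrap-around (user space and single-threaded, so without the spinlock used above):

#include <stdio.h>

static unsigned char tx_ident;

static unsigned char next_ident(void)
{
	if (++tx_ident > 128)
		tx_ident = 1;
	return tx_ident;
}

int main(void)
{
	tx_ident = 127;
	for (int i = 0; i < 4; i++)
		printf("%u ", next_ident());	/* prints: 128 1 2 3 */
	printf("\n");
	return 0;
}
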
395
396void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
397{
398 struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data);
399 u8 flags;
400
401 BT_DBG("code 0x%2.2x", code);
402
403 if (!skb)
404 return;
405
406 if (lmp_no_flush_capable(conn->hcon->hdev))
407 flags = ACL_START_NO_FLUSH;
408 else
409 flags = ACL_START;
410
411 hci_send_acl(conn->hcon, skb, flags);
412}
413
414static inline void l2cap_send_sframe(struct l2cap_chan *chan, u16 control)
415{
416 struct sk_buff *skb;
417 struct l2cap_hdr *lh;
418 struct l2cap_pinfo *pi = l2cap_pi(chan->sk);
419 struct l2cap_conn *conn = chan->conn;
420 struct sock *sk = (struct sock *)pi;
421 int count, hlen = L2CAP_HDR_SIZE + 2;
422 u8 flags;
423
424 if (sk->sk_state != BT_CONNECTED)
425 return;
426
427 if (chan->fcs == L2CAP_FCS_CRC16)
428 hlen += 2;
429
430 BT_DBG("chan %p, control 0x%2.2x", chan, control);
431
432 count = min_t(unsigned int, conn->mtu, hlen);
433 control |= L2CAP_CTRL_FRAME_TYPE;
434
435 if (chan->conn_state & L2CAP_CONN_SEND_FBIT) {
436 control |= L2CAP_CTRL_FINAL;
437 chan->conn_state &= ~L2CAP_CONN_SEND_FBIT;
438 }
439
440 if (chan->conn_state & L2CAP_CONN_SEND_PBIT) {
441 control |= L2CAP_CTRL_POLL;
442 chan->conn_state &= ~L2CAP_CONN_SEND_PBIT;
443 }
444
445 skb = bt_skb_alloc(count, GFP_ATOMIC);
446 if (!skb)
447 return;
448
449 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
450 lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE);
451 lh->cid = cpu_to_le16(chan->dcid);
452 put_unaligned_le16(control, skb_put(skb, 2));
453
454 if (chan->fcs == L2CAP_FCS_CRC16) {
455 u16 fcs = crc16(0, (u8 *)lh, count - 2);
456 put_unaligned_le16(fcs, skb_put(skb, 2));
457 }
458
459 if (lmp_no_flush_capable(conn->hcon->hdev))
460 flags = ACL_START_NO_FLUSH;
461 else
462 flags = ACL_START;
463
464 hci_send_acl(chan->conn->hcon, skb, flags);
465}
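
l2cap_send_sframe() packs everything into the 16-bit ERTM control field that precedes the optional FCS. The sketch below shows that packing for a supervisory frame; the bit positions are assumptions that mirror the conventional ERTM layout (bit 0 = frame type, bits 2-3 = supervisory function, bit 4 = Poll, bit 7 = Final, bits 8-13 = ReqSeq) rather than the authoritative macros in l2cap.h:

#include <stdint.h>
#include <stdio.h>

#define CTRL_FRAME_TYPE     0x0001	/* 1 = S-frame */
#define CTRL_POLL           0x0010
#define CTRL_FINAL          0x0080
#define CTRL_REQSEQ_SHIFT   8
#define SUPER_RCV_READY     0x0000
#define SUPER_RCV_NOT_READY 0x0008

static uint16_t build_sframe(uint16_t super, uint8_t req_seq, int final)
{
	uint16_t control = CTRL_FRAME_TYPE | super;

	control |= (uint16_t)(req_seq & 0x3f) << CTRL_REQSEQ_SHIFT;
	if (final)
		control |= CTRL_FINAL;
	return control;
}

int main(void)
{
	/* RR acknowledging everything up to TxSeq 5, with the F-bit set */
	printf("0x%04x\n", build_sframe(SUPER_RCV_READY, 6, 1));	/* 0x0681 */
	return 0;
}
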
466
467static inline void l2cap_send_rr_or_rnr(struct l2cap_chan *chan, u16 control)
468{
469 if (chan->conn_state & L2CAP_CONN_LOCAL_BUSY) {
470 control |= L2CAP_SUPER_RCV_NOT_READY;
471 chan->conn_state |= L2CAP_CONN_RNR_SENT;
472 } else
473 control |= L2CAP_SUPER_RCV_READY;
474
475 control |= chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
476
477 l2cap_send_sframe(chan, control);
478}
479
480static inline int __l2cap_no_conn_pending(struct l2cap_chan *chan)
481{
482 return !(chan->conf_state & L2CAP_CONF_CONNECT_PEND);
483}
484
485static void l2cap_do_start(struct l2cap_chan *chan)
486{
487 struct l2cap_conn *conn = chan->conn;
488
489 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
490 if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
491 return;
492
493 if (l2cap_check_security(chan) &&
494 __l2cap_no_conn_pending(chan)) {
495 struct l2cap_conn_req req;
496 req.scid = cpu_to_le16(chan->scid);
497 req.psm = chan->psm;
498
499 chan->ident = l2cap_get_ident(conn);
500 chan->conf_state |= L2CAP_CONF_CONNECT_PEND;
501
502 l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_REQ,
503 sizeof(req), &req);
504 }
505 } else {
506 struct l2cap_info_req req;
507 req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
508
509 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
510 conn->info_ident = l2cap_get_ident(conn);
511
512 mod_timer(&conn->info_timer, jiffies +
513 msecs_to_jiffies(L2CAP_INFO_TIMEOUT));
514
515 l2cap_send_cmd(conn, conn->info_ident,
516 L2CAP_INFO_REQ, sizeof(req), &req);
517 }
518}
519
520static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
521{
522 u32 local_feat_mask = l2cap_feat_mask;
523 if (!disable_ertm)
524 local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
525
526 switch (mode) {
527 case L2CAP_MODE_ERTM:
528 return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
529 case L2CAP_MODE_STREAMING:
530 return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
531 default:
532 return 0x00;
533 }
534}
535
536void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *chan, int err)
537{
538 struct sock *sk;
539 struct l2cap_disconn_req req;
540
541 if (!conn)
542 return;
543
544 sk = chan->sk;
545
546 if (chan->mode == L2CAP_MODE_ERTM) {
547 del_timer(&chan->retrans_timer);
548 del_timer(&chan->monitor_timer);
549 del_timer(&chan->ack_timer);
550 }
551
552 req.dcid = cpu_to_le16(chan->dcid);
553 req.scid = cpu_to_le16(chan->scid);
554 l2cap_send_cmd(conn, l2cap_get_ident(conn),
555 L2CAP_DISCONN_REQ, sizeof(req), &req);
556
557 sk->sk_state = BT_DISCONN;
558 sk->sk_err = err;
559}
560
561/* ---- L2CAP connections ---- */
562static void l2cap_conn_start(struct l2cap_conn *conn)
563{
564 struct l2cap_chan *chan, *tmp;
565
566 BT_DBG("conn %p", conn);
567
568 read_lock(&conn->chan_lock);
569
570 list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) {
571 struct sock *sk = chan->sk;
572
573 bh_lock_sock(sk);
574
575 if (sk->sk_type != SOCK_SEQPACKET &&
576 sk->sk_type != SOCK_STREAM) {
577 bh_unlock_sock(sk);
578 continue;
579 }
580
581 if (sk->sk_state == BT_CONNECT) {
582 struct l2cap_conn_req req;
583
584 if (!l2cap_check_security(chan) ||
585 !__l2cap_no_conn_pending(chan)) {
586 bh_unlock_sock(sk);
587 continue;
588 }
589
590 if (!l2cap_mode_supported(chan->mode,
591 conn->feat_mask)
592 && chan->conf_state &
593 L2CAP_CONF_STATE2_DEVICE) {
594 /* __l2cap_sock_close() calls list_del(chan)
595 * so release the lock */
596				read_unlock(&conn->chan_lock);
597 __l2cap_sock_close(sk, ECONNRESET);
598				read_lock(&conn->chan_lock);
599 bh_unlock_sock(sk);
600 continue;
601 }
602
603 req.scid = cpu_to_le16(chan->scid);
604 req.psm = chan->psm;
605
606 chan->ident = l2cap_get_ident(conn);
607 chan->conf_state |= L2CAP_CONF_CONNECT_PEND;
608
609 l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_REQ,
610 sizeof(req), &req);
611
612 } else if (sk->sk_state == BT_CONNECT2) {
613 struct l2cap_conn_rsp rsp;
614 char buf[128];
615 rsp.scid = cpu_to_le16(chan->dcid);
616 rsp.dcid = cpu_to_le16(chan->scid);
617
618 if (l2cap_check_security(chan)) {
619 if (bt_sk(sk)->defer_setup) {
620 struct sock *parent = bt_sk(sk)->parent;
621 rsp.result = cpu_to_le16(L2CAP_CR_PEND);
622 rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
623 parent->sk_data_ready(parent, 0);
624
625 } else {
626 sk->sk_state = BT_CONFIG;
627 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
628 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
629 }
630 } else {
631 rsp.result = cpu_to_le16(L2CAP_CR_PEND);
632 rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
633 }
634
635 l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP,
636 sizeof(rsp), &rsp);
637
638 if (chan->conf_state & L2CAP_CONF_REQ_SENT ||
639 rsp.result != L2CAP_CR_SUCCESS) {
640 bh_unlock_sock(sk);
641 continue;
642 }
643
644 chan->conf_state |= L2CAP_CONF_REQ_SENT;
645 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
646 l2cap_build_conf_req(chan, buf), buf);
647 chan->num_conf_req++;
648 }
649
650 bh_unlock_sock(sk);
651 }
652
653 read_unlock(&conn->chan_lock);
654}
655
656/* Find channel with given cid and source bdaddr.
657 * Returns closest match.
658 */
659static struct l2cap_chan *l2cap_global_chan_by_scid(int state, __le16 cid, bdaddr_t *src)
660{
661 struct l2cap_chan *c, *c1 = NULL;
662
663 read_lock(&chan_list_lock);
664
665 list_for_each_entry(c, &chan_list, global_l) {
666 struct sock *sk = c->sk;
667
668 if (state && sk->sk_state != state)
669 continue;
670
671 if (c->scid == cid) {
672 /* Exact match. */
673 if (!bacmp(&bt_sk(sk)->src, src)) {
674 read_unlock(&chan_list_lock);
675 return c;
676 }
677
678 /* Closest match */
679 if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
680 c1 = c;
681 }
682 }
683
684 read_unlock(&chan_list_lock);
685
686 return c1;
687}
688
689static void l2cap_le_conn_ready(struct l2cap_conn *conn)
690{
691 struct sock *parent, *sk;
692 struct l2cap_chan *chan, *pchan;
693
694 BT_DBG("");
695
696 /* Check if we have socket listening on cid */
697 pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA,
698 conn->src);
699 if (!pchan)
700 return;
701
702 parent = pchan->sk;
703
704 bh_lock_sock(parent);
705
706 /* Check for backlog size */
707 if (sk_acceptq_is_full(parent)) {
708 BT_DBG("backlog full %d", parent->sk_ack_backlog);
709 goto clean;
710 }
711
712 sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC);
713 if (!sk)
714 goto clean;
715
716 chan = l2cap_chan_create(sk);
717 if (!chan) {
718 l2cap_sock_kill(sk);
719 goto clean;
720 }
721
722 l2cap_pi(sk)->chan = chan;
723
724 write_lock_bh(&conn->chan_lock);
725
726 hci_conn_hold(conn->hcon);
727
728 l2cap_sock_init(sk, parent);
729
730 bacpy(&bt_sk(sk)->src, conn->src);
731 bacpy(&bt_sk(sk)->dst, conn->dst);
732
733 bt_accept_enqueue(parent, sk);
734
735 __l2cap_chan_add(conn, chan);
736
737 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
738
739 sk->sk_state = BT_CONNECTED;
740 parent->sk_data_ready(parent, 0);
741
742 write_unlock_bh(&conn->chan_lock);
743
744clean:
745 bh_unlock_sock(parent);
746}
747
748static void l2cap_conn_ready(struct l2cap_conn *conn)
749{
750 struct l2cap_chan *chan;
751
752 BT_DBG("conn %p", conn);
753
754 if (!conn->hcon->out && conn->hcon->type == LE_LINK)
755 l2cap_le_conn_ready(conn);
756
757 read_lock(&conn->chan_lock);
758
759 list_for_each_entry(chan, &conn->chan_l, list) {
760 struct sock *sk = chan->sk;
761
762 bh_lock_sock(sk);
763
764 if (conn->hcon->type == LE_LINK) {
765 l2cap_sock_clear_timer(sk);
766 sk->sk_state = BT_CONNECTED;
767 sk->sk_state_change(sk);
768 }
769
770 if (sk->sk_type != SOCK_SEQPACKET &&
771 sk->sk_type != SOCK_STREAM) {
772 l2cap_sock_clear_timer(sk);
773 sk->sk_state = BT_CONNECTED;
774 sk->sk_state_change(sk);
775 } else if (sk->sk_state == BT_CONNECT)
776 l2cap_do_start(chan);
777
778 bh_unlock_sock(sk);
779 }
780
781 read_unlock(&conn->chan_lock);
782}
783
784/* Notify sockets that we cannot guarantee reliability anymore */
785static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err)
786{
787 struct l2cap_chan *chan;
788
789 BT_DBG("conn %p", conn);
790
791 read_lock(&conn->chan_lock);
792
793 list_for_each_entry(chan, &conn->chan_l, list) {
794 struct sock *sk = chan->sk;
795
796 if (chan->force_reliable)
797 sk->sk_err = err;
798 }
799
800 read_unlock(&conn->chan_lock);
801}
802
803static void l2cap_info_timeout(unsigned long arg)
804{
805 struct l2cap_conn *conn = (void *) arg;
806
807 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
808 conn->info_ident = 0;
809
810 l2cap_conn_start(conn);
811}
812
813static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
814{
815 struct l2cap_conn *conn = hcon->l2cap_data;
816
817 if (conn || status)
818 return conn;
819
820 conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC);
821 if (!conn)
822 return NULL;
823
824 hcon->l2cap_data = conn;
825 conn->hcon = hcon;
826
827 BT_DBG("hcon %p conn %p", hcon, conn);
828
829 if (hcon->hdev->le_mtu && hcon->type == LE_LINK)
830 conn->mtu = hcon->hdev->le_mtu;
831 else
832 conn->mtu = hcon->hdev->acl_mtu;
833
834 conn->src = &hcon->hdev->bdaddr;
835 conn->dst = &hcon->dst;
836
837 conn->feat_mask = 0;
838
839 spin_lock_init(&conn->lock);
840 rwlock_init(&conn->chan_lock);
841
842 INIT_LIST_HEAD(&conn->chan_l);
843
844 if (hcon->type != LE_LINK)
845 setup_timer(&conn->info_timer, l2cap_info_timeout,
846 (unsigned long) conn);
847
848 conn->disc_reason = 0x13;
849
850 return conn;
851}
852
853static void l2cap_conn_del(struct hci_conn *hcon, int err)
854{
855 struct l2cap_conn *conn = hcon->l2cap_data;
856 struct l2cap_chan *chan, *l;
857 struct sock *sk;
858
859 if (!conn)
860 return;
861
862 BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
863
864 kfree_skb(conn->rx_skb);
865
866 /* Kill channels */
867 list_for_each_entry_safe(chan, l, &conn->chan_l, list) {
868 sk = chan->sk;
869 bh_lock_sock(sk);
870 l2cap_chan_del(chan, err);
871 bh_unlock_sock(sk);
872 l2cap_sock_kill(sk);
873 }
874
875 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
876 del_timer_sync(&conn->info_timer);
877
878 hcon->l2cap_data = NULL;
879 kfree(conn);
880}
881
882static inline void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
883{
884 write_lock_bh(&conn->chan_lock);
885 __l2cap_chan_add(conn, chan);
886 write_unlock_bh(&conn->chan_lock);
887}
888
889/* ---- Socket interface ---- */
890
891/* Find channel with psm and source bdaddr.
892 * Returns closest match.
893 */
894static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr_t *src)
895{
896 struct l2cap_chan *c, *c1 = NULL;
897
898 read_lock(&chan_list_lock);
899
900 list_for_each_entry(c, &chan_list, global_l) {
901 struct sock *sk = c->sk;
902
903 if (state && sk->sk_state != state)
904 continue;
905
906 if (c->psm == psm) {
907 /* Exact match. */
908 if (!bacmp(&bt_sk(sk)->src, src)) {
909				read_unlock(&chan_list_lock);
910 return c;
911 }
912
913 /* Closest match */
914 if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
915 c1 = c;
916 }
917 }
918
919 read_unlock(&chan_list_lock);
920
921 return c1;
922}
923
924int l2cap_chan_connect(struct l2cap_chan *chan)
925{
926 struct sock *sk = chan->sk;
927 bdaddr_t *src = &bt_sk(sk)->src;
928 bdaddr_t *dst = &bt_sk(sk)->dst;
929 struct l2cap_conn *conn;
930 struct hci_conn *hcon;
931 struct hci_dev *hdev;
932 __u8 auth_type;
933 int err;
934
935 BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst),
936 chan->psm);
937
938 hdev = hci_get_route(dst, src);
939 if (!hdev)
940 return -EHOSTUNREACH;
941
942 hci_dev_lock_bh(hdev);
943
944 auth_type = l2cap_get_auth_type(chan);
945
946 if (chan->dcid == L2CAP_CID_LE_DATA)
947 hcon = hci_connect(hdev, LE_LINK, dst,
948 chan->sec_level, auth_type);
949 else
950 hcon = hci_connect(hdev, ACL_LINK, dst,
951 chan->sec_level, auth_type);
952
953 if (IS_ERR(hcon)) {
954 err = PTR_ERR(hcon);
955 goto done;
956 }
957
958 conn = l2cap_conn_add(hcon, 0);
959 if (!conn) {
960 hci_conn_put(hcon);
961 err = -ENOMEM;
962 goto done;
963 }
964
965 /* Update source addr of the socket */
966 bacpy(src, conn->src);
967
968 l2cap_chan_add(conn, chan);
969
970 sk->sk_state = BT_CONNECT;
971 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
972
973 if (hcon->state == BT_CONNECTED) {
974 if (sk->sk_type != SOCK_SEQPACKET &&
975 sk->sk_type != SOCK_STREAM) {
976 l2cap_sock_clear_timer(sk);
977 if (l2cap_check_security(chan))
978 sk->sk_state = BT_CONNECTED;
979 } else
980 l2cap_do_start(chan);
981 }
982
983 err = 0;
984
985done:
986 hci_dev_unlock_bh(hdev);
987 hci_dev_put(hdev);
988 return err;
989}
990
991int __l2cap_wait_ack(struct sock *sk)
992{
993 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
994 DECLARE_WAITQUEUE(wait, current);
995 int err = 0;
996 int timeo = HZ/5;
997
998 add_wait_queue(sk_sleep(sk), &wait);
999 while ((chan->unacked_frames > 0 && chan->conn)) {
1000 set_current_state(TASK_INTERRUPTIBLE);
1001
1002 if (!timeo)
1003 timeo = HZ/5;
1004
1005 if (signal_pending(current)) {
1006 err = sock_intr_errno(timeo);
1007 break;
1008 }
1009
1010 release_sock(sk);
1011 timeo = schedule_timeout(timeo);
1012 lock_sock(sk);
1013
1014 err = sock_error(sk);
1015 if (err)
1016 break;
1017 }
1018 set_current_state(TASK_RUNNING);
1019 remove_wait_queue(sk_sleep(sk), &wait);
1020 return err;
1021}
1022
1023static void l2cap_monitor_timeout(unsigned long arg)
1024{
1025 struct l2cap_chan *chan = (void *) arg;
1026 struct sock *sk = chan->sk;
1027
1028 BT_DBG("chan %p", chan);
1029
1030 bh_lock_sock(sk);
1031 if (chan->retry_count >= chan->remote_max_tx) {
1032 l2cap_send_disconn_req(chan->conn, chan, ECONNABORTED);
1033 bh_unlock_sock(sk);
1034 return;
1035 }
1036
1037 chan->retry_count++;
1038 __mod_monitor_timer();
1039
1040 l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_POLL);
1041 bh_unlock_sock(sk);
1042}
1043
1044static void l2cap_retrans_timeout(unsigned long arg)
1045{
1046 struct l2cap_chan *chan = (void *) arg;
1047 struct sock *sk = chan->sk;
1048
1049 BT_DBG("chan %p", chan);
1050
1051 bh_lock_sock(sk);
1052 chan->retry_count = 1;
1053 __mod_monitor_timer();
1054
1055 chan->conn_state |= L2CAP_CONN_WAIT_F;
1056
1057 l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_POLL);
1058 bh_unlock_sock(sk);
1059}
1060
1061static void l2cap_drop_acked_frames(struct l2cap_chan *chan)
1062{
1063 struct sk_buff *skb;
1064
1065 while ((skb = skb_peek(&chan->tx_q)) &&
1066 chan->unacked_frames) {
1067 if (bt_cb(skb)->tx_seq == chan->expected_ack_seq)
1068 break;
1069
1070 skb = skb_dequeue(&chan->tx_q);
1071 kfree_skb(skb);
1072
1073 chan->unacked_frames--;
1074 }
1075
1076 if (!chan->unacked_frames)
1077 del_timer(&chan->retrans_timer);
1078}
1079
1080void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb)
1081{
1082 struct hci_conn *hcon = chan->conn->hcon;
1083 u16 flags;
1084
1085 BT_DBG("chan %p, skb %p len %d", chan, skb, skb->len);
1086
1087 if (!chan->flushable && lmp_no_flush_capable(hcon->hdev))
1088 flags = ACL_START_NO_FLUSH;
1089 else
1090 flags = ACL_START;
1091
1092 hci_send_acl(hcon, skb, flags);
1093}
1094
1095void l2cap_streaming_send(struct l2cap_chan *chan)
1096{
1097 struct sk_buff *skb;
1098 u16 control, fcs;
1099
1100 while ((skb = skb_dequeue(&chan->tx_q))) {
1101 control = get_unaligned_le16(skb->data + L2CAP_HDR_SIZE);
1102 control |= chan->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT;
1103 put_unaligned_le16(control, skb->data + L2CAP_HDR_SIZE);
1104
1105 if (chan->fcs == L2CAP_FCS_CRC16) {
1106 fcs = crc16(0, (u8 *)skb->data, skb->len - 2);
1107 put_unaligned_le16(fcs, skb->data + skb->len - 2);
1108 }
1109
1110 l2cap_do_send(chan, skb);
1111
1112 chan->next_tx_seq = (chan->next_tx_seq + 1) % 64;
1113 }
1114}
1115
1116static void l2cap_retransmit_one_frame(struct l2cap_chan *chan, u8 tx_seq)
1117{
1118 struct sk_buff *skb, *tx_skb;
1119 u16 control, fcs;
1120
1121 skb = skb_peek(&chan->tx_q);
1122 if (!skb)
1123 return;
1124
1125 do {
1126 if (bt_cb(skb)->tx_seq == tx_seq)
1127 break;
1128
1129 if (skb_queue_is_last(&chan->tx_q, skb))
1130 return;
1131
1132 } while ((skb = skb_queue_next(&chan->tx_q, skb)));
1133
1134 if (chan->remote_max_tx &&
1135 bt_cb(skb)->retries == chan->remote_max_tx) {
1136 l2cap_send_disconn_req(chan->conn, chan, ECONNABORTED);
1137 return;
1138 }
1139
1140 tx_skb = skb_clone(skb, GFP_ATOMIC);
1141 bt_cb(skb)->retries++;
1142 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
1143 control &= L2CAP_CTRL_SAR;
1144
1145 if (chan->conn_state & L2CAP_CONN_SEND_FBIT) {
1146 control |= L2CAP_CTRL_FINAL;
1147 chan->conn_state &= ~L2CAP_CONN_SEND_FBIT;
1148 }
1149
1150 control |= (chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
1151 | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
1152
1153 put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
1154
1155 if (chan->fcs == L2CAP_FCS_CRC16) {
1156 fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
1157 put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
1158 }
1159
1160 l2cap_do_send(chan, tx_skb);
1161}
1162
1163int l2cap_ertm_send(struct l2cap_chan *chan)
1164{
1165 struct sk_buff *skb, *tx_skb;
1166 struct sock *sk = chan->sk;
1167 u16 control, fcs;
1168 int nsent = 0;
1169
1170 if (sk->sk_state != BT_CONNECTED)
1171 return -ENOTCONN;
1172
1173 while ((skb = chan->tx_send_head) && (!l2cap_tx_window_full(chan))) {
1174
1175 if (chan->remote_max_tx &&
1176 bt_cb(skb)->retries == chan->remote_max_tx) {
1177 l2cap_send_disconn_req(chan->conn, chan, ECONNABORTED);
1178 break;
1179 }
1180
1181 tx_skb = skb_clone(skb, GFP_ATOMIC);
1182
1183 bt_cb(skb)->retries++;
1184
1185 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
1186 control &= L2CAP_CTRL_SAR;
1187
1188 if (chan->conn_state & L2CAP_CONN_SEND_FBIT) {
1189 control |= L2CAP_CTRL_FINAL;
1190 chan->conn_state &= ~L2CAP_CONN_SEND_FBIT;
1191 }
1192 control |= (chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
1193 | (chan->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
1194 put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
1195
1196
1197 if (chan->fcs == L2CAP_FCS_CRC16) {
1198 fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2);
1199 put_unaligned_le16(fcs, skb->data + tx_skb->len - 2);
1200 }
1201
1202 l2cap_do_send(chan, tx_skb);
1203
1204 __mod_retrans_timer();
1205
1206 bt_cb(skb)->tx_seq = chan->next_tx_seq;
1207 chan->next_tx_seq = (chan->next_tx_seq + 1) % 64;
1208
1209 if (bt_cb(skb)->retries == 1)
1210 chan->unacked_frames++;
1211
1212 chan->frames_sent++;
1213
1214 if (skb_queue_is_last(&chan->tx_q, skb))
1215 chan->tx_send_head = NULL;
1216 else
1217 chan->tx_send_head = skb_queue_next(&chan->tx_q, skb);
1218
1219 nsent++;
1220 }
1221
1222 return nsent;
1223}
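
l2cap_ertm_send() stops as soon as l2cap_tx_window_full() reports that the peer's window is exhausted. That helper lives in l2cap.h; the sketch below is an assumption about its shape, showing only the modulo-64 arithmetic that the 6-bit TxSeq/ReqSeq numbering forces on the unacked-frame count:

#include <stdio.h>

static int tx_window_full(int next_tx_seq, int expected_ack_seq, int tx_win)
{
	int unacked = (next_tx_seq - expected_ack_seq) % 64;

	if (unacked < 0)
		unacked += 64;		/* sequence numbers wrapped */

	return unacked >= tx_win;
}

int main(void)
{
	printf("%d\n", tx_window_full(10, 5, 32));	/* 0: 5 frames in flight */
	printf("%d\n", tx_window_full(3, 35, 32));	/* 1: 32 in flight, wrapped */
	return 0;
}
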
1224
1225static int l2cap_retransmit_frames(struct l2cap_chan *chan)
1226{
1227 int ret;
1228
1229 if (!skb_queue_empty(&chan->tx_q))
1230 chan->tx_send_head = chan->tx_q.next;
1231
1232 chan->next_tx_seq = chan->expected_ack_seq;
1233 ret = l2cap_ertm_send(chan);
1234 return ret;
1235}
1236
1237static void l2cap_send_ack(struct l2cap_chan *chan)
1238{
1239 u16 control = 0;
1240
1241 control |= chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
1242
1243 if (chan->conn_state & L2CAP_CONN_LOCAL_BUSY) {
1244 control |= L2CAP_SUPER_RCV_NOT_READY;
1245 chan->conn_state |= L2CAP_CONN_RNR_SENT;
1246 l2cap_send_sframe(chan, control);
1247 return;
1248 }
1249
1250 if (l2cap_ertm_send(chan) > 0)
1251 return;
1252
1253 control |= L2CAP_SUPER_RCV_READY;
1254 l2cap_send_sframe(chan, control);
1255}
1256
1257static void l2cap_send_srejtail(struct l2cap_chan *chan)
1258{
1259 struct srej_list *tail;
1260 u16 control;
1261
1262 control = L2CAP_SUPER_SELECT_REJECT;
1263 control |= L2CAP_CTRL_FINAL;
1264
1265 tail = list_entry((&chan->srej_l)->prev, struct srej_list, list);
1266 control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
1267
1268 l2cap_send_sframe(chan, control);
1269}
1270
1271static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb)
1272{
1273 struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn;
1274 struct sk_buff **frag;
1275 int err, sent = 0;
1276
1277 if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count))
1278 return -EFAULT;
1279
1280 sent += count;
1281 len -= count;
1282
1283 /* Continuation fragments (no L2CAP header) */
1284 frag = &skb_shinfo(skb)->frag_list;
1285 while (len) {
1286 count = min_t(unsigned int, conn->mtu, len);
1287
1288 *frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err);
1289 if (!*frag)
1290 return err;
1291 if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count))
1292 return -EFAULT;
1293
1294 sent += count;
1295 len -= count;
1296
1297 frag = &(*frag)->next;
1298 }
1299
1300 return sent;
1301}
1302
1303struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
1304{
1305 struct sock *sk = chan->sk;
1306 struct l2cap_conn *conn = chan->conn;
1307 struct sk_buff *skb;
1308 int err, count, hlen = L2CAP_HDR_SIZE + 2;
1309 struct l2cap_hdr *lh;
1310
1311 BT_DBG("sk %p len %d", sk, (int)len);
1312
1313 count = min_t(unsigned int, (conn->mtu - hlen), len);
1314 skb = bt_skb_send_alloc(sk, count + hlen,
1315 msg->msg_flags & MSG_DONTWAIT, &err);
1316 if (!skb)
1317 return ERR_PTR(err);
1318
1319 /* Create L2CAP header */
1320 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1321 lh->cid = cpu_to_le16(chan->dcid);
1322 lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
1323 put_unaligned_le16(chan->psm, skb_put(skb, 2));
1324
1325 err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb);
1326 if (unlikely(err < 0)) {
1327 kfree_skb(skb);
1328 return ERR_PTR(err);
1329 }
1330 return skb;
1331}
1332
1333struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
1334{
1335 struct sock *sk = chan->sk;
1336 struct l2cap_conn *conn = chan->conn;
1337 struct sk_buff *skb;
1338 int err, count, hlen = L2CAP_HDR_SIZE;
1339 struct l2cap_hdr *lh;
1340
1341 BT_DBG("sk %p len %d", sk, (int)len);
1342
1343 count = min_t(unsigned int, (conn->mtu - hlen), len);
1344 skb = bt_skb_send_alloc(sk, count + hlen,
1345 msg->msg_flags & MSG_DONTWAIT, &err);
1346 if (!skb)
1347 return ERR_PTR(err);
1348
1349 /* Create L2CAP header */
1350 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1351 lh->cid = cpu_to_le16(chan->dcid);
1352 lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
1353
1354 err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb);
1355 if (unlikely(err < 0)) {
1356 kfree_skb(skb);
1357 return ERR_PTR(err);
1358 }
1359 return skb;
1360}
1361
1362struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u16 control, u16 sdulen)
1363{
1364 struct sock *sk = chan->sk;
1365 struct l2cap_conn *conn = chan->conn;
1366 struct sk_buff *skb;
1367 int err, count, hlen = L2CAP_HDR_SIZE + 2;
1368 struct l2cap_hdr *lh;
1369
1370 BT_DBG("sk %p len %d", sk, (int)len);
1371
1372 if (!conn)
1373 return ERR_PTR(-ENOTCONN);
1374
1375 if (sdulen)
1376 hlen += 2;
1377
1378 if (chan->fcs == L2CAP_FCS_CRC16)
1379 hlen += 2;
1380
1381 count = min_t(unsigned int, (conn->mtu - hlen), len);
1382 skb = bt_skb_send_alloc(sk, count + hlen,
1383 msg->msg_flags & MSG_DONTWAIT, &err);
1384 if (!skb)
1385 return ERR_PTR(err);
1386
1387 /* Create L2CAP header */
1388 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1389 lh->cid = cpu_to_le16(chan->dcid);
1390 lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
1391 put_unaligned_le16(control, skb_put(skb, 2));
1392 if (sdulen)
1393 put_unaligned_le16(sdulen, skb_put(skb, 2));
1394
1395 err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb);
1396 if (unlikely(err < 0)) {
1397 kfree_skb(skb);
1398 return ERR_PTR(err);
1399 }
1400
1401 if (chan->fcs == L2CAP_FCS_CRC16)
1402 put_unaligned_le16(0, skb_put(skb, 2));
1403
1404 bt_cb(skb)->retries = 0;
1405 return skb;
1406}
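
l2cap_create_iframe_pdu() only reserves the two FCS bytes; the checksum itself is filled in by the streaming and ERTM send paths earlier in this file with crc16(0, data, len). A self-contained sketch of that checksum, assuming the CRC-16/IBM form (reflected polynomial 0xA001, initial value 0) implemented by lib/crc16:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint16_t fcs_crc16(const uint8_t *data, size_t len)
{
	uint16_t crc = 0;

	while (len--) {
		crc ^= *data++;
		for (int bit = 0; bit < 8; bit++)
			crc = (crc & 1) ? (crc >> 1) ^ 0xA001 : crc >> 1;
	}
	return crc;
}

int main(void)
{
	const uint8_t pdu[] = "123456789";

	/* The standard CRC-16/ARC check value for "123456789" is 0xbb3d. */
	printf("0x%04x\n", fcs_crc16(pdu, strlen((const char *)pdu)));
	return 0;
}
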
1407
1408int l2cap_sar_segment_sdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
1409{
1410 struct sk_buff *skb;
1411 struct sk_buff_head sar_queue;
1412 u16 control;
1413 size_t size = 0;
1414
1415 skb_queue_head_init(&sar_queue);
1416 control = L2CAP_SDU_START;
1417 skb = l2cap_create_iframe_pdu(chan, msg, chan->remote_mps, control, len);
1418 if (IS_ERR(skb))
1419 return PTR_ERR(skb);
1420
1421 __skb_queue_tail(&sar_queue, skb);
1422 len -= chan->remote_mps;
1423 size += chan->remote_mps;
1424
1425 while (len > 0) {
1426 size_t buflen;
1427
1428 if (len > chan->remote_mps) {
1429 control = L2CAP_SDU_CONTINUE;
1430 buflen = chan->remote_mps;
1431 } else {
1432 control = L2CAP_SDU_END;
1433 buflen = len;
1434 }
1435
1436 skb = l2cap_create_iframe_pdu(chan, msg, buflen, control, 0);
1437 if (IS_ERR(skb)) {
1438 skb_queue_purge(&sar_queue);
1439 return PTR_ERR(skb);
1440 }
1441
1442 __skb_queue_tail(&sar_queue, skb);
1443 len -= buflen;
1444 size += buflen;
1445 }
1446 skb_queue_splice_tail(&sar_queue, &chan->tx_q);
1447 if (chan->tx_send_head == NULL)
1448 chan->tx_send_head = sar_queue.next;
1449
1450 return size;
1451}
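
l2cap_sar_segment_sdu() turns one oversized SDU into an SDU_START PDU (carrying the total SDU length), zero or more SDU_CONTINUE PDUs, and a final SDU_END PDU, each limited to remote_mps bytes of payload; SDUs that already fit in a single PDU are sent unsegmented elsewhere. A user-space sketch of that split for SDUs larger than remote_mps (count_sar_pdus is a hypothetical helper, not part of this file):

#include <stddef.h>
#include <stdio.h>

static int count_sar_pdus(size_t sdu_len, size_t remote_mps)
{
	int pdus = 1;			/* SDU_START carries remote_mps bytes */
	size_t left = sdu_len - remote_mps;

	while (left > 0) {
		size_t chunk = (left > remote_mps) ? remote_mps : left;

		left -= chunk;		/* SDU_CONTINUE, or SDU_END for the last chunk */
		pdus++;
	}
	return pdus;
}

int main(void)
{
	printf("%d\n", count_sar_pdus(1500, 672));	/* 3 PDUs */
	printf("%d\n", count_sar_pdus(2017, 672));	/* 4 PDUs */
	return 0;
}
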
1452
1453static void l2cap_chan_ready(struct sock *sk)
1454{
1455 struct sock *parent = bt_sk(sk)->parent;
1456 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
1457
1458 BT_DBG("sk %p, parent %p", sk, parent);
1459
1460 chan->conf_state = 0;
1461 l2cap_sock_clear_timer(sk);
1462
1463 if (!parent) {
1464 /* Outgoing channel.
1465 * Wake up socket sleeping on connect.
1466 */
1467 sk->sk_state = BT_CONNECTED;
1468 sk->sk_state_change(sk);
1469 } else {
1470 /* Incoming channel.
1471 * Wake up socket sleeping on accept.
1472 */
1473 parent->sk_data_ready(parent, 0);
1474 }
1475}
1476
1477/* Copy frame to all raw sockets on that connection */
1478static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb)
1479{
1480 struct sk_buff *nskb;
1481 struct l2cap_chan *chan;
1482
1483 BT_DBG("conn %p", conn);
1484
1485 read_lock(&conn->chan_lock);
1486 list_for_each_entry(chan, &conn->chan_l, list) {
1487 struct sock *sk = chan->sk;
1488 if (sk->sk_type != SOCK_RAW)
1489 continue;
1490
1491 /* Don't send frame to the socket it came from */
1492 if (skb->sk == sk)
1493 continue;
1494 nskb = skb_clone(skb, GFP_ATOMIC);
1495 if (!nskb)
1496 continue;
1497
1498 if (sock_queue_rcv_skb(sk, nskb))
1499 kfree_skb(nskb);
1500 }
1501 read_unlock(&conn->chan_lock);
1502}
1503
1504/* ---- L2CAP signalling commands ---- */
1505static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
1506 u8 code, u8 ident, u16 dlen, void *data)
1507{
1508 struct sk_buff *skb, **frag;
1509 struct l2cap_cmd_hdr *cmd;
1510 struct l2cap_hdr *lh;
1511 int len, count;
1512
1513 BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %d",
1514 conn, code, ident, dlen);
1515
1516 len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen;
1517 count = min_t(unsigned int, conn->mtu, len);
1518
1519 skb = bt_skb_alloc(count, GFP_ATOMIC);
1520 if (!skb)
1521 return NULL;
1522
1523 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1524 lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
1525
1526 if (conn->hcon->type == LE_LINK)
1527 lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING);
1528 else
1529 lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
1530
1531 cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
1532 cmd->code = code;
1533 cmd->ident = ident;
1534 cmd->len = cpu_to_le16(dlen);
1535
1536 if (dlen) {
1537 count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
1538 memcpy(skb_put(skb, count), data, count);
1539 data += count;
1540 }
1541
1542 len -= skb->len;
1543
1544 /* Continuation fragments (no L2CAP header) */
1545 frag = &skb_shinfo(skb)->frag_list;
1546 while (len) {
1547 count = min_t(unsigned int, conn->mtu, len);
1548
1549 *frag = bt_skb_alloc(count, GFP_ATOMIC);
1550 if (!*frag)
1551 goto fail;
1552
1553 memcpy(skb_put(*frag, count), data, count);
1554
1555 len -= count;
1556 data += count;
1557
1558 frag = &(*frag)->next;
1559 }
1560
1561 return skb;
1562
1563fail:
1564 kfree_skb(skb);
1565 return NULL;
1566}
1567
1568static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned long *val)
1569{
1570 struct l2cap_conf_opt *opt = *ptr;
1571 int len;
1572
1573 len = L2CAP_CONF_OPT_SIZE + opt->len;
1574 *ptr += len;
1575
1576 *type = opt->type;
1577 *olen = opt->len;
1578
1579 switch (opt->len) {
1580 case 1:
1581 *val = *((u8 *) opt->val);
1582 break;
1583
1584 case 2:
1585 *val = get_unaligned_le16(opt->val);
1586 break;
1587
1588 case 4:
1589 *val = get_unaligned_le32(opt->val);
1590 break;
1591
1592 default:
1593 *val = (unsigned long) opt->val;
1594 break;
1595 }
1596
1597 BT_DBG("type 0x%2.2x len %d val 0x%lx", *type, opt->len, *val);
1598 return len;
1599}
1600
1601static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
1602{
1603 struct l2cap_conf_opt *opt = *ptr;
1604
1605 BT_DBG("type 0x%2.2x len %d val 0x%lx", type, len, val);
1606
1607 opt->type = type;
1608 opt->len = len;
1609
1610 switch (len) {
1611 case 1:
1612 *((u8 *) opt->val) = val;
1613 break;
1614
1615 case 2:
1616 put_unaligned_le16(val, opt->val);
1617 break;
1618
1619 case 4:
1620 put_unaligned_le32(val, opt->val);
1621 break;
1622
1623 default:
1624 memcpy(opt->val, (void *) val, len);
1625 break;
1626 }
1627
1628 *ptr += L2CAP_CONF_OPT_SIZE + len;
1629}
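
l2cap_get_conf_opt() and l2cap_add_conf_opt() walk configuration options as a flat type/length/value stream: one byte of type, one byte of length, then a little-endian value. The buffer below is a hypothetical configure-request body carrying only an MTU option (type 0x01, length 2), decoded the same way:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* type 0x01 (MTU), len 2, value 0x02a0 = 672 little-endian */
	uint8_t opts[] = { 0x01, 0x02, 0xa0, 0x02 };
	unsigned int off = 0;

	while (off + 2 <= sizeof(opts)) {
		uint8_t type = opts[off];
		uint8_t len = opts[off + 1];
		uint16_t val = 0;

		if (len == 2 && off + 2 + len <= sizeof(opts))
			val = opts[off + 2] | (opts[off + 3] << 8);
		printf("type 0x%02x len %u val %u\n", type, len, val);
		off += 2 + len;
	}
	return 0;
}
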
1630
1631static void l2cap_ack_timeout(unsigned long arg)
1632{
1633 struct l2cap_chan *chan = (void *) arg;
1634
1635 bh_lock_sock(chan->sk);
1636 l2cap_send_ack(chan);
1637 bh_unlock_sock(chan->sk);
1638}
1639
1640static inline void l2cap_ertm_init(struct l2cap_chan *chan)
1641{
1642 struct sock *sk = chan->sk;
1643
1644 chan->expected_ack_seq = 0;
1645 chan->unacked_frames = 0;
1646 chan->buffer_seq = 0;
1647 chan->num_acked = 0;
1648 chan->frames_sent = 0;
1649
1650 setup_timer(&chan->retrans_timer, l2cap_retrans_timeout,
1651 (unsigned long) chan);
1652 setup_timer(&chan->monitor_timer, l2cap_monitor_timeout,
1653 (unsigned long) chan);
1654 setup_timer(&chan->ack_timer, l2cap_ack_timeout, (unsigned long) chan);
1655
1656 skb_queue_head_init(&chan->srej_q);
1657 skb_queue_head_init(&chan->busy_q);
1658
1659 INIT_LIST_HEAD(&chan->srej_l);
1660
1661 INIT_WORK(&chan->busy_work, l2cap_busy_work);
1662
1663 sk->sk_backlog_rcv = l2cap_ertm_data_rcv;
1664}
1665
1666static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
1667{
1668 switch (mode) {
1669 case L2CAP_MODE_STREAMING:
1670 case L2CAP_MODE_ERTM:
1671 if (l2cap_mode_supported(mode, remote_feat_mask))
1672 return mode;
1673 /* fall through */
1674 default:
1675 return L2CAP_MODE_BASIC;
1676 }
1677}
1678
1679static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data)
1680{
1681 struct l2cap_conf_req *req = data;
1682 struct l2cap_conf_rfc rfc = { .mode = chan->mode };
1683 void *ptr = req->data;
1684
1685 BT_DBG("chan %p", chan);
1686
1687 if (chan->num_conf_req || chan->num_conf_rsp)
1688 goto done;
1689
1690 switch (chan->mode) {
1691 case L2CAP_MODE_STREAMING:
1692 case L2CAP_MODE_ERTM:
1693 if (chan->conf_state & L2CAP_CONF_STATE2_DEVICE)
1694 break;
1695
1696 /* fall through */
1697 default:
1698 chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask);
1699 break;
1700 }
1701
1702done:
1703 if (chan->imtu != L2CAP_DEFAULT_MTU)
1704 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
1705
1706 switch (chan->mode) {
1707 case L2CAP_MODE_BASIC:
1708 if (!(chan->conn->feat_mask & L2CAP_FEAT_ERTM) &&
1709 !(chan->conn->feat_mask & L2CAP_FEAT_STREAMING))
1710 break;
1711
1712 rfc.mode = L2CAP_MODE_BASIC;
1713 rfc.txwin_size = 0;
1714 rfc.max_transmit = 0;
1715 rfc.retrans_timeout = 0;
1716 rfc.monitor_timeout = 0;
1717 rfc.max_pdu_size = 0;
1718
1719 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
1720 (unsigned long) &rfc);
1721 break;
1722
1723 case L2CAP_MODE_ERTM:
1724 rfc.mode = L2CAP_MODE_ERTM;
1725 rfc.txwin_size = chan->tx_win;
1726 rfc.max_transmit = chan->max_tx;
1727 rfc.retrans_timeout = 0;
1728 rfc.monitor_timeout = 0;
1729 rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
1730 if (L2CAP_DEFAULT_MAX_PDU_SIZE > chan->conn->mtu - 10)
1731 rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10);
1732
1733 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
1734 (unsigned long) &rfc);
1735
1736 if (!(chan->conn->feat_mask & L2CAP_FEAT_FCS))
1737 break;
1738
1739 if (chan->fcs == L2CAP_FCS_NONE ||
1740 chan->conf_state & L2CAP_CONF_NO_FCS_RECV) {
1741 chan->fcs = L2CAP_FCS_NONE;
1742 l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, chan->fcs);
1743 }
1744 break;
1745
1746 case L2CAP_MODE_STREAMING:
1747 rfc.mode = L2CAP_MODE_STREAMING;
1748 rfc.txwin_size = 0;
1749 rfc.max_transmit = 0;
1750 rfc.retrans_timeout = 0;
1751 rfc.monitor_timeout = 0;
1752 rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
1753 if (L2CAP_DEFAULT_MAX_PDU_SIZE > chan->conn->mtu - 10)
1754 rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10);
1755
1756 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
1757 (unsigned long) &rfc);
1758
1759 if (!(chan->conn->feat_mask & L2CAP_FEAT_FCS))
1760 break;
1761
1762 if (chan->fcs == L2CAP_FCS_NONE ||
1763 chan->conf_state & L2CAP_CONF_NO_FCS_RECV) {
1764 chan->fcs = L2CAP_FCS_NONE;
1765 l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, chan->fcs);
1766 }
1767 break;
1768 }
1769
1770 req->dcid = cpu_to_le16(chan->dcid);
1771 req->flags = cpu_to_le16(0);
1772
1773 return ptr - data;
1774}
1775
1776static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data)
1777{
1778 struct l2cap_conf_rsp *rsp = data;
1779 void *ptr = rsp->data;
1780 void *req = chan->conf_req;
1781 int len = chan->conf_len;
1782 int type, hint, olen;
1783 unsigned long val;
1784 struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
1785 u16 mtu = L2CAP_DEFAULT_MTU;
1786 u16 result = L2CAP_CONF_SUCCESS;
1787
1788 BT_DBG("chan %p", chan);
1789
1790 while (len >= L2CAP_CONF_OPT_SIZE) {
1791 len -= l2cap_get_conf_opt(&req, &type, &olen, &val);
1792
1793 hint = type & L2CAP_CONF_HINT;
1794 type &= L2CAP_CONF_MASK;
1795
1796 switch (type) {
1797 case L2CAP_CONF_MTU:
1798 mtu = val;
1799 break;
1800
1801 case L2CAP_CONF_FLUSH_TO:
1802 chan->flush_to = val;
1803 break;
1804
1805 case L2CAP_CONF_QOS:
1806 break;
1807
1808 case L2CAP_CONF_RFC:
1809 if (olen == sizeof(rfc))
1810 memcpy(&rfc, (void *) val, olen);
1811 break;
1812
1813 case L2CAP_CONF_FCS:
1814 if (val == L2CAP_FCS_NONE)
1815 chan->conf_state |= L2CAP_CONF_NO_FCS_RECV;
1816
1817 break;
1818
1819 default:
1820 if (hint)
1821 break;
1822
1823 result = L2CAP_CONF_UNKNOWN;
1824 *((u8 *) ptr++) = type;
1825 break;
1826 }
1827 }
1828
1829 if (chan->num_conf_rsp || chan->num_conf_req > 1)
1830 goto done;
1831
1832 switch (chan->mode) {
1833 case L2CAP_MODE_STREAMING:
1834 case L2CAP_MODE_ERTM:
1835 if (!(chan->conf_state & L2CAP_CONF_STATE2_DEVICE)) {
1836 chan->mode = l2cap_select_mode(rfc.mode,
1837 chan->conn->feat_mask);
1838 break;
1839 }
1840
1841 if (chan->mode != rfc.mode)
1842 return -ECONNREFUSED;
1843
1844 break;
1845 }
1846
1847done:
1848 if (chan->mode != rfc.mode) {
1849 result = L2CAP_CONF_UNACCEPT;
1850 rfc.mode = chan->mode;
1851
1852 if (chan->num_conf_rsp == 1)
1853 return -ECONNREFUSED;
1854
1855 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
1856 sizeof(rfc), (unsigned long) &rfc);
1857 }
1858
1859
1860 if (result == L2CAP_CONF_SUCCESS) {
1861 /* Configure output options and let the other side know
1862 * which ones we don't like. */
1863
1864 if (mtu < L2CAP_DEFAULT_MIN_MTU)
1865 result = L2CAP_CONF_UNACCEPT;
1866 else {
1867 chan->omtu = mtu;
1868 chan->conf_state |= L2CAP_CONF_MTU_DONE;
1869 }
1870 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu);
1871
1872 switch (rfc.mode) {
1873 case L2CAP_MODE_BASIC:
1874 chan->fcs = L2CAP_FCS_NONE;
1875 chan->conf_state |= L2CAP_CONF_MODE_DONE;
1876 break;
1877
1878 case L2CAP_MODE_ERTM:
1879 chan->remote_tx_win = rfc.txwin_size;
1880 chan->remote_max_tx = rfc.max_transmit;
1881
1882 if (le16_to_cpu(rfc.max_pdu_size) > chan->conn->mtu - 10)
1883 rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10);
1884
1885 chan->remote_mps = le16_to_cpu(rfc.max_pdu_size);
1886
1887 rfc.retrans_timeout =
1888 le16_to_cpu(L2CAP_DEFAULT_RETRANS_TO);
1889 rfc.monitor_timeout =
1890 le16_to_cpu(L2CAP_DEFAULT_MONITOR_TO);
1891
1892 chan->conf_state |= L2CAP_CONF_MODE_DONE;
1893
1894 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
1895 sizeof(rfc), (unsigned long) &rfc);
1896
1897 break;
1898
1899 case L2CAP_MODE_STREAMING:
1900 if (le16_to_cpu(rfc.max_pdu_size) > chan->conn->mtu - 10)
1901 rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10);
1902
1903 chan->remote_mps = le16_to_cpu(rfc.max_pdu_size);
1904
1905 chan->conf_state |= L2CAP_CONF_MODE_DONE;
1906
1907 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
1908 sizeof(rfc), (unsigned long) &rfc);
1909
1910 break;
1911
1912 default:
1913 result = L2CAP_CONF_UNACCEPT;
1914
1915 memset(&rfc, 0, sizeof(rfc));
1916 rfc.mode = chan->mode;
1917 }
1918
1919 if (result == L2CAP_CONF_SUCCESS)
1920 chan->conf_state |= L2CAP_CONF_OUTPUT_DONE;
1921 }
1922 rsp->scid = cpu_to_le16(chan->dcid);
1923 rsp->result = cpu_to_le16(result);
1924 rsp->flags = cpu_to_le16(0x0000);
1925
1926 return ptr - data;
1927}
1928
1929static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, void *data, u16 *result)
1930{
1931 struct l2cap_conf_req *req = data;
1932 void *ptr = req->data;
1933 int type, olen;
1934 unsigned long val;
1935 struct l2cap_conf_rfc rfc;
1936
1937 BT_DBG("chan %p, rsp %p, len %d, req %p", chan, rsp, len, data);
1938
1939 while (len >= L2CAP_CONF_OPT_SIZE) {
1940 len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
1941
1942 switch (type) {
1943 case L2CAP_CONF_MTU:
1944 if (val < L2CAP_DEFAULT_MIN_MTU) {
1945 *result = L2CAP_CONF_UNACCEPT;
1946 chan->imtu = L2CAP_DEFAULT_MIN_MTU;
1947 } else
1948 chan->imtu = val;
1949 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
1950 break;
1951
1952 case L2CAP_CONF_FLUSH_TO:
1953 chan->flush_to = val;
1954 l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO,
1955 2, chan->flush_to);
1956 break;
1957
1958 case L2CAP_CONF_RFC:
1959 if (olen == sizeof(rfc))
1960 memcpy(&rfc, (void *)val, olen);
1961
1962 if ((chan->conf_state & L2CAP_CONF_STATE2_DEVICE) &&
1963 rfc.mode != chan->mode)
1964 return -ECONNREFUSED;
1965
1966 chan->fcs = 0;
1967
1968 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
1969 sizeof(rfc), (unsigned long) &rfc);
1970 break;
1971 }
1972 }
1973
1974 if (chan->mode == L2CAP_MODE_BASIC && chan->mode != rfc.mode)
1975 return -ECONNREFUSED;
1976
1977 chan->mode = rfc.mode;
1978
1979 if (*result == L2CAP_CONF_SUCCESS) {
1980 switch (rfc.mode) {
1981 case L2CAP_MODE_ERTM:
1982 chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
1983 chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
1984 chan->mps = le16_to_cpu(rfc.max_pdu_size);
1985 break;
1986 case L2CAP_MODE_STREAMING:
1987 chan->mps = le16_to_cpu(rfc.max_pdu_size);
1988 }
1989 }
1990
1991 req->dcid = cpu_to_le16(chan->dcid);
1992 req->flags = cpu_to_le16(0x0000);
1993
1994 return ptr - data;
1995}
1996
1997static int l2cap_build_conf_rsp(struct l2cap_chan *chan, void *data, u16 result, u16 flags)
1998{
1999 struct l2cap_conf_rsp *rsp = data;
2000 void *ptr = rsp->data;
2001
2002 BT_DBG("chan %p", chan);
2003
2004 rsp->scid = cpu_to_le16(chan->dcid);
2005 rsp->result = cpu_to_le16(result);
2006 rsp->flags = cpu_to_le16(flags);
2007
2008 return ptr - data;
2009}
2010
2011void __l2cap_connect_rsp_defer(struct l2cap_chan *chan)
2012{
2013 struct l2cap_conn_rsp rsp;
2014 struct l2cap_conn *conn = chan->conn;
2015 u8 buf[128];
2016
2017 rsp.scid = cpu_to_le16(chan->dcid);
2018 rsp.dcid = cpu_to_le16(chan->scid);
2019 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
2020 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
2021 l2cap_send_cmd(conn, chan->ident,
2022 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
2023
2024 if (chan->conf_state & L2CAP_CONF_REQ_SENT)
2025 return;
2026
2027 chan->conf_state |= L2CAP_CONF_REQ_SENT;
2028 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
2029 l2cap_build_conf_req(chan, buf), buf);
2030 chan->num_conf_req++;
2031}
2032
2033static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len)
2034{
2035 int type, olen;
2036 unsigned long val;
2037 struct l2cap_conf_rfc rfc;
2038
2039 BT_DBG("chan %p, rsp %p, len %d", chan, rsp, len);
2040
2041 if ((chan->mode != L2CAP_MODE_ERTM) && (chan->mode != L2CAP_MODE_STREAMING))
2042 return;
2043
2044 while (len >= L2CAP_CONF_OPT_SIZE) {
2045 len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
2046
2047 switch (type) {
2048 case L2CAP_CONF_RFC:
2049 if (olen == sizeof(rfc))
2050 memcpy(&rfc, (void *)val, olen);
2051 goto done;
2052 }
2053 }
2054
2055done:
2056 switch (rfc.mode) {
2057 case L2CAP_MODE_ERTM:
2058 chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
2059 chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
2060 chan->mps = le16_to_cpu(rfc.max_pdu_size);
2061 break;
2062 case L2CAP_MODE_STREAMING:
2063 chan->mps = le16_to_cpu(rfc.max_pdu_size);
2064 }
2065}
2066
2067static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2068{
2069 struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data;
2070
2071 if (rej->reason != 0x0000)
2072 return 0;
2073
2074 if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) &&
2075 cmd->ident == conn->info_ident) {
2076 del_timer(&conn->info_timer);
2077
2078 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2079 conn->info_ident = 0;
2080
2081 l2cap_conn_start(conn);
2082 }
2083
2084 return 0;
2085}
2086
2087static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2088{
2089 struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
2090 struct l2cap_conn_rsp rsp;
2091 struct l2cap_chan *chan = NULL, *pchan;
2092 struct sock *parent, *sk = NULL;
2093 int result, status = L2CAP_CS_NO_INFO;
2094
2095 u16 dcid = 0, scid = __le16_to_cpu(req->scid);
2096 __le16 psm = req->psm;
2097
2098 BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid);
2099
2100 /* Check if we have socket listening on psm */
2101 pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, conn->src);
2102 if (!pchan) {
2103 result = L2CAP_CR_BAD_PSM;
2104 goto sendresp;
2105 }
2106
2107 parent = pchan->sk;
2108
2109 bh_lock_sock(parent);
2110
2111 /* Check if the ACL is secure enough (if not SDP) */
2112 if (psm != cpu_to_le16(0x0001) &&
2113 !hci_conn_check_link_mode(conn->hcon)) {
2114 conn->disc_reason = 0x05;
2115 result = L2CAP_CR_SEC_BLOCK;
2116 goto response;
2117 }
2118
2119 result = L2CAP_CR_NO_MEM;
2120
2121 /* Check for backlog size */
2122 if (sk_acceptq_is_full(parent)) {
2123 BT_DBG("backlog full %d", parent->sk_ack_backlog);
2124 goto response;
2125 }
2126
2127 sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC);
2128 if (!sk)
2129 goto response;
2130
2131 chan = l2cap_chan_create(sk);
2132 if (!chan) {
2133 l2cap_sock_kill(sk);
2134 goto response;
2135 }
2136
2137 l2cap_pi(sk)->chan = chan;
2138
2139 write_lock_bh(&conn->chan_lock);
2140
2141 /* Check if we already have channel with that dcid */
2142 if (__l2cap_get_chan_by_dcid(conn, scid)) {
2143 write_unlock_bh(&conn->chan_lock);
2144 sock_set_flag(sk, SOCK_ZAPPED);
2145 l2cap_sock_kill(sk);
2146 goto response;
2147 }
2148
2149 hci_conn_hold(conn->hcon);
2150
2151 l2cap_sock_init(sk, parent);
2152 bacpy(&bt_sk(sk)->src, conn->src);
2153 bacpy(&bt_sk(sk)->dst, conn->dst);
2154 chan->psm = psm;
2155 chan->dcid = scid;
2156
2157 bt_accept_enqueue(parent, sk);
2158
2159 __l2cap_chan_add(conn, chan);
2160
2161 dcid = chan->scid;
2162
2163 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
2164
2165 chan->ident = cmd->ident;
2166
2167 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) {
2168 if (l2cap_check_security(chan)) {
2169 if (bt_sk(sk)->defer_setup) {
2170 sk->sk_state = BT_CONNECT2;
2171 result = L2CAP_CR_PEND;
2172 status = L2CAP_CS_AUTHOR_PEND;
2173 parent->sk_data_ready(parent, 0);
2174 } else {
2175 sk->sk_state = BT_CONFIG;
2176 result = L2CAP_CR_SUCCESS;
2177 status = L2CAP_CS_NO_INFO;
2178 }
2179 } else {
2180 sk->sk_state = BT_CONNECT2;
2181 result = L2CAP_CR_PEND;
2182 status = L2CAP_CS_AUTHEN_PEND;
2183 }
2184 } else {
2185 sk->sk_state = BT_CONNECT2;
2186 result = L2CAP_CR_PEND;
2187 status = L2CAP_CS_NO_INFO;
2188 }
2189
2190 write_unlock_bh(&conn->chan_lock);
2191
2192response:
2193 bh_unlock_sock(parent);
2194
2195sendresp:
2196 rsp.scid = cpu_to_le16(scid);
2197 rsp.dcid = cpu_to_le16(dcid);
2198 rsp.result = cpu_to_le16(result);
2199 rsp.status = cpu_to_le16(status);
2200 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
2201
2202 if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) {
2203 struct l2cap_info_req info;
2204 info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
2205
2206 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
2207 conn->info_ident = l2cap_get_ident(conn);
2208
2209 mod_timer(&conn->info_timer, jiffies +
2210 msecs_to_jiffies(L2CAP_INFO_TIMEOUT));
2211
2212 l2cap_send_cmd(conn, conn->info_ident,
2213 L2CAP_INFO_REQ, sizeof(info), &info);
2214 }
2215
2216 if (chan && !(chan->conf_state & L2CAP_CONF_REQ_SENT) &&
2217 result == L2CAP_CR_SUCCESS) {
2218 u8 buf[128];
2219 chan->conf_state |= L2CAP_CONF_REQ_SENT;
2220 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
2221 l2cap_build_conf_req(chan, buf), buf);
2222 chan->num_conf_req++;
2223 }
2224
2225 return 0;
2226}
2227
2228static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2229{
2230 struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data;
2231 u16 scid, dcid, result, status;
2232 struct l2cap_chan *chan;
2233 struct sock *sk;
2234 u8 req[128];
2235
2236 scid = __le16_to_cpu(rsp->scid);
2237 dcid = __le16_to_cpu(rsp->dcid);
2238 result = __le16_to_cpu(rsp->result);
2239 status = __le16_to_cpu(rsp->status);
2240
2241 BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status);
2242
2243 if (scid) {
2244 chan = l2cap_get_chan_by_scid(conn, scid);
2245 if (!chan)
2246 return -EFAULT;
2247 } else {
2248 chan = l2cap_get_chan_by_ident(conn, cmd->ident);
2249 if (!chan)
2250 return -EFAULT;
2251 }
2252
2253 sk = chan->sk;
2254
2255 switch (result) {
2256 case L2CAP_CR_SUCCESS:
2257 sk->sk_state = BT_CONFIG;
2258 chan->ident = 0;
2259 chan->dcid = dcid;
2260 chan->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
2261
2262 if (chan->conf_state & L2CAP_CONF_REQ_SENT)
2263 break;
2264
2265 chan->conf_state |= L2CAP_CONF_REQ_SENT;
2266
2267 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
2268 l2cap_build_conf_req(chan, req), req);
2269 chan->num_conf_req++;
2270 break;
2271
2272 case L2CAP_CR_PEND:
2273 chan->conf_state |= L2CAP_CONF_CONNECT_PEND;
2274 break;
2275
2276 default:
2277 /* don't delete l2cap channel if sk is owned by user */
2278 if (sock_owned_by_user(sk)) {
2279 sk->sk_state = BT_DISCONN;
2280 l2cap_sock_clear_timer(sk);
2281 l2cap_sock_set_timer(sk, HZ / 5);
2282 break;
2283 }
2284
2285 l2cap_chan_del(chan, ECONNREFUSED);
2286 break;
2287 }
2288
2289 bh_unlock_sock(sk);
2290 return 0;
2291}
2292
2293static inline void set_default_fcs(struct l2cap_chan *chan)
2294{
2295 struct l2cap_pinfo *pi = l2cap_pi(chan->sk);
2296
2297 /* FCS is enabled only in ERTM or streaming mode, if one or both
2298 * sides request it.
2299 */
2300 if (chan->mode != L2CAP_MODE_ERTM && chan->mode != L2CAP_MODE_STREAMING)
2301 chan->fcs = L2CAP_FCS_NONE;
2302 else if (!(pi->chan->conf_state & L2CAP_CONF_NO_FCS_RECV))
2303 chan->fcs = L2CAP_FCS_CRC16;
2304}
2305
2306static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
2307{
2308 struct l2cap_conf_req *req = (struct l2cap_conf_req *) data;
2309 u16 dcid, flags;
2310 u8 rsp[64];
2311 struct l2cap_chan *chan;
2312 struct sock *sk;
2313 int len;
2314
2315 dcid = __le16_to_cpu(req->dcid);
2316 flags = __le16_to_cpu(req->flags);
2317
2318 BT_DBG("dcid 0x%4.4x flags 0x%2.2x", dcid, flags);
2319
2320 chan = l2cap_get_chan_by_scid(conn, dcid);
2321 if (!chan)
2322 return -ENOENT;
2323
2324 sk = chan->sk;
2325
2326 if (sk->sk_state != BT_CONFIG) {
2327 struct l2cap_cmd_rej rej;
2328
2329 rej.reason = cpu_to_le16(0x0002);
2330 l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
2331 sizeof(rej), &rej);
2332 goto unlock;
2333 }
2334
2335 /* Reject if config buffer is too small. */
2336 len = cmd_len - sizeof(*req);
2337 if (chan->conf_len + len > sizeof(chan->conf_req)) {
2338 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
2339 l2cap_build_conf_rsp(chan, rsp,
2340 L2CAP_CONF_REJECT, flags), rsp);
2341 goto unlock;
2342 }
2343
2344 /* Store config. */
2345 memcpy(chan->conf_req + chan->conf_len, req->data, len);
2346 chan->conf_len += len;
2347
2348 if (flags & 0x0001) {
2349 /* Incomplete config. Send empty response. */
2350 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
2351 l2cap_build_conf_rsp(chan, rsp,
2352 L2CAP_CONF_SUCCESS, 0x0001), rsp);
2353 goto unlock;
2354 }
2355
2356 /* Complete config. */
2357 len = l2cap_parse_conf_req(chan, rsp);
2358 if (len < 0) {
2359 l2cap_send_disconn_req(conn, chan, ECONNRESET);
2360 goto unlock;
2361 }
2362
2363 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp);
2364 chan->num_conf_rsp++;
2365
2366 /* Reset config buffer. */
2367 chan->conf_len = 0;
2368
2369 if (!(chan->conf_state & L2CAP_CONF_OUTPUT_DONE))
2370 goto unlock;
2371
2372 if (chan->conf_state & L2CAP_CONF_INPUT_DONE) {
2373 set_default_fcs(chan);
2374
2375 sk->sk_state = BT_CONNECTED;
2376
2377 chan->next_tx_seq = 0;
2378 chan->expected_tx_seq = 0;
2379 skb_queue_head_init(&chan->tx_q);
2380 if (chan->mode == L2CAP_MODE_ERTM)
2381 l2cap_ertm_init(chan);
2382
2383 l2cap_chan_ready(sk);
2384 goto unlock;
2385 }
2386
2387 if (!(chan->conf_state & L2CAP_CONF_REQ_SENT)) {
2388 u8 buf[64];
2389 chan->conf_state |= L2CAP_CONF_REQ_SENT;
2390 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
2391 l2cap_build_conf_req(chan, buf), buf);
2392 chan->num_conf_req++;
2393 }
2394
2395unlock:
2396 bh_unlock_sock(sk);
2397 return 0;
2398}
2399
2400static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2401{
2402 struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data;
2403 u16 scid, flags, result;
2404 struct l2cap_chan *chan;
2405 struct sock *sk;
2406 int len = cmd->len - sizeof(*rsp);
2407
2408 scid = __le16_to_cpu(rsp->scid);
2409 flags = __le16_to_cpu(rsp->flags);
2410 result = __le16_to_cpu(rsp->result);
2411
2412 BT_DBG("scid 0x%4.4x flags 0x%2.2x result 0x%2.2x",
2413 scid, flags, result);
2414
2415 chan = l2cap_get_chan_by_scid(conn, scid);
2416 if (!chan)
2417 return 0;
2418
2419 sk = chan->sk;
2420
2421 switch (result) {
2422 case L2CAP_CONF_SUCCESS:
2423 l2cap_conf_rfc_get(chan, rsp->data, len);
2424 break;
2425
2426 case L2CAP_CONF_UNACCEPT:
2427 if (chan->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) {
2428 char req[64];
2429
2430 if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) {
2431 l2cap_send_disconn_req(conn, chan, ECONNRESET);
2432 goto done;
2433 }
2434
2435 /* throw out any old stored conf requests */
2436 result = L2CAP_CONF_SUCCESS;
2437 len = l2cap_parse_conf_rsp(chan, rsp->data, len,
2438 req, &result);
2439 if (len < 0) {
2440 l2cap_send_disconn_req(conn, chan, ECONNRESET);
2441 goto done;
2442 }
2443
2444 l2cap_send_cmd(conn, l2cap_get_ident(conn),
2445 L2CAP_CONF_REQ, len, req);
2446 chan->num_conf_req++;
2447 if (result != L2CAP_CONF_SUCCESS)
2448 goto done;
2449 break;
2450 }
2451
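		/* fall through */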
2452 default:
2453 sk->sk_err = ECONNRESET;
2454 l2cap_sock_set_timer(sk, HZ * 5);
2455 l2cap_send_disconn_req(conn, chan, ECONNRESET);
2456 goto done;
2457 }
2458
2459 if (flags & 0x01)
2460 goto done;
2461
2462 chan->conf_state |= L2CAP_CONF_INPUT_DONE;
2463
2464 if (chan->conf_state & L2CAP_CONF_OUTPUT_DONE) {
2465 set_default_fcs(chan);
2466
2467 sk->sk_state = BT_CONNECTED;
2468 chan->next_tx_seq = 0;
2469 chan->expected_tx_seq = 0;
2470 skb_queue_head_init(&chan->tx_q);
2471 if (chan->mode == L2CAP_MODE_ERTM)
2472 l2cap_ertm_init(chan);
2473
2474 l2cap_chan_ready(sk);
2475 }
2476
2477done:
2478 bh_unlock_sock(sk);
2479 return 0;
2480}
2481
2482static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2483{
2484 struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data;
2485 struct l2cap_disconn_rsp rsp;
2486 u16 dcid, scid;
2487 struct l2cap_chan *chan;
2488 struct sock *sk;
2489
2490 scid = __le16_to_cpu(req->scid);
2491 dcid = __le16_to_cpu(req->dcid);
2492
2493 BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid);
2494
2495 chan = l2cap_get_chan_by_scid(conn, dcid);
2496 if (!chan)
2497 return 0;
2498
2499 sk = chan->sk;
2500
2501 rsp.dcid = cpu_to_le16(chan->scid);
2502 rsp.scid = cpu_to_le16(chan->dcid);
2503 l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
2504
2505 sk->sk_shutdown = SHUTDOWN_MASK;
2506
2507 /* don't delete l2cap channel if sk is owned by user */
2508 if (sock_owned_by_user(sk)) {
2509 sk->sk_state = BT_DISCONN;
2510 l2cap_sock_clear_timer(sk);
2511 l2cap_sock_set_timer(sk, HZ / 5);
2512 bh_unlock_sock(sk);
2513 return 0;
2514 }
2515
2516 l2cap_chan_del(chan, ECONNRESET);
2517 bh_unlock_sock(sk);
2518
2519 l2cap_sock_kill(sk);
2520 return 0;
2521}
2522
2523static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2524{
2525 struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data;
2526 u16 dcid, scid;
2527 struct l2cap_chan *chan;
2528 struct sock *sk;
2529
2530 scid = __le16_to_cpu(rsp->scid);
2531 dcid = __le16_to_cpu(rsp->dcid);
2532
2533 BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid);
2534
2535 chan = l2cap_get_chan_by_scid(conn, scid);
2536 if (!chan)
2537 return 0;
2538
2539 sk = chan->sk;
2540
2541 /* don't delete l2cap channel if sk is owned by user */
2542 if (sock_owned_by_user(sk)) {
2543 sk->sk_state = BT_DISCONN;
2544 l2cap_sock_clear_timer(sk);
2545 l2cap_sock_set_timer(sk, HZ / 5);
2546 bh_unlock_sock(sk);
2547 return 0;
2548 }
2549
2550 l2cap_chan_del(chan, 0);
2551 bh_unlock_sock(sk);
2552
2553 l2cap_sock_kill(sk);
2554 return 0;
2555}
2556
2557static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2558{
2559 struct l2cap_info_req *req = (struct l2cap_info_req *) data;
2560 u16 type;
2561
2562 type = __le16_to_cpu(req->type);
2563
2564 BT_DBG("type 0x%4.4x", type);
2565
2566 if (type == L2CAP_IT_FEAT_MASK) {
2567 u8 buf[8];
2568 u32 feat_mask = l2cap_feat_mask;
2569 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
2570 rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
2571 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
2572 if (!disable_ertm)
2573 feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING
2574 | L2CAP_FEAT_FCS;
2575 put_unaligned_le32(feat_mask, rsp->data);
2576 l2cap_send_cmd(conn, cmd->ident,
2577 L2CAP_INFO_RSP, sizeof(buf), buf);
2578 } else if (type == L2CAP_IT_FIXED_CHAN) {
2579 u8 buf[12];
2580 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
2581 rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
2582 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
2583 memcpy(buf + 4, l2cap_fixed_chan, 8);
2584 l2cap_send_cmd(conn, cmd->ident,
2585 L2CAP_INFO_RSP, sizeof(buf), buf);
2586 } else {
2587 struct l2cap_info_rsp rsp;
2588 rsp.type = cpu_to_le16(type);
2589 rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
2590 l2cap_send_cmd(conn, cmd->ident,
2591 L2CAP_INFO_RSP, sizeof(rsp), &rsp);
2592 }
2593
2594 return 0;
2595}
2596
2597static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
2598{
2599 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data;
2600 u16 type, result;
2601
2602 type = __le16_to_cpu(rsp->type);
2603 result = __le16_to_cpu(rsp->result);
2604
2605 BT_DBG("type 0x%4.4x result 0x%2.2x", type, result);
2606
2607 /* L2CAP Info req/rsp are not bound to any channel, so add extra checks */
2608 if (cmd->ident != conn->info_ident ||
2609 conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)
2610 return 0;
2611
2612 del_timer(&conn->info_timer);
2613
2614 if (result != L2CAP_IR_SUCCESS) {
2615 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2616 conn->info_ident = 0;
2617
2618 l2cap_conn_start(conn);
2619
2620 return 0;
2621 }
2622
2623 if (type == L2CAP_IT_FEAT_MASK) {
2624 conn->feat_mask = get_unaligned_le32(rsp->data);
2625
2626 if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) {
2627 struct l2cap_info_req req;
2628 req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
2629
2630 conn->info_ident = l2cap_get_ident(conn);
2631
2632 l2cap_send_cmd(conn, conn->info_ident,
2633 L2CAP_INFO_REQ, sizeof(req), &req);
2634 } else {
2635 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2636 conn->info_ident = 0;
2637
2638 l2cap_conn_start(conn);
2639 }
2640 } else if (type == L2CAP_IT_FIXED_CHAN) {
2641 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2642 conn->info_ident = 0;
2643
2644 l2cap_conn_start(conn);
2645 }
2646
2647 return 0;
2648}
2649
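/* Validate the parameters of an incoming Connection Parameter Update
 * request before acting on it: the interval bounds must satisfy
 * 6 <= min <= max <= 3200, to_multiplier must lie in 10..3200 and be
 * large enough that max < to_multiplier * 8, and the requested latency
 * may exceed neither 499 nor (to_multiplier * 8 / max) - 1.  For
 * example, max = 40 and to_multiplier = 42 allow a latency of at most
 * (42 * 8 / 40) - 1 = 7.
 */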
2650static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency,
2651 u16 to_multiplier)
2652{
2653 u16 max_latency;
2654
2655 if (min > max || min < 6 || max > 3200)
2656 return -EINVAL;
2657
2658 if (to_multiplier < 10 || to_multiplier > 3200)
2659 return -EINVAL;
2660
2661 if (max >= to_multiplier * 8)
2662 return -EINVAL;
2663
2664 max_latency = (to_multiplier * 8 / max) - 1;
2665 if (latency > 499 || latency > max_latency)
2666 return -EINVAL;
2667
2668 return 0;
2669}
2670
2671static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
2672 struct l2cap_cmd_hdr *cmd, u8 *data)
2673{
2674 struct hci_conn *hcon = conn->hcon;
2675 struct l2cap_conn_param_update_req *req;
2676 struct l2cap_conn_param_update_rsp rsp;
2677 u16 min, max, latency, to_multiplier, cmd_len;
2678 int err;
2679
2680 if (!(hcon->link_mode & HCI_LM_MASTER))
2681 return -EINVAL;
2682
2683 cmd_len = __le16_to_cpu(cmd->len);
2684 if (cmd_len != sizeof(struct l2cap_conn_param_update_req))
2685 return -EPROTO;
2686
2687 req = (struct l2cap_conn_param_update_req *) data;
2688 min = __le16_to_cpu(req->min);
2689 max = __le16_to_cpu(req->max);
2690 latency = __le16_to_cpu(req->latency);
2691 to_multiplier = __le16_to_cpu(req->to_multiplier);
2692
2693 BT_DBG("min 0x%4.4x max 0x%4.4x latency: 0x%4.4x Timeout: 0x%4.4x",
2694 min, max, latency, to_multiplier);
2695
2696 memset(&rsp, 0, sizeof(rsp));
2697
2698 err = l2cap_check_conn_param(min, max, latency, to_multiplier);
2699 if (err)
2700 rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
2701 else
2702 rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED);
2703
2704 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP,
2705 sizeof(rsp), &rsp);
2706
2707 if (!err)
2708 hci_le_conn_update(hcon, min, max, latency, to_multiplier);
2709
2710 return 0;
2711}
2712
2713static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
2714 struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
2715{
2716 int err = 0;
2717
2718 switch (cmd->code) {
2719 case L2CAP_COMMAND_REJ:
2720 l2cap_command_rej(conn, cmd, data);
2721 break;
2722
2723 case L2CAP_CONN_REQ:
2724 err = l2cap_connect_req(conn, cmd, data);
2725 break;
2726
2727 case L2CAP_CONN_RSP:
2728 err = l2cap_connect_rsp(conn, cmd, data);
2729 break;
2730
2731 case L2CAP_CONF_REQ:
2732 err = l2cap_config_req(conn, cmd, cmd_len, data);
2733 break;
2734
2735 case L2CAP_CONF_RSP:
2736 err = l2cap_config_rsp(conn, cmd, data);
2737 break;
2738
2739 case L2CAP_DISCONN_REQ:
2740 err = l2cap_disconnect_req(conn, cmd, data);
2741 break;
2742
2743 case L2CAP_DISCONN_RSP:
2744 err = l2cap_disconnect_rsp(conn, cmd, data);
2745 break;
2746
2747 case L2CAP_ECHO_REQ:
2748 l2cap_send_cmd(conn, cmd->ident, L2CAP_ECHO_RSP, cmd_len, data);
2749 break;
2750
2751 case L2CAP_ECHO_RSP:
2752 break;
2753
2754 case L2CAP_INFO_REQ:
2755 err = l2cap_information_req(conn, cmd, data);
2756 break;
2757
2758 case L2CAP_INFO_RSP:
2759 err = l2cap_information_rsp(conn, cmd, data);
2760 break;
2761
2762 default:
2763 BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code);
2764 err = -EINVAL;
2765 break;
2766 }
2767
2768 return err;
2769}
2770
2771static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn,
2772 struct l2cap_cmd_hdr *cmd, u8 *data)
2773{
2774 switch (cmd->code) {
2775 case L2CAP_COMMAND_REJ:
2776 return 0;
2777
2778 case L2CAP_CONN_PARAM_UPDATE_REQ:
2779 return l2cap_conn_param_update_req(conn, cmd, data);
2780
2781 case L2CAP_CONN_PARAM_UPDATE_RSP:
2782 return 0;
2783
2784 default:
2785 BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code);
2786 return -EINVAL;
2787 }
2788}
2789
2790static inline void l2cap_sig_channel(struct l2cap_conn *conn,
2791 struct sk_buff *skb)
2792{
2793 u8 *data = skb->data;
2794 int len = skb->len;
2795 struct l2cap_cmd_hdr cmd;
2796 int err;
2797
2798 l2cap_raw_recv(conn, skb);
2799
2800 while (len >= L2CAP_CMD_HDR_SIZE) {
2801 u16 cmd_len;
2802 memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE);
2803 data += L2CAP_CMD_HDR_SIZE;
2804 len -= L2CAP_CMD_HDR_SIZE;
2805
2806 cmd_len = le16_to_cpu(cmd.len);
2807
2808 BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, cmd.ident);
2809
2810 if (cmd_len > len || !cmd.ident) {
2811 BT_DBG("corrupted command");
2812 break;
2813 }
2814
2815 if (conn->hcon->type == LE_LINK)
2816 err = l2cap_le_sig_cmd(conn, &cmd, data);
2817 else
2818 err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data);
2819
2820 if (err) {
2821 struct l2cap_cmd_rej rej;
2822
2823 BT_ERR("Wrong link type (%d)", err);
2824
2825 /* FIXME: Map err to a valid reason */
2826 rej.reason = cpu_to_le16(0);
2827 l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
2828 }
2829
2830 data += cmd_len;
2831 len -= cmd_len;
2832 }
2833
2834 kfree_skb(skb);
2835}
2836
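/* Check the CRC16 FCS of an incoming frame when FCS is in use.  The
 * FCS occupies the last two bytes of the PDU and is computed over the
 * Basic L2CAP header, the control field and the payload; skb->data
 * already points past the control field here, which is why the CRC is
 * computed starting at skb->data - hdr_size.
 */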
2837static int l2cap_check_fcs(struct l2cap_chan *chan, struct sk_buff *skb)
2838{
2839 u16 our_fcs, rcv_fcs;
2840 int hdr_size = L2CAP_HDR_SIZE + 2;
2841
2842 if (chan->fcs == L2CAP_FCS_CRC16) {
2843 skb_trim(skb, skb->len - 2);
2844 rcv_fcs = get_unaligned_le16(skb->data + skb->len);
2845 our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size);
2846
2847 if (our_fcs != rcv_fcs)
2848 return -EBADMSG;
2849 }
2850 return 0;
2851}
2852
2853static inline void l2cap_send_i_or_rr_or_rnr(struct l2cap_chan *chan)
2854{
2855 u16 control = 0;
2856
2857 chan->frames_sent = 0;
2858
2859 control |= chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
2860
2861 if (chan->conn_state & L2CAP_CONN_LOCAL_BUSY) {
2862 control |= L2CAP_SUPER_RCV_NOT_READY;
2863 l2cap_send_sframe(chan, control);
2864 chan->conn_state |= L2CAP_CONN_RNR_SENT;
2865 }
2866
2867 if (chan->conn_state & L2CAP_CONN_REMOTE_BUSY)
2868 l2cap_retransmit_frames(chan);
2869
2870 l2cap_ertm_send(chan);
2871
2872 if (!(chan->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
2873 chan->frames_sent == 0) {
2874 control |= L2CAP_SUPER_RCV_READY;
2875 l2cap_send_sframe(chan, control);
2876 }
2877}
2878
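/* Queue an out-of-sequence I-frame on the SREJ queue, keeping the
 * queue ordered by tx_seq.  Ordering uses the offset from buffer_seq
 * modulo 64 so that wrap-around is handled correctly: with
 * buffer_seq = 60, a frame with tx_seq = 2 has offset
 * (2 - 60) mod 64 = 6 and therefore sorts after one with tx_seq = 62
 * (offset 2).  A tx_seq that is already queued is rejected.
 */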
2879static int l2cap_add_to_srej_queue(struct l2cap_chan *chan, struct sk_buff *skb, u8 tx_seq, u8 sar)
2880{
2881 struct sk_buff *next_skb;
2882 int tx_seq_offset, next_tx_seq_offset;
2883
2884 bt_cb(skb)->tx_seq = tx_seq;
2885 bt_cb(skb)->sar = sar;
2886
2887 next_skb = skb_peek(&chan->srej_q);
2888 if (!next_skb) {
2889 __skb_queue_tail(&chan->srej_q, skb);
2890 return 0;
2891 }
2892
2893 tx_seq_offset = (tx_seq - chan->buffer_seq) % 64;
2894 if (tx_seq_offset < 0)
2895 tx_seq_offset += 64;
2896
2897 do {
2898 if (bt_cb(next_skb)->tx_seq == tx_seq)
2899 return -EINVAL;
2900
2901 next_tx_seq_offset = (bt_cb(next_skb)->tx_seq -
2902 chan->buffer_seq) % 64;
2903 if (next_tx_seq_offset < 0)
2904 next_tx_seq_offset += 64;
2905
2906 if (next_tx_seq_offset > tx_seq_offset) {
2907 __skb_queue_before(&chan->srej_q, next_skb, skb);
2908 return 0;
2909 }
2910
2911 if (skb_queue_is_last(&chan->srej_q, next_skb))
2912 break;
2913
2914 } while ((next_skb = skb_queue_next(&chan->srej_q, next_skb)));
2915
2916 __skb_queue_tail(&chan->srej_q, skb);
2917
2918 return 0;
2919}
2920
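/* Reassemble an SDU from ERTM I-frames according to the SAR bits of
 * the control field: an unsegmented frame is queued to the socket
 * directly, a START frame carries the total SDU length in its first
 * two bytes and allocates the reassembly buffer, CONTINUE and END
 * frames are appended to it, and on END the completed SDU is cloned
 * and handed to the socket.
 */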
2921static int l2cap_ertm_reassembly_sdu(struct l2cap_chan *chan, struct sk_buff *skb, u16 control)
2922{
2923 struct sk_buff *_skb;
2924 int err;
2925
2926 switch (control & L2CAP_CTRL_SAR) {
2927 case L2CAP_SDU_UNSEGMENTED:
2928 if (chan->conn_state & L2CAP_CONN_SAR_SDU)
2929 goto drop;
2930
2931 err = sock_queue_rcv_skb(chan->sk, skb);
2932 if (!err)
2933 return err;
2934
2935 break;
2936
2937 case L2CAP_SDU_START:
2938 if (chan->conn_state & L2CAP_CONN_SAR_SDU)
2939 goto drop;
2940
2941 chan->sdu_len = get_unaligned_le16(skb->data);
2942
2943 if (chan->sdu_len > chan->imtu)
2944 goto disconnect;
2945
2946 chan->sdu = bt_skb_alloc(chan->sdu_len, GFP_ATOMIC);
2947 if (!chan->sdu)
2948 return -ENOMEM;
2949
2950 /* Pull the sdu_len bytes only after the alloc succeeds: because of
2951 * the Local Busy condition we must be sure this is executed only
2952 * once, i.e. when the alloc does not fail */
2953 skb_pull(skb, 2);
2954
2955 memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len);
2956
2957 chan->conn_state |= L2CAP_CONN_SAR_SDU;
2958 chan->partial_sdu_len = skb->len;
2959 break;
2960
2961 case L2CAP_SDU_CONTINUE:
2962 if (!(chan->conn_state & L2CAP_CONN_SAR_SDU))
2963 goto disconnect;
2964
2965 if (!chan->sdu)
2966 goto disconnect;
2967
2968 chan->partial_sdu_len += skb->len;
2969 if (chan->partial_sdu_len > chan->sdu_len)
2970 goto drop;
2971
2972 memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len);
2973
2974 break;
2975
2976 case L2CAP_SDU_END:
2977 if (!(chan->conn_state & L2CAP_CONN_SAR_SDU))
2978 goto disconnect;
2979
2980 if (!chan->sdu)
2981 goto disconnect;
2982
2983 if (!(chan->conn_state & L2CAP_CONN_SAR_RETRY)) {
2984 chan->partial_sdu_len += skb->len;
2985
2986 if (chan->partial_sdu_len > chan->imtu)
2987 goto drop;
2988
2989 if (chan->partial_sdu_len != chan->sdu_len)
2990 goto drop;
2991
2992 memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len);
2993 }
2994
2995 _skb = skb_clone(chan->sdu, GFP_ATOMIC);
2996 if (!_skb) {
2997 chan->conn_state |= L2CAP_CONN_SAR_RETRY;
2998 return -ENOMEM;
2999 }
3000
3001 err = sock_queue_rcv_skb(chan->sk, _skb);
3002 if (err < 0) {
3003 kfree_skb(_skb);
3004 chan->conn_state |= L2CAP_CONN_SAR_RETRY;
3005 return err;
3006 }
3007
3008 chan->conn_state &= ~L2CAP_CONN_SAR_RETRY;
3009 chan->conn_state &= ~L2CAP_CONN_SAR_SDU;
3010
3011 kfree_skb(chan->sdu);
3012 break;
3013 }
3014
3015 kfree_skb(skb);
3016 return 0;
3017
3018drop:
3019 kfree_skb(chan->sdu);
3020 chan->sdu = NULL;
3021
3022disconnect:
3023 l2cap_send_disconn_req(chan->conn, chan, ECONNRESET);
3024 kfree_skb(skb);
3025 return 0;
3026}
3027
3028static int l2cap_try_push_rx_skb(struct l2cap_chan *chan)
3029{
3030 struct sk_buff *skb;
3031 u16 control;
3032 int err;
3033
3034 while ((skb = skb_dequeue(&chan->busy_q))) {
3035 control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
3036 err = l2cap_ertm_reassembly_sdu(chan, skb, control);
3037 if (err < 0) {
3038 skb_queue_head(&chan->busy_q, skb);
3039 return -EBUSY;
3040 }
3041
3042 chan->buffer_seq = (chan->buffer_seq + 1) % 64;
3043 }
3044
3045 if (!(chan->conn_state & L2CAP_CONN_RNR_SENT))
3046 goto done;
3047
3048 control = chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3049 control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
3050 l2cap_send_sframe(chan, control);
3051 chan->retry_count = 1;
3052
3053 del_timer(&chan->retrans_timer);
3054 __mod_monitor_timer();
3055
3056 chan->conn_state |= L2CAP_CONN_WAIT_F;
3057
3058done:
3059 chan->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
3060 chan->conn_state &= ~L2CAP_CONN_RNR_SENT;
3061
3062 BT_DBG("chan %p, Exit local busy", chan);
3063
3064 return 0;
3065}
3066
3067static void l2cap_busy_work(struct work_struct *work)
3068{
3069 DECLARE_WAITQUEUE(wait, current);
3070 struct l2cap_chan *chan =
3071 container_of(work, struct l2cap_chan, busy_work);
3072 struct sock *sk = chan->sk;
3073 int n_tries = 0, timeo = HZ/5, err;
3074 struct sk_buff *skb;
3075
3076 lock_sock(sk);
3077
3078 add_wait_queue(sk_sleep(sk), &wait);
3079 while ((skb = skb_peek(&chan->busy_q))) {
3080 set_current_state(TASK_INTERRUPTIBLE);
3081
3082 if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) {
3083 err = -EBUSY;
3084 l2cap_send_disconn_req(chan->conn, chan, EBUSY);
3085 break;
3086 }
3087
3088 if (!timeo)
3089 timeo = HZ/5;
3090
3091 if (signal_pending(current)) {
3092 err = sock_intr_errno(timeo);
3093 break;
3094 }
3095
3096 release_sock(sk);
3097 timeo = schedule_timeout(timeo);
3098 lock_sock(sk);
3099
3100 err = sock_error(sk);
3101 if (err)
3102 break;
3103
3104 if (l2cap_try_push_rx_skb(chan) == 0)
3105 break;
3106 }
3107
3108 set_current_state(TASK_RUNNING);
3109 remove_wait_queue(sk_sleep(sk), &wait);
3110
3111 release_sock(sk);
3112}
3113
3114static int l2cap_push_rx_skb(struct l2cap_chan *chan, struct sk_buff *skb, u16 control)
3115{
3116 int sctrl, err;
3117
3118 if (chan->conn_state & L2CAP_CONN_LOCAL_BUSY) {
3119 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
3120 __skb_queue_tail(&chan->busy_q, skb);
3121 return l2cap_try_push_rx_skb(chan);
3122
3123
3124 }
3125
3126 err = l2cap_ertm_reassembly_sdu(chan, skb, control);
3127 if (err >= 0) {
3128 chan->buffer_seq = (chan->buffer_seq + 1) % 64;
3129 return err;
3130 }
3131
3132 /* Busy Condition */
3133 BT_DBG("chan %p, Enter local busy", chan);
3134
3135 chan->conn_state |= L2CAP_CONN_LOCAL_BUSY;
3136 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
3137 __skb_queue_tail(&chan->busy_q, skb);
3138
3139 sctrl = chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3140 sctrl |= L2CAP_SUPER_RCV_NOT_READY;
3141 l2cap_send_sframe(chan, sctrl);
3142
3143 chan->conn_state |= L2CAP_CONN_RNR_SENT;
3144
3145 del_timer(&chan->ack_timer);
3146
3147 queue_work(_busy_wq, &chan->busy_work);
3148
3149 return err;
3150}
3151
3152static int l2cap_streaming_reassembly_sdu(struct l2cap_chan *chan, struct sk_buff *skb, u16 control)
3153{
3154 struct sk_buff *_skb;
3155 int err = -EINVAL;
3156
3157 /*
3158 * TODO: We have to notify userland if some data is lost in
3159 * Streaming Mode.
3160 */
3161
3162 switch (control & L2CAP_CTRL_SAR) {
3163 case L2CAP_SDU_UNSEGMENTED:
3164 if (chan->conn_state & L2CAP_CONN_SAR_SDU) {
3165 kfree_skb(chan->sdu);
3166 break;
3167 }
3168
3169 err = sock_queue_rcv_skb(chan->sk, skb);
3170 if (!err)
3171 return 0;
3172
3173 break;
3174
3175 case L2CAP_SDU_START:
3176 if (chan->conn_state & L2CAP_CONN_SAR_SDU) {
3177 kfree_skb(chan->sdu);
3178 break;
3179 }
3180
3181 chan->sdu_len = get_unaligned_le16(skb->data);
3182 skb_pull(skb, 2);
3183
3184 if (chan->sdu_len > chan->imtu) {
3185 err = -EMSGSIZE;
3186 break;
3187 }
3188
3189 chan->sdu = bt_skb_alloc(chan->sdu_len, GFP_ATOMIC);
3190 if (!chan->sdu) {
3191 err = -ENOMEM;
3192 break;
3193 }
3194
3195 memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len);
3196
3197 chan->conn_state |= L2CAP_CONN_SAR_SDU;
3198 chan->partial_sdu_len = skb->len;
3199 err = 0;
3200 break;
3201
3202 case L2CAP_SDU_CONTINUE:
3203 if (!(chan->conn_state & L2CAP_CONN_SAR_SDU))
3204 break;
3205
3206 memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len);
3207
3208 chan->partial_sdu_len += skb->len;
3209 if (chan->partial_sdu_len > chan->sdu_len)
3210 kfree_skb(chan->sdu);
3211 else
3212 err = 0;
3213
3214 break;
3215
3216 case L2CAP_SDU_END:
3217 if (!(chan->conn_state & L2CAP_CONN_SAR_SDU))
3218 break;
3219
3220 memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len);
3221
3222 chan->conn_state &= ~L2CAP_CONN_SAR_SDU;
3223 chan->partial_sdu_len += skb->len;
3224
3225 if (chan->partial_sdu_len > chan->imtu)
3226 goto drop;
3227
3228 if (chan->partial_sdu_len == chan->sdu_len) {
3229 _skb = skb_clone(chan->sdu, GFP_ATOMIC);
3230 err = sock_queue_rcv_skb(chan->sk, _skb);
3231 if (err < 0)
3232 kfree_skb(_skb);
3233 }
3234 err = 0;
3235
3236drop:
3237 kfree_skb(chan->sdu);
3238 break;
3239 }
3240
3241 kfree_skb(skb);
3242 return err;
3243}
3244
3245static void l2cap_check_srej_gap(struct l2cap_chan *chan, u8 tx_seq)
3246{
3247 struct sk_buff *skb;
3248 u16 control;
3249
3250 while ((skb = skb_peek(&chan->srej_q))) {
3251 if (bt_cb(skb)->tx_seq != tx_seq)
3252 break;
3253
3254 skb = skb_dequeue(&chan->srej_q);
3255 control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
3256 l2cap_ertm_reassembly_sdu(chan, skb, control);
3257 chan->buffer_seq_srej =
3258 (chan->buffer_seq_srej + 1) % 64;
3259 tx_seq = (tx_seq + 1) % 64;
3260 }
3261}
3262
3263static void l2cap_resend_srejframe(struct l2cap_chan *chan, u8 tx_seq)
3264{
3265 struct srej_list *l, *tmp;
3266 u16 control;
3267
3268 list_for_each_entry_safe(l, tmp, &chan->srej_l, list) {
3269 if (l->tx_seq == tx_seq) {
3270 list_del(&l->list);
3271 kfree(l);
3272 return;
3273 }
3274 control = L2CAP_SUPER_SELECT_REJECT;
3275 control |= l->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3276 l2cap_send_sframe(chan, control);
3277 list_del(&l->list);
3278 list_add_tail(&l->list, &chan->srej_l);
3279 }
3280}
3281
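/* A gap in the incoming sequence numbers was detected: send one SREJ
 * S-frame for every missing tx_seq between expected_tx_seq and the
 * frame just received, and record each requested sequence number on
 * srej_l so the retransmitted frame can be matched up later.
 */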
3282static void l2cap_send_srejframe(struct l2cap_chan *chan, u8 tx_seq)
3283{
3284 struct srej_list *new;
3285 u16 control;
3286
3287 while (tx_seq != chan->expected_tx_seq) {
3288 control = L2CAP_SUPER_SELECT_REJECT;
3289 control |= chan->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3290 l2cap_send_sframe(chan, control);
3291
3292 new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC);
3293 new->tx_seq = chan->expected_tx_seq;
3294 chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64;
3295 list_add_tail(&new->list, &chan->srej_l);
3296 }
3297 chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64;
3298}
3299
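/* Handle an incoming I-frame.  req_seq acknowledges frames we have
 * transmitted, tx_seq is checked against the receive window (an
 * offset of tx_win or more from buffer_seq is treated as invalid and
 * tears the connection down), out-of-sequence frames go through the
 * SREJ machinery above, and in-sequence frames are pushed into SDU
 * reassembly.
 */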
3300static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u16 rx_control, struct sk_buff *skb)
3301{
3302 u8 tx_seq = __get_txseq(rx_control);
3303 u8 req_seq = __get_reqseq(rx_control);
3304 u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
3305 int tx_seq_offset, expected_tx_seq_offset;
3306 int num_to_ack = (chan->tx_win/6) + 1;
3307 int err = 0;
3308
3309 BT_DBG("chan %p len %d tx_seq %d rx_control 0x%4.4x", chan, skb->len,
3310 tx_seq, rx_control);
3311
3312 if (L2CAP_CTRL_FINAL & rx_control &&
3313 chan->conn_state & L2CAP_CONN_WAIT_F) {
3314 del_timer(&chan->monitor_timer);
3315 if (chan->unacked_frames > 0)
3316 __mod_retrans_timer();
3317 chan->conn_state &= ~L2CAP_CONN_WAIT_F;
3318 }
3319
3320 chan->expected_ack_seq = req_seq;
3321 l2cap_drop_acked_frames(chan);
3322
3323 if (tx_seq == chan->expected_tx_seq)
3324 goto expected;
3325
3326 tx_seq_offset = (tx_seq - chan->buffer_seq) % 64;
3327 if (tx_seq_offset < 0)
3328 tx_seq_offset += 64;
3329
3330 /* invalid tx_seq */
3331 if (tx_seq_offset >= chan->tx_win) {
3332 l2cap_send_disconn_req(chan->conn, chan, ECONNRESET);
3333 goto drop;
3334 }
3335
3336 if (chan->conn_state == L2CAP_CONN_LOCAL_BUSY)
3337 goto drop;
3338
3339 if (chan->conn_state & L2CAP_CONN_SREJ_SENT) {
3340 struct srej_list *first;
3341
3342 first = list_first_entry(&chan->srej_l,
3343 struct srej_list, list);
3344 if (tx_seq == first->tx_seq) {
3345 l2cap_add_to_srej_queue(chan, skb, tx_seq, sar);
3346 l2cap_check_srej_gap(chan, tx_seq);
3347
3348 list_del(&first->list);
3349 kfree(first);
3350
3351 if (list_empty(&chan->srej_l)) {
3352 chan->buffer_seq = chan->buffer_seq_srej;
3353 chan->conn_state &= ~L2CAP_CONN_SREJ_SENT;
3354 l2cap_send_ack(chan);
3355 BT_DBG("chan %p, Exit SREJ_SENT", chan);
3356 }
3357 } else {
3358 struct srej_list *l;
3359
3360 /* duplicated tx_seq */
3361 if (l2cap_add_to_srej_queue(chan, skb, tx_seq, sar) < 0)
3362 goto drop;
3363
3364 list_for_each_entry(l, &chan->srej_l, list) {
3365 if (l->tx_seq == tx_seq) {
3366 l2cap_resend_srejframe(chan, tx_seq);
3367 return 0;
3368 }
3369 }
3370 l2cap_send_srejframe(chan, tx_seq);
3371 }
3372 } else {
3373 expected_tx_seq_offset =
3374 (chan->expected_tx_seq - chan->buffer_seq) % 64;
3375 if (expected_tx_seq_offset < 0)
3376 expected_tx_seq_offset += 64;
3377
3378 /* duplicated tx_seq */
3379 if (tx_seq_offset < expected_tx_seq_offset)
3380 goto drop;
3381
3382 chan->conn_state |= L2CAP_CONN_SREJ_SENT;
3383
3384 BT_DBG("chan %p, Enter SREJ", chan);
3385
3386 INIT_LIST_HEAD(&chan->srej_l);
3387 chan->buffer_seq_srej = chan->buffer_seq;
3388
3389 __skb_queue_head_init(&chan->srej_q);
3390 __skb_queue_head_init(&chan->busy_q);
3391 l2cap_add_to_srej_queue(chan, skb, tx_seq, sar);
3392
3393 chan->conn_state |= L2CAP_CONN_SEND_PBIT;
3394
3395 l2cap_send_srejframe(chan, tx_seq);
3396
3397 del_timer(&chan->ack_timer);
3398 }
3399 return 0;
3400
3401expected:
3402 chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64;
3403
3404 if (chan->conn_state & L2CAP_CONN_SREJ_SENT) {
3405 bt_cb(skb)->tx_seq = tx_seq;
3406 bt_cb(skb)->sar = sar;
3407 __skb_queue_tail(&chan->srej_q, skb);
3408 return 0;
3409 }
3410
3411 err = l2cap_push_rx_skb(chan, skb, rx_control);
3412 if (err < 0)
3413 return 0;
3414
3415 if (rx_control & L2CAP_CTRL_FINAL) {
3416 if (chan->conn_state & L2CAP_CONN_REJ_ACT)
3417 chan->conn_state &= ~L2CAP_CONN_REJ_ACT;
3418 else
3419 l2cap_retransmit_frames(chan);
3420 }
3421
3422 __mod_ack_timer();
3423
3424 chan->num_acked = (chan->num_acked + 1) % num_to_ack;
3425 if (chan->num_acked == num_to_ack - 1)
3426 l2cap_send_ack(chan);
3427
3428 return 0;
3429
3430drop:
3431 kfree_skb(skb);
3432 return 0;
3433}
3434
3435static inline void l2cap_data_channel_rrframe(struct l2cap_chan *chan, u16 rx_control)
3436{
3437 BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, __get_reqseq(rx_control),
3438 rx_control);
3439
3440 chan->expected_ack_seq = __get_reqseq(rx_control);
3441 l2cap_drop_acked_frames(chan);
3442
3443 if (rx_control & L2CAP_CTRL_POLL) {
3444 chan->conn_state |= L2CAP_CONN_SEND_FBIT;
3445 if (chan->conn_state & L2CAP_CONN_SREJ_SENT) {
3446 if ((chan->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
3447 (chan->unacked_frames > 0))
3448 __mod_retrans_timer();
3449
3450 chan->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3451 l2cap_send_srejtail(chan);
3452 } else {
3453 l2cap_send_i_or_rr_or_rnr(chan);
3454 }
3455
3456 } else if (rx_control & L2CAP_CTRL_FINAL) {
3457 chan->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3458
3459 if (chan->conn_state & L2CAP_CONN_REJ_ACT)
3460 chan->conn_state &= ~L2CAP_CONN_REJ_ACT;
3461 else
3462 l2cap_retransmit_frames(chan);
3463
3464 } else {
3465 if ((chan->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
3466 (chan->unacked_frames > 0))
3467 __mod_retrans_timer();
3468
3469 chan->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3470 if (chan->conn_state & L2CAP_CONN_SREJ_SENT)
3471 l2cap_send_ack(chan);
3472 else
3473 l2cap_ertm_send(chan);
3474 }
3475}
3476
3477static inline void l2cap_data_channel_rejframe(struct l2cap_chan *chan, u16 rx_control)
3478{
3479 u8 tx_seq = __get_reqseq(rx_control);
3480
3481 BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, tx_seq, rx_control);
3482
3483 chan->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3484
3485 chan->expected_ack_seq = tx_seq;
3486 l2cap_drop_acked_frames(chan);
3487
3488 if (rx_control & L2CAP_CTRL_FINAL) {
3489 if (chan->conn_state & L2CAP_CONN_REJ_ACT)
3490 chan->conn_state &= ~L2CAP_CONN_REJ_ACT;
3491 else
3492 l2cap_retransmit_frames(chan);
3493 } else {
3494 l2cap_retransmit_frames(chan);
3495
3496 if (chan->conn_state & L2CAP_CONN_WAIT_F)
3497 chan->conn_state |= L2CAP_CONN_REJ_ACT;
3498 }
3499}
3500static inline void l2cap_data_channel_srejframe(struct l2cap_chan *chan, u16 rx_control)
3501{
3502 u8 tx_seq = __get_reqseq(rx_control);
3503
3504 BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, tx_seq, rx_control);
3505
3506 chan->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3507
3508 if (rx_control & L2CAP_CTRL_POLL) {
3509 chan->expected_ack_seq = tx_seq;
3510 l2cap_drop_acked_frames(chan);
3511
3512 chan->conn_state |= L2CAP_CONN_SEND_FBIT;
3513 l2cap_retransmit_one_frame(chan, tx_seq);
3514
3515 l2cap_ertm_send(chan);
3516
3517 if (chan->conn_state & L2CAP_CONN_WAIT_F) {
3518 chan->srej_save_reqseq = tx_seq;
3519 chan->conn_state |= L2CAP_CONN_SREJ_ACT;
3520 }
3521 } else if (rx_control & L2CAP_CTRL_FINAL) {
3522 if ((chan->conn_state & L2CAP_CONN_SREJ_ACT) &&
3523 chan->srej_save_reqseq == tx_seq)
3524 chan->conn_state &= ~L2CAP_CONN_SREJ_ACT;
3525 else
3526 l2cap_retransmit_one_frame(chan, tx_seq);
3527 } else {
3528 l2cap_retransmit_one_frame(chan, tx_seq);
3529 if (chan->conn_state & L2CAP_CONN_WAIT_F) {
3530 chan->srej_save_reqseq = tx_seq;
3531 chan->conn_state |= L2CAP_CONN_SREJ_ACT;
3532 }
3533 }
3534}
3535
3536static inline void l2cap_data_channel_rnrframe(struct l2cap_chan *chan, u16 rx_control)
3537{
3538 u8 tx_seq = __get_reqseq(rx_control);
3539
3540 BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, tx_seq, rx_control);
3541
3542 chan->conn_state |= L2CAP_CONN_REMOTE_BUSY;
3543 chan->expected_ack_seq = tx_seq;
3544 l2cap_drop_acked_frames(chan);
3545
3546 if (rx_control & L2CAP_CTRL_POLL)
3547 chan->conn_state |= L2CAP_CONN_SEND_FBIT;
3548
3549 if (!(chan->conn_state & L2CAP_CONN_SREJ_SENT)) {
3550 del_timer(&chan->retrans_timer);
3551 if (rx_control & L2CAP_CTRL_POLL)
3552 l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_FINAL);
3553 return;
3554 }
3555
3556 if (rx_control & L2CAP_CTRL_POLL)
3557 l2cap_send_srejtail(chan);
3558 else
3559 l2cap_send_sframe(chan, L2CAP_SUPER_RCV_READY);
3560}
3561
3562static inline int l2cap_data_channel_sframe(struct l2cap_chan *chan, u16 rx_control, struct sk_buff *skb)
3563{
3564 BT_DBG("chan %p rx_control 0x%4.4x len %d", chan, rx_control, skb->len);
3565
3566 if (L2CAP_CTRL_FINAL & rx_control &&
3567 chan->conn_state & L2CAP_CONN_WAIT_F) {
3568 del_timer(&chan->monitor_timer);
3569 if (chan->unacked_frames > 0)
3570 __mod_retrans_timer();
3571 chan->conn_state &= ~L2CAP_CONN_WAIT_F;
3572 }
3573
3574 switch (rx_control & L2CAP_CTRL_SUPERVISE) {
3575 case L2CAP_SUPER_RCV_READY:
3576 l2cap_data_channel_rrframe(chan, rx_control);
3577 break;
3578
3579 case L2CAP_SUPER_REJECT:
3580 l2cap_data_channel_rejframe(chan, rx_control);
3581 break;
3582
3583 case L2CAP_SUPER_SELECT_REJECT:
3584 l2cap_data_channel_srejframe(chan, rx_control);
3585 break;
3586
3587 case L2CAP_SUPER_RCV_NOT_READY:
3588 l2cap_data_channel_rnrframe(chan, rx_control);
3589 break;
3590 }
3591
3592 kfree_skb(skb);
3593 return 0;
3594}
3595
3596static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb)
3597{
3598 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
3599 u16 control;
3600 u8 req_seq;
3601 int len, next_tx_seq_offset, req_seq_offset;
3602
3603 control = get_unaligned_le16(skb->data);
3604 skb_pull(skb, 2);
3605 len = skb->len;
3606
3607 /*
3608 * We can just drop the corrupted I-frame here.
3609 * The receiver will miss it and start the proper recovery
3610 * procedure, asking for retransmission.
3611 */
3612 if (l2cap_check_fcs(chan, skb))
3613 goto drop;
3614
3615 if (__is_sar_start(control) && __is_iframe(control))
3616 len -= 2;
3617
3618 if (chan->fcs == L2CAP_FCS_CRC16)
3619 len -= 2;
3620
3621 if (len > chan->mps) {
3622 l2cap_send_disconn_req(chan->conn, chan, ECONNRESET);
3623 goto drop;
3624 }
3625
3626 req_seq = __get_reqseq(control);
3627 req_seq_offset = (req_seq - chan->expected_ack_seq) % 64;
3628 if (req_seq_offset < 0)
3629 req_seq_offset += 64;
3630
3631 next_tx_seq_offset =
3632 (chan->next_tx_seq - chan->expected_ack_seq) % 64;
3633 if (next_tx_seq_offset < 0)
3634 next_tx_seq_offset += 64;
3635
3636 /* check for invalid req-seq */
3637 if (req_seq_offset > next_tx_seq_offset) {
3638 l2cap_send_disconn_req(chan->conn, chan, ECONNRESET);
3639 goto drop;
3640 }
3641
3642 if (__is_iframe(control)) {
3643 if (len < 0) {
3644 l2cap_send_disconn_req(chan->conn, chan, ECONNRESET);
3645 goto drop;
3646 }
3647
3648 l2cap_data_channel_iframe(chan, control, skb);
3649 } else {
3650 if (len != 0) {
3651 BT_ERR("%d", len);
3652 l2cap_send_disconn_req(chan->conn, chan, ECONNRESET);
3653 goto drop;
3654 }
3655
3656 l2cap_data_channel_sframe(chan, control, skb);
3657 }
3658
3659 return 0;
3660
3661drop:
3662 kfree_skb(skb);
3663 return 0;
3664}
3665
3666static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb)
3667{
3668 struct l2cap_chan *chan;
3669 struct sock *sk = NULL;
3670 struct l2cap_pinfo *pi;
3671 u16 control;
3672 u8 tx_seq;
3673 int len;
3674
3675 chan = l2cap_get_chan_by_scid(conn, cid);
3676 if (!chan) {
3677 BT_DBG("unknown cid 0x%4.4x", cid);
3678 goto drop;
3679 }
3680
3681 sk = chan->sk;
3682 pi = l2cap_pi(sk);
3683
3684 BT_DBG("chan %p, len %d", chan, skb->len);
3685
3686 if (sk->sk_state != BT_CONNECTED)
3687 goto drop;
3688
3689 switch (chan->mode) {
3690 case L2CAP_MODE_BASIC:
3691 /* If the socket recv buffer overflows we drop data here,
3692 * which is *bad* because L2CAP has to be reliable.
3693 * But we don't have any other choice: L2CAP doesn't
3694 * provide a flow control mechanism. */
3695
3696 if (chan->imtu < skb->len)
3697 goto drop;
3698
3699 if (!sock_queue_rcv_skb(sk, skb))
3700 goto done;
3701 break;
3702
3703 case L2CAP_MODE_ERTM:
3704 if (!sock_owned_by_user(sk)) {
3705 l2cap_ertm_data_rcv(sk, skb);
3706 } else {
3707 if (sk_add_backlog(sk, skb))
3708 goto drop;
3709 }
3710
3711 goto done;
3712
3713 case L2CAP_MODE_STREAMING:
3714 control = get_unaligned_le16(skb->data);
3715 skb_pull(skb, 2);
3716 len = skb->len;
3717
3718 if (l2cap_check_fcs(chan, skb))
3719 goto drop;
3720
3721 if (__is_sar_start(control))
3722 len -= 2;
3723
3724 if (chan->fcs == L2CAP_FCS_CRC16)
3725 len -= 2;
3726
3727 if (len > chan->mps || len < 0 || __is_sframe(control))
3728 goto drop;
3729
3730 tx_seq = __get_txseq(control);
3731
3732 if (chan->expected_tx_seq == tx_seq)
3733 chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64;
3734 else
3735 chan->expected_tx_seq = (tx_seq + 1) % 64;
3736
3737 l2cap_streaming_reassembly_sdu(chan, skb, control);
3738
3739 goto done;
3740
3741 default:
3742 BT_DBG("chan %p: bad mode 0x%2.2x", chan, chan->mode);
3743 break;
3744 }
3745
3746drop:
3747 kfree_skb(skb);
3748
3749done:
3750 if (sk)
3751 bh_unlock_sock(sk);
3752
3753 return 0;
3754}
3755
3756static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb)
3757{
3758 struct sock *sk = NULL;
3759 struct l2cap_chan *chan;
3760
3761 chan = l2cap_global_chan_by_psm(0, psm, conn->src);
3762 if (!chan)
3763 goto drop;
3764
3765 sk = chan->sk;
3766
3767 bh_lock_sock(sk);
3768
3769 BT_DBG("sk %p, len %d", sk, skb->len);
3770
3771 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED)
3772 goto drop;
3773
3774 if (l2cap_pi(sk)->chan->imtu < skb->len)
3775 goto drop;
3776
3777 if (!sock_queue_rcv_skb(sk, skb))
3778 goto done;
3779
3780drop:
3781 kfree_skb(skb);
3782
3783done:
3784 if (sk)
3785 bh_unlock_sock(sk);
3786 return 0;
3787}
3788
3789static inline int l2cap_att_channel(struct l2cap_conn *conn, __le16 cid, struct sk_buff *skb)
3790{
3791 struct sock *sk = NULL;
3792 struct l2cap_chan *chan;
3793
3794 chan = l2cap_global_chan_by_scid(0, cid, conn->src);
3795 if (!chan)
3796 goto drop;
3797
3798 sk = chan->sk;
3799
3800 bh_lock_sock(sk);
3801
3802 BT_DBG("sk %p, len %d", sk, skb->len);
3803
3804 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED)
3805 goto drop;
3806
3807 if (l2cap_pi(sk)->chan->imtu < skb->len)
3808 goto drop;
3809
3810 if (!sock_queue_rcv_skb(sk, skb))
3811 goto done;
3812
3813drop:
3814 kfree_skb(skb);
3815
3816done:
3817 if (sk)
3818 bh_unlock_sock(sk);
3819 return 0;
3820}
3821
3822static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
3823{
3824 struct l2cap_hdr *lh = (void *) skb->data;
3825 u16 cid, len;
3826 __le16 psm;
3827
3828 skb_pull(skb, L2CAP_HDR_SIZE);
3829 cid = __le16_to_cpu(lh->cid);
3830 len = __le16_to_cpu(lh->len);
3831
3832 if (len != skb->len) {
3833 kfree_skb(skb);
3834 return;
3835 }
3836
3837 BT_DBG("len %d, cid 0x%4.4x", len, cid);
3838
3839 switch (cid) {
3840 case L2CAP_CID_LE_SIGNALING:
3841 case L2CAP_CID_SIGNALING:
3842 l2cap_sig_channel(conn, skb);
3843 break;
3844
3845 case L2CAP_CID_CONN_LESS:
3846 psm = get_unaligned_le16(skb->data);
3847 skb_pull(skb, 2);
3848 l2cap_conless_channel(conn, psm, skb);
3849 break;
3850
3851 case L2CAP_CID_LE_DATA:
3852 l2cap_att_channel(conn, cid, skb);
3853 break;
3854
3855 default:
3856 l2cap_data_channel(conn, cid, skb);
3857 break;
3858 }
3859}
3860
3861/* ---- L2CAP interface with lower layer (HCI) ---- */
3862
3863static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
3864{
3865 int exact = 0, lm1 = 0, lm2 = 0;
3866 struct l2cap_chan *c;
3867
3868 if (type != ACL_LINK)
3869 return -EINVAL;
3870
3871 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
3872
3873 /* Find listening sockets and check their link_mode */
3874 read_lock(&chan_list_lock);
3875 list_for_each_entry(c, &chan_list, global_l) {
3876 struct sock *sk = c->sk;
3877
3878 if (sk->sk_state != BT_LISTEN)
3879 continue;
3880
3881 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) {
3882 lm1 |= HCI_LM_ACCEPT;
3883 if (c->role_switch)
3884 lm1 |= HCI_LM_MASTER;
3885 exact++;
3886 } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) {
3887 lm2 |= HCI_LM_ACCEPT;
3888 if (c->role_switch)
3889 lm2 |= HCI_LM_MASTER;
3890 }
3891 }
3892 read_unlock(&chan_list_lock);
3893
3894 return exact ? lm1 : lm2;
3895}
3896
3897static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
3898{
3899 struct l2cap_conn *conn;
3900
3901 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
3902
3903 if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK))
3904 return -EINVAL;
3905
3906 if (!status) {
3907 conn = l2cap_conn_add(hcon, status);
3908 if (conn)
3909 l2cap_conn_ready(conn);
3910 } else
3911 l2cap_conn_del(hcon, bt_err(status));
3912
3913 return 0;
3914}
3915
3916static int l2cap_disconn_ind(struct hci_conn *hcon)
3917{
3918 struct l2cap_conn *conn = hcon->l2cap_data;
3919
3920 BT_DBG("hcon %p", hcon);
3921
3922 if (hcon->type != ACL_LINK || !conn)
3923 return 0x13;
3924
3925 return conn->disc_reason;
3926}
3927
3928static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
3929{
3930 BT_DBG("hcon %p reason %d", hcon, reason);
3931
3932 if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK))
3933 return -EINVAL;
3934
3935 l2cap_conn_del(hcon, bt_err(reason));
3936
3937 return 0;
3938}
3939
3940static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
3941{
3942 struct sock *sk = chan->sk;
3943
3944 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM)
3945 return;
3946
3947 if (encrypt == 0x00) {
3948 if (chan->sec_level == BT_SECURITY_MEDIUM) {
3949 l2cap_sock_clear_timer(sk);
3950 l2cap_sock_set_timer(sk, HZ * 5);
3951 } else if (chan->sec_level == BT_SECURITY_HIGH)
3952 __l2cap_sock_close(sk, ECONNREFUSED);
3953 } else {
3954 if (chan->sec_level == BT_SECURITY_MEDIUM)
3955 l2cap_sock_clear_timer(sk);
3956 }
3957}
3958
3959static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
3960{
3961 struct l2cap_conn *conn = hcon->l2cap_data;
3962 struct l2cap_chan *chan;
3963
3964 if (!conn)
3965 return 0;
3966
3967 BT_DBG("conn %p", conn);
3968
3969 read_lock(&conn->chan_lock);
3970
3971 list_for_each_entry(chan, &conn->chan_l, list) {
3972 struct sock *sk = chan->sk;
3973
3974 bh_lock_sock(sk);
3975
3976 if (chan->conf_state & L2CAP_CONF_CONNECT_PEND) {
3977 bh_unlock_sock(sk);
3978 continue;
3979 }
3980
3981 if (!status && (sk->sk_state == BT_CONNECTED ||
3982 sk->sk_state == BT_CONFIG)) {
3983 l2cap_check_encryption(chan, encrypt);
3984 bh_unlock_sock(sk);
3985 continue;
3986 }
3987
3988 if (sk->sk_state == BT_CONNECT) {
3989 if (!status) {
3990 struct l2cap_conn_req req;
3991 req.scid = cpu_to_le16(chan->scid);
3992 req.psm = chan->psm;
3993
3994 chan->ident = l2cap_get_ident(conn);
3995 chan->conf_state |= L2CAP_CONF_CONNECT_PEND;
3996
3997 l2cap_send_cmd(conn, chan->ident,
3998 L2CAP_CONN_REQ, sizeof(req), &req);
3999 } else {
4000 l2cap_sock_clear_timer(sk);
4001 l2cap_sock_set_timer(sk, HZ / 10);
4002 }
4003 } else if (sk->sk_state == BT_CONNECT2) {
4004 struct l2cap_conn_rsp rsp;
4005 __u16 result;
4006
4007 if (!status) {
4008 sk->sk_state = BT_CONFIG;
4009 result = L2CAP_CR_SUCCESS;
4010 } else {
4011 sk->sk_state = BT_DISCONN;
4012 l2cap_sock_set_timer(sk, HZ / 10);
4013 result = L2CAP_CR_SEC_BLOCK;
4014 }
4015
4016 rsp.scid = cpu_to_le16(chan->dcid);
4017 rsp.dcid = cpu_to_le16(chan->scid);
4018 rsp.result = cpu_to_le16(result);
4019 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
4020 l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP,
4021 sizeof(rsp), &rsp);
4022 }
4023
4024 bh_unlock_sock(sk);
4025 }
4026
4027 read_unlock(&conn->chan_lock);
4028
4029 return 0;
4030}
4031
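/* Reassemble L2CAP frames from incoming ACL data.  The first fragment
 * of a frame carries the Basic L2CAP header, whose length field gives
 * the size of the complete frame; when the fragment does not already
 * contain the whole frame, an rx_skb of that size is allocated and
 * continuation fragments (ACL_CONT) are copied into it until rx_len
 * reaches zero, at which point the frame is handed to
 * l2cap_recv_frame().
 */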
4032static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
4033{
4034 struct l2cap_conn *conn = hcon->l2cap_data;
4035
4036 if (!conn)
4037 conn = l2cap_conn_add(hcon, 0);
4038
4039 if (!conn)
4040 goto drop;
4041
4042 BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags);
4043
4044 if (!(flags & ACL_CONT)) {
4045 struct l2cap_hdr *hdr;
4046 struct l2cap_chan *chan;
4047 u16 cid;
4048 int len;
4049
4050 if (conn->rx_len) {
4051 BT_ERR("Unexpected start frame (len %d)", skb->len);
4052 kfree_skb(conn->rx_skb);
4053 conn->rx_skb = NULL;
4054 conn->rx_len = 0;
4055 l2cap_conn_unreliable(conn, ECOMM);
4056 }
4057
4058 /* A start fragment always begins with the Basic L2CAP header */
4059 if (skb->len < L2CAP_HDR_SIZE) {
4060 BT_ERR("Frame is too short (len %d)", skb->len);
4061 l2cap_conn_unreliable(conn, ECOMM);
4062 goto drop;
4063 }
4064
4065 hdr = (struct l2cap_hdr *) skb->data;
4066 len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE;
4067 cid = __le16_to_cpu(hdr->cid);
4068
4069 if (len == skb->len) {
4070 /* Complete frame received */
4071 l2cap_recv_frame(conn, skb);
4072 return 0;
4073 }
4074
4075 BT_DBG("Start: total len %d, frag len %d", len, skb->len);
4076
4077 if (skb->len > len) {
4078 BT_ERR("Frame is too long (len %d, expected len %d)",
4079 skb->len, len);
4080 l2cap_conn_unreliable(conn, ECOMM);
4081 goto drop;
4082 }
4083
4084 chan = l2cap_get_chan_by_scid(conn, cid);
4085
4086 if (chan && chan->sk) {
4087 struct sock *sk = chan->sk;
4088
4089 if (chan->imtu < len - L2CAP_HDR_SIZE) {
4090 BT_ERR("Frame exceeding recv MTU (len %d, "
4091 "MTU %d)", len,
4092 chan->imtu);
4093 bh_unlock_sock(sk);
4094 l2cap_conn_unreliable(conn, ECOMM);
4095 goto drop;
4096 }
4097 bh_unlock_sock(sk);
4098 }
4099
4100 /* Allocate skb for the complete frame (with header) */
4101 conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC);
4102 if (!conn->rx_skb)
4103 goto drop;
4104
4105 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
4106 skb->len);
4107 conn->rx_len = len - skb->len;
4108 } else {
4109 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
4110
4111 if (!conn->rx_len) {
4112 BT_ERR("Unexpected continuation frame (len %d)", skb->len);
4113 l2cap_conn_unreliable(conn, ECOMM);
4114 goto drop;
4115 }
4116
4117 if (skb->len > conn->rx_len) {
4118 BT_ERR("Fragment is too long (len %d, expected %d)",
4119 skb->len, conn->rx_len);
4120 kfree_skb(conn->rx_skb);
4121 conn->rx_skb = NULL;
4122 conn->rx_len = 0;
4123 l2cap_conn_unreliable(conn, ECOMM);
4124 goto drop;
4125 }
4126
4127 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
4128 skb->len);
4129 conn->rx_len -= skb->len;
4130
4131 if (!conn->rx_len) {
4132 /* Complete frame received */
4133 l2cap_recv_frame(conn, conn->rx_skb);
4134 conn->rx_skb = NULL;
4135 }
4136 }
4137
4138drop:
4139 kfree_skb(skb);
4140 return 0;
4141}
4142
4143static int l2cap_debugfs_show(struct seq_file *f, void *p)
4144{
4145 struct l2cap_chan *c;
4146
4147 read_lock_bh(&chan_list_lock);
4148
4149 list_for_each_entry(c, &chan_list, global_l) {
4150 struct sock *sk = c->sk;
4151
4152 seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n",
4153 batostr(&bt_sk(sk)->src),
4154 batostr(&bt_sk(sk)->dst),
4155 sk->sk_state, __le16_to_cpu(c->psm),
4156 c->scid, c->dcid, c->imtu, c->omtu,
4157 c->sec_level, c->mode);
4158 }
4159
4160 read_unlock_bh(&chan_list_lock);
4161
4162 return 0;
4163}
4164
4165static int l2cap_debugfs_open(struct inode *inode, struct file *file)
4166{
4167 return single_open(file, l2cap_debugfs_show, inode->i_private);
4168}
4169
4170static const struct file_operations l2cap_debugfs_fops = {
4171 .open = l2cap_debugfs_open,
4172 .read = seq_read,
4173 .llseek = seq_lseek,
4174 .release = single_release,
4175};
4176
4177static struct dentry *l2cap_debugfs;
4178
4179static struct hci_proto l2cap_hci_proto = {
4180 .name = "L2CAP",
4181 .id = HCI_PROTO_L2CAP,
4182 .connect_ind = l2cap_connect_ind,
4183 .connect_cfm = l2cap_connect_cfm,
4184 .disconn_ind = l2cap_disconn_ind,
4185 .disconn_cfm = l2cap_disconn_cfm,
4186 .security_cfm = l2cap_security_cfm,
4187 .recv_acldata = l2cap_recv_acldata
4188};
4189
4190int __init l2cap_init(void)
4191{
4192 int err;
4193
4194 err = l2cap_init_sockets();
4195 if (err < 0)
4196 return err;
4197
4198 _busy_wq = create_singlethread_workqueue("l2cap");
4199 if (!_busy_wq) {
4200 err = -ENOMEM;
4201 goto error;
4202 }
4203
4204 err = hci_register_proto(&l2cap_hci_proto);
4205 if (err < 0) {
4206 BT_ERR("L2CAP protocol registration failed");
4207 bt_sock_unregister(BTPROTO_L2CAP);
4208 goto error;
4209 }
4210
4211 if (bt_debugfs) {
4212 l2cap_debugfs = debugfs_create_file("l2cap", 0444,
4213 bt_debugfs, NULL, &l2cap_debugfs_fops);
4214 if (!l2cap_debugfs)
4215 BT_ERR("Failed to create L2CAP debug file");
4216 }
4217
4218 return 0;
4219
4220error:
4221 destroy_workqueue(_busy_wq);
4222 l2cap_cleanup_sockets();
4223 return err;
4224}
4225
4226void l2cap_exit(void)
4227{
4228 debugfs_remove(l2cap_debugfs);
4229
4230 flush_workqueue(_busy_wq);
4231 destroy_workqueue(_busy_wq);
4232
4233 if (hci_unregister_proto(&l2cap_hci_proto) < 0)
4234 BT_ERR("L2CAP protocol unregistration failed");
4235
4236 l2cap_cleanup_sockets();
4237}
4238
4239module_param(disable_ertm, bool, 0644);
4240MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode");
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
new file mode 100644
index 000000000000..18dc9888d8c2
--- /dev/null
+++ b/net/bluetooth/l2cap_sock.c
@@ -0,0 +1,1119 @@
1/*
2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated
4 Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
5 Copyright (C) 2010 Google Inc.
6
7 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License version 2 as
11 published by the Free Software Foundation;
12
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
16 IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
17 CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
18 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
19 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
20 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21
22 ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
23 COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
24 SOFTWARE IS DISCLAIMED.
25*/
26
27/* Bluetooth L2CAP sockets. */
28
29#include <net/bluetooth/bluetooth.h>
30#include <net/bluetooth/hci_core.h>
31#include <net/bluetooth/l2cap.h>
32
33static const struct proto_ops l2cap_sock_ops;
34
35/* ---- L2CAP timers ---- */
36static void l2cap_sock_timeout(unsigned long arg)
37{
38 struct sock *sk = (struct sock *) arg;
39 int reason;
40
41 BT_DBG("sock %p state %d", sk, sk->sk_state);
42
43 bh_lock_sock(sk);
44
45 if (sock_owned_by_user(sk)) {
46 /* sk is owned by user. Try again later */
47 l2cap_sock_set_timer(sk, HZ / 5);
48 bh_unlock_sock(sk);
49 sock_put(sk);
50 return;
51 }
52
53 if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
54 reason = ECONNREFUSED;
55 else if (sk->sk_state == BT_CONNECT &&
56 l2cap_pi(sk)->chan->sec_level != BT_SECURITY_SDP)
57 reason = ECONNREFUSED;
58 else
59 reason = ETIMEDOUT;
60
61 __l2cap_sock_close(sk, reason);
62
63 bh_unlock_sock(sk);
64
65 l2cap_sock_kill(sk);
66 sock_put(sk);
67}
68
69void l2cap_sock_set_timer(struct sock *sk, long timeout)
70{
71 BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
72 sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
73}
74
75void l2cap_sock_clear_timer(struct sock *sk)
76{
77 BT_DBG("sock %p state %d", sk, sk->sk_state);
78 sk_stop_timer(sk, &sk->sk_timer);
79}
80
81static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
82{
83 struct sock *sk = sock->sk;
84 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
85 struct sockaddr_l2 la;
86 int len, err = 0;
87
88 BT_DBG("sk %p", sk);
89
90 if (!addr || addr->sa_family != AF_BLUETOOTH)
91 return -EINVAL;
92
93 memset(&la, 0, sizeof(la));
94 len = min_t(unsigned int, sizeof(la), alen);
95 memcpy(&la, addr, len);
96
97 if (la.l2_cid && la.l2_psm)
98 return -EINVAL;
99
100 lock_sock(sk);
101
102 if (sk->sk_state != BT_OPEN) {
103 err = -EBADFD;
104 goto done;
105 }
106
107 if (la.l2_psm) {
108 __u16 psm = __le16_to_cpu(la.l2_psm);
109
110 /* PSM must be odd and lsb of upper byte must be 0 */
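		/* e.g. 0x0001 and 0x1001 are accepted, while 0x0002 (even)
		 * and 0x0101 (lsb of the upper byte set) are rejected */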
111 if ((psm & 0x0101) != 0x0001) {
112 err = -EINVAL;
113 goto done;
114 }
115
116 /* Restrict usage of well-known PSMs */
117 if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
118 err = -EACCES;
119 goto done;
120 }
121 }
122
123 if (la.l2_cid)
124 err = l2cap_add_scid(chan, la.l2_cid);
125 else
126 err = l2cap_add_psm(chan, &la.l2_bdaddr, la.l2_psm);
127
128 if (err < 0)
129 goto done;
130
131 if (__le16_to_cpu(la.l2_psm) == 0x0001 ||
132 __le16_to_cpu(la.l2_psm) == 0x0003)
133 chan->sec_level = BT_SECURITY_SDP;
134
135 bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
136 sk->sk_state = BT_BOUND;
137
138done:
139 release_sock(sk);
140 return err;
141}
142
143static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
144{
145 struct sock *sk = sock->sk;
146 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
147 struct sockaddr_l2 la;
148 int len, err = 0;
149
150 BT_DBG("sk %p", sk);
151
152 if (!addr || alen < sizeof(addr->sa_family) ||
153 addr->sa_family != AF_BLUETOOTH)
154 return -EINVAL;
155
156 memset(&la, 0, sizeof(la));
157 len = min_t(unsigned int, sizeof(la), alen);
158 memcpy(&la, addr, len);
159
160 if (la.l2_cid && la.l2_psm)
161 return -EINVAL;
162
163 lock_sock(sk);
164
165 if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
166 && !(la.l2_psm || la.l2_cid)) {
167 err = -EINVAL;
168 goto done;
169 }
170
171 switch (chan->mode) {
172 case L2CAP_MODE_BASIC:
173 break;
174 case L2CAP_MODE_ERTM:
175 case L2CAP_MODE_STREAMING:
176 if (!disable_ertm)
177 break;
178 /* fall through */
179 default:
180 err = -ENOTSUPP;
181 goto done;
182 }
183
184 switch (sk->sk_state) {
185 case BT_CONNECT:
186 case BT_CONNECT2:
187 case BT_CONFIG:
188 /* Already connecting */
189 goto wait;
190
191 case BT_CONNECTED:
192 /* Already connected */
193 err = -EISCONN;
194 goto done;
195
196 case BT_OPEN:
197 case BT_BOUND:
198 /* Can connect */
199 break;
200
201 default:
202 err = -EBADFD;
203 goto done;
204 }
205
206 /* PSM must be odd and lsb of upper byte must be 0 */
207 if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
208 sk->sk_type != SOCK_RAW && !la.l2_cid) {
209 err = -EINVAL;
210 goto done;
211 }
212
213 /* Set destination address and psm */
214 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
215 chan->psm = la.l2_psm;
216 chan->dcid = la.l2_cid;
217
218 err = l2cap_chan_connect(l2cap_pi(sk)->chan);
219 if (err)
220 goto done;
221
222wait:
223 err = bt_sock_wait_state(sk, BT_CONNECTED,
224 sock_sndtimeo(sk, flags & O_NONBLOCK));
225done:
226 release_sock(sk);
227 return err;
228}
229
230static int l2cap_sock_listen(struct socket *sock, int backlog)
231{
232 struct sock *sk = sock->sk;
233 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
234 int err = 0;
235
236 BT_DBG("sk %p backlog %d", sk, backlog);
237
238 lock_sock(sk);
239
240 if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
241 || sk->sk_state != BT_BOUND) {
242 err = -EBADFD;
243 goto done;
244 }
245
246 switch (chan->mode) {
247 case L2CAP_MODE_BASIC:
248 break;
249 case L2CAP_MODE_ERTM:
250 case L2CAP_MODE_STREAMING:
251 if (!disable_ertm)
252 break;
253 /* fall through */
254 default:
255 err = -ENOTSUPP;
256 goto done;
257 }
258
259 sk->sk_max_ack_backlog = backlog;
260 sk->sk_ack_backlog = 0;
261 sk->sk_state = BT_LISTEN;
262
263done:
264 release_sock(sk);
265 return err;
266}
267
268static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags)
269{
270 DECLARE_WAITQUEUE(wait, current);
271 struct sock *sk = sock->sk, *nsk;
272 long timeo;
273 int err = 0;
274
275 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
276
277 if (sk->sk_state != BT_LISTEN) {
278 err = -EBADFD;
279 goto done;
280 }
281
282 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
283
284 BT_DBG("sk %p timeo %ld", sk, timeo);
285
286 /* Wait for an incoming connection. (wake-one). */
287 add_wait_queue_exclusive(sk_sleep(sk), &wait);
288 while (!(nsk = bt_accept_dequeue(sk, newsock))) {
289 set_current_state(TASK_INTERRUPTIBLE);
290 if (!timeo) {
291 err = -EAGAIN;
292 break;
293 }
294
295 release_sock(sk);
296 timeo = schedule_timeout(timeo);
297 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
298
299 if (sk->sk_state != BT_LISTEN) {
300 err = -EBADFD;
301 break;
302 }
303
304 if (signal_pending(current)) {
305 err = sock_intr_errno(timeo);
306 break;
307 }
308 }
309 set_current_state(TASK_RUNNING);
310 remove_wait_queue(sk_sleep(sk), &wait);
311
312 if (err)
313 goto done;
314
315 newsock->state = SS_CONNECTED;
316
317 BT_DBG("new socket %p", nsk);
318
319done:
320 release_sock(sk);
321 return err;
322}
323
324static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
325{
326 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
327 struct sock *sk = sock->sk;
328 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
329
330 BT_DBG("sock %p, sk %p", sock, sk);
331
332 addr->sa_family = AF_BLUETOOTH;
333 *len = sizeof(struct sockaddr_l2);
334
335 if (peer) {
336 la->l2_psm = chan->psm;
337 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
338 la->l2_cid = cpu_to_le16(chan->dcid);
339 } else {
340 la->l2_psm = chan->sport;
341 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
342 la->l2_cid = cpu_to_le16(chan->scid);
343 }
344
345 return 0;
346}
347
348static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
349{
350 struct sock *sk = sock->sk;
351 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
352 struct l2cap_options opts;
353 struct l2cap_conninfo cinfo;
354 int len, err = 0;
355 u32 opt;
356
357 BT_DBG("sk %p", sk);
358
359 if (get_user(len, optlen))
360 return -EFAULT;
361
362 lock_sock(sk);
363
364 switch (optname) {
365 case L2CAP_OPTIONS:
366 memset(&opts, 0, sizeof(opts));
367 opts.imtu = chan->imtu;
368 opts.omtu = chan->omtu;
369 opts.flush_to = chan->flush_to;
370 opts.mode = chan->mode;
371 opts.fcs = chan->fcs;
372 opts.max_tx = chan->max_tx;
373 opts.txwin_size = (__u16)chan->tx_win;
374
375 len = min_t(unsigned int, len, sizeof(opts));
376 if (copy_to_user(optval, (char *) &opts, len))
377 err = -EFAULT;
378
379 break;
380
381 case L2CAP_LM:
382 switch (chan->sec_level) {
383 case BT_SECURITY_LOW:
384 opt = L2CAP_LM_AUTH;
385 break;
386 case BT_SECURITY_MEDIUM:
387 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
388 break;
389 case BT_SECURITY_HIGH:
390 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
391 L2CAP_LM_SECURE;
392 break;
393 default:
394 opt = 0;
395 break;
396 }
397
398 if (chan->role_switch)
399 opt |= L2CAP_LM_MASTER;
400
401 if (chan->force_reliable)
402 opt |= L2CAP_LM_RELIABLE;
403
404 if (put_user(opt, (u32 __user *) optval))
405 err = -EFAULT;
406 break;
407
408 case L2CAP_CONNINFO:
409 if (sk->sk_state != BT_CONNECTED &&
410 !(sk->sk_state == BT_CONNECT2 &&
411 bt_sk(sk)->defer_setup)) {
412 err = -ENOTCONN;
413 break;
414 }
415
416 cinfo.hci_handle = chan->conn->hcon->handle;
417 memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3);
418
419 len = min_t(unsigned int, len, sizeof(cinfo));
420 if (copy_to_user(optval, (char *) &cinfo, len))
421 err = -EFAULT;
422
423 break;
424
425 default:
426 err = -ENOPROTOOPT;
427 break;
428 }
429
430 release_sock(sk);
431 return err;
432}
433
434static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
435{
436 struct sock *sk = sock->sk;
437 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
438 struct bt_security sec;
439 int len, err = 0;
440
441 BT_DBG("sk %p", sk);
442
443 if (level == SOL_L2CAP)
444 return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
445
446 if (level != SOL_BLUETOOTH)
447 return -ENOPROTOOPT;
448
449 if (get_user(len, optlen))
450 return -EFAULT;
451
452 lock_sock(sk);
453
454 switch (optname) {
455 case BT_SECURITY:
456 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
457 && sk->sk_type != SOCK_RAW) {
458 err = -EINVAL;
459 break;
460 }
461
462 sec.level = chan->sec_level;
463
464 len = min_t(unsigned int, len, sizeof(sec));
465 if (copy_to_user(optval, (char *) &sec, len))
466 err = -EFAULT;
467
468 break;
469
470 case BT_DEFER_SETUP:
471 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
472 err = -EINVAL;
473 break;
474 }
475
476 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
477 err = -EFAULT;
478
479 break;
480
481 case BT_FLUSHABLE:
482 if (put_user(chan->flushable, (u32 __user *) optval))
483 err = -EFAULT;
484
485 break;
486
487 default:
488 err = -ENOPROTOOPT;
489 break;
490 }
491
492 release_sock(sk);
493 return err;
494}
495
496static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
497{
498 struct sock *sk = sock->sk;
499 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
500 struct l2cap_options opts;
501 int len, err = 0;
502 u32 opt;
503
504 BT_DBG("sk %p", sk);
505
506 lock_sock(sk);
507
508 switch (optname) {
509 case L2CAP_OPTIONS:
510 if (sk->sk_state == BT_CONNECTED) {
511 err = -EINVAL;
512 break;
513 }
514
515 opts.imtu = chan->imtu;
516 opts.omtu = chan->omtu;
517 opts.flush_to = chan->flush_to;
518 opts.mode = chan->mode;
519 opts.fcs = chan->fcs;
520 opts.max_tx = chan->max_tx;
521 opts.txwin_size = (__u16)chan->tx_win;
522
523 len = min_t(unsigned int, sizeof(opts), optlen);
524 if (copy_from_user((char *) &opts, optval, len)) {
525 err = -EFAULT;
526 break;
527 }
528
529 if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
530 err = -EINVAL;
531 break;
532 }
533
534 chan->mode = opts.mode;
535 switch (chan->mode) {
536 case L2CAP_MODE_BASIC:
537 chan->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
538 break;
539 case L2CAP_MODE_ERTM:
540 case L2CAP_MODE_STREAMING:
541 if (!disable_ertm)
542 break;
543 /* fall through */
544 default:
545 err = -EINVAL;
546 break;
547 }
548
549 chan->imtu = opts.imtu;
550 chan->omtu = opts.omtu;
551 chan->fcs = opts.fcs;
552 chan->max_tx = opts.max_tx;
553 chan->tx_win = (__u8)opts.txwin_size;
554 break;
555
556 case L2CAP_LM:
557 if (get_user(opt, (u32 __user *) optval)) {
558 err = -EFAULT;
559 break;
560 }
561
562 if (opt & L2CAP_LM_AUTH)
563 chan->sec_level = BT_SECURITY_LOW;
564 if (opt & L2CAP_LM_ENCRYPT)
565 chan->sec_level = BT_SECURITY_MEDIUM;
566 if (opt & L2CAP_LM_SECURE)
567 chan->sec_level = BT_SECURITY_HIGH;
568
569 chan->role_switch = (opt & L2CAP_LM_MASTER);
570 chan->force_reliable = (opt & L2CAP_LM_RELIABLE);
571 break;
572
573 default:
574 err = -ENOPROTOOPT;
575 break;
576 }
577
578 release_sock(sk);
579 return err;
580}
581
582static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
583{
584 struct sock *sk = sock->sk;
585 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
586 struct bt_security sec;
587 int len, err = 0;
588 u32 opt;
589
590 BT_DBG("sk %p", sk);
591
592 if (level == SOL_L2CAP)
593 return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
594
595 if (level != SOL_BLUETOOTH)
596 return -ENOPROTOOPT;
597
598 lock_sock(sk);
599
600 switch (optname) {
601 case BT_SECURITY:
602 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
603 && sk->sk_type != SOCK_RAW) {
604 err = -EINVAL;
605 break;
606 }
607
608 sec.level = BT_SECURITY_LOW;
609
610 len = min_t(unsigned int, sizeof(sec), optlen);
611 if (copy_from_user((char *) &sec, optval, len)) {
612 err = -EFAULT;
613 break;
614 }
615
616 if (sec.level < BT_SECURITY_LOW ||
617 sec.level > BT_SECURITY_HIGH) {
618 err = -EINVAL;
619 break;
620 }
621
622 chan->sec_level = sec.level;
623 break;
624
625 case BT_DEFER_SETUP:
626 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
627 err = -EINVAL;
628 break;
629 }
630
631 if (get_user(opt, (u32 __user *) optval)) {
632 err = -EFAULT;
633 break;
634 }
635
636 bt_sk(sk)->defer_setup = opt;
637 break;
638
639 case BT_FLUSHABLE:
640 if (get_user(opt, (u32 __user *) optval)) {
641 err = -EFAULT;
642 break;
643 }
644
645 if (opt > BT_FLUSHABLE_ON) {
646 err = -EINVAL;
647 break;
648 }
649
650 if (opt == BT_FLUSHABLE_OFF) {
651 struct l2cap_conn *conn = chan->conn;
652 /* proceed further only when we have l2cap_conn and
653 No Flush support in the LM */
654 if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) {
655 err = -EINVAL;
656 break;
657 }
658 }
659
660 chan->flushable = opt;
661 break;
662
663 default:
664 err = -ENOPROTOOPT;
665 break;
666 }
667
668 release_sock(sk);
669 return err;
670}
671
672static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
673{
674 struct sock *sk = sock->sk;
675 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
676 struct sk_buff *skb;
677 u16 control;
678 int err;
679
680 BT_DBG("sock %p, sk %p", sock, sk);
681
682 err = sock_error(sk);
683 if (err)
684 return err;
685
686 if (msg->msg_flags & MSG_OOB)
687 return -EOPNOTSUPP;
688
689 lock_sock(sk);
690
691 if (sk->sk_state != BT_CONNECTED) {
692 err = -ENOTCONN;
693 goto done;
694 }
695
696 /* Connectionless channel */
697 if (sk->sk_type == SOCK_DGRAM) {
698 skb = l2cap_create_connless_pdu(chan, msg, len);
699 if (IS_ERR(skb)) {
700 err = PTR_ERR(skb);
701 } else {
702 l2cap_do_send(chan, skb);
703 err = len;
704 }
705 goto done;
706 }
707
708 switch (chan->mode) {
709 case L2CAP_MODE_BASIC:
710 /* Check outgoing MTU */
711 if (len > chan->omtu) {
712 err = -EMSGSIZE;
713 goto done;
714 }
715
716 /* Create a basic PDU */
717 skb = l2cap_create_basic_pdu(chan, msg, len);
718 if (IS_ERR(skb)) {
719 err = PTR_ERR(skb);
720 goto done;
721 }
722
723 l2cap_do_send(chan, skb);
724 err = len;
725 break;
726
727 case L2CAP_MODE_ERTM:
728 case L2CAP_MODE_STREAMING:
729 /* Entire SDU fits into one PDU */
730 if (len <= chan->remote_mps) {
731 control = L2CAP_SDU_UNSEGMENTED;
732 skb = l2cap_create_iframe_pdu(chan, msg, len, control,
733 0);
734 if (IS_ERR(skb)) {
735 err = PTR_ERR(skb);
736 goto done;
737 }
738 __skb_queue_tail(&chan->tx_q, skb);
739
740 if (chan->tx_send_head == NULL)
741 chan->tx_send_head = skb;
742
743 } else {
744 /* Segment SDU into multiple PDUs */
745 err = l2cap_sar_segment_sdu(chan, msg, len);
746 if (err < 0)
747 goto done;
748 }
749
750 if (chan->mode == L2CAP_MODE_STREAMING) {
751 l2cap_streaming_send(chan);
752 err = len;
753 break;
754 }
755
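		/* If the peer has signalled busy and a poll (F-bit) response
		 * is still outstanding, leave the frames queued on tx_q;
		 * l2cap_ertm_send() transmits them once the remote recovers. */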
756 if ((chan->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
757 (chan->conn_state & L2CAP_CONN_WAIT_F)) {
758 err = len;
759 break;
760 }
761 err = l2cap_ertm_send(chan);
762
763 if (err >= 0)
764 err = len;
765 break;
766
767 default:
768 BT_DBG("bad state %1.1x", chan->mode);
769 err = -EBADFD;
770 }
771
772done:
773 release_sock(sk);
774 return err;
775}
776
777static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
778{
779 struct sock *sk = sock->sk;
780
781 lock_sock(sk);
782
783 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
784 sk->sk_state = BT_CONFIG;
785
786 __l2cap_connect_rsp_defer(l2cap_pi(sk)->chan);
787 release_sock(sk);
788 return 0;
789 }
790
791 release_sock(sk);
792
793 if (sock->type == SOCK_STREAM)
794 return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
795
796 return bt_sock_recvmsg(iocb, sock, msg, len, flags);
797}
798
799/* Kill socket (only if zapped and orphan)
800 * Must be called on unlocked socket.
801 */
802void l2cap_sock_kill(struct sock *sk)
803{
804 if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
805 return;
806
807 BT_DBG("sk %p state %d", sk, sk->sk_state);
808
809 /* Kill poor orphan */
810
811 l2cap_chan_destroy(l2cap_pi(sk)->chan);
812 sock_set_flag(sk, SOCK_DEAD);
813 sock_put(sk);
814}
815
816/* Must be called on unlocked socket. */
817static void l2cap_sock_close(struct sock *sk)
818{
819 l2cap_sock_clear_timer(sk);
820 lock_sock(sk);
821 __l2cap_sock_close(sk, ECONNRESET);
822 release_sock(sk);
823 l2cap_sock_kill(sk);
824}
825
826static void l2cap_sock_cleanup_listen(struct sock *parent)
827{
828 struct sock *sk;
829
830 BT_DBG("parent %p", parent);
831
832 /* Close not yet accepted channels */
833 while ((sk = bt_accept_dequeue(parent, NULL)))
834 l2cap_sock_close(sk);
835
836 parent->sk_state = BT_CLOSED;
837 sock_set_flag(parent, SOCK_ZAPPED);
838}
839
840void __l2cap_sock_close(struct sock *sk, int reason)
841{
842 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
843 struct l2cap_conn *conn = chan->conn;
844
845 BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
846
847 switch (sk->sk_state) {
848 case BT_LISTEN:
849 l2cap_sock_cleanup_listen(sk);
850 break;
851
852 case BT_CONNECTED:
853 case BT_CONFIG:
854 if ((sk->sk_type == SOCK_SEQPACKET ||
855 sk->sk_type == SOCK_STREAM) &&
856 conn->hcon->type == ACL_LINK) {
857 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
858 l2cap_send_disconn_req(conn, chan, reason);
859 } else
860 l2cap_chan_del(chan, reason);
861 break;
862
863 case BT_CONNECT2:
864 if ((sk->sk_type == SOCK_SEQPACKET ||
865 sk->sk_type == SOCK_STREAM) &&
866 conn->hcon->type == ACL_LINK) {
867 struct l2cap_conn_rsp rsp;
868 __u16 result;
869
870 if (bt_sk(sk)->defer_setup)
871 result = L2CAP_CR_SEC_BLOCK;
872 else
873 result = L2CAP_CR_BAD_PSM;
874
875 rsp.scid = cpu_to_le16(chan->dcid);
876 rsp.dcid = cpu_to_le16(chan->scid);
877 rsp.result = cpu_to_le16(result);
878 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
879 l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP,
880 sizeof(rsp), &rsp);
881 }
882
883 l2cap_chan_del(chan, reason);
884 break;
885
886 case BT_CONNECT:
887 case BT_DISCONN:
888 l2cap_chan_del(chan, reason);
889 break;
890
891 default:
892 sock_set_flag(sk, SOCK_ZAPPED);
893 break;
894 }
895}
896
897static int l2cap_sock_shutdown(struct socket *sock, int how)
898{
899 struct sock *sk = sock->sk;
900 struct l2cap_chan *chan = l2cap_pi(sk)->chan;
901 int err = 0;
902
903 BT_DBG("sock %p, sk %p", sock, sk);
904
905 if (!sk)
906 return 0;
907
908 lock_sock(sk);
909 if (!sk->sk_shutdown) {
910 if (chan->mode == L2CAP_MODE_ERTM)
911 err = __l2cap_wait_ack(sk);
912
913 sk->sk_shutdown = SHUTDOWN_MASK;
914 l2cap_sock_clear_timer(sk);
915 __l2cap_sock_close(sk, 0);
916
917 if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
918 err = bt_sock_wait_state(sk, BT_CLOSED,
919 sk->sk_lingertime);
920 }
921
922 if (!err && sk->sk_err)
923 err = -sk->sk_err;
924
925 release_sock(sk);
926 return err;
927}
928
929static int l2cap_sock_release(struct socket *sock)
930{
931 struct sock *sk = sock->sk;
932 int err;
933
934 BT_DBG("sock %p, sk %p", sock, sk);
935
936 if (!sk)
937 return 0;
938
939 err = l2cap_sock_shutdown(sock, 2);
940
941 sock_orphan(sk);
942 l2cap_sock_kill(sk);
943 return err;
944}
945
946static void l2cap_sock_destruct(struct sock *sk)
947{
948 BT_DBG("sk %p", sk);
949
950 skb_queue_purge(&sk->sk_receive_queue);
951 skb_queue_purge(&sk->sk_write_queue);
952}
953
954void l2cap_sock_init(struct sock *sk, struct sock *parent)
955{
956 struct l2cap_pinfo *pi = l2cap_pi(sk);
957 struct l2cap_chan *chan = pi->chan;
958
959 BT_DBG("sk %p", sk);
960
961 if (parent) {
962 struct l2cap_chan *pchan = l2cap_pi(parent)->chan;
963
964 sk->sk_type = parent->sk_type;
965 bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
966
967 chan->imtu = pchan->imtu;
968 chan->omtu = pchan->omtu;
969 chan->conf_state = pchan->conf_state;
970 chan->mode = pchan->mode;
971 chan->fcs = pchan->fcs;
972 chan->max_tx = pchan->max_tx;
973 chan->tx_win = pchan->tx_win;
974 chan->sec_level = pchan->sec_level;
975 chan->role_switch = pchan->role_switch;
976 chan->force_reliable = pchan->force_reliable;
977 chan->flushable = pchan->flushable;
978 } else {
979 chan->imtu = L2CAP_DEFAULT_MTU;
980 chan->omtu = 0;
981 if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
982 chan->mode = L2CAP_MODE_ERTM;
983 chan->conf_state |= L2CAP_CONF_STATE2_DEVICE;
984 } else {
985 chan->mode = L2CAP_MODE_BASIC;
986 }
987 chan->max_tx = L2CAP_DEFAULT_MAX_TX;
988 chan->fcs = L2CAP_FCS_CRC16;
989 chan->tx_win = L2CAP_DEFAULT_TX_WINDOW;
990 chan->sec_level = BT_SECURITY_LOW;
991 chan->role_switch = 0;
992 chan->force_reliable = 0;
993 chan->flushable = BT_FLUSHABLE_OFF;
994 }
995
996 /* Default config options */
997 chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
998}
999
1000static struct proto l2cap_proto = {
1001 .name = "L2CAP",
1002 .owner = THIS_MODULE,
1003 .obj_size = sizeof(struct l2cap_pinfo)
1004};
1005
1006struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
1007{
1008 struct sock *sk;
1009
1010 sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
1011 if (!sk)
1012 return NULL;
1013
1014 sock_init_data(sock, sk);
1015 INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
1016
1017 sk->sk_destruct = l2cap_sock_destruct;
1018 sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
1019
1020 sock_reset_flag(sk, SOCK_ZAPPED);
1021
1022 sk->sk_protocol = proto;
1023 sk->sk_state = BT_OPEN;
1024
1025 setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
1026
1027 return sk;
1028}
1029
1030static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
1031 int kern)
1032{
1033 struct sock *sk;
1034 struct l2cap_chan *chan;
1035
1036 BT_DBG("sock %p", sock);
1037
1038 sock->state = SS_UNCONNECTED;
1039
1040 if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
1041 sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
1042 return -ESOCKTNOSUPPORT;
1043
1044 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
1045 return -EPERM;
1046
1047 sock->ops = &l2cap_sock_ops;
1048
1049 sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
1050 if (!sk)
1051 return -ENOMEM;
1052
1053 chan = l2cap_chan_create(sk);
1054 if (!chan) {
1055 l2cap_sock_kill(sk);
1056 return -ENOMEM;
1057 }
1058
1059 l2cap_pi(sk)->chan = chan;
1060
1061 l2cap_sock_init(sk, NULL);
1062 return 0;
1063}
1064
1065static const struct proto_ops l2cap_sock_ops = {
1066 .family = PF_BLUETOOTH,
1067 .owner = THIS_MODULE,
1068 .release = l2cap_sock_release,
1069 .bind = l2cap_sock_bind,
1070 .connect = l2cap_sock_connect,
1071 .listen = l2cap_sock_listen,
1072 .accept = l2cap_sock_accept,
1073 .getname = l2cap_sock_getname,
1074 .sendmsg = l2cap_sock_sendmsg,
1075 .recvmsg = l2cap_sock_recvmsg,
1076 .poll = bt_sock_poll,
1077 .ioctl = bt_sock_ioctl,
1078 .mmap = sock_no_mmap,
1079 .socketpair = sock_no_socketpair,
1080 .shutdown = l2cap_sock_shutdown,
1081 .setsockopt = l2cap_sock_setsockopt,
1082 .getsockopt = l2cap_sock_getsockopt
1083};
1084
1085static const struct net_proto_family l2cap_sock_family_ops = {
1086 .family = PF_BLUETOOTH,
1087 .owner = THIS_MODULE,
1088 .create = l2cap_sock_create,
1089};
1090
1091int __init l2cap_init_sockets(void)
1092{
1093 int err;
1094
1095 err = proto_register(&l2cap_proto, 0);
1096 if (err < 0)
1097 return err;
1098
1099 err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
1100 if (err < 0)
1101 goto error;
1102
1103 BT_INFO("L2CAP socket layer initialized");
1104
1105 return 0;
1106
1107error:
1108 BT_ERR("L2CAP socket registration failed");
1109 proto_unregister(&l2cap_proto);
1110 return err;
1111}
1112
1113void l2cap_cleanup_sockets(void)
1114{
1115 if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
1116 BT_ERR("L2CAP socket unregistration failed");
1117
1118 proto_unregister(&l2cap_proto);
1119}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f827fd908380..dae382ce7020 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -22,7 +22,7 @@
22
23/* Bluetooth HCI Management interface */
24
-25#include <asm/uaccess.h>
25#include <linux/uaccess.h>
26#include <asm/unaligned.h>
27
28#include <net/bluetooth/bluetooth.h>
@@ -32,13 +32,24 @@
32#define MGMT_VERSION 0
33#define MGMT_REVISION 1
34
-35static int cmd_status(struct sock *sk, u16 cmd, u8 status)
35struct pending_cmd {
36 struct list_head list;
37 __u16 opcode;
38 int index;
39 void *param;
40 struct sock *sk;
41 void *user_data;
42};
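/* Entries on cmd_list track management commands that have been passed down
 * to the HCI layer and are still waiting for their result to be reported
 * back to the user-space socket that issued them. */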
43
44LIST_HEAD(cmd_list);
45
46static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status)
47{
48 struct sk_buff *skb;
49 struct mgmt_hdr *hdr;
50 struct mgmt_ev_cmd_status *ev;
51
-41 BT_DBG("sock %p", sk);
52 BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status);
53
54 skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC);
55 if (!skb)
@@ -47,6 +58,7 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status)
58 hdr = (void *) skb_put(skb, sizeof(*hdr));
59
60 hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS);
61 hdr->index = cpu_to_le16(index);
62 hdr->len = cpu_to_le16(sizeof(*ev));
63
64 ev = (void *) skb_put(skb, sizeof(*ev));
@@ -59,29 +71,30 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status)
71 return 0;
72}
73
-62static int read_version(struct sock *sk)
74static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp,
75 size_t rp_len)
76{
77 struct sk_buff *skb;
78 struct mgmt_hdr *hdr;
79 struct mgmt_ev_cmd_complete *ev;
-67 struct mgmt_rp_read_version *rp;
80
81 BT_DBG("sock %p", sk);
82
-71 skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
83 skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_ATOMIC);
84 if (!skb)
85 return -ENOMEM;
86
87 hdr = (void *) skb_put(skb, sizeof(*hdr));
88
89 hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-77 hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
90 hdr->index = cpu_to_le16(index);
91 hdr->len = cpu_to_le16(sizeof(*ev) + rp_len);
92
-79 ev = (void *) skb_put(skb, sizeof(*ev));
-80 put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode);
93 ev = (void *) skb_put(skb, sizeof(*ev) + rp_len);
94 put_unaligned_le16(cmd, &ev->opcode);
95
-82 rp = (void *) skb_put(skb, sizeof(*rp));
-83 rp->version = MGMT_VERSION;
-84 put_unaligned_le16(MGMT_REVISION, &rp->revision);
96 if (rp)
97 memcpy(ev->data, rp, rp_len);
98
99 if (sock_queue_rcv_skb(sk, skb) < 0)
100 kfree_skb(skb);
@@ -89,16 +102,26 @@ static int read_version(struct sock *sk)
102 return 0;
103}
104
105static int read_version(struct sock *sk)
106{
107 struct mgmt_rp_read_version rp;
108
109 BT_DBG("sock %p", sk);
110
111 rp.version = MGMT_VERSION;
112 put_unaligned_le16(MGMT_REVISION, &rp.revision);
113
114 return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, &rp,
115 sizeof(rp));
116}
117
118static int read_index_list(struct sock *sk)
119{
-94 struct sk_buff *skb;
-95 struct mgmt_hdr *hdr;
-96 struct mgmt_ev_cmd_complete *ev;
120 struct mgmt_rp_read_index_list *rp;
121 struct list_head *p;
-99 size_t body_len;
122 size_t rp_len;
123 u16 count;
-101 int i;
124 int i, err;
125
126 BT_DBG("sock %p", sk);
127
@@ -109,112 +132,1520 @@ static int read_index_list(struct sock *sk)
132 count++;
133 }
134
-112 body_len = sizeof(*ev) + sizeof(*rp) + (2 * count);
-113 skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC);
-114 if (!skb)
135 rp_len = sizeof(*rp) + (2 * count);
136 rp = kmalloc(rp_len, GFP_ATOMIC);
137 if (!rp) {
138 read_unlock(&hci_dev_list_lock);
139 return -ENOMEM;
140 }
141
-117 hdr = (void *) skb_put(skb, sizeof(*hdr));
-118 hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-119 hdr->len = cpu_to_le16(body_len);
-120
-121 ev = (void *) skb_put(skb, sizeof(*ev));
-122 put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, &ev->opcode);
-123
-124 rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count));
142 put_unaligned_le16(count, &rp->num_controllers);
143
144 i = 0;
145 list_for_each(p, &hci_dev_list) {
146 struct hci_dev *d = list_entry(p, struct hci_dev, list);
147
148 hci_del_off_timer(d);
149
150 set_bit(HCI_MGMT, &d->flags);
151
152 if (test_bit(HCI_SETUP, &d->flags))
153 continue;
154
155 put_unaligned_le16(d->id, &rp->index[i++]);
156 BT_DBG("Added hci%u", d->id);
157 }
158
159 read_unlock(&hci_dev_list_lock);
160
-136 if (sock_queue_rcv_skb(sk, skb) < 0)
-137 kfree_skb(skb);
161 err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, rp,
162 rp_len);
163
-139 return 0;
164 kfree(rp);
165
166 return err;
167}
168
-142static int read_controller_info(struct sock *sk, unsigned char *data, u16 len)
169static int read_controller_info(struct sock *sk, u16 index)
170{
-144 struct sk_buff *skb;
-145 struct mgmt_hdr *hdr;
-146 struct mgmt_ev_cmd_complete *ev;
-147 struct mgmt_rp_read_info *rp;
-148 struct mgmt_cp_read_info *cp;
171 struct mgmt_rp_read_info rp;
172 struct hci_dev *hdev;
-150 u16 dev_id;
173
-152 BT_DBG("sock %p", sk);
174 BT_DBG("sock %p hci%u", sk, index);
175
176 hdev = hci_dev_get(index);
177 if (!hdev)
178 return cmd_status(sk, index, MGMT_OP_READ_INFO, ENODEV);
179
180 hci_del_off_timer(hdev);
181
182 hci_dev_lock(hdev);
183
184 set_bit(HCI_MGMT, &hdev->flags);
185
186 memset(&rp, 0, sizeof(rp));
187
188 rp.type = hdev->dev_type;
189
190 rp.powered = test_bit(HCI_UP, &hdev->flags);
191 rp.connectable = test_bit(HCI_PSCAN, &hdev->flags);
192 rp.discoverable = test_bit(HCI_ISCAN, &hdev->flags);
193 rp.pairable = test_bit(HCI_PSCAN, &hdev->flags);
194
195 if (test_bit(HCI_AUTH, &hdev->flags))
196 rp.sec_mode = 3;
197 else if (hdev->ssp_mode > 0)
198 rp.sec_mode = 4;
199 else
200 rp.sec_mode = 2;
201
202 bacpy(&rp.bdaddr, &hdev->bdaddr);
203 memcpy(rp.features, hdev->features, 8);
204 memcpy(rp.dev_class, hdev->dev_class, 3);
205 put_unaligned_le16(hdev->manufacturer, &rp.manufacturer);
206 rp.hci_ver = hdev->hci_ver;
207 put_unaligned_le16(hdev->hci_rev, &rp.hci_rev);
208
209 memcpy(rp.name, hdev->dev_name, sizeof(hdev->dev_name));
210
211 hci_dev_unlock(hdev);
212 hci_dev_put(hdev);
213
214 return cmd_complete(sk, index, MGMT_OP_READ_INFO, &rp, sizeof(rp));
215}
216
-154 if (len != 2)
-155 return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL);
217static void mgmt_pending_free(struct pending_cmd *cmd)
218{
219 sock_put(cmd->sk);
220 kfree(cmd->param);
221 kfree(cmd);
222}
223
224static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
225 u16 index, void *data, u16 len)
226{
227 struct pending_cmd *cmd;
228
229 cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
230 if (!cmd)
231 return NULL;
232
233 cmd->opcode = opcode;
234 cmd->index = index;
235
236 cmd->param = kmalloc(len, GFP_ATOMIC);
237 if (!cmd->param) {
238 kfree(cmd);
239 return NULL;
240 }
241
242 if (data)
243 memcpy(cmd->param, data, len);
244
245 cmd->sk = sk;
246 sock_hold(sk);
247
248 list_add(&cmd->list, &cmd_list);
249
250 return cmd;
251}
252
253static void mgmt_pending_foreach(u16 opcode, int index,
254 void (*cb)(struct pending_cmd *cmd, void *data),
255 void *data)
256{
257 struct list_head *p, *n;
258
259 list_for_each_safe(p, n, &cmd_list) {
260 struct pending_cmd *cmd;
261
262 cmd = list_entry(p, struct pending_cmd, list);
263
264 if (cmd->opcode != opcode)
265 continue;
266
267 if (index >= 0 && cmd->index != index)
268 continue;
269
270 cb(cmd, data);
271 }
272}
273
274static struct pending_cmd *mgmt_pending_find(u16 opcode, int index)
275{
276 struct list_head *p;
277
278 list_for_each(p, &cmd_list) {
279 struct pending_cmd *cmd;
280
281 cmd = list_entry(p, struct pending_cmd, list);
282
283 if (cmd->opcode != opcode)
284 continue;
285
-157 skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
286 if (index >= 0 && cmd->index != index)
287 continue;
288
289 return cmd;
290 }
291
292 return NULL;
293}
294
295static void mgmt_pending_remove(struct pending_cmd *cmd)
296{
297 list_del(&cmd->list);
298 mgmt_pending_free(cmd);
299}
300
301static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len)
302{
303 struct mgmt_mode *cp;
304 struct hci_dev *hdev;
305 struct pending_cmd *cmd;
306 int err, up;
307
308 cp = (void *) data;
309
310 BT_DBG("request for hci%u", index);
311
312 if (len != sizeof(*cp))
313 return cmd_status(sk, index, MGMT_OP_SET_POWERED, EINVAL);
314
315 hdev = hci_dev_get(index);
316 if (!hdev)
317 return cmd_status(sk, index, MGMT_OP_SET_POWERED, ENODEV);
318
319 hci_dev_lock(hdev);
320
321 up = test_bit(HCI_UP, &hdev->flags);
322 if ((cp->val && up) || (!cp->val && !up)) {
323 err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EALREADY);
324 goto failed;
325 }
326
327 if (mgmt_pending_find(MGMT_OP_SET_POWERED, index)) {
328 err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EBUSY);
329 goto failed;
330 }
331
332 cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, index, data, len);
333 if (!cmd) {
334 err = -ENOMEM;
335 goto failed;
336 }
337
338 if (cp->val)
339 queue_work(hdev->workqueue, &hdev->power_on);
340 else
341 queue_work(hdev->workqueue, &hdev->power_off);
342
343 err = 0;
344
345failed:
346 hci_dev_unlock(hdev);
347 hci_dev_put(hdev);
348 return err;
349}
350
351static int set_discoverable(struct sock *sk, u16 index, unsigned char *data,
352 u16 len)
353{
354 struct mgmt_mode *cp;
355 struct hci_dev *hdev;
356 struct pending_cmd *cmd;
357 u8 scan;
358 int err;
359
360 cp = (void *) data;
361
362 BT_DBG("request for hci%u", index);
363
364 if (len != sizeof(*cp))
365 return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EINVAL);
366
367 hdev = hci_dev_get(index);
368 if (!hdev)
369 return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENODEV);
370
371 hci_dev_lock(hdev);
372
373 if (!test_bit(HCI_UP, &hdev->flags)) {
374 err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENETDOWN);
375 goto failed;
376 }
377
378 if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) ||
379 mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) {
380 err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EBUSY);
381 goto failed;
382 }
383
384 if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) &&
385 test_bit(HCI_PSCAN, &hdev->flags)) {
386 err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EALREADY);
387 goto failed;
388 }
389
390 cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, index, data, len);
391 if (!cmd) {
392 err = -ENOMEM;
393 goto failed;
394 }
395
396 scan = SCAN_PAGE;
397
398 if (cp->val)
399 scan |= SCAN_INQUIRY;
400
401 err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
402 if (err < 0)
403 mgmt_pending_remove(cmd);
404
405failed:
406 hci_dev_unlock(hdev);
407 hci_dev_put(hdev);
408
409 return err;
410}
411
412static int set_connectable(struct sock *sk, u16 index, unsigned char *data,
413 u16 len)
414{
415 struct mgmt_mode *cp;
416 struct hci_dev *hdev;
417 struct pending_cmd *cmd;
418 u8 scan;
419 int err;
420
421 cp = (void *) data;
422
423 BT_DBG("request for hci%u", index);
424
425 if (len != sizeof(*cp))
426 return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EINVAL);
427
428 hdev = hci_dev_get(index);
429 if (!hdev)
430 return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENODEV);
431
432 hci_dev_lock(hdev);
433
434 if (!test_bit(HCI_UP, &hdev->flags)) {
435 err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENETDOWN);
436 goto failed;
437 }
438
439 if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) ||
440 mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) {
441 err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EBUSY);
442 goto failed;
443 }
444
445 if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) {
446 err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EALREADY);
447 goto failed;
448 }
449
450 cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, index, data, len);
451 if (!cmd) {
452 err = -ENOMEM;
453 goto failed;
454 }
455
456 if (cp->val)
457 scan = SCAN_PAGE;
458 else
459 scan = 0;
460
461 err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
462 if (err < 0)
463 mgmt_pending_remove(cmd);
464
465failed:
466 hci_dev_unlock(hdev);
467 hci_dev_put(hdev);
468
469 return err;
470}
471
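/* Broadcast a management event to every HCI control socket, optionally
 * skipping the socket that triggered it (skip_sk). */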
472static int mgmt_event(u16 event, u16 index, void *data, u16 data_len,
473 struct sock *skip_sk)
474{
475 struct sk_buff *skb;
476 struct mgmt_hdr *hdr;
477
478 skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC);
479 if (!skb)
480 return -ENOMEM;
481
482 bt_cb(skb)->channel = HCI_CHANNEL_CONTROL;
483
484 hdr = (void *) skb_put(skb, sizeof(*hdr));
-162 hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-163 hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
485 hdr->opcode = cpu_to_le16(event);
486 hdr->index = cpu_to_le16(index);
487 hdr->len = cpu_to_le16(data_len);
488
-165 ev = (void *) skb_put(skb, sizeof(*ev));
-166 put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode);
489 if (data)
490 memcpy(skb_put(skb, data_len), data, data_len);
491
492 hci_send_to_sock(NULL, skb, skip_sk);
493 kfree_skb(skb);
494
495 return 0;
496}
497
498static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val)
499{
500 struct mgmt_mode rp;
501
502 rp.val = val;
503
504 return cmd_complete(sk, index, opcode, &rp, sizeof(rp));
505}
506
-168 rp = (void *) skb_put(skb, sizeof(*rp));
507static int set_pairable(struct sock *sk, u16 index, unsigned char *data,
508 u16 len)
509{
510 struct mgmt_mode *cp, ev;
511 struct hci_dev *hdev;
512 int err;
513
514 cp = (void *) data;
-171 dev_id = get_unaligned_le16(&cp->index);
515
-173 BT_DBG("request for hci%u", dev_id);
516 BT_DBG("request for hci%u", index);
517
-175 hdev = hci_dev_get(dev_id);
-176 if (!hdev) {
-177 kfree_skb(skb);
-178 return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV);
518 if (len != sizeof(*cp))
519 return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, EINVAL);
520
521 hdev = hci_dev_get(index);
522 if (!hdev)
523 return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, ENODEV);
524
525 hci_dev_lock(hdev);
526
527 if (cp->val)
528 set_bit(HCI_PAIRABLE, &hdev->flags);
529 else
530 clear_bit(HCI_PAIRABLE, &hdev->flags);
531
532 err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, index, cp->val);
533 if (err < 0)
534 goto failed;
535
536 ev.val = cp->val;
537
538 err = mgmt_event(MGMT_EV_PAIRABLE, index, &ev, sizeof(ev), sk);
539
540failed:
541 hci_dev_unlock(hdev);
542 hci_dev_put(hdev);
543
544 return err;
545}
546
547#define EIR_FLAGS 0x01 /* flags */
548#define EIR_UUID16_SOME 0x02 /* 16-bit UUID, more available */
549#define EIR_UUID16_ALL 0x03 /* 16-bit UUID, all listed */
550#define EIR_UUID32_SOME 0x04 /* 32-bit UUID, more available */
551#define EIR_UUID32_ALL 0x05 /* 32-bit UUID, all listed */
552#define EIR_UUID128_SOME 0x06 /* 128-bit UUID, more available */
553#define EIR_UUID128_ALL 0x07 /* 128-bit UUID, all listed */
554#define EIR_NAME_SHORT 0x08 /* shortened local name */
555#define EIR_NAME_COMPLETE 0x09 /* complete local name */
556#define EIR_TX_POWER 0x0A /* transmit power level */
557#define EIR_DEVICE_ID 0x10 /* device ID */
558
559#define PNP_INFO_SVCLASS_ID 0x1200
560
561static u8 bluetooth_base_uuid[] = {
562 0xFB, 0x34, 0x9B, 0x5F, 0x80, 0x00, 0x00, 0x80,
563 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
564};
565
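/* If the 128-bit UUID is derived from the Bluetooth Base UUID
 * (0000xxxx-0000-1000-8000-00805F9B34FB), return its 16-bit alias,
 * otherwise return 0. */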
566static u16 get_uuid16(u8 *uuid128)
567{
568 u32 val;
569 int i;
570
571 for (i = 0; i < 12; i++) {
572 if (bluetooth_base_uuid[i] != uuid128[i])
573 return 0;
574 }
575
-181 hci_dev_lock_bh(hdev);
-183 put_unaligned_le16(hdev->id, &rp->index);
-184 rp->type = hdev->dev_type;
-186 rp->powered = test_bit(HCI_UP, &hdev->flags);
-187 rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags);
-188 rp->pairable = test_bit(HCI_PSCAN, &hdev->flags);
-190 if (test_bit(HCI_AUTH, &hdev->flags))
-191 rp->sec_mode = 3;
-192 else if (hdev->ssp_mode > 0)
-193 rp->sec_mode = 4;
576 memcpy(&val, &uuid128[12], 4);
577
578 val = le32_to_cpu(val);
579 if (val > 0xffff)
580 return 0;
581
582 return (u16) val;
583}
584
585static void create_eir(struct hci_dev *hdev, u8 *data)
586{
587 u8 *ptr = data;
588 u16 eir_len = 0;
589 u16 uuid16_list[HCI_MAX_EIR_LENGTH / sizeof(u16)];
590 int i, truncated = 0;
591 struct list_head *p;
592 size_t name_len;
593
594 name_len = strlen(hdev->dev_name);
595
596 if (name_len > 0) {
597 /* EIR Data type */
598 if (name_len > 48) {
599 name_len = 48;
600 ptr[1] = EIR_NAME_SHORT;
601 } else
602 ptr[1] = EIR_NAME_COMPLETE;
603
604 /* EIR Data length */
605 ptr[0] = name_len + 1;
606
607 memcpy(ptr + 2, hdev->dev_name, name_len);
608
609 eir_len += (name_len + 2);
610 ptr += (name_len + 2);
611 }
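	/* Every EIR field is a <length, type, data> triplet whose length
	 * octet counts the type octet plus the data, hence name_len + 1
	 * above and (i * sizeof(u16)) + 1 for the UUID list below. */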
612
613 memset(uuid16_list, 0, sizeof(uuid16_list));
614
615 /* Group all UUID16 types */
616 list_for_each(p, &hdev->uuids) {
617 struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list);
618 u16 uuid16;
619
620 uuid16 = get_uuid16(uuid->uuid);
621 if (uuid16 == 0)
622 return;
623
624 if (uuid16 < 0x1100)
625 continue;
626
627 if (uuid16 == PNP_INFO_SVCLASS_ID)
628 continue;
629
630 /* Stop if not enough space to put next UUID */
631 if (eir_len + 2 + sizeof(u16) > HCI_MAX_EIR_LENGTH) {
632 truncated = 1;
633 break;
634 }
635
636 /* Check for duplicates */
637 for (i = 0; uuid16_list[i] != 0; i++)
638 if (uuid16_list[i] == uuid16)
639 break;
640
641 if (uuid16_list[i] == 0) {
642 uuid16_list[i] = uuid16;
643 eir_len += sizeof(u16);
644 }
645 }
646
647 if (uuid16_list[0] != 0) {
648 u8 *length = ptr;
649
650 /* EIR Data type */
651 ptr[1] = truncated ? EIR_UUID16_SOME : EIR_UUID16_ALL;
652
653 ptr += 2;
654 eir_len += 2;
655
656 for (i = 0; uuid16_list[i] != 0; i++) {
657 *ptr++ = (uuid16_list[i] & 0x00ff);
658 *ptr++ = (uuid16_list[i] & 0xff00) >> 8;
659 }
660
661 /* EIR Data length */
662 *length = (i * sizeof(u16)) + 1;
663 }
664}
665
666static int update_eir(struct hci_dev *hdev)
667{
668 struct hci_cp_write_eir cp;
669
670 if (!(hdev->features[6] & LMP_EXT_INQ))
671 return 0;
672
673 if (hdev->ssp_mode == 0)
674 return 0;
675
676 if (test_bit(HCI_SERVICE_CACHE, &hdev->flags))
677 return 0;
678
679 memset(&cp, 0, sizeof(cp));
680
681 create_eir(hdev, cp.data);
682
683 if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
684 return 0;
685
686 memcpy(hdev->eir, cp.data, sizeof(cp.data));
687
688 return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
689}
690
691static u8 get_service_classes(struct hci_dev *hdev)
692{
693 struct list_head *p;
694 u8 val = 0;
695
696 list_for_each(p, &hdev->uuids) {
697 struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list);
698
699 val |= uuid->svc_hint;
700 }
701
702 return val;
703}
704
705static int update_class(struct hci_dev *hdev)
706{
707 u8 cod[3];
708
709 BT_DBG("%s", hdev->name);
710
711 if (test_bit(HCI_SERVICE_CACHE, &hdev->flags))
712 return 0;
713
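	/* cod[] mirrors the on-air Class of Device layout: minor class,
	 * major class, then the service-class bits aggregated from the
	 * registered UUID hints. */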
714 cod[0] = hdev->minor_class;
715 cod[1] = hdev->major_class;
716 cod[2] = get_service_classes(hdev);
717
718 if (memcmp(cod, hdev->dev_class, 3) == 0)
719 return 0;
720
721 return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
722}
723
724static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len)
725{
726 struct mgmt_cp_add_uuid *cp;
727 struct hci_dev *hdev;
728 struct bt_uuid *uuid;
729 int err;
730
731 cp = (void *) data;
732
733 BT_DBG("request for hci%u", index);
734
735 if (len != sizeof(*cp))
736 return cmd_status(sk, index, MGMT_OP_ADD_UUID, EINVAL);
737
738 hdev = hci_dev_get(index);
739 if (!hdev)
740 return cmd_status(sk, index, MGMT_OP_ADD_UUID, ENODEV);
741
742 hci_dev_lock(hdev);
743
744 uuid = kmalloc(sizeof(*uuid), GFP_ATOMIC);
745 if (!uuid) {
746 err = -ENOMEM;
747 goto failed;
748 }
749
750 memcpy(uuid->uuid, cp->uuid, 16);
751 uuid->svc_hint = cp->svc_hint;
752
753 list_add(&uuid->list, &hdev->uuids);
754
755 err = update_class(hdev);
756 if (err < 0)
757 goto failed;
758
759 err = update_eir(hdev);
760 if (err < 0)
761 goto failed;
762
763 err = cmd_complete(sk, index, MGMT_OP_ADD_UUID, NULL, 0);
764
765failed:
766 hci_dev_unlock(hdev);
767 hci_dev_put(hdev);
768
769 return err;
770}
771
772static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len)
773{
774 struct list_head *p, *n;
775 struct mgmt_cp_remove_uuid *cp;
776 struct hci_dev *hdev;
777 u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
778 int err, found;
779
780 cp = (void *) data;
781
782 BT_DBG("request for hci%u", index);
783
784 if (len != sizeof(*cp))
785 return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, EINVAL);
786
787 hdev = hci_dev_get(index);
788 if (!hdev)
789 return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENODEV);
790
791 hci_dev_lock(hdev);
792
793 if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) {
794 err = hci_uuids_clear(hdev);
795 goto unlock;
796 }
797
798 found = 0;
799
800 list_for_each_safe(p, n, &hdev->uuids) {
801 struct bt_uuid *match = list_entry(p, struct bt_uuid, list);
802
803 if (memcmp(match->uuid, cp->uuid, 16) != 0)
804 continue;
805
806 list_del(&match->list);
807 found++;
808 }
809
810 if (found == 0) {
811 err = cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENOENT);
812 goto unlock;
813 }
814
815 err = update_class(hdev);
816 if (err < 0)
817 goto unlock;
818
819 err = update_eir(hdev);
820 if (err < 0)
821 goto unlock;
822
823 err = cmd_complete(sk, index, MGMT_OP_REMOVE_UUID, NULL, 0);
824
825unlock:
826 hci_dev_unlock(hdev);
827 hci_dev_put(hdev);
828
829 return err;
830}
831
832static int set_dev_class(struct sock *sk, u16 index, unsigned char *data,
833 u16 len)
834{
835 struct hci_dev *hdev;
836 struct mgmt_cp_set_dev_class *cp;
837 int err;
838
839 cp = (void *) data;
840
841 BT_DBG("request for hci%u", index);
842
843 if (len != sizeof(*cp))
844 return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, EINVAL);
845
846 hdev = hci_dev_get(index);
847 if (!hdev)
848 return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, ENODEV);
849
850 hci_dev_lock(hdev);
851
852 hdev->major_class = cp->major;
853 hdev->minor_class = cp->minor;
854
855 err = update_class(hdev);
856
857 if (err == 0)
858 err = cmd_complete(sk, index, MGMT_OP_SET_DEV_CLASS, NULL, 0);
859
860 hci_dev_unlock(hdev);
861 hci_dev_put(hdev);
862
863 return err;
864}
865
866static int set_service_cache(struct sock *sk, u16 index, unsigned char *data,
867 u16 len)
868{
869 struct hci_dev *hdev;
870 struct mgmt_cp_set_service_cache *cp;
871 int err;
872
873 cp = (void *) data;
874
875 if (len != sizeof(*cp))
876 return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, EINVAL);
877
878 hdev = hci_dev_get(index);
879 if (!hdev)
880 return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, ENODEV);
881
882 hci_dev_lock(hdev);
883
884 BT_DBG("hci%u enable %d", index, cp->enable);
885
886 if (cp->enable) {
887 set_bit(HCI_SERVICE_CACHE, &hdev->flags);
888 err = 0;
889 } else {
890 clear_bit(HCI_SERVICE_CACHE, &hdev->flags);
891 err = update_class(hdev);
892 if (err == 0)
893 err = update_eir(hdev);
894 }
895
896 if (err == 0)
897 err = cmd_complete(sk, index, MGMT_OP_SET_SERVICE_CACHE, NULL,
898 0);
899
900 hci_dev_unlock(hdev);
901 hci_dev_put(hdev);
902
903 return err;
904}
905
906static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len)
907{
908 struct hci_dev *hdev;
909 struct mgmt_cp_load_keys *cp;
910 u16 key_count, expected_len;
911 int i;
912
913 cp = (void *) data;
914
915 if (len < sizeof(*cp))
916 return -EINVAL;
917
918 key_count = get_unaligned_le16(&cp->key_count);
919
920 expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info);
921 if (expected_len != len) {
922 BT_ERR("load_keys: expected %u bytes, got %u bytes",
923 len, expected_len);
924 return -EINVAL;
925 }
926
927 hdev = hci_dev_get(index);
928 if (!hdev)
929 return cmd_status(sk, index, MGMT_OP_LOAD_KEYS, ENODEV);
930
931 BT_DBG("hci%u debug_keys %u key_count %u", index, cp->debug_keys,
932 key_count);
933
934 hci_dev_lock(hdev);
935
936 hci_link_keys_clear(hdev);
937
938 set_bit(HCI_LINK_KEYS, &hdev->flags);
939
940 if (cp->debug_keys)
941 set_bit(HCI_DEBUG_KEYS, &hdev->flags);
-194 else
-195 rp->sec_mode = 2;
942 else
943 clear_bit(HCI_DEBUG_KEYS, &hdev->flags);
944
945 for (i = 0; i < key_count; i++) {
946 struct mgmt_key_info *key = &cp->keys[i];
947
948 hci_add_link_key(hdev, NULL, 0, &key->bdaddr, key->val, key->type,
949 key->pin_len);
950 }
951
952 hci_dev_unlock(hdev);
953 hci_dev_put(hdev);
954
955 return 0;
956}
957
958static int remove_key(struct sock *sk, u16 index, unsigned char *data, u16 len)
959{
960 struct hci_dev *hdev;
961 struct mgmt_cp_remove_key *cp;
962 struct hci_conn *conn;
963 int err;
964
965 cp = (void *) data;
966
967 if (len != sizeof(*cp))
968 return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, EINVAL);
969
970 hdev = hci_dev_get(index);
971 if (!hdev)
972 return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, ENODEV);
973
974 hci_dev_lock(hdev);
975
976 err = hci_remove_link_key(hdev, &cp->bdaddr);
977 if (err < 0) {
978 err = cmd_status(sk, index, MGMT_OP_REMOVE_KEY, -err);
979 goto unlock;
980 }
981
-197 bacpy(&rp->bdaddr, &hdev->bdaddr);
-198 memcpy(rp->features, hdev->features, 8);
-199 memcpy(rp->dev_class, hdev->dev_class, 3);
-200 put_unaligned_le16(hdev->manufacturer, &rp->manufacturer);
-201 rp->hci_ver = hdev->hci_ver;
-202 put_unaligned_le16(hdev->hci_rev, &rp->hci_rev);
982 err = 0;
983
984 if (!test_bit(HCI_UP, &hdev->flags) || !cp->disconnect)
985 goto unlock;
986
987 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
988 if (conn) {
989 struct hci_cp_disconnect dc;
990
991 put_unaligned_le16(conn->handle, &dc.handle);
992 dc.reason = 0x13; /* Remote User Terminated Connection */
993 err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, 0, NULL);
994 }
995
996unlock:
997 hci_dev_unlock(hdev);
998 hci_dev_put(hdev);
999
1000 return err;
1001}
1002
1003static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len)
1004{
1005 struct hci_dev *hdev;
1006 struct mgmt_cp_disconnect *cp;
1007 struct hci_cp_disconnect dc;
1008 struct pending_cmd *cmd;
1009 struct hci_conn *conn;
1010 int err;
1011
1012 BT_DBG("");
1013
1014 cp = (void *) data;
1015
1016 if (len != sizeof(*cp))
1017 return cmd_status(sk, index, MGMT_OP_DISCONNECT, EINVAL);
1018
1019 hdev = hci_dev_get(index);
1020 if (!hdev)
1021 return cmd_status(sk, index, MGMT_OP_DISCONNECT, ENODEV);
1022
1023 hci_dev_lock(hdev);
1024
1025 if (!test_bit(HCI_UP, &hdev->flags)) {
1026 err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENETDOWN);
1027 goto failed;
1028 }
1029
1030 if (mgmt_pending_find(MGMT_OP_DISCONNECT, index)) {
1031 err = cmd_status(sk, index, MGMT_OP_DISCONNECT, EBUSY);
1032 goto failed;
1033 }
1034
1035 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
1036 if (!conn)
1037 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->bdaddr);
1038
1039 if (!conn) {
1040 err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENOTCONN);
1041 goto failed;
1042 }
1043
1044 cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, index, data, len);
1045 if (!cmd) {
1046 err = -ENOMEM;
1047 goto failed;
1048 }
1049
1050 put_unaligned_le16(conn->handle, &dc.handle);
1051 dc.reason = 0x13; /* Remote User Terminated Connection */
1052
1053 err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc);
1054 if (err < 0)
1055 mgmt_pending_remove(cmd);
1056
1057failed:
1058 hci_dev_unlock(hdev);
1059 hci_dev_put(hdev);
1060
1061 return err;
1062}
1063
1064static int get_connections(struct sock *sk, u16 index)
1065{
1066 struct mgmt_rp_get_connections *rp;
1067 struct hci_dev *hdev;
1068 struct list_head *p;
1069 size_t rp_len;
1070 u16 count;
1071 int i, err;
1072
1073 BT_DBG("");
1074
1075 hdev = hci_dev_get(index);
1076 if (!hdev)
1077 return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, ENODEV);
1078
1079 hci_dev_lock(hdev);
1080
1081 count = 0;
1082 list_for_each(p, &hdev->conn_hash.list) {
1083 count++;
1084 }
1085
1086 rp_len = sizeof(*rp) + (count * sizeof(bdaddr_t));
1087 rp = kmalloc(rp_len, GFP_ATOMIC);
1088 if (!rp) {
1089 err = -ENOMEM;
1090 goto unlock;
1091 }
1092
1093 put_unaligned_le16(count, &rp->conn_count);
1094
1095 read_lock(&hci_dev_list_lock);
1096
1097 i = 0;
1098 list_for_each(p, &hdev->conn_hash.list) {
1099 struct hci_conn *c = list_entry(p, struct hci_conn, list);
1100
1101 bacpy(&rp->conn[i++], &c->dst);
1102 }
1103
1104 read_unlock(&hci_dev_list_lock);
1105
1106 err = cmd_complete(sk, index, MGMT_OP_GET_CONNECTIONS, rp, rp_len);
1107
1108unlock:
1109 kfree(rp);
1110 hci_dev_unlock(hdev);
1111 hci_dev_put(hdev);
1112 return err;
1113}
1114
1115static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data,
1116 u16 len)
1117{
1118 struct hci_dev *hdev;
1119 struct mgmt_cp_pin_code_reply *cp;
1120 struct hci_cp_pin_code_reply reply;
1121 struct pending_cmd *cmd;
1122 int err;
1123
1124 BT_DBG("");
1125
1126 cp = (void *) data;
1127
1128 if (len != sizeof(*cp))
1129 return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, EINVAL);
1130
1131 hdev = hci_dev_get(index);
1132 if (!hdev)
1133 return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENODEV);
1134
1135 hci_dev_lock(hdev);
1136
1137 if (!test_bit(HCI_UP, &hdev->flags)) {
1138 err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENETDOWN);
1139 goto failed;
1140 }
1141
1142 cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, index, data, len);
1143 if (!cmd) {
1144 err = -ENOMEM;
1145 goto failed;
1146 }
1147
1148 bacpy(&reply.bdaddr, &cp->bdaddr);
1149 reply.pin_len = cp->pin_len;
1150 memcpy(reply.pin_code, cp->pin_code, 16);
1151
1152 err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply);
1153 if (err < 0)
1154 mgmt_pending_remove(cmd);
1155
1156failed:
1157 hci_dev_unlock(hdev);
1158 hci_dev_put(hdev);
1159
1160 return err;
1161}
1162
1163static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data,
1164 u16 len)
1165{
1166 struct hci_dev *hdev;
1167 struct mgmt_cp_pin_code_neg_reply *cp;
1168 struct pending_cmd *cmd;
1169 int err;
1170
1171 BT_DBG("");
1172
1173 cp = (void *) data;
1174
1175 if (len != sizeof(*cp))
1176 return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY,
1177 EINVAL);
1178
1179 hdev = hci_dev_get(index);
1180 if (!hdev)
1181 return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY,
1182 ENODEV);
1183
1184 hci_dev_lock(hdev);
1185
1186 if (!test_bit(HCI_UP, &hdev->flags)) {
1187 err = cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY,
1188 ENETDOWN);
1189 goto failed;
1190 }
1191
1192 cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, index,
1193 data, len);
1194 if (!cmd) {
1195 err = -ENOMEM;
1196 goto failed;
1197 }
1198
1199 err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->bdaddr),
1200 &cp->bdaddr);
1201 if (err < 0)
1202 mgmt_pending_remove(cmd);
1203
1204failed:
1205 hci_dev_unlock(hdev);
1206 hci_dev_put(hdev);
1207
1208 return err;
1209}
1210
1211static int set_io_capability(struct sock *sk, u16 index, unsigned char *data,
1212 u16 len)
1213{
1214 struct hci_dev *hdev;
1215 struct mgmt_cp_set_io_capability *cp;
1216
1217 BT_DBG("");
1218
1219 cp = (void *) data;
1220
1221 if (len != sizeof(*cp))
1222 return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, EINVAL);
1223
1224 hdev = hci_dev_get(index);
1225 if (!hdev)
1226 return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, ENODEV);
1227
1228 hci_dev_lock(hdev);
1229
1230 hdev->io_capability = cp->io_capability;
1231
1232 BT_DBG("%s IO capability set to 0x%02x", hdev->name,
1233 hdev->io_capability);
1234
1235 hci_dev_unlock(hdev);
1236 hci_dev_put(hdev);
1237
1238 return cmd_complete(sk, index, MGMT_OP_SET_IO_CAPABILITY, NULL, 0);
1239}
1240
1241static inline struct pending_cmd *find_pairing(struct hci_conn *conn)
1242{
1243 struct hci_dev *hdev = conn->hdev;
1244 struct list_head *p;
1245
1246 list_for_each(p, &cmd_list) {
1247 struct pending_cmd *cmd;
1248
1249 cmd = list_entry(p, struct pending_cmd, list);
1250
1251 if (cmd->opcode != MGMT_OP_PAIR_DEVICE)
1252 continue;
1253
1254 if (cmd->index != hdev->id)
1255 continue;
1256
1257 if (cmd->user_data != conn)
1258 continue;
1259
1260 return cmd;
1261 }
1262
1263 return NULL;
1264}
1265
1266static void pairing_complete(struct pending_cmd *cmd, u8 status)
1267{
1268 struct mgmt_rp_pair_device rp;
1269 struct hci_conn *conn = cmd->user_data;
1270
1271 bacpy(&rp.bdaddr, &conn->dst);
1272 rp.status = status;
1273
1274 cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp));
1275
1276 /* So we don't get further callbacks for this connection */
1277 conn->connect_cfm_cb = NULL;
1278 conn->security_cfm_cb = NULL;
1279 conn->disconn_cfm_cb = NULL;
1280
1281 hci_conn_put(conn);
1282
1283 mgmt_pending_remove(cmd);
1284}
1285
1286static void pairing_complete_cb(struct hci_conn *conn, u8 status)
1287{
1288 struct pending_cmd *cmd;
1289
1290 BT_DBG("status %u", status);
1291
1292 cmd = find_pairing(conn);
1293 if (!cmd) {
1294 BT_DBG("Unable to find a pending command");
1295 return;
1296 }
1297
1298 pairing_complete(cmd, status);
1299}
1300
1301static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len)
1302{
1303 struct hci_dev *hdev;
1304 struct mgmt_cp_pair_device *cp;
1305 struct pending_cmd *cmd;
1306 u8 sec_level, auth_type;
1307 struct hci_conn *conn;
1308 int err;
1309
1310 BT_DBG("");
1311
1312 cp = (void *) data;
1313
1314 if (len != sizeof(*cp))
1315 return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EINVAL);
1316
1317 hdev = hci_dev_get(index);
1318 if (!hdev)
1319 return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, ENODEV);
1320
1321 hci_dev_lock(hdev);
1322
1323 if (cp->io_cap == 0x03) {
1324 sec_level = BT_SECURITY_MEDIUM;
1325 auth_type = HCI_AT_DEDICATED_BONDING;
1326 } else {
1327 sec_level = BT_SECURITY_HIGH;
1328 auth_type = HCI_AT_DEDICATED_BONDING_MITM;
1329 }
1330
1331 conn = hci_connect(hdev, ACL_LINK, &cp->bdaddr, sec_level, auth_type);
1332 if (IS_ERR(conn)) {
1333 err = PTR_ERR(conn);
1334 goto unlock;
1335 }
1336
1337 if (conn->connect_cfm_cb) {
1338 hci_conn_put(conn);
1339 err = cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EBUSY);
1340 goto unlock;
1341 }
1342
1343 cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, index, data, len);
1344 if (!cmd) {
1345 err = -ENOMEM;
1346 hci_conn_put(conn);
1347 goto unlock;
1348 }
1349
1350 conn->connect_cfm_cb = pairing_complete_cb;
1351 conn->security_cfm_cb = pairing_complete_cb;
1352 conn->disconn_cfm_cb = pairing_complete_cb;
1353 conn->io_capability = cp->io_cap;
1354 cmd->user_data = conn;
1355
1356 if (conn->state == BT_CONNECTED &&
1357 hci_conn_security(conn, sec_level, auth_type))
1358 pairing_complete(cmd, 0);
1359
1360 err = 0;
1361
1362unlock:
1363 hci_dev_unlock(hdev);
1364 hci_dev_put(hdev);
1365
1366 return err;
1367}
1368
1369static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data,
1370 u16 len, int success)
1371{
1372 struct mgmt_cp_user_confirm_reply *cp = (void *) data;
1373 u16 mgmt_op, hci_op;
1374 struct pending_cmd *cmd;
1375 struct hci_dev *hdev;
1376 int err;
1377
1378 BT_DBG("");
1379
1380 if (success) {
1381 mgmt_op = MGMT_OP_USER_CONFIRM_REPLY;
1382 hci_op = HCI_OP_USER_CONFIRM_REPLY;
1383 } else {
1384 mgmt_op = MGMT_OP_USER_CONFIRM_NEG_REPLY;
1385 hci_op = HCI_OP_USER_CONFIRM_NEG_REPLY;
1386 }
1387
1388 if (len != sizeof(*cp))
1389 return cmd_status(sk, index, mgmt_op, EINVAL);
1390
1391 hdev = hci_dev_get(index);
1392 if (!hdev)
1393 return cmd_status(sk, index, mgmt_op, ENODEV);
1394
1395 hci_dev_lock(hdev);
1396
1397 if (!test_bit(HCI_UP, &hdev->flags)) {
1398 err = cmd_status(sk, index, mgmt_op, ENETDOWN);
1399 goto failed;
1400 }
1401
1402 cmd = mgmt_pending_add(sk, mgmt_op, index, data, len);
1403 if (!cmd) {
1404 err = -ENOMEM;
1405 goto failed;
1406 }
1407
1408 err = hci_send_cmd(hdev, hci_op, sizeof(cp->bdaddr), &cp->bdaddr);
1409 if (err < 0)
1410 mgmt_pending_remove(cmd);
1411
1412failed:
1413 hci_dev_unlock(hdev);
1414 hci_dev_put(hdev);
1415
1416 return err;
1417}
1418
1419static int set_local_name(struct sock *sk, u16 index, unsigned char *data,
1420 u16 len)
1421{
1422 struct mgmt_cp_set_local_name *mgmt_cp = (void *) data;
1423 struct hci_cp_write_local_name hci_cp;
1424 struct hci_dev *hdev;
1425 struct pending_cmd *cmd;
1426 int err;
1427
1428 BT_DBG("");
1429
1430 if (len != sizeof(*mgmt_cp))
1431 return cmd_status(sk, index, MGMT_OP_SET_LOCAL_NAME, EINVAL);
1432
1433 hdev = hci_dev_get(index);
1434 if (!hdev)
1435 return cmd_status(sk, index, MGMT_OP_SET_LOCAL_NAME, ENODEV);
1436
1437 hci_dev_lock(hdev);
1438
1439 cmd = mgmt_pending_add(sk, MGMT_OP_SET_LOCAL_NAME, index, data, len);
1440 if (!cmd) {
1441 err = -ENOMEM;
1442 goto failed;
1443 }
1444
1445 memcpy(hci_cp.name, mgmt_cp->name, sizeof(hci_cp.name));
1446 err = hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(hci_cp),
1447 &hci_cp);
1448 if (err < 0)
1449 mgmt_pending_remove(cmd);
1450
1451failed:
1452 hci_dev_unlock(hdev);
1453 hci_dev_put(hdev);
1454
1455 return err;
1456}
1457
1458static int read_local_oob_data(struct sock *sk, u16 index)
1459{
1460 struct hci_dev *hdev;
1461 struct pending_cmd *cmd;
1462 int err;
1463
1464 BT_DBG("hci%u", index);
1465
1466 hdev = hci_dev_get(index);
1467 if (!hdev)
1468 return cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA,
1469 ENODEV);
1470
1471 hci_dev_lock(hdev);
1472
1473 if (!test_bit(HCI_UP, &hdev->flags)) {
1474 err = cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA,
1475 ENETDOWN);
1476 goto unlock;
1477 }
1478
1479 if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) {
1480 err = cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA,
1481 EOPNOTSUPP);
1482 goto unlock;
1483 }
1484
1485 if (mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, index)) {
1486 err = cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, EBUSY);
1487 goto unlock;
1488 }
1489
1490 cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_DATA, index, NULL, 0);
1491 if (!cmd) {
1492 err = -ENOMEM;
1493 goto unlock;
1494 }
1495
1496 err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL);
1497 if (err < 0)
1498 mgmt_pending_remove(cmd);
1499
1500unlock:
1501 hci_dev_unlock(hdev);
1502 hci_dev_put(hdev);
1503
1504 return err;
1505}
1506
1507static int add_remote_oob_data(struct sock *sk, u16 index, unsigned char *data,
1508 u16 len)
1509{
1510 struct hci_dev *hdev;
1511 struct mgmt_cp_add_remote_oob_data *cp = (void *) data;
1512 int err;
1513
1514 BT_DBG("hci%u ", index);
1515
1516 if (len != sizeof(*cp))
1517 return cmd_status(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA,
1518 EINVAL);
1519
1520 hdev = hci_dev_get(index);
1521 if (!hdev)
1522 return cmd_status(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA,
1523 ENODEV);
1524
1525 hci_dev_lock(hdev);
1526
1527 err = hci_add_remote_oob_data(hdev, &cp->bdaddr, cp->hash,
1528 cp->randomizer);
1529 if (err < 0)
1530 err = cmd_status(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA, -err);
1531 else
1532 err = cmd_complete(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA, NULL,
1533 0);
1534
1535 hci_dev_unlock(hdev);
1536 hci_dev_put(hdev);
1537
1538 return err;
1539}
1540
1541static int remove_remote_oob_data(struct sock *sk, u16 index,
1542 unsigned char *data, u16 len)
1543{
1544 struct hci_dev *hdev;
1545 struct mgmt_cp_remove_remote_oob_data *cp = (void *) data;
1546 int err;
1547
1548 BT_DBG("hci%u ", index);
1549
1550 if (len != sizeof(*cp))
1551 return cmd_status(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA,
1552 EINVAL);
1553
1554 hdev = hci_dev_get(index);
1555 if (!hdev)
1556 return cmd_status(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA,
1557 ENODEV);
1558
1559 hci_dev_lock(hdev);
1560
1561 err = hci_remove_remote_oob_data(hdev, &cp->bdaddr);
1562 if (err < 0)
1563 err = cmd_status(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA,
1564 -err);
1565 else
1566 err = cmd_complete(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA,
1567 NULL, 0);
1568
1569 hci_dev_unlock(hdev);
1570 hci_dev_put(hdev);
1571
1572 return err;
1573}
1574
1575static int start_discovery(struct sock *sk, u16 index)
1576{
1577 u8 lap[3] = { 0x33, 0x8b, 0x9e };
1578 struct hci_cp_inquiry cp;
1579 struct pending_cmd *cmd;
1580 struct hci_dev *hdev;
1581 int err;
1582
1583 BT_DBG("hci%u", index);
1584
1585 hdev = hci_dev_get(index);
1586 if (!hdev)
1587 return cmd_status(sk, index, MGMT_OP_START_DISCOVERY, ENODEV);
1588
1589 hci_dev_lock_bh(hdev);
1590
1591 cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, index, NULL, 0);
1592 if (!cmd) {
1593 err = -ENOMEM;
1594 goto failed;
1595 }
1596
1597 memset(&cp, 0, sizeof(cp));
1598 memcpy(&cp.lap, lap, 3);
1599 cp.length = 0x08;
1600 cp.num_rsp = 0x00;
1601
1602 err = hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp);
1603 if (err < 0)
1604 mgmt_pending_remove(cmd);
1605
1606failed:
204 hci_dev_unlock_bh(hdev); 1607 hci_dev_unlock_bh(hdev);
205 hci_dev_put(hdev); 1608 hci_dev_put(hdev);
206 1609
207 if (sock_queue_rcv_skb(sk, skb) < 0) 1610 return err;
208 kfree_skb(skb); 1611}
209 1612
210 return 0; 1613static int stop_discovery(struct sock *sk, u16 index)
1614{
1615 struct hci_dev *hdev;
1616 struct pending_cmd *cmd;
1617 int err;
1618
1619 BT_DBG("hci%u", index);
1620
1621 hdev = hci_dev_get(index);
1622 if (!hdev)
1623 return cmd_status(sk, index, MGMT_OP_STOP_DISCOVERY, ENODEV);
1624
1625 hci_dev_lock_bh(hdev);
1626
1627 cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, index, NULL, 0);
1628 if (!cmd) {
1629 err = -ENOMEM;
1630 goto failed;
1631 }
1632
1633 err = hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL);
1634 if (err < 0)
1635 mgmt_pending_remove(cmd);
1636
1637failed:
1638 hci_dev_unlock_bh(hdev);
1639 hci_dev_put(hdev);
1640
1641 return err;
211} 1642}
212 1643
213int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) 1644int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
214{ 1645{
215 unsigned char *buf; 1646 unsigned char *buf;
216 struct mgmt_hdr *hdr; 1647 struct mgmt_hdr *hdr;
217 u16 opcode, len; 1648 u16 opcode, index, len;
218 int err; 1649 int err;
219 1650
220 BT_DBG("got %zu bytes", msglen); 1651 BT_DBG("got %zu bytes", msglen);
@@ -222,7 +1653,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
222 if (msglen < sizeof(*hdr)) 1653 if (msglen < sizeof(*hdr))
223 return -EINVAL; 1654 return -EINVAL;
224 1655
225 buf = kmalloc(msglen, GFP_ATOMIC); 1656 buf = kmalloc(msglen, GFP_KERNEL);
226 if (!buf) 1657 if (!buf)
227 return -ENOMEM; 1658 return -ENOMEM;
228 1659
@@ -233,6 +1664,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
233 1664
234 hdr = (struct mgmt_hdr *) buf; 1665 hdr = (struct mgmt_hdr *) buf;
235 opcode = get_unaligned_le16(&hdr->opcode); 1666 opcode = get_unaligned_le16(&hdr->opcode);
1667 index = get_unaligned_le16(&hdr->index);
236 len = get_unaligned_le16(&hdr->len); 1668 len = get_unaligned_le16(&hdr->len);
237 1669
238 if (len != msglen - sizeof(*hdr)) { 1670 if (len != msglen - sizeof(*hdr)) {
@@ -248,11 +1680,84 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
248 err = read_index_list(sk); 1680 err = read_index_list(sk);
249 break; 1681 break;
250 case MGMT_OP_READ_INFO: 1682 case MGMT_OP_READ_INFO:
251 err = read_controller_info(sk, buf + sizeof(*hdr), len); 1683 err = read_controller_info(sk, index);
1684 break;
1685 case MGMT_OP_SET_POWERED:
1686 err = set_powered(sk, index, buf + sizeof(*hdr), len);
1687 break;
1688 case MGMT_OP_SET_DISCOVERABLE:
1689 err = set_discoverable(sk, index, buf + sizeof(*hdr), len);
1690 break;
1691 case MGMT_OP_SET_CONNECTABLE:
1692 err = set_connectable(sk, index, buf + sizeof(*hdr), len);
1693 break;
1694 case MGMT_OP_SET_PAIRABLE:
1695 err = set_pairable(sk, index, buf + sizeof(*hdr), len);
1696 break;
1697 case MGMT_OP_ADD_UUID:
1698 err = add_uuid(sk, index, buf + sizeof(*hdr), len);
1699 break;
1700 case MGMT_OP_REMOVE_UUID:
1701 err = remove_uuid(sk, index, buf + sizeof(*hdr), len);
1702 break;
1703 case MGMT_OP_SET_DEV_CLASS:
1704 err = set_dev_class(sk, index, buf + sizeof(*hdr), len);
1705 break;
1706 case MGMT_OP_SET_SERVICE_CACHE:
1707 err = set_service_cache(sk, index, buf + sizeof(*hdr), len);
1708 break;
1709 case MGMT_OP_LOAD_KEYS:
1710 err = load_keys(sk, index, buf + sizeof(*hdr), len);
1711 break;
1712 case MGMT_OP_REMOVE_KEY:
1713 err = remove_key(sk, index, buf + sizeof(*hdr), len);
1714 break;
1715 case MGMT_OP_DISCONNECT:
1716 err = disconnect(sk, index, buf + sizeof(*hdr), len);
1717 break;
1718 case MGMT_OP_GET_CONNECTIONS:
1719 err = get_connections(sk, index);
1720 break;
1721 case MGMT_OP_PIN_CODE_REPLY:
1722 err = pin_code_reply(sk, index, buf + sizeof(*hdr), len);
1723 break;
1724 case MGMT_OP_PIN_CODE_NEG_REPLY:
1725 err = pin_code_neg_reply(sk, index, buf + sizeof(*hdr), len);
1726 break;
1727 case MGMT_OP_SET_IO_CAPABILITY:
1728 err = set_io_capability(sk, index, buf + sizeof(*hdr), len);
1729 break;
1730 case MGMT_OP_PAIR_DEVICE:
1731 err = pair_device(sk, index, buf + sizeof(*hdr), len);
1732 break;
1733 case MGMT_OP_USER_CONFIRM_REPLY:
1734 err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 1);
1735 break;
1736 case MGMT_OP_USER_CONFIRM_NEG_REPLY:
1737 err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 0);
1738 break;
1739 case MGMT_OP_SET_LOCAL_NAME:
1740 err = set_local_name(sk, index, buf + sizeof(*hdr), len);
1741 break;
1742 case MGMT_OP_READ_LOCAL_OOB_DATA:
1743 err = read_local_oob_data(sk, index);
1744 break;
1745 case MGMT_OP_ADD_REMOTE_OOB_DATA:
1746 err = add_remote_oob_data(sk, index, buf + sizeof(*hdr), len);
1747 break;
1748 case MGMT_OP_REMOVE_REMOTE_OOB_DATA:
1749 err = remove_remote_oob_data(sk, index, buf + sizeof(*hdr),
1750 len);
1751 break;
1752 case MGMT_OP_START_DISCOVERY:
1753 err = start_discovery(sk, index);
1754 break;
1755 case MGMT_OP_STOP_DISCOVERY:
1756 err = stop_discovery(sk, index);
252 break; 1757 break;
253 default: 1758 default:
254 BT_DBG("Unknown op %u", opcode); 1759 BT_DBG("Unknown op %u", opcode);
255 err = cmd_status(sk, opcode, 0x01); 1760 err = cmd_status(sk, index, opcode, 0x01);
256 break; 1761 break;
257 } 1762 }
258 1763
@@ -266,43 +1771,393 @@ done:
266 return err; 1771 return err;
267} 1772}
268 1773
269static int mgmt_event(u16 event, void *data, u16 data_len) 1774int mgmt_index_added(u16 index)
270{ 1775{
271 struct sk_buff *skb; 1776 return mgmt_event(MGMT_EV_INDEX_ADDED, index, NULL, 0, NULL);
272 struct mgmt_hdr *hdr; 1777}
273 1778
274 skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC); 1779int mgmt_index_removed(u16 index)
275 if (!skb) 1780{
276 return -ENOMEM; 1781 return mgmt_event(MGMT_EV_INDEX_REMOVED, index, NULL, 0, NULL);
1782}
277 1783
278 bt_cb(skb)->channel = HCI_CHANNEL_CONTROL; 1784struct cmd_lookup {
1785 u8 val;
1786 struct sock *sk;
1787};
279 1788
280 hdr = (void *) skb_put(skb, sizeof(*hdr)); 1789static void mode_rsp(struct pending_cmd *cmd, void *data)
281 hdr->opcode = cpu_to_le16(event); 1790{
282 hdr->len = cpu_to_le16(data_len); 1791 struct mgmt_mode *cp = cmd->param;
1792 struct cmd_lookup *match = data;
283 1793
284 memcpy(skb_put(skb, data_len), data, data_len); 1794 if (cp->val != match->val)
1795 return;
285 1796
286 hci_send_to_sock(NULL, skb); 1797 send_mode_rsp(cmd->sk, cmd->opcode, cmd->index, cp->val);
287 kfree_skb(skb);
288 1798
289 return 0; 1799 list_del(&cmd->list);
1800
1801 if (match->sk == NULL) {
1802 match->sk = cmd->sk;
1803 sock_hold(match->sk);
1804 }
1805
1806 mgmt_pending_free(cmd);
290} 1807}
291 1808
292int mgmt_index_added(u16 index) 1809int mgmt_powered(u16 index, u8 powered)
1810{
1811 struct mgmt_mode ev;
1812 struct cmd_lookup match = { powered, NULL };
1813 int ret;
1814
1815 mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match);
1816
1817 ev.val = powered;
1818
1819 ret = mgmt_event(MGMT_EV_POWERED, index, &ev, sizeof(ev), match.sk);
1820
1821 if (match.sk)
1822 sock_put(match.sk);
1823
1824 return ret;
1825}
1826
1827int mgmt_discoverable(u16 index, u8 discoverable)
293{ 1828{
294 struct mgmt_ev_index_added ev; 1829 struct mgmt_mode ev;
1830 struct cmd_lookup match = { discoverable, NULL };
1831 int ret;
295 1832
296 put_unaligned_le16(index, &ev.index); 1833 mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, mode_rsp, &match);
297 1834
298 return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev)); 1835 ev.val = discoverable;
1836
1837 ret = mgmt_event(MGMT_EV_DISCOVERABLE, index, &ev, sizeof(ev),
1838 match.sk);
1839
1840 if (match.sk)
1841 sock_put(match.sk);
1842
1843 return ret;
299} 1844}
300 1845
301int mgmt_index_removed(u16 index) 1846int mgmt_connectable(u16 index, u8 connectable)
1847{
1848 struct mgmt_mode ev;
1849 struct cmd_lookup match = { connectable, NULL };
1850 int ret;
1851
1852 mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match);
1853
1854 ev.val = connectable;
1855
1856 ret = mgmt_event(MGMT_EV_CONNECTABLE, index, &ev, sizeof(ev), match.sk);
1857
1858 if (match.sk)
1859 sock_put(match.sk);
1860
1861 return ret;
1862}
1863
1864int mgmt_new_key(u16 index, struct link_key *key, u8 persistent)
1865{
1866 struct mgmt_ev_new_key ev;
1867
1868 memset(&ev, 0, sizeof(ev));
1869
1870 ev.store_hint = persistent;
1871 bacpy(&ev.key.bdaddr, &key->bdaddr);
1872 ev.key.type = key->type;
1873 memcpy(ev.key.val, key->val, 16);
1874 ev.key.pin_len = key->pin_len;
1875
1876 return mgmt_event(MGMT_EV_NEW_KEY, index, &ev, sizeof(ev), NULL);
1877}
1878
1879int mgmt_connected(u16 index, bdaddr_t *bdaddr)
1880{
1881 struct mgmt_ev_connected ev;
1882
1883 bacpy(&ev.bdaddr, bdaddr);
1884
1885 return mgmt_event(MGMT_EV_CONNECTED, index, &ev, sizeof(ev), NULL);
1886}
1887
1888static void disconnect_rsp(struct pending_cmd *cmd, void *data)
1889{
1890 struct mgmt_cp_disconnect *cp = cmd->param;
1891 struct sock **sk = data;
1892 struct mgmt_rp_disconnect rp;
1893
1894 bacpy(&rp.bdaddr, &cp->bdaddr);
1895
1896 cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, &rp, sizeof(rp));
1897
1898 *sk = cmd->sk;
1899 sock_hold(*sk);
1900
1901 mgmt_pending_remove(cmd);
1902}
1903
1904int mgmt_disconnected(u16 index, bdaddr_t *bdaddr)
1905{
1906 struct mgmt_ev_disconnected ev;
1907 struct sock *sk = NULL;
1908 int err;
1909
1910 mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk);
1911
1912 bacpy(&ev.bdaddr, bdaddr);
1913
1914 err = mgmt_event(MGMT_EV_DISCONNECTED, index, &ev, sizeof(ev), sk);
1915
1916 if (sk)
1917 sock_put(sk);
1918
1919 return err;
1920}
1921
1922int mgmt_disconnect_failed(u16 index)
1923{
1924 struct pending_cmd *cmd;
1925 int err;
1926
1927 cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, index);
1928 if (!cmd)
1929 return -ENOENT;
1930
1931 err = cmd_status(cmd->sk, index, MGMT_OP_DISCONNECT, EIO);
1932
1933 mgmt_pending_remove(cmd);
1934
1935 return err;
1936}
1937
1938int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status)
1939{
1940 struct mgmt_ev_connect_failed ev;
1941
1942 bacpy(&ev.bdaddr, bdaddr);
1943 ev.status = status;
1944
1945 return mgmt_event(MGMT_EV_CONNECT_FAILED, index, &ev, sizeof(ev), NULL);
1946}
1947
1948int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr, u8 secure)
1949{
1950 struct mgmt_ev_pin_code_request ev;
1951
1952 bacpy(&ev.bdaddr, bdaddr);
1953 ev.secure = secure;
1954
1955 return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, index, &ev, sizeof(ev),
1956 NULL);
1957}
1958
1959int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
1960{
1961 struct pending_cmd *cmd;
1962 struct mgmt_rp_pin_code_reply rp;
1963 int err;
1964
1965 cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index);
1966 if (!cmd)
1967 return -ENOENT;
1968
1969 bacpy(&rp.bdaddr, bdaddr);
1970 rp.status = status;
1971
1972 err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_REPLY, &rp,
1973 sizeof(rp));
1974
1975 mgmt_pending_remove(cmd);
1976
1977 return err;
1978}
1979
1980int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
1981{
1982 struct pending_cmd *cmd;
1983 struct mgmt_rp_pin_code_reply rp;
1984 int err;
1985
1986 cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index);
1987 if (!cmd)
1988 return -ENOENT;
1989
1990 bacpy(&rp.bdaddr, bdaddr);
1991 rp.status = status;
1992
1993 err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, &rp,
1994 sizeof(rp));
1995
1996 mgmt_pending_remove(cmd);
1997
1998 return err;
1999}
2000
2001int mgmt_user_confirm_request(u16 index, bdaddr_t *bdaddr, __le32 value,
2002 u8 confirm_hint)
2003{
2004 struct mgmt_ev_user_confirm_request ev;
2005
2006 BT_DBG("hci%u", index);
2007
2008 bacpy(&ev.bdaddr, bdaddr);
2009 ev.confirm_hint = confirm_hint;
2010 put_unaligned_le32(value, &ev.value);
2011
2012 return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, index, &ev, sizeof(ev),
2013 NULL);
2014}
2015
2016static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status,
2017 u8 opcode)
302{ 2018{
303 struct mgmt_ev_index_added ev; 2019 struct pending_cmd *cmd;
2020 struct mgmt_rp_user_confirm_reply rp;
2021 int err;
2022
2023 cmd = mgmt_pending_find(opcode, index);
2024 if (!cmd)
2025 return -ENOENT;
304 2026
305 put_unaligned_le16(index, &ev.index); 2027 bacpy(&rp.bdaddr, bdaddr);
2028 rp.status = status;
2029 err = cmd_complete(cmd->sk, index, opcode, &rp, sizeof(rp));
306 2030
307 return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev)); 2031 mgmt_pending_remove(cmd);
2032
2033 return err;
2034}
2035
2036int mgmt_user_confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
2037{
2038 return confirm_reply_complete(index, bdaddr, status,
2039 MGMT_OP_USER_CONFIRM_REPLY);
2040}
2041
2042int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
2043{
2044 return confirm_reply_complete(index, bdaddr, status,
2045 MGMT_OP_USER_CONFIRM_NEG_REPLY);
2046}
2047
2048int mgmt_auth_failed(u16 index, bdaddr_t *bdaddr, u8 status)
2049{
2050 struct mgmt_ev_auth_failed ev;
2051
2052 bacpy(&ev.bdaddr, bdaddr);
2053 ev.status = status;
2054
2055 return mgmt_event(MGMT_EV_AUTH_FAILED, index, &ev, sizeof(ev), NULL);
2056}
2057
2058int mgmt_set_local_name_complete(u16 index, u8 *name, u8 status)
2059{
2060 struct pending_cmd *cmd;
2061 struct hci_dev *hdev;
2062 struct mgmt_cp_set_local_name ev;
2063 int err;
2064
2065 memset(&ev, 0, sizeof(ev));
2066 memcpy(ev.name, name, HCI_MAX_NAME_LENGTH);
2067
2068 cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, index);
2069 if (!cmd)
2070 goto send_event;
2071
2072 if (status) {
2073 err = cmd_status(cmd->sk, index, MGMT_OP_SET_LOCAL_NAME, EIO);
2074 goto failed;
2075 }
2076
2077 hdev = hci_dev_get(index);
2078 if (hdev) {
2079 hci_dev_lock_bh(hdev);
2080 update_eir(hdev);
2081 hci_dev_unlock_bh(hdev);
2082 hci_dev_put(hdev);
2083 }
2084
2085 err = cmd_complete(cmd->sk, index, MGMT_OP_SET_LOCAL_NAME, &ev,
2086 sizeof(ev));
2087 if (err < 0)
2088 goto failed;
2089
2090send_event:
2091 err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, index, &ev, sizeof(ev),
2092 cmd ? cmd->sk : NULL);
2093
2094failed:
2095 if (cmd)
2096 mgmt_pending_remove(cmd);
2097 return err;
2098}
2099
2100int mgmt_read_local_oob_data_reply_complete(u16 index, u8 *hash, u8 *randomizer,
2101 u8 status)
2102{
2103 struct pending_cmd *cmd;
2104 int err;
2105
2106 BT_DBG("hci%u status %u", index, status);
2107
2108 cmd = mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, index);
2109 if (!cmd)
2110 return -ENOENT;
2111
2112 if (status) {
2113 err = cmd_status(cmd->sk, index, MGMT_OP_READ_LOCAL_OOB_DATA,
2114 EIO);
2115 } else {
2116 struct mgmt_rp_read_local_oob_data rp;
2117
2118 memcpy(rp.hash, hash, sizeof(rp.hash));
2119 memcpy(rp.randomizer, randomizer, sizeof(rp.randomizer));
2120
2121 err = cmd_complete(cmd->sk, index, MGMT_OP_READ_LOCAL_OOB_DATA,
2122 &rp, sizeof(rp));
2123 }
2124
2125 mgmt_pending_remove(cmd);
2126
2127 return err;
2128}
2129
2130int mgmt_device_found(u16 index, bdaddr_t *bdaddr, u8 *dev_class, s8 rssi,
2131 u8 *eir)
2132{
2133 struct mgmt_ev_device_found ev;
2134
2135 memset(&ev, 0, sizeof(ev));
2136
2137 bacpy(&ev.bdaddr, bdaddr);
2138 memcpy(ev.dev_class, dev_class, sizeof(ev.dev_class));
2139 ev.rssi = rssi;
2140
2141 if (eir)
2142 memcpy(ev.eir, eir, sizeof(ev.eir));
2143
2144 return mgmt_event(MGMT_EV_DEVICE_FOUND, index, &ev, sizeof(ev), NULL);
2145}
2146
2147int mgmt_remote_name(u16 index, bdaddr_t *bdaddr, u8 *name)
2148{
2149 struct mgmt_ev_remote_name ev;
2150
2151 memset(&ev, 0, sizeof(ev));
2152
2153 bacpy(&ev.bdaddr, bdaddr);
2154 memcpy(ev.name, name, HCI_MAX_NAME_LENGTH);
2155
2156 return mgmt_event(MGMT_EV_REMOTE_NAME, index, &ev, sizeof(ev), NULL);
2157}
2158
2159int mgmt_discovering(u16 index, u8 discovering)
2160{
2161 return mgmt_event(MGMT_EV_DISCOVERING, index, &discovering,
2162 sizeof(discovering), NULL);
308} 2163}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 6b83776534fb..5759bb7054f7 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -232,6 +232,8 @@ static int rfcomm_l2sock_create(struct socket **sock)
232static inline int rfcomm_check_security(struct rfcomm_dlc *d) 232static inline int rfcomm_check_security(struct rfcomm_dlc *d)
233{ 233{
234 struct sock *sk = d->session->sock->sk; 234 struct sock *sk = d->session->sock->sk;
235 struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn;
236
235 __u8 auth_type; 237 __u8 auth_type;
236 238
237 switch (d->sec_level) { 239 switch (d->sec_level) {
@@ -246,8 +248,7 @@ static inline int rfcomm_check_security(struct rfcomm_dlc *d)
246 break; 248 break;
247 } 249 }
248 250
249 return hci_conn_security(l2cap_pi(sk)->conn->hcon, d->sec_level, 251 return hci_conn_security(conn->hcon, d->sec_level, auth_type);
250 auth_type);
251} 252}
252 253
253static void rfcomm_session_timeout(unsigned long arg) 254static void rfcomm_session_timeout(unsigned long arg)
@@ -710,10 +711,10 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
710 /* Set L2CAP options */ 711 /* Set L2CAP options */
711 sk = sock->sk; 712 sk = sock->sk;
712 lock_sock(sk); 713 lock_sock(sk);
713 l2cap_pi(sk)->imtu = l2cap_mtu; 714 l2cap_pi(sk)->chan->imtu = l2cap_mtu;
714 l2cap_pi(sk)->sec_level = sec_level; 715 l2cap_pi(sk)->chan->sec_level = sec_level;
715 if (l2cap_ertm) 716 if (l2cap_ertm)
716 l2cap_pi(sk)->mode = L2CAP_MODE_ERTM; 717 l2cap_pi(sk)->chan->mode = L2CAP_MODE_ERTM;
717 release_sock(sk); 718 release_sock(sk);
718 719
719 s = rfcomm_session_add(sock, BT_BOUND); 720 s = rfcomm_session_add(sock, BT_BOUND);
@@ -1241,6 +1242,7 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
1241void rfcomm_dlc_accept(struct rfcomm_dlc *d) 1242void rfcomm_dlc_accept(struct rfcomm_dlc *d)
1242{ 1243{
1243 struct sock *sk = d->session->sock->sk; 1244 struct sock *sk = d->session->sock->sk;
1245 struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn;
1244 1246
1245 BT_DBG("dlc %p", d); 1247 BT_DBG("dlc %p", d);
1246 1248
@@ -1254,7 +1256,7 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d)
1254 rfcomm_dlc_unlock(d); 1256 rfcomm_dlc_unlock(d);
1255 1257
1256 if (d->role_switch) 1258 if (d->role_switch)
1257 hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); 1259 hci_conn_switch_role(conn->hcon, 0x00);
1258 1260
1259 rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); 1261 rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig);
1260} 1262}
@@ -1890,7 +1892,8 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s)
1890 1892
1891 /* We should adjust MTU on incoming sessions. 1893 /* We should adjust MTU on incoming sessions.
1892 * L2CAP MTU minus UIH header and FCS. */ 1894 * L2CAP MTU minus UIH header and FCS. */
1893 s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5; 1895 s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu,
1896 l2cap_pi(nsock->sk)->chan->imtu) - 5;
1894 1897
1895 rfcomm_schedule(); 1898 rfcomm_schedule();
1896 } else 1899 } else
@@ -1909,7 +1912,7 @@ static inline void rfcomm_check_connection(struct rfcomm_session *s)
1909 1912
1910 /* We can adjust MTU on outgoing sessions. 1913 /* We can adjust MTU on outgoing sessions.
1911 * L2CAP MTU minus UIH header and FCS. */ 1914 * L2CAP MTU minus UIH header and FCS. */
1912 s->mtu = min(l2cap_pi(sk)->omtu, l2cap_pi(sk)->imtu) - 5; 1915 s->mtu = min(l2cap_pi(sk)->chan->omtu, l2cap_pi(sk)->chan->imtu) - 5;
1913 1916
1914 rfcomm_send_sabm(s, 0); 1917 rfcomm_send_sabm(s, 0);
1915 break; 1918 break;
@@ -1992,7 +1995,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
1992 /* Set L2CAP options */ 1995 /* Set L2CAP options */
1993 sk = sock->sk; 1996 sk = sock->sk;
1994 lock_sock(sk); 1997 lock_sock(sk);
1995 l2cap_pi(sk)->imtu = l2cap_mtu; 1998 l2cap_pi(sk)->chan->imtu = l2cap_mtu;
1996 release_sock(sk); 1999 release_sock(sk);
1997 2000
1998 /* Start listening on the socket */ 2001 /* Start listening on the socket */
@@ -2093,7 +2096,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2093 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) 2096 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
2094 continue; 2097 continue;
2095 2098
2096 if (!status) 2099 if (!status && hci_conn_check_secure(conn, d->sec_level))
2097 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); 2100 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags);
2098 else 2101 else
2099 set_bit(RFCOMM_AUTH_REJECT, &d->flags); 2102 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
@@ -2154,8 +2157,6 @@ static int __init rfcomm_init(void)
2154{ 2157{
2155 int err; 2158 int err;
2156 2159
2157 l2cap_load();
2158
2159 hci_register_cb(&rfcomm_cb); 2160 hci_register_cb(&rfcomm_cb);
2160 2161
2161 rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); 2162 rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd");
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 66cc1f0c3df8..386cfaffd4b7 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -743,6 +743,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
743 struct sock *sk = sock->sk; 743 struct sock *sk = sock->sk;
744 struct sock *l2cap_sk; 744 struct sock *l2cap_sk;
745 struct rfcomm_conninfo cinfo; 745 struct rfcomm_conninfo cinfo;
746 struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn;
746 int len, err = 0; 747 int len, err = 0;
747 u32 opt; 748 u32 opt;
748 749
@@ -787,8 +788,8 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
787 788
788 l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; 789 l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk;
789 790
790 cinfo.hci_handle = l2cap_pi(l2cap_sk)->conn->hcon->handle; 791 cinfo.hci_handle = conn->hcon->handle;
791 memcpy(cinfo.dev_class, l2cap_pi(l2cap_sk)->conn->hcon->dev_class, 3); 792 memcpy(cinfo.dev_class, conn->hcon->dev_class, 3);
792 793
793 len = min_t(unsigned int, len, sizeof(cinfo)); 794 len = min_t(unsigned int, len, sizeof(cinfo));
794 if (copy_to_user(optval, (char *) &cinfo, len)) 795 if (copy_to_user(optval, (char *) &cinfo, len))
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index d7b9af4703d0..c258796313e0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -832,7 +832,7 @@ static int rfcomm_tty_write_room(struct tty_struct *tty)
832 return room; 832 return room;
833} 833}
834 834
835static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, unsigned long arg) 835static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
836{ 836{
837 BT_DBG("tty %p cmd 0x%02x", tty, cmd); 837 BT_DBG("tty %p cmd 0x%02x", tty, cmd);
838 838
@@ -1091,7 +1091,7 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
1091 } 1091 }
1092} 1092}
1093 1093
1094static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) 1094static int rfcomm_tty_tiocmget(struct tty_struct *tty)
1095{ 1095{
1096 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 1096 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
1097 1097
@@ -1100,7 +1100,7 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
1100 return dev->modem_status; 1100 return dev->modem_status;
1101} 1101}
1102 1102
1103static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear) 1103static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear)
1104{ 1104{
1105 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 1105 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
1106 struct rfcomm_dlc *dlc = dev->dlc; 1106 struct rfcomm_dlc *dlc = dev->dlc;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 960c6d1637da..42fdffd1d76c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -50,8 +50,6 @@
50#include <net/bluetooth/hci_core.h> 50#include <net/bluetooth/hci_core.h>
51#include <net/bluetooth/sco.h> 51#include <net/bluetooth/sco.h>
52 52
53#define VERSION "0.6"
54
55static int disable_esco; 53static int disable_esco;
56 54
57static const struct proto_ops sco_sock_ops; 55static const struct proto_ops sco_sock_ops;
@@ -192,20 +190,21 @@ static int sco_connect(struct sock *sk)
192 190
193 hci_dev_lock_bh(hdev); 191 hci_dev_lock_bh(hdev);
194 192
195 err = -ENOMEM;
196
197 if (lmp_esco_capable(hdev) && !disable_esco) 193 if (lmp_esco_capable(hdev) && !disable_esco)
198 type = ESCO_LINK; 194 type = ESCO_LINK;
199 else 195 else
200 type = SCO_LINK; 196 type = SCO_LINK;
201 197
202 hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); 198 hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
203 if (!hcon) 199 if (IS_ERR(hcon)) {
200 err = PTR_ERR(hcon);
204 goto done; 201 goto done;
202 }
205 203
206 conn = sco_conn_add(hcon, 0); 204 conn = sco_conn_add(hcon, 0);
207 if (!conn) { 205 if (!conn) {
208 hci_conn_put(hcon); 206 hci_conn_put(hcon);
207 err = -ENOMEM;
209 goto done; 208 goto done;
210 } 209 }
211 210
@@ -703,6 +702,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
703 break; 702 break;
704 } 703 }
705 704
705 memset(&cinfo, 0, sizeof(cinfo));
706 cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; 706 cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle;
707 memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); 707 memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3);
708 708
@@ -1023,7 +1023,7 @@ static struct hci_proto sco_hci_proto = {
1023 .recv_scodata = sco_recv_scodata 1023 .recv_scodata = sco_recv_scodata
1024}; 1024};
1025 1025
1026static int __init sco_init(void) 1026int __init sco_init(void)
1027{ 1027{
1028 int err; 1028 int err;
1029 1029
@@ -1051,7 +1051,6 @@ static int __init sco_init(void)
1051 BT_ERR("Failed to create SCO debug file"); 1051 BT_ERR("Failed to create SCO debug file");
1052 } 1052 }
1053 1053
1054 BT_INFO("SCO (Voice Link) ver %s", VERSION);
1055 BT_INFO("SCO socket layer initialized"); 1054 BT_INFO("SCO socket layer initialized");
1056 1055
1057 return 0; 1056 return 0;
@@ -1061,7 +1060,7 @@ error:
1061 return err; 1060 return err;
1062} 1061}
1063 1062
1064static void __exit sco_exit(void) 1063void __exit sco_exit(void)
1065{ 1064{
1066 debugfs_remove(sco_debugfs); 1065 debugfs_remove(sco_debugfs);
1067 1066
@@ -1074,14 +1073,5 @@ static void __exit sco_exit(void)
1074 proto_unregister(&sco_proto); 1073 proto_unregister(&sco_proto);
1075} 1074}
1076 1075
1077module_init(sco_init);
1078module_exit(sco_exit);
1079
1080module_param(disable_esco, bool, 0644); 1076module_param(disable_esco, bool, 0644);
1081MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation"); 1077MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation");
1082
1083MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
1084MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION);
1085MODULE_VERSION(VERSION);
1086MODULE_LICENSE("GPL");
1087MODULE_ALIAS("bt-proto-2");
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb4..6dee7bf648a9 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
6 tristate "802.1d Ethernet Bridging" 6 tristate "802.1d Ethernet Bridging"
7 select LLC 7 select LLC
8 select STP 8 select STP
9 depends on IPV6 || IPV6=n
9 ---help--- 10 ---help---
10 If you say Y here, then your Linux box will be able to act as an 11 If you say Y here, then your Linux box will be able to act as an
11 Ethernet bridge, which means that the different Ethernet segments it 12 Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 84bbb82599b2..f20c4fd915a8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -104,3 +104,4 @@ module_init(br_init)
104module_exit(br_deinit) 104module_exit(br_deinit)
105MODULE_LICENSE("GPL"); 105MODULE_LICENSE("GPL");
106MODULE_VERSION(BR_VERSION); 106MODULE_VERSION(BR_VERSION);
107MODULE_ALIAS_RTNL_LINK("bridge");
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 556443566e9c..a6b2f86378c7 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -74,11 +74,23 @@ out:
74 return NETDEV_TX_OK; 74 return NETDEV_TX_OK;
75} 75}
76 76
77static int br_dev_init(struct net_device *dev)
78{
79 struct net_bridge *br = netdev_priv(dev);
80
81 br->stats = alloc_percpu(struct br_cpu_netstats);
82 if (!br->stats)
83 return -ENOMEM;
84
85 return 0;
86}
87
77static int br_dev_open(struct net_device *dev) 88static int br_dev_open(struct net_device *dev)
78{ 89{
79 struct net_bridge *br = netdev_priv(dev); 90 struct net_bridge *br = netdev_priv(dev);
80 91
81 br_features_recompute(br); 92 netif_carrier_off(dev);
93 netdev_update_features(dev);
82 netif_start_queue(dev); 94 netif_start_queue(dev);
83 br_stp_enable_bridge(br); 95 br_stp_enable_bridge(br);
84 br_multicast_open(br); 96 br_multicast_open(br);
@@ -94,6 +106,8 @@ static int br_dev_stop(struct net_device *dev)
94{ 106{
95 struct net_bridge *br = netdev_priv(dev); 107 struct net_bridge *br = netdev_priv(dev);
96 108
109 netif_carrier_off(dev);
110
97 br_stp_disable_bridge(br); 111 br_stp_disable_bridge(br);
98 br_multicast_stop(br); 112 br_multicast_stop(br);
99 113
@@ -173,48 +187,11 @@ static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
173 strcpy(info->bus_info, "N/A"); 187 strcpy(info->bus_info, "N/A");
174} 188}
175 189
176static int br_set_sg(struct net_device *dev, u32 data) 190static u32 br_fix_features(struct net_device *dev, u32 features)
177{ 191{
178 struct net_bridge *br = netdev_priv(dev); 192 struct net_bridge *br = netdev_priv(dev);
179 193
180 if (data) 194 return br_features_recompute(br, features);
181 br->feature_mask |= NETIF_F_SG;
182 else
183 br->feature_mask &= ~NETIF_F_SG;
184
185 br_features_recompute(br);
186 return 0;
187}
188
189static int br_set_tso(struct net_device *dev, u32 data)
190{
191 struct net_bridge *br = netdev_priv(dev);
192
193 if (data)
194 br->feature_mask |= NETIF_F_TSO;
195 else
196 br->feature_mask &= ~NETIF_F_TSO;
197
198 br_features_recompute(br);
199 return 0;
200}
201
202static int br_set_tx_csum(struct net_device *dev, u32 data)
203{
204 struct net_bridge *br = netdev_priv(dev);
205
206 if (data)
207 br->feature_mask |= NETIF_F_NO_CSUM;
208 else
209 br->feature_mask &= ~NETIF_F_ALL_CSUM;
210
211 br_features_recompute(br);
212 return 0;
213}
214
215static int br_set_flags(struct net_device *netdev, u32 data)
216{
217 return ethtool_op_set_flags(netdev, data, ETH_FLAG_TXVLAN);
218} 195}
219 196
220#ifdef CONFIG_NET_POLL_CONTROLLER 197#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -297,24 +274,30 @@ void br_netpoll_disable(struct net_bridge_port *p)
297 274
298#endif 275#endif
299 276
277static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
278
279{
280 struct net_bridge *br = netdev_priv(dev);
281
282 return br_add_if(br, slave_dev);
283}
284
285static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
286{
287 struct net_bridge *br = netdev_priv(dev);
288
289 return br_del_if(br, slave_dev);
290}
291
300static const struct ethtool_ops br_ethtool_ops = { 292static const struct ethtool_ops br_ethtool_ops = {
301 .get_drvinfo = br_getinfo, 293 .get_drvinfo = br_getinfo,
302 .get_link = ethtool_op_get_link, 294 .get_link = ethtool_op_get_link,
303 .get_tx_csum = ethtool_op_get_tx_csum,
304 .set_tx_csum = br_set_tx_csum,
305 .get_sg = ethtool_op_get_sg,
306 .set_sg = br_set_sg,
307 .get_tso = ethtool_op_get_tso,
308 .set_tso = br_set_tso,
309 .get_ufo = ethtool_op_get_ufo,
310 .set_ufo = ethtool_op_set_ufo,
311 .get_flags = ethtool_op_get_flags,
312 .set_flags = br_set_flags,
313}; 295};
314 296
315static const struct net_device_ops br_netdev_ops = { 297static const struct net_device_ops br_netdev_ops = {
316 .ndo_open = br_dev_open, 298 .ndo_open = br_dev_open,
317 .ndo_stop = br_dev_stop, 299 .ndo_stop = br_dev_stop,
300 .ndo_init = br_dev_init,
318 .ndo_start_xmit = br_dev_xmit, 301 .ndo_start_xmit = br_dev_xmit,
319 .ndo_get_stats64 = br_get_stats64, 302 .ndo_get_stats64 = br_get_stats64,
320 .ndo_set_mac_address = br_set_mac_address, 303 .ndo_set_mac_address = br_set_mac_address,
@@ -326,6 +309,9 @@ static const struct net_device_ops br_netdev_ops = {
326 .ndo_netpoll_cleanup = br_netpoll_cleanup, 309 .ndo_netpoll_cleanup = br_netpoll_cleanup,
327 .ndo_poll_controller = br_poll_controller, 310 .ndo_poll_controller = br_poll_controller,
328#endif 311#endif
312 .ndo_add_slave = br_add_slave,
313 .ndo_del_slave = br_del_slave,
314 .ndo_fix_features = br_fix_features,
329}; 315};
330 316
331static void br_dev_free(struct net_device *dev) 317static void br_dev_free(struct net_device *dev)
@@ -336,18 +322,49 @@ static void br_dev_free(struct net_device *dev)
336 free_netdev(dev); 322 free_netdev(dev);
337} 323}
338 324
325static struct device_type br_type = {
326 .name = "bridge",
327};
328
339void br_dev_setup(struct net_device *dev) 329void br_dev_setup(struct net_device *dev)
340{ 330{
331 struct net_bridge *br = netdev_priv(dev);
332
341 random_ether_addr(dev->dev_addr); 333 random_ether_addr(dev->dev_addr);
342 ether_setup(dev); 334 ether_setup(dev);
343 335
344 dev->netdev_ops = &br_netdev_ops; 336 dev->netdev_ops = &br_netdev_ops;
345 dev->destructor = br_dev_free; 337 dev->destructor = br_dev_free;
346 SET_ETHTOOL_OPS(dev, &br_ethtool_ops); 338 SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
339 SET_NETDEV_DEVTYPE(dev, &br_type);
347 dev->tx_queue_len = 0; 340 dev->tx_queue_len = 0;
348 dev->priv_flags = IFF_EBRIDGE; 341 dev->priv_flags = IFF_EBRIDGE;
349 342
350 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 343 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
351 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | 344 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
352 NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX; 345 NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX;
346 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
347 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM |
348 NETIF_F_HW_VLAN_TX;
349
350 br->dev = dev;
351 spin_lock_init(&br->lock);
352 INIT_LIST_HEAD(&br->port_list);
353 spin_lock_init(&br->hash_lock);
354
355 br->bridge_id.prio[0] = 0x80;
356 br->bridge_id.prio[1] = 0x00;
357
358 memcpy(br->group_addr, br_group_address, ETH_ALEN);
359
360 br->stp_enabled = BR_NO_STP;
361 br->designated_root = br->bridge_id;
362 br->bridge_max_age = br->max_age = 20 * HZ;
363 br->bridge_hello_time = br->hello_time = 2 * HZ;
364 br->bridge_forward_delay = br->forward_delay = 15 * HZ;
365 br->ageing_time = 300 * HZ;
366
367 br_netfilter_rtable_init(br);
368 br_stp_timer_init(br);
369 br_multicast_init(br);
353} 370}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 88485cc74dc3..e0dfbc151dd7 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -28,6 +28,7 @@
28static struct kmem_cache *br_fdb_cache __read_mostly; 28static struct kmem_cache *br_fdb_cache __read_mostly;
29static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 29static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
30 const unsigned char *addr); 30 const unsigned char *addr);
31static void fdb_notify(const struct net_bridge_fdb_entry *, int);
31 32
32static u32 fdb_salt __read_mostly; 33static u32 fdb_salt __read_mostly;
33 34
@@ -62,7 +63,7 @@ static inline int has_expired(const struct net_bridge *br,
62 const struct net_bridge_fdb_entry *fdb) 63 const struct net_bridge_fdb_entry *fdb)
63{ 64{
64 return !fdb->is_static && 65 return !fdb->is_static &&
65 time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); 66 time_before_eq(fdb->updated + hold_time(br), jiffies);
66} 67}
67 68
68static inline int br_mac_hash(const unsigned char *mac) 69static inline int br_mac_hash(const unsigned char *mac)
@@ -81,6 +82,7 @@ static void fdb_rcu_free(struct rcu_head *head)
81 82
82static inline void fdb_delete(struct net_bridge_fdb_entry *f) 83static inline void fdb_delete(struct net_bridge_fdb_entry *f)
83{ 84{
85 fdb_notify(f, RTM_DELNEIGH);
84 hlist_del_rcu(&f->hlist); 86 hlist_del_rcu(&f->hlist);
85 call_rcu(&f->rcu, fdb_rcu_free); 87 call_rcu(&f->rcu, fdb_rcu_free);
86} 88}
@@ -140,7 +142,7 @@ void br_fdb_cleanup(unsigned long _data)
140 unsigned long this_timer; 142 unsigned long this_timer;
141 if (f->is_static) 143 if (f->is_static)
142 continue; 144 continue;
143 this_timer = f->ageing_timer + delay; 145 this_timer = f->updated + delay;
144 if (time_before_eq(this_timer, jiffies)) 146 if (time_before_eq(this_timer, jiffies))
145 fdb_delete(f); 147 fdb_delete(f);
146 else if (time_before(this_timer, next_timer)) 148 else if (time_before(this_timer, next_timer))
@@ -169,7 +171,7 @@ void br_fdb_flush(struct net_bridge *br)
169 spin_unlock_bh(&br->hash_lock); 171 spin_unlock_bh(&br->hash_lock);
170} 172}
171 173
172/* Flush all entries refering to a specific port. 174/* Flush all entries referring to a specific port.
173 * if do_all is set also flush static entries 175 * if do_all is set also flush static entries
174 */ 176 */
175void br_fdb_delete_by_port(struct net_bridge *br, 177void br_fdb_delete_by_port(struct net_bridge *br,
@@ -293,7 +295,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
293 295
294 fe->is_local = f->is_local; 296 fe->is_local = f->is_local;
295 if (!f->is_static) 297 if (!f->is_static)
296 fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer); 298 fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated);
297 ++fe; 299 ++fe;
298 ++num; 300 ++num;
299 } 301 }
@@ -305,8 +307,21 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
305 return num; 307 return num;
306} 308}
307 309
308static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, 310static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
309 const unsigned char *addr) 311 const unsigned char *addr)
312{
313 struct hlist_node *h;
314 struct net_bridge_fdb_entry *fdb;
315
316 hlist_for_each_entry(fdb, h, head, hlist) {
317 if (!compare_ether_addr(fdb->addr.addr, addr))
318 return fdb;
319 }
320 return NULL;
321}
322
323static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
324 const unsigned char *addr)
310{ 325{
311 struct hlist_node *h; 326 struct hlist_node *h;
312 struct net_bridge_fdb_entry *fdb; 327 struct net_bridge_fdb_entry *fdb;
@@ -320,8 +335,7 @@ static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
320 335
321static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, 336static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
322 struct net_bridge_port *source, 337 struct net_bridge_port *source,
323 const unsigned char *addr, 338 const unsigned char *addr)
324 int is_local)
325{ 339{
326 struct net_bridge_fdb_entry *fdb; 340 struct net_bridge_fdb_entry *fdb;
327 341
@@ -329,11 +343,11 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
329 if (fdb) { 343 if (fdb) {
330 memcpy(fdb->addr.addr, addr, ETH_ALEN); 344 memcpy(fdb->addr.addr, addr, ETH_ALEN);
331 fdb->dst = source; 345 fdb->dst = source;
332 fdb->is_local = is_local; 346 fdb->is_local = 0;
333 fdb->is_static = is_local; 347 fdb->is_static = 0;
334 fdb->ageing_timer = jiffies; 348 fdb->updated = fdb->used = jiffies;
335
336 hlist_add_head_rcu(&fdb->hlist, head); 349 hlist_add_head_rcu(&fdb->hlist, head);
350 fdb_notify(fdb, RTM_NEWNEIGH);
337 } 351 }
338 return fdb; 352 return fdb;
339} 353}
@@ -360,12 +374,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
360 fdb_delete(fdb); 374 fdb_delete(fdb);
361 } 375 }
362 376
363 if (!fdb_create(head, source, addr, 1)) 377 fdb = fdb_create(head, source, addr);
378 if (!fdb)
364 return -ENOMEM; 379 return -ENOMEM;
365 380
381 fdb->is_local = fdb->is_static = 1;
366 return 0; 382 return 0;
367} 383}
368 384
385/* Add entry for local address of interface */
369int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 386int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
370 const unsigned char *addr) 387 const unsigned char *addr)
371{ 388{
@@ -392,7 +409,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
392 source->state == BR_STATE_FORWARDING)) 409 source->state == BR_STATE_FORWARDING))
393 return; 410 return;
394 411
395 fdb = fdb_find(head, addr); 412 fdb = fdb_find_rcu(head, addr);
396 if (likely(fdb)) { 413 if (likely(fdb)) {
397 /* attempt to update an entry for a local interface */ 414 /* attempt to update an entry for a local interface */
398 if (unlikely(fdb->is_local)) { 415 if (unlikely(fdb->is_local)) {
@@ -403,15 +420,277 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
403 } else { 420 } else {
404 /* fastpath: update of existing entry */ 421 /* fastpath: update of existing entry */
405 fdb->dst = source; 422 fdb->dst = source;
406 fdb->ageing_timer = jiffies; 423 fdb->updated = jiffies;
407 } 424 }
408 } else { 425 } else {
409 spin_lock(&br->hash_lock); 426 spin_lock(&br->hash_lock);
410 if (!fdb_find(head, addr)) 427 if (likely(!fdb_find(head, addr)))
411 fdb_create(head, source, addr, 0); 428 fdb_create(head, source, addr);
429
412 /* else we lose race and someone else inserts 430 /* else we lose race and someone else inserts
413 * it first, don't bother updating 431 * it first, don't bother updating
414 */ 432 */
415 spin_unlock(&br->hash_lock); 433 spin_unlock(&br->hash_lock);
416 } 434 }
417} 435}
436
437static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb)
438{
439 if (fdb->is_local)
440 return NUD_PERMANENT;
441 else if (fdb->is_static)
442 return NUD_NOARP;
443 else if (has_expired(fdb->dst->br, fdb))
444 return NUD_STALE;
445 else
446 return NUD_REACHABLE;
447}
448
449static int fdb_fill_info(struct sk_buff *skb,
450 const struct net_bridge_fdb_entry *fdb,
451 u32 pid, u32 seq, int type, unsigned int flags)
452{
453 unsigned long now = jiffies;
454 struct nda_cacheinfo ci;
455 struct nlmsghdr *nlh;
456 struct ndmsg *ndm;
457
458 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
459 if (nlh == NULL)
460 return -EMSGSIZE;
461
462
463 ndm = nlmsg_data(nlh);
464 ndm->ndm_family = AF_BRIDGE;
465 ndm->ndm_pad1 = 0;
466 ndm->ndm_pad2 = 0;
467 ndm->ndm_flags = 0;
468 ndm->ndm_type = 0;
469 ndm->ndm_ifindex = fdb->dst->dev->ifindex;
470 ndm->ndm_state = fdb_to_nud(fdb);
471
472 NLA_PUT(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr);
473
474 ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
475 ci.ndm_confirmed = 0;
476 ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
477 ci.ndm_refcnt = 0;
478 NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
479
480 return nlmsg_end(skb, nlh);
481
482nla_put_failure:
483 nlmsg_cancel(skb, nlh);
484 return -EMSGSIZE;
485}
486
487static inline size_t fdb_nlmsg_size(void)
488{
489 return NLMSG_ALIGN(sizeof(struct ndmsg))
490 + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
491 + nla_total_size(sizeof(struct nda_cacheinfo));
492}
493
494static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
495{
496 struct net *net = dev_net(fdb->dst->dev);
497 struct sk_buff *skb;
498 int err = -ENOBUFS;
499
500 skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
501 if (skb == NULL)
502 goto errout;
503
504 err = fdb_fill_info(skb, fdb, 0, 0, type, 0);
505 if (err < 0) {
506 /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
507 WARN_ON(err == -EMSGSIZE);
508 kfree_skb(skb);
509 goto errout;
510 }
511 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
512 return;
513errout:
514 if (err < 0)
515 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
516}
517
518/* Dump information about entries, in response to GETNEIGH */
519int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
520{
521 struct net *net = sock_net(skb->sk);
522 struct net_device *dev;
523 int idx = 0;
524
525 rcu_read_lock();
526 for_each_netdev_rcu(net, dev) {
527 struct net_bridge *br = netdev_priv(dev);
528 int i;
529
530 if (!(dev->priv_flags & IFF_EBRIDGE))
531 continue;
532
533 for (i = 0; i < BR_HASH_SIZE; i++) {
534 struct hlist_node *h;
535 struct net_bridge_fdb_entry *f;
536
537 hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
538 if (idx < cb->args[0])
539 goto skip;
540
541 if (fdb_fill_info(skb, f,
542 NETLINK_CB(cb->skb).pid,
543 cb->nlh->nlmsg_seq,
544 RTM_NEWNEIGH,
545 NLM_F_MULTI) < 0)
546 break;
547skip:
548 ++idx;
549 }
550 }
551 }
552 rcu_read_unlock();
553
554 cb->args[0] = idx;
555
556 return skb->len;
557}
558
559/* Create new static fdb entry */
560static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
561 __u16 state)
562{
563 struct net_bridge *br = source->br;
564 struct hlist_head *head = &br->hash[br_mac_hash(addr)];
565 struct net_bridge_fdb_entry *fdb;
566
567 fdb = fdb_find(head, addr);
568 if (fdb)
569 return -EEXIST;
570
571 fdb = fdb_create(head, source, addr);
572 if (!fdb)
573 return -ENOMEM;
574
575 if (state & NUD_PERMANENT)
576 fdb->is_local = fdb->is_static = 1;
577 else if (state & NUD_NOARP)
578 fdb->is_static = 1;
579 return 0;
580}
581
582/* Add new permanent fdb entry with RTM_NEWNEIGH */
583int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
584{
585 struct net *net = sock_net(skb->sk);
586 struct ndmsg *ndm;
587 struct nlattr *tb[NDA_MAX+1];
588 struct net_device *dev;
589 struct net_bridge_port *p;
590 const __u8 *addr;
591 int err;
592
593 ASSERT_RTNL();
594 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
595 if (err < 0)
596 return err;
597
598 ndm = nlmsg_data(nlh);
599 if (ndm->ndm_ifindex == 0) {
600 pr_info("bridge: RTM_NEWNEIGH with invalid ifindex\n");
601 return -EINVAL;
602 }
603
604 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
605 if (dev == NULL) {
606 pr_info("bridge: RTM_NEWNEIGH with unknown ifindex\n");
607 return -ENODEV;
608 }
609
610 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
611 pr_info("bridge: RTM_NEWNEIGH with invalid address\n");
612 return -EINVAL;
613 }
614
615 addr = nla_data(tb[NDA_LLADDR]);
616 if (!is_valid_ether_addr(addr)) {
617 pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
618 return -EINVAL;
619 }
620
621 p = br_port_get_rtnl(dev);
622 if (p == NULL) {
623 pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
624 dev->name);
625 return -EINVAL;
626 }
627
628 spin_lock_bh(&p->br->hash_lock);
629 err = fdb_add_entry(p, addr, ndm->ndm_state);
630 spin_unlock_bh(&p->br->hash_lock);
631
632 return err;
633}
634
635static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
636{
637 struct net_bridge *br = p->br;
638 struct hlist_head *head = &br->hash[br_mac_hash(addr)];
639 struct net_bridge_fdb_entry *fdb;
640
641 fdb = fdb_find(head, addr);
642 if (!fdb)
643 return -ENOENT;
644
645 fdb_delete(fdb);
646 return 0;
647}
648
649/* Remove neighbor entry with RTM_DELNEIGH */
650int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
651{
652 struct net *net = sock_net(skb->sk);
653 struct ndmsg *ndm;
654 struct net_bridge_port *p;
655 struct nlattr *llattr;
656 const __u8 *addr;
657 struct net_device *dev;
658 int err;
659
660 ASSERT_RTNL();
661 if (nlmsg_len(nlh) < sizeof(*ndm))
662 return -EINVAL;
663
664 ndm = nlmsg_data(nlh);
665 if (ndm->ndm_ifindex == 0) {
666 pr_info("bridge: RTM_DELNEIGH with invalid ifindex\n");
667 return -EINVAL;
668 }
669
670 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
671 if (dev == NULL) {
672 pr_info("bridge: RTM_DELNEIGH with unknown ifindex\n");
673 return -ENODEV;
674 }
675
676 llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
677 if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
678 pr_info("bridge: RTM_DELNEIGH with invalid address\n");
679 return -EINVAL;
680 }
681
682 addr = nla_data(llattr);
683
684 p = br_port_get_rtnl(dev);
685 if (p == NULL) {
686 pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
687 dev->name);
688 return -EINVAL;
689 }
690
691 spin_lock_bh(&p->br->hash_lock);
692 err = fdb_delete_by_addr(p, addr);
693 spin_unlock_bh(&p->br->hash_lock);
694
695 return err;
696}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index d9d1e2bac1d6..5dbdfdfc3a34 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -36,8 +36,8 @@ static int port_cost(struct net_device *dev)
36 if (dev->ethtool_ops && dev->ethtool_ops->get_settings) { 36 if (dev->ethtool_ops && dev->ethtool_ops->get_settings) {
37 struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, }; 37 struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, };
38 38
39 if (!dev->ethtool_ops->get_settings(dev, &ecmd)) { 39 if (!dev_ethtool_get_settings(dev, &ecmd)) {
40 switch(ecmd.speed) { 40 switch (ethtool_cmd_speed(&ecmd)) {
41 case SPEED_10000: 41 case SPEED_10000:
42 return 2; 42 return 2;
43 case SPEED_1000: 43 case SPEED_1000:
@@ -148,6 +148,8 @@ static void del_nbp(struct net_bridge_port *p)
148 148
149 netdev_rx_handler_unregister(dev); 149 netdev_rx_handler_unregister(dev);
150 150
151 netdev_set_master(dev, NULL);
152
151 br_multicast_del_port(p); 153 br_multicast_del_port(p);
152 154
153 kobject_uevent(&p->kobj, KOBJ_REMOVE); 155 kobject_uevent(&p->kobj, KOBJ_REMOVE);
@@ -173,56 +175,6 @@ static void del_br(struct net_bridge *br, struct list_head *head)
173 unregister_netdevice_queue(br->dev, head); 175 unregister_netdevice_queue(br->dev, head);
174} 176}
175 177
176static struct net_device *new_bridge_dev(struct net *net, const char *name)
177{
178 struct net_bridge *br;
179 struct net_device *dev;
180
181 dev = alloc_netdev(sizeof(struct net_bridge), name,
182 br_dev_setup);
183
184 if (!dev)
185 return NULL;
186 dev_net_set(dev, net);
187
188 br = netdev_priv(dev);
189 br->dev = dev;
190
191 br->stats = alloc_percpu(struct br_cpu_netstats);
192 if (!br->stats) {
193 free_netdev(dev);
194 return NULL;
195 }
196
197 spin_lock_init(&br->lock);
198 INIT_LIST_HEAD(&br->port_list);
199 spin_lock_init(&br->hash_lock);
200
201 br->bridge_id.prio[0] = 0x80;
202 br->bridge_id.prio[1] = 0x00;
203
204 memcpy(br->group_addr, br_group_address, ETH_ALEN);
205
206 br->feature_mask = dev->features;
207 br->stp_enabled = BR_NO_STP;
208 br->designated_root = br->bridge_id;
209 br->root_path_cost = 0;
210 br->root_port = 0;
211 br->bridge_max_age = br->max_age = 20 * HZ;
212 br->bridge_hello_time = br->hello_time = 2 * HZ;
213 br->bridge_forward_delay = br->forward_delay = 15 * HZ;
214 br->topology_change = 0;
215 br->topology_change_detected = 0;
216 br->ageing_time = 300 * HZ;
217
218 br_netfilter_rtable_init(br);
219
220 br_stp_timer_init(br);
221 br_multicast_init(br);
222
223 return dev;
224}
225
226/* find an available port number */ 178/* find an available port number */
227static int find_portno(struct net_bridge *br) 179static int find_portno(struct net_bridge *br)
228{ 180{
@@ -275,42 +227,19 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
275 return p; 227 return p;
276} 228}
277 229
278static struct device_type br_type = {
279 .name = "bridge",
280};
281
282int br_add_bridge(struct net *net, const char *name) 230int br_add_bridge(struct net *net, const char *name)
283{ 231{
284 struct net_device *dev; 232 struct net_device *dev;
285 int ret;
286 233
287 dev = new_bridge_dev(net, name); 234 dev = alloc_netdev(sizeof(struct net_bridge), name,
235 br_dev_setup);
236
288 if (!dev) 237 if (!dev)
289 return -ENOMEM; 238 return -ENOMEM;
290 239
291 rtnl_lock(); 240 dev_net_set(dev, net);
292 if (strchr(dev->name, '%')) {
293 ret = dev_alloc_name(dev, dev->name);
294 if (ret < 0)
295 goto out_free;
296 }
297
298 SET_NETDEV_DEVTYPE(dev, &br_type);
299
300 ret = register_netdevice(dev);
301 if (ret)
302 goto out_free;
303
304 ret = br_sysfs_addbr(dev);
305 if (ret)
306 unregister_netdevice(dev);
307 out:
308 rtnl_unlock();
309 return ret;
310 241
311out_free: 242 return register_netdev(dev);
312 free_netdev(dev);
313 goto out;
314} 243}
315 244
316int br_del_bridge(struct net *net, const char *name) 245int br_del_bridge(struct net *net, const char *name)
@@ -362,15 +291,15 @@ int br_min_mtu(const struct net_bridge *br)
362/* 291/*
363 * Recomputes features using slave's features 292 * Recomputes features using slave's features
364 */ 293 */
365void br_features_recompute(struct net_bridge *br) 294u32 br_features_recompute(struct net_bridge *br, u32 features)
366{ 295{
367 struct net_bridge_port *p; 296 struct net_bridge_port *p;
368 unsigned long features, mask; 297 u32 mask;
369 298
370 features = mask = br->feature_mask;
371 if (list_empty(&br->port_list)) 299 if (list_empty(&br->port_list))
372 goto done; 300 return features;
373 301
302 mask = features;
374 features &= ~NETIF_F_ONE_FOR_ALL; 303 features &= ~NETIF_F_ONE_FOR_ALL;
375 304
376 list_for_each_entry(p, &br->port_list, list) { 305 list_for_each_entry(p, &br->port_list, list) {
@@ -378,8 +307,7 @@ void br_features_recompute(struct net_bridge *br)
378 p->dev->features, mask); 307 p->dev->features, mask);
379 } 308 }
380 309
381done: 310 return features;
382 br->dev->features = netdev_fix_features(features, NULL);
383} 311}
384 312
385/* called with RTNL */ 313/* called with RTNL */
@@ -387,6 +315,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
387{ 315{
388 struct net_bridge_port *p; 316 struct net_bridge_port *p;
389 int err = 0; 317 int err = 0;
318 bool changed_addr;
390 319
391 /* Don't allow bridging non-ethernet like devices */ 320 /* Don't allow bridging non-ethernet like devices */
392 if ((dev->flags & IFF_LOOPBACK) || 321 if ((dev->flags & IFF_LOOPBACK) ||
@@ -429,19 +358,24 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
429 if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) 358 if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
430 goto err3; 359 goto err3;
431 360
432 err = netdev_rx_handler_register(dev, br_handle_frame, p); 361 err = netdev_set_master(dev, br->dev);
433 if (err) 362 if (err)
434 goto err3; 363 goto err3;
435 364
365 err = netdev_rx_handler_register(dev, br_handle_frame, p);
366 if (err)
367 goto err4;
368
436 dev->priv_flags |= IFF_BRIDGE_PORT; 369 dev->priv_flags |= IFF_BRIDGE_PORT;
437 370
438 dev_disable_lro(dev); 371 dev_disable_lro(dev);
439 372
440 list_add_rcu(&p->list, &br->port_list); 373 list_add_rcu(&p->list, &br->port_list);
441 374
375 netdev_update_features(br->dev);
376
442 spin_lock_bh(&br->lock); 377 spin_lock_bh(&br->lock);
443 br_stp_recalculate_bridge_id(br); 378 changed_addr = br_stp_recalculate_bridge_id(br);
444 br_features_recompute(br);
445 379
446 if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && 380 if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
447 (br->dev->flags & IFF_UP)) 381 (br->dev->flags & IFF_UP))
@@ -450,11 +384,17 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
450 384
451 br_ifinfo_notify(RTM_NEWLINK, p); 385 br_ifinfo_notify(RTM_NEWLINK, p);
452 386
387 if (changed_addr)
388 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
389
453 dev_set_mtu(br->dev, br_min_mtu(br)); 390 dev_set_mtu(br->dev, br_min_mtu(br));
454 391
455 kobject_uevent(&p->kobj, KOBJ_ADD); 392 kobject_uevent(&p->kobj, KOBJ_ADD);
456 393
457 return 0; 394 return 0;
395
396err4:
397 netdev_set_master(dev, NULL);
458err3: 398err3:
459 sysfs_remove_link(br->ifobj, p->dev->name); 399 sysfs_remove_link(br->ifobj, p->dev->name);
460err2: 400err2:
@@ -483,9 +423,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
483 423
484 spin_lock_bh(&br->lock); 424 spin_lock_bh(&br->lock);
485 br_stp_recalculate_bridge_id(br); 425 br_stp_recalculate_bridge_id(br);
486 br_features_recompute(br);
487 spin_unlock_bh(&br->lock); 426 spin_unlock_bh(&br->lock);
488 427
428 netdev_update_features(br->dev);
429
489 return 0; 430 return 0;
490} 431}
491 432
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 88e4aa9cb1f9..f3ac1e858ee1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -98,9 +98,10 @@ int br_handle_frame_finish(struct sk_buff *skb)
98 } 98 }
99 99
100 if (skb) { 100 if (skb) {
101 if (dst) 101 if (dst) {
102 dst->used = jiffies;
102 br_forward(dst->dst, skb, skb2); 103 br_forward(dst->dst, skb, skb2);
103 else 104 } else
104 br_flood_forward(br, skb, skb2); 105 br_flood_forward(br, skb, skb2);
105 } 106 }
106 107
@@ -139,21 +140,22 @@ static inline int is_link_local(const unsigned char *dest)
139 * Return NULL if skb is handled 140 * Return NULL if skb is handled
140 * note: already called with rcu_read_lock 141 * note: already called with rcu_read_lock
141 */ 142 */
142struct sk_buff *br_handle_frame(struct sk_buff *skb) 143rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
143{ 144{
144 struct net_bridge_port *p; 145 struct net_bridge_port *p;
146 struct sk_buff *skb = *pskb;
145 const unsigned char *dest = eth_hdr(skb)->h_dest; 147 const unsigned char *dest = eth_hdr(skb)->h_dest;
146 br_should_route_hook_t *rhook; 148 br_should_route_hook_t *rhook;
147 149
148 if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) 150 if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
149 return skb; 151 return RX_HANDLER_PASS;
150 152
151 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 153 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
152 goto drop; 154 goto drop;
153 155
154 skb = skb_share_check(skb, GFP_ATOMIC); 156 skb = skb_share_check(skb, GFP_ATOMIC);
155 if (!skb) 157 if (!skb)
156 return NULL; 158 return RX_HANDLER_CONSUMED;
157 159
158 p = br_port_get_rcu(skb->dev); 160 p = br_port_get_rcu(skb->dev);
159 161
@@ -163,14 +165,16 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
163 goto drop; 165 goto drop;
164 166
165 /* If STP is turned off, then forward */ 167 /* If STP is turned off, then forward */
166 if (p->br->stp_enabled == BR_NO_STP) 168 if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
167 goto forward; 169 goto forward;
168 170
169 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, 171 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
170 NULL, br_handle_local_finish)) 172 NULL, br_handle_local_finish)) {
171 return NULL; /* frame consumed by filter */ 173 return RX_HANDLER_CONSUMED; /* consumed by filter */
172 else 174 } else {
173 return skb; /* continue processing */ 175 *pskb = skb;
176 return RX_HANDLER_PASS; /* continue processing */
177 }
174 } 178 }
175 179
176forward: 180forward:
@@ -178,8 +182,10 @@ forward:
178 case BR_STATE_FORWARDING: 182 case BR_STATE_FORWARDING:
179 rhook = rcu_dereference(br_should_route_hook); 183 rhook = rcu_dereference(br_should_route_hook);
180 if (rhook) { 184 if (rhook) {
181 if ((*rhook)(skb)) 185 if ((*rhook)(skb)) {
182 return skb; 186 *pskb = skb;
187 return RX_HANDLER_PASS;
188 }
183 dest = eth_hdr(skb)->h_dest; 189 dest = eth_hdr(skb)->h_dest;
184 } 190 }
185 /* fall through */ 191 /* fall through */
@@ -194,5 +200,5 @@ forward:
194drop: 200drop:
195 kfree_skb(skb); 201 kfree_skb(skb);
196 } 202 }
197 return NULL; 203 return RX_HANDLER_CONSUMED;
198} 204}
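The br_input.c hunks above convert br_handle_frame() from returning a struct sk_buff * to the rx_handler convention: the handler receives a pointer-to-pointer, may replace the buffer (as skb_share_check() can), and returns RX_HANDLER_CONSUMED when it has taken ownership of the skb or RX_HANDLER_PASS when the caller should keep processing it. The sketch below is a plain userspace mock of that calling convention, not kernel code; the names only mirror the kernel's RX_HANDLER_* values, and allocation error handling is omitted for brevity.

    /* Userspace mock of the rx_handler calling convention used above. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    enum mock_rx_result { MOCK_RX_CONSUMED, MOCK_RX_PASS };

    struct mock_buf { char data[32]; };

    /* Handler may consume the buffer (freeing it) or pass it on,
     * possibly after replacing it -- like skb_share_check() can. */
    static enum mock_rx_result mock_handle_frame(struct mock_buf **pbuf)
    {
        struct mock_buf *buf = *pbuf;

        if (strncmp(buf->data, "drop", 4) == 0) {
            free(buf);                    /* handler owns it now */
            return MOCK_RX_CONSUMED;
        }

        /* pretend we had to unshare/copy the buffer */
        struct mock_buf *copy = malloc(sizeof(*copy));
        *copy = *buf;
        free(buf);
        *pbuf = copy;                     /* caller continues with the new one */
        return MOCK_RX_PASS;
    }

    int main(void)
    {
        const char *inputs[] = { "hello", "drop me" };
        for (int i = 0; i < 2; i++) {
            struct mock_buf *buf = malloc(sizeof(*buf));
            snprintf(buf->data, sizeof(buf->data), "%s", inputs[i]);

            if (mock_handle_frame(&buf) == MOCK_RX_PASS) {
                printf("deliver normally: %s\n", buf->data);
                free(buf);                /* caller still owns it */
            } else {
                printf("consumed by handler\n");
            }
        }
        return 0;
    }

The key point the new return type enforces is ownership: after RX_HANDLER_CONSUMED the caller must not touch the skb again, while after RX_HANDLER_PASS it continues with whatever *pskb now points to.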
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index cb43312b846e..7222fe1d5460 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -106,7 +106,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
106/* 106/*
107 * Legacy ioctl's through SIOCDEVPRIVATE 107 * Legacy ioctl's through SIOCDEVPRIVATE
108 * This interface is deprecated because it was too difficult to 108 * This interface is deprecated because it was too difficult to
109 * to do the translation for 32/64bit ioctl compatability. 109 * to do the translation for 32/64bit ioctl compatibility.
110 */ 110 */
111static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) 111static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
112{ 112{
@@ -181,40 +181,19 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
181 if (!capable(CAP_NET_ADMIN)) 181 if (!capable(CAP_NET_ADMIN))
182 return -EPERM; 182 return -EPERM;
183 183
184 spin_lock_bh(&br->lock); 184 return br_set_forward_delay(br, args[1]);
185 br->bridge_forward_delay = clock_t_to_jiffies(args[1]);
186 if (br_is_root_bridge(br))
187 br->forward_delay = br->bridge_forward_delay;
188 spin_unlock_bh(&br->lock);
189 return 0;
190 185
191 case BRCTL_SET_BRIDGE_HELLO_TIME: 186 case BRCTL_SET_BRIDGE_HELLO_TIME:
192 {
193 unsigned long t = clock_t_to_jiffies(args[1]);
194 if (!capable(CAP_NET_ADMIN)) 187 if (!capable(CAP_NET_ADMIN))
195 return -EPERM; 188 return -EPERM;
196 189
197 if (t < HZ) 190 return br_set_hello_time(br, args[1]);
198 return -EINVAL;
199
200 spin_lock_bh(&br->lock);
201 br->bridge_hello_time = t;
202 if (br_is_root_bridge(br))
203 br->hello_time = br->bridge_hello_time;
204 spin_unlock_bh(&br->lock);
205 return 0;
206 }
207 191
208 case BRCTL_SET_BRIDGE_MAX_AGE: 192 case BRCTL_SET_BRIDGE_MAX_AGE:
209 if (!capable(CAP_NET_ADMIN)) 193 if (!capable(CAP_NET_ADMIN))
210 return -EPERM; 194 return -EPERM;
211 195
212 spin_lock_bh(&br->lock); 196 return br_set_max_age(br, args[1]);
213 br->bridge_max_age = clock_t_to_jiffies(args[1]);
214 if (br_is_root_bridge(br))
215 br->max_age = br->bridge_max_age;
216 spin_unlock_bh(&br->lock);
217 return 0;
218 197
219 case BRCTL_SET_AGEING_TIME: 198 case BRCTL_SET_AGEING_TIME:
220 if (!capable(CAP_NET_ADMIN)) 199 if (!capable(CAP_NET_ADMIN))
@@ -275,19 +254,16 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
275 case BRCTL_SET_PORT_PRIORITY: 254 case BRCTL_SET_PORT_PRIORITY:
276 { 255 {
277 struct net_bridge_port *p; 256 struct net_bridge_port *p;
278 int ret = 0; 257 int ret;
279 258
280 if (!capable(CAP_NET_ADMIN)) 259 if (!capable(CAP_NET_ADMIN))
281 return -EPERM; 260 return -EPERM;
282 261
283 if (args[2] >= (1<<(16-BR_PORT_BITS)))
284 return -ERANGE;
285
286 spin_lock_bh(&br->lock); 262 spin_lock_bh(&br->lock);
287 if ((p = br_get_port(br, args[1])) == NULL) 263 if ((p = br_get_port(br, args[1])) == NULL)
288 ret = -EINVAL; 264 ret = -EINVAL;
289 else 265 else
290 br_stp_set_port_priority(p, args[2]); 266 ret = br_stp_set_port_priority(p, args[2]);
291 spin_unlock_bh(&br->lock); 267 spin_unlock_bh(&br->lock);
292 return ret; 268 return ret;
293 } 269 }
@@ -295,15 +271,17 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
295 case BRCTL_SET_PATH_COST: 271 case BRCTL_SET_PATH_COST:
296 { 272 {
297 struct net_bridge_port *p; 273 struct net_bridge_port *p;
298 int ret = 0; 274 int ret;
299 275
300 if (!capable(CAP_NET_ADMIN)) 276 if (!capable(CAP_NET_ADMIN))
301 return -EPERM; 277 return -EPERM;
302 278
279 spin_lock_bh(&br->lock);
303 if ((p = br_get_port(br, args[1])) == NULL) 280 if ((p = br_get_port(br, args[1])) == NULL)
304 ret = -EINVAL; 281 ret = -EINVAL;
305 else 282 else
306 br_stp_set_path_cost(p, args[2]); 283 ret = br_stp_set_path_cost(p, args[2]);
284 spin_unlock_bh(&br->lock);
307 285
308 return ret; 286 return ret;
309 } 287 }
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 030a002ff8ee..2f14eafdeeab 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -413,7 +413,7 @@ out:
413 413
414#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 414#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
415static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, 415static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
416 struct in6_addr *group) 416 const struct in6_addr *group)
417{ 417{
418 struct sk_buff *skb; 418 struct sk_buff *skb;
419 struct ipv6hdr *ip6h; 419 struct ipv6hdr *ip6h;
@@ -445,9 +445,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
445 ip6h->payload_len = htons(8 + sizeof(*mldq)); 445 ip6h->payload_len = htons(8 + sizeof(*mldq));
446 ip6h->nexthdr = IPPROTO_HOPOPTS; 446 ip6h->nexthdr = IPPROTO_HOPOPTS;
447 ip6h->hop_limit = 1; 447 ip6h->hop_limit = 1;
448 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
448 ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, 449 ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
449 &ip6h->saddr); 450 &ip6h->saddr);
450 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
451 ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); 451 ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
452 452
453 hopopt = (u8 *)(ip6h + 1); 453 hopopt = (u8 *)(ip6h + 1);
@@ -1115,7 +1115,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1115 struct net_bridge_port *port, 1115 struct net_bridge_port *port,
1116 struct sk_buff *skb) 1116 struct sk_buff *skb)
1117{ 1117{
1118 struct iphdr *iph = ip_hdr(skb); 1118 const struct iphdr *iph = ip_hdr(skb);
1119 struct igmphdr *ih = igmp_hdr(skb); 1119 struct igmphdr *ih = igmp_hdr(skb);
1120 struct net_bridge_mdb_entry *mp; 1120 struct net_bridge_mdb_entry *mp;
1121 struct igmpv3_query *ih3; 1121 struct igmpv3_query *ih3;
@@ -1190,7 +1190,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1190 struct net_bridge_port *port, 1190 struct net_bridge_port *port,
1191 struct sk_buff *skb) 1191 struct sk_buff *skb)
1192{ 1192{
1193 struct ipv6hdr *ip6h = ipv6_hdr(skb); 1193 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
1194 struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb); 1194 struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb);
1195 struct net_bridge_mdb_entry *mp; 1195 struct net_bridge_mdb_entry *mp;
1196 struct mld2_query *mld2q; 1196 struct mld2_query *mld2q;
@@ -1198,7 +1198,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1198 struct net_bridge_port_group __rcu **pp; 1198 struct net_bridge_port_group __rcu **pp;
1199 unsigned long max_delay; 1199 unsigned long max_delay;
1200 unsigned long now = jiffies; 1200 unsigned long now = jiffies;
1201 struct in6_addr *group = NULL; 1201 const struct in6_addr *group = NULL;
1202 int err = 0; 1202 int err = 0;
1203 1203
1204 spin_lock(&br->multicast_lock); 1204 spin_lock(&br->multicast_lock);
@@ -1356,7 +1356,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1356 struct sk_buff *skb) 1356 struct sk_buff *skb)
1357{ 1357{
1358 struct sk_buff *skb2 = skb; 1358 struct sk_buff *skb2 = skb;
1359 struct iphdr *iph; 1359 const struct iphdr *iph;
1360 struct igmphdr *ih; 1360 struct igmphdr *ih;
1361 unsigned len; 1361 unsigned len;
1362 unsigned offset; 1362 unsigned offset;
@@ -1452,7 +1452,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1452 struct sk_buff *skb) 1452 struct sk_buff *skb)
1453{ 1453{
1454 struct sk_buff *skb2; 1454 struct sk_buff *skb2;
1455 struct ipv6hdr *ip6h; 1455 const struct ipv6hdr *ip6h;
1456 struct icmp6hdr *icmp6h; 1456 struct icmp6hdr *icmp6h;
1457 u8 nexthdr; 1457 u8 nexthdr;
1458 unsigned len; 1458 unsigned len;
@@ -1475,7 +1475,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1475 ip6h->payload_len == 0) 1475 ip6h->payload_len == 0)
1476 return 0; 1476 return 0;
1477 1477
1478 len = ntohs(ip6h->payload_len); 1478 len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
1479 if (skb->len < len) 1479 if (skb->len < len)
1480 return -EINVAL; 1480 return -EINVAL;
1481 1481
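The one-line br_multicast_ipv6_rcv() fix above accounts for the fact that an IPv6 header's payload_len field counts only the bytes after the fixed 40-byte header, so the frame must be at least 40 + payload_len bytes long. A minimal standalone illustration of that arithmetic (not kernel code):

    /* Illustration of the length check fixed above: payload_len in an
     * IPv6 header excludes the fixed 40-byte header itself. */
    #include <stdio.h>
    #include <stdint.h>

    #define IPV6_HDR_LEN 40   /* size of the fixed IPv6 header on the wire */

    static int frame_long_enough(uint32_t frame_len, uint16_t payload_len)
    {
        return frame_len >= (uint32_t)payload_len + IPV6_HDR_LEN;
    }

    int main(void)
    {
        /* for a 24-byte ICMPv6 payload, the old check accepted any frame
         * of >= 24 bytes; the fixed check requires >= 64 bytes */
        printf("%d\n", frame_long_enough(24, 24));  /* 0: too short */
        printf("%d\n", frame_long_enough(64, 24));  /* 1: ok */
        return 0;
    }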
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 4b5b66d07bba..e1f5ec75e91c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -219,7 +219,7 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb)
219static int br_parse_ip_options(struct sk_buff *skb) 219static int br_parse_ip_options(struct sk_buff *skb)
220{ 220{
221 struct ip_options *opt; 221 struct ip_options *opt;
222 struct iphdr *iph; 222 const struct iphdr *iph;
223 struct net_device *dev = skb->dev; 223 struct net_device *dev = skb->dev;
224 u32 len; 224 u32 len;
225 225
@@ -249,11 +249,9 @@ static int br_parse_ip_options(struct sk_buff *skb)
249 goto drop; 249 goto drop;
250 } 250 }
251 251
252 /* Zero out the CB buffer if no options present */ 252 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
253 if (iph->ihl == 5) { 253 if (iph->ihl == 5)
254 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
255 return 0; 254 return 0;
256 }
257 255
258 opt->optlen = iph->ihl*4 - sizeof(struct iphdr); 256 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
259 if (ip_options_compile(dev_net(dev), opt, skb)) 257 if (ip_options_compile(dev_net(dev), opt, skb))
@@ -412,10 +410,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
412 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; 410 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
413 if (dnat_took_place(skb)) { 411 if (dnat_took_place(skb)) {
414 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 412 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
415 struct flowi fl = {
416 .fl4_dst = iph->daddr,
417 .fl4_tos = RT_TOS(iph->tos),
418 };
419 struct in_device *in_dev = __in_dev_get_rcu(dev); 413 struct in_device *in_dev = __in_dev_get_rcu(dev);
420 414
421 /* If err equals -EHOSTUNREACH the error is due to a 415 /* If err equals -EHOSTUNREACH the error is due to a
@@ -428,14 +422,16 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
428 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 422 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
429 goto free_skb; 423 goto free_skb;
430 424
431 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 425 rt = ip_route_output(dev_net(dev), iph->daddr, 0,
426 RT_TOS(iph->tos), 0);
427 if (!IS_ERR(rt)) {
432 /* - Bridged-and-DNAT'ed traffic doesn't 428 /* - Bridged-and-DNAT'ed traffic doesn't
433 * require ip_forwarding. */ 429 * require ip_forwarding. */
434 if (((struct dst_entry *)rt)->dev == dev) { 430 if (rt->dst.dev == dev) {
435 skb_dst_set(skb, (struct dst_entry *)rt); 431 skb_dst_set(skb, &rt->dst);
436 goto bridged_dnat; 432 goto bridged_dnat;
437 } 433 }
438 dst_release((struct dst_entry *)rt); 434 ip_rt_put(rt);
439 } 435 }
440free_skb: 436free_skb:
441 kfree_skb(skb); 437 kfree_skb(skb);
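The hunk above replaces ip_route_output_key(), which filled in a pointer and returned an error code, with ip_route_output(), which returns either a valid struct rtable * or an error encoded in the pointer itself and tested with IS_ERR(). The sketch below is a simplified userspace version of that ERR_PTR/IS_ERR idiom, assuming a Linux-like platform where long is pointer-sized; it is an illustration of the convention, not the kernel's actual macros.

    /* Simplified ERR_PTR/IS_ERR idiom: a function returns either a valid
     * pointer or a small negative errno encoded as a pointer. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>
    #include <stdint.h>

    #define MAX_ERRNO 4095

    static inline void *err_ptr(long err)     { return (void *)err; }
    static inline long  ptr_err(const void *p){ return (long)p; }
    static inline int   is_err(const void *p)
    {
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
    }

    struct route { int ifindex; };

    static struct route *lookup_route(int have_route)
    {
        if (!have_route)
            return err_ptr(-ENETUNREACH);   /* no pointer to hand back */
        struct route *rt = malloc(sizeof(*rt));
        rt->ifindex = 2;
        return rt;
    }

    int main(void)
    {
        struct route *rt = lookup_route(0);
        if (is_err(rt))
            printf("lookup failed: %ld\n", ptr_err(rt));
        rt = lookup_route(1);
        if (!is_err(rt)) {
            printf("route via ifindex %d\n", rt->ifindex);
            free(rt);
        }
        return 0;
    }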
@@ -558,7 +554,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
558 const struct net_device *out, 554 const struct net_device *out,
559 int (*okfn)(struct sk_buff *)) 555 int (*okfn)(struct sk_buff *))
560{ 556{
561 struct ipv6hdr *hdr; 557 const struct ipv6hdr *hdr;
562 u32 pkt_len; 558 u32 pkt_len;
563 559
564 if (skb->len < sizeof(struct ipv6hdr)) 560 if (skb->len < sizeof(struct ipv6hdr))
@@ -741,6 +737,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
741 nf_bridge->mask |= BRNF_PKT_TYPE; 737 nf_bridge->mask |= BRNF_PKT_TYPE;
742 } 738 }
743 739
740 if (pf == PF_INET && br_parse_ip_options(skb))
741 return NF_DROP;
742
744 /* The physdev module checks on this */ 743 /* The physdev module checks on this */
745 nf_bridge->mask |= BRNF_BRIDGED; 744 nf_bridge->mask |= BRNF_BRIDGED;
746 nf_bridge->physoutdev = skb->dev; 745 nf_bridge->physoutdev = skb->dev;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f8bf4c7f842c..ffb0dc4cc0e8 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -12,9 +12,11 @@
12 12
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/etherdevice.h>
15#include <net/rtnetlink.h> 16#include <net/rtnetlink.h>
16#include <net/net_namespace.h> 17#include <net/net_namespace.h>
17#include <net/sock.h> 18#include <net/sock.h>
19
18#include "br_private.h" 20#include "br_private.h"
19 21
20static inline size_t br_nlmsg_size(void) 22static inline size_t br_nlmsg_size(void)
@@ -118,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
118 int idx; 120 int idx;
119 121
120 idx = 0; 122 idx = 0;
121 for_each_netdev(net, dev) { 123 rcu_read_lock();
122 struct net_bridge_port *port = br_port_get_rtnl(dev); 124 for_each_netdev_rcu(net, dev) {
125 struct net_bridge_port *port = br_port_get_rcu(dev);
123 126
124 /* not a bridge port */ 127 /* not a bridge port */
125 if (!port || idx < cb->args[0]) 128 if (!port || idx < cb->args[0])
@@ -133,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
133skip: 136skip:
134 ++idx; 137 ++idx;
135 } 138 }
136 139 rcu_read_unlock();
137 cb->args[0] = idx; 140 cb->args[0] = idx;
138 141
139 return skb->len; 142 return skb->len;
@@ -188,20 +191,61 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
188 return 0; 191 return 0;
189} 192}
190 193
194static int br_validate(struct nlattr *tb[], struct nlattr *data[])
195{
196 if (tb[IFLA_ADDRESS]) {
197 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
198 return -EINVAL;
199 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
200 return -EADDRNOTAVAIL;
201 }
202
203 return 0;
204}
205
206static struct rtnl_link_ops br_link_ops __read_mostly = {
207 .kind = "bridge",
208 .priv_size = sizeof(struct net_bridge),
209 .setup = br_dev_setup,
210 .validate = br_validate,
211};
191 212
192int __init br_netlink_init(void) 213int __init br_netlink_init(void)
193{ 214{
194 if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo)) 215 int err;
195 return -ENOBUFS;
196 216
197 /* Only the first call to __rtnl_register can fail */ 217 err = rtnl_link_register(&br_link_ops);
198 __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL); 218 if (err < 0)
219 goto err1;
220
221 err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo);
222 if (err)
223 goto err2;
224 err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
225 if (err)
226 goto err3;
227 err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, br_fdb_add, NULL);
228 if (err)
229 goto err3;
230 err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, br_fdb_delete, NULL);
231 if (err)
232 goto err3;
233 err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, br_fdb_dump);
234 if (err)
235 goto err3;
199 236
200 return 0; 237 return 0;
238
239err3:
240 rtnl_unregister_all(PF_BRIDGE);
241err2:
242 rtnl_link_unregister(&br_link_ops);
243err1:
244 return err;
201} 245}
202 246
203void __exit br_netlink_fini(void) 247void __exit br_netlink_fini(void)
204{ 248{
249 rtnl_link_unregister(&br_link_ops);
205 rtnl_unregister_all(PF_BRIDGE); 250 rtnl_unregister_all(PF_BRIDGE);
206} 251}
207
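The rewritten br_netlink_init() above registers a rtnl_link_ops plus several PF_BRIDGE message handlers and, on failure, unwinds the steps already completed in reverse order through the err3/err2/err1 labels. A minimal standalone sketch of that unwind pattern follows; the step_*/undo_* names are placeholders, not kernel APIs.

    /* The error-unwind pattern used by the new br_netlink_init(): each
     * setup step that succeeds gets a matching teardown on the error
     * path, executed in reverse order via label fallthrough. */
    #include <stdio.h>

    static int  step_a(void) { return 0; }
    static int  step_b(void) { return 0; }
    static int  step_c(void) { return -1; }   /* pretend this one fails */
    static void undo_b(void) { puts("undo b"); }
    static void undo_a(void) { puts("undo a"); }

    static int init_all(void)
    {
        int err;

        err = step_a();
        if (err)
            goto err1;
        err = step_b();
        if (err)
            goto err2;
        err = step_c();
        if (err)
            goto err3;
        return 0;

    err3:
        undo_b();
    err2:
        undo_a();
    err1:
        return err;
    }

    int main(void)
    {
        printf("init_all() = %d\n", init_all());
        return 0;
    }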
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 7d337c9b6082..6545ee9591d1 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -36,6 +36,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
36 struct net_bridge *br; 36 struct net_bridge *br;
37 int err; 37 int err;
38 38
39 /* register of bridge completed, add sysfs entries */
40 if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
41 br_sysfs_addbr(dev);
42 return NOTIFY_DONE;
43 }
44
39 /* not a port of a bridge */ 45 /* not a port of a bridge */
40 p = br_port_get_rtnl(dev); 46 p = br_port_get_rtnl(dev);
41 if (!p) 47 if (!p)
@@ -60,10 +66,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
60 break; 66 break;
61 67
62 case NETDEV_FEAT_CHANGE: 68 case NETDEV_FEAT_CHANGE:
63 spin_lock_bh(&br->lock); 69 netdev_update_features(br->dev);
64 if (netif_running(br->dev))
65 br_features_recompute(br);
66 spin_unlock_bh(&br->lock);
67 break; 70 break;
68 71
69 case NETDEV_DOWN: 72 case NETDEV_DOWN:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4e1b620b6be6..54578f274d85 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -64,7 +64,8 @@ struct net_bridge_fdb_entry
64 struct net_bridge_port *dst; 64 struct net_bridge_port *dst;
65 65
66 struct rcu_head rcu; 66 struct rcu_head rcu;
67 unsigned long ageing_timer; 67 unsigned long updated;
68 unsigned long used;
68 mac_addr addr; 69 mac_addr addr;
69 unsigned char is_local; 70 unsigned char is_local;
70 unsigned char is_static; 71 unsigned char is_static;
@@ -182,7 +183,6 @@ struct net_bridge
182 struct br_cpu_netstats __percpu *stats; 183 struct br_cpu_netstats __percpu *stats;
183 spinlock_t hash_lock; 184 spinlock_t hash_lock;
184 struct hlist_head hash[BR_HASH_SIZE]; 185 struct hlist_head hash[BR_HASH_SIZE];
185 unsigned long feature_mask;
186#ifdef CONFIG_BRIDGE_NETFILTER 186#ifdef CONFIG_BRIDGE_NETFILTER
187 struct rtable fake_rtable; 187 struct rtable fake_rtable;
188 bool nf_call_iptables; 188 bool nf_call_iptables;
@@ -353,6 +353,9 @@ extern int br_fdb_insert(struct net_bridge *br,
353extern void br_fdb_update(struct net_bridge *br, 353extern void br_fdb_update(struct net_bridge *br,
354 struct net_bridge_port *source, 354 struct net_bridge_port *source,
355 const unsigned char *addr); 355 const unsigned char *addr);
356extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb);
357extern int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
358extern int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
356 359
357/* br_forward.c */ 360/* br_forward.c */
358extern void br_deliver(const struct net_bridge_port *to, 361extern void br_deliver(const struct net_bridge_port *to,
@@ -375,11 +378,11 @@ extern int br_add_if(struct net_bridge *br,
375extern int br_del_if(struct net_bridge *br, 378extern int br_del_if(struct net_bridge *br,
376 struct net_device *dev); 379 struct net_device *dev);
377extern int br_min_mtu(const struct net_bridge *br); 380extern int br_min_mtu(const struct net_bridge *br);
378extern void br_features_recompute(struct net_bridge *br); 381extern u32 br_features_recompute(struct net_bridge *br, u32 features);
379 382
380/* br_input.c */ 383/* br_input.c */
381extern int br_handle_frame_finish(struct sk_buff *skb); 384extern int br_handle_frame_finish(struct sk_buff *skb);
382extern struct sk_buff *br_handle_frame(struct sk_buff *skb); 385extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
383 386
384/* br_ioctl.c */ 387/* br_ioctl.c */
385extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); 388extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -491,20 +494,25 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br,
491extern void br_init_port(struct net_bridge_port *p); 494extern void br_init_port(struct net_bridge_port *p);
492extern void br_become_designated_port(struct net_bridge_port *p); 495extern void br_become_designated_port(struct net_bridge_port *p);
493 496
497extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
498extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
499extern int br_set_max_age(struct net_bridge *br, unsigned long x);
500
501
494/* br_stp_if.c */ 502/* br_stp_if.c */
495extern void br_stp_enable_bridge(struct net_bridge *br); 503extern void br_stp_enable_bridge(struct net_bridge *br);
496extern void br_stp_disable_bridge(struct net_bridge *br); 504extern void br_stp_disable_bridge(struct net_bridge *br);
497extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val); 505extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
498extern void br_stp_enable_port(struct net_bridge_port *p); 506extern void br_stp_enable_port(struct net_bridge_port *p);
499extern void br_stp_disable_port(struct net_bridge_port *p); 507extern void br_stp_disable_port(struct net_bridge_port *p);
500extern void br_stp_recalculate_bridge_id(struct net_bridge *br); 508extern bool br_stp_recalculate_bridge_id(struct net_bridge *br);
501extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); 509extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
502extern void br_stp_set_bridge_priority(struct net_bridge *br, 510extern void br_stp_set_bridge_priority(struct net_bridge *br,
503 u16 newprio); 511 u16 newprio);
504extern void br_stp_set_port_priority(struct net_bridge_port *p, 512extern int br_stp_set_port_priority(struct net_bridge_port *p,
505 u8 newprio); 513 unsigned long newprio);
506extern void br_stp_set_path_cost(struct net_bridge_port *p, 514extern int br_stp_set_path_cost(struct net_bridge_port *p,
507 u32 path_cost); 515 unsigned long path_cost);
508extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); 516extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
509 517
510/* br_stp_bpdu.c */ 518/* br_stp_bpdu.c */
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 8b650f7fbfa0..642ef47a867e 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -16,6 +16,19 @@
16#define BPDU_TYPE_CONFIG 0 16#define BPDU_TYPE_CONFIG 0
17#define BPDU_TYPE_TCN 0x80 17#define BPDU_TYPE_TCN 0x80
18 18
19/* IEEE 802.1D-1998 timer values */
20#define BR_MIN_HELLO_TIME (1*HZ)
21#define BR_MAX_HELLO_TIME (10*HZ)
22
23#define BR_MIN_FORWARD_DELAY (2*HZ)
24#define BR_MAX_FORWARD_DELAY (30*HZ)
25
26#define BR_MIN_MAX_AGE (6*HZ)
27#define BR_MAX_MAX_AGE (40*HZ)
28
29#define BR_MIN_PATH_COST 1
30#define BR_MAX_PATH_COST 65535
31
19struct br_config_bpdu 32struct br_config_bpdu
20{ 33{
21 unsigned topology_change:1; 34 unsigned topology_change:1;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 57186d84d2bd..bb4383e84de9 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -375,12 +375,12 @@ static void br_make_forwarding(struct net_bridge_port *p)
375 if (p->state != BR_STATE_BLOCKING) 375 if (p->state != BR_STATE_BLOCKING)
376 return; 376 return;
377 377
378 if (br->forward_delay == 0) { 378 if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) {
379 p->state = BR_STATE_FORWARDING; 379 p->state = BR_STATE_FORWARDING;
380 br_topology_change_detection(br); 380 br_topology_change_detection(br);
381 del_timer(&p->forward_delay_timer); 381 del_timer(&p->forward_delay_timer);
382 } 382 }
383 else if (p->br->stp_enabled == BR_KERNEL_STP) 383 else if (br->stp_enabled == BR_KERNEL_STP)
384 p->state = BR_STATE_LISTENING; 384 p->state = BR_STATE_LISTENING;
385 else 385 else
386 p->state = BR_STATE_LEARNING; 386 p->state = BR_STATE_LEARNING;
@@ -397,28 +397,37 @@ static void br_make_forwarding(struct net_bridge_port *p)
397void br_port_state_selection(struct net_bridge *br) 397void br_port_state_selection(struct net_bridge *br)
398{ 398{
399 struct net_bridge_port *p; 399 struct net_bridge_port *p;
400 unsigned int liveports = 0;
400 401
401 /* Don't change port states if userspace is handling STP */ 402 /* Don't change port states if userspace is handling STP */
402 if (br->stp_enabled == BR_USER_STP) 403 if (br->stp_enabled == BR_USER_STP)
403 return; 404 return;
404 405
405 list_for_each_entry(p, &br->port_list, list) { 406 list_for_each_entry(p, &br->port_list, list) {
406 if (p->state != BR_STATE_DISABLED) { 407 if (p->state == BR_STATE_DISABLED)
407 if (p->port_no == br->root_port) { 408 continue;
408 p->config_pending = 0; 409
409 p->topology_change_ack = 0; 410 if (p->port_no == br->root_port) {
410 br_make_forwarding(p); 411 p->config_pending = 0;
411 } else if (br_is_designated_port(p)) { 412 p->topology_change_ack = 0;
412 del_timer(&p->message_age_timer); 413 br_make_forwarding(p);
413 br_make_forwarding(p); 414 } else if (br_is_designated_port(p)) {
414 } else { 415 del_timer(&p->message_age_timer);
415 p->config_pending = 0; 416 br_make_forwarding(p);
416 p->topology_change_ack = 0; 417 } else {
417 br_make_blocking(p); 418 p->config_pending = 0;
418 } 419 p->topology_change_ack = 0;
420 br_make_blocking(p);
419 } 421 }
420 422
423 if (p->state == BR_STATE_FORWARDING)
424 ++liveports;
421 } 425 }
426
427 if (liveports == 0)
428 netif_carrier_off(br->dev);
429 else
430 netif_carrier_on(br->dev);
422} 431}
423 432
424/* called under bridge lock */ 433/* called under bridge lock */
@@ -475,3 +484,51 @@ void br_received_tcn_bpdu(struct net_bridge_port *p)
475 br_topology_change_acknowledge(p); 484 br_topology_change_acknowledge(p);
476 } 485 }
477} 486}
487
488/* Change bridge STP parameter */
489int br_set_hello_time(struct net_bridge *br, unsigned long val)
490{
491 unsigned long t = clock_t_to_jiffies(val);
492
493 if (t < BR_MIN_HELLO_TIME || t > BR_MAX_HELLO_TIME)
494 return -ERANGE;
495
496 spin_lock_bh(&br->lock);
497 br->bridge_hello_time = t;
498 if (br_is_root_bridge(br))
499 br->hello_time = br->bridge_hello_time;
500 spin_unlock_bh(&br->lock);
501 return 0;
502}
503
504int br_set_max_age(struct net_bridge *br, unsigned long val)
505{
506 unsigned long t = clock_t_to_jiffies(val);
507
508 if (t < BR_MIN_MAX_AGE || t > BR_MAX_MAX_AGE)
509 return -ERANGE;
510
511 spin_lock_bh(&br->lock);
512 br->bridge_max_age = t;
513 if (br_is_root_bridge(br))
514 br->max_age = br->bridge_max_age;
515 spin_unlock_bh(&br->lock);
516 return 0;
517
518}
519
520int br_set_forward_delay(struct net_bridge *br, unsigned long val)
521{
522 unsigned long t = clock_t_to_jiffies(val);
523
524 if (br->stp_enabled != BR_NO_STP &&
525 (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
526 return -ERANGE;
527
528 spin_lock_bh(&br->lock);
529 br->bridge_forward_delay = t;
530 if (br_is_root_bridge(br))
531 br->forward_delay = br->bridge_forward_delay;
532 spin_unlock_bh(&br->lock);
533 return 0;
534}
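The br_set_hello_time()/br_set_max_age()/br_set_forward_delay() helpers added above convert the user-supplied value with clock_t_to_jiffies() and reject it with -ERANGE when it falls outside the 802.1D bounds defined in br_private_stp.h (hello 1-10 s, max age 6-40 s, forward delay 2-30 s). A hedged userspace sketch of the same validation follows; it assumes the value handed to the kernel is in 1/100-second (USER_HZ) ticks, which is the usual configuration, and it compares whole seconds rather than jiffies.

    /* Sketch of the range validation added above, done in userspace units. */
    #include <stdio.h>
    #include <errno.h>

    #define CENTISEC_PER_SEC 100   /* assumed USER_HZ */

    /* mirrors BR_MIN_HELLO_TIME (1*HZ) .. BR_MAX_HELLO_TIME (10*HZ) */
    static int check_hello_time(unsigned long centisecs)
    {
        unsigned long secs = centisecs / CENTISEC_PER_SEC;
        if (secs < 1 || secs > 10)
            return -ERANGE;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_hello_time(200));  /* 2 s    -> 0 (ok) */
        printf("%d\n", check_hello_time(50));   /* 0.5 s  -> -ERANGE */
        return 0;
    }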
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 79372d4a4055..6f615b8192f4 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -20,7 +20,7 @@
20 20
21 21
22/* Port id is composed of priority and port number. 22/* Port id is composed of priority and port number.
23 * NB: least significant bits of priority are dropped to 23 * NB: some bits of priority are dropped to
24 * make room for more ports. 24 * make room for more ports.
25 */ 25 */
26static inline port_id br_make_port_id(__u8 priority, __u16 port_no) 26static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
@@ -29,6 +29,8 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
29 | (port_no & ((1<<BR_PORT_BITS)-1)); 29 | (port_no & ((1<<BR_PORT_BITS)-1));
30} 30}
31 31
32#define BR_MAX_PORT_PRIORITY ((u16)~0 >> BR_PORT_BITS)
33
32/* called under bridge lock */ 34/* called under bridge lock */
33void br_init_port(struct net_bridge_port *p) 35void br_init_port(struct net_bridge_port *p)
34{ 36{
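The hunk above adds BR_MAX_PORT_PRIORITY as ((u16)~0 >> BR_PORT_BITS): br_make_port_id() packs the port priority into the top bits of the 16-bit port id and the port number into the low BR_PORT_BITS bits, so that is the largest priority that still fits. The removed ioctl check "args[2] >= (1<<(16-BR_PORT_BITS))" in br_ioctl.c implies BR_PORT_BITS is 10 in this tree, i.e. a 6-bit priority (0-63). A small standalone sketch of the packing arithmetic, using that assumed value:

    /* Packing arithmetic behind br_make_port_id()/BR_MAX_PORT_PRIORITY.
     * Assumption: BR_PORT_BITS is 10, as implied by the removed
     * "args[2] >= (1<<(16-BR_PORT_BITS))" check (priority < 64). */
    #include <stdio.h>
    #include <stdint.h>

    #define PORT_BITS          10
    #define MAX_PORT_PRIORITY  ((uint16_t)~0 >> PORT_BITS)   /* 63 */

    static uint16_t make_port_id(uint8_t priority, uint16_t port_no)
    {
        return ((uint16_t)priority << PORT_BITS) |
               (port_no & ((1 << PORT_BITS) - 1));
    }

    int main(void)
    {
        /* priority 32 (the default), port 5 -> port id 0x8005 */
        printf("max priority = %u\n", (unsigned)MAX_PORT_PRIORITY);
        printf("port id      = 0x%04x\n", (unsigned)make_port_id(32, 5));
        return 0;
    }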
@@ -204,7 +206,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
204static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1]; 206static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
205 207
206/* called under bridge lock */ 208/* called under bridge lock */
207void br_stp_recalculate_bridge_id(struct net_bridge *br) 209bool br_stp_recalculate_bridge_id(struct net_bridge *br)
208{ 210{
209 const unsigned char *br_mac_zero = 211 const unsigned char *br_mac_zero =
210 (const unsigned char *)br_mac_zero_aligned; 212 (const unsigned char *)br_mac_zero_aligned;
@@ -213,7 +215,7 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
213 215
214 /* user has chosen a value so keep it */ 216 /* user has chosen a value so keep it */
215 if (br->flags & BR_SET_MAC_ADDR) 217 if (br->flags & BR_SET_MAC_ADDR)
216 return; 218 return false;
217 219
218 list_for_each_entry(p, &br->port_list, list) { 220 list_for_each_entry(p, &br->port_list, list) {
219 if (addr == br_mac_zero || 221 if (addr == br_mac_zero ||
@@ -222,8 +224,11 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
222 224
223 } 225 }
224 226
225 if (compare_ether_addr(br->bridge_id.addr, addr)) 227 if (compare_ether_addr(br->bridge_id.addr, addr) == 0)
226 br_stp_change_bridge_id(br, addr); 228 return false; /* no change */
229
230 br_stp_change_bridge_id(br, addr);
231 return true;
227} 232}
228 233
229/* called under bridge lock */ 234/* called under bridge lock */
@@ -252,10 +257,14 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
252} 257}
253 258
254/* called under bridge lock */ 259/* called under bridge lock */
255void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) 260int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio)
256{ 261{
257 port_id new_port_id = br_make_port_id(newprio, p->port_no); 262 port_id new_port_id;
263
264 if (newprio > BR_MAX_PORT_PRIORITY)
265 return -ERANGE;
258 266
267 new_port_id = br_make_port_id(newprio, p->port_no);
259 if (br_is_designated_port(p)) 268 if (br_is_designated_port(p))
260 p->designated_port = new_port_id; 269 p->designated_port = new_port_id;
261 270
@@ -266,14 +275,21 @@ void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio)
266 br_become_designated_port(p); 275 br_become_designated_port(p);
267 br_port_state_selection(p->br); 276 br_port_state_selection(p->br);
268 } 277 }
278
279 return 0;
269} 280}
270 281
271/* called under bridge lock */ 282/* called under bridge lock */
272void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost) 283int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost)
273{ 284{
285 if (path_cost < BR_MIN_PATH_COST ||
286 path_cost > BR_MAX_PATH_COST)
287 return -ERANGE;
288
274 p->path_cost = path_cost; 289 p->path_cost = path_cost;
275 br_configuration_update(p->br); 290 br_configuration_update(p->br);
276 br_port_state_selection(p->br); 291 br_port_state_selection(p->br);
292 return 0;
277} 293}
278 294
279ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) 295ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id)
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 7b22456023c5..3e965140051e 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -94,6 +94,7 @@ static void br_forward_delay_timer_expired(unsigned long arg)
94 p->state = BR_STATE_FORWARDING; 94 p->state = BR_STATE_FORWARDING;
95 if (br_is_designated_for_some_port(br)) 95 if (br_is_designated_for_some_port(br))
96 br_topology_change_detection(br); 96 br_topology_change_detection(br);
97 netif_carrier_on(br->dev);
97 } 98 }
98 br_log_state(p); 99 br_log_state(p);
99 spin_unlock(&br->lock); 100 spin_unlock(&br->lock);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 5c1e5559ebba..68b893ea8c3a 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -43,9 +43,7 @@ static ssize_t store_bridge_parm(struct device *d,
43 if (endp == buf) 43 if (endp == buf)
44 return -EINVAL; 44 return -EINVAL;
45 45
46 spin_lock_bh(&br->lock);
47 err = (*set)(br, val); 46 err = (*set)(br, val);
48 spin_unlock_bh(&br->lock);
49 return err ? err : len; 47 return err ? err : len;
50} 48}
51 49
@@ -57,20 +55,11 @@ static ssize_t show_forward_delay(struct device *d,
57 return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); 55 return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay));
58} 56}
59 57
60static int set_forward_delay(struct net_bridge *br, unsigned long val)
61{
62 unsigned long delay = clock_t_to_jiffies(val);
63 br->forward_delay = delay;
64 if (br_is_root_bridge(br))
65 br->bridge_forward_delay = delay;
66 return 0;
67}
68
69static ssize_t store_forward_delay(struct device *d, 58static ssize_t store_forward_delay(struct device *d,
70 struct device_attribute *attr, 59 struct device_attribute *attr,
71 const char *buf, size_t len) 60 const char *buf, size_t len)
72{ 61{
73 return store_bridge_parm(d, buf, len, set_forward_delay); 62 return store_bridge_parm(d, buf, len, br_set_forward_delay);
74} 63}
75static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, 64static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR,
76 show_forward_delay, store_forward_delay); 65 show_forward_delay, store_forward_delay);
@@ -82,24 +71,11 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr,
82 jiffies_to_clock_t(to_bridge(d)->hello_time)); 71 jiffies_to_clock_t(to_bridge(d)->hello_time));
83} 72}
84 73
85static int set_hello_time(struct net_bridge *br, unsigned long val)
86{
87 unsigned long t = clock_t_to_jiffies(val);
88
89 if (t < HZ)
90 return -EINVAL;
91
92 br->hello_time = t;
93 if (br_is_root_bridge(br))
94 br->bridge_hello_time = t;
95 return 0;
96}
97
98static ssize_t store_hello_time(struct device *d, 74static ssize_t store_hello_time(struct device *d,
99 struct device_attribute *attr, const char *buf, 75 struct device_attribute *attr, const char *buf,
100 size_t len) 76 size_t len)
101{ 77{
102 return store_bridge_parm(d, buf, len, set_hello_time); 78 return store_bridge_parm(d, buf, len, br_set_hello_time);
103} 79}
104static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, 80static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time,
105 store_hello_time); 81 store_hello_time);
@@ -111,19 +87,10 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr,
111 jiffies_to_clock_t(to_bridge(d)->max_age)); 87 jiffies_to_clock_t(to_bridge(d)->max_age));
112} 88}
113 89
114static int set_max_age(struct net_bridge *br, unsigned long val)
115{
116 unsigned long t = clock_t_to_jiffies(val);
117 br->max_age = t;
118 if (br_is_root_bridge(br))
119 br->bridge_max_age = t;
120 return 0;
121}
122
123static ssize_t store_max_age(struct device *d, struct device_attribute *attr, 90static ssize_t store_max_age(struct device *d, struct device_attribute *attr,
124 const char *buf, size_t len) 91 const char *buf, size_t len)
125{ 92{
126 return store_bridge_parm(d, buf, len, set_max_age); 93 return store_bridge_parm(d, buf, len, br_set_max_age);
127} 94}
128static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); 95static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age);
129 96
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index fd5799c9bc8d..6229b62749e8 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -23,7 +23,7 @@
23struct brport_attribute { 23struct brport_attribute {
24 struct attribute attr; 24 struct attribute attr;
25 ssize_t (*show)(struct net_bridge_port *, char *); 25 ssize_t (*show)(struct net_bridge_port *, char *);
26 ssize_t (*store)(struct net_bridge_port *, unsigned long); 26 int (*store)(struct net_bridge_port *, unsigned long);
27}; 27};
28 28
29#define BRPORT_ATTR(_name,_mode,_show,_store) \ 29#define BRPORT_ATTR(_name,_mode,_show,_store) \
@@ -38,27 +38,17 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
38{ 38{
39 return sprintf(buf, "%d\n", p->path_cost); 39 return sprintf(buf, "%d\n", p->path_cost);
40} 40}
41static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) 41
42{
43 br_stp_set_path_cost(p, v);
44 return 0;
45}
46static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, 42static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
47 show_path_cost, store_path_cost); 43 show_path_cost, br_stp_set_path_cost);
48 44
49static ssize_t show_priority(struct net_bridge_port *p, char *buf) 45static ssize_t show_priority(struct net_bridge_port *p, char *buf)
50{ 46{
51 return sprintf(buf, "%d\n", p->priority); 47 return sprintf(buf, "%d\n", p->priority);
52} 48}
53static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) 49
54{
55 if (v >= (1<<(16-BR_PORT_BITS)))
56 return -ERANGE;
57 br_stp_set_port_priority(p, v);
58 return 0;
59}
60static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, 50static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
61 show_priority, store_priority); 51 show_priority, br_stp_set_port_priority);
62 52
63static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) 53static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
64{ 54{
@@ -136,7 +126,7 @@ static ssize_t show_hold_timer(struct net_bridge_port *p,
136} 126}
137static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); 127static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
138 128
139static ssize_t store_flush(struct net_bridge_port *p, unsigned long v) 129static int store_flush(struct net_bridge_port *p, unsigned long v)
140{ 130{
141 br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry 131 br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry
142 return 0; 132 return 0;
@@ -148,7 +138,7 @@ static ssize_t show_hairpin_mode(struct net_bridge_port *p, char *buf)
148 int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0; 138 int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0;
149 return sprintf(buf, "%d\n", hairpin_mode); 139 return sprintf(buf, "%d\n", hairpin_mode);
150} 140}
151static ssize_t store_hairpin_mode(struct net_bridge_port *p, unsigned long v) 141static int store_hairpin_mode(struct net_bridge_port *p, unsigned long v)
152{ 142{
153 if (v) 143 if (v)
154 p->flags |= BR_HAIRPIN_MODE; 144 p->flags |= BR_HAIRPIN_MODE;
@@ -165,7 +155,7 @@ static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
165 return sprintf(buf, "%d\n", p->multicast_router); 155 return sprintf(buf, "%d\n", p->multicast_router);
166} 156}
167 157
168static ssize_t store_multicast_router(struct net_bridge_port *p, 158static int store_multicast_router(struct net_bridge_port *p,
169 unsigned long v) 159 unsigned long v)
170{ 160{
171 return br_multicast_set_port_router(p, v); 161 return br_multicast_set_port_router(p, v);
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 50a46afc2bcc..2ed0056a39a8 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -22,9 +22,15 @@
22#include <linux/netfilter_bridge/ebtables.h> 22#include <linux/netfilter_bridge/ebtables.h>
23#include <linux/netfilter_bridge/ebt_ip6.h> 23#include <linux/netfilter_bridge/ebt_ip6.h>
24 24
25struct tcpudphdr { 25union pkthdr {
26 __be16 src; 26 struct {
27 __be16 dst; 27 __be16 src;
28 __be16 dst;
29 } tcpudphdr;
30 struct {
31 u8 type;
32 u8 code;
33 } icmphdr;
28}; 34};
29 35
30static bool 36static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
33 const struct ebt_ip6_info *info = par->matchinfo; 39 const struct ebt_ip6_info *info = par->matchinfo;
34 const struct ipv6hdr *ih6; 40 const struct ipv6hdr *ih6;
35 struct ipv6hdr _ip6h; 41 struct ipv6hdr _ip6h;
36 const struct tcpudphdr *pptr; 42 const union pkthdr *pptr;
37 struct tcpudphdr _ports; 43 union pkthdr _pkthdr;
38 44
39 ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); 45 ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
40 if (ih6 == NULL) 46 if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
56 return false; 62 return false;
57 if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) 63 if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
58 return false; 64 return false;
59 if (!(info->bitmask & EBT_IP6_DPORT) && 65 if (!(info->bitmask & ( EBT_IP6_DPORT |
60 !(info->bitmask & EBT_IP6_SPORT)) 66 EBT_IP6_SPORT | EBT_IP6_ICMP6)))
61 return true; 67 return true;
62 pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), 68
63 &_ports); 69 /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
70 pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
71 &_pkthdr);
64 if (pptr == NULL) 72 if (pptr == NULL)
65 return false; 73 return false;
66 if (info->bitmask & EBT_IP6_DPORT) { 74 if (info->bitmask & EBT_IP6_DPORT) {
67 u32 dst = ntohs(pptr->dst); 75 u16 dst = ntohs(pptr->tcpudphdr.dst);
68 if (FWINV(dst < info->dport[0] || 76 if (FWINV(dst < info->dport[0] ||
69 dst > info->dport[1], EBT_IP6_DPORT)) 77 dst > info->dport[1], EBT_IP6_DPORT))
70 return false; 78 return false;
71 } 79 }
72 if (info->bitmask & EBT_IP6_SPORT) { 80 if (info->bitmask & EBT_IP6_SPORT) {
73 u32 src = ntohs(pptr->src); 81 u16 src = ntohs(pptr->tcpudphdr.src);
74 if (FWINV(src < info->sport[0] || 82 if (FWINV(src < info->sport[0] ||
75 src > info->sport[1], EBT_IP6_SPORT)) 83 src > info->sport[1], EBT_IP6_SPORT))
76 return false; 84 return false;
77 } 85 }
78 return true; 86 if ((info->bitmask & EBT_IP6_ICMP6) &&
87 FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
88 pptr->icmphdr.type > info->icmpv6_type[1] ||
89 pptr->icmphdr.code < info->icmpv6_code[0] ||
90 pptr->icmphdr.code > info->icmpv6_code[1],
91 EBT_IP6_ICMP6))
92 return false;
79 } 93 }
80 return true; 94 return true;
81} 95}
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
103 return -EINVAL; 117 return -EINVAL;
104 if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) 118 if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
105 return -EINVAL; 119 return -EINVAL;
120 if (info->bitmask & EBT_IP6_ICMP6) {
121 if ((info->invflags & EBT_IP6_PROTO) ||
122 info->protocol != IPPROTO_ICMPV6)
123 return -EINVAL;
124 if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
125 info->icmpv6_code[0] > info->icmpv6_code[1])
126 return -EINVAL;
127 }
106 return 0; 128 return 0;
107} 129}
108 130
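The ebt_ip6 change above adds an EBT_IP6_ICMP6 mode: when set, the match reads the first two bytes after the IPv6 extension headers as ICMPv6 type and code and requires each to fall inside a user-supplied [low, high] range, with FWINV() optionally inverting the result. The standalone sketch below mirrors that range-with-invert logic; the struct and function names are illustrative only, and FWINV is described by its effect (an XOR with the invert flag), not copied from the kernel.

    /* Sketch of the ICMPv6 type/code range match added above. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    struct icmp6_range {
        uint8_t type_lo, type_hi;
        uint8_t code_lo, code_hi;
        bool    invert;               /* corresponds to the EBT_IP6_ICMP6 invert flag */
    };

    static bool icmp6_match(const struct icmp6_range *r, uint8_t type, uint8_t code)
    {
        bool outside = type < r->type_lo || type > r->type_hi ||
                       code < r->code_lo || code > r->code_hi;
        /* invert clear: match if inside the ranges;
         * invert set:   match if outside them */
        return !(outside ^ r->invert);
    }

    int main(void)
    {
        /* match ICMPv6 echo request/reply only: types 128-129, any code */
        struct icmp6_range echo = { 128, 129, 0, 255, false };

        printf("%d\n", icmp6_match(&echo, 128, 0));  /* 1: echo request */
        printf("%d\n", icmp6_match(&echo, 135, 0));  /* 0: neighbour solicitation */
        return 0;
    }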
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 16df0532d4b9..1a92b369c820 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1107,6 +1107,8 @@ static int do_replace(struct net *net, const void __user *user,
1107 if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) 1107 if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter))
1108 return -ENOMEM; 1108 return -ENOMEM;
1109 1109
1110 tmp.name[sizeof(tmp.name) - 1] = 0;
1111
1110 countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; 1112 countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids;
1111 newinfo = vmalloc(sizeof(*newinfo) + countersize); 1113 newinfo = vmalloc(sizeof(*newinfo) + countersize);
1112 if (!newinfo) 1114 if (!newinfo)
@@ -1764,6 +1766,7 @@ static int compat_table_info(const struct ebt_table_info *info,
1764 1766
1765 newinfo->entries_size = size; 1767 newinfo->entries_size = size;
1766 1768
1769 xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
1767 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, 1770 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
1768 entries, newinfo); 1771 entries, newinfo);
1769} 1772}
@@ -1879,7 +1882,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
1879 struct xt_match *match; 1882 struct xt_match *match;
1880 struct xt_target *wt; 1883 struct xt_target *wt;
1881 void *dst = NULL; 1884 void *dst = NULL;
1882 int off, pad = 0, ret = 0; 1885 int off, pad = 0;
1883 unsigned int size_kern, entry_offset, match_size = mwt->match_size; 1886 unsigned int size_kern, entry_offset, match_size = mwt->match_size;
1884 1887
1885 strlcpy(name, mwt->u.name, sizeof(name)); 1888 strlcpy(name, mwt->u.name, sizeof(name));
@@ -1932,13 +1935,6 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
1932 break; 1935 break;
1933 } 1936 }
1934 1937
1935 if (!dst) {
1936 ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset,
1937 off + ebt_compat_entry_padsize());
1938 if (ret < 0)
1939 return ret;
1940 }
1941
1942 state->buf_kern_offset += match_size + off; 1938 state->buf_kern_offset += match_size + off;
1943 state->buf_user_offset += match_size; 1939 state->buf_user_offset += match_size;
1944 pad = XT_ALIGN(size_kern) - size_kern; 1940 pad = XT_ALIGN(size_kern) - size_kern;
@@ -2013,50 +2009,6 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
2013 return growth; 2009 return growth;
2014} 2010}
2015 2011
2016#define EBT_COMPAT_WATCHER_ITERATE(e, fn, args...) \
2017({ \
2018 unsigned int __i; \
2019 int __ret = 0; \
2020 struct compat_ebt_entry_mwt *__watcher; \
2021 \
2022 for (__i = e->watchers_offset; \
2023 __i < (e)->target_offset; \
2024 __i += __watcher->watcher_size + \
2025 sizeof(struct compat_ebt_entry_mwt)) { \
2026 __watcher = (void *)(e) + __i; \
2027 __ret = fn(__watcher , ## args); \
2028 if (__ret != 0) \
2029 break; \
2030 } \
2031 if (__ret == 0) { \
2032 if (__i != (e)->target_offset) \
2033 __ret = -EINVAL; \
2034 } \
2035 __ret; \
2036})
2037
2038#define EBT_COMPAT_MATCH_ITERATE(e, fn, args...) \
2039({ \
2040 unsigned int __i; \
2041 int __ret = 0; \
2042 struct compat_ebt_entry_mwt *__match; \
2043 \
2044 for (__i = sizeof(struct ebt_entry); \
2045 __i < (e)->watchers_offset; \
2046 __i += __match->match_size + \
2047 sizeof(struct compat_ebt_entry_mwt)) { \
2048 __match = (void *)(e) + __i; \
2049 __ret = fn(__match , ## args); \
2050 if (__ret != 0) \
2051 break; \
2052 } \
2053 if (__ret == 0) { \
2054 if (__i != (e)->watchers_offset) \
2055 __ret = -EINVAL; \
2056 } \
2057 __ret; \
2058})
2059
2060/* called for all ebt_entry structures. */ 2012/* called for all ebt_entry structures. */
2061static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, 2013static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
2062 unsigned int *total, 2014 unsigned int *total,
@@ -2129,6 +2081,14 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
2129 } 2081 }
2130 } 2082 }
2131 2083
2084 if (state->buf_kern_start == NULL) {
2085 unsigned int offset = buf_start - (char *) base;
2086
2087 ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset);
2088 if (ret < 0)
2089 return ret;
2090 }
2091
2132 startoff = state->buf_user_offset - startoff; 2092 startoff = state->buf_user_offset - startoff;
2133 2093
2134 BUG_ON(*total < startoff); 2094 BUG_ON(*total < startoff);
@@ -2237,6 +2197,7 @@ static int compat_do_replace(struct net *net, void __user *user,
2237 2197
2238 xt_compat_lock(NFPROTO_BRIDGE); 2198 xt_compat_lock(NFPROTO_BRIDGE);
2239 2199
2200 xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
2240 ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); 2201 ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
2241 if (ret < 0) 2202 if (ret < 0)
2242 goto out_unlock; 2203 goto out_unlock;
diff --git a/net/caif/Makefile b/net/caif/Makefile
index 9d38e406e4a4..ebcd4e7e6f47 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -5,7 +5,7 @@ caif-y := caif_dev.o \
5 cffrml.o cfveil.o cfdbgl.o\ 5 cffrml.o cfveil.o cfdbgl.o\
6 cfserl.o cfdgml.o \ 6 cfserl.o cfdgml.o \
7 cfrfml.o cfvidl.o cfutill.o \ 7 cfrfml.o cfvidl.o cfutill.o \
8 cfsrvl.o cfpkt_skbuff.o caif_config_util.o 8 cfsrvl.o cfpkt_skbuff.o
9 9
10obj-$(CONFIG_CAIF) += caif.o 10obj-$(CONFIG_CAIF) += caif.o
11obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o 11obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
deleted file mode 100644
index d522d8c1703e..000000000000
--- a/net/caif/caif_config_util.c
+++ /dev/null
@@ -1,99 +0,0 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/module.h>
8#include <linux/spinlock.h>
9#include <net/caif/cfctrl.h>
10#include <net/caif/cfcnfg.h>
11#include <net/caif/caif_dev.h>
12
13int connect_req_to_link_param(struct cfcnfg *cnfg,
14 struct caif_connect_request *s,
15 struct cfctrl_link_param *l)
16{
17 struct dev_info *dev_info;
18 enum cfcnfg_phy_preference pref;
19 int res;
20
21 memset(l, 0, sizeof(*l));
22 /* In caif protocol low value is high priority */
23 l->priority = CAIF_PRIO_MAX - s->priority + 1;
24
25 if (s->ifindex != 0){
26 res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
27 if (res < 0)
28 return res;
29 l->phyid = res;
30 }
31 else {
32 switch (s->link_selector) {
33 case CAIF_LINK_HIGH_BANDW:
34 pref = CFPHYPREF_HIGH_BW;
35 break;
36 case CAIF_LINK_LOW_LATENCY:
37 pref = CFPHYPREF_LOW_LAT;
38 break;
39 default:
40 return -EINVAL;
41 }
42 dev_info = cfcnfg_get_phyid(cnfg, pref);
43 if (dev_info == NULL)
44 return -ENODEV;
45 l->phyid = dev_info->id;
46 }
47 switch (s->protocol) {
48 case CAIFPROTO_AT:
49 l->linktype = CFCTRL_SRV_VEI;
50 if (s->sockaddr.u.at.type == CAIF_ATTYPE_PLAIN)
51 l->chtype = 0x02;
52 else
53 l->chtype = s->sockaddr.u.at.type;
54 l->endpoint = 0x00;
55 break;
56 case CAIFPROTO_DATAGRAM:
57 l->linktype = CFCTRL_SRV_DATAGRAM;
58 l->chtype = 0x00;
59 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
60 break;
61 case CAIFPROTO_DATAGRAM_LOOP:
62 l->linktype = CFCTRL_SRV_DATAGRAM;
63 l->chtype = 0x03;
64 l->endpoint = 0x00;
65 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
66 break;
67 case CAIFPROTO_RFM:
68 l->linktype = CFCTRL_SRV_RFM;
69 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
70 strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
71 sizeof(l->u.rfm.volume)-1);
72 l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
73 break;
74 case CAIFPROTO_UTIL:
75 l->linktype = CFCTRL_SRV_UTIL;
76 l->endpoint = 0x00;
77 l->chtype = 0x00;
78 strncpy(l->u.utility.name, s->sockaddr.u.util.service,
79 sizeof(l->u.utility.name)-1);
80 l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
81 caif_assert(sizeof(l->u.utility.name) > 10);
82 l->u.utility.paramlen = s->param.size;
83 if (l->u.utility.paramlen > sizeof(l->u.utility.params))
84 l->u.utility.paramlen = sizeof(l->u.utility.params);
85
86 memcpy(l->u.utility.params, s->param.data,
87 l->u.utility.paramlen);
88
89 break;
90 case CAIFPROTO_DEBUG:
91 l->linktype = CFCTRL_SRV_DBG;
92 l->endpoint = s->sockaddr.u.dbg.service;
93 l->chtype = s->sockaddr.u.dbg.type;
94 break;
95 default:
96 return -EINVAL;
97 }
98 return 0;
99}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index a42a408306e4..366ca0fb7a29 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -12,49 +12,51 @@
12#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ 12#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
13 13
14#include <linux/version.h> 14#include <linux/version.h>
15#include <linux/module.h>
16#include <linux/kernel.h> 15#include <linux/kernel.h>
17#include <linux/if_arp.h> 16#include <linux/if_arp.h>
18#include <linux/net.h> 17#include <linux/net.h>
19#include <linux/netdevice.h> 18#include <linux/netdevice.h>
20#include <linux/skbuff.h> 19#include <linux/mutex.h>
21#include <linux/sched.h>
22#include <linux/wait.h>
23#include <net/netns/generic.h> 20#include <net/netns/generic.h>
24#include <net/net_namespace.h> 21#include <net/net_namespace.h>
25#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
26#include <net/caif/caif_device.h> 23#include <net/caif/caif_device.h>
27#include <net/caif/caif_dev.h>
28#include <net/caif/caif_layer.h> 24#include <net/caif/caif_layer.h>
29#include <net/caif/cfpkt.h> 25#include <net/caif/cfpkt.h>
30#include <net/caif/cfcnfg.h> 26#include <net/caif/cfcnfg.h>
31 27
32MODULE_LICENSE("GPL"); 28MODULE_LICENSE("GPL");
33#define TIMEOUT (HZ*5)
34 29
35/* Used for local tracking of the CAIF net devices */ 30/* Used for local tracking of the CAIF net devices */
36struct caif_device_entry { 31struct caif_device_entry {
37 struct cflayer layer; 32 struct cflayer layer;
38 struct list_head list; 33 struct list_head list;
39 atomic_t in_use;
40 atomic_t state;
41 u16 phyid;
42 struct net_device *netdev; 34 struct net_device *netdev;
43 wait_queue_head_t event; 35 int __percpu *pcpu_refcnt;
44}; 36};
45 37
46struct caif_device_entry_list { 38struct caif_device_entry_list {
47 struct list_head list; 39 struct list_head list;
48 /* Protects simultaneous deletes in list */ 40 /* Protects simultaneous deletes in list */
49 spinlock_t lock; 41 struct mutex lock;
50}; 42};
51 43
52struct caif_net { 44struct caif_net {
45 struct cfcnfg *cfg;
53 struct caif_device_entry_list caifdevs; 46 struct caif_device_entry_list caifdevs;
54}; 47};
55 48
56static int caif_net_id; 49static int caif_net_id;
57static struct cfcnfg *cfg; 50
51struct cfcnfg *get_cfcnfg(struct net *net)
52{
53 struct caif_net *caifn;
54 BUG_ON(!net);
55 caifn = net_generic(net, caif_net_id);
56 BUG_ON(!caifn);
57 return caifn->cfg;
58}
59EXPORT_SYMBOL(get_cfcnfg);
58 60
59static struct caif_device_entry_list *caif_device_list(struct net *net) 61static struct caif_device_entry_list *caif_device_list(struct net *net)
60{ 62{
@@ -65,19 +67,39 @@ static struct caif_device_entry_list *caif_device_list(struct net *net)
65 return &caifn->caifdevs; 67 return &caifn->caifdevs;
66} 68}
67 69
70static void caifd_put(struct caif_device_entry *e)
71{
72 irqsafe_cpu_dec(*e->pcpu_refcnt);
73}
74
75static void caifd_hold(struct caif_device_entry *e)
76{
77 irqsafe_cpu_inc(*e->pcpu_refcnt);
78}
79
80static int caifd_refcnt_read(struct caif_device_entry *e)
81{
82 int i, refcnt = 0;
83 for_each_possible_cpu(i)
84 refcnt += *per_cpu_ptr(e->pcpu_refcnt, i);
85 return refcnt;
86}
87
68/* Allocate new CAIF device. */ 88/* Allocate new CAIF device. */
69static struct caif_device_entry *caif_device_alloc(struct net_device *dev) 89static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
70{ 90{
71 struct caif_device_entry_list *caifdevs; 91 struct caif_device_entry_list *caifdevs;
72 struct caif_device_entry *caifd; 92 struct caif_device_entry *caifd;
93
73 caifdevs = caif_device_list(dev_net(dev)); 94 caifdevs = caif_device_list(dev_net(dev));
74 BUG_ON(!caifdevs); 95 BUG_ON(!caifdevs);
96
75 caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); 97 caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC);
76 if (!caifd) 98 if (!caifd)
77 return NULL; 99 return NULL;
100 caifd->pcpu_refcnt = alloc_percpu(int);
78 caifd->netdev = dev; 101 caifd->netdev = dev;
79 list_add(&caifd->list, &caifdevs->list); 102 dev_hold(dev);
80 init_waitqueue_head(&caifd->event);
81 return caifd; 103 return caifd;
82} 104}
83 105
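
A minimal, self-contained sketch of the per-CPU reference counting introduced above (caifd_hold()/caifd_put()/caifd_refcnt_read()): hold/put are cheap per-CPU operations on the packet path, and the summed read is only meaningful at teardown time, once no new holds can appear. Struct and function names below are illustrative; irqsafe_cpu_inc()/irqsafe_cpu_dec() are the per-CPU helpers of this kernel generation, as used in the hunk.

#include <linux/errno.h>
#include <linux/percpu.h>

struct pcpu_ref {
	int __percpu *cnt;
};

static int pcpu_ref_init(struct pcpu_ref *r)
{
	r->cnt = alloc_percpu(int);
	return r->cnt ? 0 : -ENOMEM;
}

static void pcpu_ref_hold(struct pcpu_ref *r)
{
	irqsafe_cpu_inc(*r->cnt);	/* per-CPU, no shared cacheline, irq safe */
}

static void pcpu_ref_put(struct pcpu_ref *r)
{
	irqsafe_cpu_dec(*r->cnt);
}

static int pcpu_ref_read(struct pcpu_ref *r)
{
	int cpu, sum = 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(r->cnt, cpu);
	return sum;			/* exact only once new holds have stopped */
}
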
@@ -87,98 +109,60 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
87 caif_device_list(dev_net(dev)); 109 caif_device_list(dev_net(dev));
88 struct caif_device_entry *caifd; 110 struct caif_device_entry *caifd;
89 BUG_ON(!caifdevs); 111 BUG_ON(!caifdevs);
90 list_for_each_entry(caifd, &caifdevs->list, list) { 112 list_for_each_entry_rcu(caifd, &caifdevs->list, list) {
91 if (caifd->netdev == dev) 113 if (caifd->netdev == dev)
92 return caifd; 114 return caifd;
93 } 115 }
94 return NULL; 116 return NULL;
95} 117}
96 118
97static void caif_device_destroy(struct net_device *dev)
98{
99 struct caif_device_entry_list *caifdevs =
100 caif_device_list(dev_net(dev));
101 struct caif_device_entry *caifd;
102 ASSERT_RTNL();
103 if (dev->type != ARPHRD_CAIF)
104 return;
105
106 spin_lock_bh(&caifdevs->lock);
107 caifd = caif_get(dev);
108 if (caifd == NULL) {
109 spin_unlock_bh(&caifdevs->lock);
110 return;
111 }
112
113 list_del(&caifd->list);
114 spin_unlock_bh(&caifdevs->lock);
115
116 kfree(caifd);
117}
118
119static int transmit(struct cflayer *layer, struct cfpkt *pkt) 119static int transmit(struct cflayer *layer, struct cfpkt *pkt)
120{ 120{
121 int err;
121 struct caif_device_entry *caifd = 122 struct caif_device_entry *caifd =
122 container_of(layer, struct caif_device_entry, layer); 123 container_of(layer, struct caif_device_entry, layer);
123 struct sk_buff *skb, *skb2; 124 struct sk_buff *skb;
124 int ret = -EINVAL; 125
125 skb = cfpkt_tonative(pkt); 126 skb = cfpkt_tonative(pkt);
126 skb->dev = caifd->netdev; 127 skb->dev = caifd->netdev;
127 /*
128 * Don't allow SKB to be destroyed upon error, but signal resend
129 * notification to clients. We can't rely on the return value as
130 * congestion (NET_XMIT_CN) sometimes drops the packet, sometimes don't.
131 */
132 if (netif_queue_stopped(caifd->netdev))
133 return -EAGAIN;
134 skb2 = skb_get(skb);
135
136 ret = dev_queue_xmit(skb2);
137
138 if (!ret)
139 kfree_skb(skb);
140 else
141 return -EAGAIN;
142 128
143 return 0; 129 err = dev_queue_xmit(skb);
144} 130 if (err > 0)
131 err = -EIO;
145 132
146static int modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) 133 return err;
147{
148 struct caif_device_entry *caifd;
149 struct caif_dev_common *caifdev;
150 caifd = container_of(layr, struct caif_device_entry, layer);
151 caifdev = netdev_priv(caifd->netdev);
152 if (ctrl == _CAIF_MODEMCMD_PHYIF_USEFULL) {
153 atomic_set(&caifd->in_use, 1);
154 wake_up_interruptible(&caifd->event);
155
156 } else if (ctrl == _CAIF_MODEMCMD_PHYIF_USELESS) {
157 atomic_set(&caifd->in_use, 0);
158 wake_up_interruptible(&caifd->event);
159 }
160 return 0;
161} 134}
162 135
163/* 136/*
164 * Stuff received packets to associated sockets. 137 * Stuff received packets into the CAIF stack.
165 * On error, returns non-zero and releases the skb. 138 * On error, returns non-zero and releases the skb.
166 */ 139 */
167static int receive(struct sk_buff *skb, struct net_device *dev, 140static int receive(struct sk_buff *skb, struct net_device *dev,
168 struct packet_type *pkttype, struct net_device *orig_dev) 141 struct packet_type *pkttype, struct net_device *orig_dev)
169{ 142{
170 struct net *net;
171 struct cfpkt *pkt; 143 struct cfpkt *pkt;
172 struct caif_device_entry *caifd; 144 struct caif_device_entry *caifd;
173 net = dev_net(dev); 145
174 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb); 146 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
147
148 rcu_read_lock();
175 caifd = caif_get(dev); 149 caifd = caif_get(dev);
176 if (!caifd || !caifd->layer.up || !caifd->layer.up->receive)
177 return NET_RX_DROP;
178 150
179 if (caifd->layer.up->receive(caifd->layer.up, pkt)) 151 if (!caifd || !caifd->layer.up || !caifd->layer.up->receive ||
152 !netif_oper_up(caifd->netdev)) {
153 rcu_read_unlock();
154 kfree_skb(skb);
180 return NET_RX_DROP; 155 return NET_RX_DROP;
156 }
157
158 /* Hold reference to netdevice while using CAIF stack */
159 caifd_hold(caifd);
160 rcu_read_unlock();
161
162 caifd->layer.up->receive(caifd->layer.up, pkt);
181 163
164 /* Release reference to stack upwards */
165 caifd_put(caifd);
182 return 0; 166 return 0;
183} 167}
184 168
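
The rewritten receive() follows an RCU lookup-then-pin sequence: find the entry under rcu_read_lock(), take a reference before leaving the read-side critical section, and drop it only after the packet has been handed up the stack. The condensed sketch below restates just that sequence from the hunk; it is not additional code from the patch.

/* Condensed form of the locking order used in receive() above. */
static int example_use_entry(struct net_device *dev, struct cfpkt *pkt)
{
	struct caif_device_entry *caifd;

	rcu_read_lock();
	caifd = caif_get(dev);			/* list_for_each_entry_rcu() walk */
	if (!caifd || !caifd->layer.up || !caifd->layer.up->receive) {
		rcu_read_unlock();
		return NET_RX_DROP;
	}
	caifd_hold(caifd);			/* pin before leaving the RCU section */
	rcu_read_unlock();

	caifd->layer.up->receive(caifd->layer.up, pkt);

	caifd_put(caifd);			/* lets NETDEV_UNREGISTER complete the free */
	return 0;
}
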
@@ -189,15 +173,25 @@ static struct packet_type caif_packet_type __read_mostly = {
189 173
190static void dev_flowctrl(struct net_device *dev, int on) 174static void dev_flowctrl(struct net_device *dev, int on)
191{ 175{
192 struct caif_device_entry *caifd = caif_get(dev); 176 struct caif_device_entry *caifd;
193 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) 177
178 rcu_read_lock();
179
180 caifd = caif_get(dev);
181 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) {
182 rcu_read_unlock();
194 return; 183 return;
184 }
185
186 caifd_hold(caifd);
187 rcu_read_unlock();
195 188
196 caifd->layer.up->ctrlcmd(caifd->layer.up, 189 caifd->layer.up->ctrlcmd(caifd->layer.up,
197 on ? 190 on ?
198 _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND : 191 _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND :
199 _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND, 192 _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
200 caifd->layer.id); 193 caifd->layer.id);
194 caifd_put(caifd);
201} 195}
202 196
203/* notify Caif of device events */ 197/* notify Caif of device events */
@@ -208,37 +202,28 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
208 struct caif_device_entry *caifd = NULL; 202 struct caif_device_entry *caifd = NULL;
209 struct caif_dev_common *caifdev; 203 struct caif_dev_common *caifdev;
210 enum cfcnfg_phy_preference pref; 204 enum cfcnfg_phy_preference pref;
211 int res = -EINVAL;
212 enum cfcnfg_phy_type phy_type; 205 enum cfcnfg_phy_type phy_type;
206 struct cfcnfg *cfg;
207 struct caif_device_entry_list *caifdevs =
208 caif_device_list(dev_net(dev));
213 209
214 if (dev->type != ARPHRD_CAIF) 210 if (dev->type != ARPHRD_CAIF)
215 return 0; 211 return 0;
216 212
213 cfg = get_cfcnfg(dev_net(dev));
214 if (cfg == NULL)
215 return 0;
216
217 switch (what) { 217 switch (what) {
218 case NETDEV_REGISTER: 218 case NETDEV_REGISTER:
219 netdev_info(dev, "register\n");
220 caifd = caif_device_alloc(dev); 219 caifd = caif_device_alloc(dev);
221 if (caifd == NULL) 220 if (!caifd)
222 break; 221 return 0;
222
223 caifdev = netdev_priv(dev); 223 caifdev = netdev_priv(dev);
224 caifdev->flowctrl = dev_flowctrl; 224 caifdev->flowctrl = dev_flowctrl;
225 atomic_set(&caifd->state, what);
226 res = 0;
227 break;
228 225
229 case NETDEV_UP:
230 netdev_info(dev, "up\n");
231 caifd = caif_get(dev);
232 if (caifd == NULL)
233 break;
234 caifdev = netdev_priv(dev);
235 if (atomic_read(&caifd->state) == NETDEV_UP) {
236 netdev_info(dev, "already up\n");
237 break;
238 }
239 atomic_set(&caifd->state, what);
240 caifd->layer.transmit = transmit; 226 caifd->layer.transmit = transmit;
241 caifd->layer.modemcmd = modemcmd;
242 227
243 if (caifdev->use_frag) 228 if (caifdev->use_frag)
244 phy_type = CFPHYTYPE_FRAG; 229 phy_type = CFPHYTYPE_FRAG;
@@ -256,62 +241,94 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
256 pref = CFPHYPREF_HIGH_BW; 241 pref = CFPHYPREF_HIGH_BW;
257 break; 242 break;
258 } 243 }
259 dev_hold(dev); 244 strncpy(caifd->layer.name, dev->name,
260 cfcnfg_add_phy_layer(get_caif_conf(), 245 sizeof(caifd->layer.name) - 1);
246 caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
247
248 mutex_lock(&caifdevs->lock);
249 list_add_rcu(&caifd->list, &caifdevs->list);
250
251 cfcnfg_add_phy_layer(cfg,
261 phy_type, 252 phy_type,
262 dev, 253 dev,
263 &caifd->layer, 254 &caifd->layer,
264 &caifd->phyid,
265 pref, 255 pref,
266 caifdev->use_fcs, 256 caifdev->use_fcs,
267 caifdev->use_stx); 257 caifdev->use_stx);
268 strncpy(caifd->layer.name, dev->name, 258 mutex_unlock(&caifdevs->lock);
269 sizeof(caifd->layer.name) - 1);
270 caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
271 break; 259 break;
272 260
273 case NETDEV_GOING_DOWN: 261 case NETDEV_UP:
262 rcu_read_lock();
263
274 caifd = caif_get(dev); 264 caifd = caif_get(dev);
275 if (caifd == NULL) 265 if (caifd == NULL) {
266 rcu_read_unlock();
276 break; 267 break;
277 netdev_info(dev, "going down\n"); 268 }
278 269
279 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN || 270 cfcnfg_set_phy_state(cfg, &caifd->layer, true);
280 atomic_read(&caifd->state) == NETDEV_DOWN) 271 rcu_read_unlock();
281 break;
282 272
283 atomic_set(&caifd->state, what);
284 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
285 return -EINVAL;
286 caifd->layer.up->ctrlcmd(caifd->layer.up,
287 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
288 caifd->layer.id);
289 might_sleep();
290 res = wait_event_interruptible_timeout(caifd->event,
291 atomic_read(&caifd->in_use) == 0,
292 TIMEOUT);
293 break; 273 break;
294 274
295 case NETDEV_DOWN: 275 case NETDEV_DOWN:
276 rcu_read_lock();
277
296 caifd = caif_get(dev); 278 caifd = caif_get(dev);
297 if (caifd == NULL) 279 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) {
298 break; 280 rcu_read_unlock();
299 netdev_info(dev, "down\n"); 281 return -EINVAL;
300 if (atomic_read(&caifd->in_use)) 282 }
301 netdev_warn(dev, 283
302 "Unregistering an active CAIF device\n"); 284 cfcnfg_set_phy_state(cfg, &caifd->layer, false);
303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); 285 caifd_hold(caifd);
304 dev_put(dev); 286 rcu_read_unlock();
305 atomic_set(&caifd->state, what); 287
288 caifd->layer.up->ctrlcmd(caifd->layer.up,
289 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
290 caifd->layer.id);
291 caifd_put(caifd);
306 break; 292 break;
307 293
308 case NETDEV_UNREGISTER: 294 case NETDEV_UNREGISTER:
295 mutex_lock(&caifdevs->lock);
296
309 caifd = caif_get(dev); 297 caifd = caif_get(dev);
310 if (caifd == NULL) 298 if (caifd == NULL) {
299 mutex_unlock(&caifdevs->lock);
300 break;
301 }
302 list_del_rcu(&caifd->list);
303
304 /*
305 * NETDEV_UNREGISTER is called repeatedly until all reference
306 * counts for the net-device are released. If references to
307 * caifd are taken, simply ignore NETDEV_UNREGISTER and wait for
308 * the next call to NETDEV_UNREGISTER.
309 *
310 * If any packets are in flight down the CAIF Stack,
311 * cfcnfg_del_phy_layer will return nonzero.
312 * If no packets are in flight, the CAIF Stack associated
313 * with the net-device un-registering is freed.
314 */
315
316 if (caifd_refcnt_read(caifd) != 0 ||
317 cfcnfg_del_phy_layer(cfg, &caifd->layer) != 0) {
318
319 pr_info("Wait for device inuse\n");
320 /* Enroll device again if CAIF Stack is still in use */
321 list_add_rcu(&caifd->list, &caifdevs->list);
322 mutex_unlock(&caifdevs->lock);
311 break; 323 break;
312 netdev_info(dev, "unregister\n"); 324 }
313 atomic_set(&caifd->state, what); 325
314 caif_device_destroy(dev); 326 synchronize_rcu();
327 dev_put(caifd->netdev);
328 free_percpu(caifd->pcpu_refcnt);
329 kfree(caifd);
330
331 mutex_unlock(&caifdevs->lock);
315 break; 332 break;
316 } 333 }
317 return 0; 334 return 0;
@@ -322,61 +339,60 @@ static struct notifier_block caif_device_notifier = {
322 .priority = 0, 339 .priority = 0,
323}; 340};
324 341
325
326struct cfcnfg *get_caif_conf(void)
327{
328 return cfg;
329}
330EXPORT_SYMBOL(get_caif_conf);
331
332int caif_connect_client(struct caif_connect_request *conn_req,
333 struct cflayer *client_layer, int *ifindex,
334 int *headroom, int *tailroom)
335{
336 struct cfctrl_link_param param;
337 int ret;
338 ret = connect_req_to_link_param(get_caif_conf(), conn_req, &param);
339 if (ret)
340 return ret;
341 /* Hook up the adaptation layer. */
342 return cfcnfg_add_adaptation_layer(get_caif_conf(), &param,
343 client_layer, ifindex,
344 headroom, tailroom);
345}
346EXPORT_SYMBOL(caif_connect_client);
347
348int caif_disconnect_client(struct cflayer *adap_layer)
349{
350 return cfcnfg_disconn_adapt_layer(get_caif_conf(), adap_layer);
351}
352EXPORT_SYMBOL(caif_disconnect_client);
353
354void caif_release_client(struct cflayer *adap_layer)
355{
356 cfcnfg_release_adap_layer(adap_layer);
357}
358EXPORT_SYMBOL(caif_release_client);
359
360/* Per-namespace Caif devices handling */ 342/* Per-namespace Caif devices handling */
361static int caif_init_net(struct net *net) 343static int caif_init_net(struct net *net)
362{ 344{
363 struct caif_net *caifn = net_generic(net, caif_net_id); 345 struct caif_net *caifn = net_generic(net, caif_net_id);
346 BUG_ON(!caifn);
364 INIT_LIST_HEAD(&caifn->caifdevs.list); 347 INIT_LIST_HEAD(&caifn->caifdevs.list);
365 spin_lock_init(&caifn->caifdevs.lock); 348 mutex_init(&caifn->caifdevs.lock);
349
350 caifn->cfg = cfcnfg_create();
351 if (!caifn->cfg) {
352 pr_warn("can't create cfcnfg\n");
353 return -ENOMEM;
354 }
355
366 return 0; 356 return 0;
367} 357}
368 358
369static void caif_exit_net(struct net *net) 359static void caif_exit_net(struct net *net)
370{ 360{
371 struct net_device *dev; 361 struct caif_device_entry *caifd, *tmp;
372 int res; 362 struct caif_device_entry_list *caifdevs =
363 caif_device_list(net);
364 struct cfcnfg *cfg;
365
373 rtnl_lock(); 366 rtnl_lock();
374 for_each_netdev(net, dev) { 367 mutex_lock(&caifdevs->lock);
375 if (dev->type != ARPHRD_CAIF) 368
376 continue; 369 cfg = get_cfcnfg(net);
377 res = dev_close(dev); 370 if (cfg == NULL) {
378 caif_device_destroy(dev); 371 mutex_unlock(&caifdevs->lock);
372 return;
379 } 373 }
374
375 list_for_each_entry_safe(caifd, tmp, &caifdevs->list, list) {
376 int i = 0;
377 list_del_rcu(&caifd->list);
378 cfcnfg_set_phy_state(cfg, &caifd->layer, false);
379
380 while (i < 10 &&
381 (caifd_refcnt_read(caifd) != 0 ||
382 cfcnfg_del_phy_layer(cfg, &caifd->layer) != 0)) {
383
384 pr_info("Wait for device inuse\n");
385 msleep(250);
386 i++;
387 }
388 synchronize_rcu();
389 dev_put(caifd->netdev);
390 free_percpu(caifd->pcpu_refcnt);
391 kfree(caifd);
392 }
393 cfcnfg_remove(cfg);
394
395 mutex_unlock(&caifdevs->lock);
380 rtnl_unlock(); 396 rtnl_unlock();
381} 397}
382 398
@@ -391,32 +407,23 @@ static struct pernet_operations caif_net_ops = {
391static int __init caif_device_init(void) 407static int __init caif_device_init(void)
392{ 408{
393 int result; 409 int result;
394 cfg = cfcnfg_create(); 410
395 if (!cfg) {
396 pr_warn("can't create cfcnfg\n");
397 goto err_cfcnfg_create_failed;
398 }
399 result = register_pernet_device(&caif_net_ops); 411 result = register_pernet_device(&caif_net_ops);
400 412
401 if (result) { 413 if (result)
402 kfree(cfg);
403 cfg = NULL;
404 return result; 414 return result;
405 } 415
406 dev_add_pack(&caif_packet_type);
407 register_netdevice_notifier(&caif_device_notifier); 416 register_netdevice_notifier(&caif_device_notifier);
417 dev_add_pack(&caif_packet_type);
408 418
409 return result; 419 return result;
410err_cfcnfg_create_failed:
411 return -ENODEV;
412} 420}
413 421
414static void __exit caif_device_exit(void) 422static void __exit caif_device_exit(void)
415{ 423{
416 dev_remove_pack(&caif_packet_type);
417 unregister_pernet_device(&caif_net_ops); 424 unregister_pernet_device(&caif_net_ops);
418 unregister_netdevice_notifier(&caif_device_notifier); 425 unregister_netdevice_notifier(&caif_device_notifier);
419 cfcnfg_remove(cfg); 426 dev_remove_pack(&caif_packet_type);
420} 427}
421 428
422module_init(caif_device_init); 429module_init(caif_device_init);
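
With this change the cfcnfg instance lives per network namespace instead of in a single global. The sketch below shows the conventional pernet_operations wiring such a change relies on; the .id/.size fields and the example_* names are assumptions for illustration and are not quoted from the hunks above.

#include <linux/errno.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/caif/cfcnfg.h>

struct example_net {
	struct cfcnfg *cfg;
};

static int example_net_id __read_mostly;

static int __net_init example_init_net(struct net *net)
{
	struct example_net *en = net_generic(net, example_net_id);

	en->cfg = cfcnfg_create();	/* per-namespace configuration object */
	return en->cfg ? 0 : -ENOMEM;
}

static void __net_exit example_exit_net(struct net *net)
{
	struct example_net *en = net_generic(net, example_net_id);

	cfcnfg_remove(en->cfg);
}

static struct pernet_operations example_net_ops = {
	.init = example_init_net,
	.exit = example_exit_net,
	.id   = &example_net_id,
	.size = sizeof(struct example_net),
};

/* Registered once at module init: register_pernet_device(&example_net_ops); */
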
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 8184c031d028..b840395ced1d 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -48,6 +48,7 @@ static struct dentry *debugfsdir;
48#ifdef CONFIG_DEBUG_FS 48#ifdef CONFIG_DEBUG_FS
49struct debug_fs_counter { 49struct debug_fs_counter {
50 atomic_t caif_nr_socks; 50 atomic_t caif_nr_socks;
51 atomic_t caif_sock_create;
51 atomic_t num_connect_req; 52 atomic_t num_connect_req;
52 atomic_t num_connect_resp; 53 atomic_t num_connect_resp;
53 atomic_t num_connect_fail_resp; 54 atomic_t num_connect_fail_resp;
@@ -59,11 +60,11 @@ struct debug_fs_counter {
59 atomic_t num_rx_flow_on; 60 atomic_t num_rx_flow_on;
60}; 61};
61static struct debug_fs_counter cnt; 62static struct debug_fs_counter cnt;
62#define dbfs_atomic_inc(v) atomic_inc(v) 63#define dbfs_atomic_inc(v) atomic_inc_return(v)
63#define dbfs_atomic_dec(v) atomic_dec(v) 64#define dbfs_atomic_dec(v) atomic_dec_return(v)
64#else 65#else
65#define dbfs_atomic_inc(v) 66#define dbfs_atomic_inc(v) 0
66#define dbfs_atomic_dec(v) 67#define dbfs_atomic_dec(v) 0
67#endif 68#endif
68 69
69struct caifsock { 70struct caifsock {
@@ -155,9 +156,10 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
155 156
156 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 157 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
157 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { 158 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
158 pr_debug("sending flow OFF (queue len = %d %d)\n", 159 if (net_ratelimit())
159 atomic_read(&cf_sk->sk.sk_rmem_alloc), 160 pr_debug("sending flow OFF (queue len = %d %d)\n",
160 sk_rcvbuf_lowwater(cf_sk)); 161 atomic_read(&cf_sk->sk.sk_rmem_alloc),
162 sk_rcvbuf_lowwater(cf_sk));
161 set_rx_flow_off(cf_sk); 163 set_rx_flow_off(cf_sk);
162 dbfs_atomic_inc(&cnt.num_rx_flow_off); 164 dbfs_atomic_inc(&cnt.num_rx_flow_off);
163 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); 165 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
@@ -168,7 +170,8 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
168 return err; 170 return err;
169 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { 171 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
170 set_rx_flow_off(cf_sk); 172 set_rx_flow_off(cf_sk);
171 pr_debug("sending flow OFF due to rmem_schedule\n"); 173 if (net_ratelimit())
174 pr_debug("sending flow OFF due to rmem_schedule\n");
172 dbfs_atomic_inc(&cnt.num_rx_flow_off); 175 dbfs_atomic_inc(&cnt.num_rx_flow_off);
173 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); 176 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
174 } 177 }
@@ -202,13 +205,25 @@ static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
202 skb = cfpkt_tonative(pkt); 205 skb = cfpkt_tonative(pkt);
203 206
204 if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) { 207 if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) {
205 cfpkt_destroy(pkt); 208 kfree_skb(skb);
206 return 0; 209 return 0;
207 } 210 }
208 caif_queue_rcv_skb(&cf_sk->sk, skb); 211 caif_queue_rcv_skb(&cf_sk->sk, skb);
209 return 0; 212 return 0;
210} 213}
211 214
215static void cfsk_hold(struct cflayer *layr)
216{
217 struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
218 sock_hold(&cf_sk->sk);
219}
220
221static void cfsk_put(struct cflayer *layr)
222{
223 struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
224 sock_put(&cf_sk->sk);
225}
226
212/* Packet Control Callback function called from CAIF */ 227/* Packet Control Callback function called from CAIF */
213static void caif_ctrl_cb(struct cflayer *layr, 228static void caif_ctrl_cb(struct cflayer *layr,
214 enum caif_ctrlcmd flow, 229 enum caif_ctrlcmd flow,
@@ -232,6 +247,8 @@ static void caif_ctrl_cb(struct cflayer *layr,
232 247
233 case CAIF_CTRLCMD_INIT_RSP: 248 case CAIF_CTRLCMD_INIT_RSP:
234 /* We're now connected */ 249 /* We're now connected */
250 caif_client_register_refcnt(&cf_sk->layer,
251 cfsk_hold, cfsk_put);
235 dbfs_atomic_inc(&cnt.num_connect_resp); 252 dbfs_atomic_inc(&cnt.num_connect_resp);
236 cf_sk->sk.sk_state = CAIF_CONNECTED; 253 cf_sk->sk.sk_state = CAIF_CONNECTED;
237 set_tx_flow_on(cf_sk); 254 set_tx_flow_on(cf_sk);
@@ -242,7 +259,6 @@ static void caif_ctrl_cb(struct cflayer *layr,
242 /* We're now disconnected */ 259 /* We're now disconnected */
243 cf_sk->sk.sk_state = CAIF_DISCONNECTED; 260 cf_sk->sk.sk_state = CAIF_DISCONNECTED;
244 cf_sk->sk.sk_state_change(&cf_sk->sk); 261 cf_sk->sk.sk_state_change(&cf_sk->sk);
245 cfcnfg_release_adap_layer(&cf_sk->layer);
246 break; 262 break;
247 263
248 case CAIF_CTRLCMD_INIT_FAIL_RSP: 264 case CAIF_CTRLCMD_INIT_FAIL_RSP:
@@ -519,43 +535,14 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
519 int noblock, long timeo) 535 int noblock, long timeo)
520{ 536{
521 struct cfpkt *pkt; 537 struct cfpkt *pkt;
522 int ret, loopcnt = 0;
523 538
524 pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb); 539 pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb);
525 memset(cfpkt_info(pkt), 0, sizeof(struct caif_payload_info)); 540 memset(skb->cb, 0, sizeof(struct caif_payload_info));
526 do {
527 541
528 ret = -ETIMEDOUT; 542 if (cf_sk->layer.dn == NULL)
543 return -EINVAL;
529 544
530 /* Slight paranoia, probably not needed. */ 545 return cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
531 if (unlikely(loopcnt++ > 1000)) {
532 pr_warn("transmit retries failed, error = %d\n", ret);
533 break;
534 }
535
536 if (cf_sk->layer.dn != NULL)
537 ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
538 if (likely(ret >= 0))
539 break;
540 /* if transmit return -EAGAIN, then retry */
541 if (noblock && ret == -EAGAIN)
542 break;
543 timeo = caif_wait_for_flow_on(cf_sk, 0, timeo, &ret);
544 if (signal_pending(current)) {
545 ret = sock_intr_errno(timeo);
546 break;
547 }
548 if (ret)
549 break;
550 if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
551 sock_flag(&cf_sk->sk, SOCK_DEAD) ||
552 (cf_sk->sk.sk_shutdown & RCV_SHUTDOWN)) {
553 ret = -EPIPE;
554 cf_sk->sk.sk_err = EPIPE;
555 break;
556 }
557 } while (ret == -EAGAIN);
558 return ret;
559} 546}
560 547
561/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ 548/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
@@ -620,7 +607,9 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
620 goto err; 607 goto err;
621 ret = transmit_skb(skb, cf_sk, noblock, timeo); 608 ret = transmit_skb(skb, cf_sk, noblock, timeo);
622 if (ret < 0) 609 if (ret < 0)
623 goto err; 610 /* skb is already freed */
611 return ret;
612
624 return len; 613 return len;
625err: 614err:
626 kfree_skb(skb); 615 kfree_skb(skb);
@@ -826,7 +815,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
826 sk->sk_state == CAIF_DISCONNECTED); 815 sk->sk_state == CAIF_DISCONNECTED);
827 if (sk->sk_shutdown & SHUTDOWN_MASK) { 816 if (sk->sk_shutdown & SHUTDOWN_MASK) {
828 /* Allow re-connect after SHUTDOWN_IND */ 817 /* Allow re-connect after SHUTDOWN_IND */
829 caif_disconnect_client(&cf_sk->layer); 818 caif_disconnect_client(sock_net(sk), &cf_sk->layer);
830 break; 819 break;
831 } 820 }
832 /* No reconnect on a seqpacket socket */ 821 /* No reconnect on a seqpacket socket */
@@ -866,8 +855,10 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
866 855
867 dbfs_atomic_inc(&cnt.num_connect_req); 856 dbfs_atomic_inc(&cnt.num_connect_req);
868 cf_sk->layer.receive = caif_sktrecv_cb; 857 cf_sk->layer.receive = caif_sktrecv_cb;
869 err = caif_connect_client(&cf_sk->conn_req, 858
859 err = caif_connect_client(sock_net(sk), &cf_sk->conn_req,
870 &cf_sk->layer, &ifindex, &headroom, &tailroom); 860 &cf_sk->layer, &ifindex, &headroom, &tailroom);
861
871 if (err < 0) { 862 if (err < 0) {
872 cf_sk->sk.sk_socket->state = SS_UNCONNECTED; 863 cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
873 cf_sk->sk.sk_state = CAIF_DISCONNECTED; 864 cf_sk->sk.sk_state = CAIF_DISCONNECTED;
@@ -947,13 +938,14 @@ static int caif_release(struct socket *sock)
947 * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock, 938 * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock,
948 * this ensures no packets when sock is dead. 939 * this ensures no packets when sock is dead.
949 */ 940 */
950 spin_lock(&sk->sk_receive_queue.lock); 941 spin_lock_bh(&sk->sk_receive_queue.lock);
951 sock_set_flag(sk, SOCK_DEAD); 942 sock_set_flag(sk, SOCK_DEAD);
952 spin_unlock(&sk->sk_receive_queue.lock); 943 spin_unlock_bh(&sk->sk_receive_queue.lock);
953 sock->sk = NULL; 944 sock->sk = NULL;
954 945
955 dbfs_atomic_inc(&cnt.num_disconnect); 946 dbfs_atomic_inc(&cnt.num_disconnect);
956 947
948 WARN_ON(IS_ERR(cf_sk->debugfs_socket_dir));
957 if (cf_sk->debugfs_socket_dir != NULL) 949 if (cf_sk->debugfs_socket_dir != NULL)
958 debugfs_remove_recursive(cf_sk->debugfs_socket_dir); 950 debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
959 951
@@ -963,13 +955,12 @@ static int caif_release(struct socket *sock)
963 955
964 if (cf_sk->sk.sk_socket->state == SS_CONNECTED || 956 if (cf_sk->sk.sk_socket->state == SS_CONNECTED ||
965 cf_sk->sk.sk_socket->state == SS_CONNECTING) 957 cf_sk->sk.sk_socket->state == SS_CONNECTING)
966 res = caif_disconnect_client(&cf_sk->layer); 958 res = caif_disconnect_client(sock_net(sk), &cf_sk->layer);
967 959
968 cf_sk->sk.sk_socket->state = SS_DISCONNECTING; 960 cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
969 wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP); 961 wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP);
970 962
971 sock_orphan(sk); 963 sock_orphan(sk);
972 cf_sk->layer.dn = NULL;
973 sk_stream_kill_queues(&cf_sk->sk); 964 sk_stream_kill_queues(&cf_sk->sk);
974 release_sock(sk); 965 release_sock(sk);
975 sock_put(sk); 966 sock_put(sk);
@@ -1060,16 +1051,18 @@ static void caif_sock_destructor(struct sock *sk)
1060 caif_assert(sk_unhashed(sk)); 1051 caif_assert(sk_unhashed(sk));
1061 caif_assert(!sk->sk_socket); 1052 caif_assert(!sk->sk_socket);
1062 if (!sock_flag(sk, SOCK_DEAD)) { 1053 if (!sock_flag(sk, SOCK_DEAD)) {
1063 pr_info("Attempt to release alive CAIF socket: %p\n", sk); 1054 pr_debug("Attempt to release alive CAIF socket: %p\n", sk);
1064 return; 1055 return;
1065 } 1056 }
1066 sk_stream_kill_queues(&cf_sk->sk); 1057 sk_stream_kill_queues(&cf_sk->sk);
1067 dbfs_atomic_dec(&cnt.caif_nr_socks); 1058 dbfs_atomic_dec(&cnt.caif_nr_socks);
1059 caif_free_client(&cf_sk->layer);
1068} 1060}
1069 1061
1070static int caif_create(struct net *net, struct socket *sock, int protocol, 1062static int caif_create(struct net *net, struct socket *sock, int protocol,
1071 int kern) 1063 int kern)
1072{ 1064{
1065 int num;
1073 struct sock *sk = NULL; 1066 struct sock *sk = NULL;
1074 struct caifsock *cf_sk = NULL; 1067 struct caifsock *cf_sk = NULL;
1075 static struct proto prot = {.name = "PF_CAIF", 1068 static struct proto prot = {.name = "PF_CAIF",
@@ -1132,14 +1125,16 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
1132 cf_sk->conn_req.protocol = protocol; 1125 cf_sk->conn_req.protocol = protocol;
1133 /* Increase the number of sockets created. */ 1126 /* Increase the number of sockets created. */
1134 dbfs_atomic_inc(&cnt.caif_nr_socks); 1127 dbfs_atomic_inc(&cnt.caif_nr_socks);
1128 num = dbfs_atomic_inc(&cnt.caif_sock_create);
1135#ifdef CONFIG_DEBUG_FS 1129#ifdef CONFIG_DEBUG_FS
1136 if (!IS_ERR(debugfsdir)) { 1130 if (!IS_ERR(debugfsdir)) {
1131
1137 /* Fill in some information concerning the misc socket. */ 1132 /* Fill in some information concerning the misc socket. */
1138 snprintf(cf_sk->name, sizeof(cf_sk->name), "cfsk%d", 1133 snprintf(cf_sk->name, sizeof(cf_sk->name), "cfsk%d", num);
1139 atomic_read(&cnt.caif_nr_socks));
1140 1134
1141 cf_sk->debugfs_socket_dir = 1135 cf_sk->debugfs_socket_dir =
1142 debugfs_create_dir(cf_sk->name, debugfsdir); 1136 debugfs_create_dir(cf_sk->name, debugfsdir);
1137
1143 debugfs_create_u32("sk_state", S_IRUSR | S_IWUSR, 1138 debugfs_create_u32("sk_state", S_IRUSR | S_IWUSR,
1144 cf_sk->debugfs_socket_dir, 1139 cf_sk->debugfs_socket_dir,
1145 (u32 *) &cf_sk->sk.sk_state); 1140 (u32 *) &cf_sk->sk.sk_state);
@@ -1183,6 +1178,9 @@ static int __init caif_sktinit_module(void)
1183 debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR, 1178 debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR,
1184 debugfsdir, 1179 debugfsdir,
1185 (u32 *) &cnt.caif_nr_socks); 1180 (u32 *) &cnt.caif_nr_socks);
1181 debugfs_create_u32("num_create", S_IRUSR | S_IWUSR,
1182 debugfsdir,
1183 (u32 *) &cnt.caif_sock_create);
1186 debugfs_create_u32("num_connect_req", S_IRUSR | S_IWUSR, 1184 debugfs_create_u32("num_connect_req", S_IRUSR | S_IWUSR,
1187 debugfsdir, 1185 debugfsdir,
1188 (u32 *) &cnt.num_connect_req); 1186 (u32 *) &cnt.num_connect_req);
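
The new cfsk_hold()/cfsk_put() callbacks let lower CAIF layers pin the socket while they still hold a pointer to cf_sk->layer. The sketch below restates that wiring in isolation; the prototype of caif_client_register_refcnt() is inferred from its call site in caif_ctrl_cb() and is otherwise an assumption.

/* Restated wiring from the hunk above: the socket layer hands the CAIF
 * stack a hold/put pair so the struct sock stays alive while packets
 * are in flight through cf_sk->layer. */
static void example_hold(struct cflayer *layr)
{
	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);

	sock_hold(&cf_sk->sk);		/* pin the socket */
}

static void example_put(struct cflayer *layr)
{
	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);

	sock_put(&cf_sk->sk);		/* drop the pin */
}

/* Registered once the channel is connected (CAIF_CTRLCMD_INIT_RSP):
 *	caif_client_register_refcnt(&cf_sk->layer, example_hold, example_put);
 */
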
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index c665de778b60..351c2ca7e7b9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -10,6 +10,7 @@
10#include <linux/stddef.h> 10#include <linux/stddef.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/module.h>
13#include <net/caif/caif_layer.h> 14#include <net/caif/caif_layer.h>
14#include <net/caif/cfpkt.h> 15#include <net/caif/cfpkt.h>
15#include <net/caif/cfcnfg.h> 16#include <net/caif/cfcnfg.h>
@@ -18,20 +19,17 @@
18#include <net/caif/cffrml.h> 19#include <net/caif/cffrml.h>
19#include <net/caif/cfserl.h> 20#include <net/caif/cfserl.h>
20#include <net/caif/cfsrvl.h> 21#include <net/caif/cfsrvl.h>
21 22#include <net/caif/caif_dev.h>
22#include <linux/module.h>
23#include <asm/atomic.h>
24
25#define MAX_PHY_LAYERS 7
26#define PHY_NAME_LEN 20
27 23
28#define container_obj(layr) container_of(layr, struct cfcnfg, layer) 24#define container_obj(layr) container_of(layr, struct cfcnfg, layer)
29#define RFM_FRAGMENT_SIZE 4030
30 25
31/* Information about CAIF physical interfaces held by Config Module in order 26/* Information about CAIF physical interfaces held by Config Module in order
32 * to manage physical interfaces 27 * to manage physical interfaces
33 */ 28 */
34struct cfcnfg_phyinfo { 29struct cfcnfg_phyinfo {
30 struct list_head node;
31 bool up;
32
35 /* Pointer to the layer below the MUX (framing layer) */ 33 /* Pointer to the layer below the MUX (framing layer) */
36 struct cflayer *frm_layer; 34 struct cflayer *frm_layer;
37 /* Pointer to the lowest actual physical layer */ 35 /* Pointer to the lowest actual physical layer */
@@ -41,9 +39,6 @@ struct cfcnfg_phyinfo {
41 /* Preference of the physical in interface */ 39 /* Preference of the physical in interface */
42 enum cfcnfg_phy_preference pref; 40 enum cfcnfg_phy_preference pref;
43 41
44 /* Reference count, number of channels using the device */
45 int phy_ref_count;
46
47 /* Information about the physical device */ 42 /* Information about the physical device */
48 struct dev_info dev_info; 43 struct dev_info dev_info;
49 44
@@ -61,8 +56,8 @@ struct cfcnfg {
61 struct cflayer layer; 56 struct cflayer layer;
62 struct cflayer *ctrl; 57 struct cflayer *ctrl;
63 struct cflayer *mux; 58 struct cflayer *mux;
64 u8 last_phyid; 59 struct list_head phys;
65 struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS]; 60 struct mutex lock;
66}; 61};
67 62
68static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, 63static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
@@ -78,6 +73,9 @@ struct cfcnfg *cfcnfg_create(void)
78{ 73{
79 struct cfcnfg *this; 74 struct cfcnfg *this;
80 struct cfctrl_rsp *resp; 75 struct cfctrl_rsp *resp;
76
77 might_sleep();
78
81 /* Initiate this layer */ 79 /* Initiate this layer */
82 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); 80 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
83 if (!this) { 81 if (!this) {
@@ -101,27 +99,33 @@ struct cfcnfg *cfcnfg_create(void)
101 resp->radioset_rsp = cfctrl_resp_func; 99 resp->radioset_rsp = cfctrl_resp_func;
102 resp->linksetup_rsp = cfcnfg_linkup_rsp; 100 resp->linksetup_rsp = cfcnfg_linkup_rsp;
103 resp->reject_rsp = cfcnfg_reject_rsp; 101 resp->reject_rsp = cfcnfg_reject_rsp;
104 102 INIT_LIST_HEAD(&this->phys);
105 this->last_phyid = 1;
106 103
107 cfmuxl_set_uplayer(this->mux, this->ctrl, 0); 104 cfmuxl_set_uplayer(this->mux, this->ctrl, 0);
108 layer_set_dn(this->ctrl, this->mux); 105 layer_set_dn(this->ctrl, this->mux);
109 layer_set_up(this->ctrl, this); 106 layer_set_up(this->ctrl, this);
107 mutex_init(&this->lock);
108
110 return this; 109 return this;
111out_of_mem: 110out_of_mem:
112 pr_warn("Out of memory\n"); 111 pr_warn("Out of memory\n");
112
113 synchronize_rcu();
114
113 kfree(this->mux); 115 kfree(this->mux);
114 kfree(this->ctrl); 116 kfree(this->ctrl);
115 kfree(this); 117 kfree(this);
116 return NULL; 118 return NULL;
117} 119}
118EXPORT_SYMBOL(cfcnfg_create);
119 120
120void cfcnfg_remove(struct cfcnfg *cfg) 121void cfcnfg_remove(struct cfcnfg *cfg)
121{ 122{
123 might_sleep();
122 if (cfg) { 124 if (cfg) {
125 synchronize_rcu();
126
123 kfree(cfg->mux); 127 kfree(cfg->mux);
124 kfree(cfg->ctrl); 128 cfctrl_remove(cfg->ctrl);
125 kfree(cfg); 129 kfree(cfg);
126 } 130 }
127} 131}
@@ -130,132 +134,96 @@ static void cfctrl_resp_func(void)
130{ 134{
131} 135}
132 136
137static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg,
138 u8 phyid)
139{
140 struct cfcnfg_phyinfo *phy;
141
142 list_for_each_entry_rcu(phy, &cnfg->phys, node)
143 if (phy->id == phyid)
144 return phy;
145 return NULL;
146}
147
133static void cfctrl_enum_resp(void) 148static void cfctrl_enum_resp(void)
134{ 149{
135} 150}
136 151
137struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg, 152static struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg,
138 enum cfcnfg_phy_preference phy_pref) 153 enum cfcnfg_phy_preference phy_pref)
139{ 154{
140 u16 i;
141
142 /* Try to match with specified preference */ 155 /* Try to match with specified preference */
143 for (i = 1; i < MAX_PHY_LAYERS; i++) { 156 struct cfcnfg_phyinfo *phy;
144 if (cnfg->phy_layers[i].id == i && 157
145 cnfg->phy_layers[i].pref == phy_pref && 158 list_for_each_entry_rcu(phy, &cnfg->phys, node) {
146 cnfg->phy_layers[i].frm_layer != NULL) { 159 if (phy->up && phy->pref == phy_pref &&
147 caif_assert(cnfg->phy_layers != NULL); 160 phy->frm_layer != NULL)
148 caif_assert(cnfg->phy_layers[i].id == i); 161
149 return &cnfg->phy_layers[i].dev_info; 162 return &phy->dev_info;
150 }
151 } 163 }
164
152 /* Otherwise just return something */ 165 /* Otherwise just return something */
153 for (i = 1; i < MAX_PHY_LAYERS; i++) { 166 list_for_each_entry_rcu(phy, &cnfg->phys, node)
154 if (cnfg->phy_layers[i].id == i) { 167 if (phy->up)
155 caif_assert(cnfg->phy_layers != NULL); 168 return &phy->dev_info;
156 caif_assert(cnfg->phy_layers[i].id == i);
157 return &cnfg->phy_layers[i].dev_info;
158 }
159 }
160 169
161 return NULL; 170 return NULL;
162} 171}
163 172
164static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg, 173static int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
165 u8 phyid)
166{ 174{
167 int i; 175 struct cfcnfg_phyinfo *phy;
168 /* Try to match with specified preference */
169 for (i = 0; i < MAX_PHY_LAYERS; i++)
170 if (cnfg->phy_layers[i].frm_layer != NULL &&
171 cnfg->phy_layers[i].id == phyid)
172 return &cnfg->phy_layers[i];
173 return NULL;
174}
175
176 176
177int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi) 177 list_for_each_entry_rcu(phy, &cnfg->phys, node)
178{ 178 if (phy->ifindex == ifi && phy->up)
179 int i; 179 return phy->id;
180 for (i = 0; i < MAX_PHY_LAYERS; i++)
181 if (cnfg->phy_layers[i].frm_layer != NULL &&
182 cnfg->phy_layers[i].ifindex == ifi)
183 return i;
184 return -ENODEV; 180 return -ENODEV;
185} 181}
186 182
187int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) 183int caif_disconnect_client(struct net *net, struct cflayer *adap_layer)
188{ 184{
189 u8 channel_id = 0; 185 u8 channel_id = 0;
190 int ret = 0; 186 int ret = 0;
191 struct cflayer *servl = NULL; 187 struct cflayer *servl = NULL;
192 struct cfcnfg_phyinfo *phyinfo = NULL; 188 struct cfcnfg *cfg = get_cfcnfg(net);
193 u8 phyid = 0;
194 189
195 caif_assert(adap_layer != NULL); 190 caif_assert(adap_layer != NULL);
191
196 channel_id = adap_layer->id; 192 channel_id = adap_layer->id;
197 if (adap_layer->dn == NULL || channel_id == 0) { 193 if (adap_layer->dn == NULL || channel_id == 0) {
198 pr_err("adap_layer->dn == NULL or adap_layer->id is 0\n"); 194 pr_err("adap_layer->dn == NULL or adap_layer->id is 0\n");
199 ret = -ENOTCONN; 195 ret = -ENOTCONN;
200 goto end; 196 goto end;
201 } 197 }
202 servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id); 198
199 servl = cfmuxl_remove_uplayer(cfg->mux, channel_id);
203 if (servl == NULL) { 200 if (servl == NULL) {
204 pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)", 201 pr_err("PROTOCOL ERROR - "
205 channel_id); 202 "Error removing service_layer Channel_Id(%d)",
203 channel_id);
206 ret = -EINVAL; 204 ret = -EINVAL;
207 goto end; 205 goto end;
208 } 206 }
209 layer_set_up(servl, NULL); 207
210 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); 208 ret = cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer);
211 if (ret) 209
212 goto end;
213 caif_assert(channel_id == servl->id);
214 if (adap_layer->dn != NULL) {
215 phyid = cfsrvl_getphyid(adap_layer->dn);
216
217 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
218 if (phyinfo == NULL) {
219 pr_warn("No interface to send disconnect to\n");
220 ret = -ENODEV;
221 goto end;
222 }
223 if (phyinfo->id != phyid ||
224 phyinfo->phy_layer->id != phyid ||
225 phyinfo->frm_layer->id != phyid) {
226 pr_err("Inconsistency in phy registration\n");
227 ret = -EINVAL;
228 goto end;
229 }
230 }
231 if (phyinfo != NULL && --phyinfo->phy_ref_count == 0 &&
232 phyinfo->phy_layer != NULL &&
233 phyinfo->phy_layer->modemcmd != NULL) {
234 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
235 _CAIF_MODEMCMD_PHYIF_USELESS);
236 }
237end: 210end:
238 cfsrvl_put(servl); 211 cfctrl_cancel_req(cfg->ctrl, adap_layer);
239 cfctrl_cancel_req(cnfg->ctrl, adap_layer); 212
213 /* Do RCU sync before initiating cleanup */
214 synchronize_rcu();
240 if (adap_layer->ctrlcmd != NULL) 215 if (adap_layer->ctrlcmd != NULL)
241 adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0); 216 adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0);
242 return ret; 217 return ret;
243 218
244} 219}
245EXPORT_SYMBOL(cfcnfg_disconn_adapt_layer); 220EXPORT_SYMBOL(caif_disconnect_client);
246
247void cfcnfg_release_adap_layer(struct cflayer *adap_layer)
248{
249 if (adap_layer->dn)
250 cfsrvl_put(adap_layer->dn);
251}
252EXPORT_SYMBOL(cfcnfg_release_adap_layer);
253 221
254static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) 222static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
255{ 223{
256} 224}
257 225
258int protohead[CFCTRL_SRV_MASK] = { 226static const int protohead[CFCTRL_SRV_MASK] = {
259 [CFCTRL_SRV_VEI] = 4, 227 [CFCTRL_SRV_VEI] = 4,
260 [CFCTRL_SRV_DATAGRAM] = 7, 228 [CFCTRL_SRV_DATAGRAM] = 7,
261 [CFCTRL_SRV_UTIL] = 4, 229 [CFCTRL_SRV_UTIL] = 4,
@@ -263,49 +231,157 @@ int protohead[CFCTRL_SRV_MASK] = {
263 [CFCTRL_SRV_DBG] = 3, 231 [CFCTRL_SRV_DBG] = 3,
264}; 232};
265 233
266int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, 234
267 struct cfctrl_link_param *param, 235static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
268 struct cflayer *adap_layer, 236 struct caif_connect_request *s,
269 int *ifindex, 237 struct cfctrl_link_param *l)
238{
239 struct dev_info *dev_info;
240 enum cfcnfg_phy_preference pref;
241 int res;
242
243 memset(l, 0, sizeof(*l));
244 /* In caif protocol low value is high priority */
245 l->priority = CAIF_PRIO_MAX - s->priority + 1;
246
247 if (s->ifindex != 0) {
248 res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
249 if (res < 0)
250 return res;
251 l->phyid = res;
252 } else {
253 switch (s->link_selector) {
254 case CAIF_LINK_HIGH_BANDW:
255 pref = CFPHYPREF_HIGH_BW;
256 break;
257 case CAIF_LINK_LOW_LATENCY:
258 pref = CFPHYPREF_LOW_LAT;
259 break;
260 default:
261 return -EINVAL;
262 }
263 dev_info = cfcnfg_get_phyid(cnfg, pref);
264 if (dev_info == NULL)
265 return -ENODEV;
266 l->phyid = dev_info->id;
267 }
268 switch (s->protocol) {
269 case CAIFPROTO_AT:
270 l->linktype = CFCTRL_SRV_VEI;
271 l->endpoint = (s->sockaddr.u.at.type >> 2) & 0x3;
272 l->chtype = s->sockaddr.u.at.type & 0x3;
273 break;
274 case CAIFPROTO_DATAGRAM:
275 l->linktype = CFCTRL_SRV_DATAGRAM;
276 l->chtype = 0x00;
277 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
278 break;
279 case CAIFPROTO_DATAGRAM_LOOP:
280 l->linktype = CFCTRL_SRV_DATAGRAM;
281 l->chtype = 0x03;
282 l->endpoint = 0x00;
283 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
284 break;
285 case CAIFPROTO_RFM:
286 l->linktype = CFCTRL_SRV_RFM;
287 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
288 strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
289 sizeof(l->u.rfm.volume)-1);
290 l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
291 break;
292 case CAIFPROTO_UTIL:
293 l->linktype = CFCTRL_SRV_UTIL;
294 l->endpoint = 0x00;
295 l->chtype = 0x00;
296 strncpy(l->u.utility.name, s->sockaddr.u.util.service,
297 sizeof(l->u.utility.name)-1);
298 l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
299 caif_assert(sizeof(l->u.utility.name) > 10);
300 l->u.utility.paramlen = s->param.size;
301 if (l->u.utility.paramlen > sizeof(l->u.utility.params))
302 l->u.utility.paramlen = sizeof(l->u.utility.params);
303
304 memcpy(l->u.utility.params, s->param.data,
305 l->u.utility.paramlen);
306
307 break;
308 case CAIFPROTO_DEBUG:
309 l->linktype = CFCTRL_SRV_DBG;
310 l->endpoint = s->sockaddr.u.dbg.service;
311 l->chtype = s->sockaddr.u.dbg.type;
312 break;
313 default:
314 return -EINVAL;
315 }
316 return 0;
317}
318
319int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,
320 struct cflayer *adap_layer, int *ifindex,
270 int *proto_head, 321 int *proto_head,
271 int *proto_tail) 322 int *proto_tail)
272{ 323{
273 struct cflayer *frml; 324 struct cflayer *frml;
325 struct cfcnfg_phyinfo *phy;
326 int err;
327 struct cfctrl_link_param param;
328 struct cfcnfg *cfg = get_cfcnfg(net);
329 caif_assert(cfg != NULL);
330
331 rcu_read_lock();
332 err = caif_connect_req_to_link_param(cfg, conn_req, &param);
333 if (err)
334 goto unlock;
335
336 phy = cfcnfg_get_phyinfo_rcu(cfg, param.phyid);
337 if (!phy) {
338 err = -ENODEV;
339 goto unlock;
340 }
341 err = -EINVAL;
342
274 if (adap_layer == NULL) { 343 if (adap_layer == NULL) {
275 pr_err("adap_layer is zero\n"); 344 pr_err("adap_layer is zero\n");
276 return -EINVAL; 345 goto unlock;
277 } 346 }
278 if (adap_layer->receive == NULL) { 347 if (adap_layer->receive == NULL) {
279 pr_err("adap_layer->receive is NULL\n"); 348 pr_err("adap_layer->receive is NULL\n");
280 return -EINVAL; 349 goto unlock;
281 } 350 }
282 if (adap_layer->ctrlcmd == NULL) { 351 if (adap_layer->ctrlcmd == NULL) {
283 pr_err("adap_layer->ctrlcmd == NULL\n"); 352 pr_err("adap_layer->ctrlcmd == NULL\n");
284 return -EINVAL; 353 goto unlock;
285 } 354 }
286 frml = cnfg->phy_layers[param->phyid].frm_layer; 355
356 err = -ENODEV;
357 frml = phy->frm_layer;
287 if (frml == NULL) { 358 if (frml == NULL) {
288 pr_err("Specified PHY type does not exist!\n"); 359 pr_err("Specified PHY type does not exist!\n");
289 return -ENODEV; 360 goto unlock;
290 } 361 }
291 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id); 362 caif_assert(param.phyid == phy->id);
292 caif_assert(cnfg->phy_layers[param->phyid].frm_layer->id == 363 caif_assert(phy->frm_layer->id ==
293 param->phyid); 364 param.phyid);
294 caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id == 365 caif_assert(phy->phy_layer->id ==
295 param->phyid); 366 param.phyid);
296 367
297 *ifindex = cnfg->phy_layers[param->phyid].ifindex; 368 *ifindex = phy->ifindex;
369 *proto_tail = 2;
298 *proto_head = 370 *proto_head =
299 protohead[param->linktype]+
300 (cnfg->phy_layers[param->phyid].use_stx ? 1 : 0);
301 371
302 *proto_tail = 2; 372 protohead[param.linktype] + (phy->use_stx ? 1 : 0);
373
374 rcu_read_unlock();
303 375
304 /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ 376 /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
305 cfctrl_enum_req(cnfg->ctrl, param->phyid); 377 cfctrl_enum_req(cfg->ctrl, param.phyid);
306 return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); 378 return cfctrl_linkup_request(cfg->ctrl, &param, adap_layer);
379
380unlock:
381 rcu_read_unlock();
382 return err;
307} 383}
308EXPORT_SYMBOL(cfcnfg_add_adaptation_layer); 384EXPORT_SYMBOL(caif_connect_client);
309 385
310static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, 386static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
311 struct cflayer *adapt_layer) 387 struct cflayer *adapt_layer)
@@ -317,32 +393,37 @@ static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
317 393
318static void 394static void
319cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, 395cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
320 u8 phyid, struct cflayer *adapt_layer) 396 u8 phyid, struct cflayer *adapt_layer)
321{ 397{
322 struct cfcnfg *cnfg = container_obj(layer); 398 struct cfcnfg *cnfg = container_obj(layer);
323 struct cflayer *servicel = NULL; 399 struct cflayer *servicel = NULL;
324 struct cfcnfg_phyinfo *phyinfo; 400 struct cfcnfg_phyinfo *phyinfo;
325 struct net_device *netdev; 401 struct net_device *netdev;
326 402
403 rcu_read_lock();
404
327 if (adapt_layer == NULL) { 405 if (adapt_layer == NULL) {
328 pr_debug("link setup response but no client exist, send linkdown back\n"); 406 pr_debug("link setup response but no client exist,"
407 "send linkdown back\n");
329 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); 408 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
330 return; 409 goto unlock;
331 } 410 }
332 411
333 caif_assert(cnfg != NULL); 412 caif_assert(cnfg != NULL);
334 caif_assert(phyid != 0); 413 caif_assert(phyid != 0);
335 phyinfo = &cnfg->phy_layers[phyid]; 414
415 phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
416 if (phyinfo == NULL) {
417 pr_err("ERROR: Link Layer Device dissapeared"
418 "while connecting\n");
419 goto unlock;
420 }
421
422 caif_assert(phyinfo != NULL);
336 caif_assert(phyinfo->id == phyid); 423 caif_assert(phyinfo->id == phyid);
337 caif_assert(phyinfo->phy_layer != NULL); 424 caif_assert(phyinfo->phy_layer != NULL);
338 caif_assert(phyinfo->phy_layer->id == phyid); 425 caif_assert(phyinfo->phy_layer->id == phyid);
339 426
340 phyinfo->phy_ref_count++;
341 if (phyinfo->phy_ref_count == 1 &&
342 phyinfo->phy_layer->modemcmd != NULL) {
343 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
344 _CAIF_MODEMCMD_PHYIF_USEFULL);
345 }
346 adapt_layer->id = channel_id; 427 adapt_layer->id = channel_id;
347 428
348 switch (serv) { 429 switch (serv) {
@@ -350,7 +431,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
350 servicel = cfvei_create(channel_id, &phyinfo->dev_info); 431 servicel = cfvei_create(channel_id, &phyinfo->dev_info);
351 break; 432 break;
352 case CFCTRL_SRV_DATAGRAM: 433 case CFCTRL_SRV_DATAGRAM:
353 servicel = cfdgml_create(channel_id, &phyinfo->dev_info); 434 servicel = cfdgml_create(channel_id,
435 &phyinfo->dev_info);
354 break; 436 break;
355 case CFCTRL_SRV_RFM: 437 case CFCTRL_SRV_RFM:
356 netdev = phyinfo->dev_info.dev; 438 netdev = phyinfo->dev_info.dev;
@@ -367,94 +449,93 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
367 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); 449 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
368 break; 450 break;
369 default: 451 default:
370 pr_err("Protocol error. Link setup response - unknown channel type\n"); 452 pr_err("Protocol error. Link setup response "
371 return; 453 "- unknown channel type\n");
454 goto unlock;
372 } 455 }
373 if (!servicel) { 456 if (!servicel) {
374 pr_warn("Out of memory\n"); 457 pr_warn("Out of memory\n");
375 return; 458 goto unlock;
376 } 459 }
377 layer_set_dn(servicel, cnfg->mux); 460 layer_set_dn(servicel, cnfg->mux);
378 cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); 461 cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id);
379 layer_set_up(servicel, adapt_layer); 462 layer_set_up(servicel, adapt_layer);
380 layer_set_dn(adapt_layer, servicel); 463 layer_set_dn(adapt_layer, servicel);
381 cfsrvl_get(servicel); 464
465 rcu_read_unlock();
466
382 servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0); 467 servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0);
468 return;
469unlock:
470 rcu_read_unlock();
383} 471}
384 472
385void 473void
386cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, 474cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
387 struct net_device *dev, struct cflayer *phy_layer, 475 struct net_device *dev, struct cflayer *phy_layer,
388 u16 *phyid, enum cfcnfg_phy_preference pref, 476 enum cfcnfg_phy_preference pref,
389 bool fcs, bool stx) 477 bool fcs, bool stx)
390{ 478{
391 struct cflayer *frml; 479 struct cflayer *frml;
392 struct cflayer *phy_driver = NULL; 480 struct cflayer *phy_driver = NULL;
481 struct cfcnfg_phyinfo *phyinfo;
393 int i; 482 int i;
483 u8 phyid;
394 484
485 mutex_lock(&cnfg->lock);
395 486
396 if (cnfg->phy_layers[cnfg->last_phyid].frm_layer == NULL) { 487 /* CAIF protocol allow maximum 6 link-layers */
397 *phyid = cnfg->last_phyid; 488 for (i = 0; i < 7; i++) {
398 489 phyid = (dev->ifindex + i) & 0x7;
399 /* range: * 1..(MAX_PHY_LAYERS-1) */ 490 if (phyid == 0)
400 cnfg->last_phyid = 491 continue;
401 (cnfg->last_phyid % (MAX_PHY_LAYERS - 1)) + 1; 492 if (cfcnfg_get_phyinfo_rcu(cnfg, phyid) == NULL)
402 } else { 493 goto got_phyid;
403 *phyid = 0;
404 for (i = 1; i < MAX_PHY_LAYERS; i++) {
405 if (cnfg->phy_layers[i].frm_layer == NULL) {
406 *phyid = i;
407 break;
408 }
409 }
410 }
411 if (*phyid == 0) {
412 pr_err("No Available PHY ID\n");
413 return;
414 } 494 }
495 pr_warn("Too many CAIF Link Layers (max 6)\n");
496 goto out;
497
498got_phyid:
499 phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
415 500
416 switch (phy_type) { 501 switch (phy_type) {
417 case CFPHYTYPE_FRAG: 502 case CFPHYTYPE_FRAG:
418 phy_driver = 503 phy_driver =
419 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx); 504 cfserl_create(CFPHYTYPE_FRAG, phyid, stx);
420 if (!phy_driver) { 505 if (!phy_driver) {
421 pr_warn("Out of memory\n"); 506 pr_warn("Out of memory\n");
422 return; 507 goto out;
423 } 508 }
424
425 break; 509 break;
426 case CFPHYTYPE_CAIF: 510 case CFPHYTYPE_CAIF:
427 phy_driver = NULL; 511 phy_driver = NULL;
428 break; 512 break;
429 default: 513 default:
430 pr_err("%d\n", phy_type); 514 goto out;
431 return;
432 break;
433 } 515 }
434 516 phy_layer->id = phyid;
435 phy_layer->id = *phyid; 517 phyinfo->pref = pref;
436 cnfg->phy_layers[*phyid].pref = pref; 518 phyinfo->id = phyid;
437 cnfg->phy_layers[*phyid].id = *phyid; 519 phyinfo->dev_info.id = phyid;
438 cnfg->phy_layers[*phyid].dev_info.id = *phyid; 520 phyinfo->dev_info.dev = dev;
439 cnfg->phy_layers[*phyid].dev_info.dev = dev; 521 phyinfo->phy_layer = phy_layer;
440 cnfg->phy_layers[*phyid].phy_layer = phy_layer; 522 phyinfo->ifindex = dev->ifindex;
441 cnfg->phy_layers[*phyid].phy_ref_count = 0; 523 phyinfo->use_stx = stx;
442 cnfg->phy_layers[*phyid].ifindex = dev->ifindex; 524 phyinfo->use_fcs = fcs;
443 cnfg->phy_layers[*phyid].use_stx = stx;
444 cnfg->phy_layers[*phyid].use_fcs = fcs;
445 525
446 phy_layer->type = phy_type; 526 phy_layer->type = phy_type;
447 frml = cffrml_create(*phyid, fcs); 527 frml = cffrml_create(phyid, fcs);
528
448 if (!frml) { 529 if (!frml) {
449 pr_warn("Out of memory\n"); 530 pr_warn("Out of memory\n");
450 return; 531 kfree(phyinfo);
532 goto out;
451 } 533 }
452 cnfg->phy_layers[*phyid].frm_layer = frml; 534 phyinfo->frm_layer = frml;
453 cfmuxl_set_dnlayer(cnfg->mux, frml, *phyid);
454 layer_set_up(frml, cnfg->mux); 535 layer_set_up(frml, cnfg->mux);
455 536
456 if (phy_driver != NULL) { 537 if (phy_driver != NULL) {
457 phy_driver->id = *phyid; 538 phy_driver->id = phyid;
458 layer_set_dn(frml, phy_driver); 539 layer_set_dn(frml, phy_driver);
459 layer_set_up(phy_driver, frml); 540 layer_set_up(phy_driver, frml);
460 layer_set_dn(phy_driver, phy_layer); 541 layer_set_dn(phy_driver, phy_layer);
@@ -463,33 +544,95 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
463 layer_set_dn(frml, phy_layer); 544 layer_set_dn(frml, phy_layer);
464 layer_set_up(phy_layer, frml); 545 layer_set_up(phy_layer, frml);
465 } 546 }
547
548 list_add_rcu(&phyinfo->node, &cnfg->phys);
549out:
550 mutex_unlock(&cnfg->lock);
466} 551}
467EXPORT_SYMBOL(cfcnfg_add_phy_layer); 552EXPORT_SYMBOL(cfcnfg_add_phy_layer);
468 553
554int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer,
555 bool up)
556{
557 struct cfcnfg_phyinfo *phyinfo;
558
559 rcu_read_lock();
560 phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phy_layer->id);
561 if (phyinfo == NULL) {
562 rcu_read_unlock();
563 return -ENODEV;
564 }
565
566 if (phyinfo->up == up) {
567 rcu_read_unlock();
568 return 0;
569 }
570 phyinfo->up = up;
571
572 if (up) {
573 cffrml_hold(phyinfo->frm_layer);
574 cfmuxl_set_dnlayer(cnfg->mux, phyinfo->frm_layer,
575 phy_layer->id);
576 } else {
577 cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id);
578 cffrml_put(phyinfo->frm_layer);
579 }
580
581 rcu_read_unlock();
582 return 0;
583}
584EXPORT_SYMBOL(cfcnfg_set_phy_state);
585
469int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer) 586int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer)
470{ 587{
471 struct cflayer *frml, *frml_dn; 588 struct cflayer *frml, *frml_dn;
472 u16 phyid; 589 u16 phyid;
590 struct cfcnfg_phyinfo *phyinfo;
591
592 might_sleep();
593
594 mutex_lock(&cnfg->lock);
595
473 phyid = phy_layer->id; 596 phyid = phy_layer->id;
474 caif_assert(phyid == cnfg->phy_layers[phyid].id); 597 phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
475 caif_assert(phy_layer == cnfg->phy_layers[phyid].phy_layer); 598
599 if (phyinfo == NULL) {
600 mutex_unlock(&cnfg->lock);
601 return 0;
602 }
603 caif_assert(phyid == phyinfo->id);
604 caif_assert(phy_layer == phyinfo->phy_layer);
476 caif_assert(phy_layer->id == phyid); 605 caif_assert(phy_layer->id == phyid);
477 caif_assert(cnfg->phy_layers[phyid].frm_layer->id == phyid); 606 caif_assert(phyinfo->frm_layer->id == phyid);
478 607
479 memset(&cnfg->phy_layers[phy_layer->id], 0, 608 list_del_rcu(&phyinfo->node);
480 sizeof(struct cfcnfg_phyinfo)); 609 synchronize_rcu();
481 frml = cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id); 610
611 /* Fail if reference count is not zero */
612 if (cffrml_refcnt_read(phyinfo->frm_layer) != 0) {
613 pr_info("Wait for device inuse\n");
614 list_add_rcu(&phyinfo->node, &cnfg->phys);
615 mutex_unlock(&cnfg->lock);
616 return -EAGAIN;
617 }
618
619 frml = phyinfo->frm_layer;
482 frml_dn = frml->dn; 620 frml_dn = frml->dn;
483 cffrml_set_uplayer(frml, NULL); 621 cffrml_set_uplayer(frml, NULL);
484 cffrml_set_dnlayer(frml, NULL); 622 cffrml_set_dnlayer(frml, NULL);
485 kfree(frml);
486
487 if (phy_layer != frml_dn) { 623 if (phy_layer != frml_dn) {
488 layer_set_up(frml_dn, NULL); 624 layer_set_up(frml_dn, NULL);
489 layer_set_dn(frml_dn, NULL); 625 layer_set_dn(frml_dn, NULL);
490 kfree(frml_dn);
491 } 626 }
492 layer_set_up(phy_layer, NULL); 627 layer_set_up(phy_layer, NULL);
628
629 if (phyinfo->phy_layer != frml_dn)
630 kfree(frml_dn);
631
632 cffrml_free(frml);
633 kfree(phyinfo);
634 mutex_unlock(&cnfg->lock);
635
493 return 0; 636 return 0;
494} 637}
495EXPORT_SYMBOL(cfcnfg_del_phy_layer); 638EXPORT_SYMBOL(cfcnfg_del_phy_layer);
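
The rewritten cfcnfg_add_phy_layer() above drops the fixed phy_layers[] array in favour of a kzalloc'd cfcnfg_phyinfo entry kept on an RCU-traversed list under cnfg->lock, and it derives the 3-bit PHY id from the device ifindex instead of rotating last_phyid. A standalone sketch of just the id-probing step (slot_busy[] and pick_phyid() are illustrative stand-ins, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for cfcnfg_get_phyinfo_rcu(): id 0 is reserved,
 * so at most six of the eight 3-bit ids are usable. */
static bool slot_busy[8];

/* Probe up to seven candidate ids derived from the interface index,
 * mirroring the loop in the new cfcnfg_add_phy_layer(); returns 0 when
 * no id is free. */
static int pick_phyid(int ifindex)
{
        int i;

        for (i = 0; i < 7; i++) {
                int phyid = (ifindex + i) & 0x7;

                if (phyid == 0)         /* id 0 is never handed out */
                        continue;
                if (!slot_busy[phyid]) {
                        slot_busy[phyid] = true;
                        return phyid;
                }
        }
        return 0;                       /* all usable ids taken */
}

int main(void)
{
        printf("first id for ifindex 3:  %d\n", pick_phyid(3));
        printf("second id for ifindex 3: %d\n", pick_phyid(3));
        return 0;
}

Because id 0 is reserved, at most six link layers can coexist, which is what the "Too many CAIF Link Layers (max 6)" warning above guards against.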
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 3cd8f978e309..0c00a6015dda 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -17,7 +17,6 @@
17#define UTILITY_NAME_LENGTH 16 17#define UTILITY_NAME_LENGTH 16
18#define CFPKT_CTRL_PKT_LEN 20 18#define CFPKT_CTRL_PKT_LEN 20
19 19
20
21#ifdef CAIF_NO_LOOP 20#ifdef CAIF_NO_LOOP
22static int handle_loop(struct cfctrl *ctrl, 21static int handle_loop(struct cfctrl *ctrl,
23 int cmd, struct cfpkt *pkt){ 22 int cmd, struct cfpkt *pkt){
@@ -51,14 +50,31 @@ struct cflayer *cfctrl_create(void)
51 this->serv.layer.receive = cfctrl_recv; 50 this->serv.layer.receive = cfctrl_recv;
52 sprintf(this->serv.layer.name, "ctrl"); 51 sprintf(this->serv.layer.name, "ctrl");
53 this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; 52 this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
53#ifndef CAIF_NO_LOOP
54 spin_lock_init(&this->loop_linkid_lock); 54 spin_lock_init(&this->loop_linkid_lock);
55 this->loop_linkid = 1;
56#endif
55 spin_lock_init(&this->info_list_lock); 57 spin_lock_init(&this->info_list_lock);
56 INIT_LIST_HEAD(&this->list); 58 INIT_LIST_HEAD(&this->list);
57 this->loop_linkid = 1;
58 return &this->serv.layer; 59 return &this->serv.layer;
59} 60}
60 61
61static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2) 62void cfctrl_remove(struct cflayer *layer)
63{
64 struct cfctrl_request_info *p, *tmp;
65 struct cfctrl *ctrl = container_obj(layer);
66
67 spin_lock_bh(&ctrl->info_list_lock);
68 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
69 list_del(&p->list);
70 kfree(p);
71 }
72 spin_unlock_bh(&ctrl->info_list_lock);
73 kfree(layer);
74}
75
76static bool param_eq(const struct cfctrl_link_param *p1,
77 const struct cfctrl_link_param *p2)
62{ 78{
63 bool eq = 79 bool eq =
64 p1->linktype == p2->linktype && 80 p1->linktype == p2->linktype &&
@@ -100,8 +116,8 @@ static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2)
100 return false; 116 return false;
101} 117}
102 118
103bool cfctrl_req_eq(struct cfctrl_request_info *r1, 119static bool cfctrl_req_eq(const struct cfctrl_request_info *r1,
104 struct cfctrl_request_info *r2) 120 const struct cfctrl_request_info *r2)
105{ 121{
106 if (r1->cmd != r2->cmd) 122 if (r1->cmd != r2->cmd)
107 return false; 123 return false;
@@ -112,23 +128,22 @@ bool cfctrl_req_eq(struct cfctrl_request_info *r1,
112} 128}
113 129
114/* Insert request at the end */ 130/* Insert request at the end */
115void cfctrl_insert_req(struct cfctrl *ctrl, 131static void cfctrl_insert_req(struct cfctrl *ctrl,
116 struct cfctrl_request_info *req) 132 struct cfctrl_request_info *req)
117{ 133{
118 spin_lock(&ctrl->info_list_lock); 134 spin_lock_bh(&ctrl->info_list_lock);
119 atomic_inc(&ctrl->req_seq_no); 135 atomic_inc(&ctrl->req_seq_no);
120 req->sequence_no = atomic_read(&ctrl->req_seq_no); 136 req->sequence_no = atomic_read(&ctrl->req_seq_no);
121 list_add_tail(&req->list, &ctrl->list); 137 list_add_tail(&req->list, &ctrl->list);
122 spin_unlock(&ctrl->info_list_lock); 138 spin_unlock_bh(&ctrl->info_list_lock);
123} 139}
124 140
125/* Compare and remove request */ 141/* Compare and remove request */
126struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, 142static struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
127 struct cfctrl_request_info *req) 143 struct cfctrl_request_info *req)
128{ 144{
129 struct cfctrl_request_info *p, *tmp, *first; 145 struct cfctrl_request_info *p, *tmp, *first;
130 146
131 spin_lock(&ctrl->info_list_lock);
132 first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list); 147 first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list);
133 148
134 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 149 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
@@ -144,7 +159,6 @@ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
144 } 159 }
145 p = NULL; 160 p = NULL;
146out: 161out:
147 spin_unlock(&ctrl->info_list_lock);
148 return p; 162 return p;
149} 163}
150 164
@@ -154,16 +168,6 @@ struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
154 return &this->res; 168 return &this->res;
155} 169}
156 170
157void cfctrl_set_dnlayer(struct cflayer *this, struct cflayer *dn)
158{
159 this->dn = dn;
160}
161
162void cfctrl_set_uplayer(struct cflayer *this, struct cflayer *up)
163{
164 this->up = up;
165}
166
167static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl) 171static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl)
168{ 172{
169 info->hdr_len = 0; 173 info->hdr_len = 0;
@@ -188,10 +192,6 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
188 cfpkt_addbdy(pkt, physlinkid); 192 cfpkt_addbdy(pkt, physlinkid);
189 ret = 193 ret =
190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 194 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
191 if (ret < 0) {
192 pr_err("Could not transmit enum message\n");
193 cfpkt_destroy(pkt);
194 }
195} 195}
196 196
197int cfctrl_linkup_request(struct cflayer *layer, 197int cfctrl_linkup_request(struct cflayer *layer,
@@ -205,14 +205,23 @@ int cfctrl_linkup_request(struct cflayer *layer,
205 struct cfctrl_request_info *req; 205 struct cfctrl_request_info *req;
206 int ret; 206 int ret;
207 char utility_name[16]; 207 char utility_name[16];
208 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 208 struct cfpkt *pkt;
209
210 if (cfctrl_cancel_req(layer, user_layer) > 0) {
211 /* Slight Paranoia, check if already connecting */
212 pr_err("Duplicate connect request for same client\n");
213 WARN_ON(1);
214 return -EALREADY;
215 }
216
217 pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
209 if (!pkt) { 218 if (!pkt) {
210 pr_warn("Out of memory\n"); 219 pr_warn("Out of memory\n");
211 return -ENOMEM; 220 return -ENOMEM;
212 } 221 }
213 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); 222 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
214 cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype); 223 cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype);
215 cfpkt_addbdy(pkt, (param->priority << 3) + param->phyid); 224 cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid);
216 cfpkt_addbdy(pkt, param->endpoint & 0x03); 225 cfpkt_addbdy(pkt, param->endpoint & 0x03);
217 226
218 switch (param->linktype) { 227 switch (param->linktype) {
@@ -275,9 +284,13 @@ int cfctrl_linkup_request(struct cflayer *layer,
275 ret = 284 ret =
276 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 285 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
277 if (ret < 0) { 286 if (ret < 0) {
278 pr_err("Could not transmit linksetup request\n"); 287 int count;
279 cfpkt_destroy(pkt); 288
280 return -ENODEV; 289 count = cfctrl_cancel_req(&cfctrl->serv.layer,
290 user_layer);
291 if (count != 1)
292 pr_err("Could not remove request (%d)", count);
293 return -ENODEV;
281 } 294 }
282 return 0; 295 return 0;
283} 296}
@@ -297,80 +310,29 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
297 init_info(cfpkt_info(pkt), cfctrl); 310 init_info(cfpkt_info(pkt), cfctrl);
298 ret = 311 ret =
299 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 312 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
300 if (ret < 0) { 313#ifndef CAIF_NO_LOOP
301 pr_err("Could not transmit link-down request\n"); 314 cfctrl->loop_linkused[channelid] = 0;
302 cfpkt_destroy(pkt); 315#endif
303 }
304 return ret; 316 return ret;
305} 317}
306 318
307void cfctrl_sleep_req(struct cflayer *layer) 319int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
308{
309 int ret;
310 struct cfctrl *cfctrl = container_obj(layer);
311 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
312 if (!pkt) {
313 pr_warn("Out of memory\n");
314 return;
315 }
316 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
317 init_info(cfpkt_info(pkt), cfctrl);
318 ret =
319 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
320 if (ret < 0)
321 cfpkt_destroy(pkt);
322}
323
324void cfctrl_wake_req(struct cflayer *layer)
325{
326 int ret;
327 struct cfctrl *cfctrl = container_obj(layer);
328 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
329 if (!pkt) {
330 pr_warn("Out of memory\n");
331 return;
332 }
333 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
334 init_info(cfpkt_info(pkt), cfctrl);
335 ret =
336 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
337 if (ret < 0)
338 cfpkt_destroy(pkt);
339}
340
341void cfctrl_getstartreason_req(struct cflayer *layer)
342{
343 int ret;
344 struct cfctrl *cfctrl = container_obj(layer);
345 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
346 if (!pkt) {
347 pr_warn("Out of memory\n");
348 return;
349 }
350 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
351 init_info(cfpkt_info(pkt), cfctrl);
352 ret =
353 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
354 if (ret < 0)
355 cfpkt_destroy(pkt);
356}
357
358
359void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
360{ 320{
361 struct cfctrl_request_info *p, *tmp; 321 struct cfctrl_request_info *p, *tmp;
362 struct cfctrl *ctrl = container_obj(layr); 322 struct cfctrl *ctrl = container_obj(layr);
363 spin_lock(&ctrl->info_list_lock); 323 int found = 0;
324 spin_lock_bh(&ctrl->info_list_lock);
364 325
365 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 326 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
366 if (p->client_layer == adap_layer) { 327 if (p->client_layer == adap_layer) {
367 pr_debug("cancel req :%d\n", p->sequence_no);
368 list_del(&p->list); 328 list_del(&p->list);
369 kfree(p); 329 kfree(p);
330 found++;
370 } 331 }
371 } 332 }
372 333
373 spin_unlock(&ctrl->info_list_lock); 334 spin_unlock_bh(&ctrl->info_list_lock);
335 return found;
374} 336}
375 337
376static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) 338static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
@@ -522,6 +484,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
522 484
523 rsp.cmd = cmd; 485 rsp.cmd = cmd;
524 rsp.param = linkparam; 486 rsp.param = linkparam;
487 spin_lock_bh(&cfctrl->info_list_lock);
525 req = cfctrl_remove_req(cfctrl, &rsp); 488 req = cfctrl_remove_req(cfctrl, &rsp);
526 489
527 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || 490 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
@@ -541,6 +504,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
541 504
542 if (req != NULL) 505 if (req != NULL)
543 kfree(req); 506 kfree(req);
507
508 spin_unlock_bh(&cfctrl->info_list_lock);
544 } 509 }
545 break; 510 break;
546 case CFCTRL_CMD_LINK_DESTROY: 511 case CFCTRL_CMD_LINK_DESTROY:
@@ -584,12 +549,29 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
584 switch (ctrl) { 549 switch (ctrl) {
585 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: 550 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
586 case CAIF_CTRLCMD_FLOW_OFF_IND: 551 case CAIF_CTRLCMD_FLOW_OFF_IND:
587 spin_lock(&this->info_list_lock); 552 spin_lock_bh(&this->info_list_lock);
588 if (!list_empty(&this->list)) { 553 if (!list_empty(&this->list)) {
589 pr_debug("Received flow off in control layer\n"); 554 pr_debug("Received flow off in control layer\n");
590 } 555 }
591 spin_unlock(&this->info_list_lock); 556 spin_unlock_bh(&this->info_list_lock);
592 break; 557 break;
558 case _CAIF_CTRLCMD_PHYIF_DOWN_IND: {
559 struct cfctrl_request_info *p, *tmp;
560
561 /* Find all connect request and report failure */
562 spin_lock_bh(&this->info_list_lock);
563 list_for_each_entry_safe(p, tmp, &this->list, list) {
564 if (p->param.phyid == phyid) {
565 list_del(&p->list);
566 p->client_layer->ctrlcmd(p->client_layer,
567 CAIF_CTRLCMD_INIT_FAIL_RSP,
568 phyid);
569 kfree(p);
570 }
571 }
572 spin_unlock_bh(&this->info_list_lock);
573 break;
574 }
593 default: 575 default:
594 break; 576 break;
595 } 577 }
@@ -599,27 +581,33 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
599static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt) 581static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
600{ 582{
601 static int last_linkid; 583 static int last_linkid;
584 static int dec;
602 u8 linkid, linktype, tmp; 585 u8 linkid, linktype, tmp;
603 switch (cmd) { 586 switch (cmd) {
604 case CFCTRL_CMD_LINK_SETUP: 587 case CFCTRL_CMD_LINK_SETUP:
605 spin_lock(&ctrl->loop_linkid_lock); 588 spin_lock_bh(&ctrl->loop_linkid_lock);
606 for (linkid = last_linkid + 1; linkid < 255; linkid++) 589 if (!dec) {
607 if (!ctrl->loop_linkused[linkid]) 590 for (linkid = last_linkid + 1; linkid < 255; linkid++)
608 goto found; 591 if (!ctrl->loop_linkused[linkid])
592 goto found;
593 }
594 dec = 1;
609 for (linkid = last_linkid - 1; linkid > 0; linkid--) 595 for (linkid = last_linkid - 1; linkid > 0; linkid--)
610 if (!ctrl->loop_linkused[linkid]) 596 if (!ctrl->loop_linkused[linkid])
611 goto found; 597 goto found;
612 spin_unlock(&ctrl->loop_linkid_lock); 598 spin_unlock_bh(&ctrl->loop_linkid_lock);
613 pr_err("Out of link-ids\n"); 599
614 return -EINVAL;
615found: 600found:
601 if (linkid < 10)
602 dec = 0;
603
616 if (!ctrl->loop_linkused[linkid]) 604 if (!ctrl->loop_linkused[linkid])
617 ctrl->loop_linkused[linkid] = 1; 605 ctrl->loop_linkused[linkid] = 1;
618 606
619 last_linkid = linkid; 607 last_linkid = linkid;
620 608
621 cfpkt_add_trail(pkt, &linkid, 1); 609 cfpkt_add_trail(pkt, &linkid, 1);
622 spin_unlock(&ctrl->loop_linkid_lock); 610 spin_unlock_bh(&ctrl->loop_linkid_lock);
623 cfpkt_peek_head(pkt, &linktype, 1); 611 cfpkt_peek_head(pkt, &linktype, 1);
624 if (linktype == CFCTRL_SRV_UTIL) { 612 if (linktype == CFCTRL_SRV_UTIL) {
625 tmp = 0x01; 613 tmp = 0x01;
@@ -629,10 +617,10 @@ found:
629 break; 617 break;
630 618
631 case CFCTRL_CMD_LINK_DESTROY: 619 case CFCTRL_CMD_LINK_DESTROY:
632 spin_lock(&ctrl->loop_linkid_lock); 620 spin_lock_bh(&ctrl->loop_linkid_lock);
633 cfpkt_peek_head(pkt, &linkid, 1); 621 cfpkt_peek_head(pkt, &linkid, 1);
634 ctrl->loop_linkused[linkid] = 0; 622 ctrl->loop_linkused[linkid] = 0;
635 spin_unlock(&ctrl->loop_linkid_lock); 623 spin_unlock_bh(&ctrl->loop_linkid_lock);
636 break; 624 break;
637 default: 625 default:
638 break; 626 break;
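
Two things change in cfctrl.c above: the request list is now manipulated under spin_lock_bh() (with cfctrl_remove_req() expecting the caller to hold the lock), and cfctrl_cancel_req() returns how many pending requests it removed, so cfctrl_linkup_request() can reject a duplicate connect up front and roll back its own entry when the transmit fails. A minimal userspace sketch of that count-while-removing pattern (the req struct and hand-rolled singly linked list are illustrative, not the kernel's list_head machinery):

#include <stdio.h>
#include <stdlib.h>

struct req {
        void *client;                   /* stand-in for p->client_layer */
        struct req *next;
};

/* Remove every pending request owned by 'client' and report how many
 * were dropped, mirroring the new cfctrl_cancel_req() return value. */
static int cancel_reqs(struct req **head, void *client)
{
        struct req **pp = head;
        int found = 0;

        while (*pp) {
                struct req *p = *pp;

                if (p->client == client) {
                        *pp = p->next;
                        free(p);
                        found++;
                } else {
                        pp = &p->next;
                }
        }
        return found;
}

int main(void)
{
        int a, b;
        struct req *head = NULL, *r;

        r = calloc(1, sizeof(*r));
        r->client = &a;
        r->next = head;
        head = r;

        r = calloc(1, sizeof(*r));
        r->client = &b;
        r->next = head;
        head = r;

        printf("removed for a: %d\n", cancel_reqs(&head, &a));  /* 1 */
        printf("removed for a: %d\n", cancel_reqs(&head, &a));  /* 0 */
        return 0;
}

In the kernel version the same scan runs under info_list_lock with list_for_each_entry_safe(), since entries are freed while walking.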
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index d3ed264ad6c4..0382dec84fdc 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -13,12 +13,12 @@
13#include <net/caif/cfsrvl.h> 13#include <net/caif/cfsrvl.h>
14#include <net/caif/cfpkt.h> 14#include <net/caif/cfpkt.h>
15 15
16
16#define container_obj(layr) ((struct cfsrvl *) layr) 17#define container_obj(layr) ((struct cfsrvl *) layr)
17 18
18#define DGM_CMD_BIT 0x80 19#define DGM_CMD_BIT 0x80
19#define DGM_FLOW_OFF 0x81 20#define DGM_FLOW_OFF 0x81
20#define DGM_FLOW_ON 0x80 21#define DGM_FLOW_ON 0x80
21#define DGM_CTRL_PKT_SIZE 1
22#define DGM_MTU 1500 22#define DGM_MTU 1500
23 23
24static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); 24static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
@@ -84,6 +84,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
84 84
85static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) 85static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
86{ 86{
87 u8 packet_type;
87 u32 zero = 0; 88 u32 zero = 0;
88 struct caif_payload_info *info; 89 struct caif_payload_info *info;
89 struct cfsrvl *service = container_obj(layr); 90 struct cfsrvl *service = container_obj(layr);
@@ -95,7 +96,9 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
95 if (cfpkt_getlen(pkt) > DGM_MTU) 96 if (cfpkt_getlen(pkt) > DGM_MTU)
96 return -EMSGSIZE; 97 return -EMSGSIZE;
97 98
98 cfpkt_add_head(pkt, &zero, 4); 99 cfpkt_add_head(pkt, &zero, 3);
100 packet_type = 0x08; /* B9 set - UNCLASSIFIED */
101 cfpkt_add_head(pkt, &packet_type, 1);
99 102
100 /* Add info for MUX-layer to route the packet out. */ 103 /* Add info for MUX-layer to route the packet out. */
101 info = cfpkt_info(pkt); 104 info = cfpkt_info(pkt);
@@ -105,10 +108,5 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
105 */ 108 */
106 info->hdr_len = 4; 109 info->hdr_len = 4;
107 info->dev_info = &service->dev_info; 110 info->dev_info = &service->dev_info;
108 ret = layr->dn->transmit(layr->dn, pkt); 111 return layr->dn->transmit(layr->dn, pkt);
109 if (ret < 0) {
110 u32 tmp32;
111 cfpkt_extr_head(pkt, &tmp32, 4);
112 }
113 return ret;
114} 112}
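
The cfdgml_transmit() change above replaces the four zero bytes prepended to each datagram with an explicit packet-type byte (0x08, "B9 set - UNCLASSIFIED" per the comment) followed by three zero bytes; since cfpkt_add_head() prepends, the type byte ends up first on the wire and info->hdr_len stays 4. A small sketch of the resulting header layout (the DGM_PKT_UNCLASSIFIED name is assumed for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DGM_PKT_UNCLASSIFIED 0x08       /* assumed name; "B9 set" in the diff */

/* Lay out the 4-byte CAIF datagram header the way the new
 * cfdgml_transmit() builds it: the packet-type byte first, then three
 * zero bytes (add_head prepends, so the last addition ends up first). */
static size_t dgm_build_header(uint8_t *buf)
{
        buf[0] = DGM_PKT_UNCLASSIFIED;
        memset(buf + 1, 0, 3);
        return 4;                       /* matches info->hdr_len = 4 */
}

int main(void)
{
        uint8_t hdr[4];
        size_t len = dgm_build_header(hdr);

        printf("hdr_len=%zu type=0x%02x\n", len, hdr[0]);
        return 0;
}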
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index a445043931ae..04204b202718 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -12,6 +12,7 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/crc-ccitt.h> 14#include <linux/crc-ccitt.h>
15#include <linux/netdevice.h>
15#include <net/caif/caif_layer.h> 16#include <net/caif/caif_layer.h>
16#include <net/caif/cfpkt.h> 17#include <net/caif/cfpkt.h>
17#include <net/caif/cffrml.h> 18#include <net/caif/cffrml.h>
@@ -21,6 +22,7 @@
21struct cffrml { 22struct cffrml {
22 struct cflayer layer; 23 struct cflayer layer;
23 bool dofcs; /* !< FCS active */ 24 bool dofcs; /* !< FCS active */
25 int __percpu *pcpu_refcnt;
24}; 26};
25 27
26static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); 28static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
@@ -37,6 +39,12 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
37 pr_warn("Out of memory\n"); 39 pr_warn("Out of memory\n");
38 return NULL; 40 return NULL;
39 } 41 }
42 this->pcpu_refcnt = alloc_percpu(int);
43 if (this->pcpu_refcnt == NULL) {
44 kfree(this);
45 return NULL;
46 }
47
40 caif_assert(offsetof(struct cffrml, layer) == 0); 48 caif_assert(offsetof(struct cffrml, layer) == 0);
41 49
42 memset(this, 0, sizeof(struct cflayer)); 50 memset(this, 0, sizeof(struct cflayer));
@@ -49,6 +57,13 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
49 return (struct cflayer *) this; 57 return (struct cflayer *) this;
50} 58}
51 59
60void cffrml_free(struct cflayer *layer)
61{
62 struct cffrml *this = container_obj(layer);
63 free_percpu(this->pcpu_refcnt);
64 kfree(layer);
65}
66
52void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up) 67void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up)
53{ 68{
54 this->up = up; 69 this->up = up;
@@ -112,6 +127,13 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
112 cfpkt_destroy(pkt); 127 cfpkt_destroy(pkt);
113 return -EPROTO; 128 return -EPROTO;
114 } 129 }
130
131 if (layr->up == NULL) {
132 pr_err("Layr up is missing!\n");
133 cfpkt_destroy(pkt);
134 return -EINVAL;
135 }
136
115 return layr->up->receive(layr->up, pkt); 137 return layr->up->receive(layr->up, pkt);
116} 138}
117 139
@@ -120,7 +142,6 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
120 int tmp; 142 int tmp;
121 u16 chks; 143 u16 chks;
122 u16 len; 144 u16 len;
123 int ret;
124 struct cffrml *this = container_obj(layr); 145 struct cffrml *this = container_obj(layr);
125 if (this->dofcs) { 146 if (this->dofcs) {
126 chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); 147 chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
@@ -135,19 +156,44 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
135 cfpkt_info(pkt)->hdr_len += 2; 156 cfpkt_info(pkt)->hdr_len += 2;
136 if (cfpkt_erroneous(pkt)) { 157 if (cfpkt_erroneous(pkt)) {
137 pr_err("Packet is erroneous!\n"); 158 pr_err("Packet is erroneous!\n");
159 cfpkt_destroy(pkt);
138 return -EPROTO; 160 return -EPROTO;
139 } 161 }
140 ret = layr->dn->transmit(layr->dn, pkt); 162
141 if (ret < 0) { 163 if (layr->dn == NULL) {
142 /* Remove header on faulty packet. */ 164 cfpkt_destroy(pkt);
143 cfpkt_extr_head(pkt, &tmp, 2); 165 return -ENODEV;
166
144 } 167 }
145 return ret; 168 return layr->dn->transmit(layr->dn, pkt);
146} 169}
147 170
148static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 171static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
149 int phyid) 172 int phyid)
150{ 173{
151 if (layr->up->ctrlcmd) 174 if (layr->up && layr->up->ctrlcmd)
152 layr->up->ctrlcmd(layr->up, ctrl, layr->id); 175 layr->up->ctrlcmd(layr->up, ctrl, layr->id);
153} 176}
177
178void cffrml_put(struct cflayer *layr)
179{
180 struct cffrml *this = container_obj(layr);
181 if (layr != NULL && this->pcpu_refcnt != NULL)
182 irqsafe_cpu_dec(*this->pcpu_refcnt);
183}
184
185void cffrml_hold(struct cflayer *layr)
186{
187 struct cffrml *this = container_obj(layr);
188 if (layr != NULL && this->pcpu_refcnt != NULL)
189 irqsafe_cpu_inc(*this->pcpu_refcnt);
190}
191
192int cffrml_refcnt_read(struct cflayer *layr)
193{
194 int i, refcnt = 0;
195 struct cffrml *this = container_obj(layr);
196 for_each_possible_cpu(i)
197 refcnt += *per_cpu_ptr(this->pcpu_refcnt, i);
198 return refcnt;
199}
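
cffrml now carries a per-CPU reference counter: cffrml_hold()/cffrml_put() bump only the local CPU's counter with irqsafe_cpu_inc()/irqsafe_cpu_dec(), and cffrml_refcnt_read() sums across all possible CPUs, so the hot transmit path pays almost nothing and only the teardown path does the full summation. A userspace analogue, with a fixed array standing in for alloc_percpu():

#include <stdio.h>

#define NR_CPUS 4                       /* illustrative; the kernel uses alloc_percpu() */

static int pcpu_refcnt[NR_CPUS];

/* hold/put touch only the "local" CPU's counter, like cffrml_hold() and
 * cffrml_put() using irqsafe_cpu_inc()/irqsafe_cpu_dec(). */
static void frml_hold(int cpu) { pcpu_refcnt[cpu]++; }
static void frml_put(int cpu)  { pcpu_refcnt[cpu]--; }

/* The slow path sums every per-CPU counter, like cffrml_refcnt_read(). */
static int frml_refcnt_read(void)
{
        int cpu, refcnt = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                refcnt += pcpu_refcnt[cpu];
        return refcnt;
}

int main(void)
{
        frml_hold(0);
        frml_hold(3);
        frml_put(0);
        printf("refcnt = %d\n", frml_refcnt_read());    /* 1 */
        return 0;
}

The summed value is only trustworthy once new holds are excluded, which is why cfcnfg_del_phy_layer() above unlinks the phyinfo with list_del_rcu() and calls synchronize_rcu() before checking cffrml_refcnt_read().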
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 46f34b2e0478..2a56df7e0a4b 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -9,6 +9,7 @@
9#include <linux/stddef.h> 9#include <linux/stddef.h>
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/rculist.h>
12#include <net/caif/cfpkt.h> 13#include <net/caif/cfpkt.h>
13#include <net/caif/cfmuxl.h> 14#include <net/caif/cfmuxl.h>
14#include <net/caif/cfsrvl.h> 15#include <net/caif/cfsrvl.h>
@@ -64,66 +65,31 @@ struct cflayer *cfmuxl_create(void)
64int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid) 65int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid)
65{ 66{
66 struct cfmuxl *muxl = container_obj(layr); 67 struct cfmuxl *muxl = container_obj(layr);
67 spin_lock(&muxl->receive_lock);
68 cfsrvl_get(up);
69 list_add(&up->node, &muxl->srvl_list);
70 spin_unlock(&muxl->receive_lock);
71 return 0;
72}
73
74bool cfmuxl_is_phy_inuse(struct cflayer *layr, u8 phyid)
75{
76 struct list_head *node;
77 struct cflayer *layer;
78 struct cfmuxl *muxl = container_obj(layr);
79 bool match = false;
80 spin_lock(&muxl->receive_lock);
81
82 list_for_each(node, &muxl->srvl_list) {
83 layer = list_entry(node, struct cflayer, node);
84 if (cfsrvl_phyid_match(layer, phyid)) {
85 match = true;
86 break;
87 }
88
89 }
90 spin_unlock(&muxl->receive_lock);
91 return match;
92}
93 68
94u8 cfmuxl_get_phyid(struct cflayer *layr, u8 channel_id) 69 spin_lock_bh(&muxl->receive_lock);
95{ 70 list_add_rcu(&up->node, &muxl->srvl_list);
96 struct cflayer *up; 71 spin_unlock_bh(&muxl->receive_lock);
97 int phyid; 72 return 0;
98 struct cfmuxl *muxl = container_obj(layr);
99 spin_lock(&muxl->receive_lock);
100 up = get_up(muxl, channel_id);
101 if (up != NULL)
102 phyid = cfsrvl_getphyid(up);
103 else
104 phyid = 0;
105 spin_unlock(&muxl->receive_lock);
106 return phyid;
107} 73}
108 74
109int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid) 75int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid)
110{ 76{
111 struct cfmuxl *muxl = (struct cfmuxl *) layr; 77 struct cfmuxl *muxl = (struct cfmuxl *) layr;
112 spin_lock(&muxl->transmit_lock); 78
113 list_add(&dn->node, &muxl->frml_list); 79 spin_lock_bh(&muxl->transmit_lock);
114 spin_unlock(&muxl->transmit_lock); 80 list_add_rcu(&dn->node, &muxl->frml_list);
81 spin_unlock_bh(&muxl->transmit_lock);
115 return 0; 82 return 0;
116} 83}
117 84
118static struct cflayer *get_from_id(struct list_head *list, u16 id) 85static struct cflayer *get_from_id(struct list_head *list, u16 id)
119{ 86{
120 struct list_head *node; 87 struct cflayer *lyr;
121 struct cflayer *layer; 88 list_for_each_entry_rcu(lyr, list, node) {
122 list_for_each(node, list) { 89 if (lyr->id == id)
123 layer = list_entry(node, struct cflayer, node); 90 return lyr;
124 if (layer->id == id)
125 return layer;
126 } 91 }
92
127 return NULL; 93 return NULL;
128} 94}
129 95
@@ -131,41 +97,45 @@ struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid)
131{ 97{
132 struct cfmuxl *muxl = container_obj(layr); 98 struct cfmuxl *muxl = container_obj(layr);
133 struct cflayer *dn; 99 struct cflayer *dn;
134 spin_lock(&muxl->transmit_lock); 100 int idx = phyid % DN_CACHE_SIZE;
135 memset(muxl->dn_cache, 0, sizeof(muxl->dn_cache)); 101
102 spin_lock_bh(&muxl->transmit_lock);
103 rcu_assign_pointer(muxl->dn_cache[idx], NULL);
136 dn = get_from_id(&muxl->frml_list, phyid); 104 dn = get_from_id(&muxl->frml_list, phyid);
137 if (dn == NULL) { 105 if (dn == NULL)
138 spin_unlock(&muxl->transmit_lock); 106 goto out;
139 return NULL; 107
140 } 108 list_del_rcu(&dn->node);
141 list_del(&dn->node);
142 caif_assert(dn != NULL); 109 caif_assert(dn != NULL);
143 spin_unlock(&muxl->transmit_lock); 110out:
111 spin_unlock_bh(&muxl->transmit_lock);
144 return dn; 112 return dn;
145} 113}
146 114
147/* Invariant: lock is taken */
148static struct cflayer *get_up(struct cfmuxl *muxl, u16 id) 115static struct cflayer *get_up(struct cfmuxl *muxl, u16 id)
149{ 116{
150 struct cflayer *up; 117 struct cflayer *up;
151 int idx = id % UP_CACHE_SIZE; 118 int idx = id % UP_CACHE_SIZE;
152 up = muxl->up_cache[idx]; 119 up = rcu_dereference(muxl->up_cache[idx]);
153 if (up == NULL || up->id != id) { 120 if (up == NULL || up->id != id) {
121 spin_lock_bh(&muxl->receive_lock);
154 up = get_from_id(&muxl->srvl_list, id); 122 up = get_from_id(&muxl->srvl_list, id);
155 muxl->up_cache[idx] = up; 123 rcu_assign_pointer(muxl->up_cache[idx], up);
124 spin_unlock_bh(&muxl->receive_lock);
156 } 125 }
157 return up; 126 return up;
158} 127}
159 128
160/* Invariant: lock is taken */
161static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info) 129static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info)
162{ 130{
163 struct cflayer *dn; 131 struct cflayer *dn;
164 int idx = dev_info->id % DN_CACHE_SIZE; 132 int idx = dev_info->id % DN_CACHE_SIZE;
165 dn = muxl->dn_cache[idx]; 133 dn = rcu_dereference(muxl->dn_cache[idx]);
166 if (dn == NULL || dn->id != dev_info->id) { 134 if (dn == NULL || dn->id != dev_info->id) {
135 spin_lock_bh(&muxl->transmit_lock);
167 dn = get_from_id(&muxl->frml_list, dev_info->id); 136 dn = get_from_id(&muxl->frml_list, dev_info->id);
168 muxl->dn_cache[idx] = dn; 137 rcu_assign_pointer(muxl->dn_cache[idx], dn);
138 spin_unlock_bh(&muxl->transmit_lock);
169 } 139 }
170 return dn; 140 return dn;
171} 141}
@@ -174,15 +144,17 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
174{ 144{
175 struct cflayer *up; 145 struct cflayer *up;
176 struct cfmuxl *muxl = container_obj(layr); 146 struct cfmuxl *muxl = container_obj(layr);
177 spin_lock(&muxl->receive_lock); 147 int idx = id % UP_CACHE_SIZE;
178 up = get_up(muxl, id); 148
149 spin_lock_bh(&muxl->receive_lock);
150 up = get_from_id(&muxl->srvl_list, id);
179 if (up == NULL) 151 if (up == NULL)
180 goto out; 152 goto out;
181 memset(muxl->up_cache, 0, sizeof(muxl->up_cache)); 153
182 list_del(&up->node); 154 rcu_assign_pointer(muxl->up_cache[idx], NULL);
183 cfsrvl_put(up); 155 list_del_rcu(&up->node);
184out: 156out:
185 spin_unlock(&muxl->receive_lock); 157 spin_unlock_bh(&muxl->receive_lock);
186 return up; 158 return up;
187} 159}
188 160
@@ -197,58 +169,78 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
197 cfpkt_destroy(pkt); 169 cfpkt_destroy(pkt);
198 return -EPROTO; 170 return -EPROTO;
199 } 171 }
200 172 rcu_read_lock();
201 spin_lock(&muxl->receive_lock);
202 up = get_up(muxl, id); 173 up = get_up(muxl, id);
203 spin_unlock(&muxl->receive_lock); 174
204 if (up == NULL) { 175 if (up == NULL) {
205 pr_info("Received data on unknown link ID = %d (0x%x) up == NULL", 176 pr_debug("Received data on unknown link ID = %d (0x%x)"
206 id, id); 177 " up == NULL", id, id);
207 cfpkt_destroy(pkt); 178 cfpkt_destroy(pkt);
208 /* 179 /*
209 * Don't return ERROR, since modem misbehaves and sends out 180 * Don't return ERROR, since modem misbehaves and sends out
210 * flow on before linksetup response. 181 * flow on before linksetup response.
211 */ 182 */
183
184 rcu_read_unlock();
212 return /* CFGLU_EPROT; */ 0; 185 return /* CFGLU_EPROT; */ 0;
213 } 186 }
187
188 /* We can't hold rcu_lock during receive, so take a ref count instead */
214 cfsrvl_get(up); 189 cfsrvl_get(up);
190 rcu_read_unlock();
191
215 ret = up->receive(up, pkt); 192 ret = up->receive(up, pkt);
193
216 cfsrvl_put(up); 194 cfsrvl_put(up);
217 return ret; 195 return ret;
218} 196}
219 197
220static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) 198static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
221{ 199{
222 int ret;
223 struct cfmuxl *muxl = container_obj(layr); 200 struct cfmuxl *muxl = container_obj(layr);
201 int err;
224 u8 linkid; 202 u8 linkid;
225 struct cflayer *dn; 203 struct cflayer *dn;
226 struct caif_payload_info *info = cfpkt_info(pkt); 204 struct caif_payload_info *info = cfpkt_info(pkt);
227 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); 205 BUG_ON(!info);
206
207 rcu_read_lock();
208
209 dn = get_dn(muxl, info->dev_info);
228 if (dn == NULL) { 210 if (dn == NULL) {
229 pr_warn("Send data on unknown phy ID = %d (0x%x)\n", 211 pr_debug("Send data on unknown phy ID = %d (0x%x)\n",
230 info->dev_info->id, info->dev_info->id); 212 info->dev_info->id, info->dev_info->id);
213 rcu_read_unlock();
214 cfpkt_destroy(pkt);
231 return -ENOTCONN; 215 return -ENOTCONN;
232 } 216 }
217
233 info->hdr_len += 1; 218 info->hdr_len += 1;
234 linkid = info->channel_id; 219 linkid = info->channel_id;
235 cfpkt_add_head(pkt, &linkid, 1); 220 cfpkt_add_head(pkt, &linkid, 1);
236 ret = dn->transmit(dn, pkt); 221
237 /* Remove MUX protocol header upon error. */ 222 /* We can't hold rcu_lock during receive, so take a ref count instead */
238 if (ret < 0) 223 cffrml_hold(dn);
239 cfpkt_extr_head(pkt, &linkid, 1); 224
240 return ret; 225 rcu_read_unlock();
226
227 err = dn->transmit(dn, pkt);
228
229 cffrml_put(dn);
230 return err;
241} 231}
242 232
243static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 233static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
244 int phyid) 234 int phyid)
245{ 235{
246 struct cfmuxl *muxl = container_obj(layr); 236 struct cfmuxl *muxl = container_obj(layr);
247 struct list_head *node;
248 struct cflayer *layer; 237 struct cflayer *layer;
249 list_for_each(node, &muxl->srvl_list) { 238
250 layer = list_entry(node, struct cflayer, node); 239 rcu_read_lock();
251 if (cfsrvl_phyid_match(layer, phyid)) 240 list_for_each_entry_rcu(layer, &muxl->srvl_list, node) {
241 if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd)
242 /* NOTE: ctrlcmd is not allowed to block */
252 layer->ctrlcmd(layer, ctrl, phyid); 243 layer->ctrlcmd(layer, ctrl, phyid);
253 } 244 }
245 rcu_read_unlock();
254} 246}
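
The mux layer above moves its lookups from spinlocks to RCU: readers walk srvl_list/frml_list with list_for_each_entry_rcu(), a small id-indexed cache (up_cache/dn_cache, updated with rcu_assign_pointer()) short-circuits the common case, and because receive/transmit may block, a reference (cfsrvl_get() or cffrml_hold()) is taken before rcu_read_unlock(). The cache lookup itself tolerates slot collisions by re-checking the id; a standalone sketch of that part (the UP_CACHE_SIZE value and list_lookup() helper are assumptions for illustration):

#include <stdio.h>
#include <stddef.h>

#define UP_CACHE_SIZE 8                 /* size assumed here for illustration */

struct layer { int id; };

static struct layer *up_cache[UP_CACHE_SIZE];

/* Slow-path stand-in for get_from_id(&muxl->srvl_list, id). */
static struct layer *list_lookup(struct layer **table, int n, int id)
{
        int i;

        for (i = 0; i < n; i++)
                if (table[i] && table[i]->id == id)
                        return table[i];
        return NULL;
}

/* Mirrors get_up(): consult the id-indexed cache first and fall back to
 * the list when the slot is empty or holds an entry whose id collides
 * with a different channel. */
static struct layer *get_up(struct layer **all, int n, int id)
{
        int idx = id % UP_CACHE_SIZE;
        struct layer *up = up_cache[idx];

        if (up == NULL || up->id != id) {
                up = list_lookup(all, n, id);
                up_cache[idx] = up;     /* rcu_assign_pointer() in the kernel */
        }
        return up;
}

int main(void)
{
        struct layer a = { .id = 3 }, b = { .id = 11 }; /* 3 and 11 share slot 3 */
        struct layer *all[] = { &a, &b };

        printf("%d %d %d\n", get_up(all, 2, 3)->id,
               get_up(all, 2, 11)->id, get_up(all, 2, 3)->id);
        return 0;
}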
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index d7e865e2ff65..75d4bfae1a78 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -42,22 +42,22 @@ struct cfpkt_priv_data {
42 bool erronous; 42 bool erronous;
43}; 43};
44 44
45inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt) 45static inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt)
46{ 46{
47 return (struct cfpkt_priv_data *) pkt->skb.cb; 47 return (struct cfpkt_priv_data *) pkt->skb.cb;
48} 48}
49 49
50inline bool is_erronous(struct cfpkt *pkt) 50static inline bool is_erronous(struct cfpkt *pkt)
51{ 51{
52 return cfpkt_priv(pkt)->erronous; 52 return cfpkt_priv(pkt)->erronous;
53} 53}
54 54
55inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt) 55static inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt)
56{ 56{
57 return &pkt->skb; 57 return &pkt->skb;
58} 58}
59 59
60inline struct cfpkt *skb_to_pkt(struct sk_buff *skb) 60static inline struct cfpkt *skb_to_pkt(struct sk_buff *skb)
61{ 61{
62 return (struct cfpkt *) skb; 62 return (struct cfpkt *) skb;
63} 63}
@@ -97,21 +97,20 @@ inline struct cfpkt *cfpkt_create(u16 len)
97{ 97{
98 return cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); 98 return cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
99} 99}
100EXPORT_SYMBOL(cfpkt_create);
101 100
102void cfpkt_destroy(struct cfpkt *pkt) 101void cfpkt_destroy(struct cfpkt *pkt)
103{ 102{
104 struct sk_buff *skb = pkt_to_skb(pkt); 103 struct sk_buff *skb = pkt_to_skb(pkt);
105 kfree_skb(skb); 104 kfree_skb(skb);
106} 105}
107EXPORT_SYMBOL(cfpkt_destroy); 106
108 107
109inline bool cfpkt_more(struct cfpkt *pkt) 108inline bool cfpkt_more(struct cfpkt *pkt)
110{ 109{
111 struct sk_buff *skb = pkt_to_skb(pkt); 110 struct sk_buff *skb = pkt_to_skb(pkt);
112 return skb->len > 0; 111 return skb->len > 0;
113} 112}
114EXPORT_SYMBOL(cfpkt_more); 113
115 114
116int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len) 115int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len)
117{ 116{
@@ -123,7 +122,6 @@ int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len)
123 return !cfpkt_extr_head(pkt, data, len) && 122 return !cfpkt_extr_head(pkt, data, len) &&
124 !cfpkt_add_head(pkt, data, len); 123 !cfpkt_add_head(pkt, data, len);
125} 124}
126EXPORT_SYMBOL(cfpkt_peek_head);
127 125
128int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len) 126int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
129{ 127{
@@ -148,7 +146,6 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
148 memcpy(data, from, len); 146 memcpy(data, from, len);
149 return 0; 147 return 0;
150} 148}
151EXPORT_SYMBOL(cfpkt_extr_head);
152 149
153int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len) 150int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
154{ 151{
@@ -171,13 +168,13 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
171 memcpy(data, from, len); 168 memcpy(data, from, len);
172 return 0; 169 return 0;
173} 170}
174EXPORT_SYMBOL(cfpkt_extr_trail); 171
175 172
176int cfpkt_pad_trail(struct cfpkt *pkt, u16 len) 173int cfpkt_pad_trail(struct cfpkt *pkt, u16 len)
177{ 174{
178 return cfpkt_add_body(pkt, NULL, len); 175 return cfpkt_add_body(pkt, NULL, len);
179} 176}
180EXPORT_SYMBOL(cfpkt_pad_trail); 177
181 178
182int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len) 179int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
183{ 180{
@@ -226,13 +223,11 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
226 memcpy(to, data, len); 223 memcpy(to, data, len);
227 return 0; 224 return 0;
228} 225}
229EXPORT_SYMBOL(cfpkt_add_body);
230 226
231inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data) 227inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data)
232{ 228{
233 return cfpkt_add_body(pkt, &data, 1); 229 return cfpkt_add_body(pkt, &data, 1);
234} 230}
235EXPORT_SYMBOL(cfpkt_addbdy);
236 231
237int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) 232int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
238{ 233{
@@ -259,20 +254,20 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
259 memcpy(to, data, len); 254 memcpy(to, data, len);
260 return 0; 255 return 0;
261} 256}
262EXPORT_SYMBOL(cfpkt_add_head); 257
263 258
264inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len) 259inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len)
265{ 260{
266 return cfpkt_add_body(pkt, data, len); 261 return cfpkt_add_body(pkt, data, len);
267} 262}
268EXPORT_SYMBOL(cfpkt_add_trail); 263
269 264
270inline u16 cfpkt_getlen(struct cfpkt *pkt) 265inline u16 cfpkt_getlen(struct cfpkt *pkt)
271{ 266{
272 struct sk_buff *skb = pkt_to_skb(pkt); 267 struct sk_buff *skb = pkt_to_skb(pkt);
273 return skb->len; 268 return skb->len;
274} 269}
275EXPORT_SYMBOL(cfpkt_getlen); 270
276 271
277inline u16 cfpkt_iterate(struct cfpkt *pkt, 272inline u16 cfpkt_iterate(struct cfpkt *pkt,
278 u16 (*iter_func)(u16, void *, u16), 273 u16 (*iter_func)(u16, void *, u16),
@@ -290,7 +285,7 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
290 } 285 }
291 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); 286 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
292} 287}
293EXPORT_SYMBOL(cfpkt_iterate); 288
294 289
295int cfpkt_setlen(struct cfpkt *pkt, u16 len) 290int cfpkt_setlen(struct cfpkt *pkt, u16 len)
296{ 291{
@@ -315,18 +310,6 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
315 310
316 return cfpkt_getlen(pkt); 311 return cfpkt_getlen(pkt);
317} 312}
318EXPORT_SYMBOL(cfpkt_setlen);
319
320struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len)
321{
322 struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
323 if (!pkt)
324 return NULL;
325 if (unlikely(data != NULL))
326 cfpkt_add_body(pkt, data, len);
327 return pkt;
328}
329EXPORT_SYMBOL(cfpkt_create_uplink);
330 313
331struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, 314struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
332 struct cfpkt *addpkt, 315 struct cfpkt *addpkt,
@@ -368,7 +351,6 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
368 dst->len += addlen; 351 dst->len += addlen;
369 return skb_to_pkt(dst); 352 return skb_to_pkt(dst);
370} 353}
371EXPORT_SYMBOL(cfpkt_append);
372 354
373struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) 355struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
374{ 356{
@@ -406,174 +388,13 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
406 skb2->len += len2nd; 388 skb2->len += len2nd;
407 return skb_to_pkt(skb2); 389 return skb_to_pkt(skb2);
408} 390}
409EXPORT_SYMBOL(cfpkt_split);
410
411char *cfpkt_log_pkt(struct cfpkt *pkt, char *buf, int buflen)
412{
413 struct sk_buff *skb = pkt_to_skb(pkt);
414 char *p = buf;
415 int i;
416
417 /*
418 * Sanity check buffer length, it needs to be at least as large as
419 * the header info: ~=50+ bytes
420 */
421 if (buflen < 50)
422 return NULL;
423
424 snprintf(buf, buflen, "%s: pkt:%p len:%ld(%ld+%ld) {%ld,%ld} data: [",
425 is_erronous(pkt) ? "ERRONOUS-SKB" :
426 (skb->data_len != 0 ? "COMPLEX-SKB" : "SKB"),
427 skb,
428 (long) skb->len,
429 (long) (skb_tail_pointer(skb) - skb->data),
430 (long) skb->data_len,
431 (long) (skb->data - skb->head),
432 (long) (skb_tail_pointer(skb) - skb->head));
433 p = buf + strlen(buf);
434
435 for (i = 0; i < skb_tail_pointer(skb) - skb->data && i < 300; i++) {
436 if (p > buf + buflen - 10) {
437 sprintf(p, "...");
438 p = buf + strlen(buf);
439 break;
440 }
441 sprintf(p, "%02x,", skb->data[i]);
442 p = buf + strlen(buf);
443 }
444 sprintf(p, "]\n");
445 return buf;
446}
447EXPORT_SYMBOL(cfpkt_log_pkt);
448
449int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
450{
451 struct sk_buff *skb = pkt_to_skb(pkt);
452 struct sk_buff *lastskb;
453
454 caif_assert(buf != NULL);
455 if (unlikely(is_erronous(pkt)))
456 return -EPROTO;
457 /* Make sure SKB is writable */
458 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
459 PKT_ERROR(pkt, "skb_cow_data failed\n");
460 return -EPROTO;
461 }
462
463 if (unlikely(skb_linearize(skb) != 0)) {
464 PKT_ERROR(pkt, "linearize failed\n");
465 return -EPROTO;
466 }
467
468 if (unlikely(skb_tailroom(skb) < buflen)) {
469 PKT_ERROR(pkt, "buffer too short - failed\n");
470 return -EPROTO;
471 }
472
473 *buf = skb_put(skb, buflen);
474 return 1;
475}
476EXPORT_SYMBOL(cfpkt_raw_append);
477 391
478int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen) 392bool cfpkt_erroneous(struct cfpkt *pkt)
479{
480 struct sk_buff *skb = pkt_to_skb(pkt);
481
482 caif_assert(buf != NULL);
483 if (unlikely(is_erronous(pkt)))
484 return -EPROTO;
485
486 if (unlikely(buflen > skb->len)) {
487 PKT_ERROR(pkt, "buflen too large - failed\n");
488 return -EPROTO;
489 }
490
491 if (unlikely(buflen > skb_headlen(skb))) {
492 if (unlikely(skb_linearize(skb) != 0)) {
493 PKT_ERROR(pkt, "linearize failed\n");
494 return -EPROTO;
495 }
496 }
497
498 *buf = skb->data;
499 skb_pull(skb, buflen);
500
501 return 1;
502}
503EXPORT_SYMBOL(cfpkt_raw_extract);
504
505inline bool cfpkt_erroneous(struct cfpkt *pkt)
506{ 393{
507 return cfpkt_priv(pkt)->erronous; 394 return cfpkt_priv(pkt)->erronous;
508} 395}
509EXPORT_SYMBOL(cfpkt_erroneous);
510
511struct cfpktq *cfpktq_create(void)
512{
513 struct cfpktq *q = kmalloc(sizeof(struct cfpktq), GFP_ATOMIC);
514 if (!q)
515 return NULL;
516 skb_queue_head_init(&q->head);
517 atomic_set(&q->count, 0);
518 spin_lock_init(&q->lock);
519 return q;
520}
521EXPORT_SYMBOL(cfpktq_create);
522
523void cfpkt_queue(struct cfpktq *pktq, struct cfpkt *pkt, unsigned short prio)
524{
525 atomic_inc(&pktq->count);
526 spin_lock(&pktq->lock);
527 skb_queue_tail(&pktq->head, pkt_to_skb(pkt));
528 spin_unlock(&pktq->lock);
529
530}
531EXPORT_SYMBOL(cfpkt_queue);
532
533struct cfpkt *cfpkt_qpeek(struct cfpktq *pktq)
534{
535 struct cfpkt *tmp;
536 spin_lock(&pktq->lock);
537 tmp = skb_to_pkt(skb_peek(&pktq->head));
538 spin_unlock(&pktq->lock);
539 return tmp;
540}
541EXPORT_SYMBOL(cfpkt_qpeek);
542
543struct cfpkt *cfpkt_dequeue(struct cfpktq *pktq)
544{
545 struct cfpkt *pkt;
546 spin_lock(&pktq->lock);
547 pkt = skb_to_pkt(skb_dequeue(&pktq->head));
548 if (pkt) {
549 atomic_dec(&pktq->count);
550 caif_assert(atomic_read(&pktq->count) >= 0);
551 }
552 spin_unlock(&pktq->lock);
553 return pkt;
554}
555EXPORT_SYMBOL(cfpkt_dequeue);
556
557int cfpkt_qcount(struct cfpktq *pktq)
558{
559 return atomic_read(&pktq->count);
560}
561EXPORT_SYMBOL(cfpkt_qcount);
562
563struct cfpkt *cfpkt_clone_release(struct cfpkt *pkt)
564{
565 struct cfpkt *clone;
566 clone = skb_to_pkt(skb_clone(pkt_to_skb(pkt), GFP_ATOMIC));
567 /* Free original packet. */
568 cfpkt_destroy(pkt);
569 if (!clone)
570 return NULL;
571 return clone;
572}
573EXPORT_SYMBOL(cfpkt_clone_release);
574 396
575struct caif_payload_info *cfpkt_info(struct cfpkt *pkt) 397struct caif_payload_info *cfpkt_info(struct cfpkt *pkt)
576{ 398{
577 return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb; 399 return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb;
578} 400}
579EXPORT_SYMBOL(cfpkt_info);
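
Most of what is deleted from cfpkt_skbuff.c above is an unused external API (packet logging, raw append/extract, packet queues), and the remaining helpers become static. What survives is the thin wrapper idiom: a cfpkt embeds the sk_buff as its first member so the two pointers cast back and forth, with per-packet private state kept in the skb control buffer. A compilable toy version of that idiom (the types are simplified stand-ins for sk_buff/cfpkt, not the kernel definitions):

#include <stdio.h>
#include <string.h>

/* Simplified analogue of the cfpkt/sk_buff relationship: the packet type
 * embeds the buffer as its first member, so the two pointers cast back
 * and forth, and per-packet private state lives in a small cb[] area. */
struct skbuf {
        char   cb[8];                   /* control buffer, like skb->cb */
        size_t len;
};

struct cfpkt { struct skbuf skb; };

struct cfpkt_priv { int erronous; };    /* kernel spelling kept */

static struct skbuf *pkt_to_skb(struct cfpkt *pkt) { return &pkt->skb; }
static struct cfpkt *skb_to_pkt(struct skbuf *skb) { return (struct cfpkt *)skb; }

static struct cfpkt_priv *cfpkt_priv(struct cfpkt *pkt)
{
        return (struct cfpkt_priv *)pkt->skb.cb;
}

int main(void)
{
        struct cfpkt pkt;

        memset(&pkt, 0, sizeof(pkt));
        cfpkt_priv(&pkt)->erronous = 1;
        printf("erronous=%d len=%zu\n",
               cfpkt_priv(&pkt)->erronous, pkt_to_skb(&pkt)->len);
        return skb_to_pkt(pkt_to_skb(&pkt)) == &pkt ? 0 : 1;
}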
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index e2fb5fa75795..0deabb440051 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -31,9 +31,9 @@ struct cfrfml {
31 spinlock_t sync; 31 spinlock_t sync;
32}; 32};
33 33
34static void cfrfml_release(struct kref *kref) 34static void cfrfml_release(struct cflayer *layer)
35{ 35{
36 struct cfsrvl *srvl = container_of(kref, struct cfsrvl, ref); 36 struct cfsrvl *srvl = container_of(layer, struct cfsrvl, layer);
37 struct cfrfml *rfml = container_obj(&srvl->layer); 37 struct cfrfml *rfml = container_obj(&srvl->layer);
38 38
39 if (rfml->incomplete_frm) 39 if (rfml->incomplete_frm)
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 9297f7dea9d8..2715c84cfa87 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -25,7 +25,6 @@ struct cfserl {
25 spinlock_t sync; 25 spinlock_t sync;
26 bool usestx; 26 bool usestx;
27}; 27};
28#define STXLEN(layr) (layr->usestx ? 1 : 0)
29 28
30static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); 29static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
31static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); 30static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -180,15 +179,10 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
180static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt) 179static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
181{ 180{
182 struct cfserl *layr = container_obj(layer); 181 struct cfserl *layr = container_obj(layer);
183 int ret;
184 u8 tmp8 = CFSERL_STX; 182 u8 tmp8 = CFSERL_STX;
185 if (layr->usestx) 183 if (layr->usestx)
186 cfpkt_add_head(newpkt, &tmp8, 1); 184 cfpkt_add_head(newpkt, &tmp8, 1);
187 ret = layer->dn->transmit(layer->dn, newpkt); 185 return layer->dn->transmit(layer->dn, newpkt);
188 if (ret < 0)
189 cfpkt_extr_head(newpkt, &tmp8, 1);
190
191 return ret;
192} 186}
193 187
194static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 188static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index ab5e542526bf..535a1e72b366 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -10,6 +10,7 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/errno.h> 11#include <linux/errno.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/module.h>
13#include <net/caif/caif_layer.h> 14#include <net/caif/caif_layer.h>
14#include <net/caif/cfsrvl.h> 15#include <net/caif/cfsrvl.h>
15#include <net/caif/cfpkt.h> 16#include <net/caif/cfpkt.h>
@@ -27,8 +28,8 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
27{ 28{
28 struct cfsrvl *service = container_obj(layr); 29 struct cfsrvl *service = container_obj(layr);
29 30
30 caif_assert(layr->up != NULL); 31 if (layr->up == NULL || layr->up->ctrlcmd == NULL)
31 caif_assert(layr->up->ctrlcmd != NULL); 32 return;
32 33
33 switch (ctrl) { 34 switch (ctrl) {
34 case CAIF_CTRLCMD_INIT_RSP: 35 case CAIF_CTRLCMD_INIT_RSP:
@@ -151,14 +152,9 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
151 return -EINVAL; 152 return -EINVAL;
152} 153}
153 154
154void cfservl_destroy(struct cflayer *layer) 155static void cfsrvl_release(struct cflayer *layer)
155{ 156{
156 kfree(layer); 157 struct cfsrvl *service = container_of(layer, struct cfsrvl, layer);
157}
158
159void cfsrvl_release(struct kref *kref)
160{
161 struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
162 kfree(service); 158 kfree(service);
163} 159}
164 160
@@ -178,10 +174,8 @@ void cfsrvl_init(struct cfsrvl *service,
178 service->dev_info = *dev_info; 174 service->dev_info = *dev_info;
179 service->supports_flowctrl = supports_flowctrl; 175 service->supports_flowctrl = supports_flowctrl;
180 service->release = cfsrvl_release; 176 service->release = cfsrvl_release;
181 kref_init(&service->ref);
182} 177}
183 178
184
185bool cfsrvl_ready(struct cfsrvl *service, int *err) 179bool cfsrvl_ready(struct cfsrvl *service, int *err)
186{ 180{
187 if (service->open && service->modem_flow_on && service->phy_flow_on) 181 if (service->open && service->modem_flow_on && service->phy_flow_on)
@@ -194,6 +188,7 @@ bool cfsrvl_ready(struct cfsrvl *service, int *err)
194 *err = -EAGAIN; 188 *err = -EAGAIN;
195 return false; 189 return false;
196} 190}
191
197u8 cfsrvl_getphyid(struct cflayer *layer) 192u8 cfsrvl_getphyid(struct cflayer *layer)
198{ 193{
199 struct cfsrvl *servl = container_obj(layer); 194 struct cfsrvl *servl = container_obj(layer);
@@ -205,3 +200,26 @@ bool cfsrvl_phyid_match(struct cflayer *layer, int phyid)
205 struct cfsrvl *servl = container_obj(layer); 200 struct cfsrvl *servl = container_obj(layer);
206 return servl->dev_info.id == phyid; 201 return servl->dev_info.id == phyid;
207} 202}
203
204void caif_free_client(struct cflayer *adap_layer)
205{
206 struct cfsrvl *servl;
207 if (adap_layer == NULL || adap_layer->dn == NULL)
208 return;
209 servl = container_obj(adap_layer->dn);
210 servl->release(&servl->layer);
211}
212EXPORT_SYMBOL(caif_free_client);
213
214void caif_client_register_refcnt(struct cflayer *adapt_layer,
215 void (*hold)(struct cflayer *lyr),
216 void (*put)(struct cflayer *lyr))
217{
218 struct cfsrvl *service;
219 service = container_of(adapt_layer->dn, struct cfsrvl, layer);
220
221 WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL);
222 service->hold = hold;
223 service->put = put;
224}
225EXPORT_SYMBOL(caif_client_register_refcnt);
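
cfsrvl.c above replaces the kref-based release with a release(struct cflayer *) callback and adds caif_client_register_refcnt(), which lets a client hand the service layer hold/put callbacks; chnl_net.c further down wires these to dev_hold()/dev_put() so the network device stays pinned while CAIF holds a long-lived reference. A simplified sketch of the callback-registration shape (the structs here are illustrative, not the kernel's cfsrvl):

#include <stdio.h>

/* Simplified stand-ins for struct cfsrvl and the client object. */
struct service {
        void *client;
        void (*hold)(void *client);
        void (*put)(void *client);
};

struct client { int refs; };

static void client_hold(void *c) { ((struct client *)c)->refs++; }
static void client_put(void *c)  { ((struct client *)c)->refs--; }

/* Mirrors the shape of caif_client_register_refcnt(): the client supplies
 * the callbacks, the service layer invokes them around long-lived refs. */
static void register_refcnt(struct service *s, void *client,
                            void (*hold)(void *), void (*put)(void *))
{
        s->client = client;
        s->hold = hold;
        s->put = put;
}

int main(void)
{
        struct client netdev = { .refs = 0 };
        struct service srv;

        register_refcnt(&srv, &netdev, client_hold, client_put);
        srv.hold(srv.client);           /* like chnl_hold() -> dev_hold() */
        printf("refs=%d\n", netdev.refs);
        srv.put(srv.client);            /* like chnl_put() -> dev_put() */
        printf("refs=%d\n", netdev.refs);
        return 0;
}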
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index efad410e4c82..98e027db18ed 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -20,7 +20,7 @@
20#define UTIL_REMOTE_SHUTDOWN 0x82 20#define UTIL_REMOTE_SHUTDOWN 0x82
21#define UTIL_FLOW_OFF 0x81 21#define UTIL_FLOW_OFF 0x81
22#define UTIL_FLOW_ON 0x80 22#define UTIL_FLOW_ON 0x80
23#define UTIL_CTRL_PKT_SIZE 1 23
24static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt); 24static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
25static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); 25static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
26 26
@@ -100,10 +100,5 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
100 */ 100 */
101 info->hdr_len = 1; 101 info->hdr_len = 1;
102 info->dev_info = &service->dev_info; 102 info->dev_info = &service->dev_info;
103 ret = layr->dn->transmit(layr->dn, pkt); 103 return layr->dn->transmit(layr->dn, pkt);
104 if (ret < 0) {
105 u32 tmp32;
106 cfpkt_extr_head(pkt, &tmp32, 4);
107 }
108 return ret;
109} 104}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 3b425b189a99..3ec83fbc2887 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -17,7 +17,7 @@
17#define VEI_FLOW_OFF 0x81 17#define VEI_FLOW_OFF 0x81
18#define VEI_FLOW_ON 0x80 18#define VEI_FLOW_ON 0x80
19#define VEI_SET_PIN 0x82 19#define VEI_SET_PIN 0x82
20#define VEI_CTRL_PKT_SIZE 1 20
21#define container_obj(layr) container_of(layr, struct cfsrvl, layer) 21#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
22 22
23static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt); 23static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
@@ -82,13 +82,14 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
82 int ret; 82 int ret;
83 struct cfsrvl *service = container_obj(layr); 83 struct cfsrvl *service = container_obj(layr);
84 if (!cfsrvl_ready(service, &ret)) 84 if (!cfsrvl_ready(service, &ret))
85 return ret; 85 goto err;
86 caif_assert(layr->dn != NULL); 86 caif_assert(layr->dn != NULL);
87 caif_assert(layr->dn->transmit != NULL); 87 caif_assert(layr->dn->transmit != NULL);
88 88
89 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 89 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
90 pr_err("Packet is erroneous!\n"); 90 pr_err("Packet is erroneous!\n");
91 return -EPROTO; 91 ret = -EPROTO;
92 goto err;
92 } 93 }
93 94
94 /* Add info-> for MUX-layer to route the packet out. */ 95 /* Add info-> for MUX-layer to route the packet out. */
@@ -96,8 +97,8 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
96 info->channel_id = service->layer.id; 97 info->channel_id = service->layer.id;
97 info->hdr_len = 1; 98 info->hdr_len = 1;
98 info->dev_info = &service->dev_info; 99 info->dev_info = &service->dev_info;
99 ret = layr->dn->transmit(layr->dn, pkt); 100 return layr->dn->transmit(layr->dn, pkt);
100 if (ret < 0) 101err:
101 cfpkt_extr_head(pkt, &tmp, 1); 102 cfpkt_destroy(pkt);
102 return ret; 103 return ret;
103} 104}
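
cfveil.c above completes a pattern repeated in cfdgml, cfutill, cfserl and cfvidl in this series: transmit functions no longer strip their header and hand the packet back on failure; the packet is consumed on every path, either by the local err: label calling cfpkt_destroy() or by the lower layer once dn->transmit() has been called. A minimal sketch of that ownership rule (pkt and lower_transmit() are illustrative, and -11 merely stands in for -EAGAIN):

#include <stdio.h>
#include <stdlib.h>

struct pkt { char *data; };

static void pkt_destroy(struct pkt *p)
{
        free(p->data);
        free(p);
}

/* Stand-in lower layer: it consumes the packet even when it fails,
 * matching the reworked cffrml/cfmuxl transmit paths. */
static int lower_transmit(struct pkt *p, int fail)
{
        pkt_destroy(p);
        return fail ? -1 : 0;
}

/* Mirrors the new cfvei_transmit() contract: the packet is consumed on
 * every path, so local failures free it here and the lower layer's
 * result is returned as-is (it has already freed the packet). */
static int vei_transmit(struct pkt *p, int not_ready, int lower_fail)
{
        if (not_ready) {
                pkt_destroy(p);         /* the err: path in the diff */
                return -11;             /* stands in for -EAGAIN */
        }
        return lower_transmit(p, lower_fail);
}

int main(void)
{
        struct pkt *p = calloc(1, sizeof(*p));

        p->data = malloc(16);
        printf("ret=%d\n", vei_transmit(p, 0, 1));
        return 0;
}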
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index bf6fef2a0eff..b2f5989ad455 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -60,8 +60,5 @@ static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt)
60 info = cfpkt_info(pkt); 60 info = cfpkt_info(pkt);
61 info->channel_id = service->layer.id; 61 info->channel_id = service->layer.id;
62 info->dev_info = &service->dev_info; 62 info->dev_info = &service->dev_info;
63 ret = layr->dn->transmit(layr->dn, pkt); 63 return layr->dn->transmit(layr->dn, pkt);
64 if (ret < 0)
65 cfpkt_extr_head(pkt, &videoheader, 4);
66 return ret;
67} 64}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 6008d6dc18a0..649ebacaf6bc 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -20,7 +20,6 @@
20#include <linux/caif/if_caif.h> 20#include <linux/caif/if_caif.h>
21#include <net/rtnetlink.h> 21#include <net/rtnetlink.h>
22#include <net/caif/caif_layer.h> 22#include <net/caif/caif_layer.h>
23#include <net/caif/cfcnfg.h>
24#include <net/caif/cfpkt.h> 23#include <net/caif/cfpkt.h>
25#include <net/caif/caif_dev.h> 24#include <net/caif/caif_dev.h>
26 25
@@ -84,10 +83,11 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
84 if (!priv) 83 if (!priv)
85 return -EINVAL; 84 return -EINVAL;
86 85
86 skb = (struct sk_buff *) cfpkt_tonative(pkt);
87
87 /* Get length of CAIF packet. */ 88 /* Get length of CAIF packet. */
88 pktlen = cfpkt_getlen(pkt); 89 pktlen = skb->len;
89 90
90 skb = (struct sk_buff *) cfpkt_tonative(pkt);
91 /* Pass some minimum information and 91 /* Pass some minimum information and
92 * send the packet to the net stack. 92 * send the packet to the net stack.
93 */ 93 */
@@ -153,6 +153,18 @@ static void close_work(struct work_struct *work)
153} 153}
154static DECLARE_WORK(close_worker, close_work); 154static DECLARE_WORK(close_worker, close_work);
155 155
156static void chnl_hold(struct cflayer *lyr)
157{
158 struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl);
159 dev_hold(priv->netdev);
160}
161
162static void chnl_put(struct cflayer *lyr)
163{
164 struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl);
165 dev_put(priv->netdev);
166}
167
156static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, 168static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
157 int phyid) 169 int phyid)
158{ 170{
@@ -190,6 +202,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
190 netif_wake_queue(priv->netdev); 202 netif_wake_queue(priv->netdev);
191 break; 203 break;
192 case CAIF_CTRLCMD_INIT_RSP: 204 case CAIF_CTRLCMD_INIT_RSP:
205 caif_client_register_refcnt(&priv->chnl, chnl_hold, chnl_put);
193 priv->state = CAIF_CONNECTED; 206 priv->state = CAIF_CONNECTED;
194 priv->flowenabled = true; 207 priv->flowenabled = true;
195 netif_wake_queue(priv->netdev); 208 netif_wake_queue(priv->netdev);
@@ -257,8 +270,9 @@ static int chnl_net_open(struct net_device *dev)
257 270
258 if (priv->state != CAIF_CONNECTING) { 271 if (priv->state != CAIF_CONNECTING) {
259 priv->state = CAIF_CONNECTING; 272 priv->state = CAIF_CONNECTING;
260 result = caif_connect_client(&priv->conn_req, &priv->chnl, 273 result = caif_connect_client(dev_net(dev), &priv->conn_req,
261 &llifindex, &headroom, &tailroom); 274 &priv->chnl, &llifindex,
275 &headroom, &tailroom);
262 if (result != 0) { 276 if (result != 0) {
263 pr_debug("err: " 277 pr_debug("err: "
264 "Unable to register and open device," 278 "Unable to register and open device,"
@@ -314,7 +328,7 @@ static int chnl_net_open(struct net_device *dev)
314 328
315 if (result == 0) { 329 if (result == 0) {
316 pr_debug("connect timeout\n"); 330 pr_debug("connect timeout\n");
317 caif_disconnect_client(&priv->chnl); 331 caif_disconnect_client(dev_net(dev), &priv->chnl);
318 priv->state = CAIF_DISCONNECTED; 332 priv->state = CAIF_DISCONNECTED;
319 pr_debug("state disconnected\n"); 333 pr_debug("state disconnected\n");
320 result = -ETIMEDOUT; 334 result = -ETIMEDOUT;
@@ -330,7 +344,7 @@ static int chnl_net_open(struct net_device *dev)
330 return 0; 344 return 0;
331 345
332error: 346error:
333 caif_disconnect_client(&priv->chnl); 347 caif_disconnect_client(dev_net(dev), &priv->chnl);
334 priv->state = CAIF_DISCONNECTED; 348 priv->state = CAIF_DISCONNECTED;
335 pr_debug("state disconnected\n"); 349 pr_debug("state disconnected\n");
336 return result; 350 return result;
@@ -344,7 +358,7 @@ static int chnl_net_stop(struct net_device *dev)
344 ASSERT_RTNL(); 358 ASSERT_RTNL();
345 priv = netdev_priv(dev); 359 priv = netdev_priv(dev);
346 priv->state = CAIF_DISCONNECTED; 360 priv->state = CAIF_DISCONNECTED;
347 caif_disconnect_client(&priv->chnl); 361 caif_disconnect_client(dev_net(dev), &priv->chnl);
348 return 0; 362 return 0;
349} 363}
350 364
@@ -373,11 +387,18 @@ static const struct net_device_ops netdev_ops = {
373 .ndo_start_xmit = chnl_net_start_xmit, 387 .ndo_start_xmit = chnl_net_start_xmit,
374}; 388};
375 389
390static void chnl_net_destructor(struct net_device *dev)
391{
392 struct chnl_net *priv = netdev_priv(dev);
393 caif_free_client(&priv->chnl);
394 free_netdev(dev);
395}
396
376static void ipcaif_net_setup(struct net_device *dev) 397static void ipcaif_net_setup(struct net_device *dev)
377{ 398{
378 struct chnl_net *priv; 399 struct chnl_net *priv;
379 dev->netdev_ops = &netdev_ops; 400 dev->netdev_ops = &netdev_ops;
380 dev->destructor = free_netdev; 401 dev->destructor = chnl_net_destructor;
381 dev->flags |= IFF_NOARP; 402 dev->flags |= IFF_NOARP;
382 dev->flags |= IFF_POINTOPOINT; 403 dev->flags |= IFF_POINTOPOINT;
383 dev->mtu = GPRS_PDP_MTU; 404 dev->mtu = GPRS_PDP_MTU;
@@ -391,7 +412,7 @@ static void ipcaif_net_setup(struct net_device *dev)
391 priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; 412 priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
392 priv->conn_req.priority = CAIF_PRIO_LOW; 413 priv->conn_req.priority = CAIF_PRIO_LOW;
393 /* Insert illegal value */ 414 /* Insert illegal value */
394 priv->conn_req.sockaddr.u.dgm.connection_id = -1; 415 priv->conn_req.sockaddr.u.dgm.connection_id = 0;
395 priv->flowenabled = false; 416 priv->flowenabled = false;
396 417
397 init_waitqueue_head(&priv->netmgmt_wq); 418 init_waitqueue_head(&priv->netmgmt_wq);
@@ -453,6 +474,10 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
453 pr_warn("device rtml registration failed\n"); 474 pr_warn("device rtml registration failed\n");
454 else 475 else
455 list_add(&caifdev->list_field, &chnl_net_list); 476 list_add(&caifdev->list_field, &chnl_net_list);
477
478 /* Take ifindex as connection-id if null */
479 if (caifdev->conn_req.sockaddr.u.dgm.connection_id == 0)
480 caifdev->conn_req.sockaddr.u.dgm.connection_id = dev->ifindex;
456 return ret; 481 return ret;
457} 482}
458 483
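
Note: chnl_net.c now registers chnl_hold()/chnl_put() callbacks so the CAIF core can pin the net_device for as long as it keeps a pointer to the client layer, and frees the client from a custom destructor instead of plain free_netdev(). A small userspace sketch of that hold/put pairing; dev_hold/dev_put here are local stand-ins for the kernel helpers, not the real functions:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy "device" with a reference count. */
struct device { int refcnt; };

static void dev_hold(struct device *d) { d->refcnt++; }
static void dev_put(struct device *d)  { assert(d->refcnt > 0); d->refcnt--; }

/* A client layer embedded in the device owner's private data. */
struct client {
	struct device *owner;
	void (*hold)(struct client *);
	void (*put)(struct client *);
};

static void client_hold(struct client *c) { dev_hold(c->owner); }
static void client_put(struct client *c)  { dev_put(c->owner); }

/* Core side: take a reference for the duration it holds the client. */
static void core_use_client(struct client *c)
{
	c->hold(c);
	printf("core holds client, refcnt=%d\n", c->owner->refcnt);
	c->put(c);
}

int main(void)
{
	struct device dev = { .refcnt = 1 };	/* initial ref from creation */
	struct client cl = { .owner = &dev, .hold = client_hold, .put = client_put };

	core_use_client(&cl);
	dev_put(&dev);				/* destructor path drops the last ref */
	printf("final refcnt=%d\n", dev.refcnt);
	return dev.refcnt != 0;
}
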
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 702be5a2c956..094fc5332d42 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -84,8 +84,8 @@ static DEFINE_SPINLOCK(can_rcvlists_lock);
84static struct kmem_cache *rcv_cache __read_mostly; 84static struct kmem_cache *rcv_cache __read_mostly;
85 85
86/* table of registered CAN protocols */ 86/* table of registered CAN protocols */
87static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; 87static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
88static DEFINE_SPINLOCK(proto_tab_lock); 88static DEFINE_MUTEX(proto_tab_lock);
89 89
90struct timer_list can_stattimer; /* timer for statistics update */ 90struct timer_list can_stattimer; /* timer for statistics update */
91struct s_stats can_stats; /* packet statistics */ 91struct s_stats can_stats; /* packet statistics */
@@ -95,7 +95,7 @@ struct s_pstats can_pstats; /* receive list statistics */
95 * af_can socket functions 95 * af_can socket functions
96 */ 96 */
97 97
98static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 98int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
99{ 99{
100 struct sock *sk = sock->sk; 100 struct sock *sk = sock->sk;
101 101
@@ -108,17 +108,36 @@ static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
108 return -ENOIOCTLCMD; 108 return -ENOIOCTLCMD;
109 } 109 }
110} 110}
111EXPORT_SYMBOL(can_ioctl);
111 112
112static void can_sock_destruct(struct sock *sk) 113static void can_sock_destruct(struct sock *sk)
113{ 114{
114 skb_queue_purge(&sk->sk_receive_queue); 115 skb_queue_purge(&sk->sk_receive_queue);
115} 116}
116 117
118static const struct can_proto *can_get_proto(int protocol)
119{
120 const struct can_proto *cp;
121
122 rcu_read_lock();
123 cp = rcu_dereference(proto_tab[protocol]);
124 if (cp && !try_module_get(cp->prot->owner))
125 cp = NULL;
126 rcu_read_unlock();
127
128 return cp;
129}
130
131static inline void can_put_proto(const struct can_proto *cp)
132{
133 module_put(cp->prot->owner);
134}
135
117static int can_create(struct net *net, struct socket *sock, int protocol, 136static int can_create(struct net *net, struct socket *sock, int protocol,
118 int kern) 137 int kern)
119{ 138{
120 struct sock *sk; 139 struct sock *sk;
121 struct can_proto *cp; 140 const struct can_proto *cp;
122 int err = 0; 141 int err = 0;
123 142
124 sock->state = SS_UNCONNECTED; 143 sock->state = SS_UNCONNECTED;
@@ -129,9 +148,12 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
129 if (!net_eq(net, &init_net)) 148 if (!net_eq(net, &init_net))
130 return -EAFNOSUPPORT; 149 return -EAFNOSUPPORT;
131 150
151 cp = can_get_proto(protocol);
152
132#ifdef CONFIG_MODULES 153#ifdef CONFIG_MODULES
133 /* try to load protocol module kernel is modular */ 154 if (!cp) {
134 if (!proto_tab[protocol]) { 155 /* try to load protocol module if kernel is modular */
156
135 err = request_module("can-proto-%d", protocol); 157 err = request_module("can-proto-%d", protocol);
136 158
137 /* 159 /*
@@ -142,22 +164,18 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
142 if (err && printk_ratelimit()) 164 if (err && printk_ratelimit())
143 printk(KERN_ERR "can: request_module " 165 printk(KERN_ERR "can: request_module "
144 "(can-proto-%d) failed.\n", protocol); 166 "(can-proto-%d) failed.\n", protocol);
167
168 cp = can_get_proto(protocol);
145 } 169 }
146#endif 170#endif
147 171
148 spin_lock(&proto_tab_lock);
149 cp = proto_tab[protocol];
150 if (cp && !try_module_get(cp->prot->owner))
151 cp = NULL;
152 spin_unlock(&proto_tab_lock);
153
154 /* check for available protocol and correct usage */ 172 /* check for available protocol and correct usage */
155 173
156 if (!cp) 174 if (!cp)
157 return -EPROTONOSUPPORT; 175 return -EPROTONOSUPPORT;
158 176
159 if (cp->type != sock->type) { 177 if (cp->type != sock->type) {
160 err = -EPROTONOSUPPORT; 178 err = -EPROTOTYPE;
161 goto errout; 179 goto errout;
162 } 180 }
163 181
@@ -182,7 +200,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
182 } 200 }
183 201
184 errout: 202 errout:
185 module_put(cp->prot->owner); 203 can_put_proto(cp);
186 return err; 204 return err;
187} 205}
188 206
@@ -678,7 +696,7 @@ drop:
678 * -EBUSY protocol already in use 696 * -EBUSY protocol already in use
679 * -ENOBUF if proto_register() fails 697 * -ENOBUF if proto_register() fails
680 */ 698 */
681int can_proto_register(struct can_proto *cp) 699int can_proto_register(const struct can_proto *cp)
682{ 700{
683 int proto = cp->protocol; 701 int proto = cp->protocol;
684 int err = 0; 702 int err = 0;
@@ -693,19 +711,16 @@ int can_proto_register(struct can_proto *cp)
693 if (err < 0) 711 if (err < 0)
694 return err; 712 return err;
695 713
696 spin_lock(&proto_tab_lock); 714 mutex_lock(&proto_tab_lock);
715
697 if (proto_tab[proto]) { 716 if (proto_tab[proto]) {
698 printk(KERN_ERR "can: protocol %d already registered\n", 717 printk(KERN_ERR "can: protocol %d already registered\n",
699 proto); 718 proto);
700 err = -EBUSY; 719 err = -EBUSY;
701 } else { 720 } else
702 proto_tab[proto] = cp; 721 rcu_assign_pointer(proto_tab[proto], cp);
703 722
704 /* use generic ioctl function if not defined by module */ 723 mutex_unlock(&proto_tab_lock);
705 if (!cp->ops->ioctl)
706 cp->ops->ioctl = can_ioctl;
707 }
708 spin_unlock(&proto_tab_lock);
709 724
710 if (err < 0) 725 if (err < 0)
711 proto_unregister(cp->prot); 726 proto_unregister(cp->prot);
@@ -718,17 +733,16 @@ EXPORT_SYMBOL(can_proto_register);
718 * can_proto_unregister - unregister CAN transport protocol 733 * can_proto_unregister - unregister CAN transport protocol
719 * @cp: pointer to CAN protocol structure 734 * @cp: pointer to CAN protocol structure
720 */ 735 */
721void can_proto_unregister(struct can_proto *cp) 736void can_proto_unregister(const struct can_proto *cp)
722{ 737{
723 int proto = cp->protocol; 738 int proto = cp->protocol;
724 739
725 spin_lock(&proto_tab_lock); 740 mutex_lock(&proto_tab_lock);
726 if (!proto_tab[proto]) { 741 BUG_ON(proto_tab[proto] != cp);
727 printk(KERN_ERR "BUG: can: protocol %d is not registered\n", 742 rcu_assign_pointer(proto_tab[proto], NULL);
728 proto); 743 mutex_unlock(&proto_tab_lock);
729 } 744
730 proto_tab[proto] = NULL; 745 synchronize_rcu();
731 spin_unlock(&proto_tab_lock);
732 746
733 proto_unregister(cp->prot); 747 proto_unregister(cp->prot);
734} 748}
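
Note: the af_can.c hunks replace the proto_tab spinlock with a mutex for registration plus an RCU-protected lookup in can_get_proto(), which also takes a module reference before the protocol is used; unregistration waits for readers with synchronize_rcu(). The sketch below shows the same "publish under a writer lock, look up locklessly, pin before use" shape using C11 atomics in place of RCU; it omits the grace-period wait and all names are illustrative, not the kernel API:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct proto {
	int id;
	atomic_int users;	/* stand-in for the module refcount */
};

#define NPROTO 4
static _Atomic(struct proto *) proto_tab[NPROTO];
static pthread_mutex_t proto_tab_lock = PTHREAD_MUTEX_INITIALIZER;

/* Writer side: registration is serialized by the mutex. */
static int proto_register(struct proto *p)
{
	int err = 0;

	pthread_mutex_lock(&proto_tab_lock);
	if (atomic_load(&proto_tab[p->id]))
		err = -1;				/* already registered */
	else
		atomic_store(&proto_tab[p->id], p);	/* publish the entry */
	pthread_mutex_unlock(&proto_tab_lock);
	return err;
}

/* Reader side: lockless lookup, then pin the protocol before use. */
static struct proto *proto_get(int id)
{
	struct proto *p = atomic_load(&proto_tab[id]);

	if (p)
		atomic_fetch_add(&p->users, 1);
	return p;
}

static void proto_put(struct proto *p)
{
	atomic_fetch_sub(&p->users, 1);
}

int main(void)
{
	struct proto raw = { .id = 1 };
	struct proto *p;

	proto_register(&raw);
	p = proto_get(1);
	if (!p)
		return 1;
	printf("using protocol %d, users=%d\n", p->id, atomic_load(&p->users));
	proto_put(p);
	return 0;
}
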
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 092dc88a7c64..cced806098a9 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -387,7 +387,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
387} 387}
388 388
389/* 389/*
390 * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions 390 * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions
391 */ 391 */
392static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) 392static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
393{ 393{
@@ -1427,9 +1427,14 @@ static int bcm_init(struct sock *sk)
1427static int bcm_release(struct socket *sock) 1427static int bcm_release(struct socket *sock)
1428{ 1428{
1429 struct sock *sk = sock->sk; 1429 struct sock *sk = sock->sk;
1430 struct bcm_sock *bo = bcm_sk(sk); 1430 struct bcm_sock *bo;
1431 struct bcm_op *op, *next; 1431 struct bcm_op *op, *next;
1432 1432
1433 if (sk == NULL)
1434 return 0;
1435
1436 bo = bcm_sk(sk);
1437
1433 /* remove bcm_ops, timer, rx_unregister(), etc. */ 1438 /* remove bcm_ops, timer, rx_unregister(), etc. */
1434 1439
1435 unregister_netdevice_notifier(&bo->notifier); 1440 unregister_netdevice_notifier(&bo->notifier);
@@ -1569,7 +1574,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
1569 return size; 1574 return size;
1570} 1575}
1571 1576
1572static struct proto_ops bcm_ops __read_mostly = { 1577static const struct proto_ops bcm_ops = {
1573 .family = PF_CAN, 1578 .family = PF_CAN,
1574 .release = bcm_release, 1579 .release = bcm_release,
1575 .bind = sock_no_bind, 1580 .bind = sock_no_bind,
@@ -1578,7 +1583,7 @@ static struct proto_ops bcm_ops __read_mostly = {
1578 .accept = sock_no_accept, 1583 .accept = sock_no_accept,
1579 .getname = sock_no_getname, 1584 .getname = sock_no_getname,
1580 .poll = datagram_poll, 1585 .poll = datagram_poll,
1581 .ioctl = NULL, /* use can_ioctl() from af_can.c */ 1586 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
1582 .listen = sock_no_listen, 1587 .listen = sock_no_listen,
1583 .shutdown = sock_no_shutdown, 1588 .shutdown = sock_no_shutdown,
1584 .setsockopt = sock_no_setsockopt, 1589 .setsockopt = sock_no_setsockopt,
@@ -1596,7 +1601,7 @@ static struct proto bcm_proto __read_mostly = {
1596 .init = bcm_init, 1601 .init = bcm_init,
1597}; 1602};
1598 1603
1599static struct can_proto bcm_can_proto __read_mostly = { 1604static const struct can_proto bcm_can_proto = {
1600 .type = SOCK_DGRAM, 1605 .type = SOCK_DGRAM,
1601 .protocol = CAN_BCM, 1606 .protocol = CAN_BCM,
1602 .ops = &bcm_ops, 1607 .ops = &bcm_ops,
diff --git a/net/can/raw.c b/net/can/raw.c
index 883e9d74fddf..dea99a6e596c 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -305,7 +305,12 @@ static int raw_init(struct sock *sk)
305static int raw_release(struct socket *sock) 305static int raw_release(struct socket *sock)
306{ 306{
307 struct sock *sk = sock->sk; 307 struct sock *sk = sock->sk;
308 struct raw_sock *ro = raw_sk(sk); 308 struct raw_sock *ro;
309
310 if (!sk)
311 return 0;
312
313 ro = raw_sk(sk);
309 314
310 unregister_netdevice_notifier(&ro->notifier); 315 unregister_netdevice_notifier(&ro->notifier);
311 316
@@ -742,7 +747,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
742 return size; 747 return size;
743} 748}
744 749
745static struct proto_ops raw_ops __read_mostly = { 750static const struct proto_ops raw_ops = {
746 .family = PF_CAN, 751 .family = PF_CAN,
747 .release = raw_release, 752 .release = raw_release,
748 .bind = raw_bind, 753 .bind = raw_bind,
@@ -751,7 +756,7 @@ static struct proto_ops raw_ops __read_mostly = {
751 .accept = sock_no_accept, 756 .accept = sock_no_accept,
752 .getname = raw_getname, 757 .getname = raw_getname,
753 .poll = datagram_poll, 758 .poll = datagram_poll,
754 .ioctl = NULL, /* use can_ioctl() from af_can.c */ 759 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
755 .listen = sock_no_listen, 760 .listen = sock_no_listen,
756 .shutdown = sock_no_shutdown, 761 .shutdown = sock_no_shutdown,
757 .setsockopt = raw_setsockopt, 762 .setsockopt = raw_setsockopt,
@@ -769,7 +774,7 @@ static struct proto raw_proto __read_mostly = {
769 .init = raw_init, 774 .init = raw_init,
770}; 775};
771 776
772static struct can_proto raw_can_proto __read_mostly = { 777static const struct can_proto raw_can_proto = {
773 .type = SOCK_RAW, 778 .type = SOCK_RAW,
774 .protocol = CAN_RAW, 779 .protocol = CAN_RAW,
775 .ops = &raw_ops, 780 .ops = &raw_ops,
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index ad424049b0cf..be683f2d401f 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -4,6 +4,7 @@ config CEPH_LIB
4 select LIBCRC32C 4 select LIBCRC32C
5 select CRYPTO_AES 5 select CRYPTO_AES
6 select CRYPTO 6 select CRYPTO
7 select KEYS
7 default n 8 default n
8 help 9 help
9 Choose Y or M here to include cephlib, which provides the 10 Choose Y or M here to include cephlib, which provides the
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index eb2a666b0be7..1fc1ee11dfa2 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -78,8 +78,10 @@ int ceph_unarmor(char *dst, const char *src, const char *end)
78 while (src < end) { 78 while (src < end) {
79 int a, b, c, d; 79 int a, b, c, d;
80 80
81 if (src < end && src[0] == '\n') 81 if (src[0] == '\n') {
82 src++; 82 src++;
83 continue;
84 }
83 if (src + 4 > end) 85 if (src + 4 > end)
84 return -EINVAL; 86 return -EINVAL;
85 a = decode_bits(src[0]); 87 a = decode_bits(src[0]);
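
Note: the ceph_unarmor() change above adds a `continue` after consuming a newline, so the loop re-tests its condition instead of falling through to the 4-byte bounds check; without it, input ending in a trailing newline was rejected with -EINVAL. A stand-alone sketch of that loop shape, where decode_quad() is a placeholder rather than the ceph decoder:

#include <stdio.h>
#include <string.h>

/* Placeholder for decoding one 4-character base64 group. */
static int decode_quad(const char *src) { (void)src; return 0; }

static int unarmor(const char *src, const char *end)
{
	while (src < end) {
		if (src[0] == '\n') {	/* skip line breaks... */
			src++;
			continue;	/* ...and re-test the loop condition */
		}
		if (src + 4 > end)
			return -1;	/* truncated group */
		if (decode_quad(src) < 0)
			return -1;
		src += 4;
	}
	return 0;
}

int main(void)
{
	const char *s = "QUJD\n";	/* one full group plus a trailing newline */
	return unarmor(s, s + strlen(s)) ? 1 : 0;
}
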
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 549c1f43e1d5..b4bf4ac090f1 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -35,12 +35,12 @@ static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
35/* 35/*
36 * setup, teardown. 36 * setup, teardown.
37 */ 37 */
38struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret) 38struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key)
39{ 39{
40 struct ceph_auth_client *ac; 40 struct ceph_auth_client *ac;
41 int ret; 41 int ret;
42 42
43 dout("auth_init name '%s' secret '%s'\n", name, secret); 43 dout("auth_init name '%s'\n", name);
44 44
45 ret = -ENOMEM; 45 ret = -ENOMEM;
46 ac = kzalloc(sizeof(*ac), GFP_NOFS); 46 ac = kzalloc(sizeof(*ac), GFP_NOFS);
@@ -52,8 +52,8 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret)
52 ac->name = name; 52 ac->name = name;
53 else 53 else
54 ac->name = CEPH_AUTH_NAME_DEFAULT; 54 ac->name = CEPH_AUTH_NAME_DEFAULT;
55 dout("auth_init name %s secret %s\n", ac->name, secret); 55 dout("auth_init name %s\n", ac->name);
56 ac->secret = secret; 56 ac->key = key;
57 return ac; 57 return ac;
58 58
59out: 59out:
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 7fd5dfcf6e18..1587dc6010c6 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -662,14 +662,16 @@ int ceph_x_init(struct ceph_auth_client *ac)
662 goto out; 662 goto out;
663 663
664 ret = -EINVAL; 664 ret = -EINVAL;
665 if (!ac->secret) { 665 if (!ac->key) {
666 pr_err("no secret set (for auth_x protocol)\n"); 666 pr_err("no secret set (for auth_x protocol)\n");
667 goto out_nomem; 667 goto out_nomem;
668 } 668 }
669 669
670 ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); 670 ret = ceph_crypto_key_clone(&xi->secret, ac->key);
671 if (ret) 671 if (ret < 0) {
672 pr_err("cannot clone key: %d\n", ret);
672 goto out_nomem; 673 goto out_nomem;
674 }
673 675
674 xi->starting = true; 676 xi->starting = true;
675 xi->ticket_handlers = RB_ROOT; 677 xi->ticket_handlers = RB_ROOT;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index f3e4a13fea0c..132963abc266 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -5,6 +5,8 @@
5#include <linux/fs.h> 5#include <linux/fs.h>
6#include <linux/inet.h> 6#include <linux/inet.h>
7#include <linux/in6.h> 7#include <linux/in6.h>
8#include <linux/key.h>
9#include <keys/ceph-type.h>
8#include <linux/module.h> 10#include <linux/module.h>
9#include <linux/mount.h> 11#include <linux/mount.h>
10#include <linux/parser.h> 12#include <linux/parser.h>
@@ -20,6 +22,7 @@
20#include <linux/ceph/decode.h> 22#include <linux/ceph/decode.h>
21#include <linux/ceph/mon_client.h> 23#include <linux/ceph/mon_client.h>
22#include <linux/ceph/auth.h> 24#include <linux/ceph/auth.h>
25#include "crypto.h"
23 26
24 27
25 28
@@ -62,6 +65,7 @@ const char *ceph_msg_type_name(int type)
62 case CEPH_MSG_OSD_MAP: return "osd_map"; 65 case CEPH_MSG_OSD_MAP: return "osd_map";
63 case CEPH_MSG_OSD_OP: return "osd_op"; 66 case CEPH_MSG_OSD_OP: return "osd_op";
64 case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; 67 case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
68 case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
65 default: return "unknown"; 69 default: return "unknown";
66 } 70 }
67} 71}
@@ -116,9 +120,29 @@ int ceph_compare_options(struct ceph_options *new_opt,
116 if (ret) 120 if (ret)
117 return ret; 121 return ret;
118 122
119 ret = strcmp_null(opt1->secret, opt2->secret); 123 if (opt1->key && !opt2->key)
120 if (ret) 124 return -1;
121 return ret; 125 if (!opt1->key && opt2->key)
126 return 1;
127 if (opt1->key && opt2->key) {
128 if (opt1->key->type != opt2->key->type)
129 return -1;
130 if (opt1->key->created.tv_sec != opt2->key->created.tv_sec)
131 return -1;
132 if (opt1->key->created.tv_nsec != opt2->key->created.tv_nsec)
133 return -1;
134 if (opt1->key->len != opt2->key->len)
135 return -1;
136 if (opt1->key->key && !opt2->key->key)
137 return -1;
138 if (!opt1->key->key && opt2->key->key)
139 return 1;
140 if (opt1->key->key && opt2->key->key) {
141 ret = memcmp(opt1->key->key, opt2->key->key, opt1->key->len);
142 if (ret)
143 return ret;
144 }
145 }
122 146
123 /* any matching mon ip implies a match */ 147 /* any matching mon ip implies a match */
124 for (i = 0; i < opt1->num_mon; i++) { 148 for (i = 0; i < opt1->num_mon; i++) {
@@ -175,6 +199,7 @@ enum {
175 Opt_fsid, 199 Opt_fsid,
176 Opt_name, 200 Opt_name,
177 Opt_secret, 201 Opt_secret,
202 Opt_key,
178 Opt_ip, 203 Opt_ip,
179 Opt_last_string, 204 Opt_last_string,
180 /* string args above */ 205 /* string args above */
@@ -191,6 +216,7 @@ static match_table_t opt_tokens = {
191 {Opt_fsid, "fsid=%s"}, 216 {Opt_fsid, "fsid=%s"},
192 {Opt_name, "name=%s"}, 217 {Opt_name, "name=%s"},
193 {Opt_secret, "secret=%s"}, 218 {Opt_secret, "secret=%s"},
219 {Opt_key, "key=%s"},
194 {Opt_ip, "ip=%s"}, 220 {Opt_ip, "ip=%s"},
195 /* string args above */ 221 /* string args above */
196 {Opt_noshare, "noshare"}, 222 {Opt_noshare, "noshare"},
@@ -202,11 +228,56 @@ void ceph_destroy_options(struct ceph_options *opt)
202{ 228{
203 dout("destroy_options %p\n", opt); 229 dout("destroy_options %p\n", opt);
204 kfree(opt->name); 230 kfree(opt->name);
205 kfree(opt->secret); 231 if (opt->key) {
232 ceph_crypto_key_destroy(opt->key);
233 kfree(opt->key);
234 }
206 kfree(opt); 235 kfree(opt);
207} 236}
208EXPORT_SYMBOL(ceph_destroy_options); 237EXPORT_SYMBOL(ceph_destroy_options);
209 238
239/* get secret from key store */
240static int get_secret(struct ceph_crypto_key *dst, const char *name) {
241 struct key *ukey;
242 int key_err;
243 int err = 0;
244 struct ceph_crypto_key *ckey;
245
246 ukey = request_key(&key_type_ceph, name, NULL);
247 if (!ukey || IS_ERR(ukey)) {
248 /* request_key errors don't map nicely to mount(2)
249 errors; don't even try, but still printk */
250 key_err = PTR_ERR(ukey);
251 switch (key_err) {
252 case -ENOKEY:
253 pr_warning("ceph: Mount failed due to key not found: %s\n", name);
254 break;
255 case -EKEYEXPIRED:
256 pr_warning("ceph: Mount failed due to expired key: %s\n", name);
257 break;
258 case -EKEYREVOKED:
259 pr_warning("ceph: Mount failed due to revoked key: %s\n", name);
260 break;
261 default:
262 pr_warning("ceph: Mount failed due to unknown key error"
263 " %d: %s\n", key_err, name);
264 }
265 err = -EPERM;
266 goto out;
267 }
268
269 ckey = ukey->payload.data;
270 err = ceph_crypto_key_clone(dst, ckey);
271 if (err)
272 goto out_key;
273 /* pass through, err is 0 */
274
275out_key:
276 key_put(ukey);
277out:
278 return err;
279}
280
210int ceph_parse_options(struct ceph_options **popt, char *options, 281int ceph_parse_options(struct ceph_options **popt, char *options,
211 const char *dev_name, const char *dev_name_end, 282 const char *dev_name, const char *dev_name_end,
212 int (*parse_extra_token)(char *c, void *private), 283 int (*parse_extra_token)(char *c, void *private),
@@ -294,9 +365,24 @@ int ceph_parse_options(struct ceph_options **popt, char *options,
294 GFP_KERNEL); 365 GFP_KERNEL);
295 break; 366 break;
296 case Opt_secret: 367 case Opt_secret:
297 opt->secret = kstrndup(argstr[0].from, 368 opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
298 argstr[0].to-argstr[0].from, 369 if (!opt->key) {
299 GFP_KERNEL); 370 err = -ENOMEM;
371 goto out;
372 }
373 err = ceph_crypto_key_unarmor(opt->key, argstr[0].from);
374 if (err < 0)
375 goto out;
376 break;
377 case Opt_key:
378 opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
379 if (!opt->key) {
380 err = -ENOMEM;
381 goto out;
382 }
383 err = get_secret(opt->key, argstr[0].from);
384 if (err < 0)
385 goto out;
300 break; 386 break;
301 387
302 /* misc */ 388 /* misc */
@@ -393,8 +479,8 @@ void ceph_destroy_client(struct ceph_client *client)
393 ceph_osdc_stop(&client->osdc); 479 ceph_osdc_stop(&client->osdc);
394 480
395 /* 481 /*
396 * make sure mds and osd connections close out before destroying 482 * make sure osd connections close out before destroying the
397 * the auth module, which is needed to free those connections' 483 * auth module, which is needed to free those connections'
398 * ceph_authorizers. 484 * ceph_authorizers.
399 */ 485 */
400 ceph_msgr_flush(); 486 ceph_msgr_flush();
@@ -495,10 +581,14 @@ static int __init init_ceph_lib(void)
495 if (ret < 0) 581 if (ret < 0)
496 goto out; 582 goto out;
497 583
498 ret = ceph_msgr_init(); 584 ret = ceph_crypto_init();
499 if (ret < 0) 585 if (ret < 0)
500 goto out_debugfs; 586 goto out_debugfs;
501 587
588 ret = ceph_msgr_init();
589 if (ret < 0)
590 goto out_crypto;
591
502 pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", 592 pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
503 CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, 593 CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
504 CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, 594 CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
@@ -506,6 +596,8 @@ static int __init init_ceph_lib(void)
506 596
507 return 0; 597 return 0;
508 598
599out_crypto:
600 ceph_crypto_shutdown();
509out_debugfs: 601out_debugfs:
510 ceph_debugfs_cleanup(); 602 ceph_debugfs_cleanup();
511out: 603out:
@@ -516,6 +608,7 @@ static void __exit exit_ceph_lib(void)
516{ 608{
517 dout("exit_ceph_lib\n"); 609 dout("exit_ceph_lib\n");
518 ceph_msgr_exit(); 610 ceph_msgr_exit();
611 ceph_crypto_shutdown();
519 ceph_debugfs_cleanup(); 612 ceph_debugfs_cleanup();
520} 613}
521 614
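
Note: with secret= now parsed into a ceph_crypto_key (and key= fetched from the kernel keyring via request_key()), ceph_compare_options() has to compare two optional key structures field by field rather than strcmp()ing secret strings. A compact userspace sketch of that nullable-struct comparison; the field names only mirror the hunk and are not the kernel types:

#include <stdio.h>
#include <string.h>

struct key {
	int type;
	size_t len;
	unsigned char *data;
};

/* Order: "no key" sorts against "has key"; otherwise compare contents. */
static int compare_keys(const struct key *a, const struct key *b)
{
	if (a && !b)
		return -1;
	if (!a && b)
		return 1;
	if (!a && !b)
		return 0;
	if (a->type != b->type)
		return -1;
	if (a->len != b->len)
		return -1;
	if (a->data && !b->data)
		return -1;
	if (!a->data && b->data)
		return 1;
	if (a->data && b->data)
		return memcmp(a->data, b->data, a->len);
	return 0;
}

int main(void)
{
	unsigned char secret[] = { 1, 2, 3 };
	struct key k1 = { .type = 1, .len = 3, .data = secret };
	struct key k2 = k1;

	printf("same=%d, against-null=%d\n", compare_keys(&k1, &k2),
	       compare_keys(&k1, NULL));
	return 0;
}
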
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 7b505b0c983f..5a8009c9e0cd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -5,10 +5,23 @@
5#include <linux/scatterlist.h> 5#include <linux/scatterlist.h>
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <crypto/hash.h> 7#include <crypto/hash.h>
8#include <linux/key-type.h>
8 9
10#include <keys/ceph-type.h>
9#include <linux/ceph/decode.h> 11#include <linux/ceph/decode.h>
10#include "crypto.h" 12#include "crypto.h"
11 13
14int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
15 const struct ceph_crypto_key *src)
16{
17 memcpy(dst, src, sizeof(struct ceph_crypto_key));
18 dst->key = kmalloc(src->len, GFP_NOFS);
19 if (!dst->key)
20 return -ENOMEM;
21 memcpy(dst->key, src->key, src->len);
22 return 0;
23}
24
12int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) 25int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
13{ 26{
14 if (*p + sizeof(u16) + sizeof(key->created) + 27 if (*p + sizeof(u16) + sizeof(key->created) +
@@ -410,3 +423,63 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
410 return -EINVAL; 423 return -EINVAL;
411 } 424 }
412} 425}
426
427int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
428{
429 struct ceph_crypto_key *ckey;
430 int ret;
431 void *p;
432
433 ret = -EINVAL;
434 if (datalen <= 0 || datalen > 32767 || !data)
435 goto err;
436
437 ret = key_payload_reserve(key, datalen);
438 if (ret < 0)
439 goto err;
440
441 ret = -ENOMEM;
442 ckey = kmalloc(sizeof(*ckey), GFP_KERNEL);
443 if (!ckey)
444 goto err;
445
446 /* TODO ceph_crypto_key_decode should really take const input */
447 p = (void*)data;
448 ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen);
449 if (ret < 0)
450 goto err_ckey;
451
452 key->payload.data = ckey;
453 return 0;
454
455err_ckey:
456 kfree(ckey);
457err:
458 return ret;
459}
460
461int ceph_key_match(const struct key *key, const void *description)
462{
463 return strcmp(key->description, description) == 0;
464}
465
466void ceph_key_destroy(struct key *key) {
467 struct ceph_crypto_key *ckey = key->payload.data;
468
469 ceph_crypto_key_destroy(ckey);
470}
471
472struct key_type key_type_ceph = {
473 .name = "ceph",
474 .instantiate = ceph_key_instantiate,
475 .match = ceph_key_match,
476 .destroy = ceph_key_destroy,
477};
478
479int ceph_crypto_init(void) {
480 return register_key_type(&key_type_ceph);
481}
482
483void ceph_crypto_shutdown(void) {
484 unregister_key_type(&key_type_ceph);
485}
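
Note: ceph_crypto_key_clone() above copies the whole struct and then duplicates the key bytes, so the clone owns its own buffer and the two copies can be destroyed independently. The same deep-copy idiom in plain C, with illustrative names:

#include <stdlib.h>
#include <string.h>

struct crypto_key {
	int type;
	size_t len;
	unsigned char *key;
};

/* Deep copy: copy the scalar fields, then give dst its own byte buffer. */
static int key_clone(struct crypto_key *dst, const struct crypto_key *src)
{
	*dst = *src;			/* copies type, len and (temporarily) the pointer */
	dst->key = malloc(src->len);
	if (!dst->key)
		return -1;
	memcpy(dst->key, src->key, src->len);
	return 0;
}

int main(void)
{
	unsigned char bytes[] = { 0xde, 0xad, 0xbe, 0xef };
	struct crypto_key src = { .type = 1, .len = sizeof(bytes), .key = bytes };
	struct crypto_key dst;

	if (key_clone(&dst, &src))
		return 1;
	free(dst.key);			/* each copy frees only its own buffer */
	return 0;
}
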
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index f9eccace592b..1919d1550d75 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -19,6 +19,8 @@ static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
19 kfree(key->key); 19 kfree(key->key);
20} 20}
21 21
22extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
23 const struct ceph_crypto_key *src);
22extern int ceph_crypto_key_encode(struct ceph_crypto_key *key, 24extern int ceph_crypto_key_encode(struct ceph_crypto_key *key,
23 void **p, void *end); 25 void **p, void *end);
24extern int ceph_crypto_key_decode(struct ceph_crypto_key *key, 26extern int ceph_crypto_key_decode(struct ceph_crypto_key *key,
@@ -40,6 +42,8 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret,
40 void *dst, size_t *dst_len, 42 void *dst, size_t *dst_len,
41 const void *src1, size_t src1_len, 43 const void *src1, size_t src1_len,
42 const void *src2, size_t src2_len); 44 const void *src2, size_t src2_len);
45extern int ceph_crypto_init(void);
46extern void ceph_crypto_shutdown(void);
43 47
44/* armor.c */ 48/* armor.c */
45extern int ceph_armor(char *dst, const char *src, const char *end); 49extern int ceph_armor(char *dst, const char *src, const char *end);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 35b36b86d762..e15a82ccc05f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con)
336 ceph_msg_put(con->out_msg); 336 ceph_msg_put(con->out_msg);
337 con->out_msg = NULL; 337 con->out_msg = NULL;
338 } 338 }
339 con->out_keepalive_pending = false;
340 con->in_seq = 0; 339 con->in_seq = 0;
341 con->in_seq_acked = 0; 340 con->in_seq_acked = 0;
342} 341}
@@ -1248,8 +1247,6 @@ static int process_connect(struct ceph_connection *con)
1248 con->auth_retry); 1247 con->auth_retry);
1249 if (con->auth_retry == 2) { 1248 if (con->auth_retry == 2) {
1250 con->error_msg = "connect authorization failure"; 1249 con->error_msg = "connect authorization failure";
1251 reset_connection(con);
1252 set_bit(CLOSED, &con->state);
1253 return -1; 1250 return -1;
1254 } 1251 }
1255 con->auth_retry = 1; 1252 con->auth_retry = 1;
@@ -1715,14 +1712,6 @@ more:
1715 1712
1716 /* open the socket first? */ 1713 /* open the socket first? */
1717 if (con->sock == NULL) { 1714 if (con->sock == NULL) {
1718 /*
1719 * if we were STANDBY and are reconnecting _this_
1720 * connection, bump connect_seq now. Always bump
1721 * global_seq.
1722 */
1723 if (test_and_clear_bit(STANDBY, &con->state))
1724 con->connect_seq++;
1725
1726 prepare_write_banner(msgr, con); 1715 prepare_write_banner(msgr, con);
1727 prepare_write_connect(msgr, con, 1); 1716 prepare_write_connect(msgr, con, 1);
1728 prepare_read_banner(con); 1717 prepare_read_banner(con);
@@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work)
1951 work.work); 1940 work.work);
1952 1941
1953 mutex_lock(&con->mutex); 1942 mutex_lock(&con->mutex);
1943 if (test_and_clear_bit(BACKOFF, &con->state)) {
1944 dout("con_work %p backing off\n", con);
1945 if (queue_delayed_work(ceph_msgr_wq, &con->work,
1946 round_jiffies_relative(con->delay))) {
1947 dout("con_work %p backoff %lu\n", con, con->delay);
1948 mutex_unlock(&con->mutex);
1949 return;
1950 } else {
1951 con->ops->put(con);
1952 dout("con_work %p FAILED to back off %lu\n", con,
1953 con->delay);
1954 }
1955 }
1954 1956
1957 if (test_bit(STANDBY, &con->state)) {
1958 dout("con_work %p STANDBY\n", con);
1959 goto done;
1960 }
1955 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1961 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
1956 dout("con_work CLOSED\n"); 1962 dout("con_work CLOSED\n");
1957 con_close_socket(con); 1963 con_close_socket(con);
@@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con)
2008 /* Requeue anything that hasn't been acked */ 2014 /* Requeue anything that hasn't been acked */
2009 list_splice_init(&con->out_sent, &con->out_queue); 2015 list_splice_init(&con->out_sent, &con->out_queue);
2010 2016
2011 /* If there are no messages in the queue, place the connection 2017 /* If there are no messages queued or keepalive pending, place
2012 * in a STANDBY state (i.e., don't try to reconnect just yet). */ 2018 * the connection in a STANDBY state */
2013 if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { 2019 if (list_empty(&con->out_queue) &&
2014 dout("fault setting STANDBY\n"); 2020 !test_bit(KEEPALIVE_PENDING, &con->state)) {
2021 dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
2022 clear_bit(WRITE_PENDING, &con->state);
2015 set_bit(STANDBY, &con->state); 2023 set_bit(STANDBY, &con->state);
2016 } else { 2024 } else {
2017 /* retry after a delay. */ 2025 /* retry after a delay. */
@@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con)
2019 con->delay = BASE_DELAY_INTERVAL; 2027 con->delay = BASE_DELAY_INTERVAL;
2020 else if (con->delay < MAX_DELAY_INTERVAL) 2028 else if (con->delay < MAX_DELAY_INTERVAL)
2021 con->delay *= 2; 2029 con->delay *= 2;
2022 dout("fault queueing %p delay %lu\n", con, con->delay);
2023 con->ops->get(con); 2030 con->ops->get(con);
2024 if (queue_delayed_work(ceph_msgr_wq, &con->work, 2031 if (queue_delayed_work(ceph_msgr_wq, &con->work,
2025 round_jiffies_relative(con->delay)) == 0) 2032 round_jiffies_relative(con->delay))) {
2033 dout("fault queued %p delay %lu\n", con, con->delay);
2034 } else {
2026 con->ops->put(con); 2035 con->ops->put(con);
2036 dout("fault failed to queue %p delay %lu, backoff\n",
2037 con, con->delay);
2038 /*
2039 * In many cases we see a socket state change
2040 * while con_work is running and end up
2041 * queuing (non-delayed) work, such that we
2042 * can't backoff with a delay. Set a flag so
2043 * that when con_work restarts we schedule the
2044 * delay then.
2045 */
2046 set_bit(BACKOFF, &con->state);
2047 }
2027 } 2048 }
2028 2049
2029out_unlock: 2050out_unlock:
@@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
2094} 2115}
2095EXPORT_SYMBOL(ceph_messenger_destroy); 2116EXPORT_SYMBOL(ceph_messenger_destroy);
2096 2117
2118static void clear_standby(struct ceph_connection *con)
2119{
2120 /* come back from STANDBY? */
2121 if (test_and_clear_bit(STANDBY, &con->state)) {
2122 mutex_lock(&con->mutex);
2123 dout("clear_standby %p and ++connect_seq\n", con);
2124 con->connect_seq++;
2125 WARN_ON(test_bit(WRITE_PENDING, &con->state));
2126 WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
2127 mutex_unlock(&con->mutex);
2128 }
2129}
2130
2097/* 2131/*
2098 * Queue up an outgoing message on the given connection. 2132 * Queue up an outgoing message on the given connection.
2099 */ 2133 */
@@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
2126 2160
2127 /* if there wasn't anything waiting to send before, queue 2161 /* if there wasn't anything waiting to send before, queue
2128 * new work */ 2162 * new work */
2163 clear_standby(con);
2129 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2164 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2130 queue_con(con); 2165 queue_con(con);
2131} 2166}
@@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2191 */ 2226 */
2192void ceph_con_keepalive(struct ceph_connection *con) 2227void ceph_con_keepalive(struct ceph_connection *con)
2193{ 2228{
2229 dout("con_keepalive %p\n", con);
2230 clear_standby(con);
2194 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2231 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
2195 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2232 test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2196 queue_con(con); 2233 queue_con(con);
@@ -2230,6 +2267,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
2230 m->more_to_follow = false; 2267 m->more_to_follow = false;
2231 m->pool = NULL; 2268 m->pool = NULL;
2232 2269
2270 /* middle */
2271 m->middle = NULL;
2272
2273 /* data */
2274 m->nr_pages = 0;
2275 m->page_alignment = 0;
2276 m->pages = NULL;
2277 m->pagelist = NULL;
2278 m->bio = NULL;
2279 m->bio_iter = NULL;
2280 m->bio_seg = 0;
2281 m->trail = NULL;
2282
2233 /* front */ 2283 /* front */
2234 if (front_len) { 2284 if (front_len) {
2235 if (front_len > PAGE_CACHE_SIZE) { 2285 if (front_len > PAGE_CACHE_SIZE) {
@@ -2249,19 +2299,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
2249 } 2299 }
2250 m->front.iov_len = front_len; 2300 m->front.iov_len = front_len;
2251 2301
2252 /* middle */
2253 m->middle = NULL;
2254
2255 /* data */
2256 m->nr_pages = 0;
2257 m->page_alignment = 0;
2258 m->pages = NULL;
2259 m->pagelist = NULL;
2260 m->bio = NULL;
2261 m->bio_iter = NULL;
2262 m->bio_seg = 0;
2263 m->trail = NULL;
2264
2265 dout("ceph_msg_new %p front %d\n", m, front_len); 2302 dout("ceph_msg_new %p front %d\n", m, front_len);
2266 return m; 2303 return m;
2267 2304
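
Note: the ceph_fault()/con_work() hunks keep the existing exponential backoff (the retry delay doubles up to MAX_DELAY_INTERVAL) and add a BACKOFF flag so that, when the delayed work cannot be queued because work is already pending, the next con_work run reschedules itself with the delay instead of retrying immediately. Below is a stand-alone sketch of the doubling-delay part only; the constants and names are illustrative:

#include <stdio.h>

#define BASE_DELAY 1	/* arbitrary ticks, for illustration */
#define MAX_DELAY  64

struct conn { unsigned long delay; };

/* Called on each fault: grow the retry delay exponentially, with a cap. */
static unsigned long next_delay(struct conn *c)
{
	if (c->delay == 0)
		c->delay = BASE_DELAY;
	else if (c->delay < MAX_DELAY)
		c->delay *= 2;
	return c->delay;
}

int main(void)
{
	struct conn c = { 0 };
	int i;

	for (i = 0; i < 10; i++)
		printf("retry %d after %lu ticks\n", i, next_delay(&c));
	return 0;
}
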
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 8a079399174a..cbe31fa45508 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -759,7 +759,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
759 759
760 /* authentication */ 760 /* authentication */
761 monc->auth = ceph_auth_init(cl->options->name, 761 monc->auth = ceph_auth_init(cl->options->name,
762 cl->options->secret); 762 cl->options->key);
763 if (IS_ERR(monc->auth)) 763 if (IS_ERR(monc->auth))
764 return PTR_ERR(monc->auth); 764 return PTR_ERR(monc->auth);
765 monc->auth->want_keys = 765 monc->auth->want_keys =
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3e20a122ffa2..6b5dda1cb5df 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -22,10 +22,15 @@
22#define OSD_OPREPLY_FRONT_LEN 512 22#define OSD_OPREPLY_FRONT_LEN 512
23 23
24static const struct ceph_connection_operations osd_con_ops; 24static const struct ceph_connection_operations osd_con_ops;
25static int __kick_requests(struct ceph_osd_client *osdc,
26 struct ceph_osd *kickosd);
27 25
28static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); 26static void send_queued(struct ceph_osd_client *osdc);
27static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
28static void __register_request(struct ceph_osd_client *osdc,
29 struct ceph_osd_request *req);
30static void __unregister_linger_request(struct ceph_osd_client *osdc,
31 struct ceph_osd_request *req);
32static int __send_request(struct ceph_osd_client *osdc,
33 struct ceph_osd_request *req);
29 34
30static int op_needs_trail(int op) 35static int op_needs_trail(int op)
31{ 36{
@@ -34,6 +39,7 @@ static int op_needs_trail(int op)
34 case CEPH_OSD_OP_SETXATTR: 39 case CEPH_OSD_OP_SETXATTR:
35 case CEPH_OSD_OP_CMPXATTR: 40 case CEPH_OSD_OP_CMPXATTR:
36 case CEPH_OSD_OP_CALL: 41 case CEPH_OSD_OP_CALL:
42 case CEPH_OSD_OP_NOTIFY:
37 return 1; 43 return 1;
38 default: 44 default:
39 return 0; 45 return 0;
@@ -209,6 +215,8 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
209 init_completion(&req->r_completion); 215 init_completion(&req->r_completion);
210 init_completion(&req->r_safe_completion); 216 init_completion(&req->r_safe_completion);
211 INIT_LIST_HEAD(&req->r_unsafe_item); 217 INIT_LIST_HEAD(&req->r_unsafe_item);
218 INIT_LIST_HEAD(&req->r_linger_item);
219 INIT_LIST_HEAD(&req->r_linger_osd);
212 req->r_flags = flags; 220 req->r_flags = flags;
213 221
214 WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); 222 WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -315,6 +323,24 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
315 break; 323 break;
316 case CEPH_OSD_OP_STARTSYNC: 324 case CEPH_OSD_OP_STARTSYNC:
317 break; 325 break;
326 case CEPH_OSD_OP_NOTIFY:
327 {
328 __le32 prot_ver = cpu_to_le32(src->watch.prot_ver);
329 __le32 timeout = cpu_to_le32(src->watch.timeout);
330
331 BUG_ON(!req->r_trail);
332
333 ceph_pagelist_append(req->r_trail,
334 &prot_ver, sizeof(prot_ver));
335 ceph_pagelist_append(req->r_trail,
336 &timeout, sizeof(timeout));
337 }
338 case CEPH_OSD_OP_NOTIFY_ACK:
339 case CEPH_OSD_OP_WATCH:
340 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
341 dst->watch.ver = cpu_to_le64(src->watch.ver);
342 dst->watch.flag = src->watch.flag;
343 break;
318 default: 344 default:
319 pr_err("unrecognized osd opcode %d\n", dst->op); 345 pr_err("unrecognized osd opcode %d\n", dst->op);
320 WARN_ON(1); 346 WARN_ON(1);
@@ -444,8 +470,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
444 snapc, ops, 470 snapc, ops,
445 use_mempool, 471 use_mempool,
446 GFP_NOFS, NULL, NULL); 472 GFP_NOFS, NULL, NULL);
447 if (IS_ERR(req)) 473 if (!req)
448 return req; 474 return NULL;
449 475
450 /* calculate max write size */ 476 /* calculate max write size */
451 calc_layout(osdc, vino, layout, off, plen, req, ops); 477 calc_layout(osdc, vino, layout, off, plen, req, ops);
@@ -529,6 +555,51 @@ __lookup_request_ge(struct ceph_osd_client *osdc,
529 return NULL; 555 return NULL;
530} 556}
531 557
558/*
559 * Resubmit requests pending on the given osd.
560 */
561static void __kick_osd_requests(struct ceph_osd_client *osdc,
562 struct ceph_osd *osd)
563{
564 struct ceph_osd_request *req, *nreq;
565 int err;
566
567 dout("__kick_osd_requests osd%d\n", osd->o_osd);
568 err = __reset_osd(osdc, osd);
569 if (err == -EAGAIN)
570 return;
571
572 list_for_each_entry(req, &osd->o_requests, r_osd_item) {
573 list_move(&req->r_req_lru_item, &osdc->req_unsent);
574 dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
575 osd->o_osd);
576 if (!req->r_linger)
577 req->r_flags |= CEPH_OSD_FLAG_RETRY;
578 }
579
580 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
581 r_linger_osd) {
582 /*
583 * reregister request prior to unregistering linger so
584 * that r_osd is preserved.
585 */
586 BUG_ON(!list_empty(&req->r_req_lru_item));
587 __register_request(osdc, req);
588 list_add(&req->r_req_lru_item, &osdc->req_unsent);
589 list_add(&req->r_osd_item, &req->r_osd->o_requests);
590 __unregister_linger_request(osdc, req);
591 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
592 osd->o_osd);
593 }
594}
595
596static void kick_osd_requests(struct ceph_osd_client *osdc,
597 struct ceph_osd *kickosd)
598{
599 mutex_lock(&osdc->request_mutex);
600 __kick_osd_requests(osdc, kickosd);
601 mutex_unlock(&osdc->request_mutex);
602}
532 603
533/* 604/*
534 * If the osd connection drops, we need to resubmit all requests. 605 * If the osd connection drops, we need to resubmit all requests.
@@ -543,7 +614,8 @@ static void osd_reset(struct ceph_connection *con)
543 dout("osd_reset osd%d\n", osd->o_osd); 614 dout("osd_reset osd%d\n", osd->o_osd);
544 osdc = osd->o_osdc; 615 osdc = osd->o_osdc;
545 down_read(&osdc->map_sem); 616 down_read(&osdc->map_sem);
546 kick_requests(osdc, osd); 617 kick_osd_requests(osdc, osd);
618 send_queued(osdc);
547 up_read(&osdc->map_sem); 619 up_read(&osdc->map_sem);
548} 620}
549 621
@@ -561,6 +633,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
561 atomic_set(&osd->o_ref, 1); 633 atomic_set(&osd->o_ref, 1);
562 osd->o_osdc = osdc; 634 osd->o_osdc = osdc;
563 INIT_LIST_HEAD(&osd->o_requests); 635 INIT_LIST_HEAD(&osd->o_requests);
636 INIT_LIST_HEAD(&osd->o_linger_requests);
564 INIT_LIST_HEAD(&osd->o_osd_lru); 637 INIT_LIST_HEAD(&osd->o_osd_lru);
565 osd->o_incarnation = 1; 638 osd->o_incarnation = 1;
566 639
@@ -650,7 +723,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
650 int ret = 0; 723 int ret = 0;
651 724
652 dout("__reset_osd %p osd%d\n", osd, osd->o_osd); 725 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
653 if (list_empty(&osd->o_requests)) { 726 if (list_empty(&osd->o_requests) &&
727 list_empty(&osd->o_linger_requests)) {
654 __remove_osd(osdc, osd); 728 __remove_osd(osdc, osd);
655 } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], 729 } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
656 &osd->o_con.peer_addr, 730 &osd->o_con.peer_addr,
@@ -723,15 +797,14 @@ static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
723 * Register request, assign tid. If this is the first request, set up 797 * Register request, assign tid. If this is the first request, set up
724 * the timeout event. 798 * the timeout event.
725 */ 799 */
726static void register_request(struct ceph_osd_client *osdc, 800static void __register_request(struct ceph_osd_client *osdc,
727 struct ceph_osd_request *req) 801 struct ceph_osd_request *req)
728{ 802{
729 mutex_lock(&osdc->request_mutex);
730 req->r_tid = ++osdc->last_tid; 803 req->r_tid = ++osdc->last_tid;
731 req->r_request->hdr.tid = cpu_to_le64(req->r_tid); 804 req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
732 INIT_LIST_HEAD(&req->r_req_lru_item); 805 INIT_LIST_HEAD(&req->r_req_lru_item);
733 806
734 dout("register_request %p tid %lld\n", req, req->r_tid); 807 dout("__register_request %p tid %lld\n", req, req->r_tid);
735 __insert_request(osdc, req); 808 __insert_request(osdc, req);
736 ceph_osdc_get_request(req); 809 ceph_osdc_get_request(req);
737 osdc->num_requests++; 810 osdc->num_requests++;
@@ -740,6 +813,13 @@ static void register_request(struct ceph_osd_client *osdc,
740 dout(" first request, scheduling timeout\n"); 813 dout(" first request, scheduling timeout\n");
741 __schedule_osd_timeout(osdc); 814 __schedule_osd_timeout(osdc);
742 } 815 }
816}
817
818static void register_request(struct ceph_osd_client *osdc,
819 struct ceph_osd_request *req)
820{
821 mutex_lock(&osdc->request_mutex);
822 __register_request(osdc, req);
743 mutex_unlock(&osdc->request_mutex); 823 mutex_unlock(&osdc->request_mutex);
744} 824}
745 825
@@ -758,9 +838,13 @@ static void __unregister_request(struct ceph_osd_client *osdc,
758 ceph_con_revoke(&req->r_osd->o_con, req->r_request); 838 ceph_con_revoke(&req->r_osd->o_con, req->r_request);
759 839
760 list_del_init(&req->r_osd_item); 840 list_del_init(&req->r_osd_item);
761 if (list_empty(&req->r_osd->o_requests)) 841 if (list_empty(&req->r_osd->o_requests) &&
842 list_empty(&req->r_osd->o_linger_requests)) {
843 dout("moving osd to %p lru\n", req->r_osd);
762 __move_osd_to_lru(osdc, req->r_osd); 844 __move_osd_to_lru(osdc, req->r_osd);
763 req->r_osd = NULL; 845 }
846 if (list_empty(&req->r_linger_item))
847 req->r_osd = NULL;
764 } 848 }
765 849
766 ceph_osdc_put_request(req); 850 ceph_osdc_put_request(req);
@@ -781,20 +865,73 @@ static void __cancel_request(struct ceph_osd_request *req)
781 ceph_con_revoke(&req->r_osd->o_con, req->r_request); 865 ceph_con_revoke(&req->r_osd->o_con, req->r_request);
782 req->r_sent = 0; 866 req->r_sent = 0;
783 } 867 }
784 list_del_init(&req->r_req_lru_item);
785} 868}
786 869
870static void __register_linger_request(struct ceph_osd_client *osdc,
871 struct ceph_osd_request *req)
872{
873 dout("__register_linger_request %p\n", req);
874 list_add_tail(&req->r_linger_item, &osdc->req_linger);
875 list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
876}
877
878static void __unregister_linger_request(struct ceph_osd_client *osdc,
879 struct ceph_osd_request *req)
880{
881 dout("__unregister_linger_request %p\n", req);
882 if (req->r_osd) {
883 list_del_init(&req->r_linger_item);
884 list_del_init(&req->r_linger_osd);
885
886 if (list_empty(&req->r_osd->o_requests) &&
887 list_empty(&req->r_osd->o_linger_requests)) {
888 dout("moving osd to %p lru\n", req->r_osd);
889 __move_osd_to_lru(osdc, req->r_osd);
890 }
891 if (list_empty(&req->r_osd_item))
892 req->r_osd = NULL;
893 }
894}
895
896void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
897 struct ceph_osd_request *req)
898{
899 mutex_lock(&osdc->request_mutex);
900 if (req->r_linger) {
901 __unregister_linger_request(osdc, req);
902 ceph_osdc_put_request(req);
903 }
904 mutex_unlock(&osdc->request_mutex);
905}
906EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
907
908void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
909 struct ceph_osd_request *req)
910{
911 if (!req->r_linger) {
912 dout("set_request_linger %p\n", req);
913 req->r_linger = 1;
914 /*
915 * caller is now responsible for calling
916 * unregister_linger_request
917 */
918 ceph_osdc_get_request(req);
919 }
920}
921EXPORT_SYMBOL(ceph_osdc_set_request_linger);
922
787/* 923/*
788 * Pick an osd (the first 'up' osd in the pg), allocate the osd struct 924 * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
789 * (as needed), and set the request r_osd appropriately. If there is 925 * (as needed), and set the request r_osd appropriately. If there is
790 * no up osd, set r_osd to NULL. 926 * no up osd, set r_osd to NULL. Move the request to the appropriate list
927 * (unsent, homeless) or leave on in-flight lru.
791 * 928 *
792 * Return 0 if unchanged, 1 if changed, or negative on error. 929 * Return 0 if unchanged, 1 if changed, or negative on error.
793 * 930 *
794 * Caller should hold map_sem for read and request_mutex. 931 * Caller should hold map_sem for read and request_mutex.
795 */ 932 */
796static int __map_osds(struct ceph_osd_client *osdc, 933static int __map_request(struct ceph_osd_client *osdc,
797 struct ceph_osd_request *req) 934 struct ceph_osd_request *req)
798{ 935{
799 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; 936 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
800 struct ceph_pg pgid; 937 struct ceph_pg pgid;
@@ -802,11 +939,13 @@ static int __map_osds(struct ceph_osd_client *osdc,
802 int o = -1, num = 0; 939 int o = -1, num = 0;
803 int err; 940 int err;
804 941
805 dout("map_osds %p tid %lld\n", req, req->r_tid); 942 dout("map_request %p tid %lld\n", req, req->r_tid);
806 err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, 943 err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
807 &req->r_file_layout, osdc->osdmap); 944 &req->r_file_layout, osdc->osdmap);
808 if (err) 945 if (err) {
946 list_move(&req->r_req_lru_item, &osdc->req_notarget);
809 return err; 947 return err;
948 }
810 pgid = reqhead->layout.ol_pgid; 949 pgid = reqhead->layout.ol_pgid;
811 req->r_pgid = pgid; 950 req->r_pgid = pgid;
812 951
@@ -823,7 +962,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
823 (req->r_osd == NULL && o == -1)) 962 (req->r_osd == NULL && o == -1))
824 return 0; /* no change */ 963 return 0; /* no change */
825 964
826 dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n", 965 dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n",
827 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, 966 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
828 req->r_osd ? req->r_osd->o_osd : -1); 967 req->r_osd ? req->r_osd->o_osd : -1);
829 968
@@ -841,10 +980,12 @@ static int __map_osds(struct ceph_osd_client *osdc,
841 if (!req->r_osd && o >= 0) { 980 if (!req->r_osd && o >= 0) {
842 err = -ENOMEM; 981 err = -ENOMEM;
843 req->r_osd = create_osd(osdc); 982 req->r_osd = create_osd(osdc);
844 if (!req->r_osd) 983 if (!req->r_osd) {
984 list_move(&req->r_req_lru_item, &osdc->req_notarget);
845 goto out; 985 goto out;
986 }
846 987
847 dout("map_osds osd %p is osd%d\n", req->r_osd, o); 988 dout("map_request osd %p is osd%d\n", req->r_osd, o);
848 req->r_osd->o_osd = o; 989 req->r_osd->o_osd = o;
849 req->r_osd->o_con.peer_name.num = cpu_to_le64(o); 990 req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
850 __insert_osd(osdc, req->r_osd); 991 __insert_osd(osdc, req->r_osd);
@@ -855,6 +996,9 @@ static int __map_osds(struct ceph_osd_client *osdc,
855 if (req->r_osd) { 996 if (req->r_osd) {
856 __remove_osd_from_lru(req->r_osd); 997 __remove_osd_from_lru(req->r_osd);
857 list_add(&req->r_osd_item, &req->r_osd->o_requests); 998 list_add(&req->r_osd_item, &req->r_osd->o_requests);
999 list_move(&req->r_req_lru_item, &osdc->req_unsent);
1000 } else {
1001 list_move(&req->r_req_lru_item, &osdc->req_notarget);
858 } 1002 }
859 err = 1; /* osd or pg changed */ 1003 err = 1; /* osd or pg changed */
860 1004
@@ -869,16 +1013,6 @@ static int __send_request(struct ceph_osd_client *osdc,
869 struct ceph_osd_request *req) 1013 struct ceph_osd_request *req)
870{ 1014{
871 struct ceph_osd_request_head *reqhead; 1015 struct ceph_osd_request_head *reqhead;
872 int err;
873
874 err = __map_osds(osdc, req);
875 if (err < 0)
876 return err;
877 if (req->r_osd == NULL) {
878 dout("send_request %p no up osds in pg\n", req);
879 ceph_monc_request_next_osdmap(&osdc->client->monc);
880 return 0;
881 }
882 1016
883 dout("send_request %p tid %llu to osd%d flags %d\n", 1017 dout("send_request %p tid %llu to osd%d flags %d\n",
884 req, req->r_tid, req->r_osd->o_osd, req->r_flags); 1018 req, req->r_tid, req->r_osd->o_osd, req->r_flags);
@@ -898,6 +1032,21 @@ static int __send_request(struct ceph_osd_client *osdc,
898} 1032}
899 1033
900/* 1034/*
1035 * Send any requests in the queue (req_unsent).
1036 */
1037static void send_queued(struct ceph_osd_client *osdc)
1038{
1039 struct ceph_osd_request *req, *tmp;
1040
1041 dout("send_queued\n");
1042 mutex_lock(&osdc->request_mutex);
1043 list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) {
1044 __send_request(osdc, req);
1045 }
1046 mutex_unlock(&osdc->request_mutex);
1047}
1048
1049/*
901 * Timeout callback, called every N seconds when 1 or more osd 1050 * Timeout callback, called every N seconds when 1 or more osd
902 * requests has been active for more than N seconds. When this 1051 * requests has been active for more than N seconds. When this
903 * happens, we ping all OSDs with requests who have timed out to 1052 * happens, we ping all OSDs with requests who have timed out to
@@ -916,30 +1065,13 @@ static void handle_timeout(struct work_struct *work)
916 unsigned long keepalive = 1065 unsigned long keepalive =
917 osdc->client->options->osd_keepalive_timeout * HZ; 1066 osdc->client->options->osd_keepalive_timeout * HZ;
918 unsigned long last_stamp = 0; 1067 unsigned long last_stamp = 0;
919 struct rb_node *p;
920 struct list_head slow_osds; 1068 struct list_head slow_osds;
921
922 dout("timeout\n"); 1069 dout("timeout\n");
923 down_read(&osdc->map_sem); 1070 down_read(&osdc->map_sem);
924 1071
925 ceph_monc_request_next_osdmap(&osdc->client->monc); 1072 ceph_monc_request_next_osdmap(&osdc->client->monc);
926 1073
927 mutex_lock(&osdc->request_mutex); 1074 mutex_lock(&osdc->request_mutex);
928 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
929 req = rb_entry(p, struct ceph_osd_request, r_node);
930
931 if (req->r_resend) {
932 int err;
933
934 dout("osdc resending prev failed %lld\n", req->r_tid);
935 err = __send_request(osdc, req);
936 if (err)
937 dout("osdc failed again on %lld\n", req->r_tid);
938 else
939 req->r_resend = false;
940 continue;
941 }
942 }
943 1075
944 /* 1076 /*
945 * reset osds that appear to be _really_ unresponsive. this 1077 * reset osds that appear to be _really_ unresponsive. this
@@ -963,7 +1095,7 @@ static void handle_timeout(struct work_struct *work)
963 BUG_ON(!osd); 1095 BUG_ON(!osd);
964 pr_warning(" tid %llu timed out on osd%d, will reset osd\n", 1096 pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
965 req->r_tid, osd->o_osd); 1097 req->r_tid, osd->o_osd);
966 __kick_requests(osdc, osd); 1098 __kick_osd_requests(osdc, osd);
967 } 1099 }
968 1100
969 /* 1101 /*
@@ -991,7 +1123,7 @@ static void handle_timeout(struct work_struct *work)
991 1123
992 __schedule_osd_timeout(osdc); 1124 __schedule_osd_timeout(osdc);
993 mutex_unlock(&osdc->request_mutex); 1125 mutex_unlock(&osdc->request_mutex);
994 1126 send_queued(osdc);
995 up_read(&osdc->map_sem); 1127 up_read(&osdc->map_sem);
996} 1128}
997 1129
@@ -1035,7 +1167,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1035 numops * sizeof(struct ceph_osd_op)) 1167 numops * sizeof(struct ceph_osd_op))
1036 goto bad; 1168 goto bad;
1037 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); 1169 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
1038
1039 /* lookup */ 1170 /* lookup */
1040 mutex_lock(&osdc->request_mutex); 1171 mutex_lock(&osdc->request_mutex);
1041 req = __lookup_request(osdc, tid); 1172 req = __lookup_request(osdc, tid);
@@ -1079,6 +1210,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1079 1210
1080 dout("handle_reply tid %llu flags %d\n", tid, flags); 1211 dout("handle_reply tid %llu flags %d\n", tid, flags);
1081 1212
1213 if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK))
1214 __register_linger_request(osdc, req);
1215
1082 /* either this is a read, or we got the safe response */ 1216 /* either this is a read, or we got the safe response */
1083 if (result < 0 || 1217 if (result < 0 ||
1084 (flags & CEPH_OSD_FLAG_ONDISK) || 1218 (flags & CEPH_OSD_FLAG_ONDISK) ||
@@ -1099,6 +1233,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1099 } 1233 }
1100 1234
1101done: 1235done:
1236 dout("req=%p req->r_linger=%d\n", req, req->r_linger);
1102 ceph_osdc_put_request(req); 1237 ceph_osdc_put_request(req);
1103 return; 1238 return;
1104 1239
@@ -1109,108 +1244,83 @@ bad:
1109 ceph_msg_dump(msg); 1244 ceph_msg_dump(msg);
1110} 1245}
1111 1246
1112 1247static void reset_changed_osds(struct ceph_osd_client *osdc)
1113static int __kick_requests(struct ceph_osd_client *osdc,
1114 struct ceph_osd *kickosd)
1115{ 1248{
1116 struct ceph_osd_request *req;
1117 struct rb_node *p, *n; 1249 struct rb_node *p, *n;
1118 int needmap = 0;
1119 int err;
1120 1250
1121 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); 1251 for (p = rb_first(&osdc->osds); p; p = n) {
1122 if (kickosd) { 1252 struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
1123 err = __reset_osd(osdc, kickosd); 1253
1124 if (err == -EAGAIN) 1254 n = rb_next(p);
1125 return 1; 1255 if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
1126 } else { 1256 memcmp(&osd->o_con.peer_addr,
1127 for (p = rb_first(&osdc->osds); p; p = n) { 1257 ceph_osd_addr(osdc->osdmap,
1128 struct ceph_osd *osd = 1258 osd->o_osd),
1129 rb_entry(p, struct ceph_osd, o_node); 1259 sizeof(struct ceph_entity_addr)) != 0)
1130 1260 __reset_osd(osdc, osd);
1131 n = rb_next(p);
1132 if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
1133 memcmp(&osd->o_con.peer_addr,
1134 ceph_osd_addr(osdc->osdmap,
1135 osd->o_osd),
1136 sizeof(struct ceph_entity_addr)) != 0)
1137 __reset_osd(osdc, osd);
1138 }
1139 } 1261 }
1262}
1263
1264/*
1265 * Requeue requests whose mapping to an OSD has changed. If requests map to
1266 * no osd, request a new map.
1267 *
1268 * Caller should hold map_sem for read and request_mutex.
1269 */
1270static void kick_requests(struct ceph_osd_client *osdc)
1271{
1272 struct ceph_osd_request *req, *nreq;
1273 struct rb_node *p;
1274 int needmap = 0;
1275 int err;
1140 1276
1277 dout("kick_requests\n");
1278 mutex_lock(&osdc->request_mutex);
1141 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { 1279 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
1142 req = rb_entry(p, struct ceph_osd_request, r_node); 1280 req = rb_entry(p, struct ceph_osd_request, r_node);
1143 1281 err = __map_request(osdc, req);
1144 if (req->r_resend) { 1282 if (err < 0)
1145 dout(" r_resend set on tid %llu\n", req->r_tid); 1283 continue; /* error */
1146 __cancel_request(req); 1284 if (req->r_osd == NULL) {
1147 goto kick; 1285 dout("%p tid %llu maps to no osd\n", req, req->r_tid);
1148 } 1286 needmap++; /* request a newer map */
1149 if (req->r_osd && kickosd == req->r_osd) { 1287 } else if (err > 0) {
1150 __cancel_request(req); 1288 dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
1151 goto kick; 1289 req->r_osd ? req->r_osd->o_osd : -1);
1290 if (!req->r_linger)
1291 req->r_flags |= CEPH_OSD_FLAG_RETRY;
1152 } 1292 }
1293 }
1153 1294
1154 err = __map_osds(osdc, req); 1295 list_for_each_entry_safe(req, nreq, &osdc->req_linger,
1296 r_linger_item) {
1297 dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
1298
1299 err = __map_request(osdc, req);
1155 if (err == 0) 1300 if (err == 0)
1156 continue; /* no change */ 1301 continue; /* no change and no osd was specified */
1157 if (err < 0) { 1302 if (err < 0)
1158 /* 1303 continue; /* hrm! */
1159 * FIXME: really, we should set the request
1160 * error and fail if this isn't a 'nofail'
1161 * request, but that's a fair bit more
1162 * complicated to do. So retry!
1163 */
1164 dout(" setting r_resend on %llu\n", req->r_tid);
1165 req->r_resend = true;
1166 continue;
1167 }
1168 if (req->r_osd == NULL) { 1304 if (req->r_osd == NULL) {
1169 dout("tid %llu maps to no valid osd\n", req->r_tid); 1305 dout("tid %llu maps to no valid osd\n", req->r_tid);
1170 needmap++; /* request a newer map */ 1306 needmap++; /* request a newer map */
1171 continue; 1307 continue;
1172 } 1308 }
1173 1309
1174kick: 1310 dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
1175 dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
1176 req->r_osd ? req->r_osd->o_osd : -1); 1311 req->r_osd ? req->r_osd->o_osd : -1);
1177 req->r_flags |= CEPH_OSD_FLAG_RETRY; 1312 __unregister_linger_request(osdc, req);
1178 err = __send_request(osdc, req); 1313 __register_request(osdc, req);
1179 if (err) {
1180 dout(" setting r_resend on %llu\n", req->r_tid);
1181 req->r_resend = true;
1182 }
1183 } 1314 }
1184
1185 return needmap;
1186}
1187
1188/*
1189 * Resubmit osd requests whose osd or osd address has changed. Request
1190 * a new osd map if osds are down, or we are otherwise unable to determine
1191 * how to direct a request.
1192 *
1193 * Close connections to down osds.
1194 *
1195 * If @who is specified, resubmit requests for that specific osd.
1196 *
1197 * Caller should hold map_sem for read and request_mutex.
1198 */
1199static void kick_requests(struct ceph_osd_client *osdc,
1200 struct ceph_osd *kickosd)
1201{
1202 int needmap;
1203
1204 mutex_lock(&osdc->request_mutex);
1205 needmap = __kick_requests(osdc, kickosd);
1206 mutex_unlock(&osdc->request_mutex); 1315 mutex_unlock(&osdc->request_mutex);
1207 1316
1208 if (needmap) { 1317 if (needmap) {
1209 dout("%d requests for down osds, need new map\n", needmap); 1318 dout("%d requests for down osds, need new map\n", needmap);
1210 ceph_monc_request_next_osdmap(&osdc->client->monc); 1319 ceph_monc_request_next_osdmap(&osdc->client->monc);
1211 } 1320 }
1212
1213} 1321}
1322
1323
1214/* 1324/*
1215 * Process updated osd map. 1325 * Process updated osd map.
1216 * 1326 *
@@ -1263,6 +1373,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1263 ceph_osdmap_destroy(osdc->osdmap); 1373 ceph_osdmap_destroy(osdc->osdmap);
1264 osdc->osdmap = newmap; 1374 osdc->osdmap = newmap;
1265 } 1375 }
1376 kick_requests(osdc);
1377 reset_changed_osds(osdc);
1266 } else { 1378 } else {
1267 dout("ignoring incremental map %u len %d\n", 1379 dout("ignoring incremental map %u len %d\n",
1268 epoch, maplen); 1380 epoch, maplen);
@@ -1300,6 +1412,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1300 osdc->osdmap = newmap; 1412 osdc->osdmap = newmap;
1301 if (oldmap) 1413 if (oldmap)
1302 ceph_osdmap_destroy(oldmap); 1414 ceph_osdmap_destroy(oldmap);
1415 kick_requests(osdc);
1303 } 1416 }
1304 p += maplen; 1417 p += maplen;
1305 nr_maps--; 1418 nr_maps--;
@@ -1308,8 +1421,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1308done: 1421done:
1309 downgrade_write(&osdc->map_sem); 1422 downgrade_write(&osdc->map_sem);
1310 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); 1423 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
1311 if (newmap) 1424 send_queued(osdc);
1312 kick_requests(osdc, NULL);
1313 up_read(&osdc->map_sem); 1425 up_read(&osdc->map_sem);
1314 wake_up_all(&osdc->client->auth_wq); 1426 wake_up_all(&osdc->client->auth_wq);
1315 return; 1427 return;
@@ -1322,6 +1434,223 @@ bad:
1322} 1434}
1323 1435
1324/* 1436/*
1437 * watch/notify callback event infrastructure
1438 *
1439 * These callbacks are used both for watch and notify operations.
1440 */
1441static void __release_event(struct kref *kref)
1442{
1443 struct ceph_osd_event *event =
1444 container_of(kref, struct ceph_osd_event, kref);
1445
1446 dout("__release_event %p\n", event);
1447 kfree(event);
1448}
1449
1450static void get_event(struct ceph_osd_event *event)
1451{
1452 kref_get(&event->kref);
1453}
1454
1455void ceph_osdc_put_event(struct ceph_osd_event *event)
1456{
1457 kref_put(&event->kref, __release_event);
1458}
1459EXPORT_SYMBOL(ceph_osdc_put_event);
1460
1461static void __insert_event(struct ceph_osd_client *osdc,
1462 struct ceph_osd_event *new)
1463{
1464 struct rb_node **p = &osdc->event_tree.rb_node;
1465 struct rb_node *parent = NULL;
1466 struct ceph_osd_event *event = NULL;
1467
1468 while (*p) {
1469 parent = *p;
1470 event = rb_entry(parent, struct ceph_osd_event, node);
1471 if (new->cookie < event->cookie)
1472 p = &(*p)->rb_left;
1473 else if (new->cookie > event->cookie)
1474 p = &(*p)->rb_right;
1475 else
1476 BUG();
1477 }
1478
1479 rb_link_node(&new->node, parent, p);
1480 rb_insert_color(&new->node, &osdc->event_tree);
1481}
1482
1483static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc,
1484 u64 cookie)
1485{
1486 struct rb_node **p = &osdc->event_tree.rb_node;
1487 struct rb_node *parent = NULL;
1488 struct ceph_osd_event *event = NULL;
1489
1490 while (*p) {
1491 parent = *p;
1492 event = rb_entry(parent, struct ceph_osd_event, node);
1493 if (cookie < event->cookie)
1494 p = &(*p)->rb_left;
1495 else if (cookie > event->cookie)
1496 p = &(*p)->rb_right;
1497 else
1498 return event;
1499 }
1500 return NULL;
1501}
1502
1503static void __remove_event(struct ceph_osd_event *event)
1504{
1505 struct ceph_osd_client *osdc = event->osdc;
1506
1507 if (!RB_EMPTY_NODE(&event->node)) {
1508 dout("__remove_event removed %p\n", event);
1509 rb_erase(&event->node, &osdc->event_tree);
1510 ceph_osdc_put_event(event);
1511 } else {
1512 dout("__remove_event didn't remove %p\n", event);
1513 }
1514}
1515
1516int ceph_osdc_create_event(struct ceph_osd_client *osdc,
1517 void (*event_cb)(u64, u64, u8, void *),
1518 int one_shot, void *data,
1519 struct ceph_osd_event **pevent)
1520{
1521 struct ceph_osd_event *event;
1522
1523 event = kmalloc(sizeof(*event), GFP_NOIO);
1524 if (!event)
1525 return -ENOMEM;
1526
1527 dout("create_event %p\n", event);
1528 event->cb = event_cb;
1529 event->one_shot = one_shot;
1530 event->data = data;
1531 event->osdc = osdc;
1532 INIT_LIST_HEAD(&event->osd_node);
1533 kref_init(&event->kref); /* one ref for us */
1534 kref_get(&event->kref); /* one ref for the caller */
1535 init_completion(&event->completion);
1536
1537 spin_lock(&osdc->event_lock);
1538 event->cookie = ++osdc->event_count;
1539 __insert_event(osdc, event);
1540 spin_unlock(&osdc->event_lock);
1541
1542 *pevent = event;
1543 return 0;
1544}
1545EXPORT_SYMBOL(ceph_osdc_create_event);
1546
1547void ceph_osdc_cancel_event(struct ceph_osd_event *event)
1548{
1549 struct ceph_osd_client *osdc = event->osdc;
1550
1551 dout("cancel_event %p\n", event);
1552 spin_lock(&osdc->event_lock);
1553 __remove_event(event);
1554 spin_unlock(&osdc->event_lock);
1555 ceph_osdc_put_event(event); /* caller's */
1556}
1557EXPORT_SYMBOL(ceph_osdc_cancel_event);
1558
1559
1560static void do_event_work(struct work_struct *work)
1561{
1562 struct ceph_osd_event_work *event_work =
1563 container_of(work, struct ceph_osd_event_work, work);
1564 struct ceph_osd_event *event = event_work->event;
1565 u64 ver = event_work->ver;
1566 u64 notify_id = event_work->notify_id;
1567 u8 opcode = event_work->opcode;
1568
1569 dout("do_event_work completing %p\n", event);
1570 event->cb(ver, notify_id, opcode, event->data);
1571 complete(&event->completion);
1572 dout("do_event_work completed %p\n", event);
1573 ceph_osdc_put_event(event);
1574 kfree(event_work);
1575}
1576
1577
1578/*
1579 * Process osd watch notifications
1580 */
1581void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1582{
1583 void *p, *end;
1584 u8 proto_ver;
1585 u64 cookie, ver, notify_id;
1586 u8 opcode;
1587 struct ceph_osd_event *event;
1588 struct ceph_osd_event_work *event_work;
1589
1590 p = msg->front.iov_base;
1591 end = p + msg->front.iov_len;
1592
1593 ceph_decode_8_safe(&p, end, proto_ver, bad);
1594 ceph_decode_8_safe(&p, end, opcode, bad);
1595 ceph_decode_64_safe(&p, end, cookie, bad);
1596 ceph_decode_64_safe(&p, end, ver, bad);
1597 ceph_decode_64_safe(&p, end, notify_id, bad);
1598
1599 spin_lock(&osdc->event_lock);
1600 event = __find_event(osdc, cookie);
1601 if (event) {
1602 get_event(event);
1603 if (event->one_shot)
1604 __remove_event(event);
1605 }
1606 spin_unlock(&osdc->event_lock);
1607 dout("handle_watch_notify cookie %lld ver %lld event %p\n",
1608 cookie, ver, event);
1609 if (event) {
1610 event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
1611 if (!event_work) {
1612 dout("ERROR: could not allocate event_work\n");
1613 goto done_err;
1614 }
1615 INIT_WORK(&event_work->work, do_event_work);
1616 event_work->event = event;
1617 event_work->ver = ver;
1618 event_work->notify_id = notify_id;
1619 event_work->opcode = opcode;
1620 if (!queue_work(osdc->notify_wq, &event_work->work)) {
1621 dout("WARNING: failed to queue notify event work\n");
1622 goto done_err;
1623 }
1624 }
1625
1626 return;
1627
1628done_err:
1629 complete(&event->completion);
1630 ceph_osdc_put_event(event);
1631 return;
1632
1633bad:
1634 pr_err("osdc handle_watch_notify corrupt msg\n");
1635 return;
1636}
1637
1638int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout)
1639{
1640 int err;
1641
1642 dout("wait_event %p\n", event);
1643 err = wait_for_completion_interruptible_timeout(&event->completion,
1644 timeout * HZ);
1645 ceph_osdc_put_event(event);
1646 if (err > 0)
1647 err = 0;
1648 dout("wait_event %p returns %d\n", event, err);
1649 return err;
1650}
1651EXPORT_SYMBOL(ceph_osdc_wait_event);
1652
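[Editor's note] Taken together, the event API added above gives a watch/notify consumer a cookie-keyed callback plus a completion to wait on. A hedged sketch of the expected call sequence, using only the functions and callback signature introduced in this hunk; the OSD op that actually carries event->cookie to the OSD is elided:

static void my_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
	/* runs from osdc->notify_wq once handle_watch_notify() matches
	 * the cookie */
	pr_info("notify: ver %llu id %llu op %u\n", ver, notify_id, opcode);
}

static int my_notify_once(struct ceph_osd_client *osdc)
{
	struct ceph_osd_event *event;
	int ret;

	ret = ceph_osdc_create_event(osdc, my_notify_cb, 1 /* one_shot */,
				     NULL, &event);
	if (ret)
		return ret;

	/* ... build and start the OSD request that carries event->cookie;
	 * if that fails, ceph_osdc_cancel_event(event) drops our ref ... */

	/* waits for the callback (or a timeout) and drops our reference */
	return ceph_osdc_wait_event(event, 10 /* seconds */);
}
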
1653/*
1325 * Register request, send initial attempt. 1654 * Register request, send initial attempt.
1326 */ 1655 */
1327int ceph_osdc_start_request(struct ceph_osd_client *osdc, 1656int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1347,18 +1676,27 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1347 * the request still hasn't been touched yet. 1676
1348 */ 1677 */
1349 if (req->r_sent == 0) { 1678 if (req->r_sent == 0) {
1350 rc = __send_request(osdc, req); 1679 rc = __map_request(osdc, req);
1351 if (rc) { 1680 if (rc < 0)
1352 if (nofail) { 1681 goto out_unlock;
1353 dout("osdc_start_request failed send, " 1682 if (req->r_osd == NULL) {
1354 " marking %lld\n", req->r_tid); 1683 dout("send_request %p no up osds in pg\n", req);
1355 req->r_resend = true; 1684 ceph_monc_request_next_osdmap(&osdc->client->monc);
1356 rc = 0; 1685 } else {
1357 } else { 1686 rc = __send_request(osdc, req);
1358 __unregister_request(osdc, req); 1687 if (rc) {
1688 if (nofail) {
1689 dout("osdc_start_request failed send, "
1690 " will retry %lld\n", req->r_tid);
1691 rc = 0;
1692 } else {
1693 __unregister_request(osdc, req);
1694 }
1359 } 1695 }
1360 } 1696 }
1361 } 1697 }
1698
1699out_unlock:
1362 mutex_unlock(&osdc->request_mutex); 1700 mutex_unlock(&osdc->request_mutex);
1363 up_read(&osdc->map_sem); 1701 up_read(&osdc->map_sem);
1364 return rc; 1702 return rc;
@@ -1441,9 +1779,15 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
1441 INIT_LIST_HEAD(&osdc->osd_lru); 1779 INIT_LIST_HEAD(&osdc->osd_lru);
1442 osdc->requests = RB_ROOT; 1780 osdc->requests = RB_ROOT;
1443 INIT_LIST_HEAD(&osdc->req_lru); 1781 INIT_LIST_HEAD(&osdc->req_lru);
1782 INIT_LIST_HEAD(&osdc->req_unsent);
1783 INIT_LIST_HEAD(&osdc->req_notarget);
1784 INIT_LIST_HEAD(&osdc->req_linger);
1444 osdc->num_requests = 0; 1785 osdc->num_requests = 0;
1445 INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); 1786 INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
1446 INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); 1787 INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
1788 spin_lock_init(&osdc->event_lock);
1789 osdc->event_tree = RB_ROOT;
1790 osdc->event_count = 0;
1447 1791
1448 schedule_delayed_work(&osdc->osds_timeout_work, 1792 schedule_delayed_work(&osdc->osds_timeout_work,
1449 round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); 1793 round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
@@ -1463,6 +1807,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
1463 "osd_op_reply"); 1807 "osd_op_reply");
1464 if (err < 0) 1808 if (err < 0)
1465 goto out_msgpool; 1809 goto out_msgpool;
1810
1811 osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
1812 if (IS_ERR(osdc->notify_wq)) {
1813 err = PTR_ERR(osdc->notify_wq);
1814 osdc->notify_wq = NULL;
1815 goto out_msgpool;
1816 }
1466 return 0; 1817 return 0;
1467 1818
1468out_msgpool: 1819out_msgpool:
@@ -1476,6 +1827,8 @@ EXPORT_SYMBOL(ceph_osdc_init);
1476 1827
1477void ceph_osdc_stop(struct ceph_osd_client *osdc) 1828void ceph_osdc_stop(struct ceph_osd_client *osdc)
1478{ 1829{
1830 flush_workqueue(osdc->notify_wq);
1831 destroy_workqueue(osdc->notify_wq);
1479 cancel_delayed_work_sync(&osdc->timeout_work); 1832 cancel_delayed_work_sync(&osdc->timeout_work);
1480 cancel_delayed_work_sync(&osdc->osds_timeout_work); 1833 cancel_delayed_work_sync(&osdc->osds_timeout_work);
1481 if (osdc->osdmap) { 1834 if (osdc->osdmap) {
@@ -1483,6 +1836,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
1483 osdc->osdmap = NULL; 1836 osdc->osdmap = NULL;
1484 } 1837 }
1485 remove_old_osds(osdc, 1); 1838 remove_old_osds(osdc, 1);
1839 WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
1486 mempool_destroy(osdc->req_mempool); 1840 mempool_destroy(osdc->req_mempool);
1487 ceph_msgpool_destroy(&osdc->msgpool_op); 1841 ceph_msgpool_destroy(&osdc->msgpool_op);
1488 ceph_msgpool_destroy(&osdc->msgpool_op_reply); 1842 ceph_msgpool_destroy(&osdc->msgpool_op_reply);
@@ -1591,6 +1945,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
1591 case CEPH_MSG_OSD_OPREPLY: 1945 case CEPH_MSG_OSD_OPREPLY:
1592 handle_reply(osdc, msg, con); 1946 handle_reply(osdc, msg, con);
1593 break; 1947 break;
1948 case CEPH_MSG_WATCH_NOTIFY:
1949 handle_watch_notify(osdc, msg);
1950 break;
1594 1951
1595 default: 1952 default:
1596 pr_err("received unknown message type %d %s\n", type, 1953 pr_err("received unknown message type %d %s\n", type,
@@ -1684,6 +2041,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
1684 2041
1685 switch (type) { 2042 switch (type) {
1686 case CEPH_MSG_OSD_MAP: 2043 case CEPH_MSG_OSD_MAP:
2044 case CEPH_MSG_WATCH_NOTIFY:
1687 return ceph_msg_new(type, front, GFP_NOFS); 2045 return ceph_msg_new(type, front, GFP_NOFS);
1688 case CEPH_MSG_OSD_OPREPLY: 2046 case CEPH_MSG_OSD_OPREPLY:
1689 return get_reply(con, hdr, skip); 2047 return get_reply(con, hdr, skip);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a040e64c69f..cd9c21df87d1 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
16 int num_pages, bool write_page) 16 int num_pages, bool write_page)
17{ 17{
18 struct page **pages; 18 struct page **pages;
19 int rc; 19 int got = 0;
20 int rc = 0;
20 21
21 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 22 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
22 if (!pages) 23 if (!pages)
23 return ERR_PTR(-ENOMEM); 24 return ERR_PTR(-ENOMEM);
24 25
25 down_read(&current->mm->mmap_sem); 26 down_read(&current->mm->mmap_sem);
26 rc = get_user_pages(current, current->mm, (unsigned long)data, 27 while (got < num_pages) {
27 num_pages, write_page, 0, pages, NULL); 28 rc = get_user_pages(current, current->mm,
29 (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
30 num_pages - got, write_page, 0, pages + got, NULL);
31 if (rc < 0)
32 break;
33 BUG_ON(rc == 0);
34 got += rc;
35 }
28 up_read(&current->mm->mmap_sem); 36 up_read(&current->mm->mmap_sem);
29 if (rc < num_pages) 37 if (rc < 0)
30 goto fail; 38 goto fail;
31 return pages; 39 return pages;
32 40
33fail: 41fail:
34 ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); 42 ceph_put_page_vector(pages, got, false);
35 return ERR_PTR(rc); 43 return ERR_PTR(rc);
36} 44}
37EXPORT_SYMBOL(ceph_get_direct_page_vector); 45EXPORT_SYMBOL(ceph_get_direct_page_vector);
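[Editor's note] For reference, a hedged sketch of how this helper pair is typically consumed: pin the pages backing a user buffer, perform the I/O against them, then release them. Only the two ceph_*_page_vector() calls are taken from this file; the page count calculation assumes a page-aligned buffer and is illustrative:

	struct page **pages;
	int num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; /* page-aligned udata assumed */

	pages = ceph_get_direct_page_vector(udata, num_pages, true /* write_page */);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/* ... fill the pinned pages from the OSD reply ... */

	ceph_put_page_vector(pages, num_pages, true /* dirty */);
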
diff --git a/net/compat.c b/net/compat.c
index 3649d5895361..c578d9382e19 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
722 722
723/* Argument list sizes for compat_sys_socketcall */ 723/* Argument list sizes for compat_sys_socketcall */
724#define AL(x) ((x) * sizeof(u32)) 724#define AL(x) ((x) * sizeof(u32))
725static unsigned char nas[20] = { 725static unsigned char nas[21] = {
726 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 726 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
727 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 727 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
728 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 728 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
729 AL(4), AL(5) 729 AL(4), AL(5), AL(4)
730}; 730};
731#undef AL 731#undef AL
732 732
@@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns
735 return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); 735 return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
736} 736}
737 737
738asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
739 unsigned vlen, unsigned int flags)
740{
741 return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
742 flags | MSG_CMSG_COMPAT);
743}
744
738asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) 745asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
739{ 746{
740 return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); 747 return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
@@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
780 u32 a[6]; 787 u32 a[6];
781 u32 a0, a1; 788 u32 a0, a1;
782 789
783 if (call < SYS_SOCKET || call > SYS_RECVMMSG) 790 if (call < SYS_SOCKET || call > SYS_SENDMMSG)
784 return -EINVAL; 791 return -EINVAL;
785 if (copy_from_user(a, args, nas[call])) 792 if (copy_from_user(a, args, nas[call]))
786 return -EFAULT; 793 return -EFAULT;
@@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
839 case SYS_SENDMSG: 846 case SYS_SENDMSG:
840 ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); 847 ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
841 break; 848 break;
849 case SYS_SENDMMSG:
850 ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]);
851 break;
842 case SYS_RECVMSG: 852 case SYS_RECVMSG:
843 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); 853 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
844 break; 854 break;
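[Editor's note] The new SYS_SENDMMSG socketcall entry (argument size AL(4): fd, mmsg vector, vlen, flags) is what a 32-bit process reaches when it calls sendmmsg() on a 64-bit kernel. A hedged userspace sketch of the call being plumbed through here; it assumes a libc that already exposes sendmmsg():

#define _GNU_SOURCE
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int send_two(int fd)
{
	struct mmsghdr msgs[2];
	struct iovec iov[2] = {
		{ .iov_base = "first",  .iov_len = 5 },
		{ .iov_base = "second", .iov_len = 6 },
	};

	memset(msgs, 0, sizeof(msgs));
	msgs[0].msg_hdr.msg_iov = &iov[0];
	msgs[0].msg_hdr.msg_iovlen = 1;
	msgs[1].msg_hdr.msg_iov = &iov[1];
	msgs[1].msg_hdr.msg_iovlen = 1;

	/* returns the number of messages actually sent, or -1 on error */
	return sendmmsg(fd, msgs, 2, 0);
}
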
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc2..d94537914a71 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
132#include <trace/events/skb.h> 132#include <trace/events/skb.h>
133#include <linux/pci.h> 133#include <linux/pci.h>
134#include <linux/inetdevice.h> 134#include <linux/inetdevice.h>
135#include <linux/cpu_rmap.h>
135 136
136#include "net-sysfs.h" 137#include "net-sysfs.h"
137 138
@@ -947,7 +948,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
947} 948}
948EXPORT_SYMBOL(dev_alloc_name); 949EXPORT_SYMBOL(dev_alloc_name);
949 950
950static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) 951static int dev_get_valid_name(struct net_device *dev, const char *name)
951{ 952{
952 struct net *net; 953 struct net *net;
953 954
@@ -957,7 +958,7 @@ static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt
957 if (!dev_valid_name(name)) 958 if (!dev_valid_name(name))
958 return -EINVAL; 959 return -EINVAL;
959 960
960 if (fmt && strchr(name, '%')) 961 if (strchr(name, '%'))
961 return dev_alloc_name(dev, name); 962 return dev_alloc_name(dev, name);
962 else if (__dev_get_by_name(net, name)) 963 else if (__dev_get_by_name(net, name))
963 return -EEXIST; 964 return -EEXIST;
@@ -994,7 +995,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
994 995
995 memcpy(oldname, dev->name, IFNAMSIZ); 996 memcpy(oldname, dev->name, IFNAMSIZ);
996 997
997 err = dev_get_valid_name(dev, newname, 1); 998 err = dev_get_valid_name(dev, newname);
998 if (err < 0) 999 if (err < 0)
999 return err; 1000 return err;
1000 1001
@@ -1006,7 +1007,7 @@ rollback:
1006 } 1007 }
1007 1008
1008 write_lock_bh(&dev_base_lock); 1009 write_lock_bh(&dev_base_lock);
1009 hlist_del(&dev->name_hlist); 1010 hlist_del_rcu(&dev->name_hlist);
1010 write_unlock_bh(&dev_base_lock); 1011 write_unlock_bh(&dev_base_lock);
1011 1012
1012 synchronize_rcu(); 1013 synchronize_rcu();
@@ -1114,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
1114void dev_load(struct net *net, const char *name) 1115void dev_load(struct net *net, const char *name)
1115{ 1116{
1116 struct net_device *dev; 1117 struct net_device *dev;
1118 int no_module;
1117 1119
1118 rcu_read_lock(); 1120 rcu_read_lock();
1119 dev = dev_get_by_name_rcu(net, name); 1121 dev = dev_get_by_name_rcu(net, name);
1120 rcu_read_unlock(); 1122 rcu_read_unlock();
1121 1123
1122 if (!dev && capable(CAP_NET_ADMIN)) 1124 no_module = !dev;
1123 request_module("%s", name); 1125 if (no_module && capable(CAP_NET_ADMIN))
1126 no_module = request_module("netdev-%s", name);
1127 if (no_module && capable(CAP_SYS_MODULE)) {
1128 if (!request_module("%s", name))
1129 pr_err("Loading kernel module for a network device "
1130"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
1131"instead\n", name);
1132 }
1124} 1133}
1125EXPORT_SYMBOL(dev_load); 1134EXPORT_SYMBOL(dev_load);
1126 1135
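[Editor's note] The matching driver-side convention for the new request_module("netdev-%s") scheme is to advertise a "netdev-" module alias, so the interface can be auto-loaded with only CAP_NET_ADMIN. A hedged sketch; MODULE_ALIAS_NETDEV() is assumed to be the helper that expands to MODULE_ALIAS("netdev-" ...):

	/* in a driver that registers a well-known interface name */
	MODULE_ALIAS_NETDEV("ppp0");
	/* assumed equivalent to: MODULE_ALIAS("netdev-ppp0"); */
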
@@ -1131,9 +1140,6 @@ static int __dev_open(struct net_device *dev)
1131 1140
1132 ASSERT_RTNL(); 1141 ASSERT_RTNL();
1133 1142
1134 /*
1135 * Is it even present?
1136 */
1137 if (!netif_device_present(dev)) 1143 if (!netif_device_present(dev))
1138 return -ENODEV; 1144 return -ENODEV;
1139 1145
@@ -1142,9 +1148,6 @@ static int __dev_open(struct net_device *dev)
1142 if (ret) 1148 if (ret)
1143 return ret; 1149 return ret;
1144 1150
1145 /*
1146 * Call device private open method
1147 */
1148 set_bit(__LINK_STATE_START, &dev->state); 1151 set_bit(__LINK_STATE_START, &dev->state);
1149 1152
1150 if (ops->ndo_validate_addr) 1153 if (ops->ndo_validate_addr)
@@ -1153,31 +1156,12 @@ static int __dev_open(struct net_device *dev)
1153 if (!ret && ops->ndo_open) 1156 if (!ret && ops->ndo_open)
1154 ret = ops->ndo_open(dev); 1157 ret = ops->ndo_open(dev);
1155 1158
1156 /*
1157 * If it went open OK then:
1158 */
1159
1160 if (ret) 1159 if (ret)
1161 clear_bit(__LINK_STATE_START, &dev->state); 1160 clear_bit(__LINK_STATE_START, &dev->state);
1162 else { 1161 else {
1163 /*
1164 * Set the flags.
1165 */
1166 dev->flags |= IFF_UP; 1162 dev->flags |= IFF_UP;
1167
1168 /*
1169 * Enable NET_DMA
1170 */
1171 net_dmaengine_get(); 1163 net_dmaengine_get();
1172
1173 /*
1174 * Initialize multicasting status
1175 */
1176 dev_set_rx_mode(dev); 1164 dev_set_rx_mode(dev);
1177
1178 /*
1179 * Wakeup transmit queue engine
1180 */
1181 dev_activate(dev); 1165 dev_activate(dev);
1182 } 1166 }
1183 1167
@@ -1200,22 +1184,13 @@ int dev_open(struct net_device *dev)
1200{ 1184{
1201 int ret; 1185 int ret;
1202 1186
1203 /*
1204 * Is it already up?
1205 */
1206 if (dev->flags & IFF_UP) 1187 if (dev->flags & IFF_UP)
1207 return 0; 1188 return 0;
1208 1189
1209 /*
1210 * Open device
1211 */
1212 ret = __dev_open(dev); 1190 ret = __dev_open(dev);
1213 if (ret < 0) 1191 if (ret < 0)
1214 return ret; 1192 return ret;
1215 1193
1216 /*
1217 * ... and announce new interface.
1218 */
1219 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1194 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1220 call_netdevice_notifiers(NETDEV_UP, dev); 1195 call_netdevice_notifiers(NETDEV_UP, dev);
1221 1196
@@ -1231,10 +1206,6 @@ static int __dev_close_many(struct list_head *head)
1231 might_sleep(); 1206 might_sleep();
1232 1207
1233 list_for_each_entry(dev, head, unreg_list) { 1208 list_for_each_entry(dev, head, unreg_list) {
1234 /*
1235 * Tell people we are going down, so that they can
1236 * prepare to death, when device is still operating.
1237 */
1238 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1209 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1239 1210
1240 clear_bit(__LINK_STATE_START, &dev->state); 1211 clear_bit(__LINK_STATE_START, &dev->state);
@@ -1263,15 +1234,7 @@ static int __dev_close_many(struct list_head *head)
1263 if (ops->ndo_stop) 1234 if (ops->ndo_stop)
1264 ops->ndo_stop(dev); 1235 ops->ndo_stop(dev);
1265 1236
1266 /*
1267 * Device is now down.
1268 */
1269
1270 dev->flags &= ~IFF_UP; 1237 dev->flags &= ~IFF_UP;
1271
1272 /*
1273 * Shutdown NET_DMA
1274 */
1275 net_dmaengine_put(); 1238 net_dmaengine_put();
1276 } 1239 }
1277 1240
@@ -1289,7 +1252,7 @@ static int __dev_close(struct net_device *dev)
1289 return retval; 1252 return retval;
1290} 1253}
1291 1254
1292int dev_close_many(struct list_head *head) 1255static int dev_close_many(struct list_head *head)
1293{ 1256{
1294 struct net_device *dev, *tmp; 1257 struct net_device *dev, *tmp;
1295 LIST_HEAD(tmp_list); 1258 LIST_HEAD(tmp_list);
@@ -1300,9 +1263,6 @@ int dev_close_many(struct list_head *head)
1300 1263
1301 __dev_close_many(head); 1264 __dev_close_many(head);
1302 1265
1303 /*
1304 * Tell people we are down
1305 */
1306 list_for_each_entry(dev, head, unreg_list) { 1266 list_for_each_entry(dev, head, unreg_list) {
1307 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1267 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1308 call_netdevice_notifiers(NETDEV_DOWN, dev); 1268 call_netdevice_notifiers(NETDEV_DOWN, dev);
@@ -1324,11 +1284,13 @@ int dev_close_many(struct list_head *head)
1324 */ 1284 */
1325int dev_close(struct net_device *dev) 1285int dev_close(struct net_device *dev)
1326{ 1286{
1327 LIST_HEAD(single); 1287 if (dev->flags & IFF_UP) {
1288 LIST_HEAD(single);
1328 1289
1329 list_add(&dev->unreg_list, &single); 1290 list_add(&dev->unreg_list, &single);
1330 dev_close_many(&single); 1291 dev_close_many(&single);
1331 list_del(&single); 1292 list_del(&single);
1293 }
1332 return 0; 1294 return 0;
1333} 1295}
1334EXPORT_SYMBOL(dev_close); 1296EXPORT_SYMBOL(dev_close);
@@ -1344,26 +1306,25 @@ EXPORT_SYMBOL(dev_close);
1344 */ 1306 */
1345void dev_disable_lro(struct net_device *dev) 1307void dev_disable_lro(struct net_device *dev)
1346{ 1308{
1347 if (dev->ethtool_ops && dev->ethtool_ops->get_flags && 1309 u32 flags;
1348 dev->ethtool_ops->set_flags) { 1310
1349 u32 flags = dev->ethtool_ops->get_flags(dev); 1311 if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
1350 if (flags & ETH_FLAG_LRO) { 1312 flags = dev->ethtool_ops->get_flags(dev);
1351 flags &= ~ETH_FLAG_LRO; 1313 else
1352 dev->ethtool_ops->set_flags(dev, flags); 1314 flags = ethtool_op_get_flags(dev);
1353 } 1315
1354 } 1316 if (!(flags & ETH_FLAG_LRO))
1355 WARN_ON(dev->features & NETIF_F_LRO); 1317 return;
1318
1319 __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
1320 if (unlikely(dev->features & NETIF_F_LRO))
1321 netdev_WARN(dev, "failed to disable LRO!\n");
1356} 1322}
1357EXPORT_SYMBOL(dev_disable_lro); 1323EXPORT_SYMBOL(dev_disable_lro);
1358 1324
1359 1325
1360static int dev_boot_phase = 1; 1326static int dev_boot_phase = 1;
1361 1327
1362/*
1363 * Device change register/unregister. These are not inline or static
1364 * as we export them to the world.
1365 */
1366
1367/** 1328/**
1368 * register_netdevice_notifier - register a network notifier block 1329 * register_netdevice_notifier - register a network notifier block
1369 * @nb: notifier 1330 * @nb: notifier
@@ -1465,6 +1426,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1465 ASSERT_RTNL(); 1426 ASSERT_RTNL();
1466 return raw_notifier_call_chain(&netdev_chain, val, dev); 1427 return raw_notifier_call_chain(&netdev_chain, val, dev);
1467} 1428}
1429EXPORT_SYMBOL(call_netdevice_notifiers);
1468 1430
1469/* When > 0 there are consumers of rx skb time stamps */ 1431/* When > 0 there are consumers of rx skb time stamps */
1470static atomic_t netstamp_needed = ATOMIC_INIT(0); 1432static atomic_t netstamp_needed = ATOMIC_INIT(0);
@@ -1495,6 +1457,27 @@ static inline void net_timestamp_check(struct sk_buff *skb)
1495 __net_timestamp(skb); 1457 __net_timestamp(skb);
1496} 1458}
1497 1459
1460static inline bool is_skb_forwardable(struct net_device *dev,
1461 struct sk_buff *skb)
1462{
1463 unsigned int len;
1464
1465 if (!(dev->flags & IFF_UP))
1466 return false;
1467
1468 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1469 if (skb->len <= len)
1470 return true;
1471
1472 /* if TSO is enabled, we don't care about the length as the packet
1473 * could be forwarded without being segmented before
1474 */
1475 if (skb_is_gso(skb))
1476 return true;
1477
1478 return false;
1479}
1480
1498/** 1481/**
1499 * dev_forward_skb - loopback an skb to another netif 1482 * dev_forward_skb - loopback an skb to another netif
1500 * 1483 *
@@ -1518,8 +1501,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1518 skb_orphan(skb); 1501 skb_orphan(skb);
1519 nf_reset(skb); 1502 nf_reset(skb);
1520 1503
1521 if (unlikely(!(dev->flags & IFF_UP) || 1504 if (unlikely(!is_skb_forwardable(dev, skb))) {
1522 (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
1523 atomic_long_inc(&dev->rx_dropped); 1505 atomic_long_inc(&dev->rx_dropped);
1524 kfree_skb(skb); 1506 kfree_skb(skb);
1525 return NET_RX_DROP; 1507 return NET_RX_DROP;
@@ -1597,6 +1579,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1597 rcu_read_unlock(); 1579 rcu_read_unlock();
1598} 1580}
1599 1581
1582/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1583 * @dev: Network device
1584 * @txq: number of queues available
1585 *
1586 * If real_num_tx_queues is changed the tc mappings may no longer be
1587 * valid. To resolve this verify the tc mapping remains valid and, if
1588 * not, NULL the mapping. With no priorities mapping to this
1589 * offset/count pair it will no longer be used. In the worst case TC0
1590 * is invalid nothing can be done so disable priority mappings. It is
1591 * expected that drivers will fix this mapping if they can before
1592 * calling netif_set_real_num_tx_queues.
1593 */
1594static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1595{
1596 int i;
1597 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1598
1599 /* If TC0 is invalidated disable TC mapping */
1600 if (tc->offset + tc->count > txq) {
1601 pr_warning("Number of in use tx queues changed "
1602 "invalidating tc mappings. Priority "
1603 "traffic classification disabled!\n");
1604 dev->num_tc = 0;
1605 return;
1606 }
1607
1608 /* Invalidated prio to tc mappings set to TC0 */
1609 for (i = 1; i < TC_BITMASK + 1; i++) {
1610 int q = netdev_get_prio_tc_map(dev, i);
1611
1612 tc = &dev->tc_to_txq[q];
1613 if (tc->offset + tc->count > txq) {
1614 pr_warning("Number of in use tx queues "
1615 "changed. Priority %i to tc "
1616 "mapping %i is no longer valid "
1617 "setting map to 0\n",
1618 i, q);
1619 netdev_set_prio_tc_map(dev, i, 0);
1620 }
1621 }
1622}
1623
1600/* 1624/*
1601 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 1625 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1602 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 1626 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1608,7 +1632,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1608 if (txq < 1 || txq > dev->num_tx_queues) 1632 if (txq < 1 || txq > dev->num_tx_queues)
1609 return -EINVAL; 1633 return -EINVAL;
1610 1634
1611 if (dev->reg_state == NETREG_REGISTERED) { 1635 if (dev->reg_state == NETREG_REGISTERED ||
1636 dev->reg_state == NETREG_UNREGISTERING) {
1612 ASSERT_RTNL(); 1637 ASSERT_RTNL();
1613 1638
1614 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, 1639 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -1616,6 +1641,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1616 if (rc) 1641 if (rc)
1617 return rc; 1642 return rc;
1618 1643
1644 if (dev->num_tc)
1645 netif_setup_tc(dev, txq);
1646
1619 if (txq < dev->real_num_tx_queues) 1647 if (txq < dev->real_num_tx_queues)
1620 qdisc_reset_all_tx_gt(dev, txq); 1648 qdisc_reset_all_tx_gt(dev, txq);
1621 } 1649 }
@@ -1815,7 +1843,7 @@ EXPORT_SYMBOL(skb_checksum_help);
1815 * It may return NULL if the skb requires no segmentation. This is 1843 * It may return NULL if the skb requires no segmentation. This is
1816 * only possible when GSO is used for verifying header integrity. 1844 * only possible when GSO is used for verifying header integrity.
1817 */ 1845 */
1818struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) 1846struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
1819{ 1847{
1820 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 1848 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1821 struct packet_type *ptype; 1849 struct packet_type *ptype;
@@ -2003,7 +2031,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
2003 protocol == htons(ETH_P_FCOE))); 2031 protocol == htons(ETH_P_FCOE)));
2004} 2032}
2005 2033
2006static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) 2034static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
2007{ 2035{
2008 if (!can_checksum_protocol(features, protocol)) { 2036 if (!can_checksum_protocol(features, protocol)) {
2009 features &= ~NETIF_F_ALL_CSUM; 2037 features &= ~NETIF_F_ALL_CSUM;
@@ -2015,10 +2043,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features
2015 return features; 2043 return features;
2016} 2044}
2017 2045
2018int netif_skb_features(struct sk_buff *skb) 2046u32 netif_skb_features(struct sk_buff *skb)
2019{ 2047{
2020 __be16 protocol = skb->protocol; 2048 __be16 protocol = skb->protocol;
2021 int features = skb->dev->features; 2049 u32 features = skb->dev->features;
2022 2050
2023 if (protocol == htons(ETH_P_8021Q)) { 2051 if (protocol == htons(ETH_P_8021Q)) {
2024 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2052 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2063,10 +2091,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2063 int rc = NETDEV_TX_OK; 2091 int rc = NETDEV_TX_OK;
2064 2092
2065 if (likely(!skb->next)) { 2093 if (likely(!skb->next)) {
2066 int features; 2094 u32 features;
2067 2095
2068 /* 2096 /*
2069 * If device doesnt need skb->dst, release it right now while 2097 * If device doesn't need skb->dst, release it right now while
2070 * its hot in this cpu cache 2098 * its hot in this cpu cache
2071 */ 2099 */
2072 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2100 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
@@ -2126,7 +2154,7 @@ gso:
2126 nskb->next = NULL; 2154 nskb->next = NULL;
2127 2155
2128 /* 2156 /*
2129 * If device doesnt need nskb->dst, release it right now while 2157 * If device doesn't need nskb->dst, release it right now while
2130 * its hot in this cpu cache 2158 * its hot in this cpu cache
2131 */ 2159 */
2132 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2160 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
@@ -2165,6 +2193,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2165 unsigned int num_tx_queues) 2193 unsigned int num_tx_queues)
2166{ 2194{
2167 u32 hash; 2195 u32 hash;
2196 u16 qoffset = 0;
2197 u16 qcount = num_tx_queues;
2168 2198
2169 if (skb_rx_queue_recorded(skb)) { 2199 if (skb_rx_queue_recorded(skb)) {
2170 hash = skb_get_rx_queue(skb); 2200 hash = skb_get_rx_queue(skb);
@@ -2173,13 +2203,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2173 return hash; 2203 return hash;
2174 } 2204 }
2175 2205
2206 if (dev->num_tc) {
2207 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2208 qoffset = dev->tc_to_txq[tc].offset;
2209 qcount = dev->tc_to_txq[tc].count;
2210 }
2211
2176 if (skb->sk && skb->sk->sk_hash) 2212 if (skb->sk && skb->sk->sk_hash)
2177 hash = skb->sk->sk_hash; 2213 hash = skb->sk->sk_hash;
2178 else 2214 else
2179 hash = (__force u16) skb->protocol ^ skb->rxhash; 2215 hash = (__force u16) skb->protocol ^ skb->rxhash;
2180 hash = jhash_1word(hash, hashrnd); 2216 hash = jhash_1word(hash, hashrnd);
2181 2217
2182 return (u16) (((u64) hash * num_tx_queues) >> 32); 2218 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2183} 2219}
2184EXPORT_SYMBOL(__skb_tx_hash); 2220EXPORT_SYMBOL(__skb_tx_hash);
2185 2221
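[Editor's note] The change above scales a 32-bit hash into the per-class range [qoffset, qoffset + qcount) rather than [0, num_tx_queues), so traffic-class isolation survives hashing. A self-contained illustration of the same arithmetic:

#include <stdint.h>

/* same fixed-point scaling as __skb_tx_hash(): map a uniform 32-bit hash
 * onto qcount queues starting at qoffset, without using a modulo */
static uint16_t pick_queue(uint32_t hash, uint16_t qoffset, uint16_t qcount)
{
	return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
}
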
@@ -2276,15 +2312,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2276 struct netdev_queue *txq) 2312 struct netdev_queue *txq)
2277{ 2313{
2278 spinlock_t *root_lock = qdisc_lock(q); 2314 spinlock_t *root_lock = qdisc_lock(q);
2279 bool contended = qdisc_is_running(q); 2315 bool contended;
2280 int rc; 2316 int rc;
2281 2317
2318 qdisc_skb_cb(skb)->pkt_len = skb->len;
2319 qdisc_calculate_pkt_len(skb, q);
2282 /* 2320 /*
2283 * Heuristic to force contended enqueues to serialize on a 2321 * Heuristic to force contended enqueues to serialize on a
2284 * separate lock before trying to get qdisc main lock. 2322 * separate lock before trying to get qdisc main lock.
2285 * This permits __QDISC_STATE_RUNNING owner to get the lock more often 2323 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2286 * and dequeue packets faster. 2324 * and dequeue packets faster.
2287 */ 2325 */
2326 contended = qdisc_is_running(q);
2288 if (unlikely(contended)) 2327 if (unlikely(contended))
2289 spin_lock(&q->busylock); 2328 spin_lock(&q->busylock);
2290 2329
@@ -2302,7 +2341,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2302 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2341 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2303 skb_dst_force(skb); 2342 skb_dst_force(skb);
2304 2343
2305 qdisc_skb_cb(skb)->pkt_len = skb->len;
2306 qdisc_bstats_update(q, skb); 2344 qdisc_bstats_update(q, skb);
2307 2345
2308 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { 2346 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2317,7 +2355,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2317 rc = NET_XMIT_SUCCESS; 2355 rc = NET_XMIT_SUCCESS;
2318 } else { 2356 } else {
2319 skb_dst_force(skb); 2357 skb_dst_force(skb);
2320 rc = qdisc_enqueue_root(skb, q); 2358 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2321 if (qdisc_run_begin(q)) { 2359 if (qdisc_run_begin(q)) {
2322 if (unlikely(contended)) { 2360 if (unlikely(contended)) {
2323 spin_unlock(&q->busylock); 2361 spin_unlock(&q->busylock);
@@ -2467,8 +2505,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2467__u32 __skb_get_rxhash(struct sk_buff *skb) 2505__u32 __skb_get_rxhash(struct sk_buff *skb)
2468{ 2506{
2469 int nhoff, hash = 0, poff; 2507 int nhoff, hash = 0, poff;
2470 struct ipv6hdr *ip6; 2508 const struct ipv6hdr *ip6;
2471 struct iphdr *ip; 2509 const struct iphdr *ip;
2472 u8 ip_proto; 2510 u8 ip_proto;
2473 u32 addr1, addr2, ihl; 2511 u32 addr1, addr2, ihl;
2474 union { 2512 union {
@@ -2483,7 +2521,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
2483 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) 2521 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2484 goto done; 2522 goto done;
2485 2523
2486 ip = (struct iphdr *) (skb->data + nhoff); 2524 ip = (const struct iphdr *) (skb->data + nhoff);
2487 if (ip->frag_off & htons(IP_MF | IP_OFFSET)) 2525 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
2488 ip_proto = 0; 2526 ip_proto = 0;
2489 else 2527 else
@@ -2496,7 +2534,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
2496 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) 2534 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2497 goto done; 2535 goto done;
2498 2536
2499 ip6 = (struct ipv6hdr *) (skb->data + nhoff); 2537 ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
2500 ip_proto = ip6->nexthdr; 2538 ip_proto = ip6->nexthdr;
2501 addr1 = (__force u32) ip6->saddr.s6_addr32[3]; 2539 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2502 addr2 = (__force u32) ip6->daddr.s6_addr32[3]; 2540 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2536,6 +2574,54 @@ EXPORT_SYMBOL(__skb_get_rxhash);
2536struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2574struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2537EXPORT_SYMBOL(rps_sock_flow_table); 2575EXPORT_SYMBOL(rps_sock_flow_table);
2538 2576
2577static struct rps_dev_flow *
2578set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2579 struct rps_dev_flow *rflow, u16 next_cpu)
2580{
2581 u16 tcpu;
2582
2583 tcpu = rflow->cpu = next_cpu;
2584 if (tcpu != RPS_NO_CPU) {
2585#ifdef CONFIG_RFS_ACCEL
2586 struct netdev_rx_queue *rxqueue;
2587 struct rps_dev_flow_table *flow_table;
2588 struct rps_dev_flow *old_rflow;
2589 u32 flow_id;
2590 u16 rxq_index;
2591 int rc;
2592
2593 /* Should we steer this flow to a different hardware queue? */
2594 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2595 !(dev->features & NETIF_F_NTUPLE))
2596 goto out;
2597 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2598 if (rxq_index == skb_get_rx_queue(skb))
2599 goto out;
2600
2601 rxqueue = dev->_rx + rxq_index;
2602 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2603 if (!flow_table)
2604 goto out;
2605 flow_id = skb->rxhash & flow_table->mask;
2606 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2607 rxq_index, flow_id);
2608 if (rc < 0)
2609 goto out;
2610 old_rflow = rflow;
2611 rflow = &flow_table->flows[flow_id];
2612 rflow->cpu = next_cpu;
2613 rflow->filter = rc;
2614 if (old_rflow->filter == rflow->filter)
2615 old_rflow->filter = RPS_NO_FILTER;
2616 out:
2617#endif
2618 rflow->last_qtail =
2619 per_cpu(softnet_data, tcpu).input_queue_head;
2620 }
2621
2622 return rflow;
2623}
2624
2539/* 2625/*
2540 * get_rps_cpu is called from netif_receive_skb and returns the target 2626 * get_rps_cpu is called from netif_receive_skb and returns the target
2541 * CPU from the RPS map of the receiving queue for a given skb. 2627 * CPU from the RPS map of the receiving queue for a given skb.
@@ -2607,12 +2693,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2607 if (unlikely(tcpu != next_cpu) && 2693 if (unlikely(tcpu != next_cpu) &&
2608 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 2694 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2609 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 2695 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2610 rflow->last_qtail)) >= 0)) { 2696 rflow->last_qtail)) >= 0))
2611 tcpu = rflow->cpu = next_cpu; 2697 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2612 if (tcpu != RPS_NO_CPU) 2698
2613 rflow->last_qtail = per_cpu(softnet_data,
2614 tcpu).input_queue_head;
2615 }
2616 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 2699 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2617 *rflowp = rflow; 2700 *rflowp = rflow;
2618 cpu = tcpu; 2701 cpu = tcpu;
@@ -2633,6 +2716,46 @@ done:
2633 return cpu; 2716 return cpu;
2634} 2717}
2635 2718
2719#ifdef CONFIG_RFS_ACCEL
2720
2721/**
2722 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
2723 * @dev: Device on which the filter was set
2724 * @rxq_index: RX queue index
2725 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
2726 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
2727 *
2728 * Drivers that implement ndo_rx_flow_steer() should periodically call
2729 * this function for each installed filter and remove the filters for
2730 * which it returns %true.
2731 */
2732bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2733 u32 flow_id, u16 filter_id)
2734{
2735 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2736 struct rps_dev_flow_table *flow_table;
2737 struct rps_dev_flow *rflow;
2738 bool expire = true;
2739 int cpu;
2740
2741 rcu_read_lock();
2742 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2743 if (flow_table && flow_id <= flow_table->mask) {
2744 rflow = &flow_table->flows[flow_id];
2745 cpu = ACCESS_ONCE(rflow->cpu);
2746 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2747 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2748 rflow->last_qtail) <
2749 (int)(10 * flow_table->mask)))
2750 expire = false;
2751 }
2752 rcu_read_unlock();
2753 return expire;
2754}
2755EXPORT_SYMBOL(rps_may_expire_flow);
2756
2757#endif /* CONFIG_RFS_ACCEL */
2758
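[Editor's note] On the other side of accelerated RFS, the kernel-doc above says a driver that implements ndo_rx_flow_steer() should periodically call rps_may_expire_flow() for each installed filter and tear down those that report true. A hedged sketch of that sweep — only the rps_may_expire_flow() call and its argument meaning are taken from this diff; the driver structures are illustrative:

static void my_expire_rfs_filters(struct my_nic *nic)	/* e.g. from a timer */
{
	int i;

	for (i = 0; i < nic->n_filters; i++) {
		struct my_rfs_filter *f = &nic->filters[i];

		if (!f->installed)
			continue;
		if (rps_may_expire_flow(nic->netdev, f->rxq_index,
					f->flow_id, i /* filter_id */)) {
			my_hw_remove_filter(nic, f);
			f->installed = false;
		}
	}
}
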
2636/* Called from hardirq (IPI) context */ 2759/* Called from hardirq (IPI) context */
2637static void rps_trigger_softirq(void *data) 2760static void rps_trigger_softirq(void *data)
2638{ 2761{
@@ -2850,8 +2973,8 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2850 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 2973 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2851 * a compare and 2 stores extra right now if we dont have it on 2974 * a compare and 2 stores extra right now if we dont have it on
2852 * but have CONFIG_NET_CLS_ACT 2975 * but have CONFIG_NET_CLS_ACT
2853 * NOTE: This doesnt stop any functionality; if you dont have 2976 * NOTE: This doesn't stop any functionality; if you dont have
2854 * the ingress scheduler, you just cant add policies on ingress. 2977 * the ingress scheduler, you just can't add policies on ingress.
2855 * 2978 *
2856 */ 2979 */
2857static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) 2980static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
@@ -2920,6 +3043,8 @@ out:
2920 * on a failure. 3043 * on a failure.
2921 * 3044 *
2922 * The caller must hold the rtnl_mutex. 3045 * The caller must hold the rtnl_mutex.
3046 *
3047 * For a general description of rx_handler, see enum rx_handler_result.
2923 */ 3048 */
2924int netdev_rx_handler_register(struct net_device *dev, 3049int netdev_rx_handler_register(struct net_device *dev,
2925 rx_handler_func_t *rx_handler, 3050 rx_handler_func_t *rx_handler,
@@ -2954,64 +3079,13 @@ void netdev_rx_handler_unregister(struct net_device *dev)
2954} 3079}
2955EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3080EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2956 3081
2957static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2958 struct net_device *master)
2959{
2960 if (skb->pkt_type == PACKET_HOST) {
2961 u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2962
2963 memcpy(dest, master->dev_addr, ETH_ALEN);
2964 }
2965}
2966
2967/* On bonding slaves other than the currently active slave, suppress
2968 * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2969 * ARP on active-backup slaves with arp_validate enabled.
2970 */
2971int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2972{
2973 struct net_device *dev = skb->dev;
2974
2975 if (master->priv_flags & IFF_MASTER_ARPMON)
2976 dev->last_rx = jiffies;
2977
2978 if ((master->priv_flags & IFF_MASTER_ALB) &&
2979 (master->priv_flags & IFF_BRIDGE_PORT)) {
2980 /* Do address unmangle. The local destination address
2981 * will be always the one master has. Provides the right
2982 * functionality in a bridge.
2983 */
2984 skb_bond_set_mac_by_master(skb, master);
2985 }
2986
2987 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2988 if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2989 skb->protocol == __cpu_to_be16(ETH_P_ARP))
2990 return 0;
2991
2992 if (master->priv_flags & IFF_MASTER_ALB) {
2993 if (skb->pkt_type != PACKET_BROADCAST &&
2994 skb->pkt_type != PACKET_MULTICAST)
2995 return 0;
2996 }
2997 if (master->priv_flags & IFF_MASTER_8023AD &&
2998 skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2999 return 0;
3000
3001 return 1;
3002 }
3003 return 0;
3004}
3005EXPORT_SYMBOL(__skb_bond_should_drop);
3006
3007static int __netif_receive_skb(struct sk_buff *skb) 3082static int __netif_receive_skb(struct sk_buff *skb)
3008{ 3083{
3009 struct packet_type *ptype, *pt_prev; 3084 struct packet_type *ptype, *pt_prev;
3010 rx_handler_func_t *rx_handler; 3085 rx_handler_func_t *rx_handler;
3011 struct net_device *orig_dev; 3086 struct net_device *orig_dev;
3012 struct net_device *master; 3087 struct net_device *null_or_dev;
3013 struct net_device *null_or_orig; 3088 bool deliver_exact = false;
3014 struct net_device *orig_or_bond;
3015 int ret = NET_RX_DROP; 3089 int ret = NET_RX_DROP;
3016 __be16 type; 3090 __be16 type;
3017 3091
@@ -3026,28 +3100,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
3026 3100
3027 if (!skb->skb_iif) 3101 if (!skb->skb_iif)
3028 skb->skb_iif = skb->dev->ifindex; 3102 skb->skb_iif = skb->dev->ifindex;
3029
3030 /*
3031 * bonding note: skbs received on inactive slaves should only
3032 * be delivered to pkt handlers that are exact matches. Also
3033 * the deliver_no_wcard flag will be set. If packet handlers
3034 * are sensitive to duplicate packets these skbs will need to
3035 * be dropped at the handler.
3036 */
3037 null_or_orig = NULL;
3038 orig_dev = skb->dev; 3103 orig_dev = skb->dev;
3039 master = ACCESS_ONCE(orig_dev->master);
3040 if (skb->deliver_no_wcard)
3041 null_or_orig = orig_dev;
3042 else if (master) {
3043 if (skb_bond_should_drop(skb, master)) {
3044 skb->deliver_no_wcard = 1;
3045 null_or_orig = orig_dev; /* deliver only exact match */
3046 } else
3047 skb->dev = master;
3048 }
3049 3104
3050 __this_cpu_inc(softnet_data.processed);
3051 skb_reset_network_header(skb); 3105 skb_reset_network_header(skb);
3052 skb_reset_transport_header(skb); 3106 skb_reset_transport_header(skb);
3053 skb->mac_len = skb->network_header - skb->mac_header; 3107 skb->mac_len = skb->network_header - skb->mac_header;
@@ -3056,6 +3110,16 @@ static int __netif_receive_skb(struct sk_buff *skb)
3056 3110
3057 rcu_read_lock(); 3111 rcu_read_lock();
3058 3112
3113another_round:
3114
3115 __this_cpu_inc(softnet_data.processed);
3116
3117 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3118 skb = vlan_untag(skb);
3119 if (unlikely(!skb))
3120 goto out;
3121 }
3122
3059#ifdef CONFIG_NET_CLS_ACT 3123#ifdef CONFIG_NET_CLS_ACT
3060 if (skb->tc_verd & TC_NCLS) { 3124 if (skb->tc_verd & TC_NCLS) {
3061 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 3125 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -3064,8 +3128,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
3064#endif 3128#endif
3065 3129
3066 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3130 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3067 if (ptype->dev == null_or_orig || ptype->dev == skb->dev || 3131 if (!ptype->dev || ptype->dev == skb->dev) {
3068 ptype->dev == orig_dev) {
3069 if (pt_prev) 3132 if (pt_prev)
3070 ret = deliver_skb(skb, pt_prev, orig_dev); 3133 ret = deliver_skb(skb, pt_prev, orig_dev);
3071 pt_prev = ptype; 3134 pt_prev = ptype;
@@ -3079,16 +3142,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
3079ncls: 3142ncls:
3080#endif 3143#endif
3081 3144
3082 /* Handle special case of bridge or macvlan */
3083 rx_handler = rcu_dereference(skb->dev->rx_handler); 3145 rx_handler = rcu_dereference(skb->dev->rx_handler);
3084 if (rx_handler) { 3146 if (rx_handler) {
3085 if (pt_prev) { 3147 if (pt_prev) {
3086 ret = deliver_skb(skb, pt_prev, orig_dev); 3148 ret = deliver_skb(skb, pt_prev, orig_dev);
3087 pt_prev = NULL; 3149 pt_prev = NULL;
3088 } 3150 }
3089 skb = rx_handler(skb); 3151 switch (rx_handler(&skb)) {
3090 if (!skb) 3152 case RX_HANDLER_CONSUMED:
3091 goto out; 3153 goto out;
3154 case RX_HANDLER_ANOTHER:
3155 goto another_round;
3156 case RX_HANDLER_EXACT:
3157 deliver_exact = true;
3158 case RX_HANDLER_PASS:
3159 break;
3160 default:
3161 BUG();
3162 }
3092 } 3163 }
3093 3164
3094 if (vlan_tx_tag_present(skb)) { 3165 if (vlan_tx_tag_present(skb)) {
@@ -3096,31 +3167,22 @@ ncls:
3096 ret = deliver_skb(skb, pt_prev, orig_dev); 3167 ret = deliver_skb(skb, pt_prev, orig_dev);
3097 pt_prev = NULL; 3168 pt_prev = NULL;
3098 } 3169 }
3099 if (vlan_hwaccel_do_receive(&skb)) { 3170 if (vlan_do_receive(&skb)) {
3100 ret = __netif_receive_skb(skb); 3171 ret = __netif_receive_skb(skb);
3101 goto out; 3172 goto out;
3102 } else if (unlikely(!skb)) 3173 } else if (unlikely(!skb))
3103 goto out; 3174 goto out;
3104 } 3175 }
3105 3176
3106 /* 3177 /* deliver only exact match when indicated */
3107 * Make sure frames received on VLAN interfaces stacked on 3178 null_or_dev = deliver_exact ? skb->dev : NULL;
3108 * bonding interfaces still make their way to any base bonding
3109 * device that may have registered for a specific ptype. The
3110 * handler may have to adjust skb->dev and orig_dev.
3111 */
3112 orig_or_bond = orig_dev;
3113 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
3114 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
3115 orig_or_bond = vlan_dev_real_dev(skb->dev);
3116 }
3117 3179
3118 type = skb->protocol; 3180 type = skb->protocol;
3119 list_for_each_entry_rcu(ptype, 3181 list_for_each_entry_rcu(ptype,
3120 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3182 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3121 if (ptype->type == type && (ptype->dev == null_or_orig || 3183 if (ptype->type == type &&
3122 ptype->dev == skb->dev || ptype->dev == orig_dev || 3184 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3123 ptype->dev == orig_or_bond)) { 3185 ptype->dev == orig_dev)) {
3124 if (pt_prev) 3186 if (pt_prev)
3125 ret = deliver_skb(skb, pt_prev, orig_dev); 3187 ret = deliver_skb(skb, pt_prev, orig_dev);
3126 pt_prev = ptype; 3188 pt_prev = ptype;
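A note on the rewritten receive path above: rx_handler now takes a struct sk_buff ** and returns an rx_handler_result_t instead of a possibly-NULL skb, so a handler (bridge, macvlan, bonding) can consume the packet, pass it through unchanged, restrict further delivery to exact-device ptype matches (RX_HANDLER_EXACT, which sets deliver_exact above), or retarget the skb and ask for another pass through the loop (RX_HANDLER_ANOTHER, the goto another_round). A minimal hedged sketch of a handler using the new contract; struct myport and its fields are invented for illustration:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    struct myport {                             /* hypothetical per-port state */
            bool enabled;
            struct net_device *upper_dev;
    };

    static rx_handler_result_t my_rx_handler(struct sk_buff **pskb)
    {
            struct sk_buff *skb = *pskb;
            struct myport *port = rcu_dereference(skb->dev->rx_handler_data);

            if (!port->enabled)
                    return RX_HANDLER_PASS;     /* stack sees the skb unchanged */

            if (!port->upper_dev) {
                    kfree_skb(skb);             /* we own it now; drop it */
                    return RX_HANDLER_CONSUMED;
            }

            skb->dev = port->upper_dev;         /* retarget to the upper device */
            *pskb = skb;
            return RX_HANDLER_ANOTHER;          /* run __netif_receive_skb again */
    }

The handler would be attached with netdev_rx_handler_register(dev, my_rx_handler, port) under RTNL and detached with netdev_rx_handler_unregister(dev).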
@@ -3726,7 +3788,7 @@ static void net_rx_action(struct softirq_action *h)
3726 * with netpoll's poll_napi(). Only the entity which 3788 * with netpoll's poll_napi(). Only the entity which
3727 * obtains the lock and sees NAPI_STATE_SCHED set will 3789 * obtains the lock and sees NAPI_STATE_SCHED set will
3728 * actually make the ->poll() call. Therefore we avoid 3790 * actually make the ->poll() call. Therefore we avoid
3729 * accidently calling ->poll() when NAPI is not scheduled. 3791 * accidentally calling ->poll() when NAPI is not scheduled.
3730 */ 3792 */
3731 work = 0; 3793 work = 0;
3732 if (test_bit(NAPI_STATE_SCHED, &n->state)) { 3794 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
@@ -3917,12 +3979,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3917 3979
3918void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3980void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3919{ 3981{
3920 struct net_device *dev = (v == SEQ_START_TOKEN) ? 3982 struct net_device *dev = v;
3921 first_net_device(seq_file_net(seq)) : 3983
3922 next_net_device((struct net_device *)v); 3984 if (v == SEQ_START_TOKEN)
3985 dev = first_net_device_rcu(seq_file_net(seq));
3986 else
3987 dev = next_net_device_rcu(dev);
3923 3988
3924 ++*pos; 3989 ++*pos;
3925 return rcu_dereference(dev); 3990 return dev;
3926} 3991}
3927 3992
3928void dev_seq_stop(struct seq_file *seq, void *v) 3993void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4206,15 +4271,14 @@ static int __init dev_proc_init(void)
4206 4271
4207 4272
4208/** 4273/**
4209 * netdev_set_master - set up master/slave pair 4274 * netdev_set_master - set up master pointer
4210 * @slave: slave device 4275 * @slave: slave device
4211 * @master: new master device 4276 * @master: new master device
4212 * 4277 *
4213 * Changes the master device of the slave. Pass %NULL to break the 4278 * Changes the master device of the slave. Pass %NULL to break the
4214 * bonding. The caller must hold the RTNL semaphore. On a failure 4279 * bonding. The caller must hold the RTNL semaphore. On a failure
4215 * a negative errno code is returned. On success the reference counts 4280 * a negative errno code is returned. On success the reference counts
4216 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the 4281 * are adjusted and the function returns zero.
4217 * function returns zero.
4218 */ 4282 */
4219int netdev_set_master(struct net_device *slave, struct net_device *master) 4283int netdev_set_master(struct net_device *slave, struct net_device *master)
4220{ 4284{
@@ -4234,6 +4298,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
4234 synchronize_net(); 4298 synchronize_net();
4235 dev_put(old); 4299 dev_put(old);
4236 } 4300 }
4301 return 0;
4302}
4303EXPORT_SYMBOL(netdev_set_master);
4304
4305/**
4306 * netdev_set_bond_master - set up bonding master/slave pair
4307 * @slave: slave device
4308 * @master: new master device
4309 *
4310 * Changes the master device of the slave. Pass %NULL to break the
4311 * bonding. The caller must hold the RTNL semaphore. On a failure
4312 * a negative errno code is returned. On success %RTM_NEWLINK is sent
4313 * to the routing socket and the function returns zero.
4314 */
4315int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4316{
4317 int err;
4318
4319 ASSERT_RTNL();
4320
4321 err = netdev_set_master(slave, master);
4322 if (err)
4323 return err;
4237 if (master) 4324 if (master)
4238 slave->flags |= IFF_SLAVE; 4325 slave->flags |= IFF_SLAVE;
4239 else 4326 else
@@ -4242,7 +4329,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
4242 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); 4329 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4243 return 0; 4330 return 0;
4244} 4331}
4245EXPORT_SYMBOL(netdev_set_master); 4332EXPORT_SYMBOL(netdev_set_bond_master);
4246 4333
4247static void dev_change_rx_flags(struct net_device *dev, int flags) 4334static void dev_change_rx_flags(struct net_device *dev, int flags)
4248{ 4335{
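After this split, netdev_set_master() only records the master pointer (all the bridge needs), while the new netdev_set_bond_master() additionally toggles IFF_SLAVE and emits RTM_NEWLINK, which is the behaviour bonding relied on. A hedged sketch of the two call sites; the device arguments and helper name are placeholders:

    /* Sketch only; mydrv_enslave() and its arguments are illustrative. */
    static int mydrv_enslave(struct net_device *master, struct net_device *slave,
                             bool is_bond)
    {
            ASSERT_RTNL();

            if (is_bond)
                    /* also sets IFF_SLAVE and sends RTM_NEWLINK */
                    return netdev_set_bond_master(slave, master);

            /* bridge-style: only wires up the master pointer */
            return netdev_set_master(slave, master);
    }

Either flavour is undone by passing NULL as the new master.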
@@ -4411,6 +4498,30 @@ void dev_set_rx_mode(struct net_device *dev)
4411} 4498}
4412 4499
4413/** 4500/**
4501 * dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
4502 * @dev: device
4503 * @cmd: memory area for ethtool_ops::get_settings() result
4504 *
4505 * The cmd arg is initialized properly (cleared and
4506 * ethtool_cmd::cmd field set to ETHTOOL_GSET).
4507 *
4508 * Return device's ethtool_ops::get_settings() result value or
4509 * -EOPNOTSUPP when device doesn't expose
4510 * ethtool_ops::get_settings() operation.
4511 */
4512int dev_ethtool_get_settings(struct net_device *dev,
4513 struct ethtool_cmd *cmd)
4514{
4515 if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
4516 return -EOPNOTSUPP;
4517
4518 memset(cmd, 0, sizeof(struct ethtool_cmd));
4519 cmd->cmd = ETHTOOL_GSET;
4520 return dev->ethtool_ops->get_settings(dev, cmd);
4521}
4522EXPORT_SYMBOL(dev_ethtool_get_settings);
4523
4524/**
4414 * dev_get_flags - get flags reported to userspace 4525 * dev_get_flags - get flags reported to userspace
4415 * @dev: device 4526 * @dev: device
4416 * 4527 *
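dev_ethtool_get_settings() wraps the NULL checks and the ethtool_cmd initialisation so stacked drivers (bonding, for instance) can query a lower device's link settings in one call. A hedged usage sketch; the helper name is invented and speed_hi is ignored here:

    #include <linux/ethtool.h>
    #include <linux/netdevice.h>

    /* Sketch: log a lower device's link settings; error handling trimmed. */
    static void mydrv_log_slave_speed(struct net_device *slave_dev)
    {
            struct ethtool_cmd ecmd;

            if (dev_ethtool_get_settings(slave_dev, &ecmd))
                    return;         /* -EOPNOTSUPP: no get_settings op */

            pr_info("%s: %u Mb/s, %s duplex\n", slave_dev->name,
                    ecmd.speed, ecmd.duplex == DUPLEX_FULL ? "full" : "half");
    }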
@@ -4579,6 +4690,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
4579EXPORT_SYMBOL(dev_set_mtu); 4690EXPORT_SYMBOL(dev_set_mtu);
4580 4691
4581/** 4692/**
4693 * dev_set_group - Change group this device belongs to
4694 * @dev: device
4695 * @new_group: group this device should belong to
4696 */
4697void dev_set_group(struct net_device *dev, int new_group)
4698{
4699 dev->group = new_group;
4700}
4701EXPORT_SYMBOL(dev_set_group);
4702
4703/**
4582 * dev_set_mac_address - Change Media Access Control Address 4704 * dev_set_mac_address - Change Media Access Control Address
4583 * @dev: device 4705 * @dev: device
4584 * @sa: new address 4706 * @sa: new address
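dev->group (initialised to INIT_NETDEV_GROUP in alloc_netdev_mqs further down) lets netlink address many devices at once; dev_set_group() is the setter used when the group attribute is changed, as far as this series goes. A minimal hedged sketch, with an arbitrary group id:

    /* Sketch only: move a device into group 42 under RTNL. */
    static void mydrv_join_group(struct net_device *dev)
    {
            rtnl_lock();
            dev_set_group(dev, 42);
            rtnl_unlock();
    }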
@@ -4663,7 +4785,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
4663 * is never reached 4785 * is never reached
4664 */ 4786 */
4665 WARN_ON(1); 4787 WARN_ON(1);
4666 err = -EINVAL; 4788 err = -ENOTTY;
4667 break; 4789 break;
4668 4790
4669 } 4791 }
@@ -4931,7 +5053,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4931 /* Set the per device memory buffer space. 5053 /* Set the per device memory buffer space.
4932 * Not applicable in our case */ 5054 * Not applicable in our case */
4933 case SIOCSIFLINK: 5055 case SIOCSIFLINK:
4934 return -EINVAL; 5056 return -ENOTTY;
4935 5057
4936 /* 5058 /*
4937 * Unknown or private ioctl. 5059 * Unknown or private ioctl.
@@ -4952,7 +5074,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4952 /* Take care of Wireless Extensions */ 5074 /* Take care of Wireless Extensions */
4953 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) 5075 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4954 return wext_handle_ioctl(net, &ifr, cmd, arg); 5076 return wext_handle_ioctl(net, &ifr, cmd, arg);
4955 return -EINVAL; 5077 return -ENOTTY;
4956 } 5078 }
4957} 5079}
4958 5080
@@ -5004,7 +5126,7 @@ static void rollback_registered_many(struct list_head *head)
5004 list_del(&dev->unreg_list); 5126 list_del(&dev->unreg_list);
5005 continue; 5127 continue;
5006 } 5128 }
5007 5129 dev->dismantle = true;
5008 BUG_ON(dev->reg_state != NETREG_REGISTERED); 5130 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5009 } 5131 }
5010 5132
@@ -5069,41 +5191,59 @@ static void rollback_registered(struct net_device *dev)
5069 list_del(&single); 5191 list_del(&single);
5070} 5192}
5071 5193
5072unsigned long netdev_fix_features(unsigned long features, const char *name) 5194u32 netdev_fix_features(struct net_device *dev, u32 features)
5073{ 5195{
5196 /* Fix illegal checksum combinations */
5197 if ((features & NETIF_F_HW_CSUM) &&
5198 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5199 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5200 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5201 }
5202
5203 if ((features & NETIF_F_NO_CSUM) &&
5204 (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5205 netdev_warn(dev, "mixed no checksumming and other settings.\n");
5206 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5207 }
5208
5074 /* Fix illegal SG+CSUM combinations. */ 5209 /* Fix illegal SG+CSUM combinations. */
5075 if ((features & NETIF_F_SG) && 5210 if ((features & NETIF_F_SG) &&
5076 !(features & NETIF_F_ALL_CSUM)) { 5211 !(features & NETIF_F_ALL_CSUM)) {
5077 if (name) 5212 netdev_dbg(dev,
5078 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " 5213 "Dropping NETIF_F_SG since no checksum feature.\n");
5079 "checksum feature.\n", name);
5080 features &= ~NETIF_F_SG; 5214 features &= ~NETIF_F_SG;
5081 } 5215 }
5082 5216
5083 /* TSO requires that SG is present as well. */ 5217 /* TSO requires that SG is present as well. */
5084 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { 5218 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5085 if (name) 5219 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5086 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " 5220 features &= ~NETIF_F_ALL_TSO;
5087 "SG feature.\n", name); 5221 }
5088 features &= ~NETIF_F_TSO; 5222
5223 /* TSO ECN requires that TSO is present as well. */
5224 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5225 features &= ~NETIF_F_TSO_ECN;
5226
5227 /* Software GSO depends on SG. */
5228 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5229 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5230 features &= ~NETIF_F_GSO;
5089 } 5231 }
5090 5232
5233 /* UFO needs SG and checksumming */
5091 if (features & NETIF_F_UFO) { 5234 if (features & NETIF_F_UFO) {
5092 /* maybe split UFO into V4 and V6? */ 5235 /* maybe split UFO into V4 and V6? */
5093 if (!((features & NETIF_F_GEN_CSUM) || 5236 if (!((features & NETIF_F_GEN_CSUM) ||
5094 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 5237 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5095 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5238 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5096 if (name) 5239 netdev_dbg(dev,
5097 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 5240 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5098 "since no checksum offload features.\n",
5099 name);
5100 features &= ~NETIF_F_UFO; 5241 features &= ~NETIF_F_UFO;
5101 } 5242 }
5102 5243
5103 if (!(features & NETIF_F_SG)) { 5244 if (!(features & NETIF_F_SG)) {
5104 if (name) 5245 netdev_dbg(dev,
5105 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 5246 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5106 "since no NETIF_F_SG feature.\n", name);
5107 features &= ~NETIF_F_UFO; 5247 features &= ~NETIF_F_UFO;
5108 } 5248 }
5109 } 5249 }
@@ -5112,6 +5252,75 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
5112} 5252}
5113EXPORT_SYMBOL(netdev_fix_features); 5253EXPORT_SYMBOL(netdev_fix_features);
5114 5254
5255int __netdev_update_features(struct net_device *dev)
5256{
5257 u32 features;
5258 int err = 0;
5259
5260 ASSERT_RTNL();
5261
5262 features = netdev_get_wanted_features(dev);
5263
5264 if (dev->netdev_ops->ndo_fix_features)
5265 features = dev->netdev_ops->ndo_fix_features(dev, features);
5266
5267 /* driver might be less strict about feature dependencies */
5268 features = netdev_fix_features(dev, features);
5269
5270 if (dev->features == features)
5271 return 0;
5272
5273 netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
5274 dev->features, features);
5275
5276 if (dev->netdev_ops->ndo_set_features)
5277 err = dev->netdev_ops->ndo_set_features(dev, features);
5278
5279 if (unlikely(err < 0)) {
5280 netdev_err(dev,
5281 "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
5282 err, features, dev->features);
5283 return -1;
5284 }
5285
5286 if (!err)
5287 dev->features = features;
5288
5289 return 1;
5290}
5291
5292/**
5293 * netdev_update_features - recalculate device features
5294 * @dev: the device to check
5295 *
5296 * Recalculate dev->features set and send notifications if it
5297 * has changed. Should be called after driver or hardware dependent
5298 * conditions might have changed that influence the features.
5299 */
5300void netdev_update_features(struct net_device *dev)
5301{
5302 if (__netdev_update_features(dev))
5303 netdev_features_change(dev);
5304}
5305EXPORT_SYMBOL(netdev_update_features);
5306
5307/**
5308 * netdev_change_features - recalculate device features
5309 * @dev: the device to check
5310 *
5311 * Recalculate dev->features set and send notifications even
5312 * if they have not changed. Should be called instead of
5313 * netdev_update_features() if also dev->vlan_features might
5314 * have changed to allow the changes to be propagated to stacked
5315 * VLAN devices.
5316 */
5317void netdev_change_features(struct net_device *dev)
5318{
5319 __netdev_update_features(dev);
5320 netdev_features_change(dev);
5321}
5322EXPORT_SYMBOL(netdev_change_features);
5323
5115/** 5324/**
5116 * netif_stacked_transfer_operstate - transfer operstate 5325 * netif_stacked_transfer_operstate - transfer operstate
5117 * @rootdev: the root or lower level device to transfer state from 5326 * @rootdev: the root or lower level device to transfer state from
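The new feature machinery above splits the feature word three ways: dev->hw_features is what may be toggled, dev->wanted_features is what the administrator asked for, and dev->features is what is currently active; __netdev_update_features() funnels every change through ndo_fix_features()/ndo_set_features(). A hedged sketch of how a driver is expected to plug into it; every name prefixed mydrv_ is invented:

    #include <linux/netdevice.h>

    static u32 mydrv_fix_features(struct net_device *dev, u32 features)
    {
            /* example constraint: no checksum offload with jumbo frames */
            if (dev->mtu > 1500)
                    features &= ~NETIF_F_ALL_CSUM;
            return features;
    }

    static int mydrv_set_features(struct net_device *dev, u32 features)
    {
            u32 changed = dev->features ^ features;

            if (changed & NETIF_F_RXCSUM) {
                    /* program the NIC's RX-checksum enable bit here
                     * (hardware access is device specific and omitted)
                     */
            }
            return 0;
    }

    static const struct net_device_ops mydrv_netdev_ops = {
            .ndo_fix_features = mydrv_fix_features,
            .ndo_set_features = mydrv_set_features,
            /* ndo_open, ndo_start_xmit, ... omitted */
    };

    /* at probe time: advertise what may be toggled and what starts enabled */
    static void mydrv_init_features(struct net_device *dev)
    {
            dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
            dev->features |= dev->hw_features;
    }

    /* later, when a hardware-dependent condition changes (MTU, firmware, ...):
     *         netdev_update_features(dev);
     */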
@@ -5228,6 +5437,10 @@ int register_netdevice(struct net_device *dev)
5228 5437
5229 dev->iflink = -1; 5438 dev->iflink = -1;
5230 5439
5440 ret = dev_get_valid_name(dev, dev->name);
5441 if (ret < 0)
5442 goto out;
5443
5231 /* Init, if this function is available */ 5444 /* Init, if this function is available */
5232 if (dev->netdev_ops->ndo_init) { 5445 if (dev->netdev_ops->ndo_init) {
5233 ret = dev->netdev_ops->ndo_init(dev); 5446 ret = dev->netdev_ops->ndo_init(dev);
@@ -5238,35 +5451,25 @@ int register_netdevice(struct net_device *dev)
5238 } 5451 }
5239 } 5452 }
5240 5453
5241 ret = dev_get_valid_name(dev, dev->name, 0);
5242 if (ret)
5243 goto err_uninit;
5244
5245 dev->ifindex = dev_new_index(net); 5454 dev->ifindex = dev_new_index(net);
5246 if (dev->iflink == -1) 5455 if (dev->iflink == -1)
5247 dev->iflink = dev->ifindex; 5456 dev->iflink = dev->ifindex;
5248 5457
5249 /* Fix illegal checksum combinations */ 5458 /* Transfer changeable features to wanted_features and enable
5250 if ((dev->features & NETIF_F_HW_CSUM) && 5459 * software offloads (GSO and GRO).
5251 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5460 */
5252 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", 5461 dev->hw_features |= NETIF_F_SOFT_FEATURES;
5253 dev->name); 5462 dev->features |= NETIF_F_SOFT_FEATURES;
5254 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5463 dev->wanted_features = dev->features & dev->hw_features;
5255 }
5256 5464
5257 if ((dev->features & NETIF_F_NO_CSUM) && 5465 /* Turn on no cache copy if HW is doing checksum */
5258 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5466 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5259 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", 5467 if ((dev->features & NETIF_F_ALL_CSUM) &&
5260 dev->name); 5468 !(dev->features & NETIF_F_NO_CSUM)) {
5261 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); 5469 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5470 dev->features |= NETIF_F_NOCACHE_COPY;
5262 } 5471 }
5263 5472
5264 dev->features = netdev_fix_features(dev->features, dev->name);
5265
5266 /* Enable software GSO if SG is supported. */
5267 if (dev->features & NETIF_F_SG)
5268 dev->features |= NETIF_F_GSO;
5269
5270 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, 5473 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5271 * vlan_dev_init() will do the dev->features check, so these features 5474 * vlan_dev_init() will do the dev->features check, so these features
5272 * are enabled only if supported by underlying device. 5475 * are enabled only if supported by underlying device.
@@ -5283,6 +5486,8 @@ int register_netdevice(struct net_device *dev)
5283 goto err_uninit; 5486 goto err_uninit;
5284 dev->reg_state = NETREG_REGISTERED; 5487 dev->reg_state = NETREG_REGISTERED;
5285 5488
5489 __netdev_update_features(dev);
5490
5286 /* 5491 /*
5287 * Default initial state at registry is that the 5492 * Default initial state at registry is that the
5288 * device is present. 5493 * device is present.
@@ -5378,19 +5583,7 @@ int register_netdev(struct net_device *dev)
5378 int err; 5583 int err;
5379 5584
5380 rtnl_lock(); 5585 rtnl_lock();
5381
5382 /*
5383 * If the name is a format string the caller wants us to do a
5384 * name allocation.
5385 */
5386 if (strchr(dev->name, '%')) {
5387 err = dev_alloc_name(dev, dev->name);
5388 if (err < 0)
5389 goto out;
5390 }
5391
5392 err = register_netdevice(dev); 5586 err = register_netdevice(dev);
5393out:
5394 rtnl_unlock(); 5587 rtnl_unlock();
5395 return err; 5588 return err;
5396} 5589}
@@ -5687,6 +5880,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5687#endif 5880#endif
5688 5881
5689 strcpy(dev->name, name); 5882 strcpy(dev->name, name);
5883 dev->group = INIT_NETDEV_GROUP;
5690 return dev; 5884 return dev;
5691 5885
5692free_all: 5886free_all:
@@ -5871,7 +6065,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5871 /* We get here if we can't use the current device name */ 6065 /* We get here if we can't use the current device name */
5872 if (!pat) 6066 if (!pat)
5873 goto out; 6067 goto out;
5874 if (dev_get_valid_name(dev, pat, 1)) 6068 if (dev_get_valid_name(dev, pat) < 0)
5875 goto out; 6069 goto out;
5876 } 6070 }
5877 6071
@@ -6001,32 +6195,22 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6001 * @one to the master device with current feature set @all. Will not 6195 * @one to the master device with current feature set @all. Will not
6002 * enable anything that is off in @mask. Returns the new feature set. 6196 * enable anything that is off in @mask. Returns the new feature set.
6003 */ 6197 */
6004unsigned long netdev_increment_features(unsigned long all, unsigned long one, 6198u32 netdev_increment_features(u32 all, u32 one, u32 mask)
6005 unsigned long mask)
6006{ 6199{
6007 /* If device needs checksumming, downgrade to it. */ 6200 if (mask & NETIF_F_GEN_CSUM)
6008 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) 6201 mask |= NETIF_F_ALL_CSUM;
6009 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); 6202 mask |= NETIF_F_VLAN_CHALLENGED;
6010 else if (mask & NETIF_F_ALL_CSUM) {
6011 /* If one device supports v4/v6 checksumming, set for all. */
6012 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
6013 !(all & NETIF_F_GEN_CSUM)) {
6014 all &= ~NETIF_F_ALL_CSUM;
6015 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
6016 }
6017 6203
6018 /* If one device supports hw checksumming, set for all. */ 6204 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6019 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { 6205 all &= one | ~NETIF_F_ALL_FOR_ALL;
6020 all &= ~NETIF_F_ALL_CSUM;
6021 all |= NETIF_F_HW_CSUM;
6022 }
6023 }
6024 6206
6025 one |= NETIF_F_ALL_CSUM; 6207 /* If device needs checksumming, downgrade to it. */
6208 if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
6209 all &= ~NETIF_F_NO_CSUM;
6026 6210
6027 one |= all & NETIF_F_ONE_FOR_ALL; 6211 /* If one device supports hw checksumming, set for all. */
6028 all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; 6212 if (all & NETIF_F_GEN_CSUM)
6029 all |= one & mask & NETIF_F_ONE_FOR_ALL; 6213 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6030 6214
6031 return all; 6215 return all;
6032} 6216}
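The rewritten netdev_increment_features() folds one lower device's feature word into a running set using the NETIF_F_ONE_FOR_ALL / NETIF_F_ALL_FOR_ALL groups instead of the old open-coded checksum juggling. The typical caller pattern (bonding and bridge do roughly this) is sketched below; the structures and the mask are placeholders, not the real driver code:

    #include <linux/list.h>
    #include <linux/netdevice.h>

    struct mydrv_port {                 /* hypothetical lower-device record */
            struct list_head list;
            struct net_device *dev;
    };

    struct mydrv_master {               /* hypothetical aggregating device */
            struct net_device *dev;
            struct list_head ports;
    };

    static u32 mydrv_compute_features(struct mydrv_master *m, u32 mask)
    {
            struct mydrv_port *port;
            u32 features = mask;        /* start from everything allowed */

            list_for_each_entry(port, &m->ports, list)
                    features = netdev_increment_features(features,
                                                         port->dev->features,
                                                         mask);
            return netdev_fix_features(m->dev, features);
    }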
@@ -6187,7 +6371,7 @@ static void __net_exit default_device_exit(struct net *net)
6187 if (dev->rtnl_link_ops) 6371 if (dev->rtnl_link_ops)
6188 continue; 6372 continue;
6189 6373
6190 /* Push remaing network devices to init_net */ 6374 /* Push remaining network devices to init_net */
6191 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 6375 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6192 err = dev_change_net_namespace(dev, &init_net, fb_name); 6376 err = dev_change_net_namespace(dev, &init_net, fb_name);
6193 if (err) { 6377 if (err) {
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c18992f..e2e66939ed00 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -68,14 +68,6 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
68 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); 68 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
69} 69}
70 70
71static void ha_rcu_free(struct rcu_head *head)
72{
73 struct netdev_hw_addr *ha;
74
75 ha = container_of(head, struct netdev_hw_addr, rcu_head);
76 kfree(ha);
77}
78
79static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, 71static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
80 unsigned char *addr, int addr_len, 72 unsigned char *addr, int addr_len,
81 unsigned char addr_type, bool global) 73 unsigned char addr_type, bool global)
@@ -94,7 +86,7 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
94 if (--ha->refcount) 86 if (--ha->refcount)
95 return 0; 87 return 0;
96 list_del_rcu(&ha->list); 88 list_del_rcu(&ha->list);
97 call_rcu(&ha->rcu_head, ha_rcu_free); 89 kfree_rcu(ha, rcu_head);
98 list->count--; 90 list->count--;
99 return 0; 91 return 0;
100 } 92 }
@@ -144,7 +136,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
144 136
145 list_for_each_entry(ha, &from_list->list, list) { 137 list_for_each_entry(ha, &from_list->list, list) {
146 type = addr_type ? addr_type : ha->type; 138 type = addr_type ? addr_type : ha->type;
147 __hw_addr_del(to_list, ha->addr, addr_len, addr_type); 139 __hw_addr_del(to_list, ha->addr, addr_len, type);
148 } 140 }
149} 141}
150EXPORT_SYMBOL(__hw_addr_del_multiple); 142EXPORT_SYMBOL(__hw_addr_del_multiple);
@@ -197,7 +189,7 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list)
197 189
198 list_for_each_entry_safe(ha, tmp, &list->list, list) { 190 list_for_each_entry_safe(ha, tmp, &list->list, list) {
199 list_del_rcu(&ha->list); 191 list_del_rcu(&ha->list);
200 call_rcu(&ha->rcu_head, ha_rcu_free); 192 kfree_rcu(ha, rcu_head);
201 } 193 }
202 list->count = 0; 194 list->count = 0;
203} 195}
@@ -357,8 +349,8 @@ EXPORT_SYMBOL(dev_addr_add_multiple);
357/** 349/**
358 * dev_addr_del_multiple - Delete device addresses by another device 350 * dev_addr_del_multiple - Delete device addresses by another device
359 * @to_dev: device where the addresses will be deleted 351 * @to_dev: device where the addresses will be deleted
360 * @from_dev: device by which addresses the addresses will be deleted 352 * @from_dev: device supplying the addresses to be deleted
361 * @addr_type: address type - 0 means type will used from from_dev 353 * @addr_type: address type - 0 means type will be used from from_dev
362 * 354 *
363 * Deletes addresses in to device by the list of addresses in from device. 355 * Deletes addresses in to device by the list of addresses in from device.
364 * 356 *
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 36e603c78ce9..7f36b38e060f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -207,14 +207,6 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
207 rcu_read_unlock(); 207 rcu_read_unlock();
208} 208}
209 209
210
211static void free_dm_hw_stat(struct rcu_head *head)
212{
213 struct dm_hw_stat_delta *n;
214 n = container_of(head, struct dm_hw_stat_delta, rcu);
215 kfree(n);
216}
217
218static int set_all_monitor_traces(int state) 210static int set_all_monitor_traces(int state)
219{ 211{
220 int rc = 0; 212 int rc = 0;
@@ -245,7 +237,7 @@ static int set_all_monitor_traces(int state)
245 list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { 237 list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
246 if (new_stat->dev == NULL) { 238 if (new_stat->dev == NULL) {
247 list_del_rcu(&new_stat->list); 239 list_del_rcu(&new_stat->list);
248 call_rcu(&new_stat->rcu, free_dm_hw_stat); 240 kfree_rcu(new_stat, rcu);
249 } 241 }
250 } 242 }
251 break; 243 break;
@@ -314,7 +306,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
314 new_stat->dev = NULL; 306 new_stat->dev = NULL;
315 if (trace_state == TRACE_OFF) { 307 if (trace_state == TRACE_OFF) {
316 list_del_rcu(&new_stat->list); 308 list_del_rcu(&new_stat->list);
317 call_rcu(&new_stat->rcu, free_dm_hw_stat); 309 kfree_rcu(new_stat, rcu);
318 break; 310 break;
319 } 311 }
320 } 312 }
@@ -350,7 +342,7 @@ static int __init init_net_drop_monitor(void)
350 struct per_cpu_dm_data *data; 342 struct per_cpu_dm_data *data;
351 int cpu, rc; 343 int cpu, rc;
352 344
353 printk(KERN_INFO "Initalizing network drop monitor service\n"); 345 printk(KERN_INFO "Initializing network drop monitor service\n");
354 346
355 if (sizeof(void *) > 8) { 347 if (sizeof(void *) > 8) {
356 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); 348 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
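The hunks above (and the dev_addr_lists.c ones before them) are the same mechanical conversion: a call_rcu() callback that only did container_of() plus kfree() is replaced by kfree_rcu(), which takes the pointer and the name of the struct's rcu_head member. The pattern in general form:

    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct item {
            struct list_head list;
            struct rcu_head  rcu;
            int payload;
    };

    /* before: a callback existed only to find the container and free it */
    static void item_rcu_free(struct rcu_head *head)
    {
            kfree(container_of(head, struct item, rcu));
    }

    static void item_del_old(struct item *it)
    {
            list_del_rcu(&it->list);
            call_rcu(&it->rcu, item_rcu_free);
    }

    /* after: kfree_rcu() names the rcu_head member, no callback needed */
    static void item_del_new(struct item *it)
    {
            list_del_rcu(&it->list);
            kfree_rcu(it, rcu);
    }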
diff --git a/net/core/dst.c b/net/core/dst.c
index b99c7c7ffce2..81a4fa1c95ed 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -19,6 +19,7 @@
19#include <linux/types.h> 19#include <linux/types.h>
20#include <net/net_namespace.h> 20#include <net/net_namespace.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/prefetch.h>
22 23
23#include <net/dst.h> 24#include <net/dst.h>
24 25
@@ -33,9 +34,6 @@
33 * 3) This list is guarded by a mutex, 34 * 3) This list is guarded by a mutex,
34 * so that the gc_task and dst_dev_event() can be synchronized. 35 * so that the gc_task and dst_dev_event() can be synchronized.
35 */ 36 */
36#if RT_CACHE_DEBUG >= 2
37static atomic_t dst_total = ATOMIC_INIT(0);
38#endif
39 37
40/* 38/*
41 * We want to keep lock & list close together 39 * We want to keep lock & list close together
@@ -69,10 +67,6 @@ static void dst_gc_task(struct work_struct *work)
69 unsigned long expires = ~0L; 67 unsigned long expires = ~0L;
70 struct dst_entry *dst, *next, head; 68 struct dst_entry *dst, *next, head;
71 struct dst_entry *last = &head; 69 struct dst_entry *last = &head;
72#if RT_CACHE_DEBUG >= 2
73 ktime_t time_start = ktime_get();
74 struct timespec elapsed;
75#endif
76 70
77 mutex_lock(&dst_gc_mutex); 71 mutex_lock(&dst_gc_mutex);
78 next = dst_busy_list; 72 next = dst_busy_list;
@@ -146,15 +140,6 @@ loop:
146 140
147 spin_unlock_bh(&dst_garbage.lock); 141 spin_unlock_bh(&dst_garbage.lock);
148 mutex_unlock(&dst_gc_mutex); 142 mutex_unlock(&dst_gc_mutex);
149#if RT_CACHE_DEBUG >= 2
150 elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start));
151 printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d"
152 " expires: %lu elapsed: %lu us\n",
153 atomic_read(&dst_total), delayed, work_performed,
154 expires,
155 elapsed.tv_sec * USEC_PER_SEC +
156 elapsed.tv_nsec / NSEC_PER_USEC);
157#endif
158} 143}
159 144
160int dst_discard(struct sk_buff *skb) 145int dst_discard(struct sk_buff *skb)
@@ -164,7 +149,10 @@ int dst_discard(struct sk_buff *skb)
164} 149}
165EXPORT_SYMBOL(dst_discard); 150EXPORT_SYMBOL(dst_discard);
166 151
167void *dst_alloc(struct dst_ops *ops) 152const u32 dst_default_metrics[RTAX_MAX];
153
154void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
155 int initial_ref, int initial_obsolete, int flags)
168{ 156{
169 struct dst_entry *dst; 157 struct dst_entry *dst;
170 158
@@ -172,17 +160,36 @@ void *dst_alloc(struct dst_ops *ops)
172 if (ops->gc(ops)) 160 if (ops->gc(ops))
173 return NULL; 161 return NULL;
174 } 162 }
175 dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); 163 dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
176 if (!dst) 164 if (!dst)
177 return NULL; 165 return NULL;
178 atomic_set(&dst->__refcnt, 0); 166 dst->child = NULL;
167 dst->dev = dev;
168 if (dev)
169 dev_hold(dev);
179 dst->ops = ops; 170 dst->ops = ops;
180 dst->lastuse = jiffies; 171 dst_init_metrics(dst, dst_default_metrics, true);
172 dst->expires = 0UL;
181 dst->path = dst; 173 dst->path = dst;
182 dst->input = dst->output = dst_discard; 174 dst->neighbour = NULL;
183#if RT_CACHE_DEBUG >= 2 175 dst->hh = NULL;
184 atomic_inc(&dst_total); 176#ifdef CONFIG_XFRM
177 dst->xfrm = NULL;
185#endif 178#endif
179 dst->input = dst_discard;
180 dst->output = dst_discard;
181 dst->error = 0;
182 dst->obsolete = initial_obsolete;
183 dst->header_len = 0;
184 dst->trailer_len = 0;
185#ifdef CONFIG_IP_ROUTE_CLASSID
186 dst->tclassid = 0;
187#endif
188 atomic_set(&dst->__refcnt, initial_ref);
189 dst->__use = 0;
190 dst->lastuse = jiffies;
191 dst->flags = flags;
192 dst->next = NULL;
186 dst_entries_add(ops, 1); 193 dst_entries_add(ops, 1);
187 return dst; 194 return dst;
188} 195}
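dst_alloc() now takes the device, the initial refcount, the initial obsolete value and flags, and initialises every field explicitly rather than relying on kmem_cache_zalloc(); callers shrink accordingly. A hedged sketch of an updated call site; my_dst_ops stands in for a protocol's existing dst_ops (kmem_cachep and friends assumed to be set up elsewhere):

    #include <net/dst.h>

    static struct dst_ops my_dst_ops;   /* placeholder, initialised elsewhere */

    static struct dst_entry *my_route_alloc(struct net_device *dev)
    {
            struct dst_entry *dst;

            dst = dst_alloc(&my_dst_ops, dev, 1 /* initial_ref */,
                            0 /* initial_obsolete */, DST_HOST);
            /* only protocol-private state still needs initialising by hand */
            return dst;
    }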
@@ -242,9 +249,6 @@ again:
242 dst->ops->destroy(dst); 249 dst->ops->destroy(dst);
243 if (dst->dev) 250 if (dst->dev)
244 dev_put(dst->dev); 251 dev_put(dst->dev);
245#if RT_CACHE_DEBUG >= 2
246 atomic_dec(&dst_total);
247#endif
248 kmem_cache_free(dst->ops->kmem_cachep, dst); 252 kmem_cache_free(dst->ops->kmem_cachep, dst);
249 253
250 dst = child; 254 dst = child;
@@ -282,6 +286,42 @@ void dst_release(struct dst_entry *dst)
282} 286}
283EXPORT_SYMBOL(dst_release); 287EXPORT_SYMBOL(dst_release);
284 288
289u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
290{
291 u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
292
293 if (p) {
294 u32 *old_p = __DST_METRICS_PTR(old);
295 unsigned long prev, new;
296
297 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
298
299 new = (unsigned long) p;
300 prev = cmpxchg(&dst->_metrics, old, new);
301
302 if (prev != old) {
303 kfree(p);
304 p = __DST_METRICS_PTR(prev);
305 if (prev & DST_METRICS_READ_ONLY)
306 p = NULL;
307 }
308 }
309 return p;
310}
311EXPORT_SYMBOL(dst_cow_metrics_generic);
312
313/* Caller asserts that dst_metrics_read_only(dst) is false. */
314void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
315{
316 unsigned long prev, new;
317
318 new = (unsigned long) dst_default_metrics;
319 prev = cmpxchg(&dst->_metrics, old, new);
320 if (prev == old)
321 kfree(__DST_METRICS_PTR(old));
322}
323EXPORT_SYMBOL(__dst_destroy_metrics_generic);
324
285/** 325/**
286 * skb_dst_set_noref - sets skb dst, without a reference 326 * skb_dst_set_noref - sets skb dst, without a reference
287 * @skb: buffer 327 * @skb: buffer
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ff2302910b5e..84e7304532e6 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -21,6 +21,8 @@
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/rtnetlink.h>
25#include <linux/sched.h>
24 26
25/* 27/*
26 * Some useful ethtool_ops methods that're device independent. 28 * Some useful ethtool_ops methods that're device independent.
@@ -34,12 +36,6 @@ u32 ethtool_op_get_link(struct net_device *dev)
34} 36}
35EXPORT_SYMBOL(ethtool_op_get_link); 37EXPORT_SYMBOL(ethtool_op_get_link);
36 38
37u32 ethtool_op_get_rx_csum(struct net_device *dev)
38{
39 return (dev->features & NETIF_F_ALL_CSUM) != 0;
40}
41EXPORT_SYMBOL(ethtool_op_get_rx_csum);
42
43u32 ethtool_op_get_tx_csum(struct net_device *dev) 39u32 ethtool_op_get_tx_csum(struct net_device *dev)
44{ 40{
45 return (dev->features & NETIF_F_ALL_CSUM) != 0; 41 return (dev->features & NETIF_F_ALL_CSUM) != 0;
@@ -55,6 +51,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
55 51
56 return 0; 52 return 0;
57} 53}
54EXPORT_SYMBOL(ethtool_op_set_tx_csum);
58 55
59int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) 56int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
60{ 57{
@@ -146,9 +143,24 @@ u32 ethtool_op_get_flags(struct net_device *dev)
146} 143}
147EXPORT_SYMBOL(ethtool_op_get_flags); 144EXPORT_SYMBOL(ethtool_op_get_flags);
148 145
146/* Check if device can enable (or disable) particular feature coded in "data"
147 * argument. Flags "supported" describe features that can be toggled by device.
148 * argument. If a feature cannot be toggled, its state (enabled or disabled)
149 * must match the hardcoded device features state; otherwise the flags are marked as invalid.
150 */
151bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
152{
153 u32 features = dev->features & flags_dup_features;
154 /* "data" can contain only flags_dup_features bits,
155 * see __ethtool_set_flags */
156
157 return (features & ~supported) != (data & ~supported);
158}
159EXPORT_SYMBOL(ethtool_invalid_flags);
160
149int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) 161int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
150{ 162{
151 if (data & ~supported) 163 if (ethtool_invalid_flags(dev, data, supported))
152 return -EINVAL; 164 return -EINVAL;
153 165
154 dev->features = ((dev->features & ~flags_dup_features) | 166 dev->features = ((dev->features & ~flags_dup_features) |
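With ethtool_invalid_flags() in place, ethtool_op_set_flags() rejects a request that would implicitly change a flag the driver cannot toggle; a legacy .set_flags op simply forwards the mask of flags it does support. A hedged sketch; the apply step is hypothetical:

    static int mydrv_set_flags(struct net_device *dev, u32 data)
    {
            int rc = ethtool_op_set_flags(dev, data,
                                          ETH_FLAG_LRO | ETH_FLAG_RXHASH);

            if (rc)
                    return rc;
            /* push the new LRO/RXHASH state to hardware here */
            return 0;
    }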
@@ -171,6 +183,381 @@ EXPORT_SYMBOL(ethtool_ntuple_flush);
171 183
172/* Handlers for each ethtool command */ 184/* Handlers for each ethtool command */
173 185
186#define ETHTOOL_DEV_FEATURE_WORDS 1
187
188static void ethtool_get_features_compat(struct net_device *dev,
189 struct ethtool_get_features_block *features)
190{
191 if (!dev->ethtool_ops)
192 return;
193
194 /* getting RX checksum */
195 if (dev->ethtool_ops->get_rx_csum)
196 if (dev->ethtool_ops->get_rx_csum(dev))
197 features[0].active |= NETIF_F_RXCSUM;
198
199 /* mark legacy-changeable features */
200 if (dev->ethtool_ops->set_sg)
201 features[0].available |= NETIF_F_SG;
202 if (dev->ethtool_ops->set_tx_csum)
203 features[0].available |= NETIF_F_ALL_CSUM;
204 if (dev->ethtool_ops->set_tso)
205 features[0].available |= NETIF_F_ALL_TSO;
206 if (dev->ethtool_ops->set_rx_csum)
207 features[0].available |= NETIF_F_RXCSUM;
208 if (dev->ethtool_ops->set_flags)
209 features[0].available |= flags_dup_features;
210}
211
212static int ethtool_set_feature_compat(struct net_device *dev,
213 int (*legacy_set)(struct net_device *, u32),
214 struct ethtool_set_features_block *features, u32 mask)
215{
216 u32 do_set;
217
218 if (!legacy_set)
219 return 0;
220
221 if (!(features[0].valid & mask))
222 return 0;
223
224 features[0].valid &= ~mask;
225
226 do_set = !!(features[0].requested & mask);
227
228 if (legacy_set(dev, do_set) < 0)
229 netdev_info(dev,
230 "Legacy feature change (%s) failed for 0x%08x\n",
231 do_set ? "set" : "clear", mask);
232
233 return 1;
234}
235
236static int ethtool_set_features_compat(struct net_device *dev,
237 struct ethtool_set_features_block *features)
238{
239 int compat;
240
241 if (!dev->ethtool_ops)
242 return 0;
243
244 compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
245 features, NETIF_F_SG);
246 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
247 features, NETIF_F_ALL_CSUM);
248 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
249 features, NETIF_F_ALL_TSO);
250 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
251 features, NETIF_F_RXCSUM);
252 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags,
253 features, flags_dup_features);
254
255 return compat;
256}
257
258static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
259{
260 struct ethtool_gfeatures cmd = {
261 .cmd = ETHTOOL_GFEATURES,
262 .size = ETHTOOL_DEV_FEATURE_WORDS,
263 };
264 struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
265 {
266 .available = dev->hw_features,
267 .requested = dev->wanted_features,
268 .active = dev->features,
269 .never_changed = NETIF_F_NEVER_CHANGE,
270 },
271 };
272 u32 __user *sizeaddr;
273 u32 copy_size;
274
275 ethtool_get_features_compat(dev, features);
276
277 sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
278 if (get_user(copy_size, sizeaddr))
279 return -EFAULT;
280
281 if (copy_size > ETHTOOL_DEV_FEATURE_WORDS)
282 copy_size = ETHTOOL_DEV_FEATURE_WORDS;
283
284 if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
285 return -EFAULT;
286 useraddr += sizeof(cmd);
287 if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
288 return -EFAULT;
289
290 return 0;
291}
292
293static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
294{
295 struct ethtool_sfeatures cmd;
296 struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
297 int ret = 0;
298
299 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
300 return -EFAULT;
301 useraddr += sizeof(cmd);
302
303 if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS)
304 return -EINVAL;
305
306 if (copy_from_user(features, useraddr, sizeof(features)))
307 return -EFAULT;
308
309 if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
310 return -EINVAL;
311
312 if (ethtool_set_features_compat(dev, features))
313 ret |= ETHTOOL_F_COMPAT;
314
315 if (features[0].valid & ~dev->hw_features) {
316 features[0].valid &= dev->hw_features;
317 ret |= ETHTOOL_F_UNSUPPORTED;
318 }
319
320 dev->wanted_features &= ~features[0].valid;
321 dev->wanted_features |= features[0].valid & features[0].requested;
322 __netdev_update_features(dev);
323
324 if ((dev->wanted_features ^ dev->features) & features[0].valid)
325 ret |= ETHTOOL_F_WISH;
326
327 return ret;
328}
329
330static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
331 /* NETIF_F_SG */ "tx-scatter-gather",
332 /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
333 /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded",
334 /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic",
335 /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6",
336 /* NETIF_F_HIGHDMA */ "highdma",
337 /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist",
338 /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert",
339
340 /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse",
341 /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter",
342 /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
343 /* NETIF_F_GSO */ "tx-generic-segmentation",
344 /* NETIF_F_LLTX */ "tx-lockless",
345 /* NETIF_F_NETNS_LOCAL */ "netns-local",
346 /* NETIF_F_GRO */ "rx-gro",
347 /* NETIF_F_LRO */ "rx-lro",
348
349 /* NETIF_F_TSO */ "tx-tcp-segmentation",
350 /* NETIF_F_UFO */ "tx-udp-fragmentation",
351 /* NETIF_F_GSO_ROBUST */ "tx-gso-robust",
352 /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation",
353 /* NETIF_F_TSO6 */ "tx-tcp6-segmentation",
354 /* NETIF_F_FSO */ "tx-fcoe-segmentation",
355 "",
356 "",
357
358 /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc",
359 /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp",
360 /* NETIF_F_FCOE_MTU */ "fcoe-mtu",
361 /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
362 /* NETIF_F_RXHASH */ "rx-hashing",
363 /* NETIF_F_RXCSUM */ "rx-checksum",
364 /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy",
365 /* NETIF_F_LOOPBACK */ "loopback",
366};
367
368static int __ethtool_get_sset_count(struct net_device *dev, int sset)
369{
370 const struct ethtool_ops *ops = dev->ethtool_ops;
371
372 if (sset == ETH_SS_FEATURES)
373 return ARRAY_SIZE(netdev_features_strings);
374
375 if (ops && ops->get_sset_count && ops->get_strings)
376 return ops->get_sset_count(dev, sset);
377 else
378 return -EOPNOTSUPP;
379}
380
381static void __ethtool_get_strings(struct net_device *dev,
382 u32 stringset, u8 *data)
383{
384 const struct ethtool_ops *ops = dev->ethtool_ops;
385
386 if (stringset == ETH_SS_FEATURES)
387 memcpy(data, netdev_features_strings,
388 sizeof(netdev_features_strings));
389 else
390 /* ops->get_strings is valid because checked earlier */
391 ops->get_strings(dev, stringset, data);
392}
393
394static u32 ethtool_get_feature_mask(u32 eth_cmd)
395{
396 /* feature masks of legacy discrete ethtool ops */
397
398 switch (eth_cmd) {
399 case ETHTOOL_GTXCSUM:
400 case ETHTOOL_STXCSUM:
401 return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
402 case ETHTOOL_GRXCSUM:
403 case ETHTOOL_SRXCSUM:
404 return NETIF_F_RXCSUM;
405 case ETHTOOL_GSG:
406 case ETHTOOL_SSG:
407 return NETIF_F_SG;
408 case ETHTOOL_GTSO:
409 case ETHTOOL_STSO:
410 return NETIF_F_ALL_TSO;
411 case ETHTOOL_GUFO:
412 case ETHTOOL_SUFO:
413 return NETIF_F_UFO;
414 case ETHTOOL_GGSO:
415 case ETHTOOL_SGSO:
416 return NETIF_F_GSO;
417 case ETHTOOL_GGRO:
418 case ETHTOOL_SGRO:
419 return NETIF_F_GRO;
420 default:
421 BUG();
422 }
423}
424
425static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
426{
427 const struct ethtool_ops *ops = dev->ethtool_ops;
428
429 if (!ops)
430 return NULL;
431
432 switch (ethcmd) {
433 case ETHTOOL_GTXCSUM:
434 return ops->get_tx_csum;
435 case ETHTOOL_GRXCSUM:
436 return ops->get_rx_csum;
437 case ETHTOOL_SSG:
438 return ops->get_sg;
439 case ETHTOOL_STSO:
440 return ops->get_tso;
441 case ETHTOOL_SUFO:
442 return ops->get_ufo;
443 default:
444 return NULL;
445 }
446}
447
448static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
449{
450 return !!(dev->features & NETIF_F_ALL_CSUM);
451}
452
453static int ethtool_get_one_feature(struct net_device *dev,
454 char __user *useraddr, u32 ethcmd)
455{
456 u32 mask = ethtool_get_feature_mask(ethcmd);
457 struct ethtool_value edata = {
458 .cmd = ethcmd,
459 .data = !!(dev->features & mask),
460 };
461
462 /* compatibility with discrete get_ ops */
463 if (!(dev->hw_features & mask)) {
464 u32 (*actor)(struct net_device *);
465
466 actor = __ethtool_get_one_feature_actor(dev, ethcmd);
467
468 /* bug compatibility with old get_rx_csum */
469 if (ethcmd == ETHTOOL_GRXCSUM && !actor)
470 actor = __ethtool_get_rx_csum_oldbug;
471
472 if (actor)
473 edata.data = actor(dev);
474 }
475
476 if (copy_to_user(useraddr, &edata, sizeof(edata)))
477 return -EFAULT;
478 return 0;
479}
480
481static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
482static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
483static int __ethtool_set_sg(struct net_device *dev, u32 data);
484static int __ethtool_set_tso(struct net_device *dev, u32 data);
485static int __ethtool_set_ufo(struct net_device *dev, u32 data);
486
487static int ethtool_set_one_feature(struct net_device *dev,
488 void __user *useraddr, u32 ethcmd)
489{
490 struct ethtool_value edata;
491 u32 mask;
492
493 if (copy_from_user(&edata, useraddr, sizeof(edata)))
494 return -EFAULT;
495
496 mask = ethtool_get_feature_mask(ethcmd);
497 mask &= dev->hw_features;
498 if (mask) {
499 if (edata.data)
500 dev->wanted_features |= mask;
501 else
502 dev->wanted_features &= ~mask;
503
504 __netdev_update_features(dev);
505 return 0;
506 }
507
508 * The driver has not been converted to ndo_fix_features, or does not
509 * support changing this offload. In the latter case it won't
510 * have the corresponding ethtool_ops field set.
511 *
512 * The following part is to be removed once all drivers advertise
513 * their changeable features in netdev->hw_features and stop
514 * using discrete offload setting ops.
515 */
516
517 switch (ethcmd) {
518 case ETHTOOL_STXCSUM:
519 return __ethtool_set_tx_csum(dev, edata.data);
520 case ETHTOOL_SRXCSUM:
521 return __ethtool_set_rx_csum(dev, edata.data);
522 case ETHTOOL_SSG:
523 return __ethtool_set_sg(dev, edata.data);
524 case ETHTOOL_STSO:
525 return __ethtool_set_tso(dev, edata.data);
526 case ETHTOOL_SUFO:
527 return __ethtool_set_ufo(dev, edata.data);
528 default:
529 return -EOPNOTSUPP;
530 }
531}
532
533int __ethtool_set_flags(struct net_device *dev, u32 data)
534{
535 u32 changed;
536
537 if (data & ~flags_dup_features)
538 return -EINVAL;
539
540 /* legacy set_flags() op */
541 if (dev->ethtool_ops->set_flags) {
542 if (unlikely(dev->hw_features & flags_dup_features))
543 netdev_warn(dev,
544 "driver BUG: mixed hw_features and set_flags()\n");
545 return dev->ethtool_ops->set_flags(dev, data);
546 }
547
548 /* allow changing only bits set in hw_features */
549 changed = (data ^ dev->features) & flags_dup_features;
550 if (changed & ~dev->hw_features)
551 return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
552
553 dev->wanted_features =
554 (dev->wanted_features & ~changed) | (data & dev->hw_features);
555
556 __netdev_update_features(dev);
557
558 return 0;
559}
560
174static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 561static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
175{ 562{
176 struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; 563 struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
@@ -251,14 +638,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
251 void __user *useraddr) 638 void __user *useraddr)
252{ 639{
253 struct ethtool_sset_info info; 640 struct ethtool_sset_info info;
254 const struct ethtool_ops *ops = dev->ethtool_ops;
255 u64 sset_mask; 641 u64 sset_mask;
256 int i, idx = 0, n_bits = 0, ret, rc; 642 int i, idx = 0, n_bits = 0, ret, rc;
257 u32 *info_buf = NULL; 643 u32 *info_buf = NULL;
258 644
259 if (!ops->get_sset_count)
260 return -EOPNOTSUPP;
261
262 if (copy_from_user(&info, useraddr, sizeof(info))) 645 if (copy_from_user(&info, useraddr, sizeof(info)))
263 return -EFAULT; 646 return -EFAULT;
264 647
@@ -285,7 +668,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
285 if (!(sset_mask & (1ULL << i))) 668 if (!(sset_mask & (1ULL << i)))
286 continue; 669 continue;
287 670
288 rc = ops->get_sset_count(dev, i); 671 rc = __ethtool_get_sset_count(dev, i);
289 if (rc >= 0) { 672 if (rc >= 0) {
290 info.sset_mask |= (1ULL << i); 673 info.sset_mask |= (1ULL << i);
291 info_buf[idx++] = rc; 674 info_buf[idx++] = rc;
@@ -527,6 +910,9 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
527 struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; 910 struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
528 int ret; 911 int ret;
529 912
913 if (!ops->set_rx_ntuple)
914 return -EOPNOTSUPP;
915
530 if (!(dev->features & NETIF_F_NTUPLE)) 916 if (!(dev->features & NETIF_F_NTUPLE))
531 return -EINVAL; 917 return -EINVAL;
532 918
@@ -1060,6 +1446,35 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
1060 return dev->ethtool_ops->set_ringparam(dev, &ringparam); 1446 return dev->ethtool_ops->set_ringparam(dev, &ringparam);
1061} 1447}
1062 1448
1449static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
1450 void __user *useraddr)
1451{
1452 struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
1453
1454 if (!dev->ethtool_ops->get_channels)
1455 return -EOPNOTSUPP;
1456
1457 dev->ethtool_ops->get_channels(dev, &channels);
1458
1459 if (copy_to_user(useraddr, &channels, sizeof(channels)))
1460 return -EFAULT;
1461 return 0;
1462}
1463
1464static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
1465 void __user *useraddr)
1466{
1467 struct ethtool_channels channels;
1468
1469 if (!dev->ethtool_ops->set_channels)
1470 return -EOPNOTSUPP;
1471
1472 if (copy_from_user(&channels, useraddr, sizeof(channels)))
1473 return -EFAULT;
1474
1475 return dev->ethtool_ops->set_channels(dev, &channels);
1476}
1477
1063static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) 1478static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
1064{ 1479{
1065 struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; 1480 struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
@@ -1091,6 +1506,12 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
1091{ 1506{
1092 int err; 1507 int err;
1093 1508
1509 if (!dev->ethtool_ops->set_sg)
1510 return -EOPNOTSUPP;
1511
1512 if (data && !(dev->features & NETIF_F_ALL_CSUM))
1513 return -EINVAL;
1514
1094 if (!data && dev->ethtool_ops->set_tso) { 1515 if (!data && dev->ethtool_ops->set_tso) {
1095 err = dev->ethtool_ops->set_tso(dev, 0); 1516 err = dev->ethtool_ops->set_tso(dev, 0);
1096 if (err) 1517 if (err)
@@ -1105,145 +1526,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
1105 return dev->ethtool_ops->set_sg(dev, data); 1526 return dev->ethtool_ops->set_sg(dev, data);
1106} 1527}
1107 1528
1108static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) 1529static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
1109{ 1530{
1110 struct ethtool_value edata;
1111 int err; 1531 int err;
1112 1532
1113 if (!dev->ethtool_ops->set_tx_csum) 1533 if (!dev->ethtool_ops->set_tx_csum)
1114 return -EOPNOTSUPP; 1534 return -EOPNOTSUPP;
1115 1535
1116 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1536 if (!data && dev->ethtool_ops->set_sg) {
1117 return -EFAULT;
1118
1119 if (!edata.data && dev->ethtool_ops->set_sg) {
1120 err = __ethtool_set_sg(dev, 0); 1537 err = __ethtool_set_sg(dev, 0);
1121 if (err) 1538 if (err)
1122 return err; 1539 return err;
1123 } 1540 }
1124 1541
1125 return dev->ethtool_ops->set_tx_csum(dev, edata.data); 1542 return dev->ethtool_ops->set_tx_csum(dev, data);
1126} 1543}
1127EXPORT_SYMBOL(ethtool_op_set_tx_csum);
1128 1544
1129static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) 1545static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
1130{ 1546{
1131 struct ethtool_value edata;
1132
1133 if (!dev->ethtool_ops->set_rx_csum) 1547 if (!dev->ethtool_ops->set_rx_csum)
1134 return -EOPNOTSUPP; 1548 return -EOPNOTSUPP;
1135 1549
1136 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1550 if (!data)
1137 return -EFAULT;
1138
1139 if (!edata.data && dev->ethtool_ops->set_sg)
1140 dev->features &= ~NETIF_F_GRO; 1551 dev->features &= ~NETIF_F_GRO;
1141 1552
1142 return dev->ethtool_ops->set_rx_csum(dev, edata.data); 1553 return dev->ethtool_ops->set_rx_csum(dev, data);
1143}
1144
1145static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
1146{
1147 struct ethtool_value edata;
1148
1149 if (!dev->ethtool_ops->set_sg)
1150 return -EOPNOTSUPP;
1151
1152 if (copy_from_user(&edata, useraddr, sizeof(edata)))
1153 return -EFAULT;
1154
1155 if (edata.data &&
1156 !(dev->features & NETIF_F_ALL_CSUM))
1157 return -EINVAL;
1158
1159 return __ethtool_set_sg(dev, edata.data);
1160} 1554}
1161 1555
1162static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) 1556static int __ethtool_set_tso(struct net_device *dev, u32 data)
1163{ 1557{
1164 struct ethtool_value edata;
1165
1166 if (!dev->ethtool_ops->set_tso) 1558 if (!dev->ethtool_ops->set_tso)
1167 return -EOPNOTSUPP; 1559 return -EOPNOTSUPP;
1168 1560
1169 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1561 if (data && !(dev->features & NETIF_F_SG))
1170 return -EFAULT;
1171
1172 if (edata.data && !(dev->features & NETIF_F_SG))
1173 return -EINVAL; 1562 return -EINVAL;
1174 1563
1175 return dev->ethtool_ops->set_tso(dev, edata.data); 1564 return dev->ethtool_ops->set_tso(dev, data);
1176} 1565}
1177 1566
1178static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) 1567static int __ethtool_set_ufo(struct net_device *dev, u32 data)
1179{ 1568{
1180 struct ethtool_value edata;
1181
1182 if (!dev->ethtool_ops->set_ufo) 1569 if (!dev->ethtool_ops->set_ufo)
1183 return -EOPNOTSUPP; 1570 return -EOPNOTSUPP;
1184 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1571 if (data && !(dev->features & NETIF_F_SG))
1185 return -EFAULT;
1186 if (edata.data && !(dev->features & NETIF_F_SG))
1187 return -EINVAL; 1572 return -EINVAL;
1188 if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || 1573 if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
1189 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 1574 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
1190 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) 1575 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
1191 return -EINVAL; 1576 return -EINVAL;
1192 return dev->ethtool_ops->set_ufo(dev, edata.data); 1577 return dev->ethtool_ops->set_ufo(dev, data);
1193}
1194
1195static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
1196{
1197 struct ethtool_value edata = { ETHTOOL_GGSO };
1198
1199 edata.data = dev->features & NETIF_F_GSO;
1200 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1201 return -EFAULT;
1202 return 0;
1203}
1204
1205static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
1206{
1207 struct ethtool_value edata;
1208
1209 if (copy_from_user(&edata, useraddr, sizeof(edata)))
1210 return -EFAULT;
1211 if (edata.data)
1212 dev->features |= NETIF_F_GSO;
1213 else
1214 dev->features &= ~NETIF_F_GSO;
1215 return 0;
1216}
1217
1218static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
1219{
1220 struct ethtool_value edata = { ETHTOOL_GGRO };
1221
1222 edata.data = dev->features & NETIF_F_GRO;
1223 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1224 return -EFAULT;
1225 return 0;
1226}
1227
1228static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
1229{
1230 struct ethtool_value edata;
1231
1232 if (copy_from_user(&edata, useraddr, sizeof(edata)))
1233 return -EFAULT;
1234
1235 if (edata.data) {
1236 u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
1237 dev->ethtool_ops->get_rx_csum(dev) :
1238 ethtool_op_get_rx_csum(dev);
1239
1240 if (!rxcsum)
1241 return -EINVAL;
1242 dev->features |= NETIF_F_GRO;
1243 } else
1244 dev->features &= ~NETIF_F_GRO;
1245
1246 return 0;
1247} 1578}
1248 1579
1249static int ethtool_self_test(struct net_device *dev, char __user *useraddr) 1580static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
@@ -1287,17 +1618,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
1287static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) 1618static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
1288{ 1619{
1289 struct ethtool_gstrings gstrings; 1620 struct ethtool_gstrings gstrings;
1290 const struct ethtool_ops *ops = dev->ethtool_ops;
1291 u8 *data; 1621 u8 *data;
1292 int ret; 1622 int ret;
1293 1623
1294 if (!ops->get_strings || !ops->get_sset_count)
1295 return -EOPNOTSUPP;
1296
1297 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) 1624 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
1298 return -EFAULT; 1625 return -EFAULT;
1299 1626
1300 ret = ops->get_sset_count(dev, gstrings.string_set); 1627 ret = __ethtool_get_sset_count(dev, gstrings.string_set);
1301 if (ret < 0) 1628 if (ret < 0)
1302 return ret; 1629 return ret;
1303 1630
@@ -1307,7 +1634,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
1307 if (!data) 1634 if (!data)
1308 return -ENOMEM; 1635 return -ENOMEM;
1309 1636
1310 ops->get_strings(dev, gstrings.string_set, data); 1637 __ethtool_get_strings(dev, gstrings.string_set, data);
1311 1638
1312 ret = -EFAULT; 1639 ret = -EFAULT;
1313 if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) 1640 if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1317,7 +1644,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
1317 goto out; 1644 goto out;
1318 ret = 0; 1645 ret = 0;
1319 1646
1320 out: 1647out:
1321 kfree(data); 1648 kfree(data);
1322 return ret; 1649 return ret;
1323} 1650}
@@ -1325,14 +1652,60 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
1325static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) 1652static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1326{ 1653{
1327 struct ethtool_value id; 1654 struct ethtool_value id;
1655 static bool busy;
1656 int rc;
1328 1657
1329 if (!dev->ethtool_ops->phys_id) 1658 if (!dev->ethtool_ops->set_phys_id)
1330 return -EOPNOTSUPP; 1659 return -EOPNOTSUPP;
1331 1660
1661 if (busy)
1662 return -EBUSY;
1663
1332 if (copy_from_user(&id, useraddr, sizeof(id))) 1664 if (copy_from_user(&id, useraddr, sizeof(id)))
1333 return -EFAULT; 1665 return -EFAULT;
1334 1666
1335 return dev->ethtool_ops->phys_id(dev, id.data); 1667 rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
1668 if (rc < 0)
1669 return rc;
1670
1671 /* Drop the RTNL lock while waiting, but prevent reentry or
1672 * removal of the device.
1673 */
1674 busy = true;
1675 dev_hold(dev);
1676 rtnl_unlock();
1677
1678 if (rc == 0) {
1679 /* Driver will handle this itself */
1680 schedule_timeout_interruptible(
1681 id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT);
1682 } else {
1683 /* Driver expects to be called at twice the frequency in rc */
1684 int n = rc * 2, i, interval = HZ / n;
1685
1686 /* Count down seconds */
1687 do {
1688 /* Count down iterations per second */
1689 i = n;
1690 do {
1691 rtnl_lock();
1692 rc = dev->ethtool_ops->set_phys_id(dev,
1693 (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
1694 rtnl_unlock();
1695 if (rc)
1696 break;
1697 schedule_timeout_interruptible(interval);
1698 } while (!signal_pending(current) && --i != 0);
1699 } while (!signal_pending(current) &&
1700 (id.data == 0 || --id.data != 0));
1701 }
1702
1703 rtnl_lock();
1704 dev_put(dev);
1705 busy = false;
1706
1707 (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
1708 return rc;
1336} 1709}
1337 1710
1338static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) 1711static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
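For reference, the rewritten ETHTOOL_PHYS_ID path above replaces the old one-shot phys_id hook with a stateful set_phys_id callback driven from the core loop. A minimal driver-side sketch follows; the foo device, its ops table name, and the choice of two blink cycles per second are illustrative assumptions, only the callback signature and the ETHTOOL_ID_* states come from the hunk above.

	#include <linux/ethtool.h>
	#include <linux/netdevice.h>

	/* Hypothetical device: returning a positive count from ETHTOOL_ID_ACTIVE
	 * asks the core loop above to drive the blinking via ETHTOOL_ID_ON /
	 * ETHTOOL_ID_OFF callbacks; returning 0 would mean the driver blinks
	 * asynchronously on its own until ETHTOOL_ID_INACTIVE. */
	static int foo_set_phys_id(struct net_device *dev,
				   enum ethtool_phys_id_state state)
	{
		switch (state) {
		case ETHTOOL_ID_ACTIVE:
			return 2;		/* blink cycles per second */
		case ETHTOOL_ID_ON:
			/* write "LED on" to the identification LED register here */
			break;
		case ETHTOOL_ID_OFF:
			/* write "LED off" here */
			break;
		case ETHTOOL_ID_INACTIVE:
			/* restore normal LED behaviour here */
			break;
		}
		return 0;
	}

	static const struct ethtool_ops foo_ethtool_ops = {
		.set_phys_id	= foo_set_phys_id,
	};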
@@ -1450,6 +1823,87 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
1450 return dev->ethtool_ops->flash_device(dev, &efl); 1823 return dev->ethtool_ops->flash_device(dev, &efl);
1451} 1824}
1452 1825
1826static int ethtool_set_dump(struct net_device *dev,
1827 void __user *useraddr)
1828{
1829 struct ethtool_dump dump;
1830
1831 if (!dev->ethtool_ops->set_dump)
1832 return -EOPNOTSUPP;
1833
1834 if (copy_from_user(&dump, useraddr, sizeof(dump)))
1835 return -EFAULT;
1836
1837 return dev->ethtool_ops->set_dump(dev, &dump);
1838}
1839
1840static int ethtool_get_dump_flag(struct net_device *dev,
1841 void __user *useraddr)
1842{
1843 int ret;
1844 struct ethtool_dump dump;
1845 const struct ethtool_ops *ops = dev->ethtool_ops;
1846
1847 if (!dev->ethtool_ops->get_dump_flag)
1848 return -EOPNOTSUPP;
1849
1850 if (copy_from_user(&dump, useraddr, sizeof(dump)))
1851 return -EFAULT;
1852
1853 ret = ops->get_dump_flag(dev, &dump);
1854 if (ret)
1855 return ret;
1856
1857 if (copy_to_user(useraddr, &dump, sizeof(dump)))
1858 return -EFAULT;
1859 return 0;
1860}
1861
1862static int ethtool_get_dump_data(struct net_device *dev,
1863 void __user *useraddr)
1864{
1865 int ret;
1866 __u32 len;
1867 struct ethtool_dump dump, tmp;
1868 const struct ethtool_ops *ops = dev->ethtool_ops;
1869 void *data = NULL;
1870
1871 if (!dev->ethtool_ops->get_dump_data ||
1872 !dev->ethtool_ops->get_dump_flag)
1873 return -EOPNOTSUPP;
1874
1875 if (copy_from_user(&dump, useraddr, sizeof(dump)))
1876 return -EFAULT;
1877
1878 memset(&tmp, 0, sizeof(tmp));
1879 tmp.cmd = ETHTOOL_GET_DUMP_FLAG;
1880 ret = ops->get_dump_flag(dev, &tmp);
1881 if (ret)
1882 return ret;
1883
1884 len = (tmp.len > dump.len) ? dump.len : tmp.len;
1885 if (!len)
1886 return -EFAULT;
1887
1888 data = vzalloc(tmp.len);
1889 if (!data)
1890 return -ENOMEM;
1891 ret = ops->get_dump_data(dev, &dump, data);
1892 if (ret)
1893 goto out;
1894
1895 if (copy_to_user(useraddr, &dump, sizeof(dump))) {
1896 ret = -EFAULT;
1897 goto out;
1898 }
1899 useraddr += offsetof(struct ethtool_dump, data);
1900 if (copy_to_user(useraddr, data, len))
1901 ret = -EFAULT;
1902out:
1903 vfree(data);
1904 return ret;
1905}
1906
1453/* The main entry point in this file. Called from net/core/dev.c */ 1907/* The main entry point in this file. Called from net/core/dev.c */
1454 1908
1455int dev_ethtool(struct net *net, struct ifreq *ifr) 1909int dev_ethtool(struct net *net, struct ifreq *ifr)
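The three new dump handlers above call into set_dump, get_dump_flag and get_dump_data driver ops. A sketch of what a driver providing them might look like; the foo names, FOO_DUMP_LEN, and the module-scope flag are invented for illustration, and the version/flag members are assumed from the struct ethtool_dump layout in ethtool.h rather than shown in this hunk.

	#include <linux/ethtool.h>
	#include <linux/netdevice.h>
	#include <linux/string.h>

	#define FOO_DUMP_LEN	512		/* made-up fixed snapshot size */

	static u32 foo_dump_flag;		/* real drivers keep this in priv data */

	static int foo_set_dump(struct net_device *dev, struct ethtool_dump *val)
	{
		foo_dump_flag = val->flag;	/* remember what to dump */
		return 0;
	}

	static int foo_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump)
	{
		dump->flag = foo_dump_flag;
		dump->len = FOO_DUMP_LEN;	/* the core sizes its vzalloc() buffer from this */
		dump->version = 1;
		return 0;
	}

	static int foo_get_dump_data(struct net_device *dev,
				     struct ethtool_dump *dump, void *buf)
	{
		dump->len = FOO_DUMP_LEN;
		memset(buf, 0, FOO_DUMP_LEN);	/* copy real device state here instead */
		return 0;
	}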
@@ -1458,7 +1912,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1458 void __user *useraddr = ifr->ifr_data; 1912 void __user *useraddr = ifr->ifr_data;
1459 u32 ethcmd; 1913 u32 ethcmd;
1460 int rc; 1914 int rc;
1461 unsigned long old_features; 1915 u32 old_features;
1462 1916
1463 if (!dev || !netif_device_present(dev)) 1917 if (!dev || !netif_device_present(dev))
1464 return -ENODEV; 1918 return -ENODEV;
@@ -1500,6 +1954,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1500 case ETHTOOL_GRXCLSRLCNT: 1954 case ETHTOOL_GRXCLSRLCNT:
1501 case ETHTOOL_GRXCLSRULE: 1955 case ETHTOOL_GRXCLSRULE:
1502 case ETHTOOL_GRXCLSRLALL: 1956 case ETHTOOL_GRXCLSRLALL:
1957 case ETHTOOL_GFEATURES:
1503 break; 1958 break;
1504 default: 1959 default:
1505 if (!capable(CAP_NET_ADMIN)) 1960 if (!capable(CAP_NET_ADMIN))
@@ -1570,42 +2025,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1570 case ETHTOOL_SPAUSEPARAM: 2025 case ETHTOOL_SPAUSEPARAM:
1571 rc = ethtool_set_pauseparam(dev, useraddr); 2026 rc = ethtool_set_pauseparam(dev, useraddr);
1572 break; 2027 break;
1573 case ETHTOOL_GRXCSUM:
1574 rc = ethtool_get_value(dev, useraddr, ethcmd,
1575 (dev->ethtool_ops->get_rx_csum ?
1576 dev->ethtool_ops->get_rx_csum :
1577 ethtool_op_get_rx_csum));
1578 break;
1579 case ETHTOOL_SRXCSUM:
1580 rc = ethtool_set_rx_csum(dev, useraddr);
1581 break;
1582 case ETHTOOL_GTXCSUM:
1583 rc = ethtool_get_value(dev, useraddr, ethcmd,
1584 (dev->ethtool_ops->get_tx_csum ?
1585 dev->ethtool_ops->get_tx_csum :
1586 ethtool_op_get_tx_csum));
1587 break;
1588 case ETHTOOL_STXCSUM:
1589 rc = ethtool_set_tx_csum(dev, useraddr);
1590 break;
1591 case ETHTOOL_GSG:
1592 rc = ethtool_get_value(dev, useraddr, ethcmd,
1593 (dev->ethtool_ops->get_sg ?
1594 dev->ethtool_ops->get_sg :
1595 ethtool_op_get_sg));
1596 break;
1597 case ETHTOOL_SSG:
1598 rc = ethtool_set_sg(dev, useraddr);
1599 break;
1600 case ETHTOOL_GTSO:
1601 rc = ethtool_get_value(dev, useraddr, ethcmd,
1602 (dev->ethtool_ops->get_tso ?
1603 dev->ethtool_ops->get_tso :
1604 ethtool_op_get_tso));
1605 break;
1606 case ETHTOOL_STSO:
1607 rc = ethtool_set_tso(dev, useraddr);
1608 break;
1609 case ETHTOOL_TEST: 2028 case ETHTOOL_TEST:
1610 rc = ethtool_self_test(dev, useraddr); 2029 rc = ethtool_self_test(dev, useraddr);
1611 break; 2030 break;
@@ -1621,21 +2040,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1621 case ETHTOOL_GPERMADDR: 2040 case ETHTOOL_GPERMADDR:
1622 rc = ethtool_get_perm_addr(dev, useraddr); 2041 rc = ethtool_get_perm_addr(dev, useraddr);
1623 break; 2042 break;
1624 case ETHTOOL_GUFO:
1625 rc = ethtool_get_value(dev, useraddr, ethcmd,
1626 (dev->ethtool_ops->get_ufo ?
1627 dev->ethtool_ops->get_ufo :
1628 ethtool_op_get_ufo));
1629 break;
1630 case ETHTOOL_SUFO:
1631 rc = ethtool_set_ufo(dev, useraddr);
1632 break;
1633 case ETHTOOL_GGSO:
1634 rc = ethtool_get_gso(dev, useraddr);
1635 break;
1636 case ETHTOOL_SGSO:
1637 rc = ethtool_set_gso(dev, useraddr);
1638 break;
1639 case ETHTOOL_GFLAGS: 2043 case ETHTOOL_GFLAGS:
1640 rc = ethtool_get_value(dev, useraddr, ethcmd, 2044 rc = ethtool_get_value(dev, useraddr, ethcmd,
1641 (dev->ethtool_ops->get_flags ? 2045 (dev->ethtool_ops->get_flags ?
@@ -1643,8 +2047,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1643 ethtool_op_get_flags)); 2047 ethtool_op_get_flags));
1644 break; 2048 break;
1645 case ETHTOOL_SFLAGS: 2049 case ETHTOOL_SFLAGS:
1646 rc = ethtool_set_value(dev, useraddr, 2050 rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
1647 dev->ethtool_ops->set_flags);
1648 break; 2051 break;
1649 case ETHTOOL_GPFLAGS: 2052 case ETHTOOL_GPFLAGS:
1650 rc = ethtool_get_value(dev, useraddr, ethcmd, 2053 rc = ethtool_get_value(dev, useraddr, ethcmd,
@@ -1666,12 +2069,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1666 case ETHTOOL_SRXCLSRLINS: 2069 case ETHTOOL_SRXCLSRLINS:
1667 rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); 2070 rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
1668 break; 2071 break;
1669 case ETHTOOL_GGRO:
1670 rc = ethtool_get_gro(dev, useraddr);
1671 break;
1672 case ETHTOOL_SGRO:
1673 rc = ethtool_set_gro(dev, useraddr);
1674 break;
1675 case ETHTOOL_FLASHDEV: 2072 case ETHTOOL_FLASHDEV:
1676 rc = ethtool_flash_device(dev, useraddr); 2073 rc = ethtool_flash_device(dev, useraddr);
1677 break; 2074 break;
@@ -1693,6 +2090,45 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1693 case ETHTOOL_SRXFHINDIR: 2090 case ETHTOOL_SRXFHINDIR:
1694 rc = ethtool_set_rxfh_indir(dev, useraddr); 2091 rc = ethtool_set_rxfh_indir(dev, useraddr);
1695 break; 2092 break;
2093 case ETHTOOL_GFEATURES:
2094 rc = ethtool_get_features(dev, useraddr);
2095 break;
2096 case ETHTOOL_SFEATURES:
2097 rc = ethtool_set_features(dev, useraddr);
2098 break;
2099 case ETHTOOL_GTXCSUM:
2100 case ETHTOOL_GRXCSUM:
2101 case ETHTOOL_GSG:
2102 case ETHTOOL_GTSO:
2103 case ETHTOOL_GUFO:
2104 case ETHTOOL_GGSO:
2105 case ETHTOOL_GGRO:
2106 rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
2107 break;
2108 case ETHTOOL_STXCSUM:
2109 case ETHTOOL_SRXCSUM:
2110 case ETHTOOL_SSG:
2111 case ETHTOOL_STSO:
2112 case ETHTOOL_SUFO:
2113 case ETHTOOL_SGSO:
2114 case ETHTOOL_SGRO:
2115 rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
2116 break;
2117 case ETHTOOL_GCHANNELS:
2118 rc = ethtool_get_channels(dev, useraddr);
2119 break;
2120 case ETHTOOL_SCHANNELS:
2121 rc = ethtool_set_channels(dev, useraddr);
2122 break;
2123 case ETHTOOL_SET_DUMP:
2124 rc = ethtool_set_dump(dev, useraddr);
2125 break;
2126 case ETHTOOL_GET_DUMP_FLAG:
2127 rc = ethtool_get_dump_flag(dev, useraddr);
2128 break;
2129 case ETHTOOL_GET_DUMP_DATA:
2130 rc = ethtool_get_dump_data(dev, useraddr);
2131 break;
1696 default: 2132 default:
1697 rc = -EOPNOTSUPP; 2133 rc = -EOPNOTSUPP;
1698 } 2134 }
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a20e5d3bbfa0..3911586e12e4 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -181,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
181{ 181{
182 int ret = 0; 182 int ret = 0;
183 183
184 if (rule->iifindex && (rule->iifindex != fl->iif)) 184 if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
185 goto out; 185 goto out;
186 186
187 if (rule->oifindex && (rule->oifindex != fl->oif)) 187 if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
188 goto out; 188 goto out;
189 189
190 if ((rule->mark ^ fl->mark) & rule->mark_mask) 190 if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
191 goto out; 191 goto out;
192 192
193 ret = ops->match(rule, fl, flags); 193 ret = ops->match(rule, fl, flags);
@@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
590 int idx = 0; 590 int idx = 0;
591 struct fib_rule *rule; 591 struct fib_rule *rule;
592 592
593 list_for_each_entry(rule, &ops->rules_list, list) { 593 rcu_read_lock();
594 list_for_each_entry_rcu(rule, &ops->rules_list, list) {
 594 if (idx < cb->args[1]) 595 if (idx < cb->args[1])
 595 goto skip; 596 goto skip;
596 597
diff --git a/net/core/filter.c b/net/core/filter.c
index afc58374ca96..0eb8c4466eaa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,65 +39,6 @@
39#include <linux/filter.h> 39#include <linux/filter.h>
40#include <linux/reciprocal_div.h> 40#include <linux/reciprocal_div.h>
41 41
42enum {
43 BPF_S_RET_K = 1,
44 BPF_S_RET_A,
45 BPF_S_ALU_ADD_K,
46 BPF_S_ALU_ADD_X,
47 BPF_S_ALU_SUB_K,
48 BPF_S_ALU_SUB_X,
49 BPF_S_ALU_MUL_K,
50 BPF_S_ALU_MUL_X,
51 BPF_S_ALU_DIV_X,
52 BPF_S_ALU_AND_K,
53 BPF_S_ALU_AND_X,
54 BPF_S_ALU_OR_K,
55 BPF_S_ALU_OR_X,
56 BPF_S_ALU_LSH_K,
57 BPF_S_ALU_LSH_X,
58 BPF_S_ALU_RSH_K,
59 BPF_S_ALU_RSH_X,
60 BPF_S_ALU_NEG,
61 BPF_S_LD_W_ABS,
62 BPF_S_LD_H_ABS,
63 BPF_S_LD_B_ABS,
64 BPF_S_LD_W_LEN,
65 BPF_S_LD_W_IND,
66 BPF_S_LD_H_IND,
67 BPF_S_LD_B_IND,
68 BPF_S_LD_IMM,
69 BPF_S_LDX_W_LEN,
70 BPF_S_LDX_B_MSH,
71 BPF_S_LDX_IMM,
72 BPF_S_MISC_TAX,
73 BPF_S_MISC_TXA,
74 BPF_S_ALU_DIV_K,
75 BPF_S_LD_MEM,
76 BPF_S_LDX_MEM,
77 BPF_S_ST,
78 BPF_S_STX,
79 BPF_S_JMP_JA,
80 BPF_S_JMP_JEQ_K,
81 BPF_S_JMP_JEQ_X,
82 BPF_S_JMP_JGE_K,
83 BPF_S_JMP_JGE_X,
84 BPF_S_JMP_JGT_K,
85 BPF_S_JMP_JGT_X,
86 BPF_S_JMP_JSET_K,
87 BPF_S_JMP_JSET_X,
88 /* Ancillary data */
89 BPF_S_ANC_PROTOCOL,
90 BPF_S_ANC_PKTTYPE,
91 BPF_S_ANC_IFINDEX,
92 BPF_S_ANC_NLATTR,
93 BPF_S_ANC_NLATTR_NEST,
94 BPF_S_ANC_MARK,
95 BPF_S_ANC_QUEUE,
96 BPF_S_ANC_HATYPE,
97 BPF_S_ANC_RXHASH,
98 BPF_S_ANC_CPU,
99};
100
101/* No hurry in this branch */ 42/* No hurry in this branch */
102static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) 43static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
103{ 44{
@@ -142,14 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
142 if (err) 83 if (err)
143 return err; 84 return err;
144 85
145 rcu_read_lock_bh(); 86 rcu_read_lock();
146 filter = rcu_dereference_bh(sk->sk_filter); 87 filter = rcu_dereference(sk->sk_filter);
147 if (filter) { 88 if (filter) {
148 unsigned int pkt_len = sk_run_filter(skb, filter->insns); 89 unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
149 90
150 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 91 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
151 } 92 }
152 rcu_read_unlock_bh(); 93 rcu_read_unlock();
153 94
154 return err; 95 return err;
155} 96}
@@ -425,7 +366,7 @@ EXPORT_SYMBOL(sk_run_filter);
425 * As we dont want to clear mem[] array for each packet going through 366 * As we dont want to clear mem[] array for each packet going through
426 * sk_run_filter(), we check that filter loaded by user never try to read 367 * sk_run_filter(), we check that filter loaded by user never try to read
427 * a cell if not previously written, and we check all branches to be sure 368 * a cell if not previously written, and we check all branches to be sure
428 * a malicious user doesnt try to abuse us. 369 * a malicious user doesn't try to abuse us.
429 */ 370 */
430static int check_load_and_stores(struct sock_filter *filter, int flen) 371static int check_load_and_stores(struct sock_filter *filter, int flen)
431{ 372{
@@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
638{ 579{
639 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 580 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
640 581
582 bpf_jit_free(fp);
641 kfree(fp); 583 kfree(fp);
642} 584}
643EXPORT_SYMBOL(sk_filter_release_rcu); 585EXPORT_SYMBOL(sk_filter_release_rcu);
@@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
672 614
673 atomic_set(&fp->refcnt, 1); 615 atomic_set(&fp->refcnt, 1);
674 fp->len = fprog->len; 616 fp->len = fprog->len;
617 fp->bpf_func = sk_run_filter;
675 618
676 err = sk_chk_filter(fp->insns, fp->len); 619 err = sk_chk_filter(fp->insns, fp->len);
677 if (err) { 620 if (err) {
@@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
679 return err; 622 return err;
680 } 623 }
681 624
625 bpf_jit_compile(fp);
626
682 old_fp = rcu_dereference_protected(sk->sk_filter, 627 old_fp = rcu_dereference_protected(sk->sk_filter,
683 sock_owned_by_user(sk)); 628 sock_owned_by_user(sk));
684 rcu_assign_pointer(sk->sk_filter, fp); 629 rcu_assign_pointer(sk->sk_filter, fp);
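The filter.c changes above wire the BPF JIT into the attach and release paths: sk_attach_filter() initialises fp->bpf_func to the interpreter and then lets bpf_jit_compile() replace it, while sk_filter() dispatches through SK_RUN_FILTER(). A rough sketch of that dispatch, assuming the sk_filter layout of this era (this is not the exact kernel macro):

	#include <linux/filter.h>
	#include <linux/skbuff.h>

	/* fp->bpf_func starts out as sk_run_filter(); a successful
	 * bpf_jit_compile() swaps in generated native code, so the caller
	 * never needs to know which one it is running. */
	static inline unsigned int run_one_filter(const struct sk_filter *fp,
						  const struct sk_buff *skb)
	{
		return fp->bpf_func(skb, fp->insns);	/* JIT output or interpreter */
	}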
diff --git a/net/core/flow.c b/net/core/flow.c
index 127c8a7ffd61..990703b8863b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
172 172
173static u32 flow_hash_code(struct flow_cache *fc, 173static u32 flow_hash_code(struct flow_cache *fc,
174 struct flow_cache_percpu *fcp, 174 struct flow_cache_percpu *fcp,
175 struct flowi *key) 175 const struct flowi *key)
176{ 176{
177 u32 *k = (u32 *) key; 177 const u32 *k = (const u32 *) key;
178 178
179 return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) 179 return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
180 & (flow_cache_hash_size(fc) - 1); 180 & (flow_cache_hash_size(fc) - 1);
@@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t;
186 * important assumptions that we can here, such as alignment and 186 * important assumptions that we can here, such as alignment and
187 * constant size. 187 * constant size.
188 */ 188 */
189static int flow_key_compare(struct flowi *key1, struct flowi *key2) 189static int flow_key_compare(const struct flowi *key1, const struct flowi *key2)
190{ 190{
191 flow_compare_t *k1, *k1_lim, *k2; 191 const flow_compare_t *k1, *k1_lim, *k2;
192 const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); 192 const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
193 193
194 BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); 194 BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
195 195
196 k1 = (flow_compare_t *) key1; 196 k1 = (const flow_compare_t *) key1;
197 k1_lim = k1 + n_elem; 197 k1_lim = k1 + n_elem;
198 198
199 k2 = (flow_compare_t *) key2; 199 k2 = (const flow_compare_t *) key2;
200 200
201 do { 201 do {
202 if (*k1++ != *k2++) 202 if (*k1++ != *k2++)
@@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
207} 207}
208 208
209struct flow_cache_object * 209struct flow_cache_object *
210flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, 210flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
211 flow_resolve_t resolver, void *ctx) 211 flow_resolve_t resolver, void *ctx)
212{ 212{
213 struct flow_cache *fc = &flow_cache_global; 213 struct flow_cache *fc = &flow_cache_global;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7c2373321b74..43b03dd71e85 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -249,13 +249,6 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
249} 249}
250EXPORT_SYMBOL(gen_new_estimator); 250EXPORT_SYMBOL(gen_new_estimator);
251 251
252static void __gen_kill_estimator(struct rcu_head *head)
253{
254 struct gen_estimator *e = container_of(head,
255 struct gen_estimator, e_rcu);
256 kfree(e);
257}
258
259/** 252/**
260 * gen_kill_estimator - remove a rate estimator 253 * gen_kill_estimator - remove a rate estimator
261 * @bstats: basic statistics 254 * @bstats: basic statistics
@@ -279,7 +272,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
279 write_unlock(&est_lock); 272 write_unlock(&est_lock);
280 273
281 list_del_rcu(&e->list); 274 list_del_rcu(&e->list);
282 call_rcu(&e->e_rcu, __gen_kill_estimator); 275 kfree_rcu(e, e_rcu);
283 } 276 }
284 spin_unlock_bh(&est_tree_lock); 277 spin_unlock_bh(&est_tree_lock);
285} 278}
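This gen_estimator.c hunk is the first of several in this diff (net-sysfs.c and net_namespace.c below follow suit) that drop a one-line call_rcu() callback in favour of kfree_rcu(). The pattern, shown on a made-up struct:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int payload;
		struct rcu_head rcu;
	};

	static void foo_drop(struct foo *f)
	{
		/* old style needed a callback:
		 *   static void foo_release(struct rcu_head *head)
		 *   { kfree(container_of(head, struct foo, rcu)); }
		 *   call_rcu(&f->rcu, foo_release);
		 */
		kfree_rcu(f, rcu);	/* frees f after a grace period; 'rcu' names
					 * the struct rcu_head member inside foo */
	}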
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 01a1101b5936..a7b342131869 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -129,7 +129,7 @@ static void linkwatch_schedule_work(int urgent)
129 if (!cancel_delayed_work(&linkwatch_work)) 129 if (!cancel_delayed_work(&linkwatch_work))
130 return; 130 return;
131 131
132 /* Otherwise we reschedule it again for immediate exection. */ 132 /* Otherwise we reschedule it again for immediate execution. */
133 schedule_delayed_work(&linkwatch_work, 0); 133 schedule_delayed_work(&linkwatch_work, 0);
134} 134}
135 135
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60a902913429..799f06e03a22 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
316{ 316{
317 size_t size = entries * sizeof(struct neighbour *); 317 size_t size = entries * sizeof(struct neighbour *);
318 struct neigh_hash_table *ret; 318 struct neigh_hash_table *ret;
319 struct neighbour **buckets; 319 struct neighbour __rcu **buckets;
320 320
321 ret = kmalloc(sizeof(*ret), GFP_ATOMIC); 321 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
322 if (!ret) 322 if (!ret)
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
324 if (size <= PAGE_SIZE) 324 if (size <= PAGE_SIZE)
325 buckets = kzalloc(size, GFP_ATOMIC); 325 buckets = kzalloc(size, GFP_ATOMIC);
326 else 326 else
327 buckets = (struct neighbour **) 327 buckets = (struct neighbour __rcu **)
328 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 328 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
329 get_order(size)); 329 get_order(size));
330 if (!buckets) { 330 if (!buckets) {
331 kfree(ret); 331 kfree(ret);
332 return NULL; 332 return NULL;
333 } 333 }
334 rcu_assign_pointer(ret->hash_buckets, buckets); 334 ret->hash_buckets = buckets;
335 ret->hash_mask = entries - 1; 335 ret->hash_mask = entries - 1;
336 get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); 336 get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
337 return ret; 337 return ret;
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
343 struct neigh_hash_table, 343 struct neigh_hash_table,
344 rcu); 344 rcu);
345 size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); 345 size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
346 struct neighbour **buckets = nht->hash_buckets; 346 struct neighbour __rcu **buckets = nht->hash_buckets;
347 347
348 if (size <= PAGE_SIZE) 348 if (size <= PAGE_SIZE)
349 kfree(buckets); 349 kfree(buckets);
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1540 panic("cannot create neighbour proc dir entry"); 1540 panic("cannot create neighbour proc dir entry");
1541#endif 1541#endif
1542 1542
1543 tbl->nht = neigh_hash_alloc(8); 1543 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
1544 1544
1545 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); 1545 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1546 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); 1546 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
1602 } 1602 }
1603 write_unlock(&neigh_tbl_lock); 1603 write_unlock(&neigh_tbl_lock);
1604 1604
1605 call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); 1605 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1606 neigh_hash_free_rcu);
1606 tbl->nht = NULL; 1607 tbl->nht = NULL;
1607 1608
1608 kfree(tbl->phash_buckets); 1609 kfree(tbl->phash_buckets);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e23c01be5a5b..11b98bc2aa8f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -28,6 +28,7 @@
28static const char fmt_hex[] = "%#x\n"; 28static const char fmt_hex[] = "%#x\n";
29static const char fmt_long_hex[] = "%#lx\n"; 29static const char fmt_long_hex[] = "%#lx\n";
30static const char fmt_dec[] = "%d\n"; 30static const char fmt_dec[] = "%d\n";
31static const char fmt_udec[] = "%u\n";
31static const char fmt_ulong[] = "%lu\n"; 32static const char fmt_ulong[] = "%lu\n";
32static const char fmt_u64[] = "%llu\n"; 33static const char fmt_u64[] = "%llu\n";
33 34
@@ -99,7 +100,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
99NETDEVICE_SHOW(addr_len, fmt_dec); 100NETDEVICE_SHOW(addr_len, fmt_dec);
100NETDEVICE_SHOW(iflink, fmt_dec); 101NETDEVICE_SHOW(iflink, fmt_dec);
101NETDEVICE_SHOW(ifindex, fmt_dec); 102NETDEVICE_SHOW(ifindex, fmt_dec);
102NETDEVICE_SHOW(features, fmt_long_hex); 103NETDEVICE_SHOW(features, fmt_hex);
103NETDEVICE_SHOW(type, fmt_dec); 104NETDEVICE_SHOW(type, fmt_dec);
104NETDEVICE_SHOW(link_mode, fmt_dec); 105NETDEVICE_SHOW(link_mode, fmt_dec);
105 106
@@ -145,13 +146,10 @@ static ssize_t show_speed(struct device *dev,
145 if (!rtnl_trylock()) 146 if (!rtnl_trylock())
146 return restart_syscall(); 147 return restart_syscall();
147 148
148 if (netif_running(netdev) && 149 if (netif_running(netdev)) {
149 netdev->ethtool_ops && 150 struct ethtool_cmd cmd;
150 netdev->ethtool_ops->get_settings) { 151 if (!dev_ethtool_get_settings(netdev, &cmd))
151 struct ethtool_cmd cmd = { ETHTOOL_GSET }; 152 ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
152
153 if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
154 ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
155 } 153 }
156 rtnl_unlock(); 154 rtnl_unlock();
157 return ret; 155 return ret;
@@ -166,13 +164,11 @@ static ssize_t show_duplex(struct device *dev,
166 if (!rtnl_trylock()) 164 if (!rtnl_trylock())
167 return restart_syscall(); 165 return restart_syscall();
168 166
169 if (netif_running(netdev) && 167 if (netif_running(netdev)) {
170 netdev->ethtool_ops && 168 struct ethtool_cmd cmd;
171 netdev->ethtool_ops->get_settings) { 169 if (!dev_ethtool_get_settings(netdev, &cmd))
172 struct ethtool_cmd cmd = { ETHTOOL_GSET }; 170 ret = sprintf(buf, "%s\n",
173 171 cmd.duplex ? "full" : "half");
174 if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
175 ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
176 } 172 }
177 rtnl_unlock(); 173 rtnl_unlock();
178 return ret; 174 return ret;
@@ -295,6 +291,20 @@ static ssize_t show_ifalias(struct device *dev,
295 return ret; 291 return ret;
296} 292}
297 293
294NETDEVICE_SHOW(group, fmt_dec);
295
296static int change_group(struct net_device *net, unsigned long new_group)
297{
298 dev_set_group(net, (int) new_group);
299 return 0;
300}
301
302static ssize_t store_group(struct device *dev, struct device_attribute *attr,
303 const char *buf, size_t len)
304{
305 return netdev_store(dev, attr, buf, len, change_group);
306}
307
298static struct device_attribute net_class_attributes[] = { 308static struct device_attribute net_class_attributes[] = {
299 __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), 309 __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
300 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), 310 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
@@ -316,6 +326,7 @@ static struct device_attribute net_class_attributes[] = {
316 __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), 326 __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
317 __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 327 __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
318 store_tx_queue_len), 328 store_tx_queue_len),
329 __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
319 {} 330 {}
320}; 331};
321 332
@@ -550,13 +561,6 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
550 return len; 561 return len;
551} 562}
552 563
553static void rps_map_release(struct rcu_head *rcu)
554{
555 struct rps_map *map = container_of(rcu, struct rps_map, rcu);
556
557 kfree(map);
558}
559
560static ssize_t store_rps_map(struct netdev_rx_queue *queue, 564static ssize_t store_rps_map(struct netdev_rx_queue *queue,
561 struct rx_queue_attribute *attribute, 565 struct rx_queue_attribute *attribute,
562 const char *buf, size_t len) 566 const char *buf, size_t len)
@@ -604,7 +608,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
604 spin_unlock(&rps_map_lock); 608 spin_unlock(&rps_map_lock);
605 609
606 if (old_map) 610 if (old_map)
607 call_rcu(&old_map->rcu, rps_map_release); 611 kfree_rcu(old_map, rcu);
608 612
609 free_cpumask_var(mask); 613 free_cpumask_var(mask);
610 return len; 614 return len;
@@ -713,7 +717,7 @@ static void rx_queue_release(struct kobject *kobj)
713 map = rcu_dereference_raw(queue->rps_map); 717 map = rcu_dereference_raw(queue->rps_map);
714 if (map) { 718 if (map) {
715 RCU_INIT_POINTER(queue->rps_map, NULL); 719 RCU_INIT_POINTER(queue->rps_map, NULL);
716 call_rcu(&map->rcu, rps_map_release); 720 kfree_rcu(map, rcu);
717 } 721 }
718 722
719 flow_table = rcu_dereference_raw(queue->rps_flow_table); 723 flow_table = rcu_dereference_raw(queue->rps_flow_table);
@@ -883,21 +887,6 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
883 return len; 887 return len;
884} 888}
885 889
886static void xps_map_release(struct rcu_head *rcu)
887{
888 struct xps_map *map = container_of(rcu, struct xps_map, rcu);
889
890 kfree(map);
891}
892
893static void xps_dev_maps_release(struct rcu_head *rcu)
894{
895 struct xps_dev_maps *dev_maps =
896 container_of(rcu, struct xps_dev_maps, rcu);
897
898 kfree(dev_maps);
899}
900
901static DEFINE_MUTEX(xps_map_mutex); 890static DEFINE_MUTEX(xps_map_mutex);
902#define xmap_dereference(P) \ 891#define xmap_dereference(P) \
903 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 892 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
@@ -953,7 +942,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
953 } else 942 } else
954 pos = map_len = alloc_len = 0; 943 pos = map_len = alloc_len = 0;
955 944
956 need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); 945 need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
957#ifdef CONFIG_NUMA 946#ifdef CONFIG_NUMA
958 if (need_set) { 947 if (need_set) {
959 if (numa_node == -2) 948 if (numa_node == -2)
@@ -994,7 +983,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
994 map = dev_maps ? 983 map = dev_maps ?
995 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; 984 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
996 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) 985 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
997 call_rcu(&map->rcu, xps_map_release); 986 kfree_rcu(map, rcu);
998 if (new_dev_maps->cpu_map[cpu]) 987 if (new_dev_maps->cpu_map[cpu])
999 nonempty = 1; 988 nonempty = 1;
1000 } 989 }
@@ -1007,7 +996,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
1007 } 996 }
1008 997
1009 if (dev_maps) 998 if (dev_maps)
1010 call_rcu(&dev_maps->rcu, xps_dev_maps_release); 999 kfree_rcu(dev_maps, rcu);
1011 1000
1012 netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : 1001 netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
1013 NUMA_NO_NODE); 1002 NUMA_NO_NODE);
@@ -1069,7 +1058,7 @@ static void netdev_queue_release(struct kobject *kobj)
1069 else { 1058 else {
1070 RCU_INIT_POINTER(dev_maps->cpu_map[i], 1059 RCU_INIT_POINTER(dev_maps->cpu_map[i],
1071 NULL); 1060 NULL);
1072 call_rcu(&map->rcu, xps_map_release); 1061 kfree_rcu(map, rcu);
1073 map = NULL; 1062 map = NULL;
1074 } 1063 }
1075 } 1064 }
@@ -1079,7 +1068,7 @@ static void netdev_queue_release(struct kobject *kobj)
1079 1068
1080 if (!nonempty) { 1069 if (!nonempty) {
1081 RCU_INIT_POINTER(dev->xps_maps, NULL); 1070 RCU_INIT_POINTER(dev->xps_maps, NULL);
1082 call_rcu(&dev_maps->rcu, xps_dev_maps_release); 1071 kfree_rcu(dev_maps, rcu);
1083 } 1072 }
1084 } 1073 }
1085 1074
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3f860261c5ee..2e2dce6583e1 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,14 +27,6 @@ EXPORT_SYMBOL(init_net);
27 27
28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
29 29
30static void net_generic_release(struct rcu_head *rcu)
31{
32 struct net_generic *ng;
33
34 ng = container_of(rcu, struct net_generic, rcu);
35 kfree(ng);
36}
37
38static int net_assign_generic(struct net *net, int id, void *data) 30static int net_assign_generic(struct net *net, int id, void *data)
39{ 31{
40 struct net_generic *ng, *old_ng; 32 struct net_generic *ng, *old_ng;
@@ -68,7 +60,7 @@ static int net_assign_generic(struct net *net, int id, void *data)
68 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 60 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
69 61
70 rcu_assign_pointer(net->gen, ng); 62 rcu_assign_pointer(net->gen, ng);
71 call_rcu(&old_ng->rcu, net_generic_release); 63 kfree_rcu(old_ng, rcu);
72assign: 64assign:
73 ng->ptr[id - 1] = data; 65 ng->ptr[id - 1] = data;
74 return 0; 66 return 0;
@@ -216,11 +208,14 @@ static void net_free(struct net *net)
216 kmem_cache_free(net_cachep, net); 208 kmem_cache_free(net_cachep, net);
217} 209}
218 210
219static struct net *net_create(void) 211struct net *copy_net_ns(unsigned long flags, struct net *old_net)
220{ 212{
221 struct net *net; 213 struct net *net;
222 int rv; 214 int rv;
223 215
216 if (!(flags & CLONE_NEWNET))
217 return get_net(old_net);
218
224 net = net_alloc(); 219 net = net_alloc();
225 if (!net) 220 if (!net)
226 return ERR_PTR(-ENOMEM); 221 return ERR_PTR(-ENOMEM);
@@ -239,13 +234,6 @@ static struct net *net_create(void)
239 return net; 234 return net;
240} 235}
241 236
242struct net *copy_net_ns(unsigned long flags, struct net *old_net)
243{
244 if (!(flags & CLONE_NEWNET))
245 return get_net(old_net);
246 return net_create();
247}
248
249static DEFINE_SPINLOCK(cleanup_list_lock); 237static DEFINE_SPINLOCK(cleanup_list_lock);
250static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 238static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
251 239
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 02dc2cbcbe86..2d7d6d473781 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -193,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev)
193 193
194 poll_napi(dev); 194 poll_napi(dev);
195 195
196 if (dev->priv_flags & IFF_SLAVE) {
197 if (dev->npinfo) {
198 struct net_device *bond_dev = dev->master;
199 struct sk_buff *skb;
200 while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
201 skb->dev = bond_dev;
202 skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
203 }
204 }
205 }
206
196 service_arp_queue(dev->npinfo); 207 service_arp_queue(dev->npinfo);
197 208
198 zap_completion_queue(); 209 zap_completion_queue();
@@ -313,9 +324,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
313 tries > 0; --tries) { 324 tries > 0; --tries) {
314 if (__netif_tx_trylock(txq)) { 325 if (__netif_tx_trylock(txq)) {
315 if (!netif_tx_queue_stopped(txq)) { 326 if (!netif_tx_queue_stopped(txq)) {
316 dev->priv_flags |= IFF_IN_NETPOLL;
317 status = ops->ndo_start_xmit(skb, dev); 327 status = ops->ndo_start_xmit(skb, dev);
318 dev->priv_flags &= ~IFF_IN_NETPOLL;
319 if (status == NETDEV_TX_OK) 328 if (status == NETDEV_TX_OK)
320 txq_trans_update(txq); 329 txq_trans_update(txq);
321 } 330 }
@@ -530,7 +539,7 @@ int __netpoll_rx(struct sk_buff *skb)
530{ 539{
531 int proto, len, ulen; 540 int proto, len, ulen;
532 int hits = 0; 541 int hits = 0;
533 struct iphdr *iph; 542 const struct iphdr *iph;
534 struct udphdr *uh; 543 struct udphdr *uh;
535 struct netpoll_info *npinfo = skb->dev->npinfo; 544 struct netpoll_info *npinfo = skb->dev->npinfo;
536 struct netpoll *np, *tmp; 545 struct netpoll *np, *tmp;
@@ -689,32 +698,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
689 698
690 if (*cur != 0) { 699 if (*cur != 0) {
691 /* MAC address */ 700 /* MAC address */
692 if ((delim = strchr(cur, ':')) == NULL) 701 if (!mac_pton(cur, np->remote_mac))
693 goto parse_failed;
694 *delim = 0;
695 np->remote_mac[0] = simple_strtol(cur, NULL, 16);
696 cur = delim + 1;
697 if ((delim = strchr(cur, ':')) == NULL)
698 goto parse_failed; 702 goto parse_failed;
699 *delim = 0;
700 np->remote_mac[1] = simple_strtol(cur, NULL, 16);
701 cur = delim + 1;
702 if ((delim = strchr(cur, ':')) == NULL)
703 goto parse_failed;
704 *delim = 0;
705 np->remote_mac[2] = simple_strtol(cur, NULL, 16);
706 cur = delim + 1;
707 if ((delim = strchr(cur, ':')) == NULL)
708 goto parse_failed;
709 *delim = 0;
710 np->remote_mac[3] = simple_strtol(cur, NULL, 16);
711 cur = delim + 1;
712 if ((delim = strchr(cur, ':')) == NULL)
713 goto parse_failed;
714 *delim = 0;
715 np->remote_mac[4] = simple_strtol(cur, NULL, 16);
716 cur = delim + 1;
717 np->remote_mac[5] = simple_strtol(cur, NULL, 16);
718 } 703 }
719 704
720 netpoll_print_options(np); 705 netpoll_print_options(np);
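The netpoll hunk above (and the pktgen hunks below) replace a hand-rolled colon-separated MAC parser with the mac_pton() helper. A minimal usage sketch; parse_peer_mac() is an invented wrapper, only mac_pton() itself comes from this series:

	#include <linux/if_ether.h>
	#include <linux/errno.h>

	/* mac_pton() accepts only the full xx:xx:xx:xx:xx:xx form and returns
	 * non-zero on success, zero on a malformed string. */
	static int parse_peer_mac(const char *str, u8 mac[ETH_ALEN])
	{
		if (!mac_pton(str, mac))
			return -EINVAL;
		return 0;
	}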
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461f..67870e9fd097 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -156,6 +156,7 @@
156#include <linux/wait.h> 156#include <linux/wait.h>
157#include <linux/etherdevice.h> 157#include <linux/etherdevice.h>
158#include <linux/kthread.h> 158#include <linux/kthread.h>
159#include <linux/prefetch.h>
159#include <net/net_namespace.h> 160#include <net/net_namespace.h>
160#include <net/checksum.h> 161#include <net/checksum.h>
161#include <net/ipv6.h> 162#include <net/ipv6.h>
@@ -251,6 +252,7 @@ struct pktgen_dev {
251 int max_pkt_size; /* = ETH_ZLEN; */ 252 int max_pkt_size; /* = ETH_ZLEN; */
252 int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ 253 int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
253 int nfrags; 254 int nfrags;
255 struct page *page;
254 u64 delay; /* nano-seconds */ 256 u64 delay; /* nano-seconds */
255 257
256 __u64 count; /* Default No packets to send */ 258 __u64 count; /* Default No packets to send */
@@ -448,7 +450,6 @@ static void pktgen_stop(struct pktgen_thread *t);
448static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); 450static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
449 451
450static unsigned int scan_ip6(const char *s, char ip[16]); 452static unsigned int scan_ip6(const char *s, char ip[16]);
451static unsigned int fmt_ip6(char *s, const char ip[16]);
452 453
453/* Module parameters, defaults. */ 454/* Module parameters, defaults. */
454static int pg_count_d __read_mostly = 1000; 455static int pg_count_d __read_mostly = 1000;
@@ -555,21 +556,13 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
555 pkt_dev->skb_priority); 556 pkt_dev->skb_priority);
556 557
557 if (pkt_dev->flags & F_IPV6) { 558 if (pkt_dev->flags & F_IPV6) {
558 char b1[128], b2[128], b3[128];
559 fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
560 fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr);
561 fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr);
562 seq_printf(seq, 559 seq_printf(seq,
563 " saddr: %s min_saddr: %s max_saddr: %s\n", b1, 560 " saddr: %pI6c min_saddr: %pI6c max_saddr: %pI6c\n"
564 b2, b3); 561 " daddr: %pI6c min_daddr: %pI6c max_daddr: %pI6c\n",
565 562 &pkt_dev->in6_saddr,
566 fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); 563 &pkt_dev->min_in6_saddr, &pkt_dev->max_in6_saddr,
567 fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); 564 &pkt_dev->in6_daddr,
568 fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); 565 &pkt_dev->min_in6_daddr, &pkt_dev->max_in6_daddr);
569 seq_printf(seq,
570 " daddr: %s min_daddr: %s max_daddr: %s\n", b1,
571 b2, b3);
572
573 } else { 566 } else {
574 seq_printf(seq, 567 seq_printf(seq,
575 " dst_min: %s dst_max: %s\n", 568 " dst_min: %s dst_max: %s\n",
@@ -705,10 +698,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
705 pkt_dev->cur_src_mac_offset); 698 pkt_dev->cur_src_mac_offset);
706 699
707 if (pkt_dev->flags & F_IPV6) { 700 if (pkt_dev->flags & F_IPV6) {
708 char b1[128], b2[128]; 701 seq_printf(seq, " cur_saddr: %pI6c cur_daddr: %pI6c\n",
709 fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); 702 &pkt_dev->cur_in6_saddr,
710 fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); 703 &pkt_dev->cur_in6_daddr);
711 seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1);
712 } else 704 } else
713 seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", 705 seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n",
714 pkt_dev->cur_saddr, pkt_dev->cur_daddr); 706 pkt_dev->cur_saddr, pkt_dev->cur_daddr);
@@ -1134,6 +1126,10 @@ static ssize_t pktgen_if_write(struct file *file,
1134 if (node_possible(value)) { 1126 if (node_possible(value)) {
1135 pkt_dev->node = value; 1127 pkt_dev->node = value;
1136 sprintf(pg_result, "OK: node=%d", pkt_dev->node); 1128 sprintf(pg_result, "OK: node=%d", pkt_dev->node);
1129 if (pkt_dev->page) {
1130 put_page(pkt_dev->page);
1131 pkt_dev->page = NULL;
1132 }
1137 } 1133 }
1138 else 1134 else
1139 sprintf(pg_result, "ERROR: node not possible"); 1135 sprintf(pg_result, "ERROR: node not possible");
@@ -1304,7 +1300,7 @@ static ssize_t pktgen_if_write(struct file *file,
1304 buf[len] = 0; 1300 buf[len] = 0;
1305 1301
1306 scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); 1302 scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
1307 fmt_ip6(buf, pkt_dev->in6_daddr.s6_addr); 1303 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
1308 1304
1309 ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); 1305 ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
1310 1306
@@ -1327,7 +1323,7 @@ static ssize_t pktgen_if_write(struct file *file,
1327 buf[len] = 0; 1323 buf[len] = 0;
1328 1324
1329 scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); 1325 scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
1330 fmt_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); 1326 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
1331 1327
1332 ipv6_addr_copy(&pkt_dev->cur_in6_daddr, 1328 ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
1333 &pkt_dev->min_in6_daddr); 1329 &pkt_dev->min_in6_daddr);
@@ -1350,7 +1346,7 @@ static ssize_t pktgen_if_write(struct file *file,
1350 buf[len] = 0; 1346 buf[len] = 0;
1351 1347
1352 scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); 1348 scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
1353 fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); 1349 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr);
1354 1350
1355 if (debug) 1351 if (debug)
1356 printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); 1352 printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf);
@@ -1371,7 +1367,7 @@ static ssize_t pktgen_if_write(struct file *file,
1371 buf[len] = 0; 1367 buf[len] = 0;
1372 1368
1373 scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); 1369 scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
1374 fmt_ip6(buf, pkt_dev->in6_saddr.s6_addr); 1370 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
1375 1371
1376 ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); 1372 ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
1377 1373
@@ -1425,11 +1421,6 @@ static ssize_t pktgen_if_write(struct file *file,
1425 return count; 1421 return count;
1426 } 1422 }
1427 if (!strcmp(name, "dst_mac")) { 1423 if (!strcmp(name, "dst_mac")) {
1428 char *v = valstr;
1429 unsigned char old_dmac[ETH_ALEN];
1430 unsigned char *m = pkt_dev->dst_mac;
1431 memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN);
1432
1433 len = strn_len(&user_buffer[i], sizeof(valstr) - 1); 1424 len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
1434 if (len < 0) 1425 if (len < 0)
1435 return len; 1426 return len;
@@ -1437,35 +1428,16 @@ static ssize_t pktgen_if_write(struct file *file,
1437 memset(valstr, 0, sizeof(valstr)); 1428 memset(valstr, 0, sizeof(valstr));
1438 if (copy_from_user(valstr, &user_buffer[i], len)) 1429 if (copy_from_user(valstr, &user_buffer[i], len))
1439 return -EFAULT; 1430 return -EFAULT;
1440 i += len;
1441
1442 for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) {
1443 int value;
1444
1445 value = hex_to_bin(*v);
1446 if (value >= 0)
1447 *m = *m * 16 + value;
1448
1449 if (*v == ':') {
1450 m++;
1451 *m = 0;
1452 }
1453 }
1454 1431
1432 if (!mac_pton(valstr, pkt_dev->dst_mac))
1433 return -EINVAL;
1455 /* Set up Dest MAC */ 1434 /* Set up Dest MAC */
1456 if (compare_ether_addr(old_dmac, pkt_dev->dst_mac)) 1435 memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN);
1457 memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN);
1458 1436
1459 sprintf(pg_result, "OK: dstmac"); 1437 sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac);
1460 return count; 1438 return count;
1461 } 1439 }
1462 if (!strcmp(name, "src_mac")) { 1440 if (!strcmp(name, "src_mac")) {
1463 char *v = valstr;
1464 unsigned char old_smac[ETH_ALEN];
1465 unsigned char *m = pkt_dev->src_mac;
1466
1467 memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN);
1468
1469 len = strn_len(&user_buffer[i], sizeof(valstr) - 1); 1441 len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
1470 if (len < 0) 1442 if (len < 0)
1471 return len; 1443 return len;
@@ -1473,26 +1445,13 @@ static ssize_t pktgen_if_write(struct file *file,
1473 memset(valstr, 0, sizeof(valstr)); 1445 memset(valstr, 0, sizeof(valstr));
1474 if (copy_from_user(valstr, &user_buffer[i], len)) 1446 if (copy_from_user(valstr, &user_buffer[i], len))
1475 return -EFAULT; 1447 return -EFAULT;
1476 i += len;
1477
1478 for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) {
1479 int value;
1480
1481 value = hex_to_bin(*v);
1482 if (value >= 0)
1483 *m = *m * 16 + value;
1484
1485 if (*v == ':') {
1486 m++;
1487 *m = 0;
1488 }
1489 }
1490 1448
1449 if (!mac_pton(valstr, pkt_dev->src_mac))
1450 return -EINVAL;
1491 /* Set up Src MAC */ 1451 /* Set up Src MAC */
1492 if (compare_ether_addr(old_smac, pkt_dev->src_mac)) 1452 memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN);
1493 memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN);
1494 1453
1495 sprintf(pg_result, "OK: srcmac"); 1454 sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac);
1496 return count; 1455 return count;
1497 } 1456 }
1498 1457
@@ -2509,7 +2468,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
2509{ 2468{
2510 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; 2469 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
2511 int err = 0; 2470 int err = 0;
2512 struct iphdr *iph;
2513 2471
2514 if (!x) 2472 if (!x)
2515 return 0; 2473 return 0;
@@ -2519,7 +2477,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
2519 return 0; 2477 return 0;
2520 2478
2521 spin_lock(&x->lock); 2479 spin_lock(&x->lock);
2522 iph = ip_hdr(skb);
2523 2480
2524 err = x->outer_mode->output(x, skb); 2481 err = x->outer_mode->output(x, skb);
2525 if (err) 2482 if (err)
@@ -2605,6 +2562,72 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
2605 return htons(id | (cfi << 12) | (prio << 13)); 2562 return htons(id | (cfi << 12) | (prio << 13));
2606} 2563}
2607 2564
2565static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2566 int datalen)
2567{
2568 struct timeval timestamp;
2569 struct pktgen_hdr *pgh;
2570
2571 pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
2572 datalen -= sizeof(*pgh);
2573
2574 if (pkt_dev->nfrags <= 0) {
2575 memset(skb_put(skb, datalen), 0, datalen);
2576 } else {
2577 int frags = pkt_dev->nfrags;
2578 int i, len;
2579 int frag_len;
2580
2581
2582 if (frags > MAX_SKB_FRAGS)
2583 frags = MAX_SKB_FRAGS;
2584 len = datalen - frags * PAGE_SIZE;
2585 if (len > 0) {
2586 memset(skb_put(skb, len), 0, len);
2587 datalen = frags * PAGE_SIZE;
2588 }
2589
2590 i = 0;
2591 frag_len = (datalen/frags) < PAGE_SIZE ?
2592 (datalen/frags) : PAGE_SIZE;
2593 while (datalen > 0) {
2594 if (unlikely(!pkt_dev->page)) {
2595 int node = numa_node_id();
2596
2597 if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
2598 node = pkt_dev->node;
2599 pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
2600 if (!pkt_dev->page)
2601 break;
2602 }
2603 skb_shinfo(skb)->frags[i].page = pkt_dev->page;
2604 get_page(pkt_dev->page);
2605 skb_shinfo(skb)->frags[i].page_offset = 0;
2606 /*last fragment, fill rest of data*/
2607 if (i == (frags - 1))
2608 skb_shinfo(skb)->frags[i].size =
2609 (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
2610 else
2611 skb_shinfo(skb)->frags[i].size = frag_len;
2612 datalen -= skb_shinfo(skb)->frags[i].size;
2613 skb->len += skb_shinfo(skb)->frags[i].size;
2614 skb->data_len += skb_shinfo(skb)->frags[i].size;
2615 i++;
2616 skb_shinfo(skb)->nr_frags = i;
2617 }
2618 }
2619
2620 /* Stamp the time, and sequence number,
2621 * convert them to network byte order
2622 */
2623 pgh->pgh_magic = htonl(PKTGEN_MAGIC);
2624 pgh->seq_num = htonl(pkt_dev->seq_num);
2625
2626 do_gettimeofday(&timestamp);
2627 pgh->tv_sec = htonl(timestamp.tv_sec);
2628 pgh->tv_usec = htonl(timestamp.tv_usec);
2629}
2630
2608static struct sk_buff *fill_packet_ipv4(struct net_device *odev, 2631static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2609 struct pktgen_dev *pkt_dev) 2632 struct pktgen_dev *pkt_dev)
2610{ 2633{
@@ -2613,7 +2636,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2613 struct udphdr *udph; 2636 struct udphdr *udph;
2614 int datalen, iplen; 2637 int datalen, iplen;
2615 struct iphdr *iph; 2638 struct iphdr *iph;
2616 struct pktgen_hdr *pgh = NULL;
2617 __be16 protocol = htons(ETH_P_IP); 2639 __be16 protocol = htons(ETH_P_IP);
2618 __be32 *mpls; 2640 __be32 *mpls;
2619 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ 2641 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -2729,76 +2751,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2729 pkt_dev->pkt_overhead); 2751 pkt_dev->pkt_overhead);
2730 skb->dev = odev; 2752 skb->dev = odev;
2731 skb->pkt_type = PACKET_HOST; 2753 skb->pkt_type = PACKET_HOST;
2732 2754 pktgen_finalize_skb(pkt_dev, skb, datalen);
2733 if (pkt_dev->nfrags <= 0) {
2734 pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
2735 memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
2736 } else {
2737 int frags = pkt_dev->nfrags;
2738 int i, len;
2739
2740 pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
2741
2742 if (frags > MAX_SKB_FRAGS)
2743 frags = MAX_SKB_FRAGS;
2744 if (datalen > frags * PAGE_SIZE) {
2745 len = datalen - frags * PAGE_SIZE;
2746 memset(skb_put(skb, len), 0, len);
2747 datalen = frags * PAGE_SIZE;
2748 }
2749
2750 i = 0;
2751 while (datalen > 0) {
2752 struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
2753 skb_shinfo(skb)->frags[i].page = page;
2754 skb_shinfo(skb)->frags[i].page_offset = 0;
2755 skb_shinfo(skb)->frags[i].size =
2756 (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
2757 datalen -= skb_shinfo(skb)->frags[i].size;
2758 skb->len += skb_shinfo(skb)->frags[i].size;
2759 skb->data_len += skb_shinfo(skb)->frags[i].size;
2760 i++;
2761 skb_shinfo(skb)->nr_frags = i;
2762 }
2763
2764 while (i < frags) {
2765 int rem;
2766
2767 if (i == 0)
2768 break;
2769
2770 rem = skb_shinfo(skb)->frags[i - 1].size / 2;
2771 if (rem == 0)
2772 break;
2773
2774 skb_shinfo(skb)->frags[i - 1].size -= rem;
2775
2776 skb_shinfo(skb)->frags[i] =
2777 skb_shinfo(skb)->frags[i - 1];
2778 get_page(skb_shinfo(skb)->frags[i].page);
2779 skb_shinfo(skb)->frags[i].page =
2780 skb_shinfo(skb)->frags[i - 1].page;
2781 skb_shinfo(skb)->frags[i].page_offset +=
2782 skb_shinfo(skb)->frags[i - 1].size;
2783 skb_shinfo(skb)->frags[i].size = rem;
2784 i++;
2785 skb_shinfo(skb)->nr_frags = i;
2786 }
2787 }
2788
2789 /* Stamp the time, and sequence number,
2790 * convert them to network byte order
2791 */
2792 if (pgh) {
2793 struct timeval timestamp;
2794
2795 pgh->pgh_magic = htonl(PKTGEN_MAGIC);
2796 pgh->seq_num = htonl(pkt_dev->seq_num);
2797
2798 do_gettimeofday(&timestamp);
2799 pgh->tv_sec = htonl(timestamp.tv_sec);
2800 pgh->tv_usec = htonl(timestamp.tv_usec);
2801 }
2802 2755
2803#ifdef CONFIG_XFRM 2756#ifdef CONFIG_XFRM
2804 if (!process_ipsec(pkt_dev, skb, protocol)) 2757 if (!process_ipsec(pkt_dev, skb, protocol))
@@ -2899,79 +2852,6 @@ static unsigned int scan_ip6(const char *s, char ip[16])
2899 return len; 2852 return len;
2900} 2853}
2901 2854
2902static char tohex(char hexdigit)
2903{
2904 return hexdigit > 9 ? hexdigit + 'a' - 10 : hexdigit + '0';
2905}
2906
2907static int fmt_xlong(char *s, unsigned int i)
2908{
2909 char *bak = s;
2910 *s = tohex((i >> 12) & 0xf);
2911 if (s != bak || *s != '0')
2912 ++s;
2913 *s = tohex((i >> 8) & 0xf);
2914 if (s != bak || *s != '0')
2915 ++s;
2916 *s = tohex((i >> 4) & 0xf);
2917 if (s != bak || *s != '0')
2918 ++s;
2919 *s = tohex(i & 0xf);
2920 return s - bak + 1;
2921}
2922
2923static unsigned int fmt_ip6(char *s, const char ip[16])
2924{
2925 unsigned int len;
2926 unsigned int i;
2927 unsigned int temp;
2928 unsigned int compressing;
2929 int j;
2930
2931 len = 0;
2932 compressing = 0;
2933 for (j = 0; j < 16; j += 2) {
2934
2935#ifdef V4MAPPEDPREFIX
2936 if (j == 12 && !memcmp(ip, V4mappedprefix, 12)) {
2937 inet_ntoa_r(*(struct in_addr *)(ip + 12), s);
2938 temp = strlen(s);
2939 return len + temp;
2940 }
2941#endif
2942 temp = ((unsigned long)(unsigned char)ip[j] << 8) +
2943 (unsigned long)(unsigned char)ip[j + 1];
2944 if (temp == 0) {
2945 if (!compressing) {
2946 compressing = 1;
2947 if (j == 0) {
2948 *s++ = ':';
2949 ++len;
2950 }
2951 }
2952 } else {
2953 if (compressing) {
2954 compressing = 0;
2955 *s++ = ':';
2956 ++len;
2957 }
2958 i = fmt_xlong(s, temp);
2959 len += i;
2960 s += i;
2961 if (j < 14) {
2962 *s++ = ':';
2963 ++len;
2964 }
2965 }
2966 }
2967 if (compressing) {
2968 *s++ = ':';
2969 ++len;
2970 }
2971 *s = 0;
2972 return len;
2973}
2974
2975static struct sk_buff *fill_packet_ipv6(struct net_device *odev, 2855static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2976 struct pktgen_dev *pkt_dev) 2856 struct pktgen_dev *pkt_dev)
2977{ 2857{
@@ -2980,7 +2860,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2980 struct udphdr *udph; 2860 struct udphdr *udph;
2981 int datalen; 2861 int datalen;
2982 struct ipv6hdr *iph; 2862 struct ipv6hdr *iph;
2983 struct pktgen_hdr *pgh = NULL;
2984 __be16 protocol = htons(ETH_P_IPV6); 2863 __be16 protocol = htons(ETH_P_IPV6);
2985 __be32 *mpls; 2864 __be32 *mpls;
2986 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ 2865 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -3083,75 +2962,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
3083 skb->dev = odev; 2962 skb->dev = odev;
3084 skb->pkt_type = PACKET_HOST; 2963 skb->pkt_type = PACKET_HOST;
3085 2964
3086 if (pkt_dev->nfrags <= 0) 2965 pktgen_finalize_skb(pkt_dev, skb, datalen);
3087 pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
3088 else {
3089 int frags = pkt_dev->nfrags;
3090 int i;
3091
3092 pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
3093
3094 if (frags > MAX_SKB_FRAGS)
3095 frags = MAX_SKB_FRAGS;
3096 if (datalen > frags * PAGE_SIZE) {
3097 skb_put(skb, datalen - frags * PAGE_SIZE);
3098 datalen = frags * PAGE_SIZE;
3099 }
3100
3101 i = 0;
3102 while (datalen > 0) {
3103 struct page *page = alloc_pages(GFP_KERNEL, 0);
3104 skb_shinfo(skb)->frags[i].page = page;
3105 skb_shinfo(skb)->frags[i].page_offset = 0;
3106 skb_shinfo(skb)->frags[i].size =
3107 (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
3108 datalen -= skb_shinfo(skb)->frags[i].size;
3109 skb->len += skb_shinfo(skb)->frags[i].size;
3110 skb->data_len += skb_shinfo(skb)->frags[i].size;
3111 i++;
3112 skb_shinfo(skb)->nr_frags = i;
3113 }
3114
3115 while (i < frags) {
3116 int rem;
3117
3118 if (i == 0)
3119 break;
3120
3121 rem = skb_shinfo(skb)->frags[i - 1].size / 2;
3122 if (rem == 0)
3123 break;
3124
3125 skb_shinfo(skb)->frags[i - 1].size -= rem;
3126
3127 skb_shinfo(skb)->frags[i] =
3128 skb_shinfo(skb)->frags[i - 1];
3129 get_page(skb_shinfo(skb)->frags[i].page);
3130 skb_shinfo(skb)->frags[i].page =
3131 skb_shinfo(skb)->frags[i - 1].page;
3132 skb_shinfo(skb)->frags[i].page_offset +=
3133 skb_shinfo(skb)->frags[i - 1].size;
3134 skb_shinfo(skb)->frags[i].size = rem;
3135 i++;
3136 skb_shinfo(skb)->nr_frags = i;
3137 }
3138 }
3139
3140 /* Stamp the time, and sequence number,
3141 * convert them to network byte order
3142 * should we update cloned packets too ?
3143 */
3144 if (pgh) {
3145 struct timeval timestamp;
3146
3147 pgh->pgh_magic = htonl(PKTGEN_MAGIC);
3148 pgh->seq_num = htonl(pkt_dev->seq_num);
3149
3150 do_gettimeofday(&timestamp);
3151 pgh->tv_sec = htonl(timestamp.tv_sec);
3152 pgh->tv_usec = htonl(timestamp.tv_usec);
3153 }
3154 /* pkt_dev->seq_num++; FF: you really mean this? */
3155 2966
3156 return skb; 2967 return skb;
3157} 2968}
@@ -3321,7 +3132,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
3321 pkt_dev->started_at); 3132 pkt_dev->started_at);
3322 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); 3133 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
3323 3134
3324 p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", 3135 p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
3325 (unsigned long long)ktime_to_us(elapsed), 3136 (unsigned long long)ktime_to_us(elapsed),
3326 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), 3137 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
3327 (unsigned long long)ktime_to_us(idle), 3138 (unsigned long long)ktime_to_us(idle),
@@ -3812,7 +3623,10 @@ static int __init pktgen_create_thread(int cpu)
3812 list_add_tail(&t->th_list, &pktgen_threads); 3623 list_add_tail(&t->th_list, &pktgen_threads);
3813 init_completion(&t->start_done); 3624 init_completion(&t->start_done);
3814 3625
3815 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); 3626 p = kthread_create_on_node(pktgen_thread_worker,
3627 t,
3628 cpu_to_node(cpu),
3629 "kpktgend_%d", cpu);
3816 if (IS_ERR(p)) { 3630 if (IS_ERR(p)) {
3817 pr_err("kernel_thread() failed for cpu %d\n", t->cpu); 3631 pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
3818 list_del(&t->th_list); 3632 list_del(&t->th_list);
@@ -3884,6 +3698,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3884 free_SAs(pkt_dev); 3698 free_SAs(pkt_dev);
3885#endif 3699#endif
3886 vfree(pkt_dev->flows); 3700 vfree(pkt_dev->flows);
3701 if (pkt_dev->page)
3702 put_page(pkt_dev->page);
3887 kfree(pkt_dev); 3703 kfree(pkt_dev);
3888 return 0; 3704 return 0;
3889} 3705}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d65c6bb24c1..d2ba2597c75a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -196,7 +196,7 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
196 * as failure of this function is very unlikely, it can only happen due 196 * as failure of this function is very unlikely, it can only happen due
197 * to lack of memory when allocating the chain to store all message 197 * to lack of memory when allocating the chain to store all message
198 * handlers for a protocol. Meant for use in init functions where lack 198 * handlers for a protocol. Meant for use in init functions where lack
199 * of memory implies no sense in continueing. 199 * of memory implies no sense in continuing.
200 */ 200 */
201void rtnl_register(int protocol, int msgtype, 201void rtnl_register(int protocol, int msgtype,
202 rtnl_doit_func doit, rtnl_dumpit_func dumpit) 202 rtnl_doit_func doit, rtnl_dumpit_func dumpit)
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
868 netif_running(dev) ? dev->operstate : IF_OPER_DOWN); 868 netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
869 NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); 869 NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
870 NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); 870 NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
871 NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
871 872
872 if (dev->ifindex != dev->iflink) 873 if (dev->ifindex != dev->iflink)
873 NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); 874 NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1006,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1006 s_h = cb->args[0]; 1007 s_h = cb->args[0];
1007 s_idx = cb->args[1]; 1008 s_idx = cb->args[1];
1008 1009
1010 rcu_read_lock();
1009 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1011 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1010 idx = 0; 1012 idx = 0;
1011 head = &net->dev_index_head[h]; 1013 head = &net->dev_index_head[h];
1012 hlist_for_each_entry(dev, node, head, index_hlist) { 1014 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1013 if (idx < s_idx) 1015 if (idx < s_idx)
1014 goto cont; 1016 goto cont;
1015 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1017 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1022,6 +1024,7 @@ cont:
1022 } 1024 }
1023 } 1025 }
1024out: 1026out:
1027 rcu_read_unlock();
1025 cb->args[1] = idx; 1028 cb->args[1] = idx;
1026 cb->args[0] = h; 1029 cb->args[0] = h;
1027 1030
@@ -1035,6 +1038,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1035 [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, 1038 [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
1036 [IFLA_MTU] = { .type = NLA_U32 }, 1039 [IFLA_MTU] = { .type = NLA_U32 },
1037 [IFLA_LINK] = { .type = NLA_U32 }, 1040 [IFLA_LINK] = { .type = NLA_U32 },
1041 [IFLA_MASTER] = { .type = NLA_U32 },
1038 [IFLA_TXQLEN] = { .type = NLA_U32 }, 1042 [IFLA_TXQLEN] = { .type = NLA_U32 },
1039 [IFLA_WEIGHT] = { .type = NLA_U32 }, 1043 [IFLA_WEIGHT] = { .type = NLA_U32 },
1040 [IFLA_OPERSTATE] = { .type = NLA_U8 }, 1044 [IFLA_OPERSTATE] = { .type = NLA_U8 },
@@ -1177,6 +1181,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1177 return err; 1181 return err;
1178} 1182}
1179 1183
1184static int do_set_master(struct net_device *dev, int ifindex)
1185{
1186 struct net_device *master_dev;
1187 const struct net_device_ops *ops;
1188 int err;
1189
1190 if (dev->master) {
1191 if (dev->master->ifindex == ifindex)
1192 return 0;
1193 ops = dev->master->netdev_ops;
1194 if (ops->ndo_del_slave) {
1195 err = ops->ndo_del_slave(dev->master, dev);
1196 if (err)
1197 return err;
1198 } else {
1199 return -EOPNOTSUPP;
1200 }
1201 }
1202
1203 if (ifindex) {
1204 master_dev = __dev_get_by_index(dev_net(dev), ifindex);
1205 if (!master_dev)
1206 return -EINVAL;
1207 ops = master_dev->netdev_ops;
1208 if (ops->ndo_add_slave) {
1209 err = ops->ndo_add_slave(master_dev, dev);
1210 if (err)
1211 return err;
1212 } else {
1213 return -EOPNOTSUPP;
1214 }
1215 }
1216 return 0;
1217}
1218
1180static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, 1219static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1181 struct nlattr **tb, char *ifname, int modified) 1220 struct nlattr **tb, char *ifname, int modified)
1182{ 1221{
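do_set_master(), added above, routes IFLA_MASTER changes through the master device's netdev_ops, so enslaving and releasing a port can be driven over rtnetlink as long as the master's driver exposes ndo_add_slave/ndo_del_slave. A rough sketch of the driver side these calls land in, with purely illustrative names:

        /* Hedged sketch of a master-type driver wiring the hooks that
         * do_set_master() invokes; my_master_* names are illustrative only. */
        static int my_master_add_slave(struct net_device *master,
                                       struct net_device *slave)
        {
                /* sanity-check the slave, take a reference, attach it */
                return 0;
        }

        static int my_master_del_slave(struct net_device *master,
                                       struct net_device *slave)
        {
                /* detach the slave and drop the reference */
                return 0;
        }

        static const struct net_device_ops my_master_ops = {
                .ndo_add_slave  = my_master_add_slave,
                .ndo_del_slave  = my_master_del_slave,
                /* ... remaining ops ... */
        };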
@@ -1264,6 +1303,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1264 modified = 1; 1303 modified = 1;
1265 } 1304 }
1266 1305
1306 if (tb[IFLA_GROUP]) {
1307 dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
1308 modified = 1;
1309 }
1310
1267 /* 1311 /*
1268 * Interface selected by interface index but interface 1312 * Interface selected by interface index but interface
1269 * name provided implies that a name change has been 1313 * name provided implies that a name change has been
@@ -1295,6 +1339,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1295 goto errout; 1339 goto errout;
1296 } 1340 }
1297 1341
1342 if (tb[IFLA_MASTER]) {
1343 err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
1344 if (err)
1345 goto errout;
1346 modified = 1;
1347 }
1348
1298 if (tb[IFLA_TXQLEN]) 1349 if (tb[IFLA_TXQLEN])
1299 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); 1350 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
1300 1351
@@ -1391,7 +1442,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1391errout: 1442errout:
1392 if (err < 0 && modified && net_ratelimit()) 1443 if (err < 0 && modified && net_ratelimit())
1393 printk(KERN_WARNING "A link change request failed with " 1444 printk(KERN_WARNING "A link change request failed with "
1394 "some changes comitted already. Interface %s may " 1445 "some changes committed already. Interface %s may "
1395 "have been left with an inconsistent configuration, " 1446 "have been left with an inconsistent configuration, "
1396 "please check.\n", dev->name); 1447 "please check.\n", dev->name);
1397 1448
@@ -1450,6 +1501,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1450 char ifname[IFNAMSIZ]; 1501 char ifname[IFNAMSIZ];
1451 struct nlattr *tb[IFLA_MAX+1]; 1502 struct nlattr *tb[IFLA_MAX+1];
1452 int err; 1503 int err;
1504 LIST_HEAD(list_kill);
1453 1505
1454 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); 1506 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
1455 if (err < 0) 1507 if (err < 0)
@@ -1473,7 +1525,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1473 if (!ops) 1525 if (!ops)
1474 return -EOPNOTSUPP; 1526 return -EOPNOTSUPP;
1475 1527
1476 ops->dellink(dev, NULL); 1528 ops->dellink(dev, &list_kill);
1529 unregister_netdevice_many(&list_kill);
1530 list_del(&list_kill);
1477 return 0; 1531 return 0;
1478} 1532}
1479 1533
@@ -1521,12 +1575,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
1521 dev->rtnl_link_state = RTNL_LINK_INITIALIZING; 1575 dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
1522 dev->real_num_tx_queues = real_num_queues; 1576 dev->real_num_tx_queues = real_num_queues;
1523 1577
1524 if (strchr(dev->name, '%')) {
1525 err = dev_alloc_name(dev, dev->name);
1526 if (err < 0)
1527 goto err_free;
1528 }
1529
1530 if (tb[IFLA_MTU]) 1578 if (tb[IFLA_MTU])
1531 dev->mtu = nla_get_u32(tb[IFLA_MTU]); 1579 dev->mtu = nla_get_u32(tb[IFLA_MTU]);
1532 if (tb[IFLA_ADDRESS]) 1580 if (tb[IFLA_ADDRESS])
@@ -1541,16 +1589,34 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
1541 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); 1589 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
1542 if (tb[IFLA_LINKMODE]) 1590 if (tb[IFLA_LINKMODE])
1543 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); 1591 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
1592 if (tb[IFLA_GROUP])
1593 dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
1544 1594
1545 return dev; 1595 return dev;
1546 1596
1547err_free:
1548 free_netdev(dev);
1549err: 1597err:
1550 return ERR_PTR(err); 1598 return ERR_PTR(err);
1551} 1599}
1552EXPORT_SYMBOL(rtnl_create_link); 1600EXPORT_SYMBOL(rtnl_create_link);
1553 1601
1602static int rtnl_group_changelink(struct net *net, int group,
1603 struct ifinfomsg *ifm,
1604 struct nlattr **tb)
1605{
1606 struct net_device *dev;
1607 int err;
1608
1609 for_each_netdev(net, dev) {
1610 if (dev->group == group) {
1611 err = do_setlink(dev, ifm, tb, NULL, 0);
1612 if (err < 0)
1613 return err;
1614 }
1615 }
1616
1617 return 0;
1618}
1619
1554static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1620static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1555{ 1621{
1556 struct net *net = sock_net(skb->sk); 1622 struct net *net = sock_net(skb->sk);
@@ -1578,10 +1644,12 @@ replay:
1578 ifm = nlmsg_data(nlh); 1644 ifm = nlmsg_data(nlh);
1579 if (ifm->ifi_index > 0) 1645 if (ifm->ifi_index > 0)
1580 dev = __dev_get_by_index(net, ifm->ifi_index); 1646 dev = __dev_get_by_index(net, ifm->ifi_index);
1581 else if (ifname[0]) 1647 else {
1582 dev = __dev_get_by_name(net, ifname); 1648 if (ifname[0])
1583 else 1649 dev = __dev_get_by_name(net, ifname);
1584 dev = NULL; 1650 else
1651 dev = NULL;
1652 }
1585 1653
1586 err = validate_linkmsg(dev, tb); 1654 err = validate_linkmsg(dev, tb);
1587 if (err < 0) 1655 if (err < 0)
@@ -1645,8 +1713,13 @@ replay:
1645 return do_setlink(dev, ifm, tb, ifname, modified); 1713 return do_setlink(dev, ifm, tb, ifname, modified);
1646 } 1714 }
1647 1715
1648 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) 1716 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1717 if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
1718 return rtnl_group_changelink(net,
1719 nla_get_u32(tb[IFLA_GROUP]),
1720 ifm, tb);
1649 return -ENODEV; 1721 return -ENODEV;
1722 }
1650 1723
1651 if (ifm->ifi_index) 1724 if (ifm->ifi_index)
1652 return -EOPNOTSUPP; 1725 return -EOPNOTSUPP;
@@ -1803,7 +1876,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1803 int min_len; 1876 int min_len;
1804 int family; 1877 int family;
1805 int type; 1878 int type;
1806 int err;
1807 1879
1808 type = nlh->nlmsg_type; 1880 type = nlh->nlmsg_type;
1809 if (type > RTM_MAX) 1881 if (type > RTM_MAX)
@@ -1830,11 +1902,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1830 if (dumpit == NULL) 1902 if (dumpit == NULL)
1831 return -EOPNOTSUPP; 1903 return -EOPNOTSUPP;
1832 1904
1833 __rtnl_unlock();
1834 rtnl = net->rtnl; 1905 rtnl = net->rtnl;
1835 err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); 1906 return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
1836 rtnl_lock();
1837 return err;
1838 } 1907 }
1839 1908
1840 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); 1909 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
@@ -1904,7 +1973,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
1904{ 1973{
1905 struct sock *sk; 1974 struct sock *sk;
1906 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, 1975 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
1907 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); 1976 rtnetlink_rcv, NULL, THIS_MODULE);
1908 if (!sk) 1977 if (!sk)
1909 return -ENOMEM; 1978 return -ENOMEM;
1910 net->rtnl = sk; 1979 net->rtnl = sk;
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe454450801..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
95 int fd = fdp[i]; 95 int fd = fdp[i];
96 struct file *file; 96 struct file *file;
97 97
98 if (fd < 0 || !(file = fget(fd))) 98 if (fd < 0 || !(file = fget_raw(fd)))
99 return -EBADF; 99 return -EBADF;
100 *fpp++ = file; 100 *fpp++ = file;
101 fpl->count++; 101 fpl->count++;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d883dcc78b6b..46cbd28f40f9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -57,6 +57,7 @@
57#include <linux/init.h> 57#include <linux/init.h>
58#include <linux/scatterlist.h> 58#include <linux/scatterlist.h>
59#include <linux/errqueue.h> 59#include <linux/errqueue.h>
60#include <linux/prefetch.h>
60 61
61#include <net/protocol.h> 62#include <net/protocol.h>
62#include <net/dst.h> 63#include <net/dst.h>
@@ -523,7 +524,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
523 new->ip_summed = old->ip_summed; 524 new->ip_summed = old->ip_summed;
524 skb_copy_queue_mapping(new, old); 525 skb_copy_queue_mapping(new, old);
525 new->priority = old->priority; 526 new->priority = old->priority;
526 new->deliver_no_wcard = old->deliver_no_wcard;
527#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 527#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
528 new->ipvs_property = old->ipvs_property; 528 new->ipvs_property = old->ipvs_property;
529#endif 529#endif
@@ -2268,7 +2268,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
2268 * of bytes already consumed and the next call to 2268 * of bytes already consumed and the next call to
2269 * skb_seq_read() will return the remaining part of the block. 2269 * skb_seq_read() will return the remaining part of the block.
2270 * 2270 *
2271 * Note 1: The size of each block of data returned can be arbitary, 2271 * Note 1: The size of each block of data returned can be arbitrary,
2272 * this limitation is the cost for zerocopy seqeuental 2272 * this limitation is the cost for zerocopy seqeuental
2273 * reads of potentially non linear data. 2273 * reads of potentially non linear data.
2274 * 2274 *
@@ -2434,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2434 return -ENOMEM; 2434 return -ENOMEM;
2435 2435
2436 /* initialize the next frag */ 2436 /* initialize the next frag */
2437 sk->sk_sndmsg_page = page;
2438 sk->sk_sndmsg_off = 0;
2439 skb_fill_page_desc(skb, frg_cnt, page, 0, 0); 2437 skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
2440 skb->truesize += PAGE_SIZE; 2438 skb->truesize += PAGE_SIZE;
2441 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); 2439 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
@@ -2455,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2455 return -EFAULT; 2453 return -EFAULT;
2456 2454
2457 /* copy was successful so update the size parameters */ 2455 /* copy was successful so update the size parameters */
2458 sk->sk_sndmsg_off += copy;
2459 frag->size += copy; 2456 frag->size += copy;
2460 skb->len += copy; 2457 skb->len += copy;
2461 skb->data_len += copy; 2458 skb->data_len += copy;
@@ -2498,7 +2495,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
2498 * a pointer to the first in a list of new skbs for the segments. 2495 * a pointer to the first in a list of new skbs for the segments.
2499 * In case of error it returns ERR_PTR(err). 2496 * In case of error it returns ERR_PTR(err).
2500 */ 2497 */
2501struct sk_buff *skb_segment(struct sk_buff *skb, int features) 2498struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
2502{ 2499{
2503 struct sk_buff *segs = NULL; 2500 struct sk_buff *segs = NULL;
2504 struct sk_buff *tail = NULL; 2501 struct sk_buff *tail = NULL;
@@ -2508,7 +2505,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2508 unsigned int offset = doffset; 2505 unsigned int offset = doffset;
2509 unsigned int headroom; 2506 unsigned int headroom;
2510 unsigned int len; 2507 unsigned int len;
2511 int sg = features & NETIF_F_SG; 2508 int sg = !!(features & NETIF_F_SG);
2512 int nfrags = skb_shinfo(skb)->nr_frags; 2509 int nfrags = skb_shinfo(skb)->nr_frags;
2513 int err = -ENOMEM; 2510 int err = -ENOMEM;
2514 int i = 0; 2511 int i = 0;
@@ -2997,6 +2994,9 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
2997 skb->destructor = sock_rmem_free; 2994 skb->destructor = sock_rmem_free;
2998 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 2995 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
2999 2996
2997 /* before exiting rcu section, make sure dst is refcounted */
2998 skb_dst_force(skb);
2999
3000 skb_queue_tail(&sk->sk_error_queue, skb); 3000 skb_queue_tail(&sk->sk_error_queue, skb);
3001 if (!sock_flag(sk, SOCK_DEAD)) 3001 if (!sock_flag(sk, SOCK_DEAD))
3002 sk->sk_data_ready(sk, skb->len); 3002 sk->sk_data_ready(sk, skb->len);
diff --git a/net/core/sock.c b/net/core/sock.c
index 7dfed792434d..6e819780c232 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -215,7 +215,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
215__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; 215__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
216__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; 216__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
217 217
218/* Maximal space eaten by iovec or ancilliary data plus some space */ 218/* Maximal space eaten by iovec or ancillary data plus some space */
219int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 219int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
220EXPORT_SYMBOL(sysctl_optmem_max); 220EXPORT_SYMBOL(sysctl_optmem_max);
221 221
@@ -1175,7 +1175,7 @@ static void __sk_free(struct sock *sk)
1175void sk_free(struct sock *sk) 1175void sk_free(struct sock *sk)
1176{ 1176{
1177 /* 1177 /*
1178 * We substract one from sk_wmem_alloc and can know if 1178 * We subtract one from sk_wmem_alloc and can know if
1179 * some packets are still in some tx queue. 1179 * some packets are still in some tx queue.
1180 * If not null, sock_wfree() will call __sk_free(sk) later 1180 * If not null, sock_wfree() will call __sk_free(sk) later
1181 */ 1181 */
@@ -1185,10 +1185,10 @@ void sk_free(struct sock *sk)
1185EXPORT_SYMBOL(sk_free); 1185EXPORT_SYMBOL(sk_free);
1186 1186
1187/* 1187/*
1188 * Last sock_put should drop referrence to sk->sk_net. It has already 1188 * Last sock_put should drop reference to sk->sk_net. It has already
1189 * been dropped in sk_change_net. Taking referrence to stopping namespace 1189 * been dropped in sk_change_net. Taking reference to stopping namespace
1190 * is not an option. 1190 * is not an option.
1191 * Take referrence to a socket to remove it from hash _alive_ and after that 1191 * Take reference to a socket to remove it from hash _alive_ and after that
1192 * destroy it in the context of init_net. 1192 * destroy it in the context of init_net.
1193 */ 1193 */
1194void sk_release_kernel(struct sock *sk) 1194void sk_release_kernel(struct sock *sk)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 385b6095fdc4..a829e3f60aeb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = {
122 .mode = 0644, 122 .mode = 0644,
123 .proc_handler = proc_dointvec 123 .proc_handler = proc_dointvec
124 }, 124 },
125#ifdef CONFIG_BPF_JIT
126 {
127 .procname = "bpf_jit_enable",
128 .data = &bpf_jit_enable,
129 .maxlen = sizeof(int),
130 .mode = 0644,
131 .proc_handler = proc_dointvec
132 },
133#endif
125 { 134 {
126 .procname = "netdev_tstamp_prequeue", 135 .procname = "netdev_tstamp_prequeue",
127 .data = &netdev_tstamp_prequeue, 136 .data = &netdev_tstamp_prequeue,
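The new bpf_jit_enable entry above sits in the net.core table, i.e. /proc/sys/net/core/bpf_jit_enable, and is only compiled in when CONFIG_BPF_JIT is set. A small userspace check, written as a plain C sketch rather than anything from the patch:

        /* Userspace sketch: read the JIT knob exposed by the sysctl table above. */
        #include <stdio.h>

        int main(void)
        {
                int val;
                FILE *f = fopen("/proc/sys/net/core/bpf_jit_enable", "r");

                if (!f)
                        return 1;  /* missing: CONFIG_BPF_JIT not set, or older kernel */
                if (fscanf(f, "%d", &val) == 1)
                        printf("bpf_jit_enable = %d\n", val);
                fclose(f);
                return 0;
        }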
diff --git a/net/core/utils.c b/net/core/utils.c
index 5fea0ab21902..2012bc797f9c 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -296,3 +296,27 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
296 csum_unfold(*sum))); 296 csum_unfold(*sum)));
297} 297}
298EXPORT_SYMBOL(inet_proto_csum_replace4); 298EXPORT_SYMBOL(inet_proto_csum_replace4);
299
300int mac_pton(const char *s, u8 *mac)
301{
302 int i;
303
304 /* XX:XX:XX:XX:XX:XX */
305 if (strlen(s) < 3 * ETH_ALEN - 1)
306 return 0;
307
308 /* Don't dirty result unless string is valid MAC. */
309 for (i = 0; i < ETH_ALEN; i++) {
310 if (!strchr("0123456789abcdefABCDEF", s[i * 3]))
311 return 0;
312 if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1]))
313 return 0;
314 if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
315 return 0;
316 }
317 for (i = 0; i < ETH_ALEN; i++) {
318 mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]);
319 }
320 return 1;
321}
322EXPORT_SYMBOL(mac_pton);
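mac_pton() above accepts only the colon-separated XX:XX:XX:XX:XX:XX form, validates all 17 characters before touching the output buffer, and returns 1 on success. A typical call site, sketched with an illustrative buffer name:

        /* Sketch of a mac_pton() caller; ETH_ALEN comes from <linux/if_ether.h>. */
        u8 addr[ETH_ALEN];

        if (!mac_pton("00:11:22:aa:bb:cc", addr))
                return -EINVAL;         /* not a well-formed MAC string */
        /* addr[] now holds { 0x00, 0x11, 0x22, 0xaa, 0xbb, 0xcc } */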
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d5074a567289..3609eacaf4ce 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2008, Intel Corporation. 2 * Copyright (c) 2008-2011, Intel Corporation.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -1193,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
1193 goto err; 1193 goto err;
1194 } 1194 }
1195 1195
1196 if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { 1196 if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
1197 struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); 1197 struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
1198 err = ops->ieee_setpfc(netdev, pfc); 1198 err = ops->ieee_setpfc(netdev, pfc);
1199 if (err) 1199 if (err)
@@ -1224,6 +1224,59 @@ err:
1224 return err; 1224 return err;
1225} 1225}
1226 1226
1227static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb,
1228 int app_nested_type, int app_info_type,
1229 int app_entry_type)
1230{
1231 struct dcb_peer_app_info info;
1232 struct dcb_app *table = NULL;
1233 const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
1234 u16 app_count;
1235 int err;
1236
1237
1238 /**
1239	 * retrieve the peer app configuration from the driver. If the driver
1240	 * handlers fail, exit without doing anything

1241 */
1242 err = ops->peer_getappinfo(netdev, &info, &app_count);
1243 if (!err && app_count) {
1244 table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL);
1245 if (!table)
1246 return -ENOMEM;
1247
1248 err = ops->peer_getapptable(netdev, table);
1249 }
1250
1251 if (!err) {
1252 u16 i;
1253 struct nlattr *app;
1254
1255 /**
1256 * build the message, from here on the only possible failure
1257 * is due to the skb size
1258 */
1259 err = -EMSGSIZE;
1260
1261 app = nla_nest_start(skb, app_nested_type);
1262 if (!app)
1263 goto nla_put_failure;
1264
1265 if (app_info_type)
1266 NLA_PUT(skb, app_info_type, sizeof(info), &info);
1267
1268 for (i = 0; i < app_count; i++)
1269 NLA_PUT(skb, app_entry_type, sizeof(struct dcb_app),
1270 &table[i]);
1271
1272 nla_nest_end(skb, app);
1273 }
1274 err = 0;
1275
1276nla_put_failure:
1277 kfree(table);
1278 return err;
1279}
1227 1280
1228/* Handle IEEE 802.1Qaz GET commands. */ 1281/* Handle IEEE 802.1Qaz GET commands. */
1229static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, 1282static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
@@ -1288,6 +1341,30 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
1288 spin_unlock(&dcb_lock); 1341 spin_unlock(&dcb_lock);
1289 nla_nest_end(skb, app); 1342 nla_nest_end(skb, app);
1290 1343
1344 /* get peer info if available */
1345 if (ops->ieee_peer_getets) {
1346 struct ieee_ets ets;
1347 err = ops->ieee_peer_getets(netdev, &ets);
1348 if (!err)
1349 NLA_PUT(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets);
1350 }
1351
1352 if (ops->ieee_peer_getpfc) {
1353 struct ieee_pfc pfc;
1354 err = ops->ieee_peer_getpfc(netdev, &pfc);
1355 if (!err)
1356 NLA_PUT(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc);
1357 }
1358
1359 if (ops->peer_getappinfo && ops->peer_getapptable) {
1360 err = dcbnl_build_peer_app(netdev, skb,
1361 DCB_ATTR_IEEE_PEER_APP,
1362 DCB_ATTR_IEEE_APP_UNSPEC,
1363 DCB_ATTR_IEEE_APP);
1364 if (err)
1365 goto nla_put_failure;
1366 }
1367
1291 nla_nest_end(skb, ieee); 1368 nla_nest_end(skb, ieee);
1292 nlmsg_end(skb, nlh); 1369 nlmsg_end(skb, nlh);
1293 1370
@@ -1441,6 +1518,71 @@ err:
1441 return ret; 1518 return ret;
1442} 1519}
1443 1520
1521/* Handle CEE DCBX GET commands. */
1522static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb,
1523 u32 pid, u32 seq, u16 flags)
1524{
1525 struct sk_buff *skb;
1526 struct nlmsghdr *nlh;
1527 struct dcbmsg *dcb;
1528 struct nlattr *cee;
1529 const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
1530 int err;
1531
1532 if (!ops)
1533 return -EOPNOTSUPP;
1534
1535 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1536 if (!skb)
1537 return -ENOBUFS;
1538
1539 nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
1540
1541 dcb = NLMSG_DATA(nlh);
1542 dcb->dcb_family = AF_UNSPEC;
1543 dcb->cmd = DCB_CMD_CEE_GET;
1544
1545 NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name);
1546
1547 cee = nla_nest_start(skb, DCB_ATTR_CEE);
1548 if (!cee)
1549 goto nla_put_failure;
1550
1551 /* get peer info if available */
1552 if (ops->cee_peer_getpg) {
1553 struct cee_pg pg;
1554 err = ops->cee_peer_getpg(netdev, &pg);
1555 if (!err)
1556 NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg);
1557 }
1558
1559 if (ops->cee_peer_getpfc) {
1560 struct cee_pfc pfc;
1561 err = ops->cee_peer_getpfc(netdev, &pfc);
1562 if (!err)
1563 NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc);
1564 }
1565
1566 if (ops->peer_getappinfo && ops->peer_getapptable) {
1567 err = dcbnl_build_peer_app(netdev, skb,
1568 DCB_ATTR_CEE_PEER_APP_TABLE,
1569 DCB_ATTR_CEE_PEER_APP_INFO,
1570 DCB_ATTR_CEE_PEER_APP);
1571 if (err)
1572 goto nla_put_failure;
1573 }
1574
1575 nla_nest_end(skb, cee);
1576 nlmsg_end(skb, nlh);
1577
1578 return rtnl_unicast(skb, &init_net, pid);
1579nla_put_failure:
1580 nlmsg_cancel(skb, nlh);
1581nlmsg_failure:
1582 kfree_skb(skb);
1583 return -1;
1584}
1585
1444static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1586static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1445{ 1587{
1446 struct net *net = sock_net(skb->sk); 1588 struct net *net = sock_net(skb->sk);
@@ -1570,6 +1712,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1570 ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, 1712 ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq,
1571 nlh->nlmsg_flags); 1713 nlh->nlmsg_flags);
1572 goto out; 1714 goto out;
1715 case DCB_CMD_CEE_GET:
1716 ret = dcbnl_cee_get(netdev, tb, pid, nlh->nlmsg_seq,
1717 nlh->nlmsg_flags);
1718 goto out;
1573 default: 1719 default:
1574 goto errout; 1720 goto errout;
1575 } 1721 }
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e96d5e810039..fadecd20d75b 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -583,6 +583,15 @@ done:
583 dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); 583 dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
584} 584}
585 585
586/*
587 * Convert RFC 3390 larger initial window into an equivalent number of packets.
588 * This is based on the numbers specified in RFC 5681, 3.1.
589 */
590static inline u32 rfc3390_bytes_to_packets(const u32 smss)
591{
592 return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
593}
594
586static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 595static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
587{ 596{
588 struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); 597 struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
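rfc3390_bytes_to_packets(), added above, is just the segment-count form of the RFC 3390 initial window, IW = min(4*SMSS, max(2*SMSS, 4380 bytes)), as restated in RFC 5681, 3.1. Worked through for the three SMSS ranges:

        /* SMSS <= 1095        -> 4 packets (4*SMSS never exceeds 4380 bytes)
         * 1095 < SMSS <= 2190 -> 3 packets (e.g. Ethernet SMSS 1460: 3*1460 = 4380)
         * SMSS > 2190         -> 2 packets (2*SMSS already exceeds the 4380-byte cap)
         *
         * so rfc3390_bytes_to_packets(1460) == 3 for a standard Ethernet MSS.
         */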
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8cde009e8b85..4222e7a654b0 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
614 /* Caller (dccp_v4_do_rcv) will send Reset */ 614 /* Caller (dccp_v4_do_rcv) will send Reset */
615 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; 615 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
616 return 1; 616 return 1;
617 } else if (sk->sk_state == DCCP_CLOSED) {
618 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
619 return 1;
617 } 620 }
618 621
619 if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { 622 if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
@@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
668 } 671 }
669 672
670 switch (sk->sk_state) { 673 switch (sk->sk_state) {
671 case DCCP_CLOSED:
672 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
673 return 1;
674
675 case DCCP_REQUESTING: 674 case DCCP_REQUESTING:
676 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); 675 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
677 if (queued >= 0) 676 if (queued >= 0)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 45a434f94169..8c36adfd1919 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -40,13 +40,15 @@
40 40
41int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 41int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
42{ 42{
43 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
43 struct inet_sock *inet = inet_sk(sk); 44 struct inet_sock *inet = inet_sk(sk);
44 struct dccp_sock *dp = dccp_sk(sk); 45 struct dccp_sock *dp = dccp_sk(sk);
45 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 46 __be16 orig_sport, orig_dport;
46 struct rtable *rt;
47 __be32 daddr, nexthop; 47 __be32 daddr, nexthop;
48 int tmp; 48 struct flowi4 *fl4;
49 struct rtable *rt;
49 int err; 50 int err;
51 struct ip_options_rcu *inet_opt;
50 52
51 dp->dccps_role = DCCP_ROLE_CLIENT; 53 dp->dccps_role = DCCP_ROLE_CLIENT;
52 54
@@ -57,37 +59,43 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
57 return -EAFNOSUPPORT; 59 return -EAFNOSUPPORT;
58 60
59 nexthop = daddr = usin->sin_addr.s_addr; 61 nexthop = daddr = usin->sin_addr.s_addr;
60 if (inet->opt != NULL && inet->opt->srr) { 62
63 inet_opt = rcu_dereference_protected(inet->inet_opt,
64 sock_owned_by_user(sk));
65 if (inet_opt != NULL && inet_opt->opt.srr) {
61 if (daddr == 0) 66 if (daddr == 0)
62 return -EINVAL; 67 return -EINVAL;
63 nexthop = inet->opt->faddr; 68 nexthop = inet_opt->opt.faddr;
64 } 69 }
65 70
66 tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, 71 orig_sport = inet->inet_sport;
67 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 72 orig_dport = usin->sin_port;
68 IPPROTO_DCCP, 73 fl4 = &inet->cork.fl.u.ip4;
69 inet->inet_sport, usin->sin_port, sk, 1); 74 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
70 if (tmp < 0) 75 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
71 return tmp; 76 IPPROTO_DCCP,
77 orig_sport, orig_dport, sk, true);
78 if (IS_ERR(rt))
79 return PTR_ERR(rt);
72 80
73 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 81 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
74 ip_rt_put(rt); 82 ip_rt_put(rt);
75 return -ENETUNREACH; 83 return -ENETUNREACH;
76 } 84 }
77 85
78 if (inet->opt == NULL || !inet->opt->srr) 86 if (inet_opt == NULL || !inet_opt->opt.srr)
79 daddr = rt->rt_dst; 87 daddr = fl4->daddr;
80 88
81 if (inet->inet_saddr == 0) 89 if (inet->inet_saddr == 0)
82 inet->inet_saddr = rt->rt_src; 90 inet->inet_saddr = fl4->saddr;
83 inet->inet_rcv_saddr = inet->inet_saddr; 91 inet->inet_rcv_saddr = inet->inet_saddr;
84 92
85 inet->inet_dport = usin->sin_port; 93 inet->inet_dport = usin->sin_port;
86 inet->inet_daddr = daddr; 94 inet->inet_daddr = daddr;
87 95
88 inet_csk(sk)->icsk_ext_hdr_len = 0; 96 inet_csk(sk)->icsk_ext_hdr_len = 0;
89 if (inet->opt != NULL) 97 if (inet_opt)
90 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 98 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
91 /* 99 /*
92 * Socket identity is still unknown (sport may be zero). 100 * Socket identity is still unknown (sport may be zero).
93 * However we set state to DCCP_REQUESTING and not releasing socket 101 * However we set state to DCCP_REQUESTING and not releasing socket
@@ -99,11 +107,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
99 if (err != 0) 107 if (err != 0)
100 goto failure; 108 goto failure;
101 109
102 err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, 110 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
103 inet->inet_dport, sk); 111 inet->inet_sport, inet->inet_dport, sk);
104 if (err != 0) 112 if (IS_ERR(rt)) {
113 rt = NULL;
105 goto failure; 114 goto failure;
106 115 }
107 /* OK, now commit destination to socket. */ 116 /* OK, now commit destination to socket. */
108 sk_setup_caps(sk, &rt->dst); 117 sk_setup_caps(sk, &rt->dst);
109 118
@@ -387,32 +396,30 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
387 if (sk_acceptq_is_full(sk)) 396 if (sk_acceptq_is_full(sk))
388 goto exit_overflow; 397 goto exit_overflow;
389 398
390 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
391 goto exit;
392
393 newsk = dccp_create_openreq_child(sk, req, skb); 399 newsk = dccp_create_openreq_child(sk, req, skb);
394 if (newsk == NULL) 400 if (newsk == NULL)
395 goto exit_nonewsk; 401 goto exit_nonewsk;
396 402
397 sk_setup_caps(newsk, dst);
398
399 newinet = inet_sk(newsk); 403 newinet = inet_sk(newsk);
400 ireq = inet_rsk(req); 404 ireq = inet_rsk(req);
401 newinet->inet_daddr = ireq->rmt_addr; 405 newinet->inet_daddr = ireq->rmt_addr;
402 newinet->inet_rcv_saddr = ireq->loc_addr; 406 newinet->inet_rcv_saddr = ireq->loc_addr;
403 newinet->inet_saddr = ireq->loc_addr; 407 newinet->inet_saddr = ireq->loc_addr;
404 newinet->opt = ireq->opt; 408 newinet->inet_opt = ireq->opt;
405 ireq->opt = NULL; 409 ireq->opt = NULL;
406 newinet->mc_index = inet_iif(skb); 410 newinet->mc_index = inet_iif(skb);
407 newinet->mc_ttl = ip_hdr(skb)->ttl; 411 newinet->mc_ttl = ip_hdr(skb)->ttl;
408 newinet->inet_id = jiffies; 412 newinet->inet_id = jiffies;
409 413
414 if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
415 goto put_and_exit;
416
417 sk_setup_caps(newsk, dst);
418
410 dccp_sync_mss(newsk, dst_mtu(dst)); 419 dccp_sync_mss(newsk, dst_mtu(dst));
411 420
412 if (__inet_inherit_port(sk, newsk) < 0) { 421 if (__inet_inherit_port(sk, newsk) < 0)
413 sock_put(newsk); 422 goto put_and_exit;
414 goto exit;
415 }
416 __inet_hash_nolisten(newsk, NULL); 423 __inet_hash_nolisten(newsk, NULL);
417 424
418 return newsk; 425 return newsk;
@@ -424,6 +431,9 @@ exit_nonewsk:
424exit: 431exit:
425 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 432 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
426 return NULL; 433 return NULL;
434put_and_exit:
435 sock_put(newsk);
436 goto exit;
427} 437}
428 438
429EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); 439EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
@@ -461,17 +471,19 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
461 struct sk_buff *skb) 471 struct sk_buff *skb)
462{ 472{
463 struct rtable *rt; 473 struct rtable *rt;
464 struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, 474 struct flowi4 fl4 = {
465 .fl4_dst = ip_hdr(skb)->saddr, 475 .flowi4_oif = skb_rtable(skb)->rt_iif,
466 .fl4_src = ip_hdr(skb)->daddr, 476 .daddr = ip_hdr(skb)->saddr,
467 .fl4_tos = RT_CONN_FLAGS(sk), 477 .saddr = ip_hdr(skb)->daddr,
468 .proto = sk->sk_protocol, 478 .flowi4_tos = RT_CONN_FLAGS(sk),
469 .fl_ip_sport = dccp_hdr(skb)->dccph_dport, 479 .flowi4_proto = sk->sk_protocol,
470 .fl_ip_dport = dccp_hdr(skb)->dccph_sport 480 .fl4_sport = dccp_hdr(skb)->dccph_dport,
471 }; 481 .fl4_dport = dccp_hdr(skb)->dccph_sport,
472 482 };
473 security_skb_classify_flow(skb, &fl); 483
474 if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { 484 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
485 rt = ip_route_output_flow(net, &fl4, sk);
486 if (IS_ERR(rt)) {
475 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 487 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
476 return NULL; 488 return NULL;
477 } 489 }
@@ -485,8 +497,9 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
485 int err = -1; 497 int err = -1;
486 struct sk_buff *skb; 498 struct sk_buff *skb;
487 struct dst_entry *dst; 499 struct dst_entry *dst;
500 struct flowi4 fl4;
488 501
489 dst = inet_csk_route_req(sk, req); 502 dst = inet_csk_route_req(sk, &fl4, req);
490 if (dst == NULL) 503 if (dst == NULL)
491 goto out; 504 goto out;
492 505
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index dca711df9b60..8dc4348774a5 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -54,8 +54,8 @@ static void dccp_v6_hash(struct sock *sk)
54 54
55/* add pseudo-header to DCCP checksum stored in skb->csum */ 55/* add pseudo-header to DCCP checksum stored in skb->csum */
56static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, 56static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
57 struct in6_addr *saddr, 57 const struct in6_addr *saddr,
58 struct in6_addr *daddr) 58 const struct in6_addr *daddr)
59{ 59{
60 return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); 60 return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
61} 61}
@@ -87,7 +87,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
87static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 87static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 u8 type, u8 code, int offset, __be32 info) 88 u8 type, u8 code, int offset, __be32 info)
89{ 89{
90 struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; 90 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
91 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); 91 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
92 struct dccp_sock *dp; 92 struct dccp_sock *dp;
93 struct ipv6_pinfo *np; 93 struct ipv6_pinfo *np;
@@ -147,30 +147,24 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
147 dst = __sk_dst_check(sk, np->dst_cookie); 147 dst = __sk_dst_check(sk, np->dst_cookie);
148 if (dst == NULL) { 148 if (dst == NULL) {
149 struct inet_sock *inet = inet_sk(sk); 149 struct inet_sock *inet = inet_sk(sk);
150 struct flowi fl; 150 struct flowi6 fl6;
151 151
152 /* BUGGG_FUTURE: Again, it is not clear how 152 /* BUGGG_FUTURE: Again, it is not clear how
153 to handle rthdr case. Ignore this complexity 153 to handle rthdr case. Ignore this complexity
154 for now. 154 for now.
155 */ 155 */
156 memset(&fl, 0, sizeof(fl)); 156 memset(&fl6, 0, sizeof(fl6));
157 fl.proto = IPPROTO_DCCP; 157 fl6.flowi6_proto = IPPROTO_DCCP;
158 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 158 ipv6_addr_copy(&fl6.daddr, &np->daddr);
159 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 159 ipv6_addr_copy(&fl6.saddr, &np->saddr);
160 fl.oif = sk->sk_bound_dev_if; 160 fl6.flowi6_oif = sk->sk_bound_dev_if;
161 fl.fl_ip_dport = inet->inet_dport; 161 fl6.fl6_dport = inet->inet_dport;
162 fl.fl_ip_sport = inet->inet_sport; 162 fl6.fl6_sport = inet->inet_sport;
163 security_sk_classify_flow(sk, &fl); 163 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
164 164
165 err = ip6_dst_lookup(sk, &dst, &fl); 165 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
166 if (err) { 166 if (IS_ERR(dst)) {
167 sk->sk_err_soft = -err; 167 sk->sk_err_soft = -PTR_ERR(dst);
168 goto out;
169 }
170
171 err = xfrm_lookup(net, &dst, &fl, sk, 0);
172 if (err < 0) {
173 sk->sk_err_soft = -err;
174 goto out; 168 goto out;
175 } 169 }
176 } else 170 } else
@@ -249,34 +243,30 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
249 struct sk_buff *skb; 243 struct sk_buff *skb;
250 struct ipv6_txoptions *opt = NULL; 244 struct ipv6_txoptions *opt = NULL;
251 struct in6_addr *final_p, final; 245 struct in6_addr *final_p, final;
252 struct flowi fl; 246 struct flowi6 fl6;
253 int err = -1; 247 int err = -1;
254 struct dst_entry *dst; 248 struct dst_entry *dst;
255 249
256 memset(&fl, 0, sizeof(fl)); 250 memset(&fl6, 0, sizeof(fl6));
257 fl.proto = IPPROTO_DCCP; 251 fl6.flowi6_proto = IPPROTO_DCCP;
258 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 252 ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
259 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 253 ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
260 fl.fl6_flowlabel = 0; 254 fl6.flowlabel = 0;
261 fl.oif = ireq6->iif; 255 fl6.flowi6_oif = ireq6->iif;
262 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 256 fl6.fl6_dport = inet_rsk(req)->rmt_port;
263 fl.fl_ip_sport = inet_rsk(req)->loc_port; 257 fl6.fl6_sport = inet_rsk(req)->loc_port;
264 security_req_classify_flow(req, &fl); 258 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
265 259
266 opt = np->opt; 260 opt = np->opt;
267 261
268 final_p = fl6_update_dst(&fl, opt, &final); 262 final_p = fl6_update_dst(&fl6, opt, &final);
269 263
270 err = ip6_dst_lookup(sk, &dst, &fl); 264 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
271 if (err) 265 if (IS_ERR(dst)) {
272 goto done; 266 err = PTR_ERR(dst);
273 267 dst = NULL;
274 if (final_p)
275 ipv6_addr_copy(&fl.fl6_dst, final_p);
276
277 err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0);
278 if (err < 0)
279 goto done; 268 goto done;
269 }
280 270
281 skb = dccp_make_response(sk, dst, req); 271 skb = dccp_make_response(sk, dst, req);
282 if (skb != NULL) { 272 if (skb != NULL) {
@@ -285,8 +275,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
285 dh->dccph_checksum = dccp_v6_csum_finish(skb, 275 dh->dccph_checksum = dccp_v6_csum_finish(skb,
286 &ireq6->loc_addr, 276 &ireq6->loc_addr,
287 &ireq6->rmt_addr); 277 &ireq6->rmt_addr);
288 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 278 ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
289 err = ip6_xmit(sk, skb, &fl, opt); 279 err = ip6_xmit(sk, skb, &fl6, opt);
290 err = net_xmit_eval(err); 280 err = net_xmit_eval(err);
291 } 281 }
292 282
@@ -306,9 +296,9 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
306 296
307static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) 297static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
308{ 298{
309 struct ipv6hdr *rxip6h; 299 const struct ipv6hdr *rxip6h;
310 struct sk_buff *skb; 300 struct sk_buff *skb;
311 struct flowi fl; 301 struct flowi6 fl6;
312 struct net *net = dev_net(skb_dst(rxskb)->dev); 302 struct net *net = dev_net(skb_dst(rxskb)->dev);
313 struct sock *ctl_sk = net->dccp.v6_ctl_sk; 303 struct sock *ctl_sk = net->dccp.v6_ctl_sk;
314 struct dst_entry *dst; 304 struct dst_entry *dst;
@@ -327,25 +317,24 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
327 dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, 317 dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
328 &rxip6h->daddr); 318 &rxip6h->daddr);
329 319
330 memset(&fl, 0, sizeof(fl)); 320 memset(&fl6, 0, sizeof(fl6));
331 ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); 321 ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr);
332 ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); 322 ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr);
333 323
334 fl.proto = IPPROTO_DCCP; 324 fl6.flowi6_proto = IPPROTO_DCCP;
335 fl.oif = inet6_iif(rxskb); 325 fl6.flowi6_oif = inet6_iif(rxskb);
336 fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; 326 fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
337 fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; 327 fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
338 security_skb_classify_flow(rxskb, &fl); 328 security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
339 329
340 /* sk = NULL, but it is safe for now. RST socket required. */ 330 /* sk = NULL, but it is safe for now. RST socket required. */
341 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { 331 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
342 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { 332 if (!IS_ERR(dst)) {
343 skb_dst_set(skb, dst); 333 skb_dst_set(skb, dst);
344 ip6_xmit(ctl_sk, skb, &fl, NULL); 334 ip6_xmit(ctl_sk, skb, &fl6, NULL);
345 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 335 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
346 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 336 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
347 return; 337 return;
348 }
349 } 338 }
350 339
351 kfree_skb(skb); 340 kfree_skb(skb);
@@ -484,7 +473,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
484 struct inet6_request_sock *ireq6 = inet6_rsk(req); 473 struct inet6_request_sock *ireq6 = inet6_rsk(req);
485 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 474 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
486 struct inet_sock *newinet; 475 struct inet_sock *newinet;
487 struct dccp_sock *newdp;
488 struct dccp6_sock *newdp6; 476 struct dccp6_sock *newdp6;
489 struct sock *newsk; 477 struct sock *newsk;
490 struct ipv6_txoptions *opt; 478 struct ipv6_txoptions *opt;
@@ -498,7 +486,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
498 return NULL; 486 return NULL;
499 487
500 newdp6 = (struct dccp6_sock *)newsk; 488 newdp6 = (struct dccp6_sock *)newsk;
501 newdp = dccp_sk(newsk);
502 newinet = inet_sk(newsk); 489 newinet = inet_sk(newsk);
503 newinet->pinet6 = &newdp6->inet6; 490 newinet->pinet6 = &newdp6->inet6;
504 newnp = inet6_sk(newsk); 491 newnp = inet6_sk(newsk);
@@ -540,25 +527,20 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
540 527
541 if (dst == NULL) { 528 if (dst == NULL) {
542 struct in6_addr *final_p, final; 529 struct in6_addr *final_p, final;
543 struct flowi fl; 530 struct flowi6 fl6;
544 531
545 memset(&fl, 0, sizeof(fl)); 532 memset(&fl6, 0, sizeof(fl6));
546 fl.proto = IPPROTO_DCCP; 533 fl6.flowi6_proto = IPPROTO_DCCP;
547 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 534 ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
548 final_p = fl6_update_dst(&fl, opt, &final); 535 final_p = fl6_update_dst(&fl6, opt, &final);
549 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 536 ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
550 fl.oif = sk->sk_bound_dev_if; 537 fl6.flowi6_oif = sk->sk_bound_dev_if;
551 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 538 fl6.fl6_dport = inet_rsk(req)->rmt_port;
552 fl.fl_ip_sport = inet_rsk(req)->loc_port; 539 fl6.fl6_sport = inet_rsk(req)->loc_port;
553 security_sk_classify_flow(sk, &fl); 540 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
554 541
555 if (ip6_dst_lookup(sk, &dst, &fl)) 542 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
556 goto out; 543 if (IS_ERR(dst))
557
558 if (final_p)
559 ipv6_addr_copy(&fl.fl6_dst, final_p);
560
561 if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
562 goto out; 544 goto out;
563 } 545 }
564 546
@@ -578,7 +560,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
578 newdp6 = (struct dccp6_sock *)newsk; 560 newdp6 = (struct dccp6_sock *)newsk;
579 newinet = inet_sk(newsk); 561 newinet = inet_sk(newsk);
580 newinet->pinet6 = &newdp6->inet6; 562 newinet->pinet6 = &newdp6->inet6;
581 newdp = dccp_sk(newsk);
582 newnp = inet6_sk(newsk); 563 newnp = inet6_sk(newsk);
583 564
584 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 565 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -592,7 +573,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
592 573
593 First: no IPv4 options. 574 First: no IPv4 options.
594 */ 575 */
595 newinet->opt = NULL; 576 newinet->inet_opt = NULL;
596 577
597 /* Clone RX bits */ 578 /* Clone RX bits */
598 newnp->rxopt.all = np->rxopt.all; 579 newnp->rxopt.all = np->rxopt.all;
@@ -878,7 +859,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
878 struct ipv6_pinfo *np = inet6_sk(sk); 859 struct ipv6_pinfo *np = inet6_sk(sk);
879 struct dccp_sock *dp = dccp_sk(sk); 860 struct dccp_sock *dp = dccp_sk(sk);
880 struct in6_addr *saddr = NULL, *final_p, final; 861 struct in6_addr *saddr = NULL, *final_p, final;
881 struct flowi fl; 862 struct flowi6 fl6;
882 struct dst_entry *dst; 863 struct dst_entry *dst;
883 int addr_type; 864 int addr_type;
884 int err; 865 int err;
@@ -891,14 +872,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
891 if (usin->sin6_family != AF_INET6) 872 if (usin->sin6_family != AF_INET6)
892 return -EAFNOSUPPORT; 873 return -EAFNOSUPPORT;
893 874
894 memset(&fl, 0, sizeof(fl)); 875 memset(&fl6, 0, sizeof(fl6));
895 876
896 if (np->sndflow) { 877 if (np->sndflow) {
897 fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; 878 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
898 IP6_ECN_flow_init(fl.fl6_flowlabel); 879 IP6_ECN_flow_init(fl6.flowlabel);
899 if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { 880 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
900 struct ip6_flowlabel *flowlabel; 881 struct ip6_flowlabel *flowlabel;
901 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 882 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
902 if (flowlabel == NULL) 883 if (flowlabel == NULL)
903 return -EINVAL; 884 return -EINVAL;
904 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); 885 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
@@ -935,7 +916,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
935 } 916 }
936 917
937 ipv6_addr_copy(&np->daddr, &usin->sin6_addr); 918 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
938 np->flow_label = fl.fl6_flowlabel; 919 np->flow_label = fl6.flowlabel;
939 920
940 /* 921 /*
941 * DCCP over IPv4 922 * DCCP over IPv4
@@ -972,33 +953,24 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
972 if (!ipv6_addr_any(&np->rcv_saddr)) 953 if (!ipv6_addr_any(&np->rcv_saddr))
973 saddr = &np->rcv_saddr; 954 saddr = &np->rcv_saddr;
974 955
975 fl.proto = IPPROTO_DCCP; 956 fl6.flowi6_proto = IPPROTO_DCCP;
976 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 957 ipv6_addr_copy(&fl6.daddr, &np->daddr);
977 ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); 958 ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr);
978 fl.oif = sk->sk_bound_dev_if; 959 fl6.flowi6_oif = sk->sk_bound_dev_if;
979 fl.fl_ip_dport = usin->sin6_port; 960 fl6.fl6_dport = usin->sin6_port;
980 fl.fl_ip_sport = inet->inet_sport; 961 fl6.fl6_sport = inet->inet_sport;
981 security_sk_classify_flow(sk, &fl); 962 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
982 963
983 final_p = fl6_update_dst(&fl, np->opt, &final); 964 final_p = fl6_update_dst(&fl6, np->opt, &final);
984 965
985 err = ip6_dst_lookup(sk, &dst, &fl); 966 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
986 if (err) 967 if (IS_ERR(dst)) {
968 err = PTR_ERR(dst);
987 goto failure; 969 goto failure;
988
989 if (final_p)
990 ipv6_addr_copy(&fl.fl6_dst, final_p);
991
992 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
993 if (err < 0) {
994 if (err == -EREMOTE)
995 err = ip6_dst_blackhole(sk, &dst, &fl);
996 if (err < 0)
997 goto failure;
998 } 970 }
999 971
1000 if (saddr == NULL) { 972 if (saddr == NULL) {
1001 saddr = &fl.fl6_src; 973 saddr = &fl6.saddr;
1002 ipv6_addr_copy(&np->rcv_saddr, saddr); 974 ipv6_addr_copy(&np->rcv_saddr, saddr);
1003 } 975 }
1004 976
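Throughout the dccp_v6_* conversions above, the separate ip6_dst_lookup() + xfrm_lookup() pair is folded into ip6_dst_lookup_flow(), which applies the final destination (final_p) and the IPsec lookup internally and returns either the dst or an ERR_PTR. The resulting call shape, shown as a bare sketch:

        /* Sketch of the consolidated lookup the converted DCCPv6 paths now use. */
        dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true /* can sleep */);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }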
diff --git a/net/dccp/options.c b/net/dccp/options.c
index f06ffcfc8d71..4b2ab657ac8e 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -123,6 +123,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
123 case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: 123 case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
124 if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ 124 if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */
125 break; 125 break;
126 if (len == 0)
127 goto out_invalid_option;
126 rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, 128 rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
127 *value, value + 1, len - 1); 129 *value, value + 1, len - 1);
128 if (rc) 130 if (rc)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 784d30210543..fab108e51e5a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -43,7 +43,7 @@ static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
43static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) 43static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
44{ 44{
45 if (likely(skb != NULL)) { 45 if (likely(skb != NULL)) {
46 const struct inet_sock *inet = inet_sk(sk); 46 struct inet_sock *inet = inet_sk(sk);
47 const struct inet_connection_sock *icsk = inet_csk(sk); 47 const struct inet_connection_sock *icsk = inet_csk(sk);
48 struct dccp_sock *dp = dccp_sk(sk); 48 struct dccp_sock *dp = dccp_sk(sk);
49 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 49 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
@@ -136,14 +136,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
136 136
137 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 137 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
138 138
139 err = icsk->icsk_af_ops->queue_xmit(skb); 139 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
140 return net_xmit_eval(err); 140 return net_xmit_eval(err);
141 } 141 }
142 return -ENOBUFS; 142 return -ENOBUFS;
143} 143}
144 144
145/** 145/**
146 * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits 146 * dccp_determine_ccmps - Find out about CCID-specific packet-size limits
147 * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), 147 * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
148 * since the RX CCID is restricted to feedback packets (Acks), which are small 148 * since the RX CCID is restricted to feedback packets (Acks), which are small
149 * in comparison with the data traffic. A value of 0 means "no current CCMPS". 149 * in comparison with the data traffic. A value of 0 means "no current CCMPS".
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2af15b15d1fa..ea3b6ee21fc9 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -908,7 +908,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
908 struct socket *sock = sk->sk_socket; 908 struct socket *sock = sk->sk_socket;
909 struct dn_scp *scp = DN_SK(sk); 909 struct dn_scp *scp = DN_SK(sk);
910 int err = -EISCONN; 910 int err = -EISCONN;
911 struct flowi fl; 911 struct flowidn fld;
912 912
913 if (sock->state == SS_CONNECTED) 913 if (sock->state == SS_CONNECTED)
914 goto out; 914 goto out;
@@ -947,13 +947,13 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
947 memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn)); 947 memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn));
948 948
949 err = -EHOSTUNREACH; 949 err = -EHOSTUNREACH;
950 memset(&fl, 0, sizeof(fl)); 950 memset(&fld, 0, sizeof(fld));
951 fl.oif = sk->sk_bound_dev_if; 951 fld.flowidn_oif = sk->sk_bound_dev_if;
952 fl.fld_dst = dn_saddr2dn(&scp->peer); 952 fld.daddr = dn_saddr2dn(&scp->peer);
953 fl.fld_src = dn_saddr2dn(&scp->addr); 953 fld.saddr = dn_saddr2dn(&scp->addr);
954 dn_sk_ports_copy(&fl, scp); 954 dn_sk_ports_copy(&fld, scp);
955 fl.proto = DNPROTO_NSP; 955 fld.flowidn_proto = DNPROTO_NSP;
956 if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0) 956 if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0)
957 goto out; 957 goto out;
958 sk->sk_route_caps = sk->sk_dst_cache->dev->features; 958 sk->sk_route_caps = sk->sk_dst_cache->dev->features;
959 sock->state = SS_CONNECTING; 959 sock->state = SS_CONNECTING;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 0dcaa903e00e..cf26ac74a188 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -332,14 +332,9 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
332 return ifa; 332 return ifa;
333} 333}
334 334
335static void dn_dev_free_ifa_rcu(struct rcu_head *head)
336{
337 kfree(container_of(head, struct dn_ifaddr, rcu));
338}
339
340static void dn_dev_free_ifa(struct dn_ifaddr *ifa) 335static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
341{ 336{
342 call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu); 337 kfree_rcu(ifa, rcu);
343} 338}
344 339
345static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy) 340static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
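The dn_dev change above is the generic kfree_rcu() conversion: the object's embedded rcu_head and its offset are all the RCU core needs to free the structure after a grace period, so the one-line helper replaces a dedicated callback. Side by side, as a sketch with an illustrative struct:

        /* before: a callback whose only job is kfree() after the grace period */
        static void my_obj_free_rcu(struct rcu_head *head)
        {
                kfree(container_of(head, struct my_obj, rcu));
        }
        ...
        call_rcu(&obj->rcu, my_obj_free_rcu);

        /* after: let the RCU core do the kfree() from the rcu_head offset */
        kfree_rcu(obj, rcu);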
@@ -752,7 +747,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
752 skip_naddr = cb->args[1]; 747 skip_naddr = cb->args[1];
753 748
754 idx = 0; 749 idx = 0;
755 for_each_netdev(&init_net, dev) { 750 rcu_read_lock();
751 for_each_netdev_rcu(&init_net, dev) {
756 if (idx < skip_ndevs) 752 if (idx < skip_ndevs)
757 goto cont; 753 goto cont;
758 else if (idx > skip_ndevs) { 754 else if (idx > skip_ndevs) {
@@ -761,11 +757,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
761 skip_naddr = 0; 757 skip_naddr = 0;
762 } 758 }
763 759
764 if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) 760 if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
765 goto cont; 761 goto cont;
766 762
767 for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; 763 for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
768 ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { 764 ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
769 if (dn_idx < skip_naddr) 765 if (dn_idx < skip_naddr)
770 continue; 766 continue;
771 767
@@ -778,6 +774,7 @@ cont:
778 idx++; 774 idx++;
779 } 775 }
780done: 776done:
777 rcu_read_unlock();
781 cb->args[0] = idx; 778 cb->args[0] = idx;
782 cb->args[1] = dn_idx; 779 cb->args[1] = dn_idx;
783 780
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 0ef0a81bcd72..1c74ed36ce8f 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -201,7 +201,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct
201 int err; 201 int err;
202 202
203 if (nh->nh_gw) { 203 if (nh->nh_gw) {
204 struct flowi fl; 204 struct flowidn fld;
205 struct dn_fib_res res; 205 struct dn_fib_res res;
206 206
207 if (nh->nh_flags&RTNH_F_ONLINK) { 207 if (nh->nh_flags&RTNH_F_ONLINK) {
@@ -221,15 +221,15 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct
221 return 0; 221 return 0;
222 } 222 }
223 223
224 memset(&fl, 0, sizeof(fl)); 224 memset(&fld, 0, sizeof(fld));
225 fl.fld_dst = nh->nh_gw; 225 fld.daddr = nh->nh_gw;
226 fl.oif = nh->nh_oif; 226 fld.flowidn_oif = nh->nh_oif;
227 fl.fld_scope = r->rtm_scope + 1; 227 fld.flowidn_scope = r->rtm_scope + 1;
228 228
229 if (fl.fld_scope < RT_SCOPE_LINK) 229 if (fld.flowidn_scope < RT_SCOPE_LINK)
230 fl.fld_scope = RT_SCOPE_LINK; 230 fld.flowidn_scope = RT_SCOPE_LINK;
231 231
232 if ((err = dn_fib_lookup(&fl, &res)) != 0) 232 if ((err = dn_fib_lookup(&fld, &res)) != 0)
233 return err; 233 return err;
234 234
235 err = -EINVAL; 235 err = -EINVAL;
@@ -404,7 +404,7 @@ failure:
404 return NULL; 404 return NULL;
405} 405}
406 406
407int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *fl, struct dn_fib_res *res) 407int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res)
408{ 408{
409 int err = dn_fib_props[type].error; 409 int err = dn_fib_props[type].error;
410 410
@@ -424,7 +424,8 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *
424 for_nexthops(fi) { 424 for_nexthops(fi) {
425 if (nh->nh_flags & RTNH_F_DEAD) 425 if (nh->nh_flags & RTNH_F_DEAD)
426 continue; 426 continue;
427 if (!fl->oif || fl->oif == nh->nh_oif) 427 if (!fld->flowidn_oif ||
428 fld->flowidn_oif == nh->nh_oif)
428 break; 429 break;
429 } 430 }
430 if (nhsel < fi->fib_nhs) { 431 if (nhsel < fi->fib_nhs) {
@@ -445,7 +446,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *
445 return err; 446 return err;
446} 447}
447 448
448void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res) 449void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
449{ 450{
450 struct dn_fib_info *fi = res->fi; 451 struct dn_fib_info *fi = res->fi;
451 int w; 452 int w;
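These dn_fib.c changes are part of the flow-key split visible throughout the DECnet hunks: callers now fill a protocol-specific struct flowidn instead of the old multiplexed struct flowi, and the generic members carry a flowidn_ prefix. A short sketch of the field mapping as it appears in this patch (the function and its arguments are placeholders, not code from the tree):

	static void fill_decnet_flow_key(struct flowidn *fld, __le16 dst, __le16 src, int oif)
	{
		memset(fld, 0, sizeof(*fld));
		fld->daddr         = dst;		/* was fl.fld_dst   */
		fld->saddr         = src;		/* was fl.fld_src   */
		fld->flowidn_oif   = oif;		/* was fl.oif       */
		fld->flowidn_scope = RT_SCOPE_LINK;	/* was fl.fld_scope */
		fld->flowidn_proto = DNPROTO_NSP;	/* was fl.proto     */
	}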
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 2ef115277bea..bd78836a81eb 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -78,7 +78,7 @@ static void dn_nsp_send(struct sk_buff *skb)
78 struct sock *sk = skb->sk; 78 struct sock *sk = skb->sk;
79 struct dn_scp *scp = DN_SK(sk); 79 struct dn_scp *scp = DN_SK(sk);
80 struct dst_entry *dst; 80 struct dst_entry *dst;
81 struct flowi fl; 81 struct flowidn fld;
82 82
83 skb_reset_transport_header(skb); 83 skb_reset_transport_header(skb);
84 scp->stamp = jiffies; 84 scp->stamp = jiffies;
@@ -91,13 +91,13 @@ try_again:
91 return; 91 return;
92 } 92 }
93 93
94 memset(&fl, 0, sizeof(fl)); 94 memset(&fld, 0, sizeof(fld));
95 fl.oif = sk->sk_bound_dev_if; 95 fld.flowidn_oif = sk->sk_bound_dev_if;
96 fl.fld_src = dn_saddr2dn(&scp->addr); 96 fld.saddr = dn_saddr2dn(&scp->addr);
97 fl.fld_dst = dn_saddr2dn(&scp->peer); 97 fld.daddr = dn_saddr2dn(&scp->peer);
98 dn_sk_ports_copy(&fl, scp); 98 dn_sk_ports_copy(&fld, scp);
99 fl.proto = DNPROTO_NSP; 99 fld.flowidn_proto = DNPROTO_NSP;
100 if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) { 100 if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) {
101 dst = sk_dst_get(sk); 101 dst = sk_dst_get(sk);
102 sk->sk_route_caps = dst->dev->features; 102 sk->sk_route_caps = dst->dev->features;
103 goto try_again; 103 goto try_again;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5e636365d33c..74544bc6fdec 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops);
112static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); 112static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
113static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); 113static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
114static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); 114static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
115static void dn_dst_destroy(struct dst_entry *);
115static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); 116static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
116static void dn_dst_link_failure(struct sk_buff *); 117static void dn_dst_link_failure(struct sk_buff *);
117static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); 118static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = {
133 .check = dn_dst_check, 134 .check = dn_dst_check,
134 .default_advmss = dn_dst_default_advmss, 135 .default_advmss = dn_dst_default_advmss,
135 .default_mtu = dn_dst_default_mtu, 136 .default_mtu = dn_dst_default_mtu,
137 .cow_metrics = dst_cow_metrics_generic,
138 .destroy = dn_dst_destroy,
136 .negative_advice = dn_dst_negative_advice, 139 .negative_advice = dn_dst_negative_advice,
137 .link_failure = dn_dst_link_failure, 140 .link_failure = dn_dst_link_failure,
138 .update_pmtu = dn_dst_update_pmtu, 141 .update_pmtu = dn_dst_update_pmtu,
139}; 142};
140 143
144static void dn_dst_destroy(struct dst_entry *dst)
145{
146 dst_destroy_metrics_generic(dst);
147}
148
141static __inline__ unsigned dn_hash(__le16 src, __le16 dst) 149static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
142{ 150{
143 __u16 tmp = (__u16 __force)(src ^ dst); 151 __u16 tmp = (__u16 __force)(src ^ dst);
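dn_dst_ops now wires in the generic copy-on-write metrics handling: dst_cow_metrics_generic() gives a route a private metrics array the first time one is written, and the new dn_dst_destroy() returns that copy through dst_destroy_metrics_generic() when the entry dies. A bare sketch of the hook pair, with a hypothetical my_dst_ops standing in for a real ops table:

	static void my_dst_destroy(struct dst_entry *dst)
	{
		/* frees the private metrics block, if cow_metrics ever made one */
		dst_destroy_metrics_generic(dst);
	}

	static struct dst_ops my_dst_ops = {
		.cow_metrics	= dst_cow_metrics_generic,
		.destroy	= my_dst_destroy,
		/* remaining ops omitted; see dn_dst_ops above for the full set */
	};

Together with the dst_init_metrics(..., true) call further down, DECnet routes can share the FIB info's metric array read-only until the first write.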
@@ -274,14 +282,14 @@ static void dn_dst_link_failure(struct sk_buff *skb)
274{ 282{
275} 283}
276 284
277static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 285static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2)
278{ 286{
279 return ((fl1->fld_dst ^ fl2->fld_dst) | 287 return ((fl1->daddr ^ fl2->daddr) |
280 (fl1->fld_src ^ fl2->fld_src) | 288 (fl1->saddr ^ fl2->saddr) |
281 (fl1->mark ^ fl2->mark) | 289 (fl1->flowidn_mark ^ fl2->flowidn_mark) |
282 (fl1->fld_scope ^ fl2->fld_scope) | 290 (fl1->flowidn_scope ^ fl2->flowidn_scope) |
283 (fl1->oif ^ fl2->oif) | 291 (fl1->flowidn_oif ^ fl2->flowidn_oif) |
284 (fl1->iif ^ fl2->iif)) == 0; 292 (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0;
285} 293}
286 294
287static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) 295static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp)
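compare_keys() keeps its branch-free shape across the rename: each pair of fields is XORed (zero exactly when the pair is equal), the partial results are ORed together, and the keys match only if the accumulated value is still zero. A standalone illustration of the idiom, with plain integer fields instead of the real flowidn members:

	static inline int keys_equal(unsigned short a_dst, unsigned short b_dst,
				     unsigned short a_src, unsigned short b_src,
				     int a_oif, int b_oif)
	{
		/* any mismatching bit survives the ORs and forces a non-zero result */
		return ((a_dst ^ b_dst) |
			(a_src ^ b_src) |
			(a_oif ^ b_oif)) == 0;
	}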
@@ -295,7 +303,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
295 spin_lock_bh(&dn_rt_hash_table[hash].lock); 303 spin_lock_bh(&dn_rt_hash_table[hash].lock);
296 while ((rth = rcu_dereference_protected(*rthp, 304 while ((rth = rcu_dereference_protected(*rthp,
297 lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { 305 lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
298 if (compare_keys(&rth->fl, &rt->fl)) { 306 if (compare_keys(&rth->fld, &rt->fld)) {
299 /* Put it first */ 307 /* Put it first */
300 *rthp = rth->dst.dn_next; 308 *rthp = rth->dst.dn_next;
301 rcu_assign_pointer(rth->dst.dn_next, 309 rcu_assign_pointer(rth->dst.dn_next,
@@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
814{ 822{
815 struct dn_fib_info *fi = res->fi; 823 struct dn_fib_info *fi = res->fi;
816 struct net_device *dev = rt->dst.dev; 824 struct net_device *dev = rt->dst.dev;
825 unsigned int mss_metric;
817 struct neighbour *n; 826 struct neighbour *n;
818 unsigned int metric;
819 827
820 if (fi) { 828 if (fi) {
821 if (DN_FIB_RES_GW(*res) && 829 if (DN_FIB_RES_GW(*res) &&
822 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 830 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
823 rt->rt_gateway = DN_FIB_RES_GW(*res); 831 rt->rt_gateway = DN_FIB_RES_GW(*res);
824 dst_import_metrics(&rt->dst, fi->fib_metrics); 832 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
825 } 833 }
826 rt->rt_type = res->type; 834 rt->rt_type = res->type;
827 835
@@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
834 842
835 if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) 843 if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
836 dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); 844 dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
837 metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); 845 mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
838 if (metric) { 846 if (mss_metric) {
839 unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); 847 unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
840 if (metric > mss) 848 if (mss_metric > mss)
841 dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); 849 dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
842 } 850 }
843 return 0; 851 return 0;
@@ -895,14 +903,16 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re
895 return (daddr&~mask)|res->fi->fib_nh->nh_gw; 903 return (daddr&~mask)|res->fi->fib_nh->nh_gw;
896} 904}
897 905
898static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) 906static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard)
899{ 907{
900 struct flowi fl = { .fld_dst = oldflp->fld_dst, 908 struct flowidn fld = {
901 .fld_src = oldflp->fld_src, 909 .daddr = oldflp->daddr,
902 .fld_scope = RT_SCOPE_UNIVERSE, 910 .saddr = oldflp->saddr,
903 .mark = oldflp->mark, 911 .flowidn_scope = RT_SCOPE_UNIVERSE,
904 .iif = init_net.loopback_dev->ifindex, 912 .flowidn_mark = oldflp->flowidn_mark,
905 .oif = oldflp->oif }; 913 .flowidn_iif = init_net.loopback_dev->ifindex,
914 .flowidn_oif = oldflp->flowidn_oif,
915 };
906 struct dn_route *rt = NULL; 916 struct dn_route *rt = NULL;
907 struct net_device *dev_out = NULL, *dev; 917 struct net_device *dev_out = NULL, *dev;
908 struct neighbour *neigh = NULL; 918 struct neighbour *neigh = NULL;
@@ -916,13 +926,14 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
916 if (decnet_debug_level & 16) 926 if (decnet_debug_level & 16)
917 printk(KERN_DEBUG 927 printk(KERN_DEBUG
918 "dn_route_output_slow: dst=%04x src=%04x mark=%d" 928 "dn_route_output_slow: dst=%04x src=%04x mark=%d"
919 " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), 929 " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
920 le16_to_cpu(oldflp->fld_src), 930 le16_to_cpu(oldflp->saddr),
921 oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); 931 oldflp->flowidn_mark, init_net.loopback_dev->ifindex,
932 oldflp->flowidn_oif);
922 933
923 /* If we have an output interface, verify its a DECnet device */ 934 /* If we have an output interface, verify its a DECnet device */
924 if (oldflp->oif) { 935 if (oldflp->flowidn_oif) {
925 dev_out = dev_get_by_index(&init_net, oldflp->oif); 936 dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif);
926 err = -ENODEV; 937 err = -ENODEV;
927 if (dev_out && dev_out->dn_ptr == NULL) { 938 if (dev_out && dev_out->dn_ptr == NULL) {
928 dev_put(dev_out); 939 dev_put(dev_out);
@@ -933,11 +944,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
933 } 944 }
934 945
935 /* If we have a source address, verify that its a local address */ 946 /* If we have a source address, verify that its a local address */
936 if (oldflp->fld_src) { 947 if (oldflp->saddr) {
937 err = -EADDRNOTAVAIL; 948 err = -EADDRNOTAVAIL;
938 949
939 if (dev_out) { 950 if (dev_out) {
940 if (dn_dev_islocal(dev_out, oldflp->fld_src)) 951 if (dn_dev_islocal(dev_out, oldflp->saddr))
941 goto source_ok; 952 goto source_ok;
942 dev_put(dev_out); 953 dev_put(dev_out);
943 goto out; 954 goto out;
@@ -946,11 +957,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
946 for_each_netdev_rcu(&init_net, dev) { 957 for_each_netdev_rcu(&init_net, dev) {
947 if (!dev->dn_ptr) 958 if (!dev->dn_ptr)
948 continue; 959 continue;
949 if (!dn_dev_islocal(dev, oldflp->fld_src)) 960 if (!dn_dev_islocal(dev, oldflp->saddr))
950 continue; 961 continue;
951 if ((dev->flags & IFF_LOOPBACK) && 962 if ((dev->flags & IFF_LOOPBACK) &&
952 oldflp->fld_dst && 963 oldflp->daddr &&
953 !dn_dev_islocal(dev, oldflp->fld_dst)) 964 !dn_dev_islocal(dev, oldflp->daddr))
954 continue; 965 continue;
955 966
956 dev_out = dev; 967 dev_out = dev;
@@ -965,22 +976,22 @@ source_ok:
965 } 976 }
966 977
967 /* No destination? Assume its local */ 978 /* No destination? Assume its local */
968 if (!fl.fld_dst) { 979 if (!fld.daddr) {
969 fl.fld_dst = fl.fld_src; 980 fld.daddr = fld.saddr;
970 981
971 err = -EADDRNOTAVAIL; 982 err = -EADDRNOTAVAIL;
972 if (dev_out) 983 if (dev_out)
973 dev_put(dev_out); 984 dev_put(dev_out);
974 dev_out = init_net.loopback_dev; 985 dev_out = init_net.loopback_dev;
975 dev_hold(dev_out); 986 dev_hold(dev_out);
976 if (!fl.fld_dst) { 987 if (!fld.daddr) {
977 fl.fld_dst = 988 fld.daddr =
978 fl.fld_src = dnet_select_source(dev_out, 0, 989 fld.saddr = dnet_select_source(dev_out, 0,
979 RT_SCOPE_HOST); 990 RT_SCOPE_HOST);
980 if (!fl.fld_dst) 991 if (!fld.daddr)
981 goto out; 992 goto out;
982 } 993 }
983 fl.oif = init_net.loopback_dev->ifindex; 994 fld.flowidn_oif = init_net.loopback_dev->ifindex;
984 res.type = RTN_LOCAL; 995 res.type = RTN_LOCAL;
985 goto make_route; 996 goto make_route;
986 } 997 }
@@ -989,8 +1000,8 @@ source_ok:
989 printk(KERN_DEBUG 1000 printk(KERN_DEBUG
990 "dn_route_output_slow: initial checks complete." 1001 "dn_route_output_slow: initial checks complete."
991 " dst=%o4x src=%04x oif=%d try_hard=%d\n", 1002 " dst=%o4x src=%04x oif=%d try_hard=%d\n",
992 le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), 1003 le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
993 fl.oif, try_hard); 1004 fld.flowidn_oif, try_hard);
994 1005
995 /* 1006 /*
996 * N.B. If the kernel is compiled without router support then 1007 * N.B. If the kernel is compiled without router support then
@@ -998,7 +1009,7 @@ source_ok:
998 * will always be executed. 1009 * will always be executed.
999 */ 1010 */
1000 err = -ESRCH; 1011 err = -ESRCH;
1001 if (try_hard || (err = dn_fib_lookup(&fl, &res)) != 0) { 1012 if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) {
1002 struct dn_dev *dn_db; 1013 struct dn_dev *dn_db;
1003 if (err != -ESRCH) 1014 if (err != -ESRCH)
1004 goto out; 1015 goto out;
@@ -1013,19 +1024,19 @@ source_ok:
1013 * here 1024 * here
1014 */ 1025 */
1015 if (!try_hard) { 1026 if (!try_hard) {
1016 neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); 1027 neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr);
1017 if (neigh) { 1028 if (neigh) {
1018 if ((oldflp->oif && 1029 if ((oldflp->flowidn_oif &&
1019 (neigh->dev->ifindex != oldflp->oif)) || 1030 (neigh->dev->ifindex != oldflp->flowidn_oif)) ||
1020 (oldflp->fld_src && 1031 (oldflp->saddr &&
1021 (!dn_dev_islocal(neigh->dev, 1032 (!dn_dev_islocal(neigh->dev,
1022 oldflp->fld_src)))) { 1033 oldflp->saddr)))) {
1023 neigh_release(neigh); 1034 neigh_release(neigh);
1024 neigh = NULL; 1035 neigh = NULL;
1025 } else { 1036 } else {
1026 if (dev_out) 1037 if (dev_out)
1027 dev_put(dev_out); 1038 dev_put(dev_out);
1028 if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { 1039 if (dn_dev_islocal(neigh->dev, fld.daddr)) {
1029 dev_out = init_net.loopback_dev; 1040 dev_out = init_net.loopback_dev;
1030 res.type = RTN_LOCAL; 1041 res.type = RTN_LOCAL;
1031 } else { 1042 } else {
@@ -1045,7 +1056,7 @@ source_ok:
1045 goto out; 1056 goto out;
1046 dn_db = rcu_dereference_raw(dev_out->dn_ptr); 1057 dn_db = rcu_dereference_raw(dev_out->dn_ptr);
1047 /* Possible improvement - check all devices for local addr */ 1058 /* Possible improvement - check all devices for local addr */
1048 if (dn_dev_islocal(dev_out, fl.fld_dst)) { 1059 if (dn_dev_islocal(dev_out, fld.daddr)) {
1049 dev_put(dev_out); 1060 dev_put(dev_out);
1050 dev_out = init_net.loopback_dev; 1061 dev_out = init_net.loopback_dev;
1051 dev_hold(dev_out); 1062 dev_hold(dev_out);
@@ -1061,16 +1072,16 @@ select_source:
1061 if (neigh) 1072 if (neigh)
1062 gateway = ((struct dn_neigh *)neigh)->addr; 1073 gateway = ((struct dn_neigh *)neigh)->addr;
1063 if (gateway == 0) 1074 if (gateway == 0)
1064 gateway = fl.fld_dst; 1075 gateway = fld.daddr;
1065 if (fl.fld_src == 0) { 1076 if (fld.saddr == 0) {
1066 fl.fld_src = dnet_select_source(dev_out, gateway, 1077 fld.saddr = dnet_select_source(dev_out, gateway,
1067 res.type == RTN_LOCAL ? 1078 res.type == RTN_LOCAL ?
1068 RT_SCOPE_HOST : 1079 RT_SCOPE_HOST :
1069 RT_SCOPE_LINK); 1080 RT_SCOPE_LINK);
1070 if (fl.fld_src == 0 && res.type != RTN_LOCAL) 1081 if (fld.saddr == 0 && res.type != RTN_LOCAL)
1071 goto e_addr; 1082 goto e_addr;
1072 } 1083 }
1073 fl.oif = dev_out->ifindex; 1084 fld.flowidn_oif = dev_out->ifindex;
1074 goto make_route; 1085 goto make_route;
1075 } 1086 }
1076 free_res = 1; 1087 free_res = 1;
@@ -1079,64 +1090,60 @@ select_source:
1079 goto e_inval; 1090 goto e_inval;
1080 1091
1081 if (res.type == RTN_LOCAL) { 1092 if (res.type == RTN_LOCAL) {
1082 if (!fl.fld_src) 1093 if (!fld.saddr)
1083 fl.fld_src = fl.fld_dst; 1094 fld.saddr = fld.daddr;
1084 if (dev_out) 1095 if (dev_out)
1085 dev_put(dev_out); 1096 dev_put(dev_out);
1086 dev_out = init_net.loopback_dev; 1097 dev_out = init_net.loopback_dev;
1087 dev_hold(dev_out); 1098 dev_hold(dev_out);
1088 fl.oif = dev_out->ifindex; 1099 fld.flowidn_oif = dev_out->ifindex;
1089 if (res.fi) 1100 if (res.fi)
1090 dn_fib_info_put(res.fi); 1101 dn_fib_info_put(res.fi);
1091 res.fi = NULL; 1102 res.fi = NULL;
1092 goto make_route; 1103 goto make_route;
1093 } 1104 }
1094 1105
1095 if (res.fi->fib_nhs > 1 && fl.oif == 0) 1106 if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
1096 dn_fib_select_multipath(&fl, &res); 1107 dn_fib_select_multipath(&fld, &res);
1097 1108
1098 /* 1109 /*
1099 * We could add some logic to deal with default routes here and 1110 * We could add some logic to deal with default routes here and
1100 * get rid of some of the special casing above. 1111 * get rid of some of the special casing above.
1101 */ 1112 */
1102 1113
1103 if (!fl.fld_src) 1114 if (!fld.saddr)
1104 fl.fld_src = DN_FIB_RES_PREFSRC(res); 1115 fld.saddr = DN_FIB_RES_PREFSRC(res);
1105 1116
1106 if (dev_out) 1117 if (dev_out)
1107 dev_put(dev_out); 1118 dev_put(dev_out);
1108 dev_out = DN_FIB_RES_DEV(res); 1119 dev_out = DN_FIB_RES_DEV(res);
1109 dev_hold(dev_out); 1120 dev_hold(dev_out);
1110 fl.oif = dev_out->ifindex; 1121 fld.flowidn_oif = dev_out->ifindex;
1111 gateway = DN_FIB_RES_GW(res); 1122 gateway = DN_FIB_RES_GW(res);
1112 1123
1113make_route: 1124make_route:
1114 if (dev_out->flags & IFF_LOOPBACK) 1125 if (dev_out->flags & IFF_LOOPBACK)
1115 flags |= RTCF_LOCAL; 1126 flags |= RTCF_LOCAL;
1116 1127
1117 rt = dst_alloc(&dn_dst_ops); 1128 rt = dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST);
1118 if (rt == NULL) 1129 if (rt == NULL)
1119 goto e_nobufs; 1130 goto e_nobufs;
1120 1131
1121 atomic_set(&rt->dst.__refcnt, 1); 1132 memset(&rt->fld, 0, sizeof(rt->fld));
1122 rt->dst.flags = DST_HOST; 1133 rt->fld.saddr = oldflp->saddr;
1123 1134 rt->fld.daddr = oldflp->daddr;
1124 rt->fl.fld_src = oldflp->fld_src; 1135 rt->fld.flowidn_oif = oldflp->flowidn_oif;
1125 rt->fl.fld_dst = oldflp->fld_dst; 1136 rt->fld.flowidn_iif = 0;
1126 rt->fl.oif = oldflp->oif; 1137 rt->fld.flowidn_mark = oldflp->flowidn_mark;
1127 rt->fl.iif = 0;
1128 rt->fl.mark = oldflp->mark;
1129 1138
1130 rt->rt_saddr = fl.fld_src; 1139 rt->rt_saddr = fld.saddr;
1131 rt->rt_daddr = fl.fld_dst; 1140 rt->rt_daddr = fld.daddr;
1132 rt->rt_gateway = gateway ? gateway : fl.fld_dst; 1141 rt->rt_gateway = gateway ? gateway : fld.daddr;
1133 rt->rt_local_src = fl.fld_src; 1142 rt->rt_local_src = fld.saddr;
1134 1143
1135 rt->rt_dst_map = fl.fld_dst; 1144 rt->rt_dst_map = fld.daddr;
1136 rt->rt_src_map = fl.fld_src; 1145 rt->rt_src_map = fld.saddr;
1137 1146
1138 rt->dst.dev = dev_out;
1139 dev_hold(dev_out);
1140 rt->dst.neighbour = neigh; 1147 rt->dst.neighbour = neigh;
1141 neigh = NULL; 1148 neigh = NULL;
1142 1149
@@ -1151,7 +1158,7 @@ make_route:
1151 if (err) 1158 if (err)
1152 goto e_neighbour; 1159 goto e_neighbour;
1153 1160
1154 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); 1161 hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
1155 dn_insert_route(rt, hash, (struct dn_route **)pprt); 1162 dn_insert_route(rt, hash, (struct dn_route **)pprt);
1156 1163
1157done: 1164done:
@@ -1182,20 +1189,20 @@ e_neighbour:
1182/* 1189/*
1183 * N.B. The flags may be moved into the flowi at some future stage. 1190 * N.B. The flags may be moved into the flowi at some future stage.
1184 */ 1191 */
1185static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *flp, int flags) 1192static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags)
1186{ 1193{
1187 unsigned hash = dn_hash(flp->fld_src, flp->fld_dst); 1194 unsigned hash = dn_hash(flp->saddr, flp->daddr);
1188 struct dn_route *rt = NULL; 1195 struct dn_route *rt = NULL;
1189 1196
1190 if (!(flags & MSG_TRYHARD)) { 1197 if (!(flags & MSG_TRYHARD)) {
1191 rcu_read_lock_bh(); 1198 rcu_read_lock_bh();
1192 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; 1199 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
1193 rt = rcu_dereference_bh(rt->dst.dn_next)) { 1200 rt = rcu_dereference_bh(rt->dst.dn_next)) {
1194 if ((flp->fld_dst == rt->fl.fld_dst) && 1201 if ((flp->daddr == rt->fld.daddr) &&
1195 (flp->fld_src == rt->fl.fld_src) && 1202 (flp->saddr == rt->fld.saddr) &&
1196 (flp->mark == rt->fl.mark) && 1203 (flp->flowidn_mark == rt->fld.flowidn_mark) &&
1197 dn_is_output_route(rt) && 1204 dn_is_output_route(rt) &&
1198 (rt->fl.oif == flp->oif)) { 1205 (rt->fld.flowidn_oif == flp->flowidn_oif)) {
1199 dst_use(&rt->dst, jiffies); 1206 dst_use(&rt->dst, jiffies);
1200 rcu_read_unlock_bh(); 1207 rcu_read_unlock_bh();
1201 *pprt = &rt->dst; 1208 *pprt = &rt->dst;
@@ -1208,25 +1215,36 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
1208 return dn_route_output_slow(pprt, flp, flags); 1215 return dn_route_output_slow(pprt, flp, flags);
1209} 1216}
1210 1217
1211static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int flags) 1218static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags)
1212{ 1219{
1213 int err; 1220 int err;
1214 1221
1215 err = __dn_route_output_key(pprt, flp, flags); 1222 err = __dn_route_output_key(pprt, flp, flags);
1216 if (err == 0 && flp->proto) { 1223 if (err == 0 && flp->flowidn_proto) {
1217 err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); 1224 *pprt = xfrm_lookup(&init_net, *pprt,
1225 flowidn_to_flowi(flp), NULL, 0);
1226 if (IS_ERR(*pprt)) {
1227 err = PTR_ERR(*pprt);
1228 *pprt = NULL;
1229 }
1218 } 1230 }
1219 return err; 1231 return err;
1220} 1232}
1221 1233
1222int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock *sk, int flags) 1234int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags)
1223{ 1235{
1224 int err; 1236 int err;
1225 1237
1226 err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); 1238 err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
1227 if (err == 0 && fl->proto) { 1239 if (err == 0 && fl->flowidn_proto) {
1228 err = xfrm_lookup(&init_net, pprt, fl, sk, 1240 if (!(flags & MSG_DONTWAIT))
1229 (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT); 1241 fl->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP;
1242 *pprt = xfrm_lookup(&init_net, *pprt,
1243 flowidn_to_flowi(fl), sk, 0);
1244 if (IS_ERR(*pprt)) {
1245 err = PTR_ERR(*pprt);
1246 *pprt = NULL;
1247 }
1230 } 1248 }
1231 return err; 1249 return err;
1232} 1250}
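Both output-key helpers adopt the new xfrm_lookup() convention shown in these hunks: rather than filling a struct dst_entry ** and returning an error code, xfrm_lookup() takes the already-resolved dst and hands back either the (possibly transformed) dst or an ERR_PTR() value, and blocking callers now request sleeping by setting FLOWI_FLAG_CAN_SLEEP in the flow key instead of passing XFRM_LOOKUP_WAIT. A condensed sketch of the caller-side pattern, not a complete routing path:

	static int route_and_transform(struct dst_entry **pprt, struct flowidn *fld,
				       struct sock *sk, int can_sleep)
	{
		int err = __dn_route_output_key(pprt, fld, 0);

		if (err == 0 && fld->flowidn_proto) {
			if (can_sleep)
				fld->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP;
			*pprt = xfrm_lookup(&init_net, *pprt, flowidn_to_flowi(fld), sk, 0);
			if (IS_ERR(*pprt)) {
				err = PTR_ERR(*pprt);	/* the xfrm error replaces the route */
				*pprt = NULL;
			}
		}
		return err;
	}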
@@ -1243,11 +1261,13 @@ static int dn_route_input_slow(struct sk_buff *skb)
1243 int flags = 0; 1261 int flags = 0;
1244 __le16 gateway = 0; 1262 __le16 gateway = 0;
1245 __le16 local_src = 0; 1263 __le16 local_src = 0;
1246 struct flowi fl = { .fld_dst = cb->dst, 1264 struct flowidn fld = {
1247 .fld_src = cb->src, 1265 .daddr = cb->dst,
1248 .fld_scope = RT_SCOPE_UNIVERSE, 1266 .saddr = cb->src,
1249 .mark = skb->mark, 1267 .flowidn_scope = RT_SCOPE_UNIVERSE,
1250 .iif = skb->dev->ifindex }; 1268 .flowidn_mark = skb->mark,
1269 .flowidn_iif = skb->dev->ifindex,
1270 };
1251 struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; 1271 struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
1252 int err = -EINVAL; 1272 int err = -EINVAL;
1253 int free_res = 0; 1273 int free_res = 0;
@@ -1258,7 +1278,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
1258 goto out; 1278 goto out;
1259 1279
1260 /* Zero source addresses are not allowed */ 1280 /* Zero source addresses are not allowed */
1261 if (fl.fld_src == 0) 1281 if (fld.saddr == 0)
1262 goto out; 1282 goto out;
1263 1283
1264 /* 1284 /*
@@ -1272,7 +1292,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
1272 if (dn_dev_islocal(in_dev, cb->src)) 1292 if (dn_dev_islocal(in_dev, cb->src))
1273 goto out; 1293 goto out;
1274 1294
1275 err = dn_fib_lookup(&fl, &res); 1295 err = dn_fib_lookup(&fld, &res);
1276 if (err) { 1296 if (err) {
1277 if (err != -ESRCH) 1297 if (err != -ESRCH)
1278 goto out; 1298 goto out;
@@ -1284,7 +1304,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
1284 1304
1285 res.type = RTN_LOCAL; 1305 res.type = RTN_LOCAL;
1286 } else { 1306 } else {
1287 __le16 src_map = fl.fld_src; 1307 __le16 src_map = fld.saddr;
1288 free_res = 1; 1308 free_res = 1;
1289 1309
1290 out_dev = DN_FIB_RES_DEV(res); 1310 out_dev = DN_FIB_RES_DEV(res);
@@ -1297,22 +1317,22 @@ static int dn_route_input_slow(struct sk_buff *skb)
1297 dev_hold(out_dev); 1317 dev_hold(out_dev);
1298 1318
1299 if (res.r) 1319 if (res.r)
1300 src_map = fl.fld_src; /* no NAT support for now */ 1320 src_map = fld.saddr; /* no NAT support for now */
1301 1321
1302 gateway = DN_FIB_RES_GW(res); 1322 gateway = DN_FIB_RES_GW(res);
1303 if (res.type == RTN_NAT) { 1323 if (res.type == RTN_NAT) {
1304 fl.fld_dst = dn_fib_rules_map_destination(fl.fld_dst, &res); 1324 fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res);
1305 dn_fib_res_put(&res); 1325 dn_fib_res_put(&res);
1306 free_res = 0; 1326 free_res = 0;
1307 if (dn_fib_lookup(&fl, &res)) 1327 if (dn_fib_lookup(&fld, &res))
1308 goto e_inval; 1328 goto e_inval;
1309 free_res = 1; 1329 free_res = 1;
1310 if (res.type != RTN_UNICAST) 1330 if (res.type != RTN_UNICAST)
1311 goto e_inval; 1331 goto e_inval;
1312 flags |= RTCF_DNAT; 1332 flags |= RTCF_DNAT;
1313 gateway = fl.fld_dst; 1333 gateway = fld.daddr;
1314 } 1334 }
1315 fl.fld_src = src_map; 1335 fld.saddr = src_map;
1316 } 1336 }
1317 1337
1318 switch(res.type) { 1338 switch(res.type) {
@@ -1326,8 +1346,8 @@ static int dn_route_input_slow(struct sk_buff *skb)
1326 if (dn_db->parms.forwarding == 0) 1346 if (dn_db->parms.forwarding == 0)
1327 goto e_inval; 1347 goto e_inval;
1328 1348
1329 if (res.fi->fib_nhs > 1 && fl.oif == 0) 1349 if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
1330 dn_fib_select_multipath(&fl, &res); 1350 dn_fib_select_multipath(&fld, &res);
1331 1351
1332 /* 1352 /*
1333 * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT 1353 * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
@@ -1345,8 +1365,8 @@ static int dn_route_input_slow(struct sk_buff *skb)
1345 break; 1365 break;
1346 case RTN_LOCAL: 1366 case RTN_LOCAL:
1347 flags |= RTCF_LOCAL; 1367 flags |= RTCF_LOCAL;
1348 fl.fld_src = cb->dst; 1368 fld.saddr = cb->dst;
1349 fl.fld_dst = cb->src; 1369 fld.daddr = cb->src;
1350 1370
1351 /* Routing tables gave us a gateway */ 1371 /* Routing tables gave us a gateway */
1352 if (gateway) 1372 if (gateway)
@@ -1375,29 +1395,28 @@ static int dn_route_input_slow(struct sk_buff *skb)
1375 } 1395 }
1376 1396
1377make_route: 1397make_route:
1378 rt = dst_alloc(&dn_dst_ops); 1398 rt = dst_alloc(&dn_dst_ops, out_dev, 0, 0, DST_HOST);
1379 if (rt == NULL) 1399 if (rt == NULL)
1380 goto e_nobufs; 1400 goto e_nobufs;
1381 1401
1382 rt->rt_saddr = fl.fld_src; 1402 memset(&rt->fld, 0, sizeof(rt->fld));
1383 rt->rt_daddr = fl.fld_dst; 1403 rt->rt_saddr = fld.saddr;
1384 rt->rt_gateway = fl.fld_dst; 1404 rt->rt_daddr = fld.daddr;
1405 rt->rt_gateway = fld.daddr;
1385 if (gateway) 1406 if (gateway)
1386 rt->rt_gateway = gateway; 1407 rt->rt_gateway = gateway;
1387 rt->rt_local_src = local_src ? local_src : rt->rt_saddr; 1408 rt->rt_local_src = local_src ? local_src : rt->rt_saddr;
1388 1409
1389 rt->rt_dst_map = fl.fld_dst; 1410 rt->rt_dst_map = fld.daddr;
1390 rt->rt_src_map = fl.fld_src; 1411 rt->rt_src_map = fld.saddr;
1391 1412
1392 rt->fl.fld_src = cb->src; 1413 rt->fld.saddr = cb->src;
1393 rt->fl.fld_dst = cb->dst; 1414 rt->fld.daddr = cb->dst;
1394 rt->fl.oif = 0; 1415 rt->fld.flowidn_oif = 0;
1395 rt->fl.iif = in_dev->ifindex; 1416 rt->fld.flowidn_iif = in_dev->ifindex;
1396 rt->fl.mark = fl.mark; 1417 rt->fld.flowidn_mark = fld.flowidn_mark;
1397 1418
1398 rt->dst.flags = DST_HOST;
1399 rt->dst.neighbour = neigh; 1419 rt->dst.neighbour = neigh;
1400 rt->dst.dev = out_dev;
1401 rt->dst.lastuse = jiffies; 1420 rt->dst.lastuse = jiffies;
1402 rt->dst.output = dn_rt_bug; 1421 rt->dst.output = dn_rt_bug;
1403 switch(res.type) { 1422 switch(res.type) {
@@ -1416,14 +1435,12 @@ make_route:
1416 rt->dst.input = dst_discard; 1435 rt->dst.input = dst_discard;
1417 } 1436 }
1418 rt->rt_flags = flags; 1437 rt->rt_flags = flags;
1419 if (rt->dst.dev)
1420 dev_hold(rt->dst.dev);
1421 1438
1422 err = dn_rt_set_next_hop(rt, &res); 1439 err = dn_rt_set_next_hop(rt, &res);
1423 if (err) 1440 if (err)
1424 goto e_neighbour; 1441 goto e_neighbour;
1425 1442
1426 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); 1443 hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
1427 dn_insert_route(rt, hash, &rt); 1444 dn_insert_route(rt, hash, &rt);
1428 skb_dst_set(skb, &rt->dst); 1445 skb_dst_set(skb, &rt->dst);
1429 1446
@@ -1463,11 +1480,11 @@ static int dn_route_input(struct sk_buff *skb)
1463 rcu_read_lock(); 1480 rcu_read_lock();
1464 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; 1481 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
1465 rt = rcu_dereference(rt->dst.dn_next)) { 1482 rt = rcu_dereference(rt->dst.dn_next)) {
1466 if ((rt->fl.fld_src == cb->src) && 1483 if ((rt->fld.saddr == cb->src) &&
1467 (rt->fl.fld_dst == cb->dst) && 1484 (rt->fld.daddr == cb->dst) &&
1468 (rt->fl.oif == 0) && 1485 (rt->fld.flowidn_oif == 0) &&
1469 (rt->fl.mark == skb->mark) && 1486 (rt->fld.flowidn_mark == skb->mark) &&
1470 (rt->fl.iif == cb->iif)) { 1487 (rt->fld.flowidn_iif == cb->iif)) {
1471 dst_use(&rt->dst, jiffies); 1488 dst_use(&rt->dst, jiffies);
1472 rcu_read_unlock(); 1489 rcu_read_unlock();
1473 skb_dst_set(skb, (struct dst_entry *)rt); 1490 skb_dst_set(skb, (struct dst_entry *)rt);
@@ -1503,9 +1520,9 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1503 if (rt->rt_flags & RTCF_NOTIFY) 1520 if (rt->rt_flags & RTCF_NOTIFY)
1504 r->rtm_flags |= RTM_F_NOTIFY; 1521 r->rtm_flags |= RTM_F_NOTIFY;
1505 RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr); 1522 RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr);
1506 if (rt->fl.fld_src) { 1523 if (rt->fld.saddr) {
1507 r->rtm_src_len = 16; 1524 r->rtm_src_len = 16;
1508 RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); 1525 RTA_PUT(skb, RTA_SRC, 2, &rt->fld.saddr);
1509 } 1526 }
1510 if (rt->dst.dev) 1527 if (rt->dst.dev)
1511 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); 1528 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex);
@@ -1524,7 +1541,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1524 rt->dst.error) < 0) 1541 rt->dst.error) < 0)
1525 goto rtattr_failure; 1542 goto rtattr_failure;
1526 if (dn_is_input_route(rt)) 1543 if (dn_is_input_route(rt))
1527 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 1544 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fld.flowidn_iif);
1528 1545
1529 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1546 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1530 return skb->len; 1547 return skb->len;
@@ -1547,13 +1564,13 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1547 struct dn_skb_cb *cb; 1564 struct dn_skb_cb *cb;
1548 int err; 1565 int err;
1549 struct sk_buff *skb; 1566 struct sk_buff *skb;
1550 struct flowi fl; 1567 struct flowidn fld;
1551 1568
1552 if (!net_eq(net, &init_net)) 1569 if (!net_eq(net, &init_net))
1553 return -EINVAL; 1570 return -EINVAL;
1554 1571
1555 memset(&fl, 0, sizeof(fl)); 1572 memset(&fld, 0, sizeof(fld));
1556 fl.proto = DNPROTO_NSP; 1573 fld.flowidn_proto = DNPROTO_NSP;
1557 1574
1558 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1575 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1559 if (skb == NULL) 1576 if (skb == NULL)
@@ -1562,15 +1579,15 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1562 cb = DN_SKB_CB(skb); 1579 cb = DN_SKB_CB(skb);
1563 1580
1564 if (rta[RTA_SRC-1]) 1581 if (rta[RTA_SRC-1])
1565 memcpy(&fl.fld_src, RTA_DATA(rta[RTA_SRC-1]), 2); 1582 memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2);
1566 if (rta[RTA_DST-1]) 1583 if (rta[RTA_DST-1])
1567 memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2); 1584 memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2);
1568 if (rta[RTA_IIF-1]) 1585 if (rta[RTA_IIF-1])
1569 memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); 1586 memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1570 1587
1571 if (fl.iif) { 1588 if (fld.flowidn_iif) {
1572 struct net_device *dev; 1589 struct net_device *dev;
1573 if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { 1590 if ((dev = dev_get_by_index(&init_net, fld.flowidn_iif)) == NULL) {
1574 kfree_skb(skb); 1591 kfree_skb(skb);
1575 return -ENODEV; 1592 return -ENODEV;
1576 } 1593 }
@@ -1581,8 +1598,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1581 } 1598 }
1582 skb->protocol = htons(ETH_P_DNA_RT); 1599 skb->protocol = htons(ETH_P_DNA_RT);
1583 skb->dev = dev; 1600 skb->dev = dev;
1584 cb->src = fl.fld_src; 1601 cb->src = fld.saddr;
1585 cb->dst = fl.fld_dst; 1602 cb->dst = fld.daddr;
1586 local_bh_disable(); 1603 local_bh_disable();
1587 err = dn_route_input(skb); 1604 err = dn_route_input(skb);
1588 local_bh_enable(); 1605 local_bh_enable();
@@ -1594,8 +1611,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1594 int oif = 0; 1611 int oif = 0;
1595 if (rta[RTA_OIF - 1]) 1612 if (rta[RTA_OIF - 1])
1596 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); 1613 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
1597 fl.oif = oif; 1614 fld.flowidn_oif = oif;
1598 err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0); 1615 err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
1599 } 1616 }
1600 1617
1601 if (skb->dev) 1618 if (skb->dev)
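Both route constructors in this file also move to the extended dst_alloc() signature: the device, the initial reference count, the initial obsolete value and the DST flags are passed at allocation time, which is why the separate rt->dst.dev / rt->dst.flags assignments and the extra dev_hold() calls disappear from the hunks above. A hedged sketch of the output-path call (the input path passes 0 for the initial reference because that entry only lives in the route cache):

	static struct dn_route *alloc_output_route(struct net_device *dev_out)
	{
		/* arguments: ops, device, initial refcount, initial obsolete, DST_* flags */
		return dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST);
	}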
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 6eb91df3c550..f0efb0ccfeca 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -49,14 +49,15 @@ struct dn_fib_rule
49}; 49};
50 50
51 51
52int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) 52int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res)
53{ 53{
54 struct fib_lookup_arg arg = { 54 struct fib_lookup_arg arg = {
55 .result = res, 55 .result = res,
56 }; 56 };
57 int err; 57 int err;
58 58
59 err = fib_rules_lookup(dn_fib_rules_ops, flp, 0, &arg); 59 err = fib_rules_lookup(dn_fib_rules_ops,
60 flowidn_to_flowi(flp), 0, &arg);
60 res->r = arg.rule; 61 res->r = arg.rule;
61 62
62 return err; 63 return err;
@@ -65,6 +66,7 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
65static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, 66static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
66 int flags, struct fib_lookup_arg *arg) 67 int flags, struct fib_lookup_arg *arg)
67{ 68{
69 struct flowidn *fld = &flp->u.dn;
68 int err = -EAGAIN; 70 int err = -EAGAIN;
69 struct dn_fib_table *tbl; 71 struct dn_fib_table *tbl;
70 72
@@ -90,7 +92,7 @@ static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
90 if (tbl == NULL) 92 if (tbl == NULL)
91 goto errout; 93 goto errout;
92 94
93 err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); 95 err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result);
94 if (err > 0) 96 if (err > 0)
95 err = -EAGAIN; 97 err = -EAGAIN;
96errout: 98errout:
@@ -104,8 +106,9 @@ static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = {
104static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 106static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
105{ 107{
106 struct dn_fib_rule *r = (struct dn_fib_rule *)rule; 108 struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
107 __le16 daddr = fl->fld_dst; 109 struct flowidn *fld = &fl->u.dn;
108 __le16 saddr = fl->fld_src; 110 __le16 daddr = fld->daddr;
111 __le16 saddr = fld->saddr;
109 112
110 if (((saddr ^ r->src) & r->srcmask) || 113 if (((saddr ^ r->src) & r->srcmask) ||
111 ((daddr ^ r->dst) & r->dstmask)) 114 ((daddr ^ r->dst) & r->dstmask))
@@ -175,7 +178,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
175 178
176unsigned dnet_addr_type(__le16 addr) 179unsigned dnet_addr_type(__le16 addr)
177{ 180{
178 struct flowi fl = { .fld_dst = addr }; 181 struct flowidn fld = { .daddr = addr };
179 struct dn_fib_res res; 182 struct dn_fib_res res;
180 unsigned ret = RTN_UNICAST; 183 unsigned ret = RTN_UNICAST;
181 struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); 184 struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
@@ -183,7 +186,7 @@ unsigned dnet_addr_type(__le16 addr)
183 res.r = NULL; 186 res.r = NULL;
184 187
185 if (tb) { 188 if (tb) {
186 if (!tb->lookup(tb, &fl, &res)) { 189 if (!tb->lookup(tb, &fld, &res)) {
187 ret = res.type; 190 ret = res.type;
188 dn_fib_res_put(&res); 191 dn_fib_res_put(&res);
189 } 192 }
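The fib_rules core still works on struct flowi, so the DECnet callbacks convert at the boundary: dn_fib_lookup() wraps its struct flowidn with flowidn_to_flowi() before calling fib_rules_lookup(), and the match/action callbacks unwrap it again through the union member fl->u.dn. A small sketch of that unwrap, assuming a callback with the same prototype as dn_fib_rule_match() and a placeholder predicate:

	static int my_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
	{
		/* struct flowi is now a union of per-family keys; DECnet lives in u.dn */
		struct flowidn *fld = &fl->u.dn;

		return fld->daddr != 0;		/* placeholder test, not from the patch */
	}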
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f2abd3755690..bd0a52dd1d40 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -59,7 +59,6 @@ struct dn_hash
59}; 59};
60 60
61#define dz_key_0(key) ((key).datum = 0) 61#define dz_key_0(key) ((key).datum = 0)
62#define dz_prefix(key,dz) ((key).datum)
63 62
64#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ 63#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
65 for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 64 for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
@@ -124,11 +123,11 @@ static inline void dn_rebuild_zone(struct dn_zone *dz,
124 struct dn_fib_node **old_ht, 123 struct dn_fib_node **old_ht,
125 int old_divisor) 124 int old_divisor)
126{ 125{
127 int i;
128 struct dn_fib_node *f, **fp, *next; 126 struct dn_fib_node *f, **fp, *next;
127 int i;
129 128
130 for(i = 0; i < old_divisor; i++) { 129 for(i = 0; i < old_divisor; i++) {
131 for(f = old_ht[i]; f; f = f->fn_next) { 130 for(f = old_ht[i]; f; f = next) {
132 next = f->fn_next; 131 next = f->fn_next;
133 for(fp = dn_chain_p(f->fn_key, dz); 132 for(fp = dn_chain_p(f->fn_key, dz);
134 *fp && dn_key_leq((*fp)->fn_key, f->fn_key); 133 *fp && dn_key_leq((*fp)->fn_key, f->fn_key);
@@ -765,7 +764,7 @@ static int dn_fib_table_flush(struct dn_fib_table *tb)
765 return found; 764 return found;
766} 765}
767 766
768static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, struct dn_fib_res *res) 767static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res)
769{ 768{
770 int err; 769 int err;
771 struct dn_zone *dz; 770 struct dn_zone *dz;
@@ -774,7 +773,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp,
774 read_lock(&dn_fib_tables_lock); 773 read_lock(&dn_fib_tables_lock);
775 for(dz = t->dh_zone_list; dz; dz = dz->dz_next) { 774 for(dz = t->dh_zone_list; dz; dz = dz->dz_next) {
776 struct dn_fib_node *f; 775 struct dn_fib_node *f;
777 dn_fib_key_t k = dz_key(flp->fld_dst, dz); 776 dn_fib_key_t k = dz_key(flp->daddr, dz);
778 777
779 for(f = dz_chain(k, dz); f; f = f->fn_next) { 778 for(f = dz_chain(k, dz); f; f = f->fn_next) {
780 if (!dn_key_eq(k, f->fn_key)) { 779 if (!dn_key_eq(k, f->fn_key)) {
@@ -789,7 +788,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp,
789 if (f->fn_state&DN_S_ZOMBIE) 788 if (f->fn_state&DN_S_ZOMBIE)
790 continue; 789 continue;
791 790
792 if (f->fn_scope < flp->fld_scope) 791 if (f->fn_scope < flp->flowidn_scope)
793 continue; 792 continue;
794 793
795 err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res); 794 err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res);
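The dn_rebuild_zone() change fixes an unlink-while-walking bug: each node is re-threaded into the new hash chains inside the loop body, which rewrites f->fn_next, so the walk has to advance through a next pointer saved before the node is moved. The general shape of the safe traversal, with a simplified, hypothetical node type:

	struct node { struct node *next; };

	static void move_all(struct node *old_head, struct node **new_head)
	{
		struct node *n, *next;

		for (n = old_head; n; n = next) {
			next = n->next;		/* save before relinking clobbers it */
			n->next = *new_head;	/* re-thread onto the new chain */
			*new_head = n;
		}
	}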
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 739435a6af39..cfa7a5e1c5c9 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
67 size_t result_len = 0; 67 size_t result_len = 0;
68 const char *data = _data, *end, *opt; 68 const char *data = _data, *end, *opt;
69 69
70 kenter("%%%d,%s,'%s',%zu", 70 kenter("%%%d,%s,'%*.*s',%zu",
71 key->serial, key->description, data, datalen); 71 key->serial, key->description,
72 (int)datalen, (int)datalen, data, datalen);
72 73
73 if (datalen <= 1 || !data || data[datalen - 1] != '\0') 74 if (datalen <= 1 || !data || data[datalen - 1] != '\0')
74 return -EINVAL; 75 return -EINVAL;
@@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
217 seq_printf(m, ": %u", key->datalen); 218 seq_printf(m, ": %u", key->datalen);
218} 219}
219 220
221/*
222 * read the DNS data
223 * - the key's semaphore is read-locked
224 */
225static long dns_resolver_read(const struct key *key,
226 char __user *buffer, size_t buflen)
227{
228 if (key->type_data.x[0])
229 return key->type_data.x[0];
230
231 return user_read(key, buffer, buflen);
232}
233
220struct key_type key_type_dns_resolver = { 234struct key_type key_type_dns_resolver = {
221 .name = "dns_resolver", 235 .name = "dns_resolver",
222 .instantiate = dns_resolver_instantiate, 236 .instantiate = dns_resolver_instantiate,
@@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = {
224 .revoke = user_revoke, 238 .revoke = user_revoke,
225 .destroy = user_destroy, 239 .destroy = user_destroy,
226 .describe = dns_resolver_describe, 240 .describe = dns_resolver_describe,
227 .read = user_read, 241 .read = dns_resolver_read,
228}; 242};
229 243
230static int __init init_dns_resolver(void) 244static int __init init_dns_resolver(void)
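Two small hardening changes in dns_key.c: the kenter() trace now uses the '%*.*s' form, where field width and precision are consumed as extra int arguments so the format engine reads at most datalen bytes of a payload that has not yet been verified to be NUL-terminated, and the key type gains a read op that reports a lookup error cached in type_data.x[0] instead of falling straight through to user_read(). A tiny sketch of the bounded print (printk's format handling accepts the same specifier):

	static void dump_payload(const char *buf, size_t len)
	{
		/* prints at most len bytes even if buf lacks a terminating NUL */
		printk(KERN_DEBUG "payload='%*.*s'\n", (int)len, (int)len, buf);
	}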
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 87bb5f4de0e8..c53ded2a98df 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -41,12 +41,12 @@ config NET_DSA_MV88E6XXX_NEED_PPU
41 default n 41 default n
42 42
43config NET_DSA_MV88E6131 43config NET_DSA_MV88E6131
44 bool "Marvell 88E6095/6095F/6131 ethernet switch chip support" 44 bool "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
45 select NET_DSA_MV88E6XXX 45 select NET_DSA_MV88E6XXX
46 select NET_DSA_MV88E6XXX_NEED_PPU 46 select NET_DSA_MV88E6XXX_NEED_PPU
47 select NET_DSA_TAG_DSA 47 select NET_DSA_TAG_DSA
48 ---help--- 48 ---help---
49 This enables support for the Marvell 88E6095/6095F/6131 49 This enables support for the Marvell 88E6085/6095/6095F/6131
50 ethernet switch chips. 50 ethernet switch chips.
51 51
52config NET_DSA_MV88E6123_61_65 52config NET_DSA_MV88E6123_61_65
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
index 83277f463af7..8f4ff5a2c813 100644
--- a/net/dsa/mv88e6060.c
+++ b/net/dsa/mv88e6060.c
@@ -18,7 +18,7 @@
18 18
19static int reg_read(struct dsa_switch *ds, int addr, int reg) 19static int reg_read(struct dsa_switch *ds, int addr, int reg)
20{ 20{
21 return mdiobus_read(ds->master_mii_bus, addr, reg); 21 return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg);
22} 22}
23 23
24#define REG_READ(addr, reg) \ 24#define REG_READ(addr, reg) \
@@ -34,7 +34,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg)
34 34
35static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) 35static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
36{ 36{
37 return mdiobus_write(ds->master_mii_bus, addr, reg, val); 37 return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr,
38 reg, val);
38} 39}
39 40
40#define REG_WRITE(addr, reg, val) \ 41#define REG_WRITE(addr, reg, val) \
@@ -50,7 +51,7 @@ static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr)
50{ 51{
51 int ret; 52 int ret;
52 53
53 ret = mdiobus_read(bus, REG_PORT(0), 0x03); 54 ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
54 if (ret >= 0) { 55 if (ret >= 0) {
55 ret &= 0xfff0; 56 ret &= 0xfff0;
56 if (ret == 0x0600) 57 if (ret == 0x0600)
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
index bb2b41bc854e..45f7411e90ba 100644
--- a/net/dsa/mv88e6131.c
+++ b/net/dsa/mv88e6131.c
@@ -14,6 +14,13 @@
14#include "dsa_priv.h" 14#include "dsa_priv.h"
15#include "mv88e6xxx.h" 15#include "mv88e6xxx.h"
16 16
17/*
18 * Switch product IDs
19 */
20#define ID_6085 0x04a0
21#define ID_6095 0x0950
22#define ID_6131 0x1060
23
17static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) 24static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
18{ 25{
19 int ret; 26 int ret;
@@ -21,9 +28,11 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
21 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); 28 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
22 if (ret >= 0) { 29 if (ret >= 0) {
23 ret &= 0xfff0; 30 ret &= 0xfff0;
24 if (ret == 0x0950) 31 if (ret == ID_6085)
32 return "Marvell 88E6085";
33 if (ret == ID_6095)
25 return "Marvell 88E6095/88E6095F"; 34 return "Marvell 88E6095/88E6095F";
26 if (ret == 0x1060) 35 if (ret == ID_6131)
27 return "Marvell 88E6131"; 36 return "Marvell 88E6131";
28 } 37 }
29 38
@@ -124,7 +133,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
124 * Ignore removed tag data on doubly tagged packets, disable 133 * Ignore removed tag data on doubly tagged packets, disable
125 * flow control messages, force flow control priority to the 134 * flow control messages, force flow control priority to the
126 * highest, and send all special multicast frames to the CPU 135 * highest, and send all special multicast frames to the CPU
127 * port at the higest priority. 136 * port at the highest priority.
128 */ 137 */
129 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); 138 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
130 139
@@ -164,6 +173,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
164 173
165static int mv88e6131_setup_port(struct dsa_switch *ds, int p) 174static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
166{ 175{
176 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
167 int addr = REG_PORT(p); 177 int addr = REG_PORT(p);
168 u16 val; 178 u16 val;
169 179
@@ -171,10 +181,13 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
171 * MAC Forcing register: don't force link, speed, duplex 181 * MAC Forcing register: don't force link, speed, duplex
172 * or flow control state to any particular values on physical 182 * or flow control state to any particular values on physical
173 * ports, but force the CPU port and all DSA ports to 1000 Mb/s 183 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
174 * full duplex. 184 * (100 Mb/s on 6085) full duplex.
175 */ 185 */
176 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) 186 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
177 REG_WRITE(addr, 0x01, 0x003e); 187 if (ps->id == ID_6085)
188 REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */
189 else
190 REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */
178 else 191 else
179 REG_WRITE(addr, 0x01, 0x0003); 192 REG_WRITE(addr, 0x01, 0x0003);
180 193
@@ -194,8 +207,15 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
194 * mode, but do not enable forwarding of unknown unicasts. 207 * mode, but do not enable forwarding of unknown unicasts.
195 */ 208 */
196 val = 0x0433; 209 val = 0x0433;
197 if (p == dsa_upstream_port(ds)) 210 if (p == dsa_upstream_port(ds)) {
198 val |= 0x0104; 211 val |= 0x0104;
212 /*
213 * On 6085, unknown multicast forward is controlled
214 * here rather than in Port Control 2 register.
215 */
216 if (ps->id == ID_6085)
217 val |= 0x0008;
218 }
199 if (ds->dsa_port_mask & (1 << p)) 219 if (ds->dsa_port_mask & (1 << p))
200 val |= 0x0100; 220 val |= 0x0100;
201 REG_WRITE(addr, 0x04, val); 221 REG_WRITE(addr, 0x04, val);
@@ -238,10 +258,19 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
238 * If this is the upstream port for this switch, enable 258 * If this is the upstream port for this switch, enable
239 * forwarding of unknown multicast addresses. 259 * forwarding of unknown multicast addresses.
240 */ 260 */
241 val = 0x0080 | dsa_upstream_port(ds); 261 if (ps->id == ID_6085)
242 if (p == dsa_upstream_port(ds)) 262 /*
243 val |= 0x0040; 263 * on 6085, bits 3:0 are reserved, bit 6 control ARP
244 REG_WRITE(addr, 0x08, val); 264 * mirroring, and multicast forward is handled in
265 * Port Control register.
266 */
267 REG_WRITE(addr, 0x08, 0x0080);
268 else {
269 val = 0x0080 | dsa_upstream_port(ds);
270 if (p == dsa_upstream_port(ds))
271 val |= 0x0040;
272 REG_WRITE(addr, 0x08, val);
273 }
245 274
246 /* 275 /*
247 * Rate Control: disable ingress rate limiting. 276 * Rate Control: disable ingress rate limiting.
@@ -286,6 +315,8 @@ static int mv88e6131_setup(struct dsa_switch *ds)
286 mv88e6xxx_ppu_state_init(ds); 315 mv88e6xxx_ppu_state_init(ds);
287 mutex_init(&ps->stats_mutex); 316 mutex_init(&ps->stats_mutex);
288 317
318 ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
319
289 ret = mv88e6131_switch_reset(ds); 320 ret = mv88e6131_switch_reset(ds);
290 if (ret < 0) 321 if (ret < 0)
291 return ret; 322 return ret;
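To cover the 88E6085, mv88e6131_setup() now latches the switch product ID (port 0, register 0x03, upper twelve bits) into the private state and keys several register writes on it, since the 6085 tops out at 100 Mb/s and moves unknown-multicast forwarding into the Port Control register. A reduced sketch of the idea, using a hypothetical helper rather than the driver's REG_WRITE macro:

	#define MY_ID_6085	0x04a0		/* value taken from the ID_6085 define above */

	static unsigned short cpu_port_force_value(int id)
	{
		/* forced CPU/DSA port speed depends on the latched product id */
		return (id == MY_ID_6085) ? 0x003d /* 100 Mb/s */ : 0x003e /* 1000 Mb/s */;
	}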
diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h
index eb0e0aaa9f1b..61156ca26a0d 100644
--- a/net/dsa/mv88e6xxx.h
+++ b/net/dsa/mv88e6xxx.h
@@ -39,6 +39,8 @@ struct mv88e6xxx_priv_state {
39 * Hold this mutex over snapshot + dump sequences. 39 * Hold this mutex over snapshot + dump sequences.
40 */ 40 */
41 struct mutex stats_mutex; 41 struct mutex stats_mutex;
42
43 int id; /* switch product id */
42}; 44};
43 45
44struct mv88e6xxx_hw_stat { 46struct mv88e6xxx_hw_stat {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 64ca2a6fa0d4..0a47b6c37038 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -288,7 +288,6 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
288 .get_drvinfo = dsa_slave_get_drvinfo, 288 .get_drvinfo = dsa_slave_get_drvinfo,
289 .nway_reset = dsa_slave_nway_reset, 289 .nway_reset = dsa_slave_nway_reset,
290 .get_link = dsa_slave_get_link, 290 .get_link = dsa_slave_get_link,
291 .set_sg = ethtool_op_set_sg,
292 .get_strings = dsa_slave_get_strings, 291 .get_strings = dsa_slave_get_strings,
293 .get_ethtool_stats = dsa_slave_get_ethtool_stats, 292 .get_ethtool_stats = dsa_slave_get_ethtool_stats,
294 .get_sset_count = dsa_slave_get_sset_count, 293 .get_sset_count = dsa_slave_get_sset_count,
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 0c2826337919..a1d9f3787dd5 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -435,10 +435,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
435 udpdest.sin_addr.s_addr = htonl(network | addr.station); 435 udpdest.sin_addr.s_addr = htonl(network | addr.station);
436 } 436 }
437 437
438 memset(&ah, 0, sizeof(ah));
438 ah.port = port; 439 ah.port = port;
439 ah.cb = cb & 0x7f; 440 ah.cb = cb & 0x7f;
440 ah.code = 2; /* magic */ 441 ah.code = 2; /* magic */
441 ah.pad = 0;
442 442
443 /* tack our header on the front of the iovec */ 443 /* tack our header on the front of the iovec */
444 size = sizeof(struct aunhdr); 444 size = sizeof(struct aunhdr);
@@ -935,7 +935,6 @@ static void aun_data_available(struct sock *sk, int slen)
935 struct sk_buff *skb; 935 struct sk_buff *skb;
936 unsigned char *data; 936 unsigned char *data;
937 struct aunhdr *ah; 937 struct aunhdr *ah;
938 struct iphdr *ip;
939 size_t len; 938 size_t len;
940 939
941 while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) { 940 while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) {
@@ -949,7 +948,6 @@ static void aun_data_available(struct sock *sk, int slen)
949 data = skb_transport_header(skb) + sizeof(struct udphdr); 948 data = skb_transport_header(skb) + sizeof(struct udphdr);
950 ah = (struct aunhdr *)data; 949 ah = (struct aunhdr *)data;
951 len = skb->len - sizeof(struct udphdr); 950 len = skb->len - sizeof(struct udphdr);
952 ip = ip_hdr(skb);
953 951
954 switch (ah->code) 952 switch (ah->code)
955 { 953 {
@@ -962,12 +960,6 @@ static void aun_data_available(struct sock *sk, int slen)
962 case 4: 960 case 4:
963 aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING); 961 aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING);
964 break; 962 break;
965#if 0
966 /* This isn't quite right yet. */
967 case 5:
968 aun_send_response(ip->saddr, ah->handle, 6, ah->cb);
969 break;
970#endif
971 default: 963 default:
972 printk(KERN_DEBUG "unknown AUN packet (type %d)\n", data[0]); 964 printk(KERN_DEBUG "unknown AUN packet (type %d)\n", data[0]);
973 } 965 }
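The econet change zeroes the whole on-stack AUN header before filling it, instead of clearing individual pad fields, so compiler-inserted padding can no longer leak stack contents onto the wire; the unused ip_hdr() lookup and the dead #if 0 branch are dropped at the same time. The general pattern, with an illustrative header layout rather than the real struct aunhdr:

	struct wire_hdr {
		unsigned char port, cb, code, pad;
		unsigned long handle;
	};

	static void build_hdr(struct wire_hdr *h, unsigned char port, unsigned char cb)
	{
		memset(h, 0, sizeof(*h));	/* clears pad fields and padding bytes alike */
		h->port = port;
		h->cb   = cb & 0x7f;
		h->code = 2;			/* magic, as in the hunk above */
	}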
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index ce2d33582859..5761185f884e 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,5 +1,3 @@
1obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o 1obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
2ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o 2ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
3af_802154-y := af_ieee802154.o raw.o dgram.o 3af_802154-y := af_ieee802154.o raw.o dgram.o
4
5ccflags-y += -Wall -DDEBUG
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050595d1..cbb505ba9324 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER
55 55
56 If unsure, say N here. 56 If unsure, say N here.
57 57
58choice
59 prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
60 depends on IP_ADVANCED_ROUTER
61 default ASK_IP_FIB_HASH
62
63config ASK_IP_FIB_HASH
64 bool "FIB_HASH"
65 ---help---
66 Current FIB is very proven and good enough for most users.
67
68config IP_FIB_TRIE
69 bool "FIB_TRIE"
70 ---help---
71 Use new experimental LC-trie as FIB lookup algorithm.
72 This improves lookup performance if you have a large
73 number of routes.
74
75 LC-trie is a longest matching prefix lookup algorithm which
76 performs better than FIB_HASH for large routing tables.
77 But, it consumes more memory and is more complex.
78
79 LC-trie is described in:
80
81 IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
82 IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
83 June 1999
84
85 An experimental study of compression methods for dynamic tries
86 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
87 <http://www.csc.kth.se/~snilsson/software/dyntrie2/>
88
89endchoice
90
91config IP_FIB_HASH
92 def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
93
94config IP_FIB_TRIE_STATS 58config IP_FIB_TRIE_STATS
95 bool "FIB TRIE statistics" 59 bool "FIB TRIE statistics"
96 depends on IP_FIB_TRIE 60 depends on IP_ADVANCED_ROUTER
97 ---help--- 61 ---help---
98 Keep track of statistics on structure of FIB TRIE table. 62 Keep track of statistics on structure of FIB TRIE table.
99 Useful for testing and measuring TRIE performance. 63 Useful for testing and measuring TRIE performance.
@@ -140,6 +104,9 @@ config IP_ROUTE_VERBOSE
140 handled by the klogd daemon which is responsible for kernel messages 104 handled by the klogd daemon which is responsible for kernel messages
141 ("man klogd"). 105 ("man klogd").
142 106
107config IP_ROUTE_CLASSID
108 bool
109
143config IP_PNP 110config IP_PNP
144 bool "IP: kernel level autoconfiguration" 111 bool "IP: kernel level autoconfiguration"
145 help 112 help
@@ -657,4 +624,3 @@ config TCP_MD5SIG
657 on the Internet. 624 on the Internet.
658 625
659 If unsure, say N. 626 If unsure, say N.
660
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4978d22f9a75..f2dc69cffb57 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o udplite.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 fib_frontend.o fib_semantics.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o 14 inet_fragment.o ping.o
15 15
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
17obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
18obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
19obj-$(CONFIG_PROC_FS) += proc.o 17obj-$(CONFIG_PROC_FS) += proc.o
20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 18obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
21obj-$(CONFIG_IP_MROUTE) += ipmr.o 19obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 45b89d7bda5a..cc1463156cd0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -105,6 +105,7 @@
105#include <net/tcp.h> 105#include <net/tcp.h>
106#include <net/udp.h> 106#include <net/udp.h>
107#include <net/udplite.h> 107#include <net/udplite.h>
108#include <net/ping.h>
108#include <linux/skbuff.h> 109#include <linux/skbuff.h>
109#include <net/sock.h> 110#include <net/sock.h>
110#include <net/raw.h> 111#include <net/raw.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
153 WARN_ON(sk->sk_wmem_queued); 154 WARN_ON(sk->sk_wmem_queued);
154 WARN_ON(sk->sk_forward_alloc); 155 WARN_ON(sk->sk_forward_alloc);
155 156
156 kfree(inet->opt); 157 kfree(rcu_dereference_protected(inet->inet_opt, 1));
157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); 158 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
158 sk_refcnt_debug_dec(sk); 159 sk_refcnt_debug_dec(sk);
159} 160}
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] =
1008 .flags = INET_PROTOSW_PERMANENT, 1009 .flags = INET_PROTOSW_PERMANENT,
1009 }, 1010 },
1010 1011
1012 {
1013 .type = SOCK_DGRAM,
1014 .protocol = IPPROTO_ICMP,
1015 .prot = &ping_prot,
1016 .ops = &inet_dgram_ops,
1017 .no_check = UDP_CSUM_DEFAULT,
1018 .flags = INET_PROTOSW_REUSE,
1019 },
1011 1020
1012 { 1021 {
1013 .type = SOCK_RAW, 1022 .type = SOCK_RAW,
@@ -1101,27 +1110,29 @@ int sysctl_ip_dynaddr __read_mostly;
1101static int inet_sk_reselect_saddr(struct sock *sk) 1110static int inet_sk_reselect_saddr(struct sock *sk)
1102{ 1111{
1103 struct inet_sock *inet = inet_sk(sk); 1112 struct inet_sock *inet = inet_sk(sk);
1104 int err;
1105 struct rtable *rt;
1106 __be32 old_saddr = inet->inet_saddr; 1113 __be32 old_saddr = inet->inet_saddr;
1107 __be32 new_saddr;
1108 __be32 daddr = inet->inet_daddr; 1114 __be32 daddr = inet->inet_daddr;
1115 struct flowi4 *fl4;
1116 struct rtable *rt;
1117 __be32 new_saddr;
1118 struct ip_options_rcu *inet_opt;
1109 1119
1110 if (inet->opt && inet->opt->srr) 1120 inet_opt = rcu_dereference_protected(inet->inet_opt,
1111 daddr = inet->opt->faddr; 1121 sock_owned_by_user(sk));
1122 if (inet_opt && inet_opt->opt.srr)
1123 daddr = inet_opt->opt.faddr;
1112 1124
1113 /* Query new route. */ 1125 /* Query new route. */
1114 err = ip_route_connect(&rt, daddr, 0, 1126 fl4 = &inet->cork.fl.u.ip4;
1115 RT_CONN_FLAGS(sk), 1127 rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
1116 sk->sk_bound_dev_if, 1128 sk->sk_bound_dev_if, sk->sk_protocol,
1117 sk->sk_protocol, 1129 inet->inet_sport, inet->inet_dport, sk, false);
1118 inet->inet_sport, inet->inet_dport, sk, 0); 1130 if (IS_ERR(rt))
1119 if (err) 1131 return PTR_ERR(rt);
1120 return err;
1121 1132
1122 sk_setup_caps(sk, &rt->dst); 1133 sk_setup_caps(sk, &rt->dst);
1123 1134
1124 new_saddr = rt->rt_src; 1135 new_saddr = fl4->saddr;
1125 1136
1126 if (new_saddr == old_saddr) 1137 if (new_saddr == old_saddr)
1127 return 0; 1138 return 0;
@@ -1150,6 +1161,8 @@ int inet_sk_rebuild_header(struct sock *sk)
1150 struct inet_sock *inet = inet_sk(sk); 1161 struct inet_sock *inet = inet_sk(sk);
1151 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); 1162 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1152 __be32 daddr; 1163 __be32 daddr;
1164 struct ip_options_rcu *inet_opt;
1165 struct flowi4 *fl4;
1153 int err; 1166 int err;
1154 1167
1155 /* Route is OK, nothing to do. */ 1168 /* Route is OK, nothing to do. */
@@ -1157,28 +1170,23 @@ int inet_sk_rebuild_header(struct sock *sk)
1157 return 0; 1170 return 0;
1158 1171
1159 /* Reroute. */ 1172 /* Reroute. */
1173 rcu_read_lock();
1174 inet_opt = rcu_dereference(inet->inet_opt);
1160 daddr = inet->inet_daddr; 1175 daddr = inet->inet_daddr;
1161 if (inet->opt && inet->opt->srr) 1176 if (inet_opt && inet_opt->opt.srr)
1162 daddr = inet->opt->faddr; 1177 daddr = inet_opt->opt.faddr;
1163{ 1178 rcu_read_unlock();
1164 struct flowi fl = { 1179 fl4 = &inet->cork.fl.u.ip4;
1165 .oif = sk->sk_bound_dev_if, 1180 rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
1166 .mark = sk->sk_mark, 1181 inet->inet_dport, inet->inet_sport,
1167 .fl4_dst = daddr, 1182 sk->sk_protocol, RT_CONN_FLAGS(sk),
1168 .fl4_src = inet->inet_saddr, 1183 sk->sk_bound_dev_if);
1169 .fl4_tos = RT_CONN_FLAGS(sk), 1184 if (!IS_ERR(rt)) {
1170 .proto = sk->sk_protocol, 1185 err = 0;
1171 .flags = inet_sk_flowi_flags(sk),
1172 .fl_ip_sport = inet->inet_sport,
1173 .fl_ip_dport = inet->inet_dport,
1174 };
1175
1176 security_sk_classify_flow(sk, &fl);
1177 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
1178}
1179 if (!err)
1180 sk_setup_caps(sk, &rt->dst); 1186 sk_setup_caps(sk, &rt->dst);
1181 else { 1187 } else {
1188 err = PTR_ERR(rt);
1189
1182 /* Routing failed... */ 1190 /* Routing failed... */
1183 sk->sk_route_caps = 0; 1191 sk->sk_route_caps = 0;
1184 /* 1192 /*
@@ -1198,7 +1206,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
1198 1206
1199static int inet_gso_send_check(struct sk_buff *skb) 1207static int inet_gso_send_check(struct sk_buff *skb)
1200{ 1208{
1201 struct iphdr *iph; 1209 const struct iphdr *iph;
1202 const struct net_protocol *ops; 1210 const struct net_protocol *ops;
1203 int proto; 1211 int proto;
1204 int ihl; 1212 int ihl;
@@ -1231,7 +1239,7 @@ out:
1231 return err; 1239 return err;
1232} 1240}
1233 1241
1234static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) 1242static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
1235{ 1243{
1236 struct sk_buff *segs = ERR_PTR(-EINVAL); 1244 struct sk_buff *segs = ERR_PTR(-EINVAL);
1237 struct iphdr *iph; 1245 struct iphdr *iph;
@@ -1305,7 +1313,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1305 const struct net_protocol *ops; 1313 const struct net_protocol *ops;
1306 struct sk_buff **pp = NULL; 1314 struct sk_buff **pp = NULL;
1307 struct sk_buff *p; 1315 struct sk_buff *p;
1308 struct iphdr *iph; 1316 const struct iphdr *iph;
1309 unsigned int hlen; 1317 unsigned int hlen;
1310 unsigned int off; 1318 unsigned int off;
1311 unsigned int id; 1319 unsigned int id;
@@ -1528,6 +1536,7 @@ static const struct net_protocol udp_protocol = {
1528 1536
1529static const struct net_protocol icmp_protocol = { 1537static const struct net_protocol icmp_protocol = {
1530 .handler = icmp_rcv, 1538 .handler = icmp_rcv,
1539 .err_handler = ping_err,
1531 .no_policy = 1, 1540 .no_policy = 1,
1532 .netns_ok = 1, 1541 .netns_ok = 1,
1533}; 1542};
@@ -1643,6 +1652,10 @@ static int __init inet_init(void)
1643 if (rc) 1652 if (rc)
1644 goto out_unregister_udp_proto; 1653 goto out_unregister_udp_proto;
1645 1654
1655 rc = proto_register(&ping_prot, 1);
1656 if (rc)
1657 goto out_unregister_raw_proto;
1658
1646 /* 1659 /*
1647 * Tell SOCKET that we are alive... 1660 * Tell SOCKET that we are alive...
1648 */ 1661 */
@@ -1698,6 +1711,8 @@ static int __init inet_init(void)
1698 /* Add UDP-Lite (RFC 3828) */ 1711 /* Add UDP-Lite (RFC 3828) */
1699 udplite4_register(); 1712 udplite4_register();
1700 1713
1714 ping_init();
1715
1701 /* 1716 /*
1702 * Set the ICMP layer up 1717 * Set the ICMP layer up
1703 */ 1718 */
@@ -1728,6 +1743,8 @@ static int __init inet_init(void)
1728 rc = 0; 1743 rc = 0;
1729out: 1744out:
1730 return rc; 1745 return rc;
1746out_unregister_raw_proto:
1747 proto_unregister(&raw_prot);
1731out_unregister_udp_proto: 1748out_unregister_udp_proto:
1732 proto_unregister(&udp_prot); 1749 proto_unregister(&udp_prot);
1733out_unregister_tcp_proto: 1750out_unregister_tcp_proto:
@@ -1752,11 +1769,15 @@ static int __init ipv4_proc_init(void)
1752 goto out_tcp; 1769 goto out_tcp;
1753 if (udp4_proc_init()) 1770 if (udp4_proc_init())
1754 goto out_udp; 1771 goto out_udp;
1772 if (ping_proc_init())
1773 goto out_ping;
1755 if (ip_misc_proc_init()) 1774 if (ip_misc_proc_init())
1756 goto out_misc; 1775 goto out_misc;
1757out: 1776out:
1758 return rc; 1777 return rc;
1759out_misc: 1778out_misc:
1779 ping_proc_exit();
1780out_ping:
1760 udp4_proc_exit(); 1781 udp4_proc_exit();
1761out_udp: 1782out_udp:
1762 tcp4_proc_exit(); 1783 tcp4_proc_exit();
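[note] The af_inet.c hunks wire up the new unprivileged ping sockets: a SOCK_DGRAM/IPPROTO_ICMP inetsw entry backed by ping_prot, ping_err as the ICMP error handler, and ping_init()/ping_proc_init() registration. A minimal userspace probe of the new socket type might look like the sketch below; it assumes the caller's group falls inside the net.ipv4.ping_group_range sysctl provided by the companion ping.c code, otherwise socket() fails (typically EACCES).

/* Minimal sketch: send an ICMP echo over the new datagram ICMP socket. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_icmp.h>
#include <arpa/inet.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
	struct icmphdr icmp = { 0 };
	struct sockaddr_in dst = { 0 };
	char buf[1500];
	ssize_t n;

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	icmp.type = ICMP_ECHO;			/* kernel fills id and checksum */
	icmp.un.echo.sequence = htons(1);
	dst.sin_family = AF_INET;
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);

	if (sendto(fd, &icmp, sizeof(icmp), 0,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		perror("sendto");
		return 1;
	}
	n = recv(fd, buf, sizeof(buf), 0);
	printf("received %zd byte ICMP echo reply\n", n);
	close(fd);
	return 0;
}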
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 86961bec70ab..c1f4154552fc 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -73,7 +73,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
73 * into IP header for icv calculation. Options are already checked 73 * into IP header for icv calculation. Options are already checked
74 * for validity, so paranoia is not required. */ 74 * for validity, so paranoia is not required. */
75 75
76static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) 76static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
77{ 77{
78 unsigned char * optptr = (unsigned char*)(iph+1); 78 unsigned char * optptr = (unsigned char*)(iph+1);
79 int l = iph->ihl*4 - sizeof(struct iphdr); 79 int l = iph->ihl*4 - sizeof(struct iphdr);
@@ -201,11 +201,14 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
201 top_iph->ttl = 0; 201 top_iph->ttl = 0;
202 top_iph->check = 0; 202 top_iph->check = 0;
203 203
204 ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; 204 if (x->props.flags & XFRM_STATE_ALIGN4)
205 ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
206 else
207 ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
205 208
206 ah->reserved = 0; 209 ah->reserved = 0;
207 ah->spi = x->id.spi; 210 ah->spi = x->id.spi;
208 ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); 211 ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
209 212
210 sg_init_table(sg, nfrags); 213 sg_init_table(sg, nfrags);
211 skb_to_sgvec(skb, sg, 0, skb->len); 214 skb_to_sgvec(skb, sg, 0, skb->len);
@@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
299 nexthdr = ah->nexthdr; 302 nexthdr = ah->nexthdr;
300 ah_hlen = (ah->hdrlen + 2) << 2; 303 ah_hlen = (ah->hdrlen + 2) << 2;
301 304
302 if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && 305 if (x->props.flags & XFRM_STATE_ALIGN4) {
303 ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) 306 if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) &&
304 goto out; 307 ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len))
308 goto out;
309 } else {
310 if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
311 ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
312 goto out;
313 }
305 314
306 if (!pskb_may_pull(skb, ah_hlen)) 315 if (!pskb_may_pull(skb, ah_hlen))
307 goto out; 316 goto out;
@@ -387,7 +396,7 @@ out:
387static void ah4_err(struct sk_buff *skb, u32 info) 396static void ah4_err(struct sk_buff *skb, u32 info)
388{ 397{
389 struct net *net = dev_net(skb->dev); 398 struct net *net = dev_net(skb->dev);
390 struct iphdr *iph = (struct iphdr *)skb->data; 399 const struct iphdr *iph = (const struct iphdr *)skb->data;
391 struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); 400 struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
392 struct xfrm_state *x; 401 struct xfrm_state *x;
393 402
@@ -395,7 +404,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
395 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 404 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
396 return; 405 return;
397 406
398 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); 407 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
408 ah->spi, IPPROTO_AH, AF_INET);
399 if (!x) 409 if (!x)
400 return; 410 return;
401 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", 411 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
@@ -450,8 +460,12 @@ static int ah_init_state(struct xfrm_state *x)
450 460
451 BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); 461 BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
452 462
453 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + 463 if (x->props.flags & XFRM_STATE_ALIGN4)
454 ahp->icv_trunc_len); 464 x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) +
465 ahp->icv_trunc_len);
466 else
467 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
468 ahp->icv_trunc_len);
455 if (x->props.mode == XFRM_MODE_TUNNEL) 469 if (x->props.mode == XFRM_MODE_TUNNEL)
456 x->props.header_len += sizeof(struct iphdr); 470 x->props.header_len += sizeof(struct iphdr);
457 x->data = ahp; 471 x->data = ahp;
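[note] The AH changes add XFRM_STATE_ALIGN4 so the ICV can be padded to a 32-bit instead of a 64-bit boundary. The standalone program below re-does the hdrlen arithmetic from ah_output() for a hypothetical 16-byte truncated ICV, to show the on-the-wire difference between the two alignments.

/* Mirrors the hdrlen computation above; the ICV length is hypothetical. */
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define AH_FIXED_LEN	12	/* nexthdr, hdrlen, reserved, SPI, seq_no */

static int ah_hdrlen(int icv_trunc_len, int align)
{
	return (ALIGN_UP(AH_FIXED_LEN + icv_trunc_len, align) >> 2) - 2;
}

int main(void)
{
	int icv = 16;	/* e.g. a 128-bit truncated ICV */

	printf("8-byte align: hdrlen=%d -> %d bytes on the wire\n",
	       ah_hdrlen(icv, 8), (ah_hdrlen(icv, 8) + 2) * 4);
	printf("4-byte align: hdrlen=%d -> %d bytes on the wire\n",
	       ah_hdrlen(icv, 4), (ah_hdrlen(icv, 4) + 2) * 4);
	return 0;
}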
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7927589813b5..1b74d3b64371 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -215,6 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
215 case ARPHRD_INFINIBAND: 215 case ARPHRD_INFINIBAND:
216 ip_ib_mc_map(addr, dev->broadcast, haddr); 216 ip_ib_mc_map(addr, dev->broadcast, haddr);
217 return 0; 217 return 0;
218 case ARPHRD_IPGRE:
219 ip_ipgre_mc_map(addr, dev->broadcast, haddr);
220 return 0;
218 default: 221 default:
219 if (dir) { 222 if (dir) {
220 memcpy(haddr, dev->broadcast, dev->addr_len); 223 memcpy(haddr, dev->broadcast, dev->addr_len);
@@ -433,14 +436,13 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
433 436
434static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) 437static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
435{ 438{
436 struct flowi fl = { .fl4_dst = sip,
437 .fl4_src = tip };
438 struct rtable *rt; 439 struct rtable *rt;
439 int flag = 0; 440 int flag = 0;
440 /*unsigned long now; */ 441 /*unsigned long now; */
441 struct net *net = dev_net(dev); 442 struct net *net = dev_net(dev);
442 443
443 if (ip_route_output_key(net, &rt, &fl) < 0) 444 rt = ip_route_output(net, sip, tip, 0, 0);
445 if (IS_ERR(rt))
444 return 1; 446 return 1;
445 if (rt->dst.dev != dev) { 447 if (rt->dst.dev != dev) {
446 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); 448 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
@@ -1061,12 +1063,10 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1061 if (r->arp_flags & ATF_PERM) 1063 if (r->arp_flags & ATF_PERM)
1062 r->arp_flags |= ATF_COM; 1064 r->arp_flags |= ATF_COM;
1063 if (dev == NULL) { 1065 if (dev == NULL) {
1064 struct flowi fl = { .fl4_dst = ip, 1066 struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
1065 .fl4_tos = RTO_ONLINK }; 1067
1066 struct rtable *rt; 1068 if (IS_ERR(rt))
1067 err = ip_route_output_key(net, &rt, &fl); 1069 return PTR_ERR(rt);
1068 if (err != 0)
1069 return err;
1070 dev = rt->dst.dev; 1070 dev = rt->dst.dev;
1071 ip_rt_put(rt); 1071 ip_rt_put(rt);
1072 if (!dev) 1072 if (!dev)
@@ -1177,7 +1177,6 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
1177static int arp_req_delete(struct net *net, struct arpreq *r, 1177static int arp_req_delete(struct net *net, struct arpreq *r,
1178 struct net_device *dev) 1178 struct net_device *dev)
1179{ 1179{
1180 int err;
1181 __be32 ip; 1180 __be32 ip;
1182 1181
1183 if (r->arp_flags & ATF_PUBL) 1182 if (r->arp_flags & ATF_PUBL)
@@ -1185,12 +1184,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1185 1184
1186 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1185 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1187 if (dev == NULL) { 1186 if (dev == NULL) {
1188 struct flowi fl = { .fl4_dst = ip, 1187 struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
1189 .fl4_tos = RTO_ONLINK }; 1188 if (IS_ERR(rt))
1190 struct rtable *rt; 1189 return PTR_ERR(rt);
1191 err = ip_route_output_key(net, &rt, &fl);
1192 if (err != 0)
1193 return err;
1194 dev = rt->dst.dev; 1190 dev = rt->dst.dev;
1195 ip_rt_put(rt); 1191 ip_rt_put(rt);
1196 if (!dev) 1192 if (!dev)
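[note] arp_filter() and the arpreq ioctl helpers now call ip_route_output(), which returns the rtable directly with any errno encoded in the pointer, instead of filling an out-parameter through ip_route_output_key(). The userspace re-creation of the err.h helpers below (a sketch with a stand-in lookup function) shows the IS_ERR()/PTR_ERR() convention the converted callers rely on.

/* Userspace re-creation of the kernel's ERR_PTR convention (err.h style). */
#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO	4095
#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static inline void *ERR_PTR(long error)	{ return (void *)error; }
static inline long PTR_ERR(const void *ptr)	{ return (long)ptr; }
static inline int IS_ERR(const void *ptr)	{ return IS_ERR_VALUE((unsigned long)ptr); }

struct rtable { int dummy; };

/* Stand-in for ip_route_output(): always fails for the sake of the example. */
static struct rtable *fake_route_lookup(void)
{
	return ERR_PTR(-ENETUNREACH);
}

int main(void)
{
	struct rtable *rt = fake_route_lookup();

	if (IS_ERR(rt)) {
		printf("lookup failed: %ld\n", PTR_ERR(rt));
		return 1;
	}
	/* use rt ... */
	return 0;
}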
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 094e150c6260..2b3c23c287cd 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -112,7 +112,7 @@ int cipso_v4_rbm_strictvalid = 1;
112/* The maximum number of category ranges permitted in the ranged category tag 112/* The maximum number of category ranges permitted in the ranged category tag
113 * (tag #5). You may note that the IETF draft states that the maximum number 113 * (tag #5). You may note that the IETF draft states that the maximum number
114 * of category ranges is 7, but if the low end of the last category range is 114 * of category ranges is 7, but if the low end of the last category range is
115 * zero then it is possibile to fit 8 category ranges because the zero should 115 * zero then it is possible to fit 8 category ranges because the zero should
116 * be omitted. */ 116 * be omitted. */
117#define CIPSO_V4_TAG_RNG_CAT_MAX 8 117#define CIPSO_V4_TAG_RNG_CAT_MAX 8
118 118
@@ -438,7 +438,7 @@ cache_add_failure:
438 * 438 *
439 * Description: 439 * Description:
440 * Search the DOI definition list for a DOI definition with a DOI value that 440 * Search the DOI definition list for a DOI definition with a DOI value that
441 * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). 441 * matches @doi. The caller is responsible for calling rcu_read_[un]lock().
442 * Returns a pointer to the DOI definition on success and NULL on failure. 442 * Returns a pointer to the DOI definition on success and NULL on failure.
443 */ 443 */
444static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) 444static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
@@ -1293,7 +1293,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
1293 return ret_val; 1293 return ret_val;
1294 1294
1295 /* This will send packets using the "optimized" format when 1295 /* This will send packets using the "optimized" format when
1296 * possibile as specified in section 3.4.2.6 of the 1296 * possible as specified in section 3.4.2.6 of the
1297 * CIPSO draft. */ 1297 * CIPSO draft. */
1298 if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) 1298 if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
1299 tag_len = 14; 1299 tag_len = 14;
@@ -1752,7 +1752,7 @@ validate_return:
1752} 1752}
1753 1753
1754/** 1754/**
1755 * cipso_v4_error - Send the correct reponse for a bad packet 1755 * cipso_v4_error - Send the correct response for a bad packet
1756 * @skb: the packet 1756 * @skb: the packet
1757 * @error: the error code 1757 * @error: the error code
1758 * @gateway: CIPSO gateway flag 1758 * @gateway: CIPSO gateway flag
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
1857 return CIPSO_V4_HDR_LEN + ret_val; 1857 return CIPSO_V4_HDR_LEN + ret_val;
1858} 1858}
1859 1859
1860static void opt_kfree_rcu(struct rcu_head *head)
1861{
1862 kfree(container_of(head, struct ip_options_rcu, rcu));
1863}
1864
1860/** 1865/**
1861 * cipso_v4_sock_setattr - Add a CIPSO option to a socket 1866 * cipso_v4_sock_setattr - Add a CIPSO option to a socket
1862 * @sk: the socket 1867 * @sk: the socket
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
1879 unsigned char *buf = NULL; 1884 unsigned char *buf = NULL;
1880 u32 buf_len; 1885 u32 buf_len;
1881 u32 opt_len; 1886 u32 opt_len;
1882 struct ip_options *opt = NULL; 1887 struct ip_options_rcu *old, *opt = NULL;
1883 struct inet_sock *sk_inet; 1888 struct inet_sock *sk_inet;
1884 struct inet_connection_sock *sk_conn; 1889 struct inet_connection_sock *sk_conn;
1885 1890
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk,
1915 ret_val = -ENOMEM; 1920 ret_val = -ENOMEM;
1916 goto socket_setattr_failure; 1921 goto socket_setattr_failure;
1917 } 1922 }
1918 memcpy(opt->__data, buf, buf_len); 1923 memcpy(opt->opt.__data, buf, buf_len);
1919 opt->optlen = opt_len; 1924 opt->opt.optlen = opt_len;
1920 opt->cipso = sizeof(struct iphdr); 1925 opt->opt.cipso = sizeof(struct iphdr);
1921 kfree(buf); 1926 kfree(buf);
1922 buf = NULL; 1927 buf = NULL;
1923 1928
1924 sk_inet = inet_sk(sk); 1929 sk_inet = inet_sk(sk);
1930
1931 old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk));
1925 if (sk_inet->is_icsk) { 1932 if (sk_inet->is_icsk) {
1926 sk_conn = inet_csk(sk); 1933 sk_conn = inet_csk(sk);
1927 if (sk_inet->opt) 1934 if (old)
1928 sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; 1935 sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
1929 sk_conn->icsk_ext_hdr_len += opt->optlen; 1936 sk_conn->icsk_ext_hdr_len += opt->opt.optlen;
1930 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); 1937 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
1931 } 1938 }
1932 opt = xchg(&sk_inet->opt, opt); 1939 rcu_assign_pointer(sk_inet->inet_opt, opt);
1933 kfree(opt); 1940 if (old)
1941 call_rcu(&old->rcu, opt_kfree_rcu);
1934 1942
1935 return 0; 1943 return 0;
1936 1944
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
1960 unsigned char *buf = NULL; 1968 unsigned char *buf = NULL;
1961 u32 buf_len; 1969 u32 buf_len;
1962 u32 opt_len; 1970 u32 opt_len;
1963 struct ip_options *opt = NULL; 1971 struct ip_options_rcu *opt = NULL;
1964 struct inet_request_sock *req_inet; 1972 struct inet_request_sock *req_inet;
1965 1973
1966 /* We allocate the maximum CIPSO option size here so we are probably 1974 /* We allocate the maximum CIPSO option size here so we are probably
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req,
1988 ret_val = -ENOMEM; 1996 ret_val = -ENOMEM;
1989 goto req_setattr_failure; 1997 goto req_setattr_failure;
1990 } 1998 }
1991 memcpy(opt->__data, buf, buf_len); 1999 memcpy(opt->opt.__data, buf, buf_len);
1992 opt->optlen = opt_len; 2000 opt->opt.optlen = opt_len;
1993 opt->cipso = sizeof(struct iphdr); 2001 opt->opt.cipso = sizeof(struct iphdr);
1994 kfree(buf); 2002 kfree(buf);
1995 buf = NULL; 2003 buf = NULL;
1996 2004
1997 req_inet = inet_rsk(req); 2005 req_inet = inet_rsk(req);
1998 opt = xchg(&req_inet->opt, opt); 2006 opt = xchg(&req_inet->opt, opt);
1999 kfree(opt); 2007 if (opt)
2008 call_rcu(&opt->rcu, opt_kfree_rcu);
2000 2009
2001 return 0; 2010 return 0;
2002 2011
@@ -2016,34 +2025,34 @@ req_setattr_failure:
2016 * values on failure. 2025 * values on failure.
2017 * 2026 *
2018 */ 2027 */
2019static int cipso_v4_delopt(struct ip_options **opt_ptr) 2028static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
2020{ 2029{
2021 int hdr_delta = 0; 2030 int hdr_delta = 0;
2022 struct ip_options *opt = *opt_ptr; 2031 struct ip_options_rcu *opt = *opt_ptr;
2023 2032
2024 if (opt->srr || opt->rr || opt->ts || opt->router_alert) { 2033 if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
2025 u8 cipso_len; 2034 u8 cipso_len;
2026 u8 cipso_off; 2035 u8 cipso_off;
2027 unsigned char *cipso_ptr; 2036 unsigned char *cipso_ptr;
2028 int iter; 2037 int iter;
2029 int optlen_new; 2038 int optlen_new;
2030 2039
2031 cipso_off = opt->cipso - sizeof(struct iphdr); 2040 cipso_off = opt->opt.cipso - sizeof(struct iphdr);
2032 cipso_ptr = &opt->__data[cipso_off]; 2041 cipso_ptr = &opt->opt.__data[cipso_off];
2033 cipso_len = cipso_ptr[1]; 2042 cipso_len = cipso_ptr[1];
2034 2043
2035 if (opt->srr > opt->cipso) 2044 if (opt->opt.srr > opt->opt.cipso)
2036 opt->srr -= cipso_len; 2045 opt->opt.srr -= cipso_len;
2037 if (opt->rr > opt->cipso) 2046 if (opt->opt.rr > opt->opt.cipso)
2038 opt->rr -= cipso_len; 2047 opt->opt.rr -= cipso_len;
2039 if (opt->ts > opt->cipso) 2048 if (opt->opt.ts > opt->opt.cipso)
2040 opt->ts -= cipso_len; 2049 opt->opt.ts -= cipso_len;
2041 if (opt->router_alert > opt->cipso) 2050 if (opt->opt.router_alert > opt->opt.cipso)
2042 opt->router_alert -= cipso_len; 2051 opt->opt.router_alert -= cipso_len;
2043 opt->cipso = 0; 2052 opt->opt.cipso = 0;
2044 2053
2045 memmove(cipso_ptr, cipso_ptr + cipso_len, 2054 memmove(cipso_ptr, cipso_ptr + cipso_len,
2046 opt->optlen - cipso_off - cipso_len); 2055 opt->opt.optlen - cipso_off - cipso_len);
2047 2056
2048 /* determining the new total option length is tricky because of 2057 /* determining the new total option length is tricky because of
2049 * the padding necessary, the only thing i can think to do at 2058 * the padding necessary, the only thing i can think to do at
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2052 * from there we can determine the new total option length */ 2061 * from there we can determine the new total option length */
2053 iter = 0; 2062 iter = 0;
2054 optlen_new = 0; 2063 optlen_new = 0;
2055 while (iter < opt->optlen) 2064 while (iter < opt->opt.optlen)
2056 if (opt->__data[iter] != IPOPT_NOP) { 2065 if (opt->opt.__data[iter] != IPOPT_NOP) {
2057 iter += opt->__data[iter + 1]; 2066 iter += opt->opt.__data[iter + 1];
2058 optlen_new = iter; 2067 optlen_new = iter;
2059 } else 2068 } else
2060 iter++; 2069 iter++;
2061 hdr_delta = opt->optlen; 2070 hdr_delta = opt->opt.optlen;
2062 opt->optlen = (optlen_new + 3) & ~3; 2071 opt->opt.optlen = (optlen_new + 3) & ~3;
2063 hdr_delta -= opt->optlen; 2072 hdr_delta -= opt->opt.optlen;
2064 } else { 2073 } else {
2065 /* only the cipso option was present on the socket so we can 2074 /* only the cipso option was present on the socket so we can
2066 * remove the entire option struct */ 2075 * remove the entire option struct */
2067 *opt_ptr = NULL; 2076 *opt_ptr = NULL;
2068 hdr_delta = opt->optlen; 2077 hdr_delta = opt->opt.optlen;
2069 kfree(opt); 2078 call_rcu(&opt->rcu, opt_kfree_rcu);
2070 } 2079 }
2071 2080
2072 return hdr_delta; 2081 return hdr_delta;
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
2083void cipso_v4_sock_delattr(struct sock *sk) 2092void cipso_v4_sock_delattr(struct sock *sk)
2084{ 2093{
2085 int hdr_delta; 2094 int hdr_delta;
2086 struct ip_options *opt; 2095 struct ip_options_rcu *opt;
2087 struct inet_sock *sk_inet; 2096 struct inet_sock *sk_inet;
2088 2097
2089 sk_inet = inet_sk(sk); 2098 sk_inet = inet_sk(sk);
2090 opt = sk_inet->opt; 2099 opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
2091 if (opt == NULL || opt->cipso == 0) 2100 if (opt == NULL || opt->opt.cipso == 0)
2092 return; 2101 return;
2093 2102
2094 hdr_delta = cipso_v4_delopt(&sk_inet->opt); 2103 hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
2095 if (sk_inet->is_icsk && hdr_delta > 0) { 2104 if (sk_inet->is_icsk && hdr_delta > 0) {
2096 struct inet_connection_sock *sk_conn = inet_csk(sk); 2105 struct inet_connection_sock *sk_conn = inet_csk(sk);
2097 sk_conn->icsk_ext_hdr_len -= hdr_delta; 2106 sk_conn->icsk_ext_hdr_len -= hdr_delta;
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk)
2109 */ 2118 */
2110void cipso_v4_req_delattr(struct request_sock *req) 2119void cipso_v4_req_delattr(struct request_sock *req)
2111{ 2120{
2112 struct ip_options *opt; 2121 struct ip_options_rcu *opt;
2113 struct inet_request_sock *req_inet; 2122 struct inet_request_sock *req_inet;
2114 2123
2115 req_inet = inet_rsk(req); 2124 req_inet = inet_rsk(req);
2116 opt = req_inet->opt; 2125 opt = req_inet->opt;
2117 if (opt == NULL || opt->cipso == 0) 2126 if (opt == NULL || opt->opt.cipso == 0)
2118 return; 2127 return;
2119 2128
2120 cipso_v4_delopt(&req_inet->opt); 2129 cipso_v4_delopt(&req_inet->opt);
@@ -2184,14 +2193,18 @@ getattr_return:
2184 */ 2193 */
2185int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) 2194int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
2186{ 2195{
2187 struct ip_options *opt; 2196 struct ip_options_rcu *opt;
2197 int res = -ENOMSG;
2188 2198
2189 opt = inet_sk(sk)->opt; 2199 rcu_read_lock();
2190 if (opt == NULL || opt->cipso == 0) 2200 opt = rcu_dereference(inet_sk(sk)->inet_opt);
2191 return -ENOMSG; 2201 if (opt && opt->opt.cipso)
2192 2202 res = cipso_v4_getattr(opt->opt.__data +
2193 return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), 2203 opt->opt.cipso -
2194 secattr); 2204 sizeof(struct iphdr),
2205 secattr);
2206 rcu_read_unlock();
2207 return res;
2195} 2208}
2196 2209
2197/** 2210/**
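[note] Besides moving the socket options behind ip_options_rcu and freeing the old copy with call_rcu() instead of kfree(), the CIPSO hunks keep the option-compaction loop intact. The standalone program below replays that loop on a made-up option buffer: skip IPOPT_NOP padding, track the end of the last real option, and round the new length up to a 4-byte multiple.

/* Replays the optlen recomputation from cipso_v4_delopt(); option bytes
 * are hypothetical. */
#include <stdio.h>

#define IPOPT_NOP 1

static int recompute_optlen(const unsigned char *opt, int optlen)
{
	int iter = 0, optlen_new = 0;

	while (iter < optlen)
		if (opt[iter] != IPOPT_NOP) {
			iter += opt[iter + 1];	/* length byte of this option */
			optlen_new = iter;
		} else
			iter++;

	return (optlen_new + 3) & ~3;
}

int main(void)
{
	/* one 4-byte option followed by NOP padding (made up for the example) */
	unsigned char opts[8] = { 0x44, 4, 5, 0,
				  IPOPT_NOP, IPOPT_NOP, IPOPT_NOP, IPOPT_NOP };

	printf("old optlen=8, new optlen=%d\n", recompute_optlen(opts, 8));
	return 0;
}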
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 174be6caa5c8..424fafbc8cb0 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
24{ 24{
25 struct inet_sock *inet = inet_sk(sk); 25 struct inet_sock *inet = inet_sk(sk);
26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; 26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
27 struct flowi4 *fl4;
27 struct rtable *rt; 28 struct rtable *rt;
28 __be32 saddr; 29 __be32 saddr;
29 int oif; 30 int oif;
@@ -38,6 +39,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
38 39
39 sk_dst_reset(sk); 40 sk_dst_reset(sk);
40 41
42 lock_sock(sk);
43
41 oif = sk->sk_bound_dev_if; 44 oif = sk->sk_bound_dev_if;
42 saddr = inet->inet_saddr; 45 saddr = inet->inet_saddr;
43 if (ipv4_is_multicast(usin->sin_addr.s_addr)) { 46 if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -46,33 +49,39 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
46 if (!saddr) 49 if (!saddr)
47 saddr = inet->mc_addr; 50 saddr = inet->mc_addr;
48 } 51 }
49 err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, 52 fl4 = &inet->cork.fl.u.ip4;
50 RT_CONN_FLAGS(sk), oif, 53 rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
51 sk->sk_protocol, 54 RT_CONN_FLAGS(sk), oif,
52 inet->inet_sport, usin->sin_port, sk, 1); 55 sk->sk_protocol,
53 if (err) { 56 inet->inet_sport, usin->sin_port, sk, true);
57 if (IS_ERR(rt)) {
58 err = PTR_ERR(rt);
54 if (err == -ENETUNREACH) 59 if (err == -ENETUNREACH)
55 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 60 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
56 return err; 61 goto out;
57 } 62 }
58 63
59 if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { 64 if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
60 ip_rt_put(rt); 65 ip_rt_put(rt);
61 return -EACCES; 66 err = -EACCES;
67 goto out;
62 } 68 }
63 if (!inet->inet_saddr) 69 if (!inet->inet_saddr)
64 inet->inet_saddr = rt->rt_src; /* Update source address */ 70 inet->inet_saddr = fl4->saddr; /* Update source address */
65 if (!inet->inet_rcv_saddr) { 71 if (!inet->inet_rcv_saddr) {
66 inet->inet_rcv_saddr = rt->rt_src; 72 inet->inet_rcv_saddr = fl4->saddr;
67 if (sk->sk_prot->rehash) 73 if (sk->sk_prot->rehash)
68 sk->sk_prot->rehash(sk); 74 sk->sk_prot->rehash(sk);
69 } 75 }
70 inet->inet_daddr = rt->rt_dst; 76 inet->inet_daddr = fl4->daddr;
71 inet->inet_dport = usin->sin_port; 77 inet->inet_dport = usin->sin_port;
72 sk->sk_state = TCP_ESTABLISHED; 78 sk->sk_state = TCP_ESTABLISHED;
73 inet->inet_id = jiffies; 79 inet->inet_id = jiffies;
74 80
75 sk_dst_set(sk, &rt->dst); 81 sk_dst_set(sk, &rt->dst);
76 return 0; 82 err = 0;
83out:
84 release_sock(sk);
85 return err;
77} 86}
78EXPORT_SYMBOL(ip4_datagram_connect); 87EXPORT_SYMBOL(ip4_datagram_connect);
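[note] ip4_datagram_connect() now serializes against the socket lock and reads the chosen addresses back out of the flowi4 (fl4->saddr/fl4->daddr) rather than rt->rt_src/rt_dst. From userspace the behaviour is unchanged; the sketch below (destination address is hypothetical) shows the source-address selection this code performs for a connected UDP socket.

/* Sketch: connect() a UDP socket and read back the kernel-chosen source. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in dst = { .sin_family = AF_INET, .sin_port = htons(53) };
	struct sockaddr_in local;
	socklen_t len = sizeof(local);
	char buf[INET_ADDRSTRLEN];

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);	/* example target */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		perror("connect");	/* e.g. ENETUNREACH, as handled above */
		return 1;
	}
	getsockname(fd, (struct sockaddr *)&local, &len);
	printf("kernel chose source %s:%d\n",
	       inet_ntop(AF_INET, &local.sin_addr, buf, sizeof(buf)),
	       ntohs(local.sin_port));
	close(fd);
	return 0;
}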
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce929..0d4a184af16f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -51,6 +51,7 @@
51#include <linux/inetdevice.h> 51#include <linux/inetdevice.h>
52#include <linux/igmp.h> 52#include <linux/igmp.h>
53#include <linux/slab.h> 53#include <linux/slab.h>
54#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL 55#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h> 56#include <linux/sysctl.h>
56#endif 57#endif
@@ -63,6 +64,8 @@
63#include <net/rtnetlink.h> 64#include <net/rtnetlink.h>
64#include <net/net_namespace.h> 65#include <net/net_namespace.h>
65 66
67#include "fib_lookup.h"
68
66static struct ipv4_devconf ipv4_devconf = { 69static struct ipv4_devconf ipv4_devconf = {
67 .data = { 70 .data = {
68 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 71 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -92,6 +95,85 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 95 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93}; 96};
94 97
98/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
99 * value. So if you change this define, make appropriate changes to
100 * inet_addr_hash as well.
101 */
102#define IN4_ADDR_HSIZE 256
103static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107{
108 u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109
110 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
111 (IN4_ADDR_HSIZE - 1));
112}
113
114static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115{
116 unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117
118 spin_lock(&inet_addr_hash_lock);
119 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120 spin_unlock(&inet_addr_hash_lock);
121}
122
123static void inet_hash_remove(struct in_ifaddr *ifa)
124{
125 spin_lock(&inet_addr_hash_lock);
126 hlist_del_init_rcu(&ifa->hash);
127 spin_unlock(&inet_addr_hash_lock);
128}
129
130/**
131 * __ip_dev_find - find the first device with a given source address.
132 * @net: the net namespace
133 * @addr: the source address
134 * @devref: if true, take a reference on the found device
135 *
136 * If a caller uses devref=false, it should be protected by RCU, or RTNL
137 */
138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139{
140 unsigned int hash = inet_addr_hash(net, addr);
141 struct net_device *result = NULL;
142 struct in_ifaddr *ifa;
143 struct hlist_node *node;
144
145 rcu_read_lock();
146 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
147 struct net_device *dev = ifa->ifa_dev->dev;
148
149 if (!net_eq(dev_net(dev), net))
150 continue;
151 if (ifa->ifa_local == addr) {
152 result = dev;
153 break;
154 }
155 }
156 if (!result) {
157 struct flowi4 fl4 = { .daddr = addr };
158 struct fib_result res = { 0 };
159 struct fib_table *local;
160
161 /* Fallback to FIB local table so that communication
162 * over loopback subnets work.
163 */
164 local = fib_get_table(net, RT_TABLE_LOCAL);
165 if (local &&
166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 res.type == RTN_LOCAL)
168 result = FIB_RES_DEV(res);
169 }
170 if (result && devref)
171 dev_hold(result);
172 rcu_read_unlock();
173 return result;
174}
175EXPORT_SYMBOL(__ip_dev_find);
176
95static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 177static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 178
97static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 179static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -265,6 +347,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
265 } 347 }
266 348
267 if (!do_promote) { 349 if (!do_promote) {
350 inet_hash_remove(ifa);
268 *ifap1 = ifa->ifa_next; 351 *ifap1 = ifa->ifa_next;
269 352
270 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); 353 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
@@ -278,9 +361,21 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
278 } 361 }
279 } 362 }
280 363
364 /* On promotion all secondaries from subnet are changing
365 * the primary IP, we must remove all their routes silently
366 * and later to add them back with new prefsrc. Do this
367 * while all addresses are on the device list.
368 */
369 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
370 if (ifa1->ifa_mask == ifa->ifa_mask &&
371 inet_ifa_match(ifa1->ifa_address, ifa))
372 fib_del_ifaddr(ifa, ifa1);
373 }
374
281 /* 2. Unlink it */ 375 /* 2. Unlink it */
282 376
283 *ifap = ifa1->ifa_next; 377 *ifap = ifa1->ifa_next;
378 inet_hash_remove(ifa1);
284 379
285 /* 3. Announce address deletion */ 380 /* 3. Announce address deletion */
286 381
@@ -296,6 +391,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
296 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 391 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297 392
298 if (promote) { 393 if (promote) {
394 struct in_ifaddr *next_sec = promote->ifa_next;
299 395
300 if (prev_prom) { 396 if (prev_prom) {
301 prev_prom->ifa_next = promote->ifa_next; 397 prev_prom->ifa_next = promote->ifa_next;
@@ -307,7 +403,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
307 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); 403 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308 blocking_notifier_call_chain(&inetaddr_chain, 404 blocking_notifier_call_chain(&inetaddr_chain,
309 NETDEV_UP, promote); 405 NETDEV_UP, promote);
310 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { 406 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
311 if (ifa1->ifa_mask != ifa->ifa_mask || 407 if (ifa1->ifa_mask != ifa->ifa_mask ||
312 !inet_ifa_match(ifa1->ifa_address, ifa)) 408 !inet_ifa_match(ifa1->ifa_address, ifa))
313 continue; 409 continue;
@@ -368,6 +464,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
368 ifa->ifa_next = *ifap; 464 ifa->ifa_next = *ifap;
369 *ifap = ifa; 465 *ifap = ifa;
370 466
467 inet_hash_insert(dev_net(in_dev->dev), ifa);
468
371 /* Send message first, then call notifier. 469 /* Send message first, then call notifier.
372 Notifier will trigger FIB update, so that 470 Notifier will trigger FIB update, so that
373 listeners of netlink will know about new ifaddr */ 471 listeners of netlink will know about new ifaddr */
@@ -521,6 +619,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
521 if (tb[IFA_ADDRESS] == NULL) 619 if (tb[IFA_ADDRESS] == NULL)
522 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 620 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523 621
622 INIT_HLIST_NODE(&ifa->hash);
524 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 623 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 624 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 ifa->ifa_flags = ifm->ifa_flags; 625 ifa->ifa_flags = ifm->ifa_flags;
@@ -670,7 +769,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
670 ifap = &ifa->ifa_next) { 769 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 770 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr == 771 sin_orig.sin_addr.s_addr ==
673 ifa->ifa_address) { 772 ifa->ifa_local) {
674 break; /* found */ 773 break; /* found */
675 } 774 }
676 } 775 }
@@ -728,6 +827,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
728 if (!ifa) { 827 if (!ifa) {
729 ret = -ENOBUFS; 828 ret = -ENOBUFS;
730 ifa = inet_alloc_ifa(); 829 ifa = inet_alloc_ifa();
830 INIT_HLIST_NODE(&ifa->hash);
731 if (!ifa) 831 if (!ifa)
732 break; 832 break;
733 if (colon) 833 if (colon)
@@ -1040,8 +1140,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
1040 return; 1140 return;
1041 1141
1042 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1142 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043 ifa->ifa_address, dev, 1143 ifa->ifa_local, dev,
1044 ifa->ifa_address, NULL, 1144 ifa->ifa_local, NULL,
1045 dev->dev_addr, NULL); 1145 dev->dev_addr, NULL);
1046} 1146}
1047 1147
@@ -1084,6 +1184,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1084 struct in_ifaddr *ifa = inet_alloc_ifa(); 1184 struct in_ifaddr *ifa = inet_alloc_ifa();
1085 1185
1086 if (ifa) { 1186 if (ifa) {
1187 INIT_HLIST_NODE(&ifa->hash);
1087 ifa->ifa_local = 1188 ifa->ifa_local =
1088 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1189 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1089 ifa->ifa_prefixlen = 8; 1190 ifa->ifa_prefixlen = 8;
@@ -1268,7 +1369,7 @@ errout:
1268 1369
1269static size_t inet_get_link_af_size(const struct net_device *dev) 1370static size_t inet_get_link_af_size(const struct net_device *dev)
1270{ 1371{
1271 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1372 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1272 1373
1273 if (!in_dev) 1374 if (!in_dev)
1274 return 0; 1375 return 0;
@@ -1278,7 +1379,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
1278 1379
1279static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) 1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1280{ 1381{
1281 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1382 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1282 struct nlattr *nla; 1383 struct nlattr *nla;
1283 int i; 1384 int i;
1284 1385
@@ -1579,7 +1680,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1579 return; 1680 return;
1580 1681
1581 cnf->sysctl = NULL; 1682 cnf->sysctl = NULL;
1582 unregister_sysctl_table(t->sysctl_header); 1683 unregister_net_sysctl_table(t->sysctl_header);
1583 kfree(t->dev_name); 1684 kfree(t->dev_name);
1584 kfree(t); 1685 kfree(t);
1585} 1686}
@@ -1720,6 +1821,11 @@ static struct rtnl_af_ops inet_af_ops = {
1720 1821
1721void __init devinet_init(void) 1822void __init devinet_init(void)
1722{ 1823{
1824 int i;
1825
1826 for (i = 0; i < IN4_ADDR_HSIZE; i++)
1827 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1828
1723 register_pernet_subsys(&devinet_ops); 1829 register_pernet_subsys(&devinet_ops);
1724 1830
1725 register_gifconf(PF_INET, inet_gifconf); 1831 register_gifconf(PF_INET, inet_gifconf);
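[note] devinet.c gains a 256-bucket hash over configured addresses so the relocated __ip_dev_find() can resolve a local address without a FIB lookup on the fast path (the FIB local table remains only as a fallback for loopback subnets). The userspace sketch below reproduces the byte-folding hash; the hash_ptr(net, 8) namespace mixing from the kernel version is deliberately omitted.

/* Sketch of the inet_addr_lst bucket selection (namespace mixing omitted). */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define IN4_ADDR_HSIZE 256

static unsigned int inet_addr_hash(uint32_t addr_be)
{
	uint32_t val = addr_be;	/* network byte order, like __be32 in the kernel */

	return (val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
	       (IN4_ADDR_HSIZE - 1);
}

int main(void)
{
	struct in_addr a;

	inet_pton(AF_INET, "192.0.2.1", &a);
	printf("bucket for 192.0.2.1: %u\n", inet_addr_hash(a.s_addr));
	return 0;
}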
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index e42a905180f0..a5b413416da3 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -33,11 +33,14 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
33 * 33 *
34 * TODO: Use spare space in skb for this where possible. 34 * TODO: Use spare space in skb for this where possible.
35 */ 35 */
36static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) 36static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
37{ 37{
38 unsigned int len; 38 unsigned int len;
39 39
40 len = crypto_aead_ivsize(aead); 40 len = seqhilen;
41
42 len += crypto_aead_ivsize(aead);
43
41 if (len) { 44 if (len) {
42 len += crypto_aead_alignmask(aead) & 45 len += crypto_aead_alignmask(aead) &
43 ~(crypto_tfm_ctx_alignment() - 1); 46 ~(crypto_tfm_ctx_alignment() - 1);
@@ -52,10 +55,15 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
52 return kmalloc(len, GFP_ATOMIC); 55 return kmalloc(len, GFP_ATOMIC);
53} 56}
54 57
55static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) 58static inline __be32 *esp_tmp_seqhi(void *tmp)
59{
60 return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
61}
62static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
56{ 63{
57 return crypto_aead_ivsize(aead) ? 64 return crypto_aead_ivsize(aead) ?
58 PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; 65 PTR_ALIGN((u8 *)tmp + seqhilen,
66 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
59} 67}
60 68
61static inline struct aead_givcrypt_request *esp_tmp_givreq( 69static inline struct aead_givcrypt_request *esp_tmp_givreq(
@@ -122,6 +130,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
122 int plen; 130 int plen;
123 int tfclen; 131 int tfclen;
124 int nfrags; 132 int nfrags;
133 int assoclen;
134 int sglists;
135 int seqhilen;
136 __be32 *seqhi;
125 137
126 /* skb is pure payload to encrypt */ 138 /* skb is pure payload to encrypt */
127 139
@@ -151,14 +163,25 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
151 goto error; 163 goto error;
152 nfrags = err; 164 nfrags = err;
153 165
154 tmp = esp_alloc_tmp(aead, nfrags + 1); 166 assoclen = sizeof(*esph);
167 sglists = 1;
168 seqhilen = 0;
169
170 if (x->props.flags & XFRM_STATE_ESN) {
171 sglists += 2;
172 seqhilen += sizeof(__be32);
173 assoclen += seqhilen;
174 }
175
176 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
155 if (!tmp) 177 if (!tmp)
156 goto error; 178 goto error;
157 179
158 iv = esp_tmp_iv(aead, tmp); 180 seqhi = esp_tmp_seqhi(tmp);
181 iv = esp_tmp_iv(aead, tmp, seqhilen);
159 req = esp_tmp_givreq(aead, iv); 182 req = esp_tmp_givreq(aead, iv);
160 asg = esp_givreq_sg(aead, req); 183 asg = esp_givreq_sg(aead, req);
161 sg = asg + 1; 184 sg = asg + sglists;
162 185
163 /* Fill padding... */ 186 /* Fill padding... */
164 tail = skb_tail_pointer(trailer); 187 tail = skb_tail_pointer(trailer);
@@ -215,19 +238,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
215 } 238 }
216 239
217 esph->spi = x->id.spi; 240 esph->spi = x->id.spi;
218 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); 241 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
219 242
220 sg_init_table(sg, nfrags); 243 sg_init_table(sg, nfrags);
221 skb_to_sgvec(skb, sg, 244 skb_to_sgvec(skb, sg,
222 esph->enc_data + crypto_aead_ivsize(aead) - skb->data, 245 esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
223 clen + alen); 246 clen + alen);
224 sg_init_one(asg, esph, sizeof(*esph)); 247
248 if ((x->props.flags & XFRM_STATE_ESN)) {
249 sg_init_table(asg, 3);
250 sg_set_buf(asg, &esph->spi, sizeof(__be32));
251 *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
252 sg_set_buf(asg + 1, seqhi, seqhilen);
253 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
254 } else
255 sg_init_one(asg, esph, sizeof(*esph));
225 256
226 aead_givcrypt_set_callback(req, 0, esp_output_done, skb); 257 aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
227 aead_givcrypt_set_crypt(req, sg, sg, clen, iv); 258 aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
228 aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); 259 aead_givcrypt_set_assoc(req, asg, assoclen);
229 aead_givcrypt_set_giv(req, esph->enc_data, 260 aead_givcrypt_set_giv(req, esph->enc_data,
230 XFRM_SKB_CB(skb)->seq.output); 261 XFRM_SKB_CB(skb)->seq.output.low);
231 262
232 ESP_SKB_CB(skb)->tmp = tmp; 263 ESP_SKB_CB(skb)->tmp = tmp;
233 err = crypto_aead_givencrypt(req); 264 err = crypto_aead_givencrypt(req);
@@ -245,7 +276,7 @@ error:
245 276
246static int esp_input_done2(struct sk_buff *skb, int err) 277static int esp_input_done2(struct sk_buff *skb, int err)
247{ 278{
248 struct iphdr *iph; 279 const struct iphdr *iph;
249 struct xfrm_state *x = xfrm_input_state(skb); 280 struct xfrm_state *x = xfrm_input_state(skb);
250 struct esp_data *esp = x->data; 281 struct esp_data *esp = x->data;
251 struct crypto_aead *aead = esp->aead; 282 struct crypto_aead *aead = esp->aead;
@@ -346,6 +377,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
346 struct sk_buff *trailer; 377 struct sk_buff *trailer;
347 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); 378 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
348 int nfrags; 379 int nfrags;
380 int assoclen;
381 int sglists;
382 int seqhilen;
383 __be32 *seqhi;
349 void *tmp; 384 void *tmp;
350 u8 *iv; 385 u8 *iv;
351 struct scatterlist *sg; 386 struct scatterlist *sg;
@@ -362,16 +397,27 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
362 goto out; 397 goto out;
363 nfrags = err; 398 nfrags = err;
364 399
400 assoclen = sizeof(*esph);
401 sglists = 1;
402 seqhilen = 0;
403
404 if (x->props.flags & XFRM_STATE_ESN) {
405 sglists += 2;
406 seqhilen += sizeof(__be32);
407 assoclen += seqhilen;
408 }
409
365 err = -ENOMEM; 410 err = -ENOMEM;
366 tmp = esp_alloc_tmp(aead, nfrags + 1); 411 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
367 if (!tmp) 412 if (!tmp)
368 goto out; 413 goto out;
369 414
370 ESP_SKB_CB(skb)->tmp = tmp; 415 ESP_SKB_CB(skb)->tmp = tmp;
371 iv = esp_tmp_iv(aead, tmp); 416 seqhi = esp_tmp_seqhi(tmp);
417 iv = esp_tmp_iv(aead, tmp, seqhilen);
372 req = esp_tmp_req(aead, iv); 418 req = esp_tmp_req(aead, iv);
373 asg = esp_req_sg(aead, req); 419 asg = esp_req_sg(aead, req);
374 sg = asg + 1; 420 sg = asg + sglists;
375 421
376 skb->ip_summed = CHECKSUM_NONE; 422 skb->ip_summed = CHECKSUM_NONE;
377 423
@@ -382,11 +428,19 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
382 428
383 sg_init_table(sg, nfrags); 429 sg_init_table(sg, nfrags);
384 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); 430 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
385 sg_init_one(asg, esph, sizeof(*esph)); 431
432 if ((x->props.flags & XFRM_STATE_ESN)) {
433 sg_init_table(asg, 3);
434 sg_set_buf(asg, &esph->spi, sizeof(__be32));
435 *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
436 sg_set_buf(asg + 1, seqhi, seqhilen);
437 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
438 } else
439 sg_init_one(asg, esph, sizeof(*esph));
386 440
387 aead_request_set_callback(req, 0, esp_input_done, skb); 441 aead_request_set_callback(req, 0, esp_input_done, skb);
388 aead_request_set_crypt(req, sg, sg, elen, iv); 442 aead_request_set_crypt(req, sg, sg, elen, iv);
389 aead_request_set_assoc(req, asg, sizeof(*esph)); 443 aead_request_set_assoc(req, asg, assoclen);
390 444
391 err = crypto_aead_decrypt(req); 445 err = crypto_aead_decrypt(req);
392 if (err == -EINPROGRESS) 446 if (err == -EINPROGRESS)
@@ -430,7 +484,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
430static void esp4_err(struct sk_buff *skb, u32 info) 484static void esp4_err(struct sk_buff *skb, u32 info)
431{ 485{
432 struct net *net = dev_net(skb->dev); 486 struct net *net = dev_net(skb->dev);
433 struct iphdr *iph = (struct iphdr *)skb->data; 487 const struct iphdr *iph = (const struct iphdr *)skb->data;
434 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); 488 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
435 struct xfrm_state *x; 489 struct xfrm_state *x;
436 490
@@ -438,7 +492,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
438 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 492 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
439 return; 493 return;
440 494
441 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); 495 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
496 esph->spi, IPPROTO_ESP, AF_INET);
442 if (!x) 497 if (!x)
443 return; 498 return;
444 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", 499 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
@@ -500,10 +555,20 @@ static int esp_init_authenc(struct xfrm_state *x)
500 goto error; 555 goto error;
501 556
502 err = -ENAMETOOLONG; 557 err = -ENAMETOOLONG;
503 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", 558
504 x->aalg ? x->aalg->alg_name : "digest_null", 559 if ((x->props.flags & XFRM_STATE_ESN)) {
505 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) 560 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
506 goto error; 561 "authencesn(%s,%s)",
562 x->aalg ? x->aalg->alg_name : "digest_null",
563 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
564 goto error;
565 } else {
566 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
567 "authenc(%s,%s)",
568 x->aalg ? x->aalg->alg_name : "digest_null",
569 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
570 goto error;
571 }
507 572
508 aead = crypto_alloc_aead(authenc_name, 0, 0); 573 aead = crypto_alloc_aead(authenc_name, 0, 0);
509 err = PTR_ERR(aead); 574 err = PTR_ERR(aead);
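[note] The ESP changes add IPsec extended sequence numbers: with XFRM_STATE_ESN the associated data for the AEAD grows from SPI plus the low 32 sequence bits to SPI, the high 32 bits, then the low 32 bits, carried in three scatterlist entries (and "authencesn" is selected instead of "authenc"). The small program below only illustrates the hi/lo split and the resulting assoclen; the counter value is made up.

/* Sketch of the ESN bookkeeping: 64-bit counter split and assoclen growth. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t seq = 0x100000002ULL;		/* hypothetical ESN counter */
	uint32_t hi = seq >> 32, lo = (uint32_t)seq;
	int assoclen = 4 /* spi */ + 4 /* seq_no (low bits) */;

	printf("seq.hi=%u seq.lo=%u\n", hi, lo);
	printf("assoclen without ESN: %d bytes\n", assoclen);
	printf("assoclen with ESN:    %d bytes\n", assoclen + 4 /* seq_hi */);
	return 0;
}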
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1d2cdd43a878..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
44#include <net/arp.h> 44#include <net/arp.h>
45#include <net/ip_fib.h> 45#include <net/ip_fib.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/xfrm.h>
47 48
48#ifndef CONFIG_IP_MULTIPLE_TABLES 49#ifndef CONFIG_IP_MULTIPLE_TABLES
49 50
@@ -51,11 +52,11 @@ static int __net_init fib4_rules_init(struct net *net)
51{ 52{
52 struct fib_table *local_table, *main_table; 53 struct fib_table *local_table, *main_table;
53 54
54 local_table = fib_hash_table(RT_TABLE_LOCAL); 55 local_table = fib_trie_table(RT_TABLE_LOCAL);
55 if (local_table == NULL) 56 if (local_table == NULL)
56 return -ENOMEM; 57 return -ENOMEM;
57 58
58 main_table = fib_hash_table(RT_TABLE_MAIN); 59 main_table = fib_trie_table(RT_TABLE_MAIN);
59 if (main_table == NULL) 60 if (main_table == NULL)
60 goto fail; 61 goto fail;
61 62
@@ -82,7 +83,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
82 if (tb) 83 if (tb)
83 return tb; 84 return tb;
84 85
85 tb = fib_hash_table(id); 86 tb = fib_trie_table(id);
86 if (!tb) 87 if (!tb)
87 return NULL; 88 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1); 89 h = id & (FIB_TABLE_HASHSZ - 1);
@@ -114,21 +115,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
114} 115}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 116#endif /* CONFIG_IP_MULTIPLE_TABLES */
116 117
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net) 118static void fib_flush(struct net *net)
133{ 119{
134 int flushed = 0; 120 int flushed = 0;
@@ -147,46 +133,6 @@ static void fib_flush(struct net *net)
147 rt_cache_flush(net, -1); 133 rt_cache_flush(net, -1);
148} 134}
149 135
150/**
151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU, or RTNL
157 */
158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
159{
160 struct flowi fl = {
161 .fl4_dst = addr,
162 };
163 struct fib_result res = { 0 };
164 struct net_device *dev = NULL;
165 struct fib_table *local_table;
166
167#ifdef CONFIG_IP_MULTIPLE_TABLES
168 res.r = NULL;
169#endif
170
171 rcu_read_lock();
172 local_table = fib_get_table(net, RT_TABLE_LOCAL);
173 if (!local_table ||
174 fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
175 rcu_read_unlock();
176 return NULL;
177 }
178 if (res.type != RTN_LOCAL)
179 goto out;
180 dev = FIB_RES_DEV(res);
181
182 if (dev && devref)
183 dev_hold(dev);
184out:
185 rcu_read_unlock();
186 return dev;
187}
188EXPORT_SYMBOL(__ip_dev_find);
189
190/* 136/*
191 * Find address type as if only "dev" was present in the system. If 137 * Find address type as if only "dev" was present in the system. If
192 * on_dev is NULL then all interfaces are taken into consideration. 138 * on_dev is NULL then all interfaces are taken into consideration.
@@ -195,7 +141,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
195 const struct net_device *dev, 141 const struct net_device *dev,
196 __be32 addr) 142 __be32 addr)
197{ 143{
198 struct flowi fl = { .fl4_dst = addr }; 144 struct flowi4 fl4 = { .daddr = addr };
199 struct fib_result res; 145 struct fib_result res;
200 unsigned ret = RTN_BROADCAST; 146 unsigned ret = RTN_BROADCAST;
201 struct fib_table *local_table; 147 struct fib_table *local_table;
@@ -213,7 +159,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
213 if (local_table) { 159 if (local_table) {
214 ret = RTN_UNICAST; 160 ret = RTN_UNICAST;
215 rcu_read_lock(); 161 rcu_read_lock();
216 if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { 162 if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
217 if (!dev || dev == res.fi->fib_dev) 163 if (!dev || dev == res.fi->fib_dev)
218 ret = res.type; 164 ret = res.type;
219 } 165 }
@@ -243,45 +189,48 @@ EXPORT_SYMBOL(inet_dev_addr_type);
243 * - check, that packet arrived from expected physical interface. 189 * - check, that packet arrived from expected physical interface.
244 * called with rcu_read_lock() 190 * called with rcu_read_lock()
245 */ 191 */
246int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 192int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
247 struct net_device *dev, __be32 *spec_dst, 193 int oif, struct net_device *dev, __be32 *spec_dst,
248 u32 *itag, u32 mark) 194 u32 *itag)
249{ 195{
250 struct in_device *in_dev; 196 struct in_device *in_dev;
251 struct flowi fl = { 197 struct flowi4 fl4;
252 .fl4_dst = src,
253 .fl4_src = dst,
254 .fl4_tos = tos,
255 .mark = mark,
256 .iif = oif
257 };
258 struct fib_result res; 198 struct fib_result res;
259 int no_addr, rpf, accept_local; 199 int no_addr, rpf, accept_local;
260 bool dev_match; 200 bool dev_match;
261 int ret; 201 int ret;
262 struct net *net; 202 struct net *net;
263 203
204 fl4.flowi4_oif = 0;
205 fl4.flowi4_iif = oif;
206 fl4.daddr = src;
207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos;
209 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
210
264 no_addr = rpf = accept_local = 0; 211 no_addr = rpf = accept_local = 0;
265 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
266 if (in_dev) { 213 if (in_dev) {
267 no_addr = in_dev->ifa_list == NULL; 214 no_addr = in_dev->ifa_list == NULL;
268 rpf = IN_DEV_RPFILTER(in_dev); 215
216 /* Ignore rp_filter for packets protected by IPsec. */
217 rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
218
269 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 219 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
270 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 220 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
271 fl.mark = 0;
272 } 221 }
273 222
274 if (in_dev == NULL) 223 if (in_dev == NULL)
275 goto e_inval; 224 goto e_inval;
276 225
277 net = dev_net(dev); 226 net = dev_net(dev);
278 if (fib_lookup(net, &fl, &res)) 227 if (fib_lookup(net, &fl4, &res))
279 goto last_resort; 228 goto last_resort;
280 if (res.type != RTN_UNICAST) { 229 if (res.type != RTN_UNICAST) {
281 if (res.type != RTN_LOCAL || !accept_local) 230 if (res.type != RTN_LOCAL || !accept_local)
282 goto e_inval; 231 goto e_inval;
283 } 232 }
284 *spec_dst = FIB_RES_PREFSRC(res); 233 *spec_dst = FIB_RES_PREFSRC(net, res);
285 fib_combine_itag(itag, &res); 234 fib_combine_itag(itag, &res);
286 dev_match = false; 235 dev_match = false;
287 236
@@ -306,12 +255,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
306 goto last_resort; 255 goto last_resort;
307 if (rpf == 1) 256 if (rpf == 1)
308 goto e_rpf; 257 goto e_rpf;
309 fl.oif = dev->ifindex; 258 fl4.flowi4_oif = dev->ifindex;
310 259
311 ret = 0; 260 ret = 0;
312 if (fib_lookup(net, &fl, &res) == 0) { 261 if (fib_lookup(net, &fl4, &res) == 0) {
313 if (res.type == RTN_UNICAST) { 262 if (res.type == RTN_UNICAST) {
314 *spec_dst = FIB_RES_PREFSRC(res); 263 *spec_dst = FIB_RES_PREFSRC(net, res);
315 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 264 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
316 } 265 }
317 } 266 }
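Throughout this hunk the generic struct flowi key is replaced by struct flowi4, whose members name the IPv4 fields directly (daddr, saddr, flowi4_tos, flowi4_scope, flowi4_oif, flowi4_iif, flowi4_mark). Note the reverse-path trick carried over from the old code: fib_validate_source() looks up the packet's source as daddr and its destination as saddr, asking how the box would route back to the sender. The sketch below shows that key construction with a simplified stand-in structure; the member names follow the patch, but the layout and the helper are illustrative only.

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for struct flowi4 (not the kernel layout). */
struct flowi4 {
    int      flowi4_oif, flowi4_iif;
    uint32_t daddr, saddr;
    uint8_t  flowi4_tos, flowi4_scope;
    uint32_t flowi4_mark;
};

/* Build the key the way fib_validate_source() does: source and destination
 * are swapped so the lookup answers "how would I route back to the sender?" */
static struct flowi4 reverse_path_key(uint32_t src, uint32_t dst, uint8_t tos,
                                      int iif, uint32_t mark)
{
    struct flowi4 fl4 = {
        .flowi4_oif   = 0,
        .flowi4_iif   = iif,
        .daddr        = src,    /* packet source becomes lookup destination */
        .saddr        = dst,
        .flowi4_tos   = tos,
        .flowi4_scope = 0,      /* RT_SCOPE_UNIVERSE in the kernel */
        .flowi4_mark  = mark,
    };
    return fl4;
}

int main(void)
{
    struct flowi4 k = reverse_path_key(0xc0a80001, 0x0a000001, 0, 2, 0);
    printf("lookup daddr=%#x saddr=%#x iif=%d\n",
           (unsigned)k.daddr, (unsigned)k.saddr, k.flowi4_iif);
    return 0;
}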
@@ -775,12 +724,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
775 } 724 }
776} 725}
777 726
778static void fib_del_ifaddr(struct in_ifaddr *ifa) 727/* Delete primary or secondary address.
728 * Optionally, on secondary address promotion consider the addresses
729 * from subnet iprim as deleted, even if they are in device list.
730 * In this case the secondary ifa can be in device list.
731 */
732void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
779{ 733{
780 struct in_device *in_dev = ifa->ifa_dev; 734 struct in_device *in_dev = ifa->ifa_dev;
781 struct net_device *dev = in_dev->dev; 735 struct net_device *dev = in_dev->dev;
782 struct in_ifaddr *ifa1; 736 struct in_ifaddr *ifa1;
783 struct in_ifaddr *prim = ifa; 737 struct in_ifaddr *prim = ifa, *prim1 = NULL;
784 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; 738 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
785 __be32 any = ifa->ifa_address & ifa->ifa_mask; 739 __be32 any = ifa->ifa_address & ifa->ifa_mask;
786#define LOCAL_OK 1 740#define LOCAL_OK 1
@@ -788,17 +742,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
788#define BRD0_OK 4 742#define BRD0_OK 4
789#define BRD1_OK 8 743#define BRD1_OK 8
790 unsigned ok = 0; 744 unsigned ok = 0;
745 int subnet = 0; /* Primary network */
746 int gone = 1; /* Address is missing */
747 int same_prefsrc = 0; /* Another primary with same IP */
791 748
792 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) 749 if (ifa->ifa_flags & IFA_F_SECONDARY) {
793 fib_magic(RTM_DELROUTE,
794 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
795 any, ifa->ifa_prefixlen, prim);
796 else {
797 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 750 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
798 if (prim == NULL) { 751 if (prim == NULL) {
799 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 752 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
800 return; 753 return;
801 } 754 }
755 if (iprim && iprim != prim) {
756 printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
757 return;
758 }
759 } else if (!ipv4_is_zeronet(any) &&
760 (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
761 fib_magic(RTM_DELROUTE,
762 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
763 any, ifa->ifa_prefixlen, prim);
764 subnet = 1;
802 } 765 }
803 766
804 /* Deletion is more complicated than add. 767 /* Deletion is more complicated than add.
@@ -808,6 +771,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
808 */ 771 */
809 772
810 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 773 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
774 if (ifa1 == ifa) {
775 /* promotion, keep the IP */
776 gone = 0;
777 continue;
778 }
779 /* Ignore IFAs from our subnet */
780 if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
781 inet_ifa_match(ifa1->ifa_address, iprim))
782 continue;
783
784 /* Ignore ifa1 if it uses different primary IP (prefsrc) */
785 if (ifa1->ifa_flags & IFA_F_SECONDARY) {
786 /* Another address from our subnet? */
787 if (ifa1->ifa_mask == prim->ifa_mask &&
788 inet_ifa_match(ifa1->ifa_address, prim))
789 prim1 = prim;
790 else {
791 /* We reached the secondaries, so
792 * same_prefsrc should be determined.
793 */
794 if (!same_prefsrc)
795 continue;
796 /* Search new prim1 if ifa1 is not
797 * using the current prim1
798 */
799 if (!prim1 ||
800 ifa1->ifa_mask != prim1->ifa_mask ||
801 !inet_ifa_match(ifa1->ifa_address, prim1))
802 prim1 = inet_ifa_byprefix(in_dev,
803 ifa1->ifa_address,
804 ifa1->ifa_mask);
805 if (!prim1)
806 continue;
807 if (prim1->ifa_local != prim->ifa_local)
808 continue;
809 }
810 } else {
811 if (prim->ifa_local != ifa1->ifa_local)
812 continue;
813 prim1 = ifa1;
814 if (prim != prim1)
815 same_prefsrc = 1;
816 }
811 if (ifa->ifa_local == ifa1->ifa_local) 817 if (ifa->ifa_local == ifa1->ifa_local)
812 ok |= LOCAL_OK; 818 ok |= LOCAL_OK;
813 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 819 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -816,19 +822,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
816 ok |= BRD1_OK; 822 ok |= BRD1_OK;
817 if (any == ifa1->ifa_broadcast) 823 if (any == ifa1->ifa_broadcast)
818 ok |= BRD0_OK; 824 ok |= BRD0_OK;
825 /* primary has network specific broadcasts */
826 if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
827 __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
828 __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
829
830 if (!ipv4_is_zeronet(any1)) {
831 if (ifa->ifa_broadcast == brd1 ||
832 ifa->ifa_broadcast == any1)
833 ok |= BRD_OK;
834 if (brd == brd1 || brd == any1)
835 ok |= BRD1_OK;
836 if (any == brd1 || any == any1)
837 ok |= BRD0_OK;
838 }
839 }
819 } 840 }
820 841
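fib_del_ifaddr() works almost entirely in terms of two addresses derived near the top of the function: the subnet broadcast brd = address | ~mask and the network address any = address & mask. The new ifa_prefixlen < 31 checks exist because /31 and /32 prefixes have no separate broadcast address, so no broadcast routes should be created or torn down for them. A small standalone illustration of that arithmetic, with addresses chosen for the example:

#include <stdio.h>
#include <stdint.h>

/* Network/broadcast derivation used throughout fib_del_ifaddr():
 *   any = address & mask   (network route key)
 *   brd = address | ~mask  (subnet broadcast)
 * Prefixes of length 31 and 32 have no distinct broadcast address. */
static uint32_t make_mask(int prefixlen)
{
    return prefixlen ? ~0u << (32 - prefixlen) : 0;
}

static void show(uint32_t addr, int prefixlen)
{
    uint32_t mask = make_mask(prefixlen);
    uint32_t any  = addr & mask;
    uint32_t brd  = addr | ~mask;

    printf("%#010x/%d: network %#010x broadcast %#010x%s\n",
           (unsigned)addr, prefixlen, (unsigned)any, (unsigned)brd,
           prefixlen >= 31 ? " (no separate broadcast route)" : "");
}

int main(void)
{
    show(0x0a000105, 24);   /* 10.0.1.5/24 */
    show(0x0a000105, 31);   /* 10.0.1.5/31 */
    show(0x0a000105, 32);   /* 10.0.1.5/32 */
    return 0;
}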
821 if (!(ok & BRD_OK)) 842 if (!(ok & BRD_OK))
822 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 843 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
823 if (!(ok & BRD1_OK)) 844 if (subnet && ifa->ifa_prefixlen < 31) {
824 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 845 if (!(ok & BRD1_OK))
825 if (!(ok & BRD0_OK)) 846 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
826 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 847 if (!(ok & BRD0_OK))
848 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
849 }
827 if (!(ok & LOCAL_OK)) { 850 if (!(ok & LOCAL_OK)) {
828 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 851 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
829 852
830 /* Check, that this local address finally disappeared. */ 853 /* Check, that this local address finally disappeared. */
831 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 854 if (gone &&
855 inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
832 /* And the last, but not the least thing. 856 /* And the last, but not the least thing.
833 * We must flush stray FIB entries. 857 * We must flush stray FIB entries.
834 * 858 *
@@ -849,11 +873,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
849{ 873{
850 874
851 struct fib_result res; 875 struct fib_result res;
852 struct flowi fl = { 876 struct flowi4 fl4 = {
853 .mark = frn->fl_mark, 877 .flowi4_mark = frn->fl_mark,
854 .fl4_dst = frn->fl_addr, 878 .daddr = frn->fl_addr,
855 .fl4_tos = frn->fl_tos, 879 .flowi4_tos = frn->fl_tos,
856 .fl4_scope = frn->fl_scope, 880 .flowi4_scope = frn->fl_scope,
857 }; 881 };
858 882
859#ifdef CONFIG_IP_MULTIPLE_TABLES 883#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -866,7 +890,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
866 890
867 frn->tb_id = tb->tb_id; 891 frn->tb_id = tb->tb_id;
868 rcu_read_lock(); 892 rcu_read_lock();
869 frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); 893 frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
870 894
871 if (!frn->err) { 895 if (!frn->err) {
872 frn->prefixlen = res.prefixlen; 896 frn->prefixlen = res.prefixlen;
@@ -938,6 +962,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
938{ 962{
939 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 963 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
940 struct net_device *dev = ifa->ifa_dev->dev; 964 struct net_device *dev = ifa->ifa_dev->dev;
965 struct net *net = dev_net(dev);
941 966
942 switch (event) { 967 switch (event) {
943 case NETDEV_UP: 968 case NETDEV_UP:
@@ -945,10 +970,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
945#ifdef CONFIG_IP_ROUTE_MULTIPATH 970#ifdef CONFIG_IP_ROUTE_MULTIPATH
946 fib_sync_up(dev); 971 fib_sync_up(dev);
947#endif 972#endif
973 atomic_inc(&net->ipv4.dev_addr_genid);
948 rt_cache_flush(dev_net(dev), -1); 974 rt_cache_flush(dev_net(dev), -1);
949 break; 975 break;
950 case NETDEV_DOWN: 976 case NETDEV_DOWN:
951 fib_del_ifaddr(ifa); 977 fib_del_ifaddr(ifa, NULL);
978 atomic_inc(&net->ipv4.dev_addr_genid);
952 if (ifa->ifa_dev->ifa_list == NULL) { 979 if (ifa->ifa_dev->ifa_list == NULL) {
953 /* Last address was deleted from this interface. 980 /* Last address was deleted from this interface.
954 * Disable IP. 981 * Disable IP.
@@ -966,6 +993,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
966{ 993{
967 struct net_device *dev = ptr; 994 struct net_device *dev = ptr;
968 struct in_device *in_dev = __in_dev_get_rtnl(dev); 995 struct in_device *in_dev = __in_dev_get_rtnl(dev);
996 struct net *net = dev_net(dev);
969 997
970 if (event == NETDEV_UNREGISTER) { 998 if (event == NETDEV_UNREGISTER) {
971 fib_disable_ip(dev, 2, -1); 999 fib_disable_ip(dev, 2, -1);
@@ -983,6 +1011,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
983#ifdef CONFIG_IP_ROUTE_MULTIPATH 1011#ifdef CONFIG_IP_ROUTE_MULTIPATH
984 fib_sync_up(dev); 1012 fib_sync_up(dev);
985#endif 1013#endif
1014 atomic_inc(&net->ipv4.dev_addr_genid);
986 rt_cache_flush(dev_net(dev), -1); 1015 rt_cache_flush(dev_net(dev), -1);
987 break; 1016 break;
988 case NETDEV_DOWN: 1017 case NETDEV_DOWN:
@@ -1041,6 +1070,7 @@ static void ip_fib_net_exit(struct net *net)
1041 fib4_rules_exit(net); 1070 fib4_rules_exit(net);
1042#endif 1071#endif
1043 1072
1073 rtnl_lock();
1044 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1074 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1045 struct fib_table *tb; 1075 struct fib_table *tb;
1046 struct hlist_head *head; 1076 struct hlist_head *head;
@@ -1053,6 +1083,7 @@ static void ip_fib_net_exit(struct net *net)
1053 fib_free_table(tb); 1083 fib_free_table(tb);
1054 } 1084 }
1055 } 1085 }
1086 rtnl_unlock();
1056 kfree(net->ipv4.fib_table_hash); 1087 kfree(net->ipv4.fib_table_hash);
1057} 1088}
1058 1089
@@ -1101,5 +1132,5 @@ void __init ip_fib_init(void)
1101 register_netdevice_notifier(&fib_netdev_notifier); 1132 register_netdevice_notifier(&fib_netdev_notifier);
1102 register_inetaddr_notifier(&fib_inetaddr_notifier); 1133 register_inetaddr_notifier(&fib_inetaddr_notifier);
1103 1134
1104 fib_hash_init(); 1135 fib_trie_init();
1105} 1136}
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
deleted file mode 100644
index b3acb0417b21..000000000000
--- a/net/ipv4/fib_hash.c
+++ /dev/null
@@ -1,1133 +0,0 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 FIB: lookup engine and maintenance routines.
7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <asm/uaccess.h>
17#include <asm/system.h>
18#include <linux/bitops.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/inetdevice.h>
29#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/netlink.h>
34#include <linux/init.h>
35#include <linux/slab.h>
36
37#include <net/net_namespace.h>
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
44
45#include "fib_lookup.h"
46
47static struct kmem_cache *fn_hash_kmem __read_mostly;
48static struct kmem_cache *fn_alias_kmem __read_mostly;
49
50struct fib_node {
51 struct hlist_node fn_hash;
52 struct list_head fn_alias;
53 __be32 fn_key;
54 struct fib_alias fn_embedded_alias;
55};
56
57#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
58
59struct fn_zone {
60 struct fn_zone __rcu *fz_next; /* Next not empty zone */
61 struct hlist_head __rcu *fz_hash; /* Hash table pointer */
62 seqlock_t fz_lock;
63 u32 fz_hashmask; /* (fz_divisor - 1) */
64
65 u8 fz_order; /* Zone order (0..32) */
66 u8 fz_revorder; /* 32 - fz_order */
67 __be32 fz_mask; /* inet_make_mask(order) */
68#define FZ_MASK(fz) ((fz)->fz_mask)
69
70 struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE];
71
72 int fz_nent; /* Number of entries */
73 int fz_divisor; /* Hash size (mask+1) */
74};
75
76struct fn_hash {
77 struct fn_zone *fn_zones[33];
78 struct fn_zone __rcu *fn_zone_list;
79};
80
81static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
82{
83 u32 h = ntohl(key) >> fz->fz_revorder;
84 h ^= (h>>20);
85 h ^= (h>>10);
86 h ^= (h>>5);
87 h &= fz->fz_hashmask;
88 return h;
89}
90
91static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
92{
93 return dst & FZ_MASK(fz);
94}
95
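Each zone of the hash engine being removed keyed its buckets with the two helpers above: fz_key() keeps only the zone's prefix bits, and fn_hash() shifts the key by fz_revorder so the prefix sits in the low bits, xor-folds it, and applies the bucket mask. The same arithmetic in a standalone program; the prefix length, bucket count and address are arbitrary examples, and the kernel additionally converts the big-endian key with ntohl() before hashing.

#include <stdio.h>
#include <stdint.h>

/* Userspace copy of the bucket selection from fib_hash.c. The key is taken
 * in host byte order here; the /0 zone (order 0) is not modelled, the kernel
 * gives it a single bucket. */
struct zone { int order; uint32_t mask; uint32_t hashmask; };

static uint32_t fz_key(uint32_t dst, const struct zone *fz)
{
    return dst & fz->mask;
}

static uint32_t fn_hash(uint32_t key, const struct zone *fz)
{
    uint32_t h = key >> (32 - fz->order);   /* the fz_revorder shift */

    h ^= h >> 20;
    h ^= h >> 10;
    h ^= h >> 5;
    return h & fz->hashmask;
}

int main(void)
{
    /* A /24 zone with 16 buckets (hashmask 15), values picked for the demo. */
    struct zone fz = { .order = 24, .mask = 0xffffff00, .hashmask = 15 };
    uint32_t dst = 0x0a01fe07;              /* 10.1.254.7 */
    uint32_t key = fz_key(dst, &fz);

    printf("key %#010x -> bucket %u\n", (unsigned)key, (unsigned)fn_hash(key, &fz));
    return 0;
}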
96static unsigned int fib_hash_genid;
97
98#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
99
100static struct hlist_head *fz_hash_alloc(int divisor)
101{
102 unsigned long size = divisor * sizeof(struct hlist_head);
103
104 if (size <= PAGE_SIZE)
105 return kzalloc(size, GFP_KERNEL);
106
107 return (struct hlist_head *)
108 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
109}
110
111/* The fib hash lock must be held when this is called. */
112static inline void fn_rebuild_zone(struct fn_zone *fz,
113 struct hlist_head *old_ht,
114 int old_divisor)
115{
116 int i;
117
118 for (i = 0; i < old_divisor; i++) {
119 struct hlist_node *node, *n;
120 struct fib_node *f;
121
122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
123 struct hlist_head *new_head;
124
125 hlist_del_rcu(&f->fn_hash);
126
127 new_head = rcu_dereference_protected(fz->fz_hash, 1) +
128 fn_hash(f->fn_key, fz);
129 hlist_add_head_rcu(&f->fn_hash, new_head);
130 }
131 }
132}
133
134static void fz_hash_free(struct hlist_head *hash, int divisor)
135{
136 unsigned long size = divisor * sizeof(struct hlist_head);
137
138 if (size <= PAGE_SIZE)
139 kfree(hash);
140 else
141 free_pages((unsigned long)hash, get_order(size));
142}
143
144static void fn_rehash_zone(struct fn_zone *fz)
145{
146 struct hlist_head *ht, *old_ht;
147 int old_divisor, new_divisor;
148 u32 new_hashmask;
149
150 new_divisor = old_divisor = fz->fz_divisor;
151
152 switch (old_divisor) {
153 case EMBEDDED_HASH_SIZE:
154 new_divisor *= EMBEDDED_HASH_SIZE;
155 break;
156 case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
157 new_divisor *= (EMBEDDED_HASH_SIZE/2);
158 break;
159 default:
160 if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
161 printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
162 return;
163 }
164 new_divisor = (old_divisor << 1);
165 break;
166 }
167
168 new_hashmask = (new_divisor - 1);
169
170#if RT_CACHE_DEBUG >= 2
171 printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
172 fz->fz_order, old_divisor);
173#endif
174
175 ht = fz_hash_alloc(new_divisor);
176
177 if (ht) {
178 struct fn_zone nfz;
179
180 memcpy(&nfz, fz, sizeof(nfz));
181
182 write_seqlock_bh(&fz->fz_lock);
183 old_ht = rcu_dereference_protected(fz->fz_hash, 1);
184 RCU_INIT_POINTER(nfz.fz_hash, ht);
185 nfz.fz_hashmask = new_hashmask;
186 nfz.fz_divisor = new_divisor;
187 fn_rebuild_zone(&nfz, old_ht, old_divisor);
188 fib_hash_genid++;
189 rcu_assign_pointer(fz->fz_hash, ht);
190 fz->fz_hashmask = new_hashmask;
191 fz->fz_divisor = new_divisor;
192 write_sequnlock_bh(&fz->fz_lock);
193
194 if (old_ht != fz->fz_embedded_hash) {
195 synchronize_rcu();
196 fz_hash_free(old_ht, old_divisor);
197 }
198 }
199}
200
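fn_rehash_zone() grows a zone's bucket array under write_seqlock_bh() while readers in fib_table_lookup() re-run their walk whenever read_seqretry() reports that the sequence moved underneath them; RCU keeps the old array reachable until the new one is published. Below is a minimal single-threaded model of that sequence-counter protocol. It omits memory barriers, locking of concurrent writers, and the RCU side entirely, so it only illustrates the retry logic.

#include <stdio.h>

/* Toy seqcount: odd while a writer is mid-update, even when stable.
 * Readers snapshot the counter, copy the data, and retry if it changed. */
static unsigned int seq;                  /* starts even: stable */
static int bucket_count = 16;

static void resize_begin(void) { seq++; }   /* now odd */
static void resize_end(void)   { seq++; }   /* even again */

static unsigned int read_begin(void)
{
    unsigned int s;

    do {
        s = seq;
    } while (s & 1);                      /* a resize is in progress */
    return s;
}

static int read_retry(unsigned int s) { return seq != s; }

int main(void)
{
    unsigned int s;
    int seen;

    do {
        s = read_begin();
        seen = bucket_count;              /* speculative read of zone state */
    } while (read_retry(s));
    printf("stable read: %d buckets (seq %u)\n", seen, s);

    resize_begin();
    bucket_count *= 16;                   /* like the EMBEDDED_HASH_SIZE growth */
    resize_end();

    printf("after resize: %d buckets (seq %u)\n", bucket_count, seq);
    return 0;
}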
201static void fn_free_node_rcu(struct rcu_head *head)
202{
203 struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
204
205 kmem_cache_free(fn_hash_kmem, f);
206}
207
208static inline void fn_free_node(struct fib_node *f)
209{
210 call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
211}
212
213static void fn_free_alias_rcu(struct rcu_head *head)
214{
215 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
216
217 kmem_cache_free(fn_alias_kmem, fa);
218}
219
220static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
221{
222 fib_release_info(fa->fa_info);
223 if (fa == &f->fn_embedded_alias)
224 fa->fa_info = NULL;
225 else
226 call_rcu(&fa->rcu, fn_free_alias_rcu);
227}
228
229static struct fn_zone *
230fn_new_zone(struct fn_hash *table, int z)
231{
232 int i;
233 struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
234 if (!fz)
235 return NULL;
236
237 seqlock_init(&fz->fz_lock);
238 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
239 fz->fz_hashmask = fz->fz_divisor - 1;
240 RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
241 fz->fz_order = z;
242 fz->fz_revorder = 32 - z;
243 fz->fz_mask = inet_make_mask(z);
244
245 /* Find the first not empty zone with more specific mask */
246 for (i = z + 1; i <= 32; i++)
247 if (table->fn_zones[i])
248 break;
249 if (i > 32) {
250 /* No more specific masks, we are the first. */
251 rcu_assign_pointer(fz->fz_next,
252 rtnl_dereference(table->fn_zone_list));
253 rcu_assign_pointer(table->fn_zone_list, fz);
254 } else {
255 rcu_assign_pointer(fz->fz_next,
256 rtnl_dereference(table->fn_zones[i]->fz_next));
257 rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
258 }
259 table->fn_zones[z] = fz;
260 fib_hash_genid++;
261 return fz;
262}
263
264int fib_table_lookup(struct fib_table *tb,
265 const struct flowi *flp, struct fib_result *res,
266 int fib_flags)
267{
268 int err;
269 struct fn_zone *fz;
270 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
271
272 rcu_read_lock();
273 for (fz = rcu_dereference(t->fn_zone_list);
274 fz != NULL;
275 fz = rcu_dereference(fz->fz_next)) {
276 struct hlist_head *head;
277 struct hlist_node *node;
278 struct fib_node *f;
279 __be32 k;
280 unsigned int seq;
281
282 do {
283 seq = read_seqbegin(&fz->fz_lock);
284 k = fz_key(flp->fl4_dst, fz);
285
286 head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
287 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
288 if (f->fn_key != k)
289 continue;
290
291 err = fib_semantic_match(&f->fn_alias,
292 flp, res,
293 fz->fz_order, fib_flags);
294 if (err <= 0)
295 goto out;
296 }
297 } while (read_seqretry(&fz->fz_lock, seq));
298 }
299 err = 1;
300out:
301 rcu_read_unlock();
302 return err;
303}
304
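fib_table_lookup() above walks fn_zone_list from the most specific non-empty zone toward /0 and returns the first zone whose masked key matches, so the ordering of the zone list is what gives the hash engine its longest-prefix-match semantics. The same idea in a standalone program; the routes and device names are invented for the example.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Toy model of the zone walk: zones are visited longest prefix first, and
 * the first zone containing the masked destination wins. */
struct zone_entry { int order; uint32_t prefix; const char *via; };

static const struct zone_entry zones[] = {   /* kept sorted, longest first */
    { 24, 0x0a000100, "eth1" },              /* 10.0.1.0/24 */
    { 16, 0x0a000000, "eth0" },              /* 10.0.0.0/16 */
    {  0, 0x00000000, "default gw" },
};

static const char *lookup(uint32_t dst)
{
    for (size_t i = 0; i < sizeof(zones) / sizeof(zones[0]); i++) {
        uint32_t mask = zones[i].order ? ~0u << (32 - zones[i].order) : 0;

        if ((dst & mask) == zones[i].prefix)
            return zones[i].via;
    }
    return NULL;
}

int main(void)
{
    printf("10.0.1.7  -> %s\n", lookup(0x0a000107));
    printf("10.0.9.1  -> %s\n", lookup(0x0a000901));
    printf("192.0.2.1 -> %s\n", lookup(0xc0000201));
    return 0;
}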
305void fib_table_select_default(struct fib_table *tb,
306 const struct flowi *flp, struct fib_result *res)
307{
308 int order, last_idx;
309 struct hlist_node *node;
310 struct fib_node *f;
311 struct fib_info *fi = NULL;
312 struct fib_info *last_resort;
313 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
314 struct fn_zone *fz = t->fn_zones[0];
315 struct hlist_head *head;
316
317 if (fz == NULL)
318 return;
319
320 last_idx = -1;
321 last_resort = NULL;
322 order = -1;
323
324 rcu_read_lock();
325 head = rcu_dereference(fz->fz_hash);
326 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
327 struct fib_alias *fa;
328
329 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
330 struct fib_info *next_fi = fa->fa_info;
331
332 if (fa->fa_scope != res->scope ||
333 fa->fa_type != RTN_UNICAST)
334 continue;
335
336 if (next_fi->fib_priority > res->fi->fib_priority)
337 break;
338 if (!next_fi->fib_nh[0].nh_gw ||
339 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
340 continue;
341
342 fib_alias_accessed(fa);
343
344 if (fi == NULL) {
345 if (next_fi != res->fi)
346 break;
347 } else if (!fib_detect_death(fi, order, &last_resort,
348 &last_idx, tb->tb_default)) {
349 fib_result_assign(res, fi);
350 tb->tb_default = order;
351 goto out;
352 }
353 fi = next_fi;
354 order++;
355 }
356 }
357
358 if (order <= 0 || fi == NULL) {
359 tb->tb_default = -1;
360 goto out;
361 }
362
363 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
364 tb->tb_default)) {
365 fib_result_assign(res, fi);
366 tb->tb_default = order;
367 goto out;
368 }
369
370 if (last_idx >= 0)
371 fib_result_assign(res, last_resort);
372 tb->tb_default = last_idx;
373out:
374 rcu_read_unlock();
375}
376
377/* Insert node F to FZ. */
378static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
379{
380 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
381
382 hlist_add_head_rcu(&f->fn_hash, head);
383}
384
385/* Return the node in FZ matching KEY. */
386static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
387{
388 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
389 struct hlist_node *node;
390 struct fib_node *f;
391
392 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
393 if (f->fn_key == key)
394 return f;
395 }
396
397 return NULL;
398}
399
400
401static struct fib_alias *fib_fast_alloc(struct fib_node *f)
402{
403 struct fib_alias *fa = &f->fn_embedded_alias;
404
405 if (fa->fa_info != NULL)
406 fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
407 return fa;
408}
409
410/* Caller must hold RTNL. */
411int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
412{
413 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
414 struct fib_node *new_f = NULL;
415 struct fib_node *f;
416 struct fib_alias *fa, *new_fa;
417 struct fn_zone *fz;
418 struct fib_info *fi;
419 u8 tos = cfg->fc_tos;
420 __be32 key;
421 int err;
422
423 if (cfg->fc_dst_len > 32)
424 return -EINVAL;
425
426 fz = table->fn_zones[cfg->fc_dst_len];
427 if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
428 return -ENOBUFS;
429
430 key = 0;
431 if (cfg->fc_dst) {
432 if (cfg->fc_dst & ~FZ_MASK(fz))
433 return -EINVAL;
434 key = fz_key(cfg->fc_dst, fz);
435 }
436
437 fi = fib_create_info(cfg);
438 if (IS_ERR(fi))
439 return PTR_ERR(fi);
440
441 if (fz->fz_nent > (fz->fz_divisor<<1) &&
442 fz->fz_divisor < FZ_MAX_DIVISOR &&
443 (cfg->fc_dst_len == 32 ||
444 (1 << cfg->fc_dst_len) > fz->fz_divisor))
445 fn_rehash_zone(fz);
446
447 f = fib_find_node(fz, key);
448
449 if (!f)
450 fa = NULL;
451 else
452 fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
453
454 /* Now fa, if non-NULL, points to the first fib alias
455 * with the same keys [prefix,tos,priority], if such key already
456 * exists or to the node before which we will insert new one.
457 *
458 * If fa is NULL, we will need to allocate a new one and
459 * insert to the head of f.
460 *
461 * If f is NULL, no fib node matched the destination key
462 * and we need to allocate a new one of those as well.
463 */
464
465 if (fa && fa->fa_tos == tos &&
466 fa->fa_info->fib_priority == fi->fib_priority) {
467 struct fib_alias *fa_first, *fa_match;
468
469 err = -EEXIST;
470 if (cfg->fc_nlflags & NLM_F_EXCL)
471 goto out;
472
473 /* We have 2 goals:
474 * 1. Find exact match for type, scope, fib_info to avoid
475 * duplicate routes
476 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
477 */
478 fa_match = NULL;
479 fa_first = fa;
480 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
481 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
482 if (fa->fa_tos != tos)
483 break;
484 if (fa->fa_info->fib_priority != fi->fib_priority)
485 break;
486 if (fa->fa_type == cfg->fc_type &&
487 fa->fa_scope == cfg->fc_scope &&
488 fa->fa_info == fi) {
489 fa_match = fa;
490 break;
491 }
492 }
493
494 if (cfg->fc_nlflags & NLM_F_REPLACE) {
495 u8 state;
496
497 fa = fa_first;
498 if (fa_match) {
499 if (fa == fa_match)
500 err = 0;
501 goto out;
502 }
503 err = -ENOBUFS;
504 new_fa = fib_fast_alloc(f);
505 if (new_fa == NULL)
506 goto out;
507
508 new_fa->fa_tos = fa->fa_tos;
509 new_fa->fa_info = fi;
510 new_fa->fa_type = cfg->fc_type;
511 new_fa->fa_scope = cfg->fc_scope;
512 state = fa->fa_state;
513 new_fa->fa_state = state & ~FA_S_ACCESSED;
514 fib_hash_genid++;
515 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
516
517 fn_free_alias(fa, f);
518 if (state & FA_S_ACCESSED)
519 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
520 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
521 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
522 return 0;
523 }
524
525 /* Error if we find a perfect match which
526 * uses the same scope, type, and nexthop
527 * information.
528 */
529 if (fa_match)
530 goto out;
531
532 if (!(cfg->fc_nlflags & NLM_F_APPEND))
533 fa = fa_first;
534 }
535
536 err = -ENOENT;
537 if (!(cfg->fc_nlflags & NLM_F_CREATE))
538 goto out;
539
540 err = -ENOBUFS;
541
542 if (!f) {
543 new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
544 if (new_f == NULL)
545 goto out;
546
547 INIT_HLIST_NODE(&new_f->fn_hash);
548 INIT_LIST_HEAD(&new_f->fn_alias);
549 new_f->fn_key = key;
550 f = new_f;
551 }
552
553 new_fa = fib_fast_alloc(f);
554 if (new_fa == NULL)
555 goto out;
556
557 new_fa->fa_info = fi;
558 new_fa->fa_tos = tos;
559 new_fa->fa_type = cfg->fc_type;
560 new_fa->fa_scope = cfg->fc_scope;
561 new_fa->fa_state = 0;
562
563 /*
564 * Insert new entry to the list.
565 */
566
567 if (new_f)
568 fib_insert_node(fz, new_f);
569 list_add_tail_rcu(&new_fa->fa_list,
570 (fa ? &fa->fa_list : &f->fn_alias));
571 fib_hash_genid++;
572
573 if (new_f)
574 fz->fz_nent++;
575 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
576
577 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
578 &cfg->fc_nlinfo, 0);
579 return 0;
580
581out:
582 if (new_f)
583 kmem_cache_free(fn_hash_kmem, new_f);
584 fib_release_info(fi);
585 return err;
586}
587
588int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
589{
590 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
591 struct fib_node *f;
592 struct fib_alias *fa, *fa_to_delete;
593 struct fn_zone *fz;
594 __be32 key;
595
596 if (cfg->fc_dst_len > 32)
597 return -EINVAL;
598
599 if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
600 return -ESRCH;
601
602 key = 0;
603 if (cfg->fc_dst) {
604 if (cfg->fc_dst & ~FZ_MASK(fz))
605 return -EINVAL;
606 key = fz_key(cfg->fc_dst, fz);
607 }
608
609 f = fib_find_node(fz, key);
610
611 if (!f)
612 fa = NULL;
613 else
614 fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
615 if (!fa)
616 return -ESRCH;
617
618 fa_to_delete = NULL;
619 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
620 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
621 struct fib_info *fi = fa->fa_info;
622
623 if (fa->fa_tos != cfg->fc_tos)
624 break;
625
626 if ((!cfg->fc_type ||
627 fa->fa_type == cfg->fc_type) &&
628 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
629 fa->fa_scope == cfg->fc_scope) &&
630 (!cfg->fc_protocol ||
631 fi->fib_protocol == cfg->fc_protocol) &&
632 fib_nh_match(cfg, fi) == 0) {
633 fa_to_delete = fa;
634 break;
635 }
636 }
637
638 if (fa_to_delete) {
639 int kill_fn;
640
641 fa = fa_to_delete;
642 rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
643 tb->tb_id, &cfg->fc_nlinfo, 0);
644
645 kill_fn = 0;
646 list_del_rcu(&fa->fa_list);
647 if (list_empty(&f->fn_alias)) {
648 hlist_del_rcu(&f->fn_hash);
649 kill_fn = 1;
650 }
651 fib_hash_genid++;
652
653 if (fa->fa_state & FA_S_ACCESSED)
654 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
655 fn_free_alias(fa, f);
656 if (kill_fn) {
657 fn_free_node(f);
658 fz->fz_nent--;
659 }
660
661 return 0;
662 }
663 return -ESRCH;
664}
665
666static int fn_flush_list(struct fn_zone *fz, int idx)
667{
668 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
669 struct hlist_node *node, *n;
670 struct fib_node *f;
671 int found = 0;
672
673 hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
674 struct fib_alias *fa, *fa_node;
675 int kill_f;
676
677 kill_f = 0;
678 list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
679 struct fib_info *fi = fa->fa_info;
680
681 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
682 list_del_rcu(&fa->fa_list);
683 if (list_empty(&f->fn_alias)) {
684 hlist_del_rcu(&f->fn_hash);
685 kill_f = 1;
686 }
687 fib_hash_genid++;
688
689 fn_free_alias(fa, f);
690 found++;
691 }
692 }
693 if (kill_f) {
694 fn_free_node(f);
695 fz->fz_nent--;
696 }
697 }
698 return found;
699}
700
701/* caller must hold RTNL. */
702int fib_table_flush(struct fib_table *tb)
703{
704 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
705 struct fn_zone *fz;
706 int found = 0;
707
708 for (fz = rtnl_dereference(table->fn_zone_list);
709 fz != NULL;
710 fz = rtnl_dereference(fz->fz_next)) {
711 int i;
712
713 for (i = fz->fz_divisor - 1; i >= 0; i--)
714 found += fn_flush_list(fz, i);
715 }
716 return found;
717}
718
719void fib_free_table(struct fib_table *tb)
720{
721 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
722 struct fn_zone *fz, *next;
723
724 next = table->fn_zone_list;
725 while (next != NULL) {
726 fz = next;
727 next = fz->fz_next;
728
729 if (fz->fz_hash != fz->fz_embedded_hash)
730 fz_hash_free(fz->fz_hash, fz->fz_divisor);
731
732 kfree(fz);
733 }
734
735 kfree(tb);
736}
737
738static inline int
739fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
740 struct fib_table *tb,
741 struct fn_zone *fz,
742 struct hlist_head *head)
743{
744 struct hlist_node *node;
745 struct fib_node *f;
746 int i, s_i;
747
748 s_i = cb->args[4];
749 i = 0;
750 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
751 struct fib_alias *fa;
752
753 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
754 if (i < s_i)
755 goto next;
756
757 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
758 cb->nlh->nlmsg_seq,
759 RTM_NEWROUTE,
760 tb->tb_id,
761 fa->fa_type,
762 fa->fa_scope,
763 f->fn_key,
764 fz->fz_order,
765 fa->fa_tos,
766 fa->fa_info,
767 NLM_F_MULTI) < 0) {
768 cb->args[4] = i;
769 return -1;
770 }
771next:
772 i++;
773 }
774 }
775 cb->args[4] = i;
776 return skb->len;
777}
778
779static inline int
780fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
781 struct fib_table *tb,
782 struct fn_zone *fz)
783{
784 int h, s_h;
785 struct hlist_head *head = rcu_dereference(fz->fz_hash);
786
787 if (head == NULL)
788 return skb->len;
789 s_h = cb->args[3];
790 for (h = s_h; h < fz->fz_divisor; h++) {
791 if (hlist_empty(head + h))
792 continue;
793 if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
794 cb->args[3] = h;
795 return -1;
796 }
797 memset(&cb->args[4], 0,
798 sizeof(cb->args) - 4*sizeof(cb->args[0]));
799 }
800 cb->args[3] = h;
801 return skb->len;
802}
803
804int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
805 struct netlink_callback *cb)
806{
807 int m = 0, s_m;
808 struct fn_zone *fz;
809 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
810
811 s_m = cb->args[2];
812 rcu_read_lock();
813 for (fz = rcu_dereference(table->fn_zone_list);
814 fz != NULL;
815 fz = rcu_dereference(fz->fz_next), m++) {
816 if (m < s_m)
817 continue;
818 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
819 cb->args[2] = m;
820 rcu_read_unlock();
821 return -1;
822 }
823 memset(&cb->args[3], 0,
824 sizeof(cb->args) - 3*sizeof(cb->args[0]));
825 }
826 rcu_read_unlock();
827 cb->args[2] = m;
828 return skb->len;
829}
830
831void __init fib_hash_init(void)
832{
833 fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
834 0, SLAB_PANIC, NULL);
835
836 fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
837 0, SLAB_PANIC, NULL);
838
839}
840
841struct fib_table *fib_hash_table(u32 id)
842{
843 struct fib_table *tb;
844
845 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
846 GFP_KERNEL);
847 if (tb == NULL)
848 return NULL;
849
850 tb->tb_id = id;
851 tb->tb_default = -1;
852
853 memset(tb->tb_data, 0, sizeof(struct fn_hash));
854 return tb;
855}
856
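fib_hash_table() above allocates the generic struct fib_table and the engine-private struct fn_hash with a single kmalloc(), the private part living in the tb_data area at the end of the table. A userspace sketch of that single-allocation layout; the structure and field names below are invented stand-ins, only the pattern matches the kernel code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One allocation holds a fixed header plus engine-private data, the way
 * fib_table carries fn_hash in tb_data. Names here are illustrative. */
struct engine_data { int buckets; };

struct table {
    unsigned int id;
    int default_idx;
    unsigned char data[];            /* engine-private area, like tb_data */
};

static struct table *table_alloc(unsigned int id)
{
    struct table *tb = malloc(sizeof(*tb) + sizeof(struct engine_data));

    if (!tb)
        return NULL;
    tb->id = id;
    tb->default_idx = -1;
    memset(tb->data, 0, sizeof(struct engine_data));
    return tb;
}

int main(void)
{
    struct table *tb = table_alloc(254);
    struct engine_data *ed = (struct engine_data *)tb->data;

    printf("table %u, engine buckets %d\n", tb->id, ed->buckets);
    free(tb);
    return 0;
}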
857/* ------------------------------------------------------------------------ */
858#ifdef CONFIG_PROC_FS
859
860struct fib_iter_state {
861 struct seq_net_private p;
862 struct fn_zone *zone;
863 int bucket;
864 struct hlist_head *hash_head;
865 struct fib_node *fn;
866 struct fib_alias *fa;
867 loff_t pos;
868 unsigned int genid;
869 int valid;
870};
871
872static struct fib_alias *fib_get_first(struct seq_file *seq)
873{
874 struct fib_iter_state *iter = seq->private;
875 struct fib_table *main_table;
876 struct fn_hash *table;
877
878 main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
879 table = (struct fn_hash *)main_table->tb_data;
880
881 iter->bucket = 0;
882 iter->hash_head = NULL;
883 iter->fn = NULL;
884 iter->fa = NULL;
885 iter->pos = 0;
886 iter->genid = fib_hash_genid;
887 iter->valid = 1;
888
889 for (iter->zone = rcu_dereference(table->fn_zone_list);
890 iter->zone != NULL;
891 iter->zone = rcu_dereference(iter->zone->fz_next)) {
892 int maxslot;
893
894 if (!iter->zone->fz_nent)
895 continue;
896
897 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
898 maxslot = iter->zone->fz_divisor;
899
900 for (iter->bucket = 0; iter->bucket < maxslot;
901 ++iter->bucket, ++iter->hash_head) {
902 struct hlist_node *node;
903 struct fib_node *fn;
904
905 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
906 struct fib_alias *fa;
907
908 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
909 iter->fn = fn;
910 iter->fa = fa;
911 goto out;
912 }
913 }
914 }
915 }
916out:
917 return iter->fa;
918}
919
920static struct fib_alias *fib_get_next(struct seq_file *seq)
921{
922 struct fib_iter_state *iter = seq->private;
923 struct fib_node *fn;
924 struct fib_alias *fa;
925
926 /* Advance FA, if any. */
927 fn = iter->fn;
928 fa = iter->fa;
929 if (fa) {
930 BUG_ON(!fn);
931 list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
932 iter->fa = fa;
933 goto out;
934 }
935 }
936
937 fa = iter->fa = NULL;
938
939 /* Advance FN. */
940 if (fn) {
941 struct hlist_node *node = &fn->fn_hash;
942 hlist_for_each_entry_continue(fn, node, fn_hash) {
943 iter->fn = fn;
944
945 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
946 iter->fa = fa;
947 goto out;
948 }
949 }
950 }
951
952 fn = iter->fn = NULL;
953
954 /* Advance hash chain. */
955 if (!iter->zone)
956 goto out;
957
958 for (;;) {
959 struct hlist_node *node;
960 int maxslot;
961
962 maxslot = iter->zone->fz_divisor;
963
964 while (++iter->bucket < maxslot) {
965 iter->hash_head++;
966
967 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
968 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
969 iter->fn = fn;
970 iter->fa = fa;
971 goto out;
972 }
973 }
974 }
975
976 iter->zone = rcu_dereference(iter->zone->fz_next);
977
978 if (!iter->zone)
979 goto out;
980
981 iter->bucket = 0;
982 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
983
984 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
985 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
986 iter->fn = fn;
987 iter->fa = fa;
988 goto out;
989 }
990 }
991 }
992out:
993 iter->pos++;
994 return fa;
995}
996
997static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
998{
999 struct fib_iter_state *iter = seq->private;
1000 struct fib_alias *fa;
1001
1002 if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
1003 fa = iter->fa;
1004 pos -= iter->pos;
1005 } else
1006 fa = fib_get_first(seq);
1007
1008 if (fa)
1009 while (pos && (fa = fib_get_next(seq)))
1010 --pos;
1011 return pos ? NULL : fa;
1012}
1013
1014static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
1015 __acquires(RCU)
1016{
1017 void *v = NULL;
1018
1019 rcu_read_lock();
1020 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
1021 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1022 return v;
1023}
1024
1025static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1026{
1027 ++*pos;
1028 return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
1029}
1030
1031static void fib_seq_stop(struct seq_file *seq, void *v)
1032 __releases(RCU)
1033{
1034 rcu_read_unlock();
1035}
1036
1037static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
1038{
1039 static const unsigned type2flags[RTN_MAX + 1] = {
1040 [7] = RTF_REJECT,
1041 [8] = RTF_REJECT,
1042 };
1043 unsigned flags = type2flags[type];
1044
1045 if (fi && fi->fib_nh->nh_gw)
1046 flags |= RTF_GATEWAY;
1047 if (mask == htonl(0xFFFFFFFF))
1048 flags |= RTF_HOST;
1049 flags |= RTF_UP;
1050 return flags;
1051}
1052
1053/*
1054 * This outputs /proc/net/route.
1055 *
1056 * It always works in backward compatibility mode.
1057 * The format of the file is not supposed to be changed.
1058 */
1059static int fib_seq_show(struct seq_file *seq, void *v)
1060{
1061 struct fib_iter_state *iter;
1062 int len;
1063 __be32 prefix, mask;
1064 unsigned flags;
1065 struct fib_node *f;
1066 struct fib_alias *fa;
1067 struct fib_info *fi;
1068
1069 if (v == SEQ_START_TOKEN) {
1070 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
1071 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1072 "\tWindow\tIRTT");
1073 goto out;
1074 }
1075
1076 iter = seq->private;
1077 f = iter->fn;
1078 fa = iter->fa;
1079 fi = fa->fa_info;
1080 prefix = f->fn_key;
1081 mask = FZ_MASK(iter->zone);
1082 flags = fib_flag_trans(fa->fa_type, mask, fi);
1083 if (fi)
1084 seq_printf(seq,
1085 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
1086 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1087 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1088 mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
1089 fi->fib_window,
1090 fi->fib_rtt >> 3, &len);
1091 else
1092 seq_printf(seq,
1093 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
1094 prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len);
1095
1096 seq_printf(seq, "%*s\n", 127 - len, "");
1097out:
1098 return 0;
1099}
1100
1101static const struct seq_operations fib_seq_ops = {
1102 .start = fib_seq_start,
1103 .next = fib_seq_next,
1104 .stop = fib_seq_stop,
1105 .show = fib_seq_show,
1106};
1107
1108static int fib_seq_open(struct inode *inode, struct file *file)
1109{
1110 return seq_open_net(inode, file, &fib_seq_ops,
1111 sizeof(struct fib_iter_state));
1112}
1113
1114static const struct file_operations fib_seq_fops = {
1115 .owner = THIS_MODULE,
1116 .open = fib_seq_open,
1117 .read = seq_read,
1118 .llseek = seq_lseek,
1119 .release = seq_release_net,
1120};
1121
1122int __net_init fib_proc_init(struct net *net)
1123{
1124 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
1125 return -ENOMEM;
1126 return 0;
1127}
1128
1129void __net_exit fib_proc_exit(struct net *net)
1130{
1131 proc_net_remove(net, "route");
1132}
1133#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c079cc0ec651..af0f14aba169 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -10,7 +10,6 @@ struct fib_alias {
10 struct fib_info *fa_info; 10 struct fib_info *fa_info;
11 u8 fa_tos; 11 u8 fa_tos;
12 u8 fa_type; 12 u8 fa_type;
13 u8 fa_scope;
14 u8 fa_state; 13 u8 fa_state;
15 struct rcu_head rcu; 14 struct rcu_head rcu;
16}; 15};
@@ -25,14 +24,11 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
25} 24}
26 25
27/* Exported by fib_semantics.c */ 26/* Exported by fib_semantics.c */
28extern int fib_semantic_match(struct list_head *head,
29 const struct flowi *flp,
30 struct fib_result *res, int prefixlen, int fib_flags);
31extern void fib_release_info(struct fib_info *); 27extern void fib_release_info(struct fib_info *);
32extern struct fib_info *fib_create_info(struct fib_config *cfg); 28extern struct fib_info *fib_create_info(struct fib_config *cfg);
33extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); 29extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
34extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 30extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
35 u32 tb_id, u8 type, u8 scope, __be32 dst, 31 u32 tb_id, u8 type, __be32 dst,
36 int dst_len, u8 tos, struct fib_info *fi, 32 int dst_len, u8 tos, struct fib_info *fi,
37 unsigned int); 33 unsigned int);
38extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 34extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
@@ -51,4 +47,11 @@ static inline void fib_result_assign(struct fib_result *res,
51 res->fi = fi; 47 res->fi = fi;
52} 48}
53 49
50struct fib_prop {
51 int error;
52 u8 scope;
53};
54
55extern const struct fib_prop fib_props[RTN_MAX + 1];
56
54#endif /* _FIB_LOOKUP_H */ 57#endif /* _FIB_LOOKUP_H */
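With fa_scope removed from struct fib_alias and fib_semantic_match() no longer exported, the table of per-route-type { error, scope } defaults that used to be private to fib_semantics.c is exposed here as fib_props[], indexed directly by the RTN_* type. A toy version of that kind of designated-initializer lookup table; the type names and values below are invented for the illustration.

#include <stdio.h>

/* Simplified model of a fib_props[]-style table: one entry per route type,
 * giving the error returned for that type and its default scope. */
enum rt_type { RT_UNSPEC, RT_UNICAST, RT_LOCAL, RT_BLACKHOLE, RT_MAX };

struct prop { int error; unsigned char scope; };

static const struct prop props[RT_MAX] = {
    [RT_UNSPEC]    = { .error = 0,   .scope = 0 },
    [RT_UNICAST]   = { .error = 0,   .scope = 1 },
    [RT_LOCAL]     = { .error = 0,   .scope = 2 },
    [RT_BLACKHOLE] = { .error = -22, .scope = 0 },   /* -EINVAL, as an example */
};

int main(void)
{
    enum rt_type t = RT_BLACKHOLE;

    printf("type %d -> error %d, scope %u\n", t, props[t].error, props[t].scope);
    return 0;
}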
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 7981a24f5c7b..a53bb1b5b118 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,19 +41,19 @@ struct fib4_rule {
41 __be32 srcmask; 41 __be32 srcmask;
42 __be32 dst; 42 __be32 dst;
43 __be32 dstmask; 43 __be32 dstmask;
44#ifdef CONFIG_NET_CLS_ROUTE 44#ifdef CONFIG_IP_ROUTE_CLASSID
45 u32 tclassid; 45 u32 tclassid;
46#endif 46#endif
47}; 47};
48 48
49#ifdef CONFIG_NET_CLS_ROUTE 49#ifdef CONFIG_IP_ROUTE_CLASSID
50u32 fib_rules_tclass(struct fib_result *res) 50u32 fib_rules_tclass(const struct fib_result *res)
51{ 51{
52 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; 52 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
53} 53}
54#endif 54#endif
55 55
56int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) 56int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
57{ 57{
58 struct fib_lookup_arg arg = { 58 struct fib_lookup_arg arg = {
59 .result = res, 59 .result = res,
@@ -61,7 +61,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
61 }; 61 };
62 int err; 62 int err;
63 63
64 err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg); 64 err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
65 res->r = arg.rule; 65 res->r = arg.rule;
66 66
67 return err; 67 return err;
@@ -95,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
95 if (!tbl) 95 if (!tbl)
96 goto errout; 96 goto errout;
97 97
98 err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); 98 err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags);
99 if (err > 0) 99 if (err > 0)
100 err = -EAGAIN; 100 err = -EAGAIN;
101errout: 101errout:
@@ -106,14 +106,15 @@ errout:
106static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 106static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
107{ 107{
108 struct fib4_rule *r = (struct fib4_rule *) rule; 108 struct fib4_rule *r = (struct fib4_rule *) rule;
109 __be32 daddr = fl->fl4_dst; 109 struct flowi4 *fl4 = &fl->u.ip4;
110 __be32 saddr = fl->fl4_src; 110 __be32 daddr = fl4->daddr;
111 __be32 saddr = fl4->saddr;
111 112
112 if (((saddr ^ r->src) & r->srcmask) || 113 if (((saddr ^ r->src) & r->srcmask) ||
113 ((daddr ^ r->dst) & r->dstmask)) 114 ((daddr ^ r->dst) & r->dstmask))
114 return 0; 115 return 0;
115 116
116 if (r->tos && (r->tos != fl->fl4_tos)) 117 if (r->tos && (r->tos != fl4->flowi4_tos))
117 return 0; 118 return 0;
118 119
119 return 1; 120 return 1;
@@ -165,7 +166,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
165 if (frh->dst_len) 166 if (frh->dst_len)
166 rule4->dst = nla_get_be32(tb[FRA_DST]); 167 rule4->dst = nla_get_be32(tb[FRA_DST]);
167 168
168#ifdef CONFIG_NET_CLS_ROUTE 169#ifdef CONFIG_IP_ROUTE_CLASSID
169 if (tb[FRA_FLOW]) 170 if (tb[FRA_FLOW])
170 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); 171 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
171#endif 172#endif
@@ -195,7 +196,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
195 if (frh->tos && (rule4->tos != frh->tos)) 196 if (frh->tos && (rule4->tos != frh->tos))
196 return 0; 197 return 0;
197 198
198#ifdef CONFIG_NET_CLS_ROUTE 199#ifdef CONFIG_IP_ROUTE_CLASSID
199 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) 200 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
200 return 0; 201 return 0;
201#endif 202#endif
@@ -224,7 +225,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
224 if (rule4->src_len) 225 if (rule4->src_len)
225 NLA_PUT_BE32(skb, FRA_SRC, rule4->src); 226 NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
226 227
227#ifdef CONFIG_NET_CLS_ROUTE 228#ifdef CONFIG_IP_ROUTE_CLASSID
228 if (rule4->tclassid) 229 if (rule4->tclassid)
229 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); 230 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
230#endif 231#endif
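fib4_rule_match() now reaches the IPv4 fields through &fl->u.ip4: the generic struct flowi carries the per-family keys in a union, and IPv4 code works on the embedded struct flowi4 (flowi4_to_flowi() converts the other way for the generic rules code). A simplified mock of that access pattern, reduced to the fields used in the hunk; the layout is a toy, not the kernel definition.

#include <stdio.h>

typedef unsigned int be32;   /* stand-in for __be32, host order in this toy */

struct flowi4 { be32 daddr, saddr; unsigned char flowi4_tos; };
struct flowi  { union { struct flowi4 ip4; } u; };

/* Destination match in the style of fib4_rule_match(): xor out the rule
 * prefix and see whether any bit under the mask survives. */
static int rule_match(const struct flowi *fl, be32 dst, be32 dstmask)
{
    const struct flowi4 *fl4 = &fl->u.ip4;

    return ((fl4->daddr ^ dst) & dstmask) == 0;
}

int main(void)
{
    struct flowi fl = { .u.ip4 = { .daddr = 0x0a000001, .saddr = 0x0a000002 } };

    printf("match: %d\n", rule_match(&fl, 0x0a000000, 0xffffff00));
    return 0;
}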
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 12d3dc3df1b7..33e2c35b74b7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -49,7 +49,7 @@
49static DEFINE_SPINLOCK(fib_info_lock); 49static DEFINE_SPINLOCK(fib_info_lock);
50static struct hlist_head *fib_info_hash; 50static struct hlist_head *fib_info_hash;
51static struct hlist_head *fib_info_laddrhash; 51static struct hlist_head *fib_info_laddrhash;
52static unsigned int fib_hash_size; 52static unsigned int fib_info_hash_size;
53static unsigned int fib_info_cnt; 53static unsigned int fib_info_cnt;
54 54
55#define DEVINDEX_HASHBITS 8 55#define DEVINDEX_HASHBITS 8
@@ -90,11 +90,7 @@ static DEFINE_SPINLOCK(fib_multipath_lock);
90#define endfor_nexthops(fi) } 90#define endfor_nexthops(fi) }
91 91
92 92
93static const struct 93const struct fib_prop fib_props[RTN_MAX + 1] = {
94{
95 int error;
96 u8 scope;
97} fib_props[RTN_MAX + 1] = {
98 [RTN_UNSPEC] = { 94 [RTN_UNSPEC] = {
99 .error = 0, 95 .error = 0,
100 .scope = RT_SCOPE_NOWHERE, 96 .scope = RT_SCOPE_NOWHERE,
@@ -145,16 +141,8 @@ static const struct
145 }, 141 },
146}; 142};
147 143
148
149/* Release a nexthop info record */ 144/* Release a nexthop info record */
150 145
151static void free_fib_info_rcu(struct rcu_head *head)
152{
153 struct fib_info *fi = container_of(head, struct fib_info, rcu);
154
155 kfree(fi);
156}
157
158void free_fib_info(struct fib_info *fi) 146void free_fib_info(struct fib_info *fi)
159{ 147{
160 if (fi->fib_dead == 0) { 148 if (fi->fib_dead == 0) {
@@ -168,7 +156,7 @@ void free_fib_info(struct fib_info *fi)
168 } endfor_nexthops(fi); 156 } endfor_nexthops(fi);
169 fib_info_cnt--; 157 fib_info_cnt--;
170 release_net(fi->fib_net); 158 release_net(fi->fib_net);
171 call_rcu(&fi->rcu, free_fib_info_rcu); 159 kfree_rcu(fi, rcu);
172} 160}
173 161
174void fib_release_info(struct fib_info *fi) 162void fib_release_info(struct fib_info *fi)
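free_fib_info_rcu() existed only to turn the rcu_head pointer handed to the callback back into its fib_info and kfree() it; kfree_rcu(fi, rcu) performs the same member-offset arithmetic internally after the grace period, so the dedicated callback can be dropped. The offset recovery the removed callback relied on, shown in plain C with mock structures:

#include <stdio.h>
#include <stddef.h>

/* container_of(): recover the enclosing object from a pointer to one of its
 * members, using the member's compile-time offset. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head { void *next; };            /* mock of the kernel rcu_head */

struct fib_info_like {
    int refcnt;
    struct rcu_head rcu;                    /* embedded head, as in fib_info */
};

int main(void)
{
    struct fib_info_like fi = { .refcnt = 1 };
    struct rcu_head *head = &fi.rcu;        /* what the RCU callback receives */
    struct fib_info_like *back = container_of(head, struct fib_info_like, rcu);

    printf("recovered object: %p == %p\n", (void *)back, (void *)&fi);
    return 0;
}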
@@ -200,7 +188,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
200#ifdef CONFIG_IP_ROUTE_MULTIPATH 188#ifdef CONFIG_IP_ROUTE_MULTIPATH
201 nh->nh_weight != onh->nh_weight || 189 nh->nh_weight != onh->nh_weight ||
202#endif 190#endif
203#ifdef CONFIG_NET_CLS_ROUTE 191#ifdef CONFIG_IP_ROUTE_CLASSID
204 nh->nh_tclassid != onh->nh_tclassid || 192 nh->nh_tclassid != onh->nh_tclassid ||
205#endif 193#endif
206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) 194 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -221,10 +209,10 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val)
221 209
222static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 210static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
223{ 211{
224 unsigned int mask = (fib_hash_size - 1); 212 unsigned int mask = (fib_info_hash_size - 1);
225 unsigned int val = fi->fib_nhs; 213 unsigned int val = fi->fib_nhs;
226 214
227 val ^= fi->fib_protocol; 215 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
228 val ^= (__force u32)fi->fib_prefsrc; 216 val ^= (__force u32)fi->fib_prefsrc;
229 val ^= fi->fib_priority; 217 val ^= fi->fib_priority;
230 for_nexthops(fi) { 218 for_nexthops(fi) {
@@ -250,10 +238,11 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
250 if (fi->fib_nhs != nfi->fib_nhs) 238 if (fi->fib_nhs != nfi->fib_nhs)
251 continue; 239 continue;
252 if (nfi->fib_protocol == fi->fib_protocol && 240 if (nfi->fib_protocol == fi->fib_protocol &&
241 nfi->fib_scope == fi->fib_scope &&
253 nfi->fib_prefsrc == fi->fib_prefsrc && 242 nfi->fib_prefsrc == fi->fib_prefsrc &&
254 nfi->fib_priority == fi->fib_priority && 243 nfi->fib_priority == fi->fib_priority &&
255 memcmp(nfi->fib_metrics, fi->fib_metrics, 244 memcmp(nfi->fib_metrics, fi->fib_metrics,
256 sizeof(fi->fib_metrics)) == 0 && 245 sizeof(u32) * RTAX_MAX) == 0 &&
257 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && 246 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
258 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 247 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
259 return fi; 248 return fi;
@@ -330,7 +319,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
330 goto errout; 319 goto errout;
331 320
332 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 321 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
333 fa->fa_type, fa->fa_scope, key, dst_len, 322 fa->fa_type, key, dst_len,
334 fa->fa_tos, fa->fa_info, nlm_flags); 323 fa->fa_tos, fa->fa_info, nlm_flags);
335 if (err < 0) { 324 if (err < 0) {
336 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 325 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -422,7 +411,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
422 411
423 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 412 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
424 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; 413 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
425#ifdef CONFIG_NET_CLS_ROUTE 414#ifdef CONFIG_IP_ROUTE_CLASSID
426 nla = nla_find(attrs, attrlen, RTA_FLOW); 415 nla = nla_find(attrs, attrlen, RTA_FLOW);
427 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 416 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
428#endif 417#endif
@@ -476,7 +465,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
476 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 465 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
477 if (nla && nla_get_be32(nla) != nh->nh_gw) 466 if (nla && nla_get_be32(nla) != nh->nh_gw)
478 return 1; 467 return 1;
479#ifdef CONFIG_NET_CLS_ROUTE 468#ifdef CONFIG_IP_ROUTE_CLASSID
480 nla = nla_find(attrs, attrlen, RTA_FLOW); 469 nla = nla_find(attrs, attrlen, RTA_FLOW);
481 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 470 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
482 return 1; 471 return 1;
@@ -562,16 +551,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
562 } 551 }
563 rcu_read_lock(); 552 rcu_read_lock();
564 { 553 {
565 struct flowi fl = { 554 struct flowi4 fl4 = {
566 .fl4_dst = nh->nh_gw, 555 .daddr = nh->nh_gw,
567 .fl4_scope = cfg->fc_scope + 1, 556 .flowi4_scope = cfg->fc_scope + 1,
568 .oif = nh->nh_oif, 557 .flowi4_oif = nh->nh_oif,
569 }; 558 };
570 559
571 /* It is not necessary, but requires a bit of thinking */ 560 /* It is not necessary, but requires a bit of thinking */
572 if (fl.fl4_scope < RT_SCOPE_LINK) 561 if (fl4.flowi4_scope < RT_SCOPE_LINK)
573 fl.fl4_scope = RT_SCOPE_LINK; 562 fl4.flowi4_scope = RT_SCOPE_LINK;
574 err = fib_lookup(net, &fl, &res); 563 err = fib_lookup(net, &fl4, &res);
575 if (err) { 564 if (err) {
576 rcu_read_unlock(); 565 rcu_read_unlock();
577 return err; 566 return err;
@@ -613,14 +602,14 @@ out:
613 602
614static inline unsigned int fib_laddr_hashfn(__be32 val) 603static inline unsigned int fib_laddr_hashfn(__be32 val)
615{ 604{
616 unsigned int mask = (fib_hash_size - 1); 605 unsigned int mask = (fib_info_hash_size - 1);
617 606
618 return ((__force u32)val ^ 607 return ((__force u32)val ^
619 ((__force u32)val >> 7) ^ 608 ((__force u32)val >> 7) ^
620 ((__force u32)val >> 14)) & mask; 609 ((__force u32)val >> 14)) & mask;
621} 610}
622 611
623static struct hlist_head *fib_hash_alloc(int bytes) 612static struct hlist_head *fib_info_hash_alloc(int bytes)
624{ 613{
625 if (bytes <= PAGE_SIZE) 614 if (bytes <= PAGE_SIZE)
626 return kzalloc(bytes, GFP_KERNEL); 615 return kzalloc(bytes, GFP_KERNEL);
@@ -630,7 +619,7 @@ static struct hlist_head *fib_hash_alloc(int bytes)
630 get_order(bytes)); 619 get_order(bytes));
631} 620}
632 621
633static void fib_hash_free(struct hlist_head *hash, int bytes) 622static void fib_info_hash_free(struct hlist_head *hash, int bytes)
634{ 623{
635 if (!hash) 624 if (!hash)
636 return; 625 return;
@@ -641,18 +630,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes)
641 free_pages((unsigned long) hash, get_order(bytes)); 630 free_pages((unsigned long) hash, get_order(bytes));
642} 631}
643 632
644static void fib_hash_move(struct hlist_head *new_info_hash, 633static void fib_info_hash_move(struct hlist_head *new_info_hash,
645 struct hlist_head *new_laddrhash, 634 struct hlist_head *new_laddrhash,
646 unsigned int new_size) 635 unsigned int new_size)
647{ 636{
648 struct hlist_head *old_info_hash, *old_laddrhash; 637 struct hlist_head *old_info_hash, *old_laddrhash;
649 unsigned int old_size = fib_hash_size; 638 unsigned int old_size = fib_info_hash_size;
650 unsigned int i, bytes; 639 unsigned int i, bytes;
651 640
652 spin_lock_bh(&fib_info_lock); 641 spin_lock_bh(&fib_info_lock);
653 old_info_hash = fib_info_hash; 642 old_info_hash = fib_info_hash;
654 old_laddrhash = fib_info_laddrhash; 643 old_laddrhash = fib_info_laddrhash;
655 fib_hash_size = new_size; 644 fib_info_hash_size = new_size;
656 645
657 for (i = 0; i < old_size; i++) { 646 for (i = 0; i < old_size; i++) {
658 struct hlist_head *head = &fib_info_hash[i]; 647 struct hlist_head *head = &fib_info_hash[i];
@@ -693,8 +682,18 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
693 spin_unlock_bh(&fib_info_lock); 682 spin_unlock_bh(&fib_info_lock);
694 683
695 bytes = old_size * sizeof(struct hlist_head *); 684 bytes = old_size * sizeof(struct hlist_head *);
696 fib_hash_free(old_info_hash, bytes); 685 fib_info_hash_free(old_info_hash, bytes);
697 fib_hash_free(old_laddrhash, bytes); 686 fib_info_hash_free(old_laddrhash, bytes);
687}
688
689__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
690{
691 nh->nh_saddr = inet_select_addr(nh->nh_dev,
692 nh->nh_gw,
693 nh->nh_parent->fib_scope);
694 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
695
696 return nh->nh_saddr;
698} 697}
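fib_info_update_nh_saddr() recomputes a nexthop's preferred source address with inet_select_addr() and stamps it with net->ipv4.dev_addr_genid, the counter that the notifier hunks earlier in this patch bump whenever an interface address is added or removed. The toy below shows one plausible way a cached value plus a generation stamp keeps the expensive recomputation to address-change events only; the consumer-side comparison is an illustration, not code from the patch, and select_saddr() is a made-up placeholder for inet_select_addr().

#include <stdio.h>
#include <stdatomic.h>

static atomic_uint dev_addr_genid;          /* bumped by address notifiers */

struct nexthop_cache {
    unsigned int saddr;                     /* cached preferred source */
    unsigned int saddr_genid;               /* generation it was computed at */
};

static unsigned int select_saddr(void) { return 0x0a000001; }  /* placeholder */

static unsigned int nh_saddr(struct nexthop_cache *nh)
{
    unsigned int gen = atomic_load(&dev_addr_genid);

    if (nh->saddr_genid != gen) {           /* stale: recompute and restamp */
        nh->saddr = select_saddr();
        nh->saddr_genid = gen;
    }
    return nh->saddr;
}

int main(void)
{
    struct nexthop_cache nh = { 0, ~0u };

    printf("saddr %#x\n", nh_saddr(&nh));
    atomic_fetch_add(&dev_addr_genid, 1);   /* an address came or went */
    printf("saddr %#x (recomputed)\n", nh_saddr(&nh));
    return 0;
}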
699 698
700struct fib_info *fib_create_info(struct fib_config *cfg) 699struct fib_info *fib_create_info(struct fib_config *cfg)
@@ -705,6 +704,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
705 int nhs = 1; 704 int nhs = 1;
706 struct net *net = cfg->fc_nlinfo.nl_net; 705 struct net *net = cfg->fc_nlinfo.nl_net;
707 706
707 if (cfg->fc_type > RTN_MAX)
708 goto err_inval;
709
708 /* Fast check to catch the most weird cases */ 710 /* Fast check to catch the most weird cases */
709 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 711 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
710 goto err_inval; 712 goto err_inval;
@@ -718,8 +720,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
718#endif 720#endif
719 721
720 err = -ENOBUFS; 722 err = -ENOBUFS;
721 if (fib_info_cnt >= fib_hash_size) { 723 if (fib_info_cnt >= fib_info_hash_size) {
722 unsigned int new_size = fib_hash_size << 1; 724 unsigned int new_size = fib_info_hash_size << 1;
723 struct hlist_head *new_info_hash; 725 struct hlist_head *new_info_hash;
724 struct hlist_head *new_laddrhash; 726 struct hlist_head *new_laddrhash;
725 unsigned int bytes; 727 unsigned int bytes;
@@ -727,25 +729,32 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
727 if (!new_size) 729 if (!new_size)
728 new_size = 1; 730 new_size = 1;
729 bytes = new_size * sizeof(struct hlist_head *); 731 bytes = new_size * sizeof(struct hlist_head *);
-		new_info_hash = fib_hash_alloc(bytes);
-		new_laddrhash = fib_hash_alloc(bytes);
+		new_info_hash = fib_info_hash_alloc(bytes);
+		new_laddrhash = fib_info_hash_alloc(bytes);
 		if (!new_info_hash || !new_laddrhash) {
-			fib_hash_free(new_info_hash, bytes);
-			fib_hash_free(new_laddrhash, bytes);
+			fib_info_hash_free(new_info_hash, bytes);
+			fib_info_hash_free(new_laddrhash, bytes);
 		} else
-			fib_hash_move(new_info_hash, new_laddrhash, new_size);
+			fib_info_hash_move(new_info_hash, new_laddrhash, new_size);

-		if (!fib_hash_size)
+		if (!fib_info_hash_size)
739 goto failure; 741 goto failure;
740 } 742 }
741 743
742 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 744 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
743 if (fi == NULL) 745 if (fi == NULL)
744 goto failure; 746 goto failure;
747 if (cfg->fc_mx) {
748 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
749 if (!fi->fib_metrics)
750 goto failure;
751 } else
752 fi->fib_metrics = (u32 *) dst_default_metrics;
745 fib_info_cnt++; 753 fib_info_cnt++;
746 754
747 fi->fib_net = hold_net(net); 755 fi->fib_net = hold_net(net);
748 fi->fib_protocol = cfg->fc_protocol; 756 fi->fib_protocol = cfg->fc_protocol;
757 fi->fib_scope = cfg->fc_scope;
749 fi->fib_flags = cfg->fc_flags; 758 fi->fib_flags = cfg->fc_flags;
750 fi->fib_priority = cfg->fc_priority; 759 fi->fib_priority = cfg->fc_priority;
751 fi->fib_prefsrc = cfg->fc_prefsrc; 760 fi->fib_prefsrc = cfg->fc_prefsrc;
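
With the hunk above, a route that carries no explicit metrics no longer allocates its own metrics array; it points at the shared, read-only dst_default_metrics instead, and only routes that configure a metric pay for a private allocation. A small userspace sketch of that shared-default pattern (names such as route, route_init and METRIC_MAX are invented for illustration):

/*
 * Entries without explicit metrics share one read-only default array; only
 * entries that set a metric get their own heap copy, and only that copy is
 * ever freed.
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define METRIC_MAX 16

static const unsigned int default_metrics[METRIC_MAX];   /* all zero, shared */

struct route {
	const unsigned int *metrics;   /* default_metrics or a private copy */
};

static int route_init(struct route *r, const unsigned int *cfg)
{
	if (!cfg) {                    /* nothing configured: share the default */
		r->metrics = default_metrics;
		return 0;
	}
	unsigned int *own = calloc(METRIC_MAX, sizeof(*own));
	if (!own)
		return -1;
	memcpy(own, cfg, METRIC_MAX * sizeof(*own));
	r->metrics = own;
	return 0;
}

static void route_free(struct route *r)
{
	if (r->metrics != default_metrics)     /* never free the shared array */
		free((void *)r->metrics);
}

int main(void)
{
	struct route plain, tuned;
	unsigned int cfg[METRIC_MAX] = { [1] = 1500 };   /* e.g. an MTU metric */

	route_init(&plain, NULL);
	route_init(&tuned, cfg);
	printf("%u %u\n", plain.metrics[1], tuned.metrics[1]);
	route_free(&plain);
	route_free(&tuned);
	return 0;
}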
@@ -779,7 +788,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
779 goto err_inval; 788 goto err_inval;
780 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 789 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
781 goto err_inval; 790 goto err_inval;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
783 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 792 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
784 goto err_inval; 793 goto err_inval;
785#endif 794#endif
@@ -792,7 +801,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
792 nh->nh_oif = cfg->fc_oif; 801 nh->nh_oif = cfg->fc_oif;
793 nh->nh_gw = cfg->fc_gw; 802 nh->nh_gw = cfg->fc_gw;
794 nh->nh_flags = cfg->fc_flags; 803 nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
796 nh->nh_tclassid = cfg->fc_flow; 805 nh->nh_tclassid = cfg->fc_flow;
797#endif 806#endif
798#ifdef CONFIG_IP_ROUTE_MULTIPATH 807#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -804,6 +813,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
804 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 813 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
805 goto err_inval; 814 goto err_inval;
806 goto link_it; 815 goto link_it;
816 } else {
817 switch (cfg->fc_type) {
818 case RTN_UNICAST:
819 case RTN_LOCAL:
820 case RTN_BROADCAST:
821 case RTN_ANYCAST:
822 case RTN_MULTICAST:
823 break;
824 default:
825 goto err_inval;
826 }
807 } 827 }
808 828
809 if (cfg->fc_scope > RT_SCOPE_HOST) 829 if (cfg->fc_scope > RT_SCOPE_HOST)
@@ -835,6 +855,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
835 goto err_inval; 855 goto err_inval;
836 } 856 }
837 857
858 change_nexthops(fi) {
859 fib_info_update_nh_saddr(net, nexthop_nh);
860 } endfor_nexthops(fi)
861
838link_it: 862link_it:
839 ofi = fib_find_info(fi); 863 ofi = fib_find_info(fi);
840 if (ofi) { 864 if (ofi) {
@@ -880,86 +904,8 @@ failure:
880 return ERR_PTR(err); 904 return ERR_PTR(err);
881} 905}
882 906
883/* Note! fib_semantic_match intentionally uses RCU list functions. */
884int fib_semantic_match(struct list_head *head, const struct flowi *flp,
885 struct fib_result *res, int prefixlen, int fib_flags)
886{
887 struct fib_alias *fa;
888 int nh_sel = 0;
889
890 list_for_each_entry_rcu(fa, head, fa_list) {
891 int err;
892
893 if (fa->fa_tos &&
894 fa->fa_tos != flp->fl4_tos)
895 continue;
896
897 if (fa->fa_scope < flp->fl4_scope)
898 continue;
899
900 fib_alias_accessed(fa);
901
902 err = fib_props[fa->fa_type].error;
903 if (err == 0) {
904 struct fib_info *fi = fa->fa_info;
905
906 if (fi->fib_flags & RTNH_F_DEAD)
907 continue;
908
909 switch (fa->fa_type) {
910 case RTN_UNICAST:
911 case RTN_LOCAL:
912 case RTN_BROADCAST:
913 case RTN_ANYCAST:
914 case RTN_MULTICAST:
915 for_nexthops(fi) {
916 if (nh->nh_flags & RTNH_F_DEAD)
917 continue;
918 if (!flp->oif || flp->oif == nh->nh_oif)
919 break;
920 }
921#ifdef CONFIG_IP_ROUTE_MULTIPATH
922 if (nhsel < fi->fib_nhs) {
923 nh_sel = nhsel;
924 goto out_fill_res;
925 }
926#else
927 if (nhsel < 1)
928 goto out_fill_res;
929#endif
930 endfor_nexthops(fi);
931 continue;
932
933 default:
934 pr_warning("fib_semantic_match bad type %#x\n",
935 fa->fa_type);
936 return -EINVAL;
937 }
938 }
939 return err;
940 }
941 return 1;
942
943out_fill_res:
944 res->prefixlen = prefixlen;
945 res->nh_sel = nh_sel;
946 res->type = fa->fa_type;
947 res->scope = fa->fa_scope;
948 res->fi = fa->fa_info;
949 if (!(fib_flags & FIB_LOOKUP_NOREF))
950 atomic_inc(&res->fi->fib_clntref);
951 return 0;
952}
953
954/* Find appropriate source address to this destination */
955
956__be32 __fib_res_prefsrc(struct fib_result *res)
957{
958 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
959}
960
961int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 907int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
+		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
963 struct fib_info *fi, unsigned int flags) 909 struct fib_info *fi, unsigned int flags)
964{ 910{
965 struct nlmsghdr *nlh; 911 struct nlmsghdr *nlh;
@@ -981,7 +927,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
981 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 927 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
982 rtm->rtm_type = type; 928 rtm->rtm_type = type;
983 rtm->rtm_flags = fi->fib_flags; 929 rtm->rtm_flags = fi->fib_flags;
-	rtm->rtm_scope = scope;
+	rtm->rtm_scope = fi->fib_scope;
985 rtm->rtm_protocol = fi->fib_protocol; 931 rtm->rtm_protocol = fi->fib_protocol;
986 932
987 if (rtm->rtm_dst_len) 933 if (rtm->rtm_dst_len)
@@ -1002,7 +948,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
1002 948
1003 if (fi->fib_nh->nh_oif) 949 if (fi->fib_nh->nh_oif)
1004 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 950 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
1006 if (fi->fib_nh[0].nh_tclassid) 952 if (fi->fib_nh[0].nh_tclassid)
1007 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 953 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
1008#endif 954#endif
@@ -1027,7 +973,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
1027 973
1028 if (nh->nh_gw) 974 if (nh->nh_gw)
1029 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 975 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
1031 if (nh->nh_tclassid) 977 if (nh->nh_tclassid)
1032 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 978 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1033#endif 979#endif
@@ -1125,6 +1071,62 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1125 return ret; 1071 return ret;
1126} 1072}
1127 1073
1074/* Must be invoked inside of an RCU protected region. */
1075void fib_select_default(struct fib_result *res)
1076{
1077 struct fib_info *fi = NULL, *last_resort = NULL;
1078 struct list_head *fa_head = res->fa_head;
1079 struct fib_table *tb = res->table;
1080 int order = -1, last_idx = -1;
1081 struct fib_alias *fa;
1082
1083 list_for_each_entry_rcu(fa, fa_head, fa_list) {
1084 struct fib_info *next_fi = fa->fa_info;
1085
1086 if (next_fi->fib_scope != res->scope ||
1087 fa->fa_type != RTN_UNICAST)
1088 continue;
1089
1090 if (next_fi->fib_priority > res->fi->fib_priority)
1091 break;
1092 if (!next_fi->fib_nh[0].nh_gw ||
1093 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1094 continue;
1095
1096 fib_alias_accessed(fa);
1097
1098 if (fi == NULL) {
1099 if (next_fi != res->fi)
1100 break;
1101 } else if (!fib_detect_death(fi, order, &last_resort,
1102 &last_idx, tb->tb_default)) {
1103 fib_result_assign(res, fi);
1104 tb->tb_default = order;
1105 goto out;
1106 }
1107 fi = next_fi;
1108 order++;
1109 }
1110
1111 if (order <= 0 || fi == NULL) {
1112 tb->tb_default = -1;
1113 goto out;
1114 }
1115
1116 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1117 tb->tb_default)) {
1118 fib_result_assign(res, fi);
1119 tb->tb_default = order;
1120 goto out;
1121 }
1122
1123 if (last_idx >= 0)
1124 fib_result_assign(res, last_resort);
1125 tb->tb_default = last_idx;
1126out:
1127 return;
1128}
1129
1128#ifdef CONFIG_IP_ROUTE_MULTIPATH 1130#ifdef CONFIG_IP_ROUTE_MULTIPATH
1129 1131
1130/* 1132/*
@@ -1189,7 +1191,7 @@ int fib_sync_up(struct net_device *dev)
1189 * The algorithm is suboptimal, but it provides really 1191 * The algorithm is suboptimal, but it provides really
1190 * fair weighted route distribution. 1192 * fair weighted route distribution.
1191 */ 1193 */
-void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
+void fib_select_multipath(struct fib_result *res)
1193{ 1195{
1194 struct fib_info *fi = res->fi; 1196 struct fib_info *fi = res->fi;
1195 int w; 1197 int w;
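
The comment above fib_select_multipath() promises "fair weighted route distribution". A rough, runnable userspace sketch of weighted next-hop selection in that spirit (this models the general idea only, not the kernel's actual algorithm; all names are made up):

/*
 * Draw a random value below the total weight and walk the nexthops until the
 * running remainder drops below zero; hops are then chosen in proportion to
 * their weights.
 */
#include <stdio.h>
#include <stdlib.h>

struct nh { int weight; const char *name; };

static const char *pick_nexthop(const struct nh *nhs, int n)
{
	int total = 0, i;

	for (i = 0; i < n; i++)
		total += nhs[i].weight;

	int w = rand() % total;            /* uniform in [0, total) */

	for (i = 0; i < n; i++) {
		w -= nhs[i].weight;
		if (w < 0)                 /* this hop absorbs the draw */
			return nhs[i].name;
	}
	return nhs[n - 1].name;            /* not reached */
}

int main(void)
{
	const struct nh nhs[] = { { 3, "via 10.0.0.1" }, { 1, "via 10.0.0.2" } };
	int i;

	for (i = 0; i < 8; i++)
		puts(pick_nexthop(nhs, 2));   /* roughly a 3:1 split on average */
	return 0;
}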
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0f280348e0fd..c779ce96e5b5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -12,7 +12,7 @@
12 * 12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 * 14 *
- * This work is based on the LPC-trie which is originally descibed in:
+ * This work is based on the LPC-trie which is originally described in:
16 * 16 *
17 * An experimental study of compression methods for dynamic tries 17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
@@ -95,7 +95,7 @@ typedef unsigned int t_key;
95#define IS_TNODE(n) (!(n->parent & T_LEAF)) 95#define IS_TNODE(n) (!(n->parent & T_LEAF))
96#define IS_LEAF(n) (n->parent & T_LEAF) 96#define IS_LEAF(n) (n->parent & T_LEAF)
97 97
-struct node {
+struct rt_trie_node {
99 unsigned long parent; 99 unsigned long parent;
100 t_key key; 100 t_key key;
101}; 101};
@@ -126,7 +126,7 @@ struct tnode {
126 struct work_struct work; 126 struct work_struct work;
127 struct tnode *tnode_free; 127 struct tnode *tnode_free;
128 }; 128 };
-	struct node *child[0];
+	struct rt_trie_node __rcu *child[0];
130}; 130};
131 131
132#ifdef CONFIG_IP_FIB_TRIE_STATS 132#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,16 +151,16 @@ struct trie_stat {
151}; 151};
152 152
153struct trie { 153struct trie {
-	struct node *trie;
+	struct rt_trie_node __rcu *trie;
155#ifdef CONFIG_IP_FIB_TRIE_STATS 155#ifdef CONFIG_IP_FIB_TRIE_STATS
156 struct trie_use_stats stats; 156 struct trie_use_stats stats;
157#endif 157#endif
158}; 158};
159 159
-static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
-static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
+static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
+static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
 			int wasfull);
-static struct node *resize(struct trie *t, struct tnode *tn);
+static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
164static struct tnode *inflate(struct trie *t, struct tnode *tn); 164static struct tnode *inflate(struct trie *t, struct tnode *tn);
165static struct tnode *halve(struct trie *t, struct tnode *tn); 165static struct tnode *halve(struct trie *t, struct tnode *tn);
166/* tnodes to free after resize(); protected by RTNL */ 166/* tnodes to free after resize(); protected by RTNL */
@@ -177,39 +177,58 @@ static const int sync_pages = 128;
177static struct kmem_cache *fn_alias_kmem __read_mostly; 177static struct kmem_cache *fn_alias_kmem __read_mostly;
178static struct kmem_cache *trie_leaf_kmem __read_mostly; 178static struct kmem_cache *trie_leaf_kmem __read_mostly;
179 179
180static inline struct tnode *node_parent(struct node *node) 180/*
181 * caller must hold RTNL
182 */
183static inline struct tnode *node_parent(const struct rt_trie_node *node)
181{ 184{
182 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 185 unsigned long parent;
186
187 parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
188
189 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
183} 190}
184 191
185static inline struct tnode *node_parent_rcu(struct node *node) 192/*
193 * caller must hold RCU read lock or RTNL
194 */
195static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
186{ 196{
187 struct tnode *ret = node_parent(node); 197 unsigned long parent;
198
199 parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
200 lockdep_rtnl_is_held());
188 201
189 return rcu_dereference_rtnl(ret); 202 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
190} 203}
191 204
192/* Same as rcu_assign_pointer 205/* Same as rcu_assign_pointer
193 * but that macro() assumes that value is a pointer. 206 * but that macro() assumes that value is a pointer.
194 */ 207 */
195static inline void node_set_parent(struct node *node, struct tnode *ptr) 208static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
196{ 209{
197 smp_wmb(); 210 smp_wmb();
198 node->parent = (unsigned long)ptr | NODE_TYPE(node); 211 node->parent = (unsigned long)ptr | NODE_TYPE(node);
199} 212}
200 213
201static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) 214/*
215 * caller must hold RTNL
216 */
217static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
202{ 218{
203 BUG_ON(i >= 1U << tn->bits); 219 BUG_ON(i >= 1U << tn->bits);
204 220
205 return tn->child[i]; 221 return rtnl_dereference(tn->child[i]);
206} 222}
207 223
208static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 224/*
225 * caller must hold RCU read lock or RTNL
226 */
227static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
209{ 228{
210 struct node *ret = tnode_get_child(tn, i); 229 BUG_ON(i >= 1U << tn->bits);
211 230
212 return rcu_dereference_rtnl(ret); 231 return rcu_dereference_rtnl(tn->child[i]);
213} 232}
214 233
215static inline int tnode_child_length(const struct tnode *tn) 234static inline int tnode_child_length(const struct tnode *tn)
@@ -217,12 +236,12 @@ static inline int tnode_child_length(const struct tnode *tn)
217 return 1 << tn->bits; 236 return 1 << tn->bits;
218} 237}
219 238
220static inline t_key mask_pfx(t_key k, unsigned short l) 239static inline t_key mask_pfx(t_key k, unsigned int l)
221{ 240{
222 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 241 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
223} 242}
224 243
225static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 244static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
226{ 245{
227 if (offset < KEYLENGTH) 246 if (offset < KEYLENGTH)
228 return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 247 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
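
For reference, tkey_extract_bits() above selects the next chunk of key bits used to index a tnode's child array: shifting left by offset discards the bits already consumed higher up in the trie, and the right shift keeps only the next bits. A standalone illustration, assuming a 32-bit key (KEYLENGTH == 32) as in this file:

#include <stdio.h>
#include <stdint.h>

#define KEYLENGTH 32

/* Same expression as tkey_extract_bits() for offsets below KEYLENGTH. */
static uint32_t extract_bits(uint32_t a, unsigned int offset, unsigned int bits)
{
	if (offset < KEYLENGTH)
		return (uint32_t)(a << offset) >> (KEYLENGTH - bits);
	return 0;
}

int main(void)
{
	uint32_t key = 0xc0a80102;                   /* 192.168.1.2 as a key */

	/* first 8 bits, then the next 8: prints "c0 a8" */
	printf("%x %x\n", extract_bits(key, 0, 8), extract_bits(key, 8, 8));
	return 0;
}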
@@ -350,14 +369,9 @@ static inline void free_leaf(struct leaf *l)
350 call_rcu_bh(&l->rcu, __leaf_free_rcu); 369 call_rcu_bh(&l->rcu, __leaf_free_rcu);
351} 370}
352 371
353static void __leaf_info_free_rcu(struct rcu_head *head)
354{
355 kfree(container_of(head, struct leaf_info, rcu));
356}
357
358static inline void free_leaf_info(struct leaf_info *leaf) 372static inline void free_leaf_info(struct leaf_info *leaf)
359{ 373{
360 call_rcu(&leaf->rcu, __leaf_info_free_rcu); 374 kfree_rcu(leaf, rcu);
361} 375}
362 376
363static struct tnode *tnode_alloc(size_t size) 377static struct tnode *tnode_alloc(size_t size)
@@ -378,7 +392,7 @@ static void __tnode_free_rcu(struct rcu_head *head)
378{ 392{
379 struct tnode *tn = container_of(head, struct tnode, rcu); 393 struct tnode *tn = container_of(head, struct tnode, rcu);
380 size_t size = sizeof(struct tnode) + 394 size_t size = sizeof(struct tnode) +
381 (sizeof(struct node *) << tn->bits); 395 (sizeof(struct rt_trie_node *) << tn->bits);
382 396
383 if (size <= PAGE_SIZE) 397 if (size <= PAGE_SIZE)
384 kfree(tn); 398 kfree(tn);
@@ -402,7 +416,7 @@ static void tnode_free_safe(struct tnode *tn)
402 tn->tnode_free = tnode_free_head; 416 tn->tnode_free = tnode_free_head;
403 tnode_free_head = tn; 417 tnode_free_head = tn;
404 tnode_free_size += sizeof(struct tnode) + 418 tnode_free_size += sizeof(struct tnode) +
405 (sizeof(struct node *) << tn->bits); 419 (sizeof(struct rt_trie_node *) << tn->bits);
406} 420}
407 421
408static void tnode_free_flush(void) 422static void tnode_free_flush(void)
@@ -443,7 +457,7 @@ static struct leaf_info *leaf_info_new(int plen)
443 457
444static struct tnode *tnode_new(t_key key, int pos, int bits) 458static struct tnode *tnode_new(t_key key, int pos, int bits)
445{ 459{
446 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); 460 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
447 struct tnode *tn = tnode_alloc(sz); 461 struct tnode *tn = tnode_alloc(sz);
448 462
449 if (tn) { 463 if (tn) {
@@ -456,7 +470,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
456 } 470 }
457 471
458 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), 472 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
459 sizeof(struct node) << bits); 473 sizeof(struct rt_trie_node) << bits);
460 return tn; 474 return tn;
461} 475}
462 476
@@ -465,7 +479,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
465 * and no bits are skipped. See discussion in dyntree paper p. 6 479 * and no bits are skipped. See discussion in dyntree paper p. 6
466 */ 480 */
467 481
468static inline int tnode_full(const struct tnode *tn, const struct node *n) 482static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
469{ 483{
470 if (n == NULL || IS_LEAF(n)) 484 if (n == NULL || IS_LEAF(n))
471 return 0; 485 return 0;
@@ -474,7 +488,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n)
474} 488}
475 489
476static inline void put_child(struct trie *t, struct tnode *tn, int i, 490static inline void put_child(struct trie *t, struct tnode *tn, int i,
477 struct node *n) 491 struct rt_trie_node *n)
478{ 492{
479 tnode_put_child_reorg(tn, i, n, -1); 493 tnode_put_child_reorg(tn, i, n, -1);
480} 494}
@@ -484,10 +498,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
484 * Update the value of full_children and empty_children. 498 * Update the value of full_children and empty_children.
485 */ 499 */
486 500
487static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 501static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
488 int wasfull) 502 int wasfull)
489{ 503{
490 struct node *chi = tn->child[i]; 504 struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
491 int isfull; 505 int isfull;
492 506
493 BUG_ON(i >= 1<<tn->bits); 507 BUG_ON(i >= 1<<tn->bits);
@@ -515,7 +529,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
515} 529}
516 530
517#define MAX_WORK 10 531#define MAX_WORK 10
518static struct node *resize(struct trie *t, struct tnode *tn) 532static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
519{ 533{
520 int i; 534 int i;
521 struct tnode *old_tn; 535 struct tnode *old_tn;
@@ -605,7 +619,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
605 619
606 /* Keep root node larger */ 620 /* Keep root node larger */
607 621
608 if (!node_parent((struct node *)tn)) { 622 if (!node_parent((struct rt_trie_node *)tn)) {
609 inflate_threshold_use = inflate_threshold_root; 623 inflate_threshold_use = inflate_threshold_root;
610 halve_threshold_use = halve_threshold_root; 624 halve_threshold_use = halve_threshold_root;
611 } else { 625 } else {
@@ -635,7 +649,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
635 649
636 /* Return if at least one inflate is run */ 650 /* Return if at least one inflate is run */
637 if (max_work != MAX_WORK) 651 if (max_work != MAX_WORK)
638 return (struct node *) tn; 652 return (struct rt_trie_node *) tn;
639 653
640 /* 654 /*
641 * Halve as long as the number of empty children in this 655 * Halve as long as the number of empty children in this
@@ -663,9 +677,9 @@ static struct node *resize(struct trie *t, struct tnode *tn)
663 if (tn->empty_children == tnode_child_length(tn) - 1) { 677 if (tn->empty_children == tnode_child_length(tn) - 1) {
664one_child: 678one_child:
665 for (i = 0; i < tnode_child_length(tn); i++) { 679 for (i = 0; i < tnode_child_length(tn); i++) {
666 struct node *n; 680 struct rt_trie_node *n;
667 681
668 n = tn->child[i]; 682 n = rtnl_dereference(tn->child[i]);
669 if (!n) 683 if (!n)
670 continue; 684 continue;
671 685
@@ -676,7 +690,21 @@ one_child:
676 return n; 690 return n;
677 } 691 }
678 } 692 }
679 return (struct node *) tn; 693 return (struct rt_trie_node *) tn;
694}
695
696
697static void tnode_clean_free(struct tnode *tn)
698{
699 int i;
700 struct tnode *tofree;
701
702 for (i = 0; i < tnode_child_length(tn); i++) {
703 tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
704 if (tofree)
705 tnode_free(tofree);
706 }
707 tnode_free(tn);
680} 708}
681 709
682static struct tnode *inflate(struct trie *t, struct tnode *tn) 710static struct tnode *inflate(struct trie *t, struct tnode *tn)
@@ -723,14 +751,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
723 goto nomem; 751 goto nomem;
724 } 752 }
725 753
726 put_child(t, tn, 2*i, (struct node *) left); 754 put_child(t, tn, 2*i, (struct rt_trie_node *) left);
727 put_child(t, tn, 2*i+1, (struct node *) right); 755 put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
728 } 756 }
729 } 757 }
730 758
731 for (i = 0; i < olen; i++) { 759 for (i = 0; i < olen; i++) {
732 struct tnode *inode; 760 struct tnode *inode;
733 struct node *node = tnode_get_child(oldtnode, i); 761 struct rt_trie_node *node = tnode_get_child(oldtnode, i);
734 struct tnode *left, *right; 762 struct tnode *left, *right;
735 int size, j; 763 int size, j;
736 764
@@ -755,8 +783,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
755 inode = (struct tnode *) node; 783 inode = (struct tnode *) node;
756 784
757 if (inode->bits == 1) { 785 if (inode->bits == 1) {
758 put_child(t, tn, 2*i, inode->child[0]); 786 put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
759 put_child(t, tn, 2*i+1, inode->child[1]); 787 put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
760 788
761 tnode_free_safe(inode); 789 tnode_free_safe(inode);
762 continue; 790 continue;
@@ -797,8 +825,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
797 825
798 size = tnode_child_length(left); 826 size = tnode_child_length(left);
799 for (j = 0; j < size; j++) { 827 for (j = 0; j < size; j++) {
800 put_child(t, left, j, inode->child[j]); 828 put_child(t, left, j, rtnl_dereference(inode->child[j]));
801 put_child(t, right, j, inode->child[j + size]); 829 put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
802 } 830 }
803 put_child(t, tn, 2*i, resize(t, left)); 831 put_child(t, tn, 2*i, resize(t, left));
804 put_child(t, tn, 2*i+1, resize(t, right)); 832 put_child(t, tn, 2*i+1, resize(t, right));
@@ -808,24 +836,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
808 tnode_free_safe(oldtnode); 836 tnode_free_safe(oldtnode);
809 return tn; 837 return tn;
810nomem: 838nomem:
811 { 839 tnode_clean_free(tn);
812 int size = tnode_child_length(tn); 840 return ERR_PTR(-ENOMEM);
813 int j;
814
815 for (j = 0; j < size; j++)
816 if (tn->child[j])
817 tnode_free((struct tnode *)tn->child[j]);
818
819 tnode_free(tn);
820
821 return ERR_PTR(-ENOMEM);
822 }
823} 841}
824 842
825static struct tnode *halve(struct trie *t, struct tnode *tn) 843static struct tnode *halve(struct trie *t, struct tnode *tn)
826{ 844{
827 struct tnode *oldtnode = tn; 845 struct tnode *oldtnode = tn;
828 struct node *left, *right; 846 struct rt_trie_node *left, *right;
829 int i; 847 int i;
830 int olen = tnode_child_length(tn); 848 int olen = tnode_child_length(tn);
831 849
@@ -856,7 +874,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
856 if (!newn) 874 if (!newn)
857 goto nomem; 875 goto nomem;
858 876
859 put_child(t, tn, i/2, (struct node *)newn); 877 put_child(t, tn, i/2, (struct rt_trie_node *)newn);
860 } 878 }
861 879
862 } 880 }
@@ -890,18 +908,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
890 tnode_free_safe(oldtnode); 908 tnode_free_safe(oldtnode);
891 return tn; 909 return tn;
892nomem: 910nomem:
893 { 911 tnode_clean_free(tn);
894 int size = tnode_child_length(tn); 912 return ERR_PTR(-ENOMEM);
895 int j;
896
897 for (j = 0; j < size; j++)
898 if (tn->child[j])
899 tnode_free((struct tnode *)tn->child[j]);
900
901 tnode_free(tn);
902
903 return ERR_PTR(-ENOMEM);
904 }
905} 913}
906 914
907/* readside must use rcu_read_lock currently dump routines 915/* readside must use rcu_read_lock currently dump routines
@@ -958,7 +966,7 @@ fib_find_node(struct trie *t, u32 key)
958{ 966{
959 int pos; 967 int pos;
960 struct tnode *tn; 968 struct tnode *tn;
961 struct node *n; 969 struct rt_trie_node *n;
962 970
963 pos = 0; 971 pos = 0;
964 n = rcu_dereference_rtnl(t->trie); 972 n = rcu_dereference_rtnl(t->trie);
@@ -993,17 +1001,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
993 1001
994 key = tn->key; 1002 key = tn->key;
995 1003
996 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 1004 while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
997 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1005 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
998 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 1006 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
999 tn = (struct tnode *) resize(t, (struct tnode *)tn); 1007 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1000 1008
1001 tnode_put_child_reorg((struct tnode *)tp, cindex, 1009 tnode_put_child_reorg((struct tnode *)tp, cindex,
1002 (struct node *)tn, wasfull); 1010 (struct rt_trie_node *)tn, wasfull);
1003 1011
1004 tp = node_parent((struct node *) tn); 1012 tp = node_parent((struct rt_trie_node *) tn);
1005 if (!tp) 1013 if (!tp)
1006 rcu_assign_pointer(t->trie, (struct node *)tn); 1014 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1007 1015
1008 tnode_free_flush(); 1016 tnode_free_flush();
1009 if (!tp) 1017 if (!tp)
@@ -1015,7 +1023,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1015 if (IS_TNODE(tn)) 1023 if (IS_TNODE(tn))
1016 tn = (struct tnode *)resize(t, (struct tnode *)tn); 1024 tn = (struct tnode *)resize(t, (struct tnode *)tn);
1017 1025
1018 rcu_assign_pointer(t->trie, (struct node *)tn); 1026 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1019 tnode_free_flush(); 1027 tnode_free_flush();
1020} 1028}
1021 1029
@@ -1025,7 +1033,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1025{ 1033{
1026 int pos, newpos; 1034 int pos, newpos;
1027 struct tnode *tp = NULL, *tn = NULL; 1035 struct tnode *tp = NULL, *tn = NULL;
1028 struct node *n; 1036 struct rt_trie_node *n;
1029 struct leaf *l; 1037 struct leaf *l;
1030 int missbit; 1038 int missbit;
1031 struct list_head *fa_head = NULL; 1039 struct list_head *fa_head = NULL;
@@ -1033,7 +1041,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1033 t_key cindex; 1041 t_key cindex;
1034 1042
1035 pos = 0; 1043 pos = 0;
1036 n = t->trie; 1044 n = rtnl_dereference(t->trie);
1037 1045
1038 /* If we point to NULL, stop. Either the tree is empty and we should 1046 /* If we point to NULL, stop. Either the tree is empty and we should
1039 * just put a new leaf in if, or we have reached an empty child slot, 1047 * just put a new leaf in if, or we have reached an empty child slot,
@@ -1111,10 +1119,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1111 if (t->trie && n == NULL) { 1119 if (t->trie && n == NULL) {
1112 /* Case 2: n is NULL, and will just insert a new leaf */ 1120 /* Case 2: n is NULL, and will just insert a new leaf */
1113 1121
1114 node_set_parent((struct node *)l, tp); 1122 node_set_parent((struct rt_trie_node *)l, tp);
1115 1123
1116 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1124 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1117 put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 1125 put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l);
1118 } else { 1126 } else {
1119 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1127 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1120 /* 1128 /*
@@ -1141,18 +1149,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1141 return NULL; 1149 return NULL;
1142 } 1150 }
1143 1151
1144 node_set_parent((struct node *)tn, tp); 1152 node_set_parent((struct rt_trie_node *)tn, tp);
1145 1153
1146 missbit = tkey_extract_bits(key, newpos, 1); 1154 missbit = tkey_extract_bits(key, newpos, 1);
1147 put_child(t, tn, missbit, (struct node *)l); 1155 put_child(t, tn, missbit, (struct rt_trie_node *)l);
1148 put_child(t, tn, 1-missbit, n); 1156 put_child(t, tn, 1-missbit, n);
1149 1157
1150 if (tp) { 1158 if (tp) {
1151 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1159 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1152 put_child(t, (struct tnode *)tp, cindex, 1160 put_child(t, (struct tnode *)tp, cindex,
1153 (struct node *)tn); 1161 (struct rt_trie_node *)tn);
1154 } else { 1162 } else {
1155 rcu_assign_pointer(t->trie, (struct node *)tn); 1163 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1156 tp = tn; 1164 tp = tn;
1157 } 1165 }
1158 } 1166 }
@@ -1245,7 +1253,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1245 if (fa->fa_info->fib_priority != fi->fib_priority) 1253 if (fa->fa_info->fib_priority != fi->fib_priority)
1246 break; 1254 break;
1247 if (fa->fa_type == cfg->fc_type && 1255 if (fa->fa_type == cfg->fc_type &&
1248 fa->fa_scope == cfg->fc_scope &&
1249 fa->fa_info == fi) { 1256 fa->fa_info == fi) {
1250 fa_match = fa; 1257 fa_match = fa;
1251 break; 1258 break;
@@ -1271,7 +1278,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1271 new_fa->fa_tos = fa->fa_tos; 1278 new_fa->fa_tos = fa->fa_tos;
1272 new_fa->fa_info = fi; 1279 new_fa->fa_info = fi;
1273 new_fa->fa_type = cfg->fc_type; 1280 new_fa->fa_type = cfg->fc_type;
1274 new_fa->fa_scope = cfg->fc_scope;
1275 state = fa->fa_state; 1281 state = fa->fa_state;
1276 new_fa->fa_state = state & ~FA_S_ACCESSED; 1282 new_fa->fa_state = state & ~FA_S_ACCESSED;
1277 1283
@@ -1308,7 +1314,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1308 new_fa->fa_info = fi; 1314 new_fa->fa_info = fi;
1309 new_fa->fa_tos = tos; 1315 new_fa->fa_tos = tos;
1310 new_fa->fa_type = cfg->fc_type; 1316 new_fa->fa_type = cfg->fc_type;
1311 new_fa->fa_scope = cfg->fc_scope;
1312 new_fa->fa_state = 0; 1317 new_fa->fa_state = 0;
1313 /* 1318 /*
1314 * Insert new entry to the list. 1319 * Insert new entry to the list.
@@ -1322,6 +1327,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1322 } 1327 }
1323 } 1328 }
1324 1329
1330 if (!plen)
1331 tb->tb_num_default++;
1332
1325 list_add_tail_rcu(&new_fa->fa_list, 1333 list_add_tail_rcu(&new_fa->fa_list,
1326 (fa ? &fa->fa_list : fa_head)); 1334 (fa ? &fa->fa_list : fa_head));
1327 1335
@@ -1340,8 +1348,8 @@ err:
1340} 1348}
1341 1349
1342/* should be called with rcu_read_lock */ 1350/* should be called with rcu_read_lock */
-static int check_leaf(struct trie *t, struct leaf *l,
-		      t_key key, const struct flowi *flp,
+static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
+		      t_key key, const struct flowi4 *flp,
 		      struct fib_result *res, int fib_flags)
1346{ 1354{
1347 struct leaf_info *li; 1355 struct leaf_info *li;
@@ -1349,40 +1357,75 @@ static int check_leaf(struct trie *t, struct leaf *l,
1349 struct hlist_node *node; 1357 struct hlist_node *node;
1350 1358
1351 hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1359 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1352 int err; 1360 struct fib_alias *fa;
1353 int plen = li->plen; 1361 int plen = li->plen;
1354 __be32 mask = inet_make_mask(plen); 1362 __be32 mask = inet_make_mask(plen);
1355 1363
1356 if (l->key != (key & ntohl(mask))) 1364 if (l->key != (key & ntohl(mask)))
1357 continue; 1365 continue;
1358 1366
1359 err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); 1367 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1368 struct fib_info *fi = fa->fa_info;
1369 int nhsel, err;
1360 1370
1371 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1372 continue;
1373 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1374 continue;
1375 fib_alias_accessed(fa);
1376 err = fib_props[fa->fa_type].error;
1377 if (err) {
1361#ifdef CONFIG_IP_FIB_TRIE_STATS 1378#ifdef CONFIG_IP_FIB_TRIE_STATS
1362 if (err <= 0) 1379 t->stats.semantic_match_passed++;
1363 t->stats.semantic_match_passed++; 1380#endif
1364 else 1381 return err;
1365 t->stats.semantic_match_miss++; 1382 }
1383 if (fi->fib_flags & RTNH_F_DEAD)
1384 continue;
1385 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1386 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1387
1388 if (nh->nh_flags & RTNH_F_DEAD)
1389 continue;
1390 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
1391 continue;
1392
1393#ifdef CONFIG_IP_FIB_TRIE_STATS
1394 t->stats.semantic_match_passed++;
1395#endif
1396 res->prefixlen = plen;
1397 res->nh_sel = nhsel;
1398 res->type = fa->fa_type;
1399 res->scope = fa->fa_info->fib_scope;
1400 res->fi = fi;
1401 res->table = tb;
1402 res->fa_head = &li->falh;
1403 if (!(fib_flags & FIB_LOOKUP_NOREF))
1404 atomic_inc(&res->fi->fib_clntref);
1405 return 0;
1406 }
1407 }
1408
1409#ifdef CONFIG_IP_FIB_TRIE_STATS
1410 t->stats.semantic_match_miss++;
1366#endif 1411#endif
1367 if (err <= 0)
1368 return err;
1369 } 1412 }
1370 1413
1371 return 1; 1414 return 1;
1372} 1415}
1373 1416
-int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
+int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1375 struct fib_result *res, int fib_flags) 1418 struct fib_result *res, int fib_flags)
1376{ 1419{
1377 struct trie *t = (struct trie *) tb->tb_data; 1420 struct trie *t = (struct trie *) tb->tb_data;
1378 int ret; 1421 int ret;
1379 struct node *n; 1422 struct rt_trie_node *n;
1380 struct tnode *pn; 1423 struct tnode *pn;
1381 int pos, bits; 1424 unsigned int pos, bits;
1382 t_key key = ntohl(flp->fl4_dst); 1425 t_key key = ntohl(flp->daddr);
1383 int chopped_off; 1426 unsigned int chopped_off;
1384 t_key cindex = 0; 1427 t_key cindex = 0;
1385 int current_prefix_length = KEYLENGTH; 1428 unsigned int current_prefix_length = KEYLENGTH;
1386 struct tnode *cn; 1429 struct tnode *cn;
1387 t_key pref_mismatch; 1430 t_key pref_mismatch;
1388 1431
@@ -1398,7 +1441,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1398 1441
1399 /* Just a leaf? */ 1442 /* Just a leaf? */
1400 if (IS_LEAF(n)) { 1443 if (IS_LEAF(n)) {
1401 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); 1444 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1402 goto found; 1445 goto found;
1403 } 1446 }
1404 1447
@@ -1423,7 +1466,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1423 } 1466 }
1424 1467
1425 if (IS_LEAF(n)) { 1468 if (IS_LEAF(n)) {
1426 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); 1469 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1427 if (ret > 0) 1470 if (ret > 0)
1428 goto backtrace; 1471 goto backtrace;
1429 goto found; 1472 goto found;
@@ -1541,7 +1584,7 @@ backtrace:
1541 if (chopped_off <= pn->bits) { 1584 if (chopped_off <= pn->bits) {
1542 cindex &= ~(1 << (chopped_off-1)); 1585 cindex &= ~(1 << (chopped_off-1));
1543 } else { 1586 } else {
1544 struct tnode *parent = node_parent_rcu((struct node *) pn); 1587 struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
1545 if (!parent) 1588 if (!parent)
1546 goto failed; 1589 goto failed;
1547 1590
@@ -1568,7 +1611,7 @@ found:
1568 */ 1611 */
1569static void trie_leaf_remove(struct trie *t, struct leaf *l) 1612static void trie_leaf_remove(struct trie *t, struct leaf *l)
1570{ 1613{
1571 struct tnode *tp = node_parent((struct node *) l); 1614 struct tnode *tp = node_parent((struct rt_trie_node *) l);
1572 1615
1573 pr_debug("entering trie_leaf_remove(%p)\n", l); 1616 pr_debug("entering trie_leaf_remove(%p)\n", l);
1574 1617
@@ -1629,7 +1672,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1629 1672
1630 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 1673 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1631 (cfg->fc_scope == RT_SCOPE_NOWHERE || 1674 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1632 fa->fa_scope == cfg->fc_scope) && 1675 fa->fa_info->fib_scope == cfg->fc_scope) &&
1676 (!cfg->fc_prefsrc ||
1677 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1633 (!cfg->fc_protocol || 1678 (!cfg->fc_protocol ||
1634 fi->fib_protocol == cfg->fc_protocol) && 1679 fi->fib_protocol == cfg->fc_protocol) &&
1635 fib_nh_match(cfg, fi) == 0) { 1680 fib_nh_match(cfg, fi) == 0) {
@@ -1650,6 +1695,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1650 1695
1651 list_del_rcu(&fa->fa_list); 1696 list_del_rcu(&fa->fa_list);
1652 1697
1698 if (!plen)
1699 tb->tb_num_default--;
1700
1653 if (list_empty(fa_head)) { 1701 if (list_empty(fa_head)) {
1654 hlist_del_rcu(&li->hlist); 1702 hlist_del_rcu(&li->hlist);
1655 free_leaf_info(li); 1703 free_leaf_info(li);
@@ -1706,7 +1754,7 @@ static int trie_flush_leaf(struct leaf *l)
1706 * Scan for the next right leaf starting at node p->child[idx] 1754 * Scan for the next right leaf starting at node p->child[idx]
1707 * Since we have back pointer, no recursion necessary. 1755 * Since we have back pointer, no recursion necessary.
1708 */ 1756 */
1709static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) 1757static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
1710{ 1758{
1711 do { 1759 do {
1712 t_key idx; 1760 t_key idx;
@@ -1722,7 +1770,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1722 continue; 1770 continue;
1723 1771
1724 if (IS_LEAF(c)) { 1772 if (IS_LEAF(c)) {
1725 prefetch(p->child[idx]); 1773 prefetch(rcu_dereference_rtnl(p->child[idx]));
1726 return (struct leaf *) c; 1774 return (struct leaf *) c;
1727 } 1775 }
1728 1776
@@ -1732,7 +1780,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1732 } 1780 }
1733 1781
1734 /* Node empty, walk back up to parent */ 1782 /* Node empty, walk back up to parent */
1735 c = (struct node *) p; 1783 c = (struct rt_trie_node *) p;
1736 } while ((p = node_parent_rcu(c)) != NULL); 1784 } while ((p = node_parent_rcu(c)) != NULL);
1737 1785
1738 return NULL; /* Root of trie */ 1786 return NULL; /* Root of trie */
@@ -1753,7 +1801,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
1753 1801
1754static struct leaf *trie_nextleaf(struct leaf *l) 1802static struct leaf *trie_nextleaf(struct leaf *l)
1755{ 1803{
1756 struct node *c = (struct node *) l; 1804 struct rt_trie_node *c = (struct rt_trie_node *) l;
1757 struct tnode *p = node_parent_rcu(c); 1805 struct tnode *p = node_parent_rcu(c);
1758 1806
1759 if (!p) 1807 if (!p)
@@ -1802,80 +1850,6 @@ void fib_free_table(struct fib_table *tb)
1802 kfree(tb); 1850 kfree(tb);
1803} 1851}
1804 1852
1805void fib_table_select_default(struct fib_table *tb,
1806 const struct flowi *flp,
1807 struct fib_result *res)
1808{
1809 struct trie *t = (struct trie *) tb->tb_data;
1810 int order, last_idx;
1811 struct fib_info *fi = NULL;
1812 struct fib_info *last_resort;
1813 struct fib_alias *fa = NULL;
1814 struct list_head *fa_head;
1815 struct leaf *l;
1816
1817 last_idx = -1;
1818 last_resort = NULL;
1819 order = -1;
1820
1821 rcu_read_lock();
1822
1823 l = fib_find_node(t, 0);
1824 if (!l)
1825 goto out;
1826
1827 fa_head = get_fa_head(l, 0);
1828 if (!fa_head)
1829 goto out;
1830
1831 if (list_empty(fa_head))
1832 goto out;
1833
1834 list_for_each_entry_rcu(fa, fa_head, fa_list) {
1835 struct fib_info *next_fi = fa->fa_info;
1836
1837 if (fa->fa_scope != res->scope ||
1838 fa->fa_type != RTN_UNICAST)
1839 continue;
1840
1841 if (next_fi->fib_priority > res->fi->fib_priority)
1842 break;
1843 if (!next_fi->fib_nh[0].nh_gw ||
1844 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1845 continue;
1846
1847 fib_alias_accessed(fa);
1848
1849 if (fi == NULL) {
1850 if (next_fi != res->fi)
1851 break;
1852 } else if (!fib_detect_death(fi, order, &last_resort,
1853 &last_idx, tb->tb_default)) {
1854 fib_result_assign(res, fi);
1855 tb->tb_default = order;
1856 goto out;
1857 }
1858 fi = next_fi;
1859 order++;
1860 }
1861 if (order <= 0 || fi == NULL) {
1862 tb->tb_default = -1;
1863 goto out;
1864 }
1865
1866 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1867 tb->tb_default)) {
1868 fib_result_assign(res, fi);
1869 tb->tb_default = order;
1870 goto out;
1871 }
1872 if (last_idx >= 0)
1873 fib_result_assign(res, last_resort);
1874 tb->tb_default = last_idx;
1875out:
1876 rcu_read_unlock();
1877}
1878
1879static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1853static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1880 struct fib_table *tb, 1854 struct fib_table *tb,
1881 struct sk_buff *skb, struct netlink_callback *cb) 1855 struct sk_buff *skb, struct netlink_callback *cb)
@@ -1900,7 +1874,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1900 RTM_NEWROUTE, 1874 RTM_NEWROUTE,
1901 tb->tb_id, 1875 tb->tb_id,
1902 fa->fa_type, 1876 fa->fa_type,
1903 fa->fa_scope,
1904 xkey, 1877 xkey,
1905 plen, 1878 plen,
1906 fa->fa_tos, 1879 fa->fa_tos,
@@ -1990,7 +1963,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1990 return skb->len; 1963 return skb->len;
1991} 1964}
1992 1965
-void __init fib_hash_init(void)
+void __init fib_trie_init(void)
1994{ 1967{
1995 fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1968 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1996 sizeof(struct fib_alias), 1969 sizeof(struct fib_alias),
@@ -2003,8 +1976,7 @@ void __init fib_hash_init(void)
2003} 1976}
2004 1977
2005 1978
-/* Fix more generic FIB names for init later */
-struct fib_table *fib_hash_table(u32 id)
+struct fib_table *fib_trie_table(u32 id)
2008{ 1980{
2009 struct fib_table *tb; 1981 struct fib_table *tb;
2010 struct trie *t; 1982 struct trie *t;
@@ -2016,13 +1988,11 @@ struct fib_table *fib_hash_table(u32 id)
2016 1988
2017 tb->tb_id = id; 1989 tb->tb_id = id;
2018 tb->tb_default = -1; 1990 tb->tb_default = -1;
1991 tb->tb_num_default = 0;
2019 1992
2020 t = (struct trie *) tb->tb_data; 1993 t = (struct trie *) tb->tb_data;
2021 memset(t, 0, sizeof(*t)); 1994 memset(t, 0, sizeof(*t));
2022 1995
2023 if (id == RT_TABLE_LOCAL)
2024 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2025
2026 return tb; 1996 return tb;
2027} 1997}
2028 1998
@@ -2036,7 +2006,7 @@ struct fib_trie_iter {
2036 unsigned int depth; 2006 unsigned int depth;
2037}; 2007};
2038 2008
2039static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 2009static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter)
2040{ 2010{
2041 struct tnode *tn = iter->tnode; 2011 struct tnode *tn = iter->tnode;
2042 unsigned int cindex = iter->index; 2012 unsigned int cindex = iter->index;
@@ -2050,7 +2020,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2050 iter->tnode, iter->index, iter->depth); 2020 iter->tnode, iter->index, iter->depth);
2051rescan: 2021rescan:
2052 while (cindex < (1<<tn->bits)) { 2022 while (cindex < (1<<tn->bits)) {
2053 struct node *n = tnode_get_child_rcu(tn, cindex); 2023 struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex);
2054 2024
2055 if (n) { 2025 if (n) {
2056 if (IS_LEAF(n)) { 2026 if (IS_LEAF(n)) {
@@ -2069,7 +2039,7 @@ rescan:
2069 } 2039 }
2070 2040
2071 /* Current node exhausted, pop back up */ 2041 /* Current node exhausted, pop back up */
2072 p = node_parent_rcu((struct node *)tn); 2042 p = node_parent_rcu((struct rt_trie_node *)tn);
2073 if (p) { 2043 if (p) {
2074 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2044 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2075 tn = p; 2045 tn = p;
@@ -2081,10 +2051,10 @@ rescan:
2081 return NULL; 2051 return NULL;
2082} 2052}
2083 2053
2084static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2054static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter,
2085 struct trie *t) 2055 struct trie *t)
2086{ 2056{
2087 struct node *n; 2057 struct rt_trie_node *n;
2088 2058
2089 if (!t) 2059 if (!t)
2090 return NULL; 2060 return NULL;
@@ -2108,7 +2078,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2108 2078
2109static void trie_collect_stats(struct trie *t, struct trie_stat *s) 2079static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2110{ 2080{
2111 struct node *n; 2081 struct rt_trie_node *n;
2112 struct fib_trie_iter iter; 2082 struct fib_trie_iter iter;
2113 2083
2114 memset(s, 0, sizeof(*s)); 2084 memset(s, 0, sizeof(*s));
@@ -2181,7 +2151,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2181 seq_putc(seq, '\n'); 2151 seq_putc(seq, '\n');
2182 seq_printf(seq, "\tPointers: %u\n", pointers); 2152 seq_printf(seq, "\tPointers: %u\n", pointers);
2183 2153
2184 bytes += sizeof(struct node *) * pointers; 2154 bytes += sizeof(struct rt_trie_node *) * pointers;
2185 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2155 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2186 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 2156 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2187} 2157}
@@ -2262,7 +2232,7 @@ static const struct file_operations fib_triestat_fops = {
2262 .release = single_release_net, 2232 .release = single_release_net,
2263}; 2233};
2264 2234
2265static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 2235static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2266{ 2236{
2267 struct fib_trie_iter *iter = seq->private; 2237 struct fib_trie_iter *iter = seq->private;
2268 struct net *net = seq_file_net(seq); 2238 struct net *net = seq_file_net(seq);
@@ -2275,7 +2245,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2275 struct fib_table *tb; 2245 struct fib_table *tb;
2276 2246
2277 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 2247 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2278 struct node *n; 2248 struct rt_trie_node *n;
2279 2249
2280 for (n = fib_trie_get_first(iter, 2250 for (n = fib_trie_get_first(iter,
2281 (struct trie *) tb->tb_data); 2251 (struct trie *) tb->tb_data);
@@ -2304,7 +2274,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2304 struct fib_table *tb = iter->tb; 2274 struct fib_table *tb = iter->tb;
2305 struct hlist_node *tb_node; 2275 struct hlist_node *tb_node;
2306 unsigned int h; 2276 unsigned int h;
2307 struct node *n; 2277 struct rt_trie_node *n;
2308 2278
2309 ++*pos; 2279 ++*pos;
2310 /* next node in same table */ 2280 /* next node in same table */
@@ -2314,7 +2284,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2314 2284
2315 /* walk rest of this hash chain */ 2285 /* walk rest of this hash chain */
2316 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 2286 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2317 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { 2287 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2318 tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 2288 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2319 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 2289 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2320 if (n) 2290 if (n)
@@ -2390,7 +2360,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2390static int fib_trie_seq_show(struct seq_file *seq, void *v) 2360static int fib_trie_seq_show(struct seq_file *seq, void *v)
2391{ 2361{
2392 const struct fib_trie_iter *iter = seq->private; 2362 const struct fib_trie_iter *iter = seq->private;
2393 struct node *n = v; 2363 struct rt_trie_node *n = v;
2394 2364
2395 if (!node_parent_rcu(n)) 2365 if (!node_parent_rcu(n))
2396 fib_table_print(seq, iter->tb); 2366 fib_table_print(seq, iter->tb);
@@ -2422,7 +2392,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2422 seq_indent(seq, iter->depth+1); 2392 seq_indent(seq, iter->depth+1);
2423 seq_printf(seq, " /%d %s %s", li->plen, 2393 seq_printf(seq, " /%d %s %s", li->plen,
2424 rtn_scope(buf1, sizeof(buf1), 2394 rtn_scope(buf1, sizeof(buf1),
2425 fa->fa_scope), 2395 fa->fa_info->fib_scope),
2426 rtn_type(buf2, sizeof(buf2), 2396 rtn_type(buf2, sizeof(buf2),
2427 fa->fa_type)); 2397 fa->fa_type));
2428 if (fa->fa_tos) 2398 if (fa->fa_tos)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4aa1b7f01ea0..5395e45dcce6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -83,6 +83,7 @@
83#include <net/tcp.h> 83#include <net/tcp.h>
84#include <net/udp.h> 84#include <net/udp.h>
85#include <net/raw.h> 85#include <net/raw.h>
86#include <net/ping.h>
86#include <linux/skbuff.h> 87#include <linux/skbuff.h>
87#include <net/sock.h> 88#include <net/sock.h>
88#include <linux/errno.h> 89#include <linux/errno.h>
@@ -108,8 +109,7 @@ struct icmp_bxm {
108 __be32 times[3]; 109 __be32 times[3];
109 } data; 110 } data;
110 int head_len; 111 int head_len;
111 struct ip_options replyopts; 112 struct ip_options_data replyopts;
112 unsigned char optbuf[40];
113}; 113};
114 114
115/* An array of errno for error messages from dest unreach. */ 115/* An array of errno for error messages from dest unreach. */
@@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk)
233 * Send an ICMP frame. 233 * Send an ICMP frame.
234 */ 234 */
235 235
236/* 236static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
237 * Check transmit rate limitation for given message. 237 struct flowi4 *fl4, int type, int code)
238 * The rate information is held in the destination cache now.
239 * This function is generic and could be used for other purposes
240 * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
241 *
242 * Note that the same dst_entry fields are modified by functions in
243 * route.c too, but these work for packet destinations while xrlim_allow
244 * works for icmp destinations. This means the rate limiting information
245 * for one "ip object" is shared - and these ICMPs are twice limited:
246 * by source and by destination.
247 *
248 * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
249 * SHOULD allow setting of rate limits
250 *
251 * Shared between ICMPv4 and ICMPv6.
252 */
253#define XRLIM_BURST_FACTOR 6
254int xrlim_allow(struct dst_entry *dst, int timeout)
255{
256 unsigned long now, token = dst->rate_tokens;
257 int rc = 0;
258
259 now = jiffies;
260 token += now - dst->rate_last;
261 dst->rate_last = now;
262 if (token > XRLIM_BURST_FACTOR * timeout)
263 token = XRLIM_BURST_FACTOR * timeout;
264 if (token >= timeout) {
265 token -= timeout;
266 rc = 1;
267 }
268 dst->rate_tokens = token;
269 return rc;
270}
271EXPORT_SYMBOL(xrlim_allow);
272
273static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
274 int type, int code)
275{ 238{
276 struct dst_entry *dst = &rt->dst; 239 struct dst_entry *dst = &rt->dst;
-	int rc = 1;
+	bool rc = true;
278 241
279 if (type > NR_ICMP_TYPES) 242 if (type > NR_ICMP_TYPES)
280 goto out; 243 goto out;
@@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
288 goto out; 251 goto out;
289 252
290 /* Limit if icmp type is enabled in ratemask. */ 253 /* Limit if icmp type is enabled in ratemask. */
291 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) 254 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
292 rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); 255 if (!rt->peer)
256 rt_bind_peer(rt, fl4->daddr, 1);
257 rc = inet_peer_xrlim_allow(rt->peer,
258 net->ipv4.sysctl_icmp_ratelimit);
259 }
293out: 260out:
294 return rc; 261 return rc;
295} 262}
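
The xrlim_allow() routine removed above was a token-bucket limiter keyed on the dst entry; the replacement, inet_peer_xrlim_allow(), applies the same kind of bucket per inet peer. A runnable userspace model of that bucket, with jiffies replaced by a plain tick counter:

/*
 * Tokens accrue with elapsed time, are capped at a burst of 6 * timeout, and
 * one timeout's worth is spent for every packet that is permitted.
 */
#include <stdbool.h>
#include <stdio.h>

#define XRLIM_BURST_FACTOR 6

struct bucket {
	unsigned long rate_tokens;
	unsigned long rate_last;
};

static bool xrlim_allow(struct bucket *b, unsigned long now, unsigned long timeout)
{
	unsigned long token = b->rate_tokens + (now - b->rate_last);
	bool ok = false;

	b->rate_last = now;
	if (token > XRLIM_BURST_FACTOR * timeout)
		token = XRLIM_BURST_FACTOR * timeout;
	if (token >= timeout) {
		token -= timeout;
		ok = true;
	}
	b->rate_tokens = token;
	return ok;
}

int main(void)
{
	struct bucket b = { 0, 0 };
	unsigned long t;

	/* one attempt per tick with a timeout of 4 ticks: roughly one in four
	 * attempts is permitted */
	for (t = 1; t <= 20; t++)
		printf("t=%2lu %s\n", t, xrlim_allow(&b, t, 4) ? "send" : "drop");
	return 0;
}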
@@ -324,13 +291,14 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
324} 291}
325 292
326static void icmp_push_reply(struct icmp_bxm *icmp_param, 293static void icmp_push_reply(struct icmp_bxm *icmp_param,
294 struct flowi4 *fl4,
327 struct ipcm_cookie *ipc, struct rtable **rt) 295 struct ipcm_cookie *ipc, struct rtable **rt)
328{ 296{
329 struct sock *sk; 297 struct sock *sk;
330 struct sk_buff *skb; 298 struct sk_buff *skb;
331 299
332 sk = icmp_sk(dev_net((*rt)->dst.dev)); 300 sk = icmp_sk(dev_net((*rt)->dst.dev));
333 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 301 if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
334 icmp_param->data_len+icmp_param->head_len, 302 icmp_param->data_len+icmp_param->head_len,
335 icmp_param->head_len, 303 icmp_param->head_len,
336 ipc, rt, MSG_DONTWAIT) < 0) { 304 ipc, rt, MSG_DONTWAIT) < 0) {
@@ -349,7 +317,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
349 icmp_param->head_len, csum); 317 icmp_param->head_len, csum);
350 icmph->checksum = csum_fold(csum); 318 icmph->checksum = csum_fold(csum);
351 skb->ip_summed = CHECKSUM_NONE; 319 skb->ip_summed = CHECKSUM_NONE;
352 ip_push_pending_frames(sk); 320 ip_push_pending_frames(sk, fl4);
353 } 321 }
354} 322}
355 323
@@ -362,11 +330,12 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
362 struct ipcm_cookie ipc; 330 struct ipcm_cookie ipc;
363 struct rtable *rt = skb_rtable(skb); 331 struct rtable *rt = skb_rtable(skb);
364 struct net *net = dev_net(rt->dst.dev); 332 struct net *net = dev_net(rt->dst.dev);
333 struct flowi4 fl4;
365 struct sock *sk; 334 struct sock *sk;
366 struct inet_sock *inet; 335 struct inet_sock *inet;
367 __be32 daddr; 336 __be32 daddr;
368 337
-	if (ip_options_echo(&icmp_param->replyopts, skb))
+	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
370 return; 339 return;
371 340
372 sk = icmp_xmit_lock(net); 341 sk = icmp_xmit_lock(net);
@@ -377,31 +346,120 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
377 icmp_param->data.icmph.checksum = 0; 346 icmp_param->data.icmph.checksum = 0;
378 347
379 inet->tos = ip_hdr(skb)->tos; 348 inet->tos = ip_hdr(skb)->tos;
-	daddr = ipc.addr = rt->rt_src;
+	daddr = ipc.addr = ip_hdr(skb)->saddr;
381 ipc.opt = NULL; 350 ipc.opt = NULL;
382 ipc.tx_flags = 0; 351 ipc.tx_flags = 0;
383 if (icmp_param->replyopts.optlen) { 352 if (icmp_param->replyopts.opt.opt.optlen) {
384 ipc.opt = &icmp_param->replyopts; 353 ipc.opt = &icmp_param->replyopts.opt;
385 if (ipc.opt->srr) 354 if (ipc.opt->opt.srr)
386 daddr = icmp_param->replyopts.faddr; 355 daddr = icmp_param->replyopts.opt.opt.faddr;
387 } 356 }
388 { 357 memset(&fl4, 0, sizeof(fl4));
389 struct flowi fl = { .fl4_dst= daddr, 358 fl4.daddr = daddr;
390 .fl4_src = rt->rt_spec_dst, 359 fl4.saddr = rt->rt_spec_dst;
391 .fl4_tos = RT_TOS(ip_hdr(skb)->tos), 360 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
392 .proto = IPPROTO_ICMP }; 361 fl4.flowi4_proto = IPPROTO_ICMP;
393 security_skb_classify_flow(skb, &fl); 362 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
394 if (ip_route_output_key(net, &rt, &fl)) 363 rt = ip_route_output_key(net, &fl4);
395 goto out_unlock; 364 if (IS_ERR(rt))
396 } 365 goto out_unlock;
397 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, 366 if (icmpv4_xrlim_allow(net, rt, &fl4, icmp_param->data.icmph.type,
398 icmp_param->data.icmph.code)) 367 icmp_param->data.icmph.code))
399 icmp_push_reply(icmp_param, &ipc, &rt); 368 icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
400 ip_rt_put(rt); 369 ip_rt_put(rt);
401out_unlock: 370out_unlock:
402 icmp_xmit_unlock(sk); 371 icmp_xmit_unlock(sk);
403} 372}
404 373
374static struct rtable *icmp_route_lookup(struct net *net,
375 struct flowi4 *fl4,
376 struct sk_buff *skb_in,
377 const struct iphdr *iph,
378 __be32 saddr, u8 tos,
379 int type, int code,
380 struct icmp_bxm *param)
381{
382 struct rtable *rt, *rt2;
383 int err;
384
385 memset(fl4, 0, sizeof(*fl4));
386 fl4->daddr = (param->replyopts.opt.opt.srr ?
387 param->replyopts.opt.opt.faddr : iph->saddr);
388 fl4->saddr = saddr;
389 fl4->flowi4_tos = RT_TOS(tos);
390 fl4->flowi4_proto = IPPROTO_ICMP;
391 fl4->fl4_icmp_type = type;
392 fl4->fl4_icmp_code = code;
393 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
394 rt = __ip_route_output_key(net, fl4);
395 if (IS_ERR(rt))
396 return rt;
397
398 /* No need to clone since we're just using its address. */
399 rt2 = rt;
400
401 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
402 flowi4_to_flowi(fl4), NULL, 0);
403 if (!IS_ERR(rt)) {
404 if (rt != rt2)
405 return rt;
406 } else if (PTR_ERR(rt) == -EPERM) {
407 rt = NULL;
408 } else
409 return rt;
410
411 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET);
412 if (err)
413 goto relookup_failed;
414
415 if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) {
416 rt2 = __ip_route_output_key(net, fl4);
417 if (IS_ERR(rt2))
418 err = PTR_ERR(rt2);
419 } else {
420 struct flowi4 fl4_2 = {};
421 unsigned long orefdst;
422
423 fl4_2.daddr = fl4->saddr;
424 rt2 = ip_route_output_key(net, &fl4_2);
425 if (IS_ERR(rt2)) {
426 err = PTR_ERR(rt2);
427 goto relookup_failed;
428 }
429 /* Ugh! */
430 orefdst = skb_in->_skb_refdst; /* save old refdst */
431 err = ip_route_input(skb_in, fl4->daddr, fl4->saddr,
432 RT_TOS(tos), rt2->dst.dev);
433
434 dst_release(&rt2->dst);
435 rt2 = skb_rtable(skb_in);
436 skb_in->_skb_refdst = orefdst; /* restore old refdst */
437 }
438
439 if (err)
440 goto relookup_failed;
441
442 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
443 flowi4_to_flowi(fl4), NULL,
444 XFRM_LOOKUP_ICMP);
445 if (!IS_ERR(rt2)) {
446 dst_release(&rt->dst);
447 rt = rt2;
448 } else if (PTR_ERR(rt2) == -EPERM) {
449 if (rt)
450 dst_release(&rt->dst);
451 return rt2;
452 } else {
453 err = PTR_ERR(rt2);
454 goto relookup_failed;
455 }
456 return rt;
457
458relookup_failed:
459 if (rt)
460 return rt;
461 return ERR_PTR(err);
462}
405 463
406/* 464/*
407 * Send an ICMP message in response to a situation 465 * Send an ICMP message in response to a situation
@@ -421,6 +479,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
421 struct icmp_bxm icmp_param; 479 struct icmp_bxm icmp_param;
422 struct rtable *rt = skb_rtable(skb_in); 480 struct rtable *rt = skb_rtable(skb_in);
423 struct ipcm_cookie ipc; 481 struct ipcm_cookie ipc;
482 struct flowi4 fl4;
424 __be32 saddr; 483 __be32 saddr;
425 u8 tos; 484 u8 tos;
426 struct net *net; 485 struct net *net;
@@ -507,7 +566,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
507 rcu_read_lock(); 566 rcu_read_lock();
508 if (rt_is_input_route(rt) && 567 if (rt_is_input_route(rt) &&
509 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) 568 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
510 dev = dev_get_by_index_rcu(net, rt->fl.iif); 569 dev = dev_get_by_index_rcu(net, rt->rt_iif);
511 570
512 if (dev) 571 if (dev)
513 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); 572 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -520,7 +579,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
520 IPTOS_PREC_INTERNETCONTROL) : 579 IPTOS_PREC_INTERNETCONTROL) :
521 iph->tos; 580 iph->tos;
522 581
523 if (ip_options_echo(&icmp_param.replyopts, skb_in)) 582 if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
524 goto out_unlock; 583 goto out_unlock;
525 584
526 585
@@ -536,90 +595,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
536 icmp_param.offset = skb_network_offset(skb_in); 595 icmp_param.offset = skb_network_offset(skb_in);
537 inet_sk(sk)->tos = tos; 596 inet_sk(sk)->tos = tos;
538 ipc.addr = iph->saddr; 597 ipc.addr = iph->saddr;
539 ipc.opt = &icmp_param.replyopts; 598 ipc.opt = &icmp_param.replyopts.opt;
540 ipc.tx_flags = 0; 599 ipc.tx_flags = 0;
541 600
542 { 601 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
543 struct flowi fl = { 602 type, code, &icmp_param);
544 .fl4_dst = icmp_param.replyopts.srr ? 603 if (IS_ERR(rt))
545 icmp_param.replyopts.faddr : iph->saddr, 604 goto out_unlock;
546 .fl4_src = saddr,
547 .fl4_tos = RT_TOS(tos),
548 .proto = IPPROTO_ICMP,
549 .fl_icmp_type = type,
550 .fl_icmp_code = code,
551 };
552 int err;
553 struct rtable *rt2;
554
555 security_skb_classify_flow(skb_in, &fl);
556 if (__ip_route_output_key(net, &rt, &fl))
557 goto out_unlock;
558
559 /* No need to clone since we're just using its address. */
560 rt2 = rt;
561
562 if (!fl.nl_u.ip4_u.saddr)
563 fl.nl_u.ip4_u.saddr = rt->rt_src;
564
565 err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
566 switch (err) {
567 case 0:
568 if (rt != rt2)
569 goto route_done;
570 break;
571 case -EPERM:
572 rt = NULL;
573 break;
574 default:
575 goto out_unlock;
576 }
577
578 if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
579 goto relookup_failed;
580
581 if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
582 err = __ip_route_output_key(net, &rt2, &fl);
583 else {
584 struct flowi fl2 = {};
585 unsigned long orefdst;
586
587 fl2.fl4_dst = fl.fl4_src;
588 if (ip_route_output_key(net, &rt2, &fl2))
589 goto relookup_failed;
590
591 /* Ugh! */
592 orefdst = skb_in->_skb_refdst; /* save old refdst */
593 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
594 RT_TOS(tos), rt2->dst.dev);
595
596 dst_release(&rt2->dst);
597 rt2 = skb_rtable(skb_in);
598 skb_in->_skb_refdst = orefdst; /* restore old refdst */
599 }
600
601 if (err)
602 goto relookup_failed;
603
604 err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL,
605 XFRM_LOOKUP_ICMP);
606 switch (err) {
607 case 0:
608 dst_release(&rt->dst);
609 rt = rt2;
610 break;
611 case -EPERM:
612 goto ende;
613 default:
614relookup_failed:
615 if (!rt)
616 goto out_unlock;
617 break;
618 }
619 }
620 605
621route_done: 606 if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
622 if (!icmpv4_xrlim_allow(net, rt, type, code))
623 goto ende; 607 goto ende;
624 608
625 /* RFC says return as much as we can without exceeding 576 bytes. */ 609 /* RFC says return as much as we can without exceeding 576 bytes. */
@@ -627,7 +611,7 @@ route_done:
627 room = dst_mtu(&rt->dst); 611 room = dst_mtu(&rt->dst);
628 if (room > 576) 612 if (room > 576)
629 room = 576; 613 room = 576;
630 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 614 room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
631 room -= sizeof(struct icmphdr); 615 room -= sizeof(struct icmphdr);
632 616
633 icmp_param.data_len = skb_in->len - icmp_param.offset; 617 icmp_param.data_len = skb_in->len - icmp_param.offset;
@@ -635,7 +619,7 @@ route_done:
635 icmp_param.data_len = room; 619 icmp_param.data_len = room;
636 icmp_param.head_len = sizeof(struct icmphdr); 620 icmp_param.head_len = sizeof(struct icmphdr);
637 621
638 icmp_push_reply(&icmp_param, &ipc, &rt); 622 icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
639ende: 623ende:
640 ip_rt_put(rt); 624 ip_rt_put(rt);
641out_unlock: 625out_unlock:
@@ -651,7 +635,7 @@ EXPORT_SYMBOL(icmp_send);
651 635
652static void icmp_unreach(struct sk_buff *skb) 636static void icmp_unreach(struct sk_buff *skb)
653{ 637{
654 struct iphdr *iph; 638 const struct iphdr *iph;
655 struct icmphdr *icmph; 639 struct icmphdr *icmph;
656 int hash, protocol; 640 int hash, protocol;
657 const struct net_protocol *ipprot; 641 const struct net_protocol *ipprot;
@@ -670,7 +654,7 @@ static void icmp_unreach(struct sk_buff *skb)
670 goto out_err; 654 goto out_err;
671 655
672 icmph = icmp_hdr(skb); 656 icmph = icmp_hdr(skb);
673 iph = (struct iphdr *)skb->data; 657 iph = (const struct iphdr *)skb->data;
674 658
675 if (iph->ihl < 5) /* Mangled header, drop. */ 659 if (iph->ihl < 5) /* Mangled header, drop. */
676 goto out_err; 660 goto out_err;
@@ -718,7 +702,7 @@ static void icmp_unreach(struct sk_buff *skb)
718 */ 702 */
719 703
720 /* 704 /*
721 * Check the other end isnt violating RFC 1122. Some routers send 705 * Check the other end isn't violating RFC 1122. Some routers send
722 * bogus responses to broadcast frames. If you see this message 706 * bogus responses to broadcast frames. If you see this message
723 * first check your netmask matches at both ends, if it does then 707 * first check your netmask matches at both ends, if it does then
724 * get the other vendor to fix their kit. 708 * get the other vendor to fix their kit.
@@ -743,7 +727,7 @@ static void icmp_unreach(struct sk_buff *skb)
743 if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) 727 if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
744 goto out; 728 goto out;
745 729
746 iph = (struct iphdr *)skb->data; 730 iph = (const struct iphdr *)skb->data;
747 protocol = iph->protocol; 731 protocol = iph->protocol;
748 732
749 /* 733 /*
@@ -772,7 +756,7 @@ out_err:
772 756
773static void icmp_redirect(struct sk_buff *skb) 757static void icmp_redirect(struct sk_buff *skb)
774{ 758{
775 struct iphdr *iph; 759 const struct iphdr *iph;
776 760
777 if (skb->len < sizeof(struct iphdr)) 761 if (skb->len < sizeof(struct iphdr))
778 goto out_err; 762 goto out_err;
@@ -783,7 +767,7 @@ static void icmp_redirect(struct sk_buff *skb)
783 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 767 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
784 goto out; 768 goto out;
785 769
786 iph = (struct iphdr *)skb->data; 770 iph = (const struct iphdr *)skb->data;
787 771
788 switch (icmp_hdr(skb)->code & 7) { 772 switch (icmp_hdr(skb)->code & 7) {
789 case ICMP_REDIR_NET: 773 case ICMP_REDIR_NET:
@@ -798,6 +782,15 @@ static void icmp_redirect(struct sk_buff *skb)
798 iph->saddr, skb->dev); 782 iph->saddr, skb->dev);
799 break; 783 break;
800 } 784 }
785
786 /* Ping wants to see redirects.
787 * Let's pretend they are errors of sorts... */
788 if (iph->protocol == IPPROTO_ICMP &&
789 iph->ihl >= 5 &&
790 pskb_may_pull(skb, (iph->ihl<<2)+8)) {
791 ping_err(skb, icmp_hdr(skb)->un.gateway);
792 }
793
801out: 794out:
802 return; 795 return;
803out_err: 796out_err:
@@ -947,12 +940,12 @@ static void icmp_address_reply(struct sk_buff *skb)
947 BUG_ON(mp == NULL); 940 BUG_ON(mp == NULL);
948 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 941 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
949 if (*mp == ifa->ifa_mask && 942 if (*mp == ifa->ifa_mask &&
950 inet_ifa_match(rt->rt_src, ifa)) 943 inet_ifa_match(ip_hdr(skb)->saddr, ifa))
951 break; 944 break;
952 } 945 }
953 if (!ifa && net_ratelimit()) { 946 if (!ifa && net_ratelimit()) {
954 printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", 947 printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n",
955 mp, dev->name, &rt->rt_src); 948 mp, dev->name, &ip_hdr(skb)->saddr);
956 } 949 }
957 } 950 }
958} 951}
@@ -1058,7 +1051,7 @@ error:
1058 */ 1051 */
1059static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { 1052static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1060 [ICMP_ECHOREPLY] = { 1053 [ICMP_ECHOREPLY] = {
1061 .handler = icmp_discard, 1054 .handler = ping_rcv,
1062 }, 1055 },
1063 [1] = { 1056 [1] = {
1064 .handler = icmp_discard, 1057 .handler = icmp_discard,
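Every converted call site in this file follows the same pattern: the routing lookups now return the rtable directly and encode failures in the pointer itself, which is why the old "if (ip_route_output_key(net, &rt, &fl))" checks become "rt = ...; if (IS_ERR(rt))". A minimal standalone sketch of that ERR_PTR convention, with the constant and helpers mirroring linux/err.h and a made-up lookup function standing in for the real one:

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO       4095

/* Simplified copies of the linux/err.h helpers, for illustration only. */
static inline void *ERR_PTR(long error)        { return (void *)error; }
static inline long PTR_ERR(const void *ptr)    { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct rtable { int id; };                      /* toy stand-in */

static struct rtable good_route = { .id = 1 };

/* Hypothetical lookup used only to demonstrate the calling convention. */
static struct rtable *fake_route_lookup(int fail)
{
        return fail ? (struct rtable *)ERR_PTR(-EHOSTUNREACH) : &good_route;
}

int main(void)
{
        struct rtable *rt = fake_route_lookup(1);

        if (IS_ERR(rt))
                printf("lookup failed, errno %ld\n", -PTR_ERR(rt));
        else
                printf("lookup ok, route id %d\n", rt->id);
        return 0;
}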
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index e0e77e297de3..672e476c8c8a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,17 +149,11 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, 149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
150 int sfcount, __be32 *psfsrc, int delta); 150 int sfcount, __be32 *psfsrc, int delta);
151 151
152
153static void ip_mc_list_reclaim(struct rcu_head *head)
154{
155 kfree(container_of(head, struct ip_mc_list, rcu));
156}
157
158static void ip_ma_put(struct ip_mc_list *im) 152static void ip_ma_put(struct ip_mc_list *im)
159{ 153{
160 if (atomic_dec_and_test(&im->refcnt)) { 154 if (atomic_dec_and_test(&im->refcnt)) {
161 in_dev_put(im->interface); 155 in_dev_put(im->interface);
162 call_rcu(&im->rcu, ip_mc_list_reclaim); 156 kfree_rcu(im, rcu);
163 } 157 }
164} 158}
165 159
@@ -309,6 +303,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
309 struct iphdr *pip; 303 struct iphdr *pip;
310 struct igmpv3_report *pig; 304 struct igmpv3_report *pig;
311 struct net *net = dev_net(dev); 305 struct net *net = dev_net(dev);
306 struct flowi4 fl4;
312 307
313 while (1) { 308 while (1) {
314 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), 309 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
@@ -321,18 +316,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
321 } 316 }
322 igmp_skb_size(skb) = size; 317 igmp_skb_size(skb) = size;
323 318
324 { 319 rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
325 struct flowi fl = { .oif = dev->ifindex, 320 0, 0,
326 .fl4_dst = IGMPV3_ALL_MCR, 321 IPPROTO_IGMP, 0, dev->ifindex);
327 .proto = IPPROTO_IGMP }; 322 if (IS_ERR(rt)) {
328 if (ip_route_output_key(net, &rt, &fl)) {
329 kfree_skb(skb);
330 return NULL;
331 }
332 }
333 if (rt->rt_src == 0) {
334 kfree_skb(skb); 323 kfree_skb(skb);
335 ip_rt_put(rt);
336 return NULL; 324 return NULL;
337 } 325 }
338 326
@@ -350,8 +338,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
350 pip->tos = 0xc0; 338 pip->tos = 0xc0;
351 pip->frag_off = htons(IP_DF); 339 pip->frag_off = htons(IP_DF);
352 pip->ttl = 1; 340 pip->ttl = 1;
353 pip->daddr = rt->rt_dst; 341 pip->daddr = fl4.daddr;
354 pip->saddr = rt->rt_src; 342 pip->saddr = fl4.saddr;
355 pip->protocol = IPPROTO_IGMP; 343 pip->protocol = IPPROTO_IGMP;
356 pip->tot_len = 0; /* filled in later */ 344 pip->tot_len = 0; /* filled in later */
357 ip_select_ident(pip, &rt->dst, NULL); 345 ip_select_ident(pip, &rt->dst, NULL);
@@ -657,6 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
657 struct net_device *dev = in_dev->dev; 645 struct net_device *dev = in_dev->dev;
658 struct net *net = dev_net(dev); 646 struct net *net = dev_net(dev);
659 __be32 group = pmc ? pmc->multiaddr : 0; 647 __be32 group = pmc ? pmc->multiaddr : 0;
648 struct flowi4 fl4;
660 __be32 dst; 649 __be32 dst;
661 650
662 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) 651 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
@@ -666,17 +655,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
666 else 655 else
667 dst = group; 656 dst = group;
668 657
669 { 658 rt = ip_route_output_ports(net, &fl4, NULL, dst, 0,
670 struct flowi fl = { .oif = dev->ifindex, 659 0, 0,
671 .fl4_dst = dst, 660 IPPROTO_IGMP, 0, dev->ifindex);
672 .proto = IPPROTO_IGMP }; 661 if (IS_ERR(rt))
673 if (ip_route_output_key(net, &rt, &fl))
674 return -1;
675 }
676 if (rt->rt_src == 0) {
677 ip_rt_put(rt);
678 return -1; 662 return -1;
679 }
680 663
681 skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); 664 skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
682 if (skb == NULL) { 665 if (skb == NULL) {
@@ -698,7 +681,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
698 iph->frag_off = htons(IP_DF); 681 iph->frag_off = htons(IP_DF);
699 iph->ttl = 1; 682 iph->ttl = 1;
700 iph->daddr = dst; 683 iph->daddr = dst;
701 iph->saddr = rt->rt_src; 684 iph->saddr = fl4.saddr;
702 iph->protocol = IPPROTO_IGMP; 685 iph->protocol = IPPROTO_IGMP;
703 ip_select_ident(iph, &rt->dst, NULL); 686 ip_select_ident(iph, &rt->dst, NULL);
704 ((u8*)&iph[1])[0] = IPOPT_RA; 687 ((u8*)&iph[1])[0] = IPOPT_RA;
@@ -1439,8 +1422,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1439/* RTNL is locked */ 1422/* RTNL is locked */
1440static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) 1423static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1441{ 1424{
1442 struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr };
1443 struct rtable *rt;
1444 struct net_device *dev = NULL; 1425 struct net_device *dev = NULL;
1445 struct in_device *idev = NULL; 1426 struct in_device *idev = NULL;
1446 1427
@@ -1454,9 +1435,14 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1454 return NULL; 1435 return NULL;
1455 } 1436 }
1456 1437
1457 if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1438 if (!dev) {
1458 dev = rt->dst.dev; 1439 struct rtable *rt = ip_route_output(net,
1459 ip_rt_put(rt); 1440 imr->imr_multiaddr.s_addr,
1441 0, 0, 0);
1442 if (!IS_ERR(rt)) {
1443 dev = rt->dst.dev;
1444 ip_rt_put(rt);
1445 }
1460 } 1446 }
1461 if (dev) { 1447 if (dev) {
1462 imr->imr_ifindex = dev->ifindex; 1448 imr->imr_ifindex = dev->ifindex;
@@ -1836,12 +1822,6 @@ done:
1836} 1822}
1837EXPORT_SYMBOL(ip_mc_join_group); 1823EXPORT_SYMBOL(ip_mc_join_group);
1838 1824
1839static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1840{
1841 kfree(container_of(rp, struct ip_sf_socklist, rcu));
1842 /* sk_omem_alloc should have been decreased by the caller*/
1843}
1844
1845static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, 1825static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1846 struct in_device *in_dev) 1826 struct in_device *in_dev)
1847{ 1827{
@@ -1858,18 +1838,10 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1858 rcu_assign_pointer(iml->sflist, NULL); 1838 rcu_assign_pointer(iml->sflist, NULL);
1859 /* decrease mem now to avoid the memleak warning */ 1839 /* decrease mem now to avoid the memleak warning */
1860 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); 1840 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
1861 call_rcu(&psf->rcu, ip_sf_socklist_reclaim); 1841 kfree_rcu(psf, rcu);
1862 return err; 1842 return err;
1863} 1843}
1864 1844
1865
1866static void ip_mc_socklist_reclaim(struct rcu_head *rp)
1867{
1868 kfree(container_of(rp, struct ip_mc_socklist, rcu));
1869 /* sk_omem_alloc should have been decreased by the caller*/
1870}
1871
1872
1873/* 1845/*
1874 * Ask a socket to leave a group. 1846 * Ask a socket to leave a group.
1875 */ 1847 */
@@ -1909,7 +1881,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1909 rtnl_unlock(); 1881 rtnl_unlock();
1910 /* decrease mem now to avoid the memleak warning */ 1882 /* decrease mem now to avoid the memleak warning */
1911 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 1883 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
1912 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 1884 kfree_rcu(iml, rcu);
1913 return 0; 1885 return 0;
1914 } 1886 }
1915 if (!in_dev) 1887 if (!in_dev)
@@ -2026,7 +1998,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2026 newpsl->sl_addr[i] = psl->sl_addr[i]; 1998 newpsl->sl_addr[i] = psl->sl_addr[i];
2027 /* decrease mem now to avoid the memleak warning */ 1999 /* decrease mem now to avoid the memleak warning */
2028 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2000 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2029 call_rcu(&psl->rcu, ip_sf_socklist_reclaim); 2001 kfree_rcu(psl, rcu);
2030 } 2002 }
2031 rcu_assign_pointer(pmc->sflist, newpsl); 2003 rcu_assign_pointer(pmc->sflist, newpsl);
2032 psl = newpsl; 2004 psl = newpsl;
@@ -2127,7 +2099,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
2127 psl->sl_count, psl->sl_addr, 0); 2099 psl->sl_count, psl->sl_addr, 0);
2128 /* decrease mem now to avoid the memleak warning */ 2100 /* decrease mem now to avoid the memleak warning */
2129 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2101 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2130 call_rcu(&psl->rcu, ip_sf_socklist_reclaim); 2102 kfree_rcu(psl, rcu);
2131 } else 2103 } else
2132 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 2104 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
2133 0, NULL, 0); 2105 0, NULL, 0);
@@ -2324,18 +2296,18 @@ void ip_mc_drop_socket(struct sock *sk)
2324 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2296 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2325 /* decrease mem now to avoid the memleak warning */ 2297 /* decrease mem now to avoid the memleak warning */
2326 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 2298 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2327 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 2299 kfree_rcu(iml, rcu);
2328 } 2300 }
2329 rtnl_unlock(); 2301 rtnl_unlock();
2330} 2302}
2331 2303
2332int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) 2304/* called with rcu_read_lock() */
2305int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
2333{ 2306{
2334 struct ip_mc_list *im; 2307 struct ip_mc_list *im;
2335 struct ip_sf_list *psf; 2308 struct ip_sf_list *psf;
2336 int rv = 0; 2309 int rv = 0;
2337 2310
2338 rcu_read_lock();
2339 for_each_pmc_rcu(in_dev, im) { 2311 for_each_pmc_rcu(in_dev, im) {
2340 if (im->multiaddr == mc_addr) 2312 if (im->multiaddr == mc_addr)
2341 break; 2313 break;
@@ -2357,7 +2329,6 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
2357 } else 2329 } else
2358 rv = 1; /* unspecified source; tentatively allow */ 2330 rv = 1; /* unspecified source; tentatively allow */
2359 } 2331 }
2360 rcu_read_unlock();
2361 return rv; 2332 return rv;
2362} 2333}
2363 2334
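The recurring cleanup in this file replaces single-purpose call_rcu() callbacks whose only job was kfree(container_of(head, type, rcu)) with kfree_rcu(ptr, rcu). A generic helper can take over because the offset of the rcu_head inside the object is enough to recover the pointer to free once the grace period has elapsed. A simplified userspace model of that idea, with illustrative names rather than the kernel's:

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

struct rcu_head_demo { void *next; };           /* stand-in for struct rcu_head */

struct ip_mc_list_demo {                        /* hypothetical example struct */
        unsigned int multiaddr;
        struct rcu_head_demo rcu;
};

/* Generic reclaim: recover the enclosing object from the rcu_head offset,
 * so no per-type callback like ip_mc_list_reclaim() is needed. */
static void generic_kfree_rcu(struct rcu_head_demo *head, size_t offset)
{
        free((char *)head - offset);
}

int main(void)
{
        struct ip_mc_list_demo *im = calloc(1, sizeof(*im));

        if (!im)
                return 1;
        im->multiaddr = 0xe0000001;
        /* After a (simulated) RCU grace period: */
        generic_kfree_rcu(&im->rcu, offsetof(struct ip_mc_list_demo, rcu));
        printf("freed via rcu_head offset %zu\n",
               offsetof(struct ip_mc_list_demo, rcu));
        return 0;
}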
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 97e5fb765265..61fac4cabc78 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
73 !sk2->sk_bound_dev_if || 73 !sk2->sk_bound_dev_if ||
74 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { 74 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
75 if (!reuse || !sk2->sk_reuse || 75 if (!reuse || !sk2->sk_reuse ||
76 ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { 76 sk2->sk_state == TCP_LISTEN) {
77 const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); 77 const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
78 if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || 78 if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
79 sk2_rcv_saddr == sk_rcv_saddr(sk)) 79 sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -122,8 +122,7 @@ again:
122 (tb->num_owners < smallest_size || smallest_size == -1)) { 122 (tb->num_owners < smallest_size || smallest_size == -1)) {
123 smallest_size = tb->num_owners; 123 smallest_size = tb->num_owners;
124 smallest_rover = rover; 124 smallest_rover = rover;
125 if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && 125 if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
126 !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
127 spin_unlock(&head->lock); 126 spin_unlock(&head->lock);
128 snum = smallest_rover; 127 snum = smallest_rover;
129 goto have_snum; 128 goto have_snum;
@@ -351,27 +350,24 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
351EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 350EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
352 351
353struct dst_entry *inet_csk_route_req(struct sock *sk, 352struct dst_entry *inet_csk_route_req(struct sock *sk,
353 struct flowi4 *fl4,
354 const struct request_sock *req) 354 const struct request_sock *req)
355{ 355{
356 struct rtable *rt; 356 struct rtable *rt;
357 const struct inet_request_sock *ireq = inet_rsk(req); 357 const struct inet_request_sock *ireq = inet_rsk(req);
358 struct ip_options *opt = inet_rsk(req)->opt; 358 struct ip_options_rcu *opt = inet_rsk(req)->opt;
359 struct flowi fl = { .oif = sk->sk_bound_dev_if,
360 .mark = sk->sk_mark,
361 .fl4_dst = ((opt && opt->srr) ?
362 opt->faddr : ireq->rmt_addr),
363 .fl4_src = ireq->loc_addr,
364 .fl4_tos = RT_CONN_FLAGS(sk),
365 .proto = sk->sk_protocol,
366 .flags = inet_sk_flowi_flags(sk),
367 .fl_ip_sport = inet_sk(sk)->inet_sport,
368 .fl_ip_dport = ireq->rmt_port };
369 struct net *net = sock_net(sk); 359 struct net *net = sock_net(sk);
370 360
371 security_req_classify_flow(req, &fl); 361 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
372 if (ip_route_output_flow(net, &rt, &fl, sk, 0)) 362 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
363 sk->sk_protocol, inet_sk_flowi_flags(sk),
364 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
365 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
366 security_req_classify_flow(req, flowi4_to_flowi(fl4));
367 rt = ip_route_output_flow(net, fl4, sk);
368 if (IS_ERR(rt))
373 goto no_route; 369 goto no_route;
374 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 370 if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
375 goto route_err; 371 goto route_err;
376 return &rt->dst; 372 return &rt->dst;
377 373
@@ -383,6 +379,39 @@ no_route:
383} 379}
384EXPORT_SYMBOL_GPL(inet_csk_route_req); 380EXPORT_SYMBOL_GPL(inet_csk_route_req);
385 381
382struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
383 struct sock *newsk,
384 const struct request_sock *req)
385{
386 const struct inet_request_sock *ireq = inet_rsk(req);
387 struct inet_sock *newinet = inet_sk(newsk);
388 struct ip_options_rcu *opt = ireq->opt;
389 struct net *net = sock_net(sk);
390 struct flowi4 *fl4;
391 struct rtable *rt;
392
393 fl4 = &newinet->cork.fl.u.ip4;
394 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
395 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
396 sk->sk_protocol, inet_sk_flowi_flags(sk),
397 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
398 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
399 security_req_classify_flow(req, flowi4_to_flowi(fl4));
400 rt = ip_route_output_flow(net, fl4, sk);
401 if (IS_ERR(rt))
402 goto no_route;
403 if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
404 goto route_err;
405 return &rt->dst;
406
407route_err:
408 ip_rt_put(rt);
409no_route:
410 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
411 return NULL;
412}
413EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
414
386static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 415static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
387 const u32 rnd, const u32 synq_hsize) 416 const u32 rnd, const u32 synq_hsize)
388{ 417{
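Both inet_csk_route_req() and the new inet_csk_route_child_sock() now build their routing key through one init helper instead of a per-call-site designated initializer, and the child variant writes the key into the child socket's cork flow so later output can reuse it. A rough standalone sketch of that "fill the key through a single helper" pattern; the struct and helper below are simplified stand-ins, not the real flowi4/flowi4_init_output from include/net/flow.h:

#include <stdio.h>
#include <string.h>

struct flowi4_demo {
        int            oif;            /* output interface */
        unsigned int   mark;
        unsigned char  tos;
        unsigned char  proto;
        unsigned int   daddr, saddr;   /* IPv4 addresses, host order here */
        unsigned short dport, sport;
};

static void flowi4_demo_init_output(struct flowi4_demo *fl4, int oif,
                                    unsigned int mark, unsigned char tos,
                                    unsigned char proto,
                                    unsigned int daddr, unsigned int saddr,
                                    unsigned short dport, unsigned short sport)
{
        memset(fl4, 0, sizeof(*fl4));
        fl4->oif   = oif;
        fl4->mark  = mark;
        fl4->tos   = tos;
        fl4->proto = proto;
        fl4->daddr = daddr;
        fl4->saddr = saddr;
        fl4->dport = dport;
        fl4->sport = sport;
}

int main(void)
{
        struct flowi4_demo fl4;

        /* Arbitrary illustration data: route a TCP reply for a request sock. */
        flowi4_demo_init_output(&fl4, 2, 0, 0x10, 6 /* TCP */,
                                0xc0a80001, 0xc0a80002, 80, 40000);
        printf("route key: oif %d %08x -> %08x, ports %u -> %u\n",
               fl4.oif, fl4.saddr, fl4.daddr,
               (unsigned)fl4.sport, (unsigned)fl4.dport);
        return 0;
}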
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2ada17129fce..6ffe94ca5bc9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -124,7 +124,7 @@ static int inet_csk_diag_fill(struct sock *sk,
124 124
125#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 125#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
126 if (r->idiag_family == AF_INET6) { 126 if (r->idiag_family == AF_INET6) {
127 struct ipv6_pinfo *np = inet6_sk(sk); 127 const struct ipv6_pinfo *np = inet6_sk(sk);
128 128
129 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, 129 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
130 &np->rcv_saddr); 130 &np->rcv_saddr);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 47038cb6c138..85a0f75dae64 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -51,8 +51,8 @@ MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
51 * Basic tcp checks whether packet is suitable for LRO 51 * Basic tcp checks whether packet is suitable for LRO
52 */ 52 */
53 53
54static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, 54static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
55 int len, struct net_lro_desc *lro_desc) 55 int len, const struct net_lro_desc *lro_desc)
56{ 56{
57 /* check ip header: don't aggregate padded frames */ 57 /* check ip header: don't aggregate padded frames */
58 if (ntohs(iph->tot_len) != len) 58 if (ntohs(iph->tot_len) != len)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index a96e65674ac3..9df4e635fb5f 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -81,19 +81,19 @@ static const struct inet_peer peer_fake_node = {
81 81
82struct inet_peer_base { 82struct inet_peer_base {
83 struct inet_peer __rcu *root; 83 struct inet_peer __rcu *root;
84 spinlock_t lock; 84 seqlock_t lock;
85 int total; 85 int total;
86}; 86};
87 87
88static struct inet_peer_base v4_peers = { 88static struct inet_peer_base v4_peers = {
89 .root = peer_avl_empty_rcu, 89 .root = peer_avl_empty_rcu,
90 .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), 90 .lock = __SEQLOCK_UNLOCKED(v4_peers.lock),
91 .total = 0, 91 .total = 0,
92}; 92};
93 93
94static struct inet_peer_base v6_peers = { 94static struct inet_peer_base v6_peers = {
95 .root = peer_avl_empty_rcu, 95 .root = peer_avl_empty_rcu,
96 .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), 96 .lock = __SEQLOCK_UNLOCKED(v6_peers.lock),
97 .total = 0, 97 .total = 0,
98}; 98};
99 99
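Converting the peer base lock to a seqlock here is what lets the inet_getpeer() hunks further down do a lockless lookup first and only fall back to the write-locked path when read_seqretry() reports that a writer interfered. A simplified, single-threaded illustration of that begin/retry protocol; this is a sketch of the idea, not the kernel's seqlock implementation:

#include <stdio.h>

struct seq_demo {
        unsigned int sequence;  /* even: stable, odd: write in progress */
        int value;              /* data protected by the sequence counter */
};

static unsigned int read_seqbegin_demo(const struct seq_demo *s)
{
        return s->sequence;
}

static int read_seqretry_demo(const struct seq_demo *s, unsigned int start)
{
        return (start & 1) || s->sequence != start;
}

static void write_seqlock_demo(struct seq_demo *s)   { s->sequence++; }
static void write_sequnlock_demo(struct seq_demo *s) { s->sequence++; }

int main(void)
{
        struct seq_demo s = { .sequence = 0, .value = 1 };
        unsigned int start;
        int snapshot, stale;

        start = read_seqbegin_demo(&s);
        snapshot = s.value;

        /* A writer sneaks in between our read and the retry check. */
        write_seqlock_demo(&s);
        s.value = 2;
        write_sequnlock_demo(&s);

        stale = read_seqretry_demo(&s, start);
        printf("snapshot=%d stale=%d -> %s\n", snapshot, stale,
               stale ? "retry under the write lock" : "use the snapshot");
        return 0;
}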
@@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a,
167 int i, n = (a->family == AF_INET ? 1 : 4); 167 int i, n = (a->family == AF_INET ? 1 : 4);
168 168
169 for (i = 0; i < n; i++) { 169 for (i = 0; i < n; i++) {
170 if (a->a6[i] == b->a6[i]) 170 if (a->addr.a6[i] == b->addr.a6[i])
171 continue; 171 continue;
172 if (a->a6[i] < b->a6[i]) 172 if (a->addr.a6[i] < b->addr.a6[i])
173 return -1; 173 return -1;
174 return 1; 174 return 1;
175 } 175 }
@@ -177,6 +177,9 @@ static int addr_compare(const struct inetpeer_addr *a,
177 return 0; 177 return 0;
178} 178}
179 179
180#define rcu_deref_locked(X, BASE) \
181 rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
182
180/* 183/*
181 * Called with local BH disabled and the pool lock held. 184 * Called with local BH disabled and the pool lock held.
182 */ 185 */
@@ -187,8 +190,7 @@ static int addr_compare(const struct inetpeer_addr *a,
187 \ 190 \
188 stackptr = _stack; \ 191 stackptr = _stack; \
189 *stackptr++ = &_base->root; \ 192 *stackptr++ = &_base->root; \
190 for (u = rcu_dereference_protected(_base->root, \ 193 for (u = rcu_deref_locked(_base->root, _base); \
191 lockdep_is_held(&_base->lock)); \
192 u != peer_avl_empty; ) { \ 194 u != peer_avl_empty; ) { \
193 int cmp = addr_compare(_daddr, &u->daddr); \ 195 int cmp = addr_compare(_daddr, &u->daddr); \
194 if (cmp == 0) \ 196 if (cmp == 0) \
@@ -198,23 +200,22 @@ static int addr_compare(const struct inetpeer_addr *a,
198 else \ 200 else \
199 v = &u->avl_right; \ 201 v = &u->avl_right; \
200 *stackptr++ = v; \ 202 *stackptr++ = v; \
201 u = rcu_dereference_protected(*v, \ 203 u = rcu_deref_locked(*v, _base); \
202 lockdep_is_held(&_base->lock)); \
203 } \ 204 } \
204 u; \ 205 u; \
205}) 206})
206 207
207/* 208/*
208 * Called with rcu_read_lock_bh() 209 * Called with rcu_read_lock()
209 * Because we hold no lock against a writer, its quite possible we fall 210 * Because we hold no lock against a writer, its quite possible we fall
210 * in an endless loop. 211 * in an endless loop.
211 * But every pointer we follow is guaranteed to be valid thanks to RCU. 212 * But every pointer we follow is guaranteed to be valid thanks to RCU.
212 * We exit from this function if number of links exceeds PEER_MAXDEPTH 213 * We exit from this function if number of links exceeds PEER_MAXDEPTH
213 */ 214 */
214static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, 215static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
215 struct inet_peer_base *base) 216 struct inet_peer_base *base)
216{ 217{
217 struct inet_peer *u = rcu_dereference_bh(base->root); 218 struct inet_peer *u = rcu_dereference(base->root);
218 int count = 0; 219 int count = 0;
219 220
220 while (u != peer_avl_empty) { 221 while (u != peer_avl_empty) {
@@ -230,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
230 return u; 231 return u;
231 } 232 }
232 if (cmp == -1) 233 if (cmp == -1)
233 u = rcu_dereference_bh(u->avl_left); 234 u = rcu_dereference(u->avl_left);
234 else 235 else
235 u = rcu_dereference_bh(u->avl_right); 236 u = rcu_dereference(u->avl_right);
236 if (unlikely(++count == PEER_MAXDEPTH)) 237 if (unlikely(++count == PEER_MAXDEPTH))
237 break; 238 break;
238 } 239 }
@@ -246,13 +247,11 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
246 struct inet_peer __rcu **v; \ 247 struct inet_peer __rcu **v; \
247 *stackptr++ = &start->avl_left; \ 248 *stackptr++ = &start->avl_left; \
248 v = &start->avl_left; \ 249 v = &start->avl_left; \
249 for (u = rcu_dereference_protected(*v, \ 250 for (u = rcu_deref_locked(*v, base); \
250 lockdep_is_held(&base->lock)); \
251 u->avl_right != peer_avl_empty_rcu; ) { \ 251 u->avl_right != peer_avl_empty_rcu; ) { \
252 v = &u->avl_right; \ 252 v = &u->avl_right; \
253 *stackptr++ = v; \ 253 *stackptr++ = v; \
254 u = rcu_dereference_protected(*v, \ 254 u = rcu_deref_locked(*v, base); \
255 lockdep_is_held(&base->lock)); \
256 } \ 255 } \
257 u; \ 256 u; \
258}) 257})
@@ -271,21 +270,16 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
271 270
272 while (stackend > stack) { 271 while (stackend > stack) {
273 nodep = *--stackend; 272 nodep = *--stackend;
274 node = rcu_dereference_protected(*nodep, 273 node = rcu_deref_locked(*nodep, base);
275 lockdep_is_held(&base->lock)); 274 l = rcu_deref_locked(node->avl_left, base);
276 l = rcu_dereference_protected(node->avl_left, 275 r = rcu_deref_locked(node->avl_right, base);
277 lockdep_is_held(&base->lock));
278 r = rcu_dereference_protected(node->avl_right,
279 lockdep_is_held(&base->lock));
280 lh = node_height(l); 276 lh = node_height(l);
281 rh = node_height(r); 277 rh = node_height(r);
282 if (lh > rh + 1) { /* l: RH+2 */ 278 if (lh > rh + 1) { /* l: RH+2 */
283 struct inet_peer *ll, *lr, *lrl, *lrr; 279 struct inet_peer *ll, *lr, *lrl, *lrr;
284 int lrh; 280 int lrh;
285 ll = rcu_dereference_protected(l->avl_left, 281 ll = rcu_deref_locked(l->avl_left, base);
286 lockdep_is_held(&base->lock)); 282 lr = rcu_deref_locked(l->avl_right, base);
287 lr = rcu_dereference_protected(l->avl_right,
288 lockdep_is_held(&base->lock));
289 lrh = node_height(lr); 283 lrh = node_height(lr);
290 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 284 if (lrh <= node_height(ll)) { /* ll: RH+1 */
291 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ 285 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
@@ -296,10 +290,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
296 l->avl_height = node->avl_height + 1; 290 l->avl_height = node->avl_height + 1;
297 RCU_INIT_POINTER(*nodep, l); 291 RCU_INIT_POINTER(*nodep, l);
298 } else { /* ll: RH, lr: RH+1 */ 292 } else { /* ll: RH, lr: RH+1 */
299 lrl = rcu_dereference_protected(lr->avl_left, 293 lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
300 lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ 294 lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
301 lrr = rcu_dereference_protected(lr->avl_right,
302 lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */
303 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ 295 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
304 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ 296 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
305 node->avl_height = rh + 1; /* node: RH+1 */ 297 node->avl_height = rh + 1; /* node: RH+1 */
@@ -314,10 +306,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
314 } else if (rh > lh + 1) { /* r: LH+2 */ 306 } else if (rh > lh + 1) { /* r: LH+2 */
315 struct inet_peer *rr, *rl, *rlr, *rll; 307 struct inet_peer *rr, *rl, *rlr, *rll;
316 int rlh; 308 int rlh;
317 rr = rcu_dereference_protected(r->avl_right, 309 rr = rcu_deref_locked(r->avl_right, base);
318 lockdep_is_held(&base->lock)); 310 rl = rcu_deref_locked(r->avl_left, base);
319 rl = rcu_dereference_protected(r->avl_left,
320 lockdep_is_held(&base->lock));
321 rlh = node_height(rl); 311 rlh = node_height(rl);
322 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 312 if (rlh <= node_height(rr)) { /* rr: LH+1 */
323 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ 313 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
@@ -328,10 +318,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
328 r->avl_height = node->avl_height + 1; 318 r->avl_height = node->avl_height + 1;
329 RCU_INIT_POINTER(*nodep, r); 319 RCU_INIT_POINTER(*nodep, r);
330 } else { /* rr: RH, rl: RH+1 */ 320 } else { /* rr: RH, rl: RH+1 */
331 rlr = rcu_dereference_protected(rl->avl_right, 321 rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
332 lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ 322 rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
333 rll = rcu_dereference_protected(rl->avl_left,
334 lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */
335 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ 323 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
336 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ 324 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
337 node->avl_height = lh + 1; /* node: LH+1 */ 325 node->avl_height = lh + 1; /* node: LH+1 */
@@ -366,13 +354,14 @@ static void inetpeer_free_rcu(struct rcu_head *head)
366} 354}
367 355
368/* May be called with local BH enabled. */ 356/* May be called with local BH enabled. */
369static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) 357static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
358 struct inet_peer __rcu **stack[PEER_MAXDEPTH])
370{ 359{
371 int do_free; 360 int do_free;
372 361
373 do_free = 0; 362 do_free = 0;
374 363
375 spin_lock_bh(&base->lock); 364 write_seqlock_bh(&base->lock);
376 /* Check the reference counter. It was artificially incremented by 1 365 /* Check the reference counter. It was artificially incremented by 1
377 * in cleanup() function to prevent sudden disappearing. If we can 366 * in cleanup() function to prevent sudden disappearing. If we can
378 * atomically (because of lockless readers) take this last reference, 367 * atomically (because of lockless readers) take this last reference,
@@ -380,7 +369,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
380 * We use refcnt=-1 to alert lockless readers this entry is deleted. 369 * We use refcnt=-1 to alert lockless readers this entry is deleted.
381 */ 370 */
382 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 371 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
383 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
384 struct inet_peer __rcu ***stackptr, ***delp; 372 struct inet_peer __rcu ***stackptr, ***delp;
385 if (lookup(&p->daddr, stack, base) != p) 373 if (lookup(&p->daddr, stack, base) != p)
386 BUG(); 374 BUG();
@@ -392,8 +380,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
392 /* look for a node to insert instead of p */ 380 /* look for a node to insert instead of p */
393 struct inet_peer *t; 381 struct inet_peer *t;
394 t = lookup_rightempty(p, base); 382 t = lookup_rightempty(p, base);
395 BUG_ON(rcu_dereference_protected(*stackptr[-1], 383 BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
396 lockdep_is_held(&base->lock)) != t);
397 **--stackptr = t->avl_left; 384 **--stackptr = t->avl_left;
398 /* t is removed, t->daddr > x->daddr for any 385 /* t is removed, t->daddr > x->daddr for any
399 * x in p->avl_left subtree. 386 * x in p->avl_left subtree.
@@ -409,10 +396,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
409 base->total--; 396 base->total--;
410 do_free = 1; 397 do_free = 1;
411 } 398 }
412 spin_unlock_bh(&base->lock); 399 write_sequnlock_bh(&base->lock);
413 400
414 if (do_free) 401 if (do_free)
415 call_rcu_bh(&p->rcu, inetpeer_free_rcu); 402 call_rcu(&p->rcu, inetpeer_free_rcu);
416 else 403 else
417 /* The node is used again. Decrease the reference counter 404 /* The node is used again. Decrease the reference counter
418 * back. The loop "cleanup -> unlink_from_unused 405 * back. The loop "cleanup -> unlink_from_unused
@@ -435,7 +422,7 @@ static struct inet_peer_base *peer_to_base(struct inet_peer *p)
435} 422}
436 423
437/* May be called with local BH enabled. */ 424/* May be called with local BH enabled. */
438static int cleanup_once(unsigned long ttl) 425static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH])
439{ 426{
440 struct inet_peer *p = NULL; 427 struct inet_peer *p = NULL;
441 428
@@ -467,7 +454,7 @@ static int cleanup_once(unsigned long ttl)
467 * happen because of entry limits in route cache. */ 454 * happen because of entry limits in route cache. */
468 return -1; 455 return -1;
469 456
470 unlink_from_pool(p, peer_to_base(p)); 457 unlink_from_pool(p, peer_to_base(p), stack);
471 return 0; 458 return 0;
472} 459}
473 460
@@ -477,13 +464,17 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
477 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; 464 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
478 struct inet_peer_base *base = family_to_base(daddr->family); 465 struct inet_peer_base *base = family_to_base(daddr->family);
479 struct inet_peer *p; 466 struct inet_peer *p;
467 unsigned int sequence;
468 int invalidated;
480 469
481 /* Look up for the address quickly, lockless. 470 /* Look up for the address quickly, lockless.
482 * Because of a concurrent writer, we might not find an existing entry. 471 * Because of a concurrent writer, we might not find an existing entry.
483 */ 472 */
484 rcu_read_lock_bh(); 473 rcu_read_lock();
485 p = lookup_rcu_bh(daddr, base); 474 sequence = read_seqbegin(&base->lock);
486 rcu_read_unlock_bh(); 475 p = lookup_rcu(daddr, base);
476 invalidated = read_seqretry(&base->lock, sequence);
477 rcu_read_unlock();
487 478
488 if (p) { 479 if (p) {
489 /* The existing node has been found. 480 /* The existing node has been found.
@@ -493,14 +484,18 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
493 return p; 484 return p;
494 } 485 }
495 486
487 /* If no writer did a change during our lookup, we can return early. */
488 if (!create && !invalidated)
489 return NULL;
490
496 /* retry an exact lookup, taking the lock before. 491 /* retry an exact lookup, taking the lock before.
497 * At least, nodes should be hot in our cache. 492 * At least, nodes should be hot in our cache.
498 */ 493 */
499 spin_lock_bh(&base->lock); 494 write_seqlock_bh(&base->lock);
500 p = lookup(daddr, stack, base); 495 p = lookup(daddr, stack, base);
501 if (p != peer_avl_empty) { 496 if (p != peer_avl_empty) {
502 atomic_inc(&p->refcnt); 497 atomic_inc(&p->refcnt);
503 spin_unlock_bh(&base->lock); 498 write_sequnlock_bh(&base->lock);
504 /* Remove the entry from unused list if it was there. */ 499 /* Remove the entry from unused list if it was there. */
505 unlink_from_unused(p); 500 unlink_from_unused(p);
506 return p; 501 return p;
@@ -510,8 +505,14 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
510 p->daddr = *daddr; 505 p->daddr = *daddr;
511 atomic_set(&p->refcnt, 1); 506 atomic_set(&p->refcnt, 1);
512 atomic_set(&p->rid, 0); 507 atomic_set(&p->rid, 0);
513 atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); 508 atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4));
514 p->tcp_ts_stamp = 0; 509 p->tcp_ts_stamp = 0;
510 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
511 p->rate_tokens = 0;
512 p->rate_last = 0;
513 p->pmtu_expires = 0;
514 p->pmtu_orig = 0;
515 memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
515 INIT_LIST_HEAD(&p->unused); 516 INIT_LIST_HEAD(&p->unused);
516 517
517 518
@@ -519,11 +520,11 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
519 link_to_pool(p, base); 520 link_to_pool(p, base);
520 base->total++; 521 base->total++;
521 } 522 }
522 spin_unlock_bh(&base->lock); 523 write_sequnlock_bh(&base->lock);
523 524
524 if (base->total >= inet_peer_threshold) 525 if (base->total >= inet_peer_threshold)
525 /* Remove one less-recently-used entry. */ 526 /* Remove one less-recently-used entry. */
526 cleanup_once(0); 527 cleanup_once(0, stack);
527 528
528 return p; 529 return p;
529} 530}
@@ -539,6 +540,7 @@ static void peer_check_expire(unsigned long dummy)
539{ 540{
540 unsigned long now = jiffies; 541 unsigned long now = jiffies;
541 int ttl, total; 542 int ttl, total;
543 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
542 544
543 total = compute_total(); 545 total = compute_total();
544 if (total >= inet_peer_threshold) 546 if (total >= inet_peer_threshold)
@@ -547,7 +549,7 @@ static void peer_check_expire(unsigned long dummy)
547 ttl = inet_peer_maxttl 549 ttl = inet_peer_maxttl
548 - (inet_peer_maxttl - inet_peer_minttl) / HZ * 550 - (inet_peer_maxttl - inet_peer_minttl) / HZ *
549 total / inet_peer_threshold * HZ; 551 total / inet_peer_threshold * HZ;
550 while (!cleanup_once(ttl)) { 552 while (!cleanup_once(ttl, stack)) {
551 if (jiffies != now) 553 if (jiffies != now)
552 break; 554 break;
553 } 555 }
@@ -579,3 +581,44 @@ void inet_putpeer(struct inet_peer *p)
579 local_bh_enable(); 581 local_bh_enable();
580} 582}
581EXPORT_SYMBOL_GPL(inet_putpeer); 583EXPORT_SYMBOL_GPL(inet_putpeer);
584
585/*
586 * Check transmit rate limitation for given message.
587 * The rate information is held in the inet_peer entries now.
588 * This function is generic and could be used for other purposes
589 * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
590 *
591 * Note that the same inet_peer fields are modified by functions in
592 * route.c too, but these work for packet destinations while xrlim_allow
593 * works for icmp destinations. This means the rate limiting information
594 * for one "ip object" is shared - and these ICMPs are twice limited:
595 * by source and by destination.
596 *
597 * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
598 * SHOULD allow setting of rate limits
599 *
600 * Shared between ICMPv4 and ICMPv6.
601 */
602#define XRLIM_BURST_FACTOR 6
603bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
604{
605 unsigned long now, token;
606 bool rc = false;
607
608 if (!peer)
609 return true;
610
611 token = peer->rate_tokens;
612 now = jiffies;
613 token += now - peer->rate_last;
614 peer->rate_last = now;
615 if (token > XRLIM_BURST_FACTOR * timeout)
616 token = XRLIM_BURST_FACTOR * timeout;
617 if (token >= timeout) {
618 token -= timeout;
619 rc = true;
620 }
621 peer->rate_tokens = token;
622 return rc;
623}
624EXPORT_SYMBOL(inet_peer_xrlim_allow);
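inet_peer_xrlim_allow() above is a plain token bucket: tokens accrue one per jiffy since rate_last, are capped at XRLIM_BURST_FACTOR * timeout, and each allowed ICMP costs timeout tokens. The same arithmetic in a runnable userspace form, with plain integers standing in for jiffies:

#include <stdio.h>
#include <stdbool.h>

#define XRLIM_BURST_FACTOR 6

struct peer_demo {
        unsigned long rate_tokens;
        unsigned long rate_last;
};

static bool xrlim_allow_demo(struct peer_demo *peer, unsigned long now,
                             unsigned long timeout)
{
        unsigned long token = peer->rate_tokens + (now - peer->rate_last);
        bool rc = false;

        peer->rate_last = now;
        if (token > XRLIM_BURST_FACTOR * timeout)
                token = XRLIM_BURST_FACTOR * timeout;
        if (token >= timeout) {
                token -= timeout;
                rc = true;
        }
        peer->rate_tokens = token;
        return rc;
}

int main(void)
{
        struct peer_demo peer = { .rate_tokens = 0, .rate_last = 0 };
        unsigned long t;

        /* With timeout = 100, a long-idle peer gets a burst of six messages
         * at t = 1000..1005 and is then throttled until tokens build up. */
        for (t = 1000; t < 1010; t++)
                printf("t=%lu allowed=%d\n", t,
                       xrlim_allow_demo(&peer, t, 100));
        return 0;
}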
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 99461f09320f..3b34d1c86270 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb)
84 84
85 rt = skb_rtable(skb); 85 rt = skb_rtable(skb);
86 86
87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 87 if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway)
88 goto sr_failed; 88 goto sr_failed;
89 89
90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && 90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a1151b8adf3c..0ad6035f6366 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -77,22 +77,40 @@ struct ipq {
77 struct inet_peer *peer; 77 struct inet_peer *peer;
78}; 78};
79 79
80#define IPFRAG_ECN_CLEAR 0x01 /* one frag had INET_ECN_NOT_ECT */ 80/* RFC 3168 support :
81#define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */ 81 * We want to check ECN values of all fragments, to detect invalid combinations.
82 * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
83 */
84#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
85#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
86#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
87#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
82 88
83static inline u8 ip4_frag_ecn(u8 tos) 89static inline u8 ip4_frag_ecn(u8 tos)
84{ 90{
85 tos = (tos & INET_ECN_MASK) + 1; 91 return 1 << (tos & INET_ECN_MASK);
86 /*
87 * After the last operation we have (in binary):
88 * INET_ECN_NOT_ECT => 001
89 * INET_ECN_ECT_1 => 010
90 * INET_ECN_ECT_0 => 011
91 * INET_ECN_CE => 100
92 */
93 return (tos & 2) ? 0 : tos;
94} 92}
95 93
94/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
95 * Value : 0xff if frame should be dropped.
96 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
97 */
98static const u8 ip4_frag_ecn_table[16] = {
99 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
100 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
101 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
102 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
103
104 /* invalid combinations : drop frame */
105 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
106 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
107 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
108 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
109 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
110 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
111 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
112};
113
96static struct inet_frags ip4_frags; 114static struct inet_frags ip4_frags;
97 115
98int ip_frag_nqueues(struct net *net) 116int ip_frag_nqueues(struct net *net)
@@ -223,31 +241,30 @@ static void ip_expire(unsigned long arg)
223 241
224 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { 242 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
225 struct sk_buff *head = qp->q.fragments; 243 struct sk_buff *head = qp->q.fragments;
244 const struct iphdr *iph;
245 int err;
226 246
227 rcu_read_lock(); 247 rcu_read_lock();
228 head->dev = dev_get_by_index_rcu(net, qp->iif); 248 head->dev = dev_get_by_index_rcu(net, qp->iif);
229 if (!head->dev) 249 if (!head->dev)
230 goto out_rcu_unlock; 250 goto out_rcu_unlock;
231 251
252 /* skb dst is stale, drop it, and perform route lookup again */
253 skb_dst_drop(head);
254 iph = ip_hdr(head);
255 err = ip_route_input_noref(head, iph->daddr, iph->saddr,
256 iph->tos, head->dev);
257 if (err)
258 goto out_rcu_unlock;
259
232 /* 260 /*
233 * Only search router table for the head fragment, 261 * Only an end host needs to send an ICMP
234 * when defraging timeout at PRE_ROUTING HOOK. 262 * "Fragment Reassembly Timeout" message, per RFC792.
235 */ 263 */
236 if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { 264 if (qp->user == IP_DEFRAG_CONNTRACK_IN &&
237 const struct iphdr *iph = ip_hdr(head); 265 skb_rtable(head)->rt_type != RTN_LOCAL)
238 int err = ip_route_input(head, iph->daddr, iph->saddr, 266 goto out_rcu_unlock;
239 iph->tos, head->dev);
240 if (unlikely(err))
241 goto out_rcu_unlock;
242
243 /*
244 * Only an end host needs to send an ICMP
245 * "Fragment Reassembly Timeout" message, per RFC792.
246 */
247 if (skb_rtable(head)->rt_type != RTN_LOCAL)
248 goto out_rcu_unlock;
249 267
250 }
251 268
252 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 269 /* Send an ICMP "Fragment Reassembly Timeout" message. */
253 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 270 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -525,9 +542,15 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
525 int len; 542 int len;
526 int ihlen; 543 int ihlen;
527 int err; 544 int err;
545 u8 ecn;
528 546
529 ipq_kill(qp); 547 ipq_kill(qp);
530 548
549 ecn = ip4_frag_ecn_table[qp->ecn];
550 if (unlikely(ecn == 0xff)) {
551 err = -EINVAL;
552 goto out_fail;
553 }
531 /* Make the one we just received the head. */ 554 /* Make the one we just received the head. */
532 if (prev) { 555 if (prev) {
533 head = prev->next; 556 head = prev->next;
@@ -606,17 +629,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
606 iph = ip_hdr(head); 629 iph = ip_hdr(head);
607 iph->frag_off = 0; 630 iph->frag_off = 0;
608 iph->tot_len = htons(len); 631 iph->tot_len = htons(len);
609 /* RFC3168 5.3 Fragmentation support 632 iph->tos |= ecn;
610 * If one fragment had INET_ECN_NOT_ECT,
611 * reassembled frame also has INET_ECN_NOT_ECT
612 * Elif one fragment had INET_ECN_CE
613 * reassembled frame also has INET_ECN_CE
614 */
615 if (qp->ecn & IPFRAG_ECN_CLEAR)
616 iph->tos &= ~INET_ECN_MASK;
617 else if (qp->ecn & IPFRAG_ECN_SET_CE)
618 iph->tos |= INET_ECN_CE;
619
620 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 633 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
621 qp->q.fragments = NULL; 634 qp->q.fragments = NULL;
622 qp->q.fragments_tail = NULL; 635 qp->q.fragments_tail = NULL;
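The reworked reassembly path folds each fragment's ECN codepoint into one bit of qp->ecn and defers the RFC 3168 decision to a single table lookup when the datagram is rebuilt. A userspace replay of that computation for two fragments, one ECT(0) and one CE-marked; the ECN codepoint values follow RFC 3168 (and the kernel's inet_ecn.h), and the IPFRAG_* bits mirror the ones added above:

#include <stdio.h>

#define INET_ECN_NOT_ECT 0
#define INET_ECN_ECT_1   1
#define INET_ECN_ECT_0   2
#define INET_ECN_CE      3
#define INET_ECN_MASK    3

#define IPFRAG_ECN_NOT_ECT 0x01
#define IPFRAG_ECN_ECT_1   0x02
#define IPFRAG_ECN_ECT_0   0x04
#define IPFRAG_ECN_CE      0x08

/* Same shape as the table added to ip_fragment.c: 0xff means drop,
 * anything else is OR-ed into the reassembled header's tos. */
static const unsigned char frag_ecn_table[16] = {
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]                    = INET_ECN_CE,
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]                    = INET_ECN_CE,
        [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE]                  = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0]               = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1]               = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]              = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]                 = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]                 = 0xff,
        [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};

static unsigned char frag_ecn_bit(unsigned char tos)
{
        return 1 << (tos & INET_ECN_MASK);      /* same idea as ip4_frag_ecn() */
}

int main(void)
{
        unsigned char frag_tos[] = { INET_ECN_ECT_0, INET_ECN_CE };
        unsigned char ecn = 0, verdict;
        unsigned int i;

        for (i = 0; i < sizeof(frag_tos); i++)
                ecn |= frag_ecn_bit(frag_tos[i]);

        verdict = frag_ecn_table[ecn];
        if (verdict == 0xff)
                printf("invalid ECN mix: drop the reassembled datagram\n");
        else
                printf("OR 0x%02x into the reassembled tos (CE preserved)\n",
                       verdict);
        return 0;
}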
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6613edfac28c..8871067560db 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -413,11 +413,6 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
413 413
414 dev_net_set(dev, net); 414 dev_net_set(dev, net);
415 415
416 if (strchr(name, '%')) {
417 if (dev_alloc_name(dev, name) < 0)
418 goto failed_free;
419 }
420
421 nt = netdev_priv(dev); 416 nt = netdev_priv(dev);
422 nt->parms = *parms; 417 nt->parms = *parms;
423 dev->rtnl_link_ops = &ipgre_link_ops; 418 dev->rtnl_link_ops = &ipgre_link_ops;
@@ -462,7 +457,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
462 by themself??? 457 by themself???
463 */ 458 */
464 459
465 struct iphdr *iph = (struct iphdr *)skb->data; 460 const struct iphdr *iph = (const struct iphdr *)skb->data;
466 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 461 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
467 int grehlen = (iph->ihl<<2) + 4; 462 int grehlen = (iph->ihl<<2) + 4;
468 const int type = icmp_hdr(skb)->type; 463 const int type = icmp_hdr(skb)->type;
@@ -534,7 +529,7 @@ out:
534 rcu_read_unlock(); 529 rcu_read_unlock();
535} 530}
536 531
537static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 532static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
538{ 533{
539 if (INET_ECN_is_ce(iph->tos)) { 534 if (INET_ECN_is_ce(iph->tos)) {
540 if (skb->protocol == htons(ETH_P_IP)) { 535 if (skb->protocol == htons(ETH_P_IP)) {
@@ -546,19 +541,19 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
546} 541}
547 542
548static inline u8 543static inline u8
549ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 544ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
550{ 545{
551 u8 inner = 0; 546 u8 inner = 0;
552 if (skb->protocol == htons(ETH_P_IP)) 547 if (skb->protocol == htons(ETH_P_IP))
553 inner = old_iph->tos; 548 inner = old_iph->tos;
554 else if (skb->protocol == htons(ETH_P_IPV6)) 549 else if (skb->protocol == htons(ETH_P_IPV6))
555 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 550 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
556 return INET_ECN_encapsulate(tos, inner); 551 return INET_ECN_encapsulate(tos, inner);
557} 552}
558 553
559static int ipgre_rcv(struct sk_buff *skb) 554static int ipgre_rcv(struct sk_buff *skb)
560{ 555{
561 struct iphdr *iph; 556 const struct iphdr *iph;
562 u8 *h; 557 u8 *h;
563 __be16 flags; 558 __be16 flags;
564 __sum16 csum = 0; 559 __sum16 csum = 0;
@@ -697,8 +692,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
697{ 692{
698 struct ip_tunnel *tunnel = netdev_priv(dev); 693 struct ip_tunnel *tunnel = netdev_priv(dev);
699 struct pcpu_tstats *tstats; 694 struct pcpu_tstats *tstats;
700 struct iphdr *old_iph = ip_hdr(skb); 695 const struct iphdr *old_iph = ip_hdr(skb);
701 struct iphdr *tiph; 696 const struct iphdr *tiph;
697 struct flowi4 fl4;
702 u8 tos; 698 u8 tos;
703 __be16 df; 699 __be16 df;
704 struct rtable *rt; /* Route to the other host */ 700 struct rtable *rt; /* Route to the other host */
@@ -714,7 +710,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
714 710
715 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 711 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
716 gre_hlen = 0; 712 gre_hlen = 0;
717 tiph = (struct iphdr *)skb->data; 713 tiph = (const struct iphdr *)skb->data;
718 } else { 714 } else {
719 gre_hlen = tunnel->hlen; 715 gre_hlen = tunnel->hlen;
720 tiph = &tunnel->parms.iph; 716 tiph = &tunnel->parms.iph;
@@ -735,14 +731,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
735 } 731 }
736#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 732#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
737 else if (skb->protocol == htons(ETH_P_IPV6)) { 733 else if (skb->protocol == htons(ETH_P_IPV6)) {
738 struct in6_addr *addr6; 734 const struct in6_addr *addr6;
739 int addr_type; 735 int addr_type;
740 struct neighbour *neigh = skb_dst(skb)->neighbour; 736 struct neighbour *neigh = skb_dst(skb)->neighbour;
741 737
742 if (neigh == NULL) 738 if (neigh == NULL)
743 goto tx_error; 739 goto tx_error;
744 740
745 addr6 = (struct in6_addr *)&neigh->primary_key; 741 addr6 = (const struct in6_addr *)&neigh->primary_key;
746 addr_type = ipv6_addr_type(addr6); 742 addr_type = ipv6_addr_type(addr6);
747 743
748 if (addr_type == IPV6_ADDR_ANY) { 744 if (addr_type == IPV6_ADDR_ANY) {
@@ -766,22 +762,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
766 if (skb->protocol == htons(ETH_P_IP)) 762 if (skb->protocol == htons(ETH_P_IP))
767 tos = old_iph->tos; 763 tos = old_iph->tos;
768 else if (skb->protocol == htons(ETH_P_IPV6)) 764 else if (skb->protocol == htons(ETH_P_IPV6))
769 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 765 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
770 } 766 }
771 767
772 { 768 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
773 struct flowi fl = { 769 tunnel->parms.o_key, RT_TOS(tos),
774 .oif = tunnel->parms.link, 770 tunnel->parms.link);
775 .fl4_dst = dst, 771 if (IS_ERR(rt)) {
776 .fl4_src = tiph->saddr, 772 dev->stats.tx_carrier_errors++;
777 .fl4_tos = RT_TOS(tos), 773 goto tx_error;
778 .proto = IPPROTO_GRE,
779 .fl_gre_key = tunnel->parms.o_key
780 };
781 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
782 dev->stats.tx_carrier_errors++;
783 goto tx_error;
784 }
785 } 774 }
786 tdev = rt->dst.dev; 775 tdev = rt->dst.dev;
787 776
@@ -880,15 +869,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
880 iph->frag_off = df; 869 iph->frag_off = df;
881 iph->protocol = IPPROTO_GRE; 870 iph->protocol = IPPROTO_GRE;
882 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 871 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
883 iph->daddr = rt->rt_dst; 872 iph->daddr = fl4.daddr;
884 iph->saddr = rt->rt_src; 873 iph->saddr = fl4.saddr;
885 874
886 if ((iph->ttl = tiph->ttl) == 0) { 875 if ((iph->ttl = tiph->ttl) == 0) {
887 if (skb->protocol == htons(ETH_P_IP)) 876 if (skb->protocol == htons(ETH_P_IP))
888 iph->ttl = old_iph->ttl; 877 iph->ttl = old_iph->ttl;
889#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 878#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
890 else if (skb->protocol == htons(ETH_P_IPV6)) 879 else if (skb->protocol == htons(ETH_P_IPV6))
891 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; 880 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
892#endif 881#endif
893 else 882 else
894 iph->ttl = ip4_dst_hoplimit(&rt->dst); 883 iph->ttl = ip4_dst_hoplimit(&rt->dst);
@@ -934,7 +923,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
934{ 923{
935 struct net_device *tdev = NULL; 924 struct net_device *tdev = NULL;
936 struct ip_tunnel *tunnel; 925 struct ip_tunnel *tunnel;
937 struct iphdr *iph; 926 const struct iphdr *iph;
938 int hlen = LL_MAX_HEADER; 927 int hlen = LL_MAX_HEADER;
939 int mtu = ETH_DATA_LEN; 928 int mtu = ETH_DATA_LEN;
940 int addend = sizeof(struct iphdr) + 4; 929 int addend = sizeof(struct iphdr) + 4;
@@ -945,17 +934,15 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
945 /* Guess output device to choose reasonable mtu and needed_headroom */ 934 /* Guess output device to choose reasonable mtu and needed_headroom */
946 935
947 if (iph->daddr) { 936 if (iph->daddr) {
948 struct flowi fl = { 937 struct flowi4 fl4;
949 .oif = tunnel->parms.link,
950 .fl4_dst = iph->daddr,
951 .fl4_src = iph->saddr,
952 .fl4_tos = RT_TOS(iph->tos),
953 .proto = IPPROTO_GRE,
954 .fl_gre_key = tunnel->parms.o_key
955 };
956 struct rtable *rt; 938 struct rtable *rt;
957 939
958 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 940 rt = ip_route_output_gre(dev_net(dev), &fl4,
941 iph->daddr, iph->saddr,
942 tunnel->parms.o_key,
943 RT_TOS(iph->tos),
944 tunnel->parms.link);
945 if (!IS_ERR(rt)) {
959 tdev = rt->dst.dev; 946 tdev = rt->dst.dev;
960 ip_rt_put(rt); 947 ip_rt_put(rt);
961 } 948 }
@@ -1191,7 +1178,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1191 1178
1192static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 1179static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1193{ 1180{
1194 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); 1181 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
1195 memcpy(haddr, &iph->saddr, 4); 1182 memcpy(haddr, &iph->saddr, 4);
1196 return 4; 1183 return 4;
1197} 1184}
@@ -1207,17 +1194,16 @@ static int ipgre_open(struct net_device *dev)
1207 struct ip_tunnel *t = netdev_priv(dev); 1194 struct ip_tunnel *t = netdev_priv(dev);
1208 1195
1209 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1196 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1210 struct flowi fl = { 1197 struct flowi4 fl4;
1211 .oif = t->parms.link,
1212 .fl4_dst = t->parms.iph.daddr,
1213 .fl4_src = t->parms.iph.saddr,
1214 .fl4_tos = RT_TOS(t->parms.iph.tos),
1215 .proto = IPPROTO_GRE,
1216 .fl_gre_key = t->parms.o_key
1217 };
1218 struct rtable *rt; 1198 struct rtable *rt;
1219 1199
1220 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1200 rt = ip_route_output_gre(dev_net(dev), &fl4,
1201 t->parms.iph.daddr,
1202 t->parms.iph.saddr,
1203 t->parms.o_key,
1204 RT_TOS(t->parms.iph.tos),
1205 t->parms.link);
1206 if (IS_ERR(rt))
1221 return -EADDRNOTAVAIL; 1207 return -EADDRNOTAVAIL;
1222 dev = rt->dst.dev; 1208 dev = rt->dst.dev;
1223 ip_rt_put(rt); 1209 ip_rt_put(rt);
@@ -1765,4 +1751,4 @@ module_exit(ipgre_fini);
1765MODULE_LICENSE("GPL"); 1751MODULE_LICENSE("GPL");
1766MODULE_ALIAS_RTNL_LINK("gre"); 1752MODULE_ALIAS_RTNL_LINK("gre");
1767MODULE_ALIAS_RTNL_LINK("gretap"); 1753MODULE_ALIAS_RTNL_LINK("gretap");
1768MODULE_ALIAS("gre0"); 1754MODULE_ALIAS_NETDEV("gre0");
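
The ip_gre.c hunks above replace the open-coded struct flowi initialisers with ip_route_output_gre(), which takes a struct flowi4, returns the route (or an ERR_PTR) directly, and leaves the addresses chosen by routing in the flowi4 for the outer header. A minimal sketch of that calling pattern, with the helper's prototype assumed from the call sites above and every other name a placeholder:

	#include <linux/err.h>
	#include <linux/if_tunnel.h>
	#include <net/route.h>

	/* Sketch: route a GRE-encapsulated packet the new way.  Only
	 * ip_route_output_gre() and the flowi4 usage come from the patch;
	 * the surrounding function is illustrative. */
	static int example_gre_route(struct net *net, struct ip_tunnel_parm *parms,
				     __be32 dst, u8 tos,
				     __be32 *outer_src, __be32 *outer_dst)
	{
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(net, &fl4, dst, parms->iph.saddr,
					 parms->o_key, RT_TOS(tos), parms->link);
		if (IS_ERR(rt))
			return PTR_ERR(rt);

		/* the addresses picked by routing now live in fl4,
		 * not in rt->rt_src / rt->rt_dst */
		*outer_src = fl4.saddr;
		*outer_dst = fl4.daddr;
		ip_rt_put(rt);
		return 0;
	}

The error handling changes accordingly: instead of an int return from ip_route_output_key(), failure is signalled by IS_ERR(rt), which is why the hunks switch to that check.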
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d859bcc26cb7..c8f48efc5fd3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb)
268static inline int ip_rcv_options(struct sk_buff *skb) 268static inline int ip_rcv_options(struct sk_buff *skb)
269{ 269{
270 struct ip_options *opt; 270 struct ip_options *opt;
271 struct iphdr *iph; 271 const struct iphdr *iph;
272 struct net_device *dev = skb->dev; 272 struct net_device *dev = skb->dev;
273 273
274 /* It looks as overkill, because not all 274 /* It looks as overkill, because not all
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
340 } 340 }
341 } 341 }
342 342
343#ifdef CONFIG_NET_CLS_ROUTE 343#ifdef CONFIG_IP_ROUTE_CLASSID
344 if (unlikely(skb_dst(skb)->tclassid)) { 344 if (unlikely(skb_dst(skb)->tclassid)) {
345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); 345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
346 u32 idx = skb_dst(skb)->tclassid; 346 u32 idx = skb_dst(skb)->tclassid;
@@ -374,7 +374,7 @@ drop:
374 */ 374 */
375int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 375int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
376{ 376{
377 struct iphdr *iph; 377 const struct iphdr *iph;
378 u32 len; 378 u32 len;
379 379
380 /* When the interface is in promisc. mode, drop all the crap 380 /* When the interface is in promisc. mode, drop all the crap
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1906fa35860c..c3118e1cd3bb 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -36,8 +36,8 @@
36 * saddr is address of outgoing interface. 36 * saddr is address of outgoing interface.
37 */ 37 */
38 38
39void ip_options_build(struct sk_buff * skb, struct ip_options * opt, 39void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
40 __be32 daddr, struct rtable *rt, int is_frag) 40 __be32 daddr, struct rtable *rt, int is_frag)
41{ 41{
42 unsigned char *iph = skb_network_header(skb); 42 unsigned char *iph = skb_network_header(skb);
43 43
@@ -50,9 +50,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
50 50
51 if (!is_frag) { 51 if (!is_frag) {
52 if (opt->rr_needaddr) 52 if (opt->rr_needaddr)
53 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); 53 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt);
54 if (opt->ts_needaddr) 54 if (opt->ts_needaddr)
55 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); 55 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
56 if (opt->ts_needtime) { 56 if (opt->ts_needtime) {
57 struct timespec tv; 57 struct timespec tv;
58 __be32 midtime; 58 __be32 midtime;
@@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
83 * NOTE: dopt cannot point to skb. 83 * NOTE: dopt cannot point to skb.
84 */ 84 */
85 85
86int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) 86int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
87{ 87{
88 struct ip_options *sopt; 88 const struct ip_options *sopt;
89 unsigned char *sptr, *dptr; 89 unsigned char *sptr, *dptr;
90 int soffset, doffset; 90 int soffset, doffset;
91 int optlen; 91 int optlen;
@@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
95 95
96 sopt = &(IPCB(skb)->opt); 96 sopt = &(IPCB(skb)->opt);
97 97
98 if (sopt->optlen == 0) { 98 if (sopt->optlen == 0)
99 dopt->optlen = 0;
100 return 0; 99 return 0;
101 }
102 100
103 sptr = skb_network_header(skb); 101 sptr = skb_network_header(skb);
104 dptr = dopt->__data; 102 dptr = dopt->__data;
@@ -140,11 +138,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
140 } else { 138 } else {
141 dopt->ts_needtime = 0; 139 dopt->ts_needtime = 0;
142 140
143 if (soffset + 8 <= optlen) { 141 if (soffset + 7 <= optlen) {
144 __be32 addr; 142 __be32 addr;
145 143
146 memcpy(&addr, sptr+soffset-1, 4); 144 memcpy(&addr, dptr+soffset-1, 4);
147 if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) { 145 if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
148 dopt->ts_needtime = 1; 146 dopt->ts_needtime = 1;
149 soffset += 8; 147 soffset += 8;
150 } 148 }
@@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
157 dopt->optlen += optlen; 155 dopt->optlen += optlen;
158 } 156 }
159 if (sopt->srr) { 157 if (sopt->srr) {
160 unsigned char * start = sptr+sopt->srr; 158 unsigned char *start = sptr+sopt->srr;
161 __be32 faddr; 159 __be32 faddr;
162 160
163 optlen = start[1]; 161 optlen = start[1];
@@ -329,7 +327,7 @@ int ip_options_compile(struct net *net,
329 pp_ptr = optptr + 2; 327 pp_ptr = optptr + 2;
330 goto error; 328 goto error;
331 } 329 }
332 if (skb) { 330 if (rt) {
333 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); 331 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
334 opt->is_changed = 1; 332 opt->is_changed = 1;
335 } 333 }
@@ -371,7 +369,7 @@ int ip_options_compile(struct net *net,
371 goto error; 369 goto error;
372 } 370 }
373 opt->ts = optptr - iph; 371 opt->ts = optptr - iph;
374 if (skb) { 372 if (rt) {
375 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); 373 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
376 timeptr = (__be32*)&optptr[optptr[2]+3]; 374 timeptr = (__be32*)&optptr[optptr[2]+3];
377 } 375 }
@@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt)
499 } 497 }
500} 498}
501 499
502static struct ip_options *ip_options_get_alloc(const int optlen) 500static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
503{ 501{
504 return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), 502 return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
505 GFP_KERNEL); 503 GFP_KERNEL);
506} 504}
507 505
508static int ip_options_get_finish(struct net *net, struct ip_options **optp, 506static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
509 struct ip_options *opt, int optlen) 507 struct ip_options_rcu *opt, int optlen)
510{ 508{
511 while (optlen & 3) 509 while (optlen & 3)
512 opt->__data[optlen++] = IPOPT_END; 510 opt->opt.__data[optlen++] = IPOPT_END;
513 opt->optlen = optlen; 511 opt->opt.optlen = optlen;
514 if (optlen && ip_options_compile(net, opt, NULL)) { 512 if (optlen && ip_options_compile(net, &opt->opt, NULL)) {
515 kfree(opt); 513 kfree(opt);
516 return -EINVAL; 514 return -EINVAL;
517 } 515 }
@@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp,
520 return 0; 518 return 0;
521} 519}
522 520
523int ip_options_get_from_user(struct net *net, struct ip_options **optp, 521int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
524 unsigned char __user *data, int optlen) 522 unsigned char __user *data, int optlen)
525{ 523{
526 struct ip_options *opt = ip_options_get_alloc(optlen); 524 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
527 525
528 if (!opt) 526 if (!opt)
529 return -ENOMEM; 527 return -ENOMEM;
530 if (optlen && copy_from_user(opt->__data, data, optlen)) { 528 if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
531 kfree(opt); 529 kfree(opt);
532 return -EFAULT; 530 return -EFAULT;
533 } 531 }
534 return ip_options_get_finish(net, optp, opt, optlen); 532 return ip_options_get_finish(net, optp, opt, optlen);
535} 533}
536 534
537int ip_options_get(struct net *net, struct ip_options **optp, 535int ip_options_get(struct net *net, struct ip_options_rcu **optp,
538 unsigned char *data, int optlen) 536 unsigned char *data, int optlen)
539{ 537{
540 struct ip_options *opt = ip_options_get_alloc(optlen); 538 struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
541 539
542 if (!opt) 540 if (!opt)
543 return -ENOMEM; 541 return -ENOMEM;
544 if (optlen) 542 if (optlen)
545 memcpy(opt->__data, data, optlen); 543 memcpy(opt->opt.__data, data, optlen);
546 return ip_options_get_finish(net, optp, opt, optlen); 544 return ip_options_get_finish(net, optp, opt, optlen);
547} 545}
548 546
@@ -555,7 +553,7 @@ void ip_forward_options(struct sk_buff *skb)
555 553
556 if (opt->rr_needaddr) { 554 if (opt->rr_needaddr) {
557 optptr = (unsigned char *)raw + opt->rr; 555 optptr = (unsigned char *)raw + opt->rr;
558 ip_rt_get_source(&optptr[optptr[2]-5], rt); 556 ip_rt_get_source(&optptr[optptr[2]-5], skb, rt);
559 opt->is_changed = 1; 557 opt->is_changed = 1;
560 } 558 }
561 if (opt->srr_is_hit) { 559 if (opt->srr_is_hit) {
@@ -569,19 +567,18 @@ void ip_forward_options(struct sk_buff *skb)
569 ) { 567 ) {
570 if (srrptr + 3 > srrspace) 568 if (srrptr + 3 > srrspace)
571 break; 569 break;
572 if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) 570 if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0)
573 break; 571 break;
574 } 572 }
575 if (srrptr + 3 <= srrspace) { 573 if (srrptr + 3 <= srrspace) {
576 opt->is_changed = 1; 574 opt->is_changed = 1;
577 ip_rt_get_source(&optptr[srrptr-1], rt); 575 ip_rt_get_source(&optptr[srrptr-1], skb, rt);
578 ip_hdr(skb)->daddr = rt->rt_dst;
579 optptr[2] = srrptr+4; 576 optptr[2] = srrptr+4;
580 } else if (net_ratelimit()) 577 } else if (net_ratelimit())
581 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); 578 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
582 if (opt->ts_needaddr) { 579 if (opt->ts_needaddr) {
583 optptr = raw + opt->ts; 580 optptr = raw + opt->ts;
584 ip_rt_get_source(&optptr[optptr[2]-9], rt); 581 ip_rt_get_source(&optptr[optptr[2]-9], skb, rt);
585 opt->is_changed = 1; 582 opt->is_changed = 1;
586 } 583 }
587 } 584 }
@@ -603,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
603 unsigned long orefdst; 600 unsigned long orefdst;
604 int err; 601 int err;
605 602
606 if (!opt->srr) 603 if (!rt)
607 return 0; 604 return 0;
608 605
609 if (skb->pkt_type != PACKET_HOST) 606 if (skb->pkt_type != PACKET_HOST)
@@ -637,7 +634,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
637 if (rt2->rt_type != RTN_LOCAL) 634 if (rt2->rt_type != RTN_LOCAL)
638 break; 635 break;
639 /* Superfast 8) loopback forward */ 636 /* Superfast 8) loopback forward */
640 memcpy(&iph->daddr, &optptr[srrptr-1], 4); 637 iph->daddr = nexthop;
641 opt->is_changed = 1; 638 opt->is_changed = 1;
642 } 639 }
643 if (srrptr <= srrspace) { 640 if (srrptr <= srrspace) {
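
Several helpers above (ip_options_get_alloc() and friends) now traffic in struct ip_options_rcu rather than struct ip_options, and accesses become opt->opt.optlen / opt->opt.__data. The wrapper's definition is not part of this diff; judging from the container_of() in the ip_sockglue.c hunks below and the "+ ((optlen + 3) & ~3)" allocation, it is roughly the following (names prefixed example_ to mark this as an inferred sketch, not the header's definition):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <net/inet_sock.h>

	/* Assumed layout, inferred from the call sites in this patch: an
	 * rcu_head plus the embedded legacy ip_options, whose variable-length
	 * __data[] still sits at the end so the size computed by
	 * ip_options_get_alloc() covers the trailing option bytes. */
	struct example_ip_options_rcu {
		struct rcu_head		rcu;
		struct ip_options	opt;	/* old opt->optlen becomes opt.optlen, etc. */
	};

	static void example_opt_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct example_ip_options_rcu, rcu));
	}

The matching free path is the call_rcu(&old->rcu, opt_kfree_rcu) added in the ip_sockglue.c hunks further down.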
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 04c7b3ba6b39..98af3697c718 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -140,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
140 * 140 *
141 */ 141 */
142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, 142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 __be32 saddr, __be32 daddr, struct ip_options *opt) 143 __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
144{ 144{
145 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
146 struct rtable *rt = skb_rtable(skb); 146 struct rtable *rt = skb_rtable(skb);
147 struct iphdr *iph; 147 struct iphdr *iph;
148 148
149 /* Build the IP header. */ 149 /* Build the IP header. */
150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
151 skb_reset_network_header(skb); 151 skb_reset_network_header(skb);
152 iph = ip_hdr(skb); 152 iph = ip_hdr(skb);
153 iph->version = 4; 153 iph->version = 4;
@@ -158,14 +158,14 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
158 else 158 else
159 iph->frag_off = 0; 159 iph->frag_off = 0;
160 iph->ttl = ip_select_ttl(inet, &rt->dst); 160 iph->ttl = ip_select_ttl(inet, &rt->dst);
161 iph->daddr = rt->rt_dst; 161 iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
162 iph->saddr = rt->rt_src; 162 iph->saddr = saddr;
163 iph->protocol = sk->sk_protocol; 163 iph->protocol = sk->sk_protocol;
164 ip_select_ident(iph, &rt->dst, sk); 164 ip_select_ident(iph, &rt->dst, sk);
165 165
166 if (opt && opt->optlen) { 166 if (opt && opt->opt.optlen) {
167 iph->ihl += opt->optlen>>2; 167 iph->ihl += opt->opt.optlen>>2;
168 ip_options_build(skb, opt, daddr, rt, 0); 168 ip_options_build(skb, &opt->opt, daddr, rt, 0);
169 } 169 }
170 170
171 skb->priority = sk->sk_priority; 171 skb->priority = sk->sk_priority;
@@ -312,11 +312,12 @@ int ip_output(struct sk_buff *skb)
312 !(IPCB(skb)->flags & IPSKB_REROUTED)); 312 !(IPCB(skb)->flags & IPSKB_REROUTED));
313} 313}
314 314
315int ip_queue_xmit(struct sk_buff *skb) 315int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
316{ 316{
317 struct sock *sk = skb->sk; 317 struct sock *sk = skb->sk;
318 struct inet_sock *inet = inet_sk(sk); 318 struct inet_sock *inet = inet_sk(sk);
319 struct ip_options *opt = inet->opt; 319 struct ip_options_rcu *inet_opt;
320 struct flowi4 *fl4;
320 struct rtable *rt; 321 struct rtable *rt;
321 struct iphdr *iph; 322 struct iphdr *iph;
322 int res; 323 int res;
@@ -325,6 +326,8 @@ int ip_queue_xmit(struct sk_buff *skb)
325 * f.e. by something like SCTP. 326 * f.e. by something like SCTP.
326 */ 327 */
327 rcu_read_lock(); 328 rcu_read_lock();
329 inet_opt = rcu_dereference(inet->inet_opt);
330 fl4 = &fl->u.ip4;
328 rt = skb_rtable(skb); 331 rt = skb_rtable(skb);
329 if (rt != NULL) 332 if (rt != NULL)
330 goto packet_routed; 333 goto packet_routed;
@@ -336,38 +339,32 @@ int ip_queue_xmit(struct sk_buff *skb)
336 339
337 /* Use correct destination address if we have options. */ 340 /* Use correct destination address if we have options. */
338 daddr = inet->inet_daddr; 341 daddr = inet->inet_daddr;
339 if(opt && opt->srr) 342 if (inet_opt && inet_opt->opt.srr)
340 daddr = opt->faddr; 343 daddr = inet_opt->opt.faddr;
341 344
342 { 345 /* If this fails, retransmit mechanism of transport layer will
343 struct flowi fl = { .oif = sk->sk_bound_dev_if, 346 * keep trying until route appears or the connection times
344 .mark = sk->sk_mark, 347 * itself out.
345 .fl4_dst = daddr, 348 */
346 .fl4_src = inet->inet_saddr, 349 rt = ip_route_output_ports(sock_net(sk), fl4, sk,
347 .fl4_tos = RT_CONN_FLAGS(sk), 350 daddr, inet->inet_saddr,
348 .proto = sk->sk_protocol, 351 inet->inet_dport,
349 .flags = inet_sk_flowi_flags(sk), 352 inet->inet_sport,
350 .fl_ip_sport = inet->inet_sport, 353 sk->sk_protocol,
351 .fl_ip_dport = inet->inet_dport }; 354 RT_CONN_FLAGS(sk),
352 355 sk->sk_bound_dev_if);
353 /* If this fails, retransmit mechanism of transport layer will 356 if (IS_ERR(rt))
354 * keep trying until route appears or the connection times 357 goto no_route;
355 * itself out.
356 */
357 security_sk_classify_flow(sk, &fl);
358 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
359 goto no_route;
360 }
361 sk_setup_caps(sk, &rt->dst); 358 sk_setup_caps(sk, &rt->dst);
362 } 359 }
363 skb_dst_set_noref(skb, &rt->dst); 360 skb_dst_set_noref(skb, &rt->dst);
364 361
365packet_routed: 362packet_routed:
366 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 363 if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
367 goto no_route; 364 goto no_route;
368 365
369 /* OK, we know where to send it, allocate and build IP header. */ 366 /* OK, we know where to send it, allocate and build IP header. */
370 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 367 skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
371 skb_reset_network_header(skb); 368 skb_reset_network_header(skb);
372 iph = ip_hdr(skb); 369 iph = ip_hdr(skb);
373 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 370 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
@@ -377,13 +374,13 @@ packet_routed:
377 iph->frag_off = 0; 374 iph->frag_off = 0;
378 iph->ttl = ip_select_ttl(inet, &rt->dst); 375 iph->ttl = ip_select_ttl(inet, &rt->dst);
379 iph->protocol = sk->sk_protocol; 376 iph->protocol = sk->sk_protocol;
380 iph->saddr = rt->rt_src; 377 iph->saddr = fl4->saddr;
381 iph->daddr = rt->rt_dst; 378 iph->daddr = fl4->daddr;
382 /* Transport layer set skb->h.foo itself. */ 379 /* Transport layer set skb->h.foo itself. */
383 380
384 if (opt && opt->optlen) { 381 if (inet_opt && inet_opt->opt.optlen) {
385 iph->ihl += opt->optlen >> 2; 382 iph->ihl += inet_opt->opt.optlen >> 2;
386 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 383 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
387 } 384 }
388 385
389 ip_select_ident_more(iph, &rt->dst, sk, 386 ip_select_ident_more(iph, &rt->dst, sk,
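
ip_queue_xmit() now takes a struct flowi from the transport layer and routes with ip_route_output_ports(), another ERR_PTR-returning wrapper; the source and destination the lookup actually picked are then read back from the flowi4 (iph->saddr = fl4->saddr, iph->daddr = fl4->daddr above) instead of rt->rt_src / rt->rt_dst. A condensed sketch of that lookup, mirroring the call in the hunk, with error handling left to the caller:

	#include <linux/err.h>
	#include <net/inet_sock.h>
	#include <net/route.h>
	#include <net/sock.h>

	/* Sketch of the connected-socket output routing used by the new
	 * ip_queue_xmit(): the flowi4 both parameterises the lookup and
	 * reports the addresses that were actually used. */
	static struct rtable *example_route_for_sk(struct sock *sk,
						   struct flowi4 *fl4, __be32 daddr)
	{
		struct inet_sock *inet = inet_sk(sk);

		return ip_route_output_ports(sock_net(sk), fl4, sk,
					     daddr, inet->inet_saddr,
					     inet->inet_dport, inet->inet_sport,
					     sk->sk_protocol, RT_CONN_FLAGS(sk),
					     sk->sk_bound_dev_if);
	}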
@@ -609,7 +606,7 @@ slow_path:
609 /* IF: it doesn't fit, use 'mtu' - the data space left */ 606 /* IF: it doesn't fit, use 'mtu' - the data space left */
610 if (len > mtu) 607 if (len > mtu)
611 len = mtu; 608 len = mtu;
612 /* IF: we are not sending upto and including the packet end 609 /* IF: we are not sending up to and including the packet end
613 then align the next start on an eight byte boundary */ 610 then align the next start on an eight byte boundary */
614 if (len < left) { 611 if (len < left) {
615 len &= ~7; 612 len &= ~7;
@@ -733,6 +730,7 @@ csum_page(struct page *page, int offset, int copy)
733} 730}
734 731
735static inline int ip_ufo_append_data(struct sock *sk, 732static inline int ip_ufo_append_data(struct sock *sk,
733 struct sk_buff_head *queue,
736 int getfrag(void *from, char *to, int offset, int len, 734 int getfrag(void *from, char *to, int offset, int len,
737 int odd, struct sk_buff *skb), 735 int odd, struct sk_buff *skb),
738 void *from, int length, int hh_len, int fragheaderlen, 736 void *from, int length, int hh_len, int fragheaderlen,
@@ -745,7 +743,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
745 * device, so create one single skb packet containing complete 743 * device, so create one single skb packet containing complete
746 * udp datagram 744 * udp datagram
747 */ 745 */
748 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 746 if ((skb = skb_peek_tail(queue)) == NULL) {
749 skb = sock_alloc_send_skb(sk, 747 skb = sock_alloc_send_skb(sk,
750 hh_len + fragheaderlen + transhdrlen + 20, 748 hh_len + fragheaderlen + transhdrlen + 20,
751 (flags & MSG_DONTWAIT), &err); 749 (flags & MSG_DONTWAIT), &err);
@@ -767,40 +765,30 @@ static inline int ip_ufo_append_data(struct sock *sk,
767 765
768 skb->ip_summed = CHECKSUM_PARTIAL; 766 skb->ip_summed = CHECKSUM_PARTIAL;
769 skb->csum = 0; 767 skb->csum = 0;
770 sk->sk_sndmsg_off = 0;
771 768
772 /* specify the length of each IP datagram fragment */ 769 /* specify the length of each IP datagram fragment */
773 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 770 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
774 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 771 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
775 __skb_queue_tail(&sk->sk_write_queue, skb); 772 __skb_queue_tail(queue, skb);
776 } 773 }
777 774
778 return skb_append_datato_frags(sk, skb, getfrag, from, 775 return skb_append_datato_frags(sk, skb, getfrag, from,
779 (length - transhdrlen)); 776 (length - transhdrlen));
780} 777}
781 778
782/* 779static int __ip_append_data(struct sock *sk,
783 * ip_append_data() and ip_append_page() can make one large IP datagram 780 struct flowi4 *fl4,
784 * from many pieces of data. Each pieces will be holded on the socket 781 struct sk_buff_head *queue,
785 * until ip_push_pending_frames() is called. Each piece can be a page 782 struct inet_cork *cork,
786 * or non-page data. 783 int getfrag(void *from, char *to, int offset,
787 * 784 int len, int odd, struct sk_buff *skb),
788 * Not only UDP, other transport protocols - e.g. raw sockets - can use 785 void *from, int length, int transhdrlen,
789 * this interface potentially. 786 unsigned int flags)
790 *
791 * LATER: length must be adjusted by pad at tail, when it is required.
792 */
793int ip_append_data(struct sock *sk,
794 int getfrag(void *from, char *to, int offset, int len,
795 int odd, struct sk_buff *skb),
796 void *from, int length, int transhdrlen,
797 struct ipcm_cookie *ipc, struct rtable **rtp,
798 unsigned int flags)
799{ 787{
800 struct inet_sock *inet = inet_sk(sk); 788 struct inet_sock *inet = inet_sk(sk);
801 struct sk_buff *skb; 789 struct sk_buff *skb;
802 790
803 struct ip_options *opt = NULL; 791 struct ip_options *opt = cork->opt;
804 int hh_len; 792 int hh_len;
805 int exthdrlen; 793 int exthdrlen;
806 int mtu; 794 int mtu;
@@ -809,59 +797,20 @@ int ip_append_data(struct sock *sk,
809 int offset = 0; 797 int offset = 0;
810 unsigned int maxfraglen, fragheaderlen; 798 unsigned int maxfraglen, fragheaderlen;
811 int csummode = CHECKSUM_NONE; 799 int csummode = CHECKSUM_NONE;
812 struct rtable *rt; 800 struct rtable *rt = (struct rtable *)cork->dst;
813
814 if (flags&MSG_PROBE)
815 return 0;
816 801
817 if (skb_queue_empty(&sk->sk_write_queue)) { 802 exthdrlen = transhdrlen ? rt->dst.header_len : 0;
818 /* 803 length += exthdrlen;
819 * setup for corking. 804 transhdrlen += exthdrlen;
820 */ 805 mtu = cork->fragsize;
821 opt = ipc->opt;
822 if (opt) {
823 if (inet->cork.opt == NULL) {
824 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
825 if (unlikely(inet->cork.opt == NULL))
826 return -ENOBUFS;
827 }
828 memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
829 inet->cork.flags |= IPCORK_OPT;
830 inet->cork.addr = ipc->addr;
831 }
832 rt = *rtp;
833 if (unlikely(!rt))
834 return -EFAULT;
835 /*
836 * We steal reference to this route, caller should not release it
837 */
838 *rtp = NULL;
839 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
840 rt->dst.dev->mtu :
841 dst_mtu(rt->dst.path);
842 inet->cork.dst = &rt->dst;
843 inet->cork.length = 0;
844 sk->sk_sndmsg_page = NULL;
845 sk->sk_sndmsg_off = 0;
846 exthdrlen = rt->dst.header_len;
847 length += exthdrlen;
848 transhdrlen += exthdrlen;
849 } else {
850 rt = (struct rtable *)inet->cork.dst;
851 if (inet->cork.flags & IPCORK_OPT)
852 opt = inet->cork.opt;
853 806
854 transhdrlen = 0;
855 exthdrlen = 0;
856 mtu = inet->cork.fragsize;
857 }
858 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 807 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
859 808
860 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 809 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
861 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 810 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
862 811
863 if (inet->cork.length + length > 0xFFFF - fragheaderlen) { 812 if (cork->length + length > 0xFFFF - fragheaderlen) {
864 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 813 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
865 mtu-exthdrlen); 814 mtu-exthdrlen);
866 return -EMSGSIZE; 815 return -EMSGSIZE;
867 } 816 }
@@ -876,15 +825,15 @@ int ip_append_data(struct sock *sk,
876 !exthdrlen) 825 !exthdrlen)
877 csummode = CHECKSUM_PARTIAL; 826 csummode = CHECKSUM_PARTIAL;
878 827
879 skb = skb_peek_tail(&sk->sk_write_queue); 828 skb = skb_peek_tail(queue);
880 829
881 inet->cork.length += length; 830 cork->length += length;
882 if (((length > mtu) || (skb && skb_is_gso(skb))) && 831 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
883 (sk->sk_protocol == IPPROTO_UDP) && 832 (sk->sk_protocol == IPPROTO_UDP) &&
884 (rt->dst.dev->features & NETIF_F_UFO)) { 833 (rt->dst.dev->features & NETIF_F_UFO)) {
885 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, 834 err = ip_ufo_append_data(sk, queue, getfrag, from, length,
886 fragheaderlen, transhdrlen, mtu, 835 hh_len, fragheaderlen, transhdrlen,
887 flags); 836 mtu, flags);
888 if (err) 837 if (err)
889 goto error; 838 goto error;
890 return 0; 839 return 0;
@@ -961,7 +910,7 @@ alloc_new_skb:
961 else 910 else
962 /* only the initial fragment is 911 /* only the initial fragment is
963 time stamped */ 912 time stamped */
964 ipc->tx_flags = 0; 913 cork->tx_flags = 0;
965 } 914 }
966 if (skb == NULL) 915 if (skb == NULL)
967 goto error; 916 goto error;
@@ -972,7 +921,7 @@ alloc_new_skb:
972 skb->ip_summed = csummode; 921 skb->ip_summed = csummode;
973 skb->csum = 0; 922 skb->csum = 0;
974 skb_reserve(skb, hh_len); 923 skb_reserve(skb, hh_len);
975 skb_shinfo(skb)->tx_flags = ipc->tx_flags; 924 skb_shinfo(skb)->tx_flags = cork->tx_flags;
976 925
977 /* 926 /*
978 * Find where to start putting bytes. 927 * Find where to start putting bytes.
@@ -1009,7 +958,7 @@ alloc_new_skb:
1009 /* 958 /*
1010 * Put the packet on the pending queue. 959 * Put the packet on the pending queue.
1011 */ 960 */
1012 __skb_queue_tail(&sk->sk_write_queue, skb); 961 __skb_queue_tail(queue, skb);
1013 continue; 962 continue;
1014 } 963 }
1015 964
@@ -1029,8 +978,8 @@ alloc_new_skb:
1029 } else { 978 } else {
1030 int i = skb_shinfo(skb)->nr_frags; 979 int i = skb_shinfo(skb)->nr_frags;
1031 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 980 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1032 struct page *page = sk->sk_sndmsg_page; 981 struct page *page = cork->page;
1033 int off = sk->sk_sndmsg_off; 982 int off = cork->off;
1034 unsigned int left; 983 unsigned int left;
1035 984
1036 if (page && (left = PAGE_SIZE - off) > 0) { 985 if (page && (left = PAGE_SIZE - off) > 0) {
@@ -1042,7 +991,7 @@ alloc_new_skb:
1042 goto error; 991 goto error;
1043 } 992 }
1044 get_page(page); 993 get_page(page);
1045 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 994 skb_fill_page_desc(skb, i, page, off, 0);
1046 frag = &skb_shinfo(skb)->frags[i]; 995 frag = &skb_shinfo(skb)->frags[i];
1047 } 996 }
1048 } else if (i < MAX_SKB_FRAGS) { 997 } else if (i < MAX_SKB_FRAGS) {
@@ -1053,8 +1002,8 @@ alloc_new_skb:
1053 err = -ENOMEM; 1002 err = -ENOMEM;
1054 goto error; 1003 goto error;
1055 } 1004 }
1056 sk->sk_sndmsg_page = page; 1005 cork->page = page;
1057 sk->sk_sndmsg_off = 0; 1006 cork->off = 0;
1058 1007
1059 skb_fill_page_desc(skb, i, page, 0, 0); 1008 skb_fill_page_desc(skb, i, page, 0, 0);
1060 frag = &skb_shinfo(skb)->frags[i]; 1009 frag = &skb_shinfo(skb)->frags[i];
@@ -1066,7 +1015,7 @@ alloc_new_skb:
1066 err = -EFAULT; 1015 err = -EFAULT;
1067 goto error; 1016 goto error;
1068 } 1017 }
1069 sk->sk_sndmsg_off += copy; 1018 cork->off += copy;
1070 frag->size += copy; 1019 frag->size += copy;
1071 skb->len += copy; 1020 skb->len += copy;
1072 skb->data_len += copy; 1021 skb->data_len += copy;
@@ -1080,18 +1029,95 @@ alloc_new_skb:
1080 return 0; 1029 return 0;
1081 1030
1082error: 1031error:
1083 inet->cork.length -= length; 1032 cork->length -= length;
1084 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1033 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1085 return err; 1034 return err;
1086} 1035}
1087 1036
1088ssize_t ip_append_page(struct sock *sk, struct page *page, 1037static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1038 struct ipcm_cookie *ipc, struct rtable **rtp)
1039{
1040 struct inet_sock *inet = inet_sk(sk);
1041 struct ip_options_rcu *opt;
1042 struct rtable *rt;
1043
1044 /*
1045 * setup for corking.
1046 */
1047 opt = ipc->opt;
1048 if (opt) {
1049 if (cork->opt == NULL) {
1050 cork->opt = kmalloc(sizeof(struct ip_options) + 40,
1051 sk->sk_allocation);
1052 if (unlikely(cork->opt == NULL))
1053 return -ENOBUFS;
1054 }
1055 memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
1056 cork->flags |= IPCORK_OPT;
1057 cork->addr = ipc->addr;
1058 }
1059 rt = *rtp;
1060 if (unlikely(!rt))
1061 return -EFAULT;
1062 /*
1063 * We steal reference to this route, caller should not release it
1064 */
1065 *rtp = NULL;
1066 cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ?
1067 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1068 cork->dst = &rt->dst;
1069 cork->length = 0;
1070 cork->tx_flags = ipc->tx_flags;
1071 cork->page = NULL;
1072 cork->off = 0;
1073
1074 return 0;
1075}
1076
1077/*
1078 * ip_append_data() and ip_append_page() can make one large IP datagram
1079 * from many pieces of data. Each pieces will be holded on the socket
1080 * until ip_push_pending_frames() is called. Each piece can be a page
1081 * or non-page data.
1082 *
1083 * Not only UDP, other transport protocols - e.g. raw sockets - can use
1084 * this interface potentially.
1085 *
1086 * LATER: length must be adjusted by pad at tail, when it is required.
1087 */
1088int ip_append_data(struct sock *sk, struct flowi4 *fl4,
1089 int getfrag(void *from, char *to, int offset, int len,
1090 int odd, struct sk_buff *skb),
1091 void *from, int length, int transhdrlen,
1092 struct ipcm_cookie *ipc, struct rtable **rtp,
1093 unsigned int flags)
1094{
1095 struct inet_sock *inet = inet_sk(sk);
1096 int err;
1097
1098 if (flags&MSG_PROBE)
1099 return 0;
1100
1101 if (skb_queue_empty(&sk->sk_write_queue)) {
1102 err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp);
1103 if (err)
1104 return err;
1105 } else {
1106 transhdrlen = 0;
1107 }
1108
1109 return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag,
1110 from, length, transhdrlen, flags);
1111}
1112
1113ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1089 int offset, size_t size, int flags) 1114 int offset, size_t size, int flags)
1090{ 1115{
1091 struct inet_sock *inet = inet_sk(sk); 1116 struct inet_sock *inet = inet_sk(sk);
1092 struct sk_buff *skb; 1117 struct sk_buff *skb;
1093 struct rtable *rt; 1118 struct rtable *rt;
1094 struct ip_options *opt = NULL; 1119 struct ip_options *opt = NULL;
1120 struct inet_cork *cork;
1095 int hh_len; 1121 int hh_len;
1096 int mtu; 1122 int mtu;
1097 int len; 1123 int len;
@@ -1107,28 +1133,29 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1107 if (skb_queue_empty(&sk->sk_write_queue)) 1133 if (skb_queue_empty(&sk->sk_write_queue))
1108 return -EINVAL; 1134 return -EINVAL;
1109 1135
1110 rt = (struct rtable *)inet->cork.dst; 1136 cork = &inet->cork.base;
1111 if (inet->cork.flags & IPCORK_OPT) 1137 rt = (struct rtable *)cork->dst;
1112 opt = inet->cork.opt; 1138 if (cork->flags & IPCORK_OPT)
1139 opt = cork->opt;
1113 1140
1114 if (!(rt->dst.dev->features&NETIF_F_SG)) 1141 if (!(rt->dst.dev->features&NETIF_F_SG))
1115 return -EOPNOTSUPP; 1142 return -EOPNOTSUPP;
1116 1143
1117 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1144 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1118 mtu = inet->cork.fragsize; 1145 mtu = cork->fragsize;
1119 1146
1120 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1147 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1121 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 1148 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1122 1149
1123 if (inet->cork.length + size > 0xFFFF - fragheaderlen) { 1150 if (cork->length + size > 0xFFFF - fragheaderlen) {
1124 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); 1151 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu);
1125 return -EMSGSIZE; 1152 return -EMSGSIZE;
1126 } 1153 }
1127 1154
1128 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1155 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1129 return -EINVAL; 1156 return -EINVAL;
1130 1157
1131 inet->cork.length += size; 1158 cork->length += size;
1132 if ((size + skb->len > mtu) && 1159 if ((size + skb->len > mtu) &&
1133 (sk->sk_protocol == IPPROTO_UDP) && 1160 (sk->sk_protocol == IPPROTO_UDP) &&
1134 (rt->dst.dev->features & NETIF_F_UFO)) { 1161 (rt->dst.dev->features & NETIF_F_UFO)) {
@@ -1223,45 +1250,47 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1223 return 0; 1250 return 0;
1224 1251
1225error: 1252error:
1226 inet->cork.length -= size; 1253 cork->length -= size;
1227 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1254 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1228 return err; 1255 return err;
1229} 1256}
1230 1257
1231static void ip_cork_release(struct inet_sock *inet) 1258static void ip_cork_release(struct inet_cork *cork)
1232{ 1259{
1233 inet->cork.flags &= ~IPCORK_OPT; 1260 cork->flags &= ~IPCORK_OPT;
1234 kfree(inet->cork.opt); 1261 kfree(cork->opt);
1235 inet->cork.opt = NULL; 1262 cork->opt = NULL;
1236 dst_release(inet->cork.dst); 1263 dst_release(cork->dst);
1237 inet->cork.dst = NULL; 1264 cork->dst = NULL;
1238} 1265}
1239 1266
1240/* 1267/*
1241 * Combined all pending IP fragments on the socket as one IP datagram 1268 * Combined all pending IP fragments on the socket as one IP datagram
1242 * and push them out. 1269 * and push them out.
1243 */ 1270 */
1244int ip_push_pending_frames(struct sock *sk) 1271struct sk_buff *__ip_make_skb(struct sock *sk,
1272 struct flowi4 *fl4,
1273 struct sk_buff_head *queue,
1274 struct inet_cork *cork)
1245{ 1275{
1246 struct sk_buff *skb, *tmp_skb; 1276 struct sk_buff *skb, *tmp_skb;
1247 struct sk_buff **tail_skb; 1277 struct sk_buff **tail_skb;
1248 struct inet_sock *inet = inet_sk(sk); 1278 struct inet_sock *inet = inet_sk(sk);
1249 struct net *net = sock_net(sk); 1279 struct net *net = sock_net(sk);
1250 struct ip_options *opt = NULL; 1280 struct ip_options *opt = NULL;
1251 struct rtable *rt = (struct rtable *)inet->cork.dst; 1281 struct rtable *rt = (struct rtable *)cork->dst;
1252 struct iphdr *iph; 1282 struct iphdr *iph;
1253 __be16 df = 0; 1283 __be16 df = 0;
1254 __u8 ttl; 1284 __u8 ttl;
1255 int err = 0;
1256 1285
1257 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1286 if ((skb = __skb_dequeue(queue)) == NULL)
1258 goto out; 1287 goto out;
1259 tail_skb = &(skb_shinfo(skb)->frag_list); 1288 tail_skb = &(skb_shinfo(skb)->frag_list);
1260 1289
1261 /* move skb->data to ip header from ext header */ 1290 /* move skb->data to ip header from ext header */
1262 if (skb->data < skb_network_header(skb)) 1291 if (skb->data < skb_network_header(skb))
1263 __skb_pull(skb, skb_network_offset(skb)); 1292 __skb_pull(skb, skb_network_offset(skb));
1264 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1293 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1265 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1294 __skb_pull(tmp_skb, skb_network_header_len(skb));
1266 *tail_skb = tmp_skb; 1295 *tail_skb = tmp_skb;
1267 tail_skb = &(tmp_skb->next); 1296 tail_skb = &(tmp_skb->next);
@@ -1287,8 +1316,8 @@ int ip_push_pending_frames(struct sock *sk)
1287 ip_dont_fragment(sk, &rt->dst))) 1316 ip_dont_fragment(sk, &rt->dst)))
1288 df = htons(IP_DF); 1317 df = htons(IP_DF);
1289 1318
1290 if (inet->cork.flags & IPCORK_OPT) 1319 if (cork->flags & IPCORK_OPT)
1291 opt = inet->cork.opt; 1320 opt = cork->opt;
1292 1321
1293 if (rt->rt_type == RTN_MULTICAST) 1322 if (rt->rt_type == RTN_MULTICAST)
1294 ttl = inet->mc_ttl; 1323 ttl = inet->mc_ttl;
@@ -1298,17 +1327,18 @@ int ip_push_pending_frames(struct sock *sk)
1298 iph = (struct iphdr *)skb->data; 1327 iph = (struct iphdr *)skb->data;
1299 iph->version = 4; 1328 iph->version = 4;
1300 iph->ihl = 5; 1329 iph->ihl = 5;
1301 if (opt) {
1302 iph->ihl += opt->optlen>>2;
1303 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1304 }
1305 iph->tos = inet->tos; 1330 iph->tos = inet->tos;
1306 iph->frag_off = df; 1331 iph->frag_off = df;
1307 ip_select_ident(iph, &rt->dst, sk); 1332 ip_select_ident(iph, &rt->dst, sk);
1308 iph->ttl = ttl; 1333 iph->ttl = ttl;
1309 iph->protocol = sk->sk_protocol; 1334 iph->protocol = sk->sk_protocol;
1310 iph->saddr = rt->rt_src; 1335 iph->saddr = fl4->saddr;
1311 iph->daddr = rt->rt_dst; 1336 iph->daddr = fl4->daddr;
1337
1338 if (opt) {
1339 iph->ihl += opt->optlen>>2;
1340 ip_options_build(skb, opt, cork->addr, rt, 0);
1341 }
1312 1342
1313 skb->priority = sk->sk_priority; 1343 skb->priority = sk->sk_priority;
1314 skb->mark = sk->sk_mark; 1344 skb->mark = sk->sk_mark;
@@ -1316,44 +1346,99 @@ int ip_push_pending_frames(struct sock *sk)
1316 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec 1346 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
1317 * on dst refcount 1347 * on dst refcount
1318 */ 1348 */
1319 inet->cork.dst = NULL; 1349 cork->dst = NULL;
1320 skb_dst_set(skb, &rt->dst); 1350 skb_dst_set(skb, &rt->dst);
1321 1351
1322 if (iph->protocol == IPPROTO_ICMP) 1352 if (iph->protocol == IPPROTO_ICMP)
1323 icmp_out_count(net, ((struct icmphdr *) 1353 icmp_out_count(net, ((struct icmphdr *)
1324 skb_transport_header(skb))->type); 1354 skb_transport_header(skb))->type);
1325 1355
1326 /* Netfilter gets whole the not fragmented skb. */ 1356 ip_cork_release(cork);
1357out:
1358 return skb;
1359}
1360
1361int ip_send_skb(struct sk_buff *skb)
1362{
1363 struct net *net = sock_net(skb->sk);
1364 int err;
1365
1327 err = ip_local_out(skb); 1366 err = ip_local_out(skb);
1328 if (err) { 1367 if (err) {
1329 if (err > 0) 1368 if (err > 0)
1330 err = net_xmit_errno(err); 1369 err = net_xmit_errno(err);
1331 if (err) 1370 if (err)
1332 goto error; 1371 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
1333 } 1372 }
1334 1373
1335out:
1336 ip_cork_release(inet);
1337 return err; 1374 return err;
1375}
1338 1376
1339error: 1377int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
1340 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); 1378{
1341 goto out; 1379 struct sk_buff *skb;
1380
1381 skb = ip_finish_skb(sk, fl4);
1382 if (!skb)
1383 return 0;
1384
1385 /* Netfilter gets whole the not fragmented skb. */
1386 return ip_send_skb(skb);
1342} 1387}
1343 1388
1344/* 1389/*
1345 * Throw away all pending data on the socket. 1390 * Throw away all pending data on the socket.
1346 */ 1391 */
1347void ip_flush_pending_frames(struct sock *sk) 1392static void __ip_flush_pending_frames(struct sock *sk,
1393 struct sk_buff_head *queue,
1394 struct inet_cork *cork)
1348{ 1395{
1349 struct sk_buff *skb; 1396 struct sk_buff *skb;
1350 1397
1351 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) 1398 while ((skb = __skb_dequeue_tail(queue)) != NULL)
1352 kfree_skb(skb); 1399 kfree_skb(skb);
1353 1400
1354 ip_cork_release(inet_sk(sk)); 1401 ip_cork_release(cork);
1355} 1402}
1356 1403
1404void ip_flush_pending_frames(struct sock *sk)
1405{
1406 __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
1407}
1408
1409struct sk_buff *ip_make_skb(struct sock *sk,
1410 struct flowi4 *fl4,
1411 int getfrag(void *from, char *to, int offset,
1412 int len, int odd, struct sk_buff *skb),
1413 void *from, int length, int transhdrlen,
1414 struct ipcm_cookie *ipc, struct rtable **rtp,
1415 unsigned int flags)
1416{
1417 struct inet_cork cork;
1418 struct sk_buff_head queue;
1419 int err;
1420
1421 if (flags & MSG_PROBE)
1422 return NULL;
1423
1424 __skb_queue_head_init(&queue);
1425
1426 cork.flags = 0;
1427 cork.addr = 0;
1428 cork.opt = NULL;
1429 err = ip_setup_cork(sk, &cork, ipc, rtp);
1430 if (err)
1431 return ERR_PTR(err);
1432
1433 err = __ip_append_data(sk, fl4, &queue, &cork, getfrag,
1434 from, length, transhdrlen, flags);
1435 if (err) {
1436 __ip_flush_pending_frames(sk, &queue, &cork);
1437 return ERR_PTR(err);
1438 }
1439
1440 return __ip_make_skb(sk, fl4, &queue, &cork);
1441}
1357 1442
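
The corking rework splits the old ip_push_pending_frames() body into reusable pieces: __ip_append_data() and __ip_make_skb() operate on a caller-supplied queue and cork, ip_send_skb() does the final ip_local_out() plus OUTDISCARDS accounting, and the new ip_make_skb() bundles the whole sequence for callers that want a single datagram without touching sk->sk_write_queue. A hedged sketch of how such a caller might use the new pair (the getfrag callback and surrounding transmit path are placeholders, not taken from this patch):

	#include <linux/err.h>
	#include <linux/socket.h>
	#include <net/ip.h>

	/* Sketch: build one datagram on a private queue and send it,
	 * bypassing the per-socket cork state. */
	static int example_send_one(struct sock *sk, struct flowi4 *fl4,
				    int getfrag(void *, char *, int, int, int,
						struct sk_buff *),
				    void *from, int len, struct ipcm_cookie *ipc,
				    struct rtable **rtp)
	{
		struct sk_buff *skb;

		skb = ip_make_skb(sk, fl4, getfrag, from, len, 0 /* transhdrlen */,
				  ipc, rtp, MSG_DONTWAIT);
		if (!skb)
			return 0;		/* nothing to send */
		if (IS_ERR(skb))
			return PTR_ERR(skb);

		return ip_send_skb(skb);	/* counts OUTDISCARDS itself on error */
	}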
1358/* 1443/*
1359 * Fetch data from kernel space and fill in checksum if needed. 1444 * Fetch data from kernel space and fill in checksum if needed.
@@ -1375,45 +1460,39 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1375 * Should run single threaded per socket because it uses the sock 1460 * Should run single threaded per socket because it uses the sock
1376 * structure to pass arguments. 1461 * structure to pass arguments.
1377 */ 1462 */
1378void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 1463void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1379 unsigned int len) 1464 struct ip_reply_arg *arg, unsigned int len)
1380{ 1465{
1381 struct inet_sock *inet = inet_sk(sk); 1466 struct inet_sock *inet = inet_sk(sk);
1382 struct { 1467 struct ip_options_data replyopts;
1383 struct ip_options opt;
1384 char data[40];
1385 } replyopts;
1386 struct ipcm_cookie ipc; 1468 struct ipcm_cookie ipc;
1387 __be32 daddr; 1469 struct flowi4 fl4;
1388 struct rtable *rt = skb_rtable(skb); 1470 struct rtable *rt = skb_rtable(skb);
1389 1471
1390 if (ip_options_echo(&replyopts.opt, skb)) 1472 if (ip_options_echo(&replyopts.opt.opt, skb))
1391 return; 1473 return;
1392 1474
1393 daddr = ipc.addr = rt->rt_src; 1475 ipc.addr = daddr;
1394 ipc.opt = NULL; 1476 ipc.opt = NULL;
1395 ipc.tx_flags = 0; 1477 ipc.tx_flags = 0;
1396 1478
1397 if (replyopts.opt.optlen) { 1479 if (replyopts.opt.opt.optlen) {
1398 ipc.opt = &replyopts.opt; 1480 ipc.opt = &replyopts.opt;
1399 1481
1400 if (ipc.opt->srr) 1482 if (replyopts.opt.opt.srr)
1401 daddr = replyopts.opt.faddr; 1483 daddr = replyopts.opt.opt.faddr;
1402 } 1484 }
1403 1485
1404 { 1486 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1405 struct flowi fl = { .oif = arg->bound_dev_if, 1487 RT_TOS(ip_hdr(skb)->tos),
1406 .fl4_dst = daddr, 1488 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1407 .fl4_src = rt->rt_spec_dst, 1489 ip_reply_arg_flowi_flags(arg),
1408 .fl4_tos = RT_TOS(ip_hdr(skb)->tos), 1490 daddr, rt->rt_spec_dst,
1409 .fl_ip_sport = tcp_hdr(skb)->dest, 1491 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1410 .fl_ip_dport = tcp_hdr(skb)->source, 1492 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1411 .proto = sk->sk_protocol, 1493 rt = ip_route_output_key(sock_net(sk), &fl4);
1412 .flags = ip_reply_arg_flowi_flags(arg) }; 1494 if (IS_ERR(rt))
1413 security_skb_classify_flow(skb, &fl); 1495 return;
1414 if (ip_route_output_key(sock_net(sk), &rt, &fl))
1415 return;
1416 }
1417 1496
1418 /* And let IP do all the hard work. 1497 /* And let IP do all the hard work.
1419 1498
@@ -1426,7 +1505,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1426 sk->sk_priority = skb->priority; 1505 sk->sk_priority = skb->priority;
1427 sk->sk_protocol = ip_hdr(skb)->protocol; 1506 sk->sk_protocol = ip_hdr(skb)->protocol;
1428 sk->sk_bound_dev_if = arg->bound_dev_if; 1507 sk->sk_bound_dev_if = arg->bound_dev_if;
1429 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1508 ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1430 &ipc, &rt, MSG_DONTWAIT); 1509 &ipc, &rt, MSG_DONTWAIT);
1431 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1510 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1432 if (arg->csumoffset >= 0) 1511 if (arg->csumoffset >= 0)
@@ -1434,7 +1513,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1434 arg->csumoffset) = csum_fold(csum_add(skb->csum, 1513 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1435 arg->csum)); 1514 arg->csum));
1436 skb->ip_summed = CHECKSUM_NONE; 1515 skb->ip_summed = CHECKSUM_NONE;
1437 ip_push_pending_frames(sk); 1516 ip_push_pending_frames(sk, &fl4);
1438 } 1517 }
1439 1518
1440 bh_unlock_sock(sk); 1519 bh_unlock_sock(sk);
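
ip_send_reply() now receives the reply destination explicitly and builds its flow with flowi4_init_output() before the rtable-returning ip_route_output_key(). Since the old code set the same fields through designated initialisers, the positional arguments are easy to misread; the sketch below mirrors the call above and annotates each argument with the field it replaces (helper prototypes as used in the hunk, everything else illustrative):

	#include <linux/err.h>
	#include <linux/security.h>
	#include <linux/tcp.h>
	#include <net/flow.h>
	#include <net/route.h>

	/* Sketch: the reply-route lookup from ip_send_reply(), argument by
	 * argument against the old struct flowi initialiser. */
	static struct rtable *example_reply_route(struct net *net,
						  struct sk_buff *skb,
						  struct sock *sk,
						  struct flowi4 *fl4,
						  __be32 daddr, __be32 saddr,
						  int oif, __u8 flow_flags)
	{
		flowi4_init_output(fl4, oif,			/* .oif		*/
				   0,				/* .mark	*/
				   RT_TOS(ip_hdr(skb)->tos),	/* .fl4_tos	*/
				   RT_SCOPE_UNIVERSE,
				   sk->sk_protocol,		/* .proto	*/
				   flow_flags,			/* .flags	*/
				   daddr, saddr,		/* .fl4_dst / .fl4_src */
				   tcp_hdr(skb)->source,	/* reply dest port   */
				   tcp_hdr(skb)->dest);		/* reply source port */
		security_skb_classify_flow(skb, flowi4_to_flowi(fl4));
		return ip_route_output_key(net, fl4);	/* ERR_PTR on failure */
	}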
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3948c86e59ca..ab0c9efd1efa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -131,7 +131,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
131static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) 131static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
132{ 132{
133 struct sockaddr_in sin; 133 struct sockaddr_in sin;
134 struct iphdr *iph = ip_hdr(skb); 134 const struct iphdr *iph = ip_hdr(skb);
135 __be16 *ports = (__be16 *)skb_transport_header(skb); 135 __be16 *ports = (__be16 *)skb_transport_header(skb);
136 136
137 if (skb_transport_offset(skb) + 4 > skb->len) 137 if (skb_transport_offset(skb) + 4 > skb->len)
@@ -451,6 +451,11 @@ out:
451} 451}
452 452
453 453
454static void opt_kfree_rcu(struct rcu_head *head)
455{
456 kfree(container_of(head, struct ip_options_rcu, rcu));
457}
458
454/* 459/*
455 * Socket option code for IP. This is the end of the line after any 460 * Socket option code for IP. This is the end of the line after any
456 * TCP,UDP etc options on an IP socket. 461 * TCP,UDP etc options on an IP socket.
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
497 switch (optname) { 502 switch (optname) {
498 case IP_OPTIONS: 503 case IP_OPTIONS:
499 { 504 {
500 struct ip_options *opt = NULL; 505 struct ip_options_rcu *old, *opt = NULL;
506
501 if (optlen > 40) 507 if (optlen > 40)
502 goto e_inval; 508 goto e_inval;
503 err = ip_options_get_from_user(sock_net(sk), &opt, 509 err = ip_options_get_from_user(sock_net(sk), &opt,
504 optval, optlen); 510 optval, optlen);
505 if (err) 511 if (err)
506 break; 512 break;
513 old = rcu_dereference_protected(inet->inet_opt,
514 sock_owned_by_user(sk));
507 if (inet->is_icsk) { 515 if (inet->is_icsk) {
508 struct inet_connection_sock *icsk = inet_csk(sk); 516 struct inet_connection_sock *icsk = inet_csk(sk);
509#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 517#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
512 (TCPF_LISTEN | TCPF_CLOSE)) && 520 (TCPF_LISTEN | TCPF_CLOSE)) &&
513 inet->inet_daddr != LOOPBACK4_IPV6)) { 521 inet->inet_daddr != LOOPBACK4_IPV6)) {
514#endif 522#endif
515 if (inet->opt) 523 if (old)
516 icsk->icsk_ext_hdr_len -= inet->opt->optlen; 524 icsk->icsk_ext_hdr_len -= old->opt.optlen;
517 if (opt) 525 if (opt)
518 icsk->icsk_ext_hdr_len += opt->optlen; 526 icsk->icsk_ext_hdr_len += opt->opt.optlen;
519 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 527 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
520#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 528#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
521 } 529 }
522#endif 530#endif
523 } 531 }
524 opt = xchg(&inet->opt, opt); 532 rcu_assign_pointer(inet->inet_opt, opt);
525 kfree(opt); 533 if (old)
534 call_rcu(&old->rcu, opt_kfree_rcu);
526 break; 535 break;
527 } 536 }
528 case IP_PKTINFO: 537 case IP_PKTINFO:
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1081 case IP_OPTIONS: 1090 case IP_OPTIONS:
1082 { 1091 {
1083 unsigned char optbuf[sizeof(struct ip_options)+40]; 1092 unsigned char optbuf[sizeof(struct ip_options)+40];
1084 struct ip_options * opt = (struct ip_options *)optbuf; 1093 struct ip_options *opt = (struct ip_options *)optbuf;
1094 struct ip_options_rcu *inet_opt;
1095
1096 inet_opt = rcu_dereference_protected(inet->inet_opt,
1097 sock_owned_by_user(sk));
1085 opt->optlen = 0; 1098 opt->optlen = 0;
1086 if (inet->opt) 1099 if (inet_opt)
1087 memcpy(optbuf, inet->opt, 1100 memcpy(optbuf, &inet_opt->opt,
1088 sizeof(struct ip_options)+ 1101 sizeof(struct ip_options) +
1089 inet->opt->optlen); 1102 inet_opt->opt.optlen);
1090 release_sock(sk); 1103 release_sock(sk);
1091 1104
1092 if (opt->optlen == 0) 1105 if (opt->optlen == 0)
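
The IP_OPTIONS setsockopt path above is the write side of the new RCU scheme: the current blob is read with rcu_dereference_protected() under the socket lock, the replacement is published with rcu_assign_pointer(), and the old blob is freed through call_rcu(). Readers in the data path, such as the rcu_dereference() added to ip_queue_xmit() earlier, follow the usual pattern; a minimal sketch (inet->inet_opt and struct ip_options_rcu are as introduced by this patch, the helper itself is illustrative):

	#include <linux/rcupdate.h>
	#include <net/inet_sock.h>

	/* Sketch of an RCU reader of the per-socket IP options blob. */
	static int example_opt_len(struct sock *sk)
	{
		struct ip_options_rcu *inet_opt;
		int len = 0;

		rcu_read_lock();
		inet_opt = rcu_dereference(inet_sk(sk)->inet_opt);
		if (inet_opt)
			len = inet_opt->opt.optlen;	/* safe: freed only after a grace period */
		rcu_read_unlock();

		return len;
	}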
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 629067571f02..c857f6f49b03 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -27,7 +27,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
27{ 27{
28 struct net *net = dev_net(skb->dev); 28 struct net *net = dev_net(skb->dev);
29 __be32 spi; 29 __be32 spi;
30 struct iphdr *iph = (struct iphdr *)skb->data; 30 const struct iphdr *iph = (const struct iphdr *)skb->data;
31 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); 31 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
32 struct xfrm_state *x; 32 struct xfrm_state *x;
33 33
@@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
36 return; 36 return;
37 37
38 spi = htonl(ntohs(ipch->cpi)); 38 spi = htonl(ntohs(ipch->cpi));
39 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, 39 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
40 spi, IPPROTO_COMP, AF_INET); 40 spi, IPPROTO_COMP, AF_INET);
41 if (!x) 41 if (!x)
42 return; 42 return;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 2b097752426b..ab7e5542c1cf 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -87,8 +87,8 @@
87#endif 87#endif
88 88
89/* Define the friendly delay before and after opening net devices */ 89/* Define the friendly delay before and after opening net devices */
90#define CONF_PRE_OPEN 500 /* Before opening: 1/2 second */ 90#define CONF_POST_OPEN 10 /* After opening: 10 msecs */
91#define CONF_POST_OPEN 1 /* After opening: 1 second */ 91#define CONF_CARRIER_TIMEOUT 120000 /* Wait for carrier timeout */
92 92
93/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ 93/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
94#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ 94#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */
@@ -188,14 +188,14 @@ struct ic_device {
188static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ 188static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
189static struct net_device *ic_dev __initdata = NULL; /* Selected device */ 189static struct net_device *ic_dev __initdata = NULL; /* Selected device */
190 190
191static bool __init ic_device_match(struct net_device *dev) 191static bool __init ic_is_init_dev(struct net_device *dev)
192{ 192{
193 if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : 193 if (dev->flags & IFF_LOOPBACK)
194 return false;
195 return user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
194 (!(dev->flags & IFF_LOOPBACK) && 196 (!(dev->flags & IFF_LOOPBACK) &&
195 (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && 197 (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
196 strncmp(dev->name, "dummy", 5))) 198 strncmp(dev->name, "dummy", 5));
197 return true;
198 return false;
199} 199}
200 200
201static int __init ic_open_devs(void) 201static int __init ic_open_devs(void)
@@ -203,6 +203,7 @@ static int __init ic_open_devs(void)
203 struct ic_device *d, **last; 203 struct ic_device *d, **last;
204 struct net_device *dev; 204 struct net_device *dev;
205 unsigned short oflags; 205 unsigned short oflags;
206 unsigned long start;
206 207
207 last = &ic_first_dev; 208 last = &ic_first_dev;
208 rtnl_lock(); 209 rtnl_lock();
@@ -216,9 +217,7 @@ static int __init ic_open_devs(void)
216 } 217 }
217 218
218 for_each_netdev(&init_net, dev) { 219 for_each_netdev(&init_net, dev) {
219 if (dev->flags & IFF_LOOPBACK) 220 if (ic_is_init_dev(dev)) {
220 continue;
221 if (ic_device_match(dev)) {
222 int able = 0; 221 int able = 0;
223 if (dev->mtu >= 364) 222 if (dev->mtu >= 364)
224 able |= IC_BOOTP; 223 able |= IC_BOOTP;
@@ -252,6 +251,17 @@ static int __init ic_open_devs(void)
252 dev->name, able, d->xid)); 251 dev->name, able, d->xid));
253 } 252 }
254 } 253 }
254
255 /* wait for a carrier on at least one device */
256 start = jiffies;
257 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
258 for_each_netdev(&init_net, dev)
259 if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
260 goto have_carrier;
261
262 msleep(1);
263 }
264have_carrier:
255 rtnl_unlock(); 265 rtnl_unlock();
256 266
257 *last = NULL; 267 *last = NULL;
@@ -1324,14 +1334,13 @@ static int __init wait_for_devices(void)
1324{ 1334{
1325 int i; 1335 int i;
1326 1336
1327 msleep(CONF_PRE_OPEN);
1328 for (i = 0; i < DEVICE_WAIT_MAX; i++) { 1337 for (i = 0; i < DEVICE_WAIT_MAX; i++) {
1329 struct net_device *dev; 1338 struct net_device *dev;
1330 int found = 0; 1339 int found = 0;
1331 1340
1332 rtnl_lock(); 1341 rtnl_lock();
1333 for_each_netdev(&init_net, dev) { 1342 for_each_netdev(&init_net, dev) {
1334 if (ic_device_match(dev)) { 1343 if (ic_is_init_dev(dev)) {
1335 found = 1; 1344 found = 1;
1336 break; 1345 break;
1337 } 1346 }
@@ -1378,7 +1387,7 @@ static int __init ip_auto_config(void)
1378 return err; 1387 return err;
1379 1388
1380 /* Give drivers a chance to settle */ 1389 /* Give drivers a chance to settle */
1381 ssleep(CONF_POST_OPEN); 1390 msleep(CONF_POST_OPEN);
1382 1391
1383 /* 1392 /*
1384 * If the config information is insufficient (e.g., our IP address or 1393 * If the config information is insufficient (e.g., our IP address or
@@ -1444,7 +1453,7 @@ static int __init ip_auto_config(void)
1444 root_server_addr = addr; 1453 root_server_addr = addr;
1445 1454
1446 /* 1455 /*
1447 * Use defaults whereever applicable. 1456 * Use defaults wherever applicable.
1448 */ 1457 */
1449 if (ic_defaults() < 0) 1458 if (ic_defaults() < 0)
1450 return -1; 1459 return -1;
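
The ipconfig.c hunks above drop the fixed CONF_PRE_OPEN/CONF_POST_OPEN sleeps and instead poll netif_carrier_ok() on the candidate devices until CONF_CARRIER_TIMEOUT (120 s) expires, with the remaining post-open delay converted from ssleep() to msleep(). The sketch below is illustrative only; it assumes it lives in ipconfig.c (so the patch's ic_is_init_dev() helper is in scope) and that the caller holds rtnl_lock(), as ic_open_devs() does. wait_for_carrier() is a hypothetical name.

#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>

/* Hypothetical helper mirroring the carrier wait added to ic_open_devs().
 * Caller holds rtnl_lock(). Returns true once any eligible device reports
 * carrier, false when the timeout expires. */
static bool __init wait_for_carrier(unsigned int timeout_ms)
{
        unsigned long start = jiffies;
        struct net_device *dev;

        while (jiffies - start < msecs_to_jiffies(timeout_ms)) {
                for_each_netdev(&init_net, dev)
                        if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
                                return true;
                msleep(1);      /* give link state a chance to change */
        }
        return false;
}
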
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54a..378b20b7ca6e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -276,11 +276,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
276 276
277 dev_net_set(dev, net); 277 dev_net_set(dev, net);
278 278
279 if (strchr(name, '%')) {
280 if (dev_alloc_name(dev, name) < 0)
281 goto failed_free;
282 }
283
284 nt = netdev_priv(dev); 279 nt = netdev_priv(dev);
285 nt->parms = *parms; 280 nt->parms = *parms;
286 281
@@ -319,7 +314,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
319 8 bytes of packet payload. It means, that precise relaying of 314 8 bytes of packet payload. It means, that precise relaying of
320 ICMP in the real Internet is absolutely infeasible. 315 ICMP in the real Internet is absolutely infeasible.
321 */ 316 */
322 struct iphdr *iph = (struct iphdr *)skb->data; 317 const struct iphdr *iph = (const struct iphdr *)skb->data;
323 const int type = icmp_hdr(skb)->type; 318 const int type = icmp_hdr(skb)->type;
324 const int code = icmp_hdr(skb)->code; 319 const int code = icmp_hdr(skb)->code;
325 struct ip_tunnel *t; 320 struct ip_tunnel *t;
@@ -433,15 +428,16 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
433{ 428{
434 struct ip_tunnel *tunnel = netdev_priv(dev); 429 struct ip_tunnel *tunnel = netdev_priv(dev);
435 struct pcpu_tstats *tstats; 430 struct pcpu_tstats *tstats;
436 struct iphdr *tiph = &tunnel->parms.iph; 431 const struct iphdr *tiph = &tunnel->parms.iph;
437 u8 tos = tunnel->parms.iph.tos; 432 u8 tos = tunnel->parms.iph.tos;
438 __be16 df = tiph->frag_off; 433 __be16 df = tiph->frag_off;
439 struct rtable *rt; /* Route to the other host */ 434 struct rtable *rt; /* Route to the other host */
440 struct net_device *tdev; /* Device to other host */ 435 struct net_device *tdev; /* Device to other host */
441 struct iphdr *old_iph = ip_hdr(skb); 436 const struct iphdr *old_iph = ip_hdr(skb);
442 struct iphdr *iph; /* Our new IP header */ 437 struct iphdr *iph; /* Our new IP header */
443 unsigned int max_headroom; /* The extra header space needed */ 438 unsigned int max_headroom; /* The extra header space needed */
444 __be32 dst = tiph->daddr; 439 __be32 dst = tiph->daddr;
440 struct flowi4 fl4;
445 int mtu; 441 int mtu;
446 442
447 if (skb->protocol != htons(ETH_P_IP)) 443 if (skb->protocol != htons(ETH_P_IP))
@@ -460,19 +456,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
460 goto tx_error_icmp; 456 goto tx_error_icmp;
461 } 457 }
462 458
463 { 459 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
464 struct flowi fl = { 460 dst, tiph->saddr,
465 .oif = tunnel->parms.link, 461 0, 0,
466 .fl4_dst = dst, 462 IPPROTO_IPIP, RT_TOS(tos),
467 .fl4_src= tiph->saddr, 463 tunnel->parms.link);
468 .fl4_tos = RT_TOS(tos), 464 if (IS_ERR(rt)) {
469 .proto = IPPROTO_IPIP 465 dev->stats.tx_carrier_errors++;
470 }; 466 goto tx_error_icmp;
471
472 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
473 dev->stats.tx_carrier_errors++;
474 goto tx_error_icmp;
475 }
476 } 467 }
477 tdev = rt->dst.dev; 468 tdev = rt->dst.dev;
478 469
@@ -554,8 +545,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
554 iph->frag_off = df; 545 iph->frag_off = df;
555 iph->protocol = IPPROTO_IPIP; 546 iph->protocol = IPPROTO_IPIP;
556 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); 547 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
557 iph->daddr = rt->rt_dst; 548 iph->daddr = fl4.daddr;
558 iph->saddr = rt->rt_src; 549 iph->saddr = fl4.saddr;
559 550
560 if ((iph->ttl = tiph->ttl) == 0) 551 if ((iph->ttl = tiph->ttl) == 0)
561 iph->ttl = old_iph->ttl; 552 iph->ttl = old_iph->ttl;
@@ -577,22 +568,22 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
577{ 568{
578 struct net_device *tdev = NULL; 569 struct net_device *tdev = NULL;
579 struct ip_tunnel *tunnel; 570 struct ip_tunnel *tunnel;
580 struct iphdr *iph; 571 const struct iphdr *iph;
581 572
582 tunnel = netdev_priv(dev); 573 tunnel = netdev_priv(dev);
583 iph = &tunnel->parms.iph; 574 iph = &tunnel->parms.iph;
584 575
585 if (iph->daddr) { 576 if (iph->daddr) {
586 struct flowi fl = {
587 .oif = tunnel->parms.link,
588 .fl4_dst = iph->daddr,
589 .fl4_src = iph->saddr,
590 .fl4_tos = RT_TOS(iph->tos),
591 .proto = IPPROTO_IPIP
592 };
593 struct rtable *rt; 577 struct rtable *rt;
594 578 struct flowi4 fl4;
595 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 579
580 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
581 iph->daddr, iph->saddr,
582 0, 0,
583 IPPROTO_IPIP,
584 RT_TOS(iph->tos),
585 tunnel->parms.link);
586 if (!IS_ERR(rt)) {
596 tdev = rt->dst.dev; 587 tdev = rt->dst.dev;
597 ip_rt_put(rt); 588 ip_rt_put(rt);
598 } 589 }
@@ -913,4 +904,4 @@ static void __exit ipip_fini(void)
913module_init(ipip_init); 904module_init(ipip_init);
914module_exit(ipip_fini); 905module_exit(ipip_fini);
915MODULE_LICENSE("GPL"); 906MODULE_LICENSE("GPL");
916MODULE_ALIAS("tunl0"); 907MODULE_ALIAS_NETDEV("tunl0");
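
The ipip.c conversion above replaces the on-stack struct flowi plus ip_route_output_key() with the flowi4-based ip_route_output_ports(), which returns the route (or an ERR_PTR) and fills the flowi4 so the addresses the lookup settled on can be read back for the outer header. A minimal sketch of that lookup pattern, using the same arguments ipip_tunnel_xmit() passes; the function name is hypothetical.

#include <linux/err.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <net/route.h>

/* Illustrative only: route an IPIP-encapsulated packet the way the
 * converted ipip_tunnel_xmit() does, then read saddr/daddr from fl4. */
static struct rtable *ipip_route_sketch(struct net *net, __be32 dst,
                                        __be32 src, u8 tos, int link,
                                        struct flowi4 *fl4)
{
        struct rtable *rt;

        rt = ip_route_output_ports(net, fl4, NULL,
                                   dst, src,
                                   0, 0,                /* no ports for IPIP */
                                   IPPROTO_IPIP, RT_TOS(tos), link);
        if (IS_ERR(rt))
                return rt;              /* caller checks IS_ERR() */

        /* Outer header then uses fl4->daddr / fl4->saddr instead of the
         * removed rt->rt_dst / rt->rt_src fields. */
        return rt;
}
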
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8b65a12654e7..30a7763c400e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -148,14 +148,15 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
148 return NULL; 148 return NULL;
149} 149}
150 150
151static int ipmr_fib_lookup(struct net *net, struct flowi *flp, 151static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
152 struct mr_table **mrt) 152 struct mr_table **mrt)
153{ 153{
154 struct ipmr_result res; 154 struct ipmr_result res;
155 struct fib_lookup_arg arg = { .result = &res, }; 155 struct fib_lookup_arg arg = { .result = &res, };
156 int err; 156 int err;
157 157
158 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); 158 err = fib_rules_lookup(net->ipv4.mr_rules_ops,
159 flowi4_to_flowi(flp4), 0, &arg);
159 if (err < 0) 160 if (err < 0)
160 return err; 161 return err;
161 *mrt = res.mrt; 162 *mrt = res.mrt;
@@ -283,7 +284,7 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
283 return net->ipv4.mrt; 284 return net->ipv4.mrt;
284} 285}
285 286
286static int ipmr_fib_lookup(struct net *net, struct flowi *flp, 287static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
287 struct mr_table **mrt) 288 struct mr_table **mrt)
288{ 289{
289 *mrt = net->ipv4.mrt; 290 *mrt = net->ipv4.mrt;
@@ -435,14 +436,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
435{ 436{
436 struct net *net = dev_net(dev); 437 struct net *net = dev_net(dev);
437 struct mr_table *mrt; 438 struct mr_table *mrt;
438 struct flowi fl = { 439 struct flowi4 fl4 = {
439 .oif = dev->ifindex, 440 .flowi4_oif = dev->ifindex,
440 .iif = skb->skb_iif, 441 .flowi4_iif = skb->skb_iif,
441 .mark = skb->mark, 442 .flowi4_mark = skb->mark,
442 }; 443 };
443 int err; 444 int err;
444 445
445 err = ipmr_fib_lookup(net, &fl, &mrt); 446 err = ipmr_fib_lookup(net, &fl4, &mrt);
446 if (err < 0) { 447 if (err < 0) {
447 kfree_skb(skb); 448 kfree_skb(skb);
448 return err; 449 return err;
@@ -1548,7 +1549,7 @@ static struct notifier_block ip_mr_notifier = {
1548static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) 1549static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1549{ 1550{
1550 struct iphdr *iph; 1551 struct iphdr *iph;
1551 struct iphdr *old_iph = ip_hdr(skb); 1552 const struct iphdr *old_iph = ip_hdr(skb);
1552 1553
1553 skb_push(skb, sizeof(struct iphdr)); 1554 skb_push(skb, sizeof(struct iphdr));
1554 skb->transport_header = skb->network_header; 1555 skb->transport_header = skb->network_header;
@@ -1594,6 +1595,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1594 struct vif_device *vif = &mrt->vif_table[vifi]; 1595 struct vif_device *vif = &mrt->vif_table[vifi];
1595 struct net_device *dev; 1596 struct net_device *dev;
1596 struct rtable *rt; 1597 struct rtable *rt;
1598 struct flowi4 fl4;
1597 int encap = 0; 1599 int encap = 0;
1598 1600
1599 if (vif->dev == NULL) 1601 if (vif->dev == NULL)
@@ -1611,26 +1613,20 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1611#endif 1613#endif
1612 1614
1613 if (vif->flags & VIFF_TUNNEL) { 1615 if (vif->flags & VIFF_TUNNEL) {
1614 struct flowi fl = { 1616 rt = ip_route_output_ports(net, &fl4, NULL,
1615 .oif = vif->link, 1617 vif->remote, vif->local,
1616 .fl4_dst = vif->remote, 1618 0, 0,
1617 .fl4_src = vif->local, 1619 IPPROTO_IPIP,
1618 .fl4_tos = RT_TOS(iph->tos), 1620 RT_TOS(iph->tos), vif->link);
1619 .proto = IPPROTO_IPIP 1621 if (IS_ERR(rt))
1620 };
1621
1622 if (ip_route_output_key(net, &rt, &fl))
1623 goto out_free; 1622 goto out_free;
1624 encap = sizeof(struct iphdr); 1623 encap = sizeof(struct iphdr);
1625 } else { 1624 } else {
1626 struct flowi fl = { 1625 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
1627 .oif = vif->link, 1626 0, 0,
1628 .fl4_dst = iph->daddr, 1627 IPPROTO_IPIP,
1629 .fl4_tos = RT_TOS(iph->tos), 1628 RT_TOS(iph->tos), vif->link);
1630 .proto = IPPROTO_IPIP 1629 if (IS_ERR(rt))
1631 };
1632
1633 if (ip_route_output_key(net, &rt, &fl))
1634 goto out_free; 1630 goto out_free;
1635 } 1631 }
1636 1632
@@ -1793,6 +1789,26 @@ dont_forward:
1793 return 0; 1789 return 0;
1794} 1790}
1795 1791
1792static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
1793{
1794 struct rtable *rt = skb_rtable(skb);
1795 struct iphdr *iph = ip_hdr(skb);
1796 struct flowi4 fl4 = {
1797 .daddr = iph->daddr,
1798 .saddr = iph->saddr,
1799 .flowi4_tos = iph->tos,
1800 .flowi4_oif = rt->rt_oif,
1801 .flowi4_iif = rt->rt_iif,
1802 .flowi4_mark = rt->rt_mark,
1803 };
1804 struct mr_table *mrt;
1805 int err;
1806
1807 err = ipmr_fib_lookup(net, &fl4, &mrt);
1808 if (err)
1809 return ERR_PTR(err);
1810 return mrt;
1811}
1796 1812
1797/* 1813/*
1798 * Multicast packets for forwarding arrive here 1814 * Multicast packets for forwarding arrive here
@@ -1805,7 +1821,6 @@ int ip_mr_input(struct sk_buff *skb)
1805 struct net *net = dev_net(skb->dev); 1821 struct net *net = dev_net(skb->dev);
1806 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1822 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1807 struct mr_table *mrt; 1823 struct mr_table *mrt;
1808 int err;
1809 1824
1810 /* Packet is looped back after forward, it should not be 1825 /* Packet is looped back after forward, it should not be
1811 * forwarded second time, but still can be delivered locally. 1826 * forwarded second time, but still can be delivered locally.
@@ -1813,12 +1828,11 @@ int ip_mr_input(struct sk_buff *skb)
1813 if (IPCB(skb)->flags & IPSKB_FORWARDED) 1828 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1814 goto dont_forward; 1829 goto dont_forward;
1815 1830
1816 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1831 mrt = ipmr_rt_fib_lookup(net, skb);
1817 if (err < 0) { 1832 if (IS_ERR(mrt)) {
1818 kfree_skb(skb); 1833 kfree_skb(skb);
1819 return err; 1834 return PTR_ERR(mrt);
1820 } 1835 }
1821
1822 if (!local) { 1836 if (!local) {
1823 if (IPCB(skb)->opt.router_alert) { 1837 if (IPCB(skb)->opt.router_alert) {
1824 if (ip_call_ra_chain(skb)) 1838 if (ip_call_ra_chain(skb))
@@ -1946,9 +1960,9 @@ int pim_rcv_v1(struct sk_buff *skb)
1946 1960
1947 pim = igmp_hdr(skb); 1961 pim = igmp_hdr(skb);
1948 1962
1949 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1963 mrt = ipmr_rt_fib_lookup(net, skb);
1964 if (IS_ERR(mrt))
1950 goto drop; 1965 goto drop;
1951
1952 if (!mrt->mroute_do_pim || 1966 if (!mrt->mroute_do_pim ||
1953 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1967 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1954 goto drop; 1968 goto drop;
@@ -1978,9 +1992,9 @@ static int pim_rcv(struct sk_buff *skb)
1978 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1992 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1979 goto drop; 1993 goto drop;
1980 1994
1981 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1995 mrt = ipmr_rt_fib_lookup(net, skb);
1996 if (IS_ERR(mrt))
1982 goto drop; 1997 goto drop;
1983
1984 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1998 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1985drop: 1999drop:
1986 kfree_skb(skb); 2000 kfree_skb(skb);
@@ -2027,20 +2041,20 @@ rtattr_failure:
2027 return -EMSGSIZE; 2041 return -EMSGSIZE;
2028} 2042}
2029 2043
2030int ipmr_get_route(struct net *net, 2044int ipmr_get_route(struct net *net, struct sk_buff *skb,
2031 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 2045 __be32 saddr, __be32 daddr,
2046 struct rtmsg *rtm, int nowait)
2032{ 2047{
2033 int err;
2034 struct mr_table *mrt;
2035 struct mfc_cache *cache; 2048 struct mfc_cache *cache;
2036 struct rtable *rt = skb_rtable(skb); 2049 struct mr_table *mrt;
2050 int err;
2037 2051
2038 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2052 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2039 if (mrt == NULL) 2053 if (mrt == NULL)
2040 return -ENOENT; 2054 return -ENOENT;
2041 2055
2042 rcu_read_lock(); 2056 rcu_read_lock();
2043 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 2057 cache = ipmr_cache_find(mrt, saddr, daddr);
2044 2058
2045 if (cache == NULL) { 2059 if (cache == NULL) {
2046 struct sk_buff *skb2; 2060 struct sk_buff *skb2;
@@ -2073,8 +2087,8 @@ int ipmr_get_route(struct net *net,
2073 skb_reset_network_header(skb2); 2087 skb_reset_network_header(skb2);
2074 iph = ip_hdr(skb2); 2088 iph = ip_hdr(skb2);
2075 iph->ihl = sizeof(struct iphdr) >> 2; 2089 iph->ihl = sizeof(struct iphdr) >> 2;
2076 iph->saddr = rt->rt_src; 2090 iph->saddr = saddr;
2077 iph->daddr = rt->rt_dst; 2091 iph->daddr = daddr;
2078 iph->version = 0; 2092 iph->version = 0;
2079 err = ipmr_cache_unresolved(mrt, vif, skb2); 2093 err = ipmr_cache_unresolved(mrt, vif, skb2);
2080 read_unlock(&mrt_lock); 2094 read_unlock(&mrt_lock);
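
With the change above, ipmr_get_route() no longer digs rt_src/rt_dst out of the skb's rtable; the caller passes the multicast source and group addresses explicitly. The fragment below is a hypothetical caller-side sketch (the real call sites live outside this file, in the rtnetlink fill path) showing only the new calling convention.

#include <linux/ip.h>
#include <linux/mroute.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

/* Hypothetical wrapper showing the explicit-address signature of
 * ipmr_get_route() after the conversion. */
static int fill_mcast_route(struct net *net, struct sk_buff *skb,
                            struct rtmsg *rtm, int nowait)
{
        const struct iphdr *iph = ip_hdr(skb);

        /* saddr/daddr used to be read from skb_rtable(skb); the caller now
         * supplies them, here simply taken from the packet itself. */
        return ipmr_get_route(net, skb, iph->saddr, iph->daddr, rtm, nowait);
}
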
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 994a1f29ebbc..4614babdc45f 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -16,7 +16,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
16 struct net *net = dev_net(skb_dst(skb)->dev); 16 struct net *net = dev_net(skb_dst(skb)->dev);
17 const struct iphdr *iph = ip_hdr(skb); 17 const struct iphdr *iph = ip_hdr(skb);
18 struct rtable *rt; 18 struct rtable *rt;
19 struct flowi fl = {}; 19 struct flowi4 fl4 = {};
20 unsigned long orefdst; 20 unsigned long orefdst;
21 unsigned int hh_len; 21 unsigned int hh_len;
22 unsigned int type; 22 unsigned int type;
@@ -31,14 +31,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
31 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. 31 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
32 */ 32 */
33 if (addr_type == RTN_LOCAL) { 33 if (addr_type == RTN_LOCAL) {
34 fl.fl4_dst = iph->daddr; 34 fl4.daddr = iph->daddr;
35 if (type == RTN_LOCAL) 35 if (type == RTN_LOCAL)
36 fl.fl4_src = iph->saddr; 36 fl4.saddr = iph->saddr;
37 fl.fl4_tos = RT_TOS(iph->tos); 37 fl4.flowi4_tos = RT_TOS(iph->tos);
38 fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 38 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
39 fl.mark = skb->mark; 39 fl4.flowi4_mark = skb->mark;
40 fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; 40 fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
41 if (ip_route_output_key(net, &rt, &fl) != 0) 41 rt = ip_route_output_key(net, &fl4);
42 if (IS_ERR(rt))
42 return -1; 43 return -1;
43 44
44 /* Drop old route. */ 45 /* Drop old route. */
@@ -47,8 +48,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
47 } else { 48 } else {
48 /* non-local src, find valid iif to satisfy 49 /* non-local src, find valid iif to satisfy
49 * rp-filter when calling ip_route_input. */ 50 * rp-filter when calling ip_route_input. */
50 fl.fl4_dst = iph->saddr; 51 fl4.daddr = iph->saddr;
51 if (ip_route_output_key(net, &rt, &fl) != 0) 52 rt = ip_route_output_key(net, &fl4);
53 if (IS_ERR(rt))
52 return -1; 54 return -1;
53 55
54 orefdst = skb->_skb_refdst; 56 orefdst = skb->_skb_refdst;
@@ -66,10 +68,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
66 68
67#ifdef CONFIG_XFRM 69#ifdef CONFIG_XFRM
68 if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 70 if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
69 xfrm_decode_session(skb, &fl, AF_INET) == 0) { 71 xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
70 struct dst_entry *dst = skb_dst(skb); 72 struct dst_entry *dst = skb_dst(skb);
71 skb_dst_set(skb, NULL); 73 skb_dst_set(skb, NULL);
72 if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) 74 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
75 if (IS_ERR(dst))
73 return -1; 76 return -1;
74 skb_dst_set(skb, dst); 77 skb_dst_set(skb, dst);
75 } 78 }
@@ -102,7 +105,8 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
102 dst = ((struct xfrm_dst *)dst)->route; 105 dst = ((struct xfrm_dst *)dst)->route;
103 dst_hold(dst); 106 dst_hold(dst);
104 107
105 if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) 108 dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
109 if (IS_ERR(dst))
106 return -1; 110 return -1;
107 111
108 skb_dst_drop(skb); 112 skb_dst_drop(skb);
@@ -217,9 +221,14 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
217 return csum; 221 return csum;
218} 222}
219 223
220static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) 224static int nf_ip_route(struct net *net, struct dst_entry **dst,
225 struct flowi *fl, bool strict __always_unused)
221{ 226{
222 return ip_route_output_key(&init_net, (struct rtable **)dst, fl); 227 struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
228 if (IS_ERR(rt))
229 return PTR_ERR(rt);
230 *dst = &rt->dst;
231 return 0;
223} 232}
224 233
225static const struct nf_afinfo nf_ip_afinfo = { 234static const struct nf_afinfo nf_ip_afinfo = {
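
Two API conventions change in ip_route_me_harder()/nf_ip_route() above: ip_route_output_key() now takes a struct flowi4 and returns the rtable (errors via ERR_PTR), and xfrm_lookup() returns the transformed dst instead of updating a pointer argument. A condensed sketch of the combined pattern, assuming IPv4 and an already-populated fl4; function name and error handling are illustrative, not the patch's exact code.

#include <linux/err.h>
#include <linux/skbuff.h>
#include <net/flow.h>
#include <net/route.h>
#include <net/xfrm.h>

/* Sketch: route, optionally transform, then attach the result to the skb,
 * using the pointer-returning APIs this patch converts to. */
static int reroute_and_transform(struct net *net, struct sk_buff *skb,
                                 struct flowi4 *fl4)
{
        struct rtable *rt;
        struct dst_entry *dst;

        rt = ip_route_output_key(net, fl4);     /* route is the return value */
        if (IS_ERR(rt))
                return PTR_ERR(rt);

        /* Hand the route's reference to xfrm_lookup() and get back the
         * (possibly bundled) dst, again checked with IS_ERR(). */
        dst = xfrm_lookup(net, &rt->dst, flowi4_to_flowi(fl4), skb->sk, 0);
        if (IS_ERR(dst))
                return PTR_ERR(dst);

        skb_dst_set(skb, dst);
        return 0;
}
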
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index babd1a2bae5f..1dfc18a03fd4 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -64,16 +64,6 @@ config IP_NF_IPTABLES
64if IP_NF_IPTABLES 64if IP_NF_IPTABLES
65 65
66# The matches. 66# The matches.
67config IP_NF_MATCH_ADDRTYPE
68 tristate '"addrtype" address type match support'
69 depends on NETFILTER_ADVANCED
70 help
71 This option allows you to match what routing thinks of an address,
72 eg. UNICAST, LOCAL, BROADCAST, ...
73
74 If you want to compile it as a module, say M here and read
75 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
76
77config IP_NF_MATCH_AH 67config IP_NF_MATCH_AH
78 tristate '"ah" match support' 68 tristate '"ah" match support'
79 depends on NETFILTER_ADVANCED 69 depends on NETFILTER_ADVANCED
@@ -206,8 +196,9 @@ config IP_NF_TARGET_REDIRECT
206 196
207config NF_NAT_SNMP_BASIC 197config NF_NAT_SNMP_BASIC
208 tristate "Basic SNMP-ALG support" 198 tristate "Basic SNMP-ALG support"
209 depends on NF_NAT 199 depends on NF_CONNTRACK_SNMP && NF_NAT
210 depends on NETFILTER_ADVANCED 200 depends on NETFILTER_ADVANCED
201 default NF_NAT && NF_CONNTRACK_SNMP
211 ---help--- 202 ---help---
212 203
213 This module implements an Application Layer Gateway (ALG) for 204 This module implements an Application Layer Gateway (ALG) for
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 19eb59d01037..dca2082ec683 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
48obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o 48obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
49 49
50# matches 50# matches
51obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
52obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o 51obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
53obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o 52obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
54 53
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e855fffaed95..fd7a3f68917f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -76,7 +76,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
76} 76}
77 77
78/* 78/*
79 * Unfortunatly, _b and _mask are not aligned to an int (or long int) 79 * Unfortunately, _b and _mask are not aligned to an int (or long int)
80 * Some arches dont care, unrolling the loop is a win on them. 80 * Some arches dont care, unrolling the loop is a win on them.
81 * For other arches, we only have a 16bit alignement. 81 * For other arches, we only have a 16bit alignement.
82 */ 82 */
@@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
260 void *table_base; 260 void *table_base;
261 const struct xt_table_info *private; 261 const struct xt_table_info *private;
262 struct xt_action_param acpar; 262 struct xt_action_param acpar;
263 unsigned int addend;
263 264
264 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) 265 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
265 return NF_DROP; 266 return NF_DROP;
@@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
267 indev = in ? in->name : nulldevname; 268 indev = in ? in->name : nulldevname;
268 outdev = out ? out->name : nulldevname; 269 outdev = out ? out->name : nulldevname;
269 270
270 xt_info_rdlock_bh(); 271 local_bh_disable();
272 addend = xt_write_recseq_begin();
271 private = table->private; 273 private = table->private;
272 table_base = private->entries[smp_processor_id()]; 274 table_base = private->entries[smp_processor_id()];
273 275
@@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
338 /* Verdict */ 340 /* Verdict */
339 break; 341 break;
340 } while (!acpar.hotdrop); 342 } while (!acpar.hotdrop);
341 xt_info_rdunlock_bh(); 343 xt_write_recseq_end(addend);
344 local_bh_enable();
342 345
343 if (acpar.hotdrop) 346 if (acpar.hotdrop)
344 return NF_DROP; 347 return NF_DROP;
@@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t,
712 unsigned int i; 715 unsigned int i;
713 716
714 for_each_possible_cpu(cpu) { 717 for_each_possible_cpu(cpu) {
715 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; 718 seqcount_t *s = &per_cpu(xt_recseq, cpu);
716 719
717 i = 0; 720 i = 0;
718 xt_entry_foreach(iter, t->entries[cpu], t->size) { 721 xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t,
720 unsigned int start; 723 unsigned int start;
721 724
722 do { 725 do {
723 start = read_seqbegin(lock); 726 start = read_seqcount_begin(s);
724 bcnt = iter->counters.bcnt; 727 bcnt = iter->counters.bcnt;
725 pcnt = iter->counters.pcnt; 728 pcnt = iter->counters.pcnt;
726 } while (read_seqretry(lock, start)); 729 } while (read_seqcount_retry(s, start));
727 730
728 ADD_COUNTER(counters[i], bcnt, pcnt); 731 ADD_COUNTER(counters[i], bcnt, pcnt);
729 ++i; 732 ++i;
@@ -866,6 +869,7 @@ static int compat_table_info(const struct xt_table_info *info,
866 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 869 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
867 newinfo->initial_entries = 0; 870 newinfo->initial_entries = 0;
868 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 871 loc_cpu_entry = info->entries[raw_smp_processor_id()];
872 xt_compat_init_offsets(NFPROTO_ARP, info->number);
869 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 873 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
870 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 874 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
871 if (ret != 0) 875 if (ret != 0)
@@ -1065,6 +1069,7 @@ static int do_replace(struct net *net, const void __user *user,
1065 /* overflow check */ 1069 /* overflow check */
1066 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1070 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1067 return -ENOMEM; 1071 return -ENOMEM;
1072 tmp.name[sizeof(tmp.name)-1] = 0;
1068 1073
1069 newinfo = xt_alloc_table_info(tmp.size); 1074 newinfo = xt_alloc_table_info(tmp.size);
1070 if (!newinfo) 1075 if (!newinfo)
@@ -1113,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1113 int ret = 0; 1118 int ret = 0;
1114 void *loc_cpu_entry; 1119 void *loc_cpu_entry;
1115 struct arpt_entry *iter; 1120 struct arpt_entry *iter;
1121 unsigned int addend;
1116#ifdef CONFIG_COMPAT 1122#ifdef CONFIG_COMPAT
1117 struct compat_xt_counters_info compat_tmp; 1123 struct compat_xt_counters_info compat_tmp;
1118 1124
@@ -1169,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user,
1169 /* Choose the copy that is on our node */ 1175 /* Choose the copy that is on our node */
1170 curcpu = smp_processor_id(); 1176 curcpu = smp_processor_id();
1171 loc_cpu_entry = private->entries[curcpu]; 1177 loc_cpu_entry = private->entries[curcpu];
1172 xt_info_wrlock(curcpu); 1178 addend = xt_write_recseq_begin();
1173 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1179 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1174 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1180 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1175 ++i; 1181 ++i;
1176 } 1182 }
1177 xt_info_wrunlock(curcpu); 1183 xt_write_recseq_end(addend);
1178 unlock_up_free: 1184 unlock_up_free:
1179 local_bh_enable(); 1185 local_bh_enable();
1180 xt_table_unlock(t); 1186 xt_table_unlock(t);
@@ -1333,6 +1339,7 @@ static int translate_compat_table(const char *name,
1333 duprintf("translate_compat_table: size %u\n", info->size); 1339 duprintf("translate_compat_table: size %u\n", info->size);
1334 j = 0; 1340 j = 0;
1335 xt_compat_lock(NFPROTO_ARP); 1341 xt_compat_lock(NFPROTO_ARP);
1342 xt_compat_init_offsets(NFPROTO_ARP, number);
1336 /* Walk through entries, checking offsets. */ 1343 /* Walk through entries, checking offsets. */
1337 xt_entry_foreach(iter0, entry0, total_size) { 1344 xt_entry_foreach(iter0, entry0, total_size) {
1338 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1345 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
@@ -1486,6 +1493,7 @@ static int compat_do_replace(struct net *net, void __user *user,
1486 return -ENOMEM; 1493 return -ENOMEM;
1487 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1494 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1488 return -ENOMEM; 1495 return -ENOMEM;
1496 tmp.name[sizeof(tmp.name)-1] = 0;
1489 1497
1490 newinfo = xt_alloc_table_info(tmp.size); 1498 newinfo = xt_alloc_table_info(tmp.size);
1491 if (!newinfo) 1499 if (!newinfo)
@@ -1738,6 +1746,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1738 ret = -EFAULT; 1746 ret = -EFAULT;
1739 break; 1747 break;
1740 } 1748 }
1749 rev.name[sizeof(rev.name)-1] = 0;
1741 1750
1742 try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, 1751 try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
1743 rev.revision, 1, &ret), 1752 rev.revision, 1, &ret),
@@ -1869,7 +1878,7 @@ static int __init arp_tables_init(void)
1869 if (ret < 0) 1878 if (ret < 0)
1870 goto err1; 1879 goto err1;
1871 1880
1872 /* Noone else will be downing sem now, so we won't sleep */ 1881 /* No one else will be downing sem now, so we won't sleep */
1873 ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); 1882 ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
1874 if (ret < 0) 1883 if (ret < 0)
1875 goto err2; 1884 goto err2;
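
The arp_tables.c hunks replace the per-CPU xt_info_locks reader/writer scheme with the lighter per-CPU xt_recseq sequence counter: both packet processing (arpt_do_table) and counter updates (do_add_counters) bracket their work with xt_write_recseq_begin()/xt_write_recseq_end() under local_bh_disable(). A compact sketch of that writer-side pattern; the helper name is hypothetical and the counters are assumed to be the usual per-CPU x_tables copies.

#include <linux/types.h>
#include <linux/bottom_half.h>
#include <linux/netfilter/x_tables.h>

/* Sketch of the writer-side pattern: bump a counter pair under the
 * per-CPU xt_recseq so lockless readers can detect the update. */
static void bump_counters_sketch(struct xt_counters *ctr,
                                 u64 bytes, u64 packets)
{
        unsigned int addend;

        local_bh_disable();
        addend = xt_write_recseq_begin();       /* mark a write in progress */

        ADD_COUNTER(*ctr, bytes, packets);      /* not atomic; seqcount guards readers */

        xt_write_recseq_end(addend);            /* publish; readers stop retrying */
        local_bh_enable();
}
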
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 652efea013dc..764743843503 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info)
68} 68}
69EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); 69EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
70 70
71/*
72 We keep a set of rules for each CPU, so we can avoid write-locking
73 them in the softirq when updating the counters and therefore
74 only need to read-lock in the softirq; doing a write_lock_bh() in user
75 context stops packets coming through and allows user context to read
76 the counters or update the rules.
77
78 Hence the start of any table is given by get_table() below. */
79
80/* Returns whether matches rule or not. */ 71/* Returns whether matches rule or not. */
81/* Performance critical - called for every packet */ 72/* Performance critical - called for every packet */
82static inline bool 73static inline bool
@@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb,
311 unsigned int *stackptr, origptr, cpu; 302 unsigned int *stackptr, origptr, cpu;
312 const struct xt_table_info *private; 303 const struct xt_table_info *private;
313 struct xt_action_param acpar; 304 struct xt_action_param acpar;
305 unsigned int addend;
314 306
315 /* Initialization */ 307 /* Initialization */
316 ip = ip_hdr(skb); 308 ip = ip_hdr(skb);
@@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb,
331 acpar.hooknum = hook; 323 acpar.hooknum = hook;
332 324
333 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 325 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
334 xt_info_rdlock_bh(); 326 local_bh_disable();
327 addend = xt_write_recseq_begin();
335 private = table->private; 328 private = table->private;
336 cpu = smp_processor_id(); 329 cpu = smp_processor_id();
337 table_base = private->entries[cpu]; 330 table_base = private->entries[cpu];
@@ -387,7 +380,7 @@ ipt_do_table(struct sk_buff *skb,
387 verdict = (unsigned)(-v) - 1; 380 verdict = (unsigned)(-v) - 1;
388 break; 381 break;
389 } 382 }
390 if (*stackptr == 0) { 383 if (*stackptr <= origptr) {
391 e = get_entry(table_base, 384 e = get_entry(table_base,
392 private->underflow[hook]); 385 private->underflow[hook]);
393 pr_debug("Underflow (this is normal) " 386 pr_debug("Underflow (this is normal) "
@@ -427,10 +420,12 @@ ipt_do_table(struct sk_buff *skb,
427 /* Verdict */ 420 /* Verdict */
428 break; 421 break;
429 } while (!acpar.hotdrop); 422 } while (!acpar.hotdrop);
430 xt_info_rdunlock_bh();
431 pr_debug("Exiting %s; resetting sp from %u to %u\n", 423 pr_debug("Exiting %s; resetting sp from %u to %u\n",
432 __func__, *stackptr, origptr); 424 __func__, *stackptr, origptr);
433 *stackptr = origptr; 425 *stackptr = origptr;
426 xt_write_recseq_end(addend);
427 local_bh_enable();
428
434#ifdef DEBUG_ALLOW_ALL 429#ifdef DEBUG_ALLOW_ALL
435 return NF_ACCEPT; 430 return NF_ACCEPT;
436#else 431#else
@@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t,
886 unsigned int i; 881 unsigned int i;
887 882
888 for_each_possible_cpu(cpu) { 883 for_each_possible_cpu(cpu) {
889 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; 884 seqcount_t *s = &per_cpu(xt_recseq, cpu);
890 885
891 i = 0; 886 i = 0;
892 xt_entry_foreach(iter, t->entries[cpu], t->size) { 887 xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t,
894 unsigned int start; 889 unsigned int start;
895 890
896 do { 891 do {
897 start = read_seqbegin(lock); 892 start = read_seqcount_begin(s);
898 bcnt = iter->counters.bcnt; 893 bcnt = iter->counters.bcnt;
899 pcnt = iter->counters.pcnt; 894 pcnt = iter->counters.pcnt;
900 } while (read_seqretry(lock, start)); 895 } while (read_seqcount_retry(s, start));
901 896
902 ADD_COUNTER(counters[i], bcnt, pcnt); 897 ADD_COUNTER(counters[i], bcnt, pcnt);
903 ++i; /* macro does multi eval of i */ 898 ++i; /* macro does multi eval of i */
@@ -1063,6 +1058,7 @@ static int compat_table_info(const struct xt_table_info *info,
1063 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1058 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1064 newinfo->initial_entries = 0; 1059 newinfo->initial_entries = 0;
1065 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1060 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1061 xt_compat_init_offsets(AF_INET, info->number);
1066 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1062 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1067 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1063 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1068 if (ret != 0) 1064 if (ret != 0)
@@ -1261,6 +1257,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1261 /* overflow check */ 1257 /* overflow check */
1262 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1258 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1263 return -ENOMEM; 1259 return -ENOMEM;
1260 tmp.name[sizeof(tmp.name)-1] = 0;
1264 1261
1265 newinfo = xt_alloc_table_info(tmp.size); 1262 newinfo = xt_alloc_table_info(tmp.size);
1266 if (!newinfo) 1263 if (!newinfo)
@@ -1310,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user,
1310 int ret = 0; 1307 int ret = 0;
1311 void *loc_cpu_entry; 1308 void *loc_cpu_entry;
1312 struct ipt_entry *iter; 1309 struct ipt_entry *iter;
1310 unsigned int addend;
1313#ifdef CONFIG_COMPAT 1311#ifdef CONFIG_COMPAT
1314 struct compat_xt_counters_info compat_tmp; 1312 struct compat_xt_counters_info compat_tmp;
1315 1313
@@ -1366,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user,
1366 /* Choose the copy that is on our node */ 1364 /* Choose the copy that is on our node */
1367 curcpu = smp_processor_id(); 1365 curcpu = smp_processor_id();
1368 loc_cpu_entry = private->entries[curcpu]; 1366 loc_cpu_entry = private->entries[curcpu];
1369 xt_info_wrlock(curcpu); 1367 addend = xt_write_recseq_begin();
1370 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1368 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1371 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1369 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1372 ++i; 1370 ++i;
1373 } 1371 }
1374 xt_info_wrunlock(curcpu); 1372 xt_write_recseq_end(addend);
1375 unlock_up_free: 1373 unlock_up_free:
1376 local_bh_enable(); 1374 local_bh_enable();
1377 xt_table_unlock(t); 1375 xt_table_unlock(t);
@@ -1664,6 +1662,7 @@ translate_compat_table(struct net *net,
1664 duprintf("translate_compat_table: size %u\n", info->size); 1662 duprintf("translate_compat_table: size %u\n", info->size);
1665 j = 0; 1663 j = 0;
1666 xt_compat_lock(AF_INET); 1664 xt_compat_lock(AF_INET);
1665 xt_compat_init_offsets(AF_INET, number);
1667 /* Walk through entries, checking offsets. */ 1666 /* Walk through entries, checking offsets. */
1668 xt_entry_foreach(iter0, entry0, total_size) { 1667 xt_entry_foreach(iter0, entry0, total_size) {
1669 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1668 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
@@ -1805,6 +1804,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1805 return -ENOMEM; 1804 return -ENOMEM;
1806 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1805 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1807 return -ENOMEM; 1806 return -ENOMEM;
1807 tmp.name[sizeof(tmp.name)-1] = 0;
1808 1808
1809 newinfo = xt_alloc_table_info(tmp.size); 1809 newinfo = xt_alloc_table_info(tmp.size);
1810 if (!newinfo) 1810 if (!newinfo)
@@ -2034,6 +2034,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2034 ret = -EFAULT; 2034 ret = -EFAULT;
2035 break; 2035 break;
2036 } 2036 }
2037 rev.name[sizeof(rev.name)-1] = 0;
2037 2038
2038 if (cmd == IPT_SO_GET_REVISION_TARGET) 2039 if (cmd == IPT_SO_GET_REVISION_TARGET)
2039 target = 1; 2040 target = 1;
@@ -2228,7 +2229,7 @@ static int __init ip_tables_init(void)
2228 if (ret < 0) 2229 if (ret < 0)
2229 goto err1; 2230 goto err1;
2230 2231
2231 /* Noone else will be downing sem now, so we won't sleep */ 2232 /* No one else will be downing sem now, so we won't sleep */
2232 ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); 2233 ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
2233 if (ret < 0) 2234 if (ret < 0)
2234 goto err2; 2235 goto err2;
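
On the read side, get_counters() above swaps the per-CPU seqlock for the per-CPU seqcount xt_recseq and snapshots each counter pair with a read_seqcount_begin()/read_seqcount_retry() retry loop instead of taking a lock. A minimal sketch of reading one counter pair consistently; the function name is illustrative.

#include <linux/types.h>
#include <linux/seqlock.h>
#include <linux/netfilter/x_tables.h>

/* Sketch of the lockless retry loop get_counters() now uses to read a
 * per-CPU counter pair that writers may update concurrently. */
static void read_counter_sketch(const struct xt_counters *ctr,
                                seqcount_t *s, u64 *bytes, u64 *packets)
{
        unsigned int start;

        do {
                start = read_seqcount_begin(s);
                *bytes   = ctr->bcnt;
                *packets = ctr->pcnt;
        } while (read_seqcount_retry(s, start));
}
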
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a4897655..d609ac3cb9a4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
300 * that the ->target() function isn't called after ->destroy() */ 300 * that the ->target() function isn't called after ->destroy() */
301 301
302 ct = nf_ct_get(skb, &ctinfo); 302 ct = nf_ct_get(skb, &ctinfo);
303 if (ct == NULL) { 303 if (ct == NULL)
304 pr_info("no conntrack!\n");
305 /* FIXME: need to drop invalid ones, since replies
306 * to outgoing connections of other nodes will be
307 * marked as INVALID */
308 return NF_DROP; 304 return NF_DROP;
309 }
310 305
311 /* special case: ICMP error handling. conntrack distinguishes between 306 /* special case: ICMP error handling. conntrack distinguishes between
312 * error messages (RELATED) and information requests (see below) */ 307 * error messages (RELATED) and information requests (see below) */
@@ -669,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
669 char buffer[PROC_WRITELEN+1]; 664 char buffer[PROC_WRITELEN+1];
670 unsigned long nodenum; 665 unsigned long nodenum;
671 666
672 if (copy_from_user(buffer, input, PROC_WRITELEN)) 667 if (size > PROC_WRITELEN)
668 return -EIO;
669 if (copy_from_user(buffer, input, size))
673 return -EFAULT; 670 return -EFAULT;
671 buffer[size] = 0;
674 672
675 if (*buffer == '+') { 673 if (*buffer == '+') {
676 nodenum = simple_strtoul(buffer+1, NULL, 10); 674 nodenum = simple_strtoul(buffer+1, NULL, 10);
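
clusterip_proc_write() above gains a length check and explicit NUL termination before parsing the user-supplied buffer, rejecting writes larger than the on-stack buffer with -EIO. A sketch of that bounded copy_from_user() pattern under the same assumptions (fixed-size stack buffer, decimal node number); the buffer size macro and function name are stand-ins.

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

#define SKETCH_WRITELEN 10      /* stand-in for PROC_WRITELEN */

/* Sketch: copy at most SKETCH_WRITELEN bytes from userspace, reject longer
 * writes, and NUL-terminate before simple_strtoul()-style parsing. */
static ssize_t bounded_proc_write(const char __user *input, size_t size,
                                  unsigned long *nodenum)
{
        char buffer[SKETCH_WRITELEN + 1];

        if (size > SKETCH_WRITELEN)
                return -EIO;
        if (copy_from_user(buffer, input, size))
                return -EFAULT;
        buffer[size] = 0;

        *nodenum = simple_strtoul(buffer, NULL, 10);
        return size;
}
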
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 72ffc8fda2e9..d76d6c9ed946 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
442 } 442 }
443#endif 443#endif
444 444
445 /* MAC logging for input path only. */ 445 if (in != NULL)
446 if (in && !out)
447 dump_mac_header(m, loginfo, skb); 446 dump_mac_header(m, loginfo, skb);
448 447
449 dump_packet(m, loginfo, skb, 0); 448 dump_packet(m, loginfo, skb, 0);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
deleted file mode 100644
index db8bff0fb86d..000000000000
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * iptables module to match inet_addr_type() of an ip.
3 *
4 * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
5 * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/ip.h>
17#include <net/route.h>
18
19#include <linux/netfilter_ipv4/ipt_addrtype.h>
20#include <linux/netfilter/x_tables.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
24MODULE_DESCRIPTION("Xtables: address type match for IPv4");
25
26static inline bool match_type(struct net *net, const struct net_device *dev,
27 __be32 addr, u_int16_t mask)
28{
29 return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
30}
31
32static bool
33addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
34{
35 struct net *net = dev_net(par->in ? par->in : par->out);
36 const struct ipt_addrtype_info *info = par->matchinfo;
37 const struct iphdr *iph = ip_hdr(skb);
38 bool ret = true;
39
40 if (info->source)
41 ret &= match_type(net, NULL, iph->saddr, info->source) ^
42 info->invert_source;
43 if (info->dest)
44 ret &= match_type(net, NULL, iph->daddr, info->dest) ^
45 info->invert_dest;
46
47 return ret;
48}
49
50static bool
51addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
52{
53 struct net *net = dev_net(par->in ? par->in : par->out);
54 const struct ipt_addrtype_info_v1 *info = par->matchinfo;
55 const struct iphdr *iph = ip_hdr(skb);
56 const struct net_device *dev = NULL;
57 bool ret = true;
58
59 if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
60 dev = par->in;
61 else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
62 dev = par->out;
63
64 if (info->source)
65 ret &= match_type(net, dev, iph->saddr, info->source) ^
66 (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
67 if (ret && info->dest)
68 ret &= match_type(net, dev, iph->daddr, info->dest) ^
69 !!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
70 return ret;
71}
72
73static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
74{
75 struct ipt_addrtype_info_v1 *info = par->matchinfo;
76
77 if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
78 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
79 pr_info("both incoming and outgoing "
80 "interface limitation cannot be selected\n");
81 return -EINVAL;
82 }
83
84 if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
85 (1 << NF_INET_LOCAL_IN)) &&
86 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
87 pr_info("output interface limitation "
88 "not valid in PREROUTING and INPUT\n");
89 return -EINVAL;
90 }
91
92 if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
93 (1 << NF_INET_LOCAL_OUT)) &&
94 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
95 pr_info("input interface limitation "
96 "not valid in POSTROUTING and OUTPUT\n");
97 return -EINVAL;
98 }
99
100 return 0;
101}
102
103static struct xt_match addrtype_mt_reg[] __read_mostly = {
104 {
105 .name = "addrtype",
106 .family = NFPROTO_IPV4,
107 .match = addrtype_mt_v0,
108 .matchsize = sizeof(struct ipt_addrtype_info),
109 .me = THIS_MODULE
110 },
111 {
112 .name = "addrtype",
113 .family = NFPROTO_IPV4,
114 .revision = 1,
115 .match = addrtype_mt_v1,
116 .checkentry = addrtype_mt_checkentry_v1,
117 .matchsize = sizeof(struct ipt_addrtype_info_v1),
118 .me = THIS_MODULE
119 }
120};
121
122static int __init addrtype_mt_init(void)
123{
124 return xt_register_matches(addrtype_mt_reg,
125 ARRAY_SIZE(addrtype_mt_reg));
126}
127
128static void __exit addrtype_mt_exit(void)
129{
130 xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
131}
132
133module_init(addrtype_mt_init);
134module_exit(addrtype_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 294a2a32f293..aef5d1fbe77d 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
60 ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, 60 ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
61 dev_net(out)->ipv4.iptable_mangle); 61 dev_net(out)->ipv4.iptable_mangle);
62 /* Reroute for ANY change. */ 62 /* Reroute for ANY change. */
63 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { 63 if (ret != NF_DROP && ret != NF_STOLEN) {
64 iph = ip_hdr(skb); 64 iph = ip_hdr(skb);
65 65
66 if (iph->saddr != saddr || 66 if (iph->saddr != saddr ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 63f60fc5d26a..5585980fce2e 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -20,6 +20,7 @@
20#include <net/netfilter/nf_conntrack_l4proto.h> 20#include <net/netfilter/nf_conntrack_l4proto.h>
21#include <net/netfilter/nf_conntrack_expect.h> 21#include <net/netfilter/nf_conntrack_expect.h>
22#include <net/netfilter/nf_conntrack_acct.h> 22#include <net/netfilter/nf_conntrack_acct.h>
23#include <linux/rculist_nulls.h>
23 24
24struct ct_iter_state { 25struct ct_iter_state {
25 struct seq_net_private p; 26 struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
35 for (st->bucket = 0; 36 for (st->bucket = 0;
36 st->bucket < net->ct.htable_size; 37 st->bucket < net->ct.htable_size;
37 st->bucket++) { 38 st->bucket++) {
38 n = rcu_dereference(net->ct.hash[st->bucket].first); 39 n = rcu_dereference(
40 hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
39 if (!is_a_nulls(n)) 41 if (!is_a_nulls(n))
40 return n; 42 return n;
41 } 43 }
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
48 struct net *net = seq_file_net(seq); 50 struct net *net = seq_file_net(seq);
49 struct ct_iter_state *st = seq->private; 51 struct ct_iter_state *st = seq->private;
50 52
51 head = rcu_dereference(head->next); 53 head = rcu_dereference(hlist_nulls_next_rcu(head));
52 while (is_a_nulls(head)) { 54 while (is_a_nulls(head)) {
53 if (likely(get_nulls_value(head) == st->bucket)) { 55 if (likely(get_nulls_value(head) == st->bucket)) {
54 if (++st->bucket >= net->ct.htable_size) 56 if (++st->bucket >= net->ct.htable_size)
55 return NULL; 57 return NULL;
56 } 58 }
57 head = rcu_dereference(net->ct.hash[st->bucket].first); 59 head = rcu_dereference(
60 hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
58 } 61 }
59 return head; 62 return head;
60} 63}
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
217 struct hlist_node *n; 220 struct hlist_node *n;
218 221
219 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 222 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
220 n = rcu_dereference(net->ct.expect_hash[st->bucket].first); 223 n = rcu_dereference(
224 hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
221 if (n) 225 if (n)
222 return n; 226 return n;
223 } 227 }
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
230 struct net *net = seq_file_net(seq); 234 struct net *net = seq_file_net(seq);
231 struct ct_expect_iter_state *st = seq->private; 235 struct ct_expect_iter_state *st = seq->private;
232 236
233 head = rcu_dereference(head->next); 237 head = rcu_dereference(hlist_next_rcu(head));
234 while (head == NULL) { 238 while (head == NULL) {
235 if (++st->bucket >= nf_ct_expect_hsize) 239 if (++st->bucket >= nf_ct_expect_hsize)
236 return NULL; 240 return NULL;
237 head = rcu_dereference(net->ct.expect_hash[st->bucket].first); 241 head = rcu_dereference(
242 hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
238 } 243 }
239 return head; 244 return head;
240} 245}
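
The /proc conntrack iterators above now go through the sparse-clean accessors hlist_nulls_first_rcu()/hlist_nulls_next_rcu() (and hlist_first_rcu()/hlist_next_rcu() for the expectation table) rather than dereferencing ->first/->next directly. A sketch of walking one nulls-terminated RCU bucket with those helpers; the function is hypothetical and assumes the caller holds rcu_read_lock().

#include <linux/rcupdate.h>
#include <linux/rculist_nulls.h>

/* Sketch: walk a single RCU hlist_nulls bucket the way ct_get_first()/
 * ct_get_next() do after the conversion. Caller holds rcu_read_lock(). */
static void walk_bucket_sketch(struct hlist_nulls_head *bucket)
{
        struct hlist_nulls_node *n;

        n = rcu_dereference(hlist_nulls_first_rcu(bucket));
        while (!is_a_nulls(n)) {
                /* ... visit the entry containing 'n' ... */
                n = rcu_dereference(hlist_nulls_next_rcu(n));
        }
        /* get_nulls_value(n) now identifies the bucket the chain ended in,
         * which is how the seq_file iterator detects a rehash race. */
}
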
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 0f23b3f06df0..703f366fd235 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
44 44
45 /* Try to get same port: if not, try to change it. */ 45 /* Try to get same port: if not, try to change it. */
46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { 46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
47 int ret; 47 int res;
48 48
49 exp->tuple.dst.u.tcp.port = htons(port); 49 exp->tuple.dst.u.tcp.port = htons(port);
50 ret = nf_ct_expect_related(exp); 50 res = nf_ct_expect_related(exp);
51 if (ret == 0) 51 if (res == 0)
52 break; 52 break;
53 else if (ret != -EBUSY) { 53 else if (res != -EBUSY) {
54 port = 0; 54 port = 0;
55 break; 55 break;
56 } 56 }
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index c04787ce1a71..9c71b2755ce3 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
221 manips not an issue. */ 221 manips not an issue. */
222 if (maniptype == IP_NAT_MANIP_SRC && 222 if (maniptype == IP_NAT_MANIP_SRC &&
223 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { 223 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
224 if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { 224 /* try the original tuple first */
225 if (in_range(orig_tuple, range)) {
226 if (!nf_nat_used_tuple(orig_tuple, ct)) {
227 *tuple = *orig_tuple;
228 return;
229 }
230 } else if (find_appropriate_src(net, zone, orig_tuple, tuple,
231 range)) {
225 pr_debug("get_unique_tuple: Found current src map\n"); 232 pr_debug("get_unique_tuple: Found current src map\n");
226 if (!nf_nat_used_tuple(tuple, ct)) 233 if (!nf_nat_used_tuple(tuple, ct))
227 return; 234 return;
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct,
266 struct net *net = nf_ct_net(ct); 273 struct net *net = nf_ct_net(ct);
267 struct nf_conntrack_tuple curr_tuple, new_tuple; 274 struct nf_conntrack_tuple curr_tuple, new_tuple;
268 struct nf_conn_nat *nat; 275 struct nf_conn_nat *nat;
269 int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
270 276
271 /* nat helper or nfctnetlink also setup binding */ 277 /* nat helper or nfctnetlink also setup binding */
272 nat = nfct_nat(ct); 278 nat = nfct_nat(ct);
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct,
306 ct->status |= IPS_DST_NAT; 312 ct->status |= IPS_DST_NAT;
307 } 313 }
308 314
309 /* Place in source hash if this is the first time. */ 315 if (maniptype == IP_NAT_MANIP_SRC) {
310 if (have_to_hash) {
311 unsigned int srchash; 316 unsigned int srchash;
312 317
313 srchash = hash_by_src(net, nf_ct_zone(ct), 318 srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct,
323 328
324 /* It's done. */ 329 /* It's done. */
325 if (maniptype == IP_NAT_MANIP_DST) 330 if (maniptype == IP_NAT_MANIP_DST)
326 set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); 331 ct->status |= IPS_DST_NAT_DONE;
327 else 332 else
328 set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 333 ct->status |= IPS_SRC_NAT_DONE;
329 334
330 return NF_ACCEPT; 335 return NF_ACCEPT;
331} 336}
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
502 int ret = 0; 507 int ret = 0;
503 508
504 spin_lock_bh(&nf_nat_lock); 509 spin_lock_bh(&nf_nat_lock);
505 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { 510 if (rcu_dereference_protected(
511 nf_nat_protos[proto->protonum],
512 lockdep_is_held(&nf_nat_lock)
513 ) != &nf_nat_unknown_protocol) {
506 ret = -EBUSY; 514 ret = -EBUSY;
507 goto out; 515 goto out;
508 } 516 }
@@ -513,7 +521,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
513} 521}
514EXPORT_SYMBOL(nf_nat_protocol_register); 522EXPORT_SYMBOL(nf_nat_protocol_register);
515 523
516/* Noone stores the protocol anywhere; simply delete it. */ 524/* No one stores the protocol anywhere; simply delete it. */
517void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) 525void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
518{ 526{
519 spin_lock_bh(&nf_nat_lock); 527 spin_lock_bh(&nf_nat_lock);
@@ -524,7 +532,7 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
524} 532}
525EXPORT_SYMBOL(nf_nat_protocol_unregister); 533EXPORT_SYMBOL(nf_nat_protocol_unregister);
526 534
527/* Noone using conntrack by the time this called. */ 535/* No one using conntrack by the time this called. */
528static void nf_nat_cleanup_conntrack(struct nf_conn *ct) 536static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
529{ 537{
530 struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); 538 struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
532 if (nat == NULL || nat->ct == NULL) 540 if (nat == NULL || nat->ct == NULL)
533 return; 541 return;
534 542
535 NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); 543 NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
536 544
537 spin_lock_bh(&nf_nat_lock); 545 spin_lock_bh(&nf_nat_lock);
538 hlist_del_rcu(&nat->bysource); 546 hlist_del_rcu(&nat->bysource);
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old)
545 struct nf_conn_nat *old_nat = old; 553 struct nf_conn_nat *old_nat = old;
546 struct nf_conn *ct = old_nat->ct; 554 struct nf_conn *ct = old_nat->ct;
547 555
548 if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) 556 if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
549 return; 557 return;
550 558
551 spin_lock_bh(&nf_nat_lock); 559 spin_lock_bh(&nf_nat_lock);
552 new_nat->ct = ct;
553 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); 560 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
554 spin_unlock_bh(&nf_nat_lock); 561 spin_unlock_bh(&nf_nat_lock);
555} 562}
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net)
679{ 686{
680 /* Leave them the same for the moment. */ 687 /* Leave them the same for the moment. */
681 net->ipv4.nat_htable_size = net->ct.htable_size; 688 net->ipv4.nat_htable_size = net->ct.htable_size;
682 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 689 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
683 &net->ipv4.nat_vmalloced, 0);
684 if (!net->ipv4.nat_bysource) 690 if (!net->ipv4.nat_bysource)
685 return -ENOMEM; 691 return -ENOMEM;
686 return 0; 692 return 0;
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
702{ 708{
703 nf_ct_iterate_cleanup(net, &clean_nat, NULL); 709 nf_ct_iterate_cleanup(net, &clean_nat, NULL);
704 synchronize_rcu(); 710 synchronize_rcu();
705 nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, 711 nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
706 net->ipv4.nat_htable_size);
707} 712}
708 713
709static struct pernet_operations nf_nat_net_ops = { 714static struct pernet_operations nf_nat_net_ops = {
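
nf_nat_protocol_register() above now reads the protocol slot with rcu_dereference_protected(), documenting that the update side is serialized by nf_nat_lock rather than by an RCU read-side critical section. A sketch of that check-then-publish pattern with a single hypothetical slot guarded by its own spinlock; names are illustrative.

#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <net/netfilter/nf_nat_protocol.h>

static DEFINE_SPINLOCK(slot_lock);
static const struct nf_nat_protocol __rcu *slot;        /* hypothetical slot */

/* Sketch: install 'proto' only if the slot is still empty, reading the
 * current value under the lock via rcu_dereference_protected(). */
static int register_slot_sketch(const struct nf_nat_protocol *proto)
{
        int ret = 0;

        spin_lock_bh(&slot_lock);
        if (rcu_dereference_protected(slot,
                                      lockdep_is_held(&slot_lock)) != NULL) {
                ret = -EBUSY;
                goto out;
        }
        rcu_assign_pointer(slot, proto);        /* publish for RCU readers */
out:
        spin_unlock_bh(&slot_lock);
        return ret;
}
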
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 31427fb57aa8..99cfa28b6d38 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,7 +153,7 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
153} 153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); 154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155 155
156static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data, 156static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
157 int datalen, __sum16 *check, int oldlen) 157 int datalen, __sum16 *check, int oldlen)
158{ 158{
159 struct rtable *rt = skb_rtable(skb); 159 struct rtable *rt = skb_rtable(skb);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ee5f419d0a56..8812a02078ab 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -54,6 +54,7 @@
54#include <net/netfilter/nf_conntrack_expect.h> 54#include <net/netfilter/nf_conntrack_expect.h>
55#include <net/netfilter/nf_conntrack_helper.h> 55#include <net/netfilter/nf_conntrack_helper.h>
56#include <net/netfilter/nf_nat_helper.h> 56#include <net/netfilter/nf_nat_helper.h>
57#include <linux/netfilter/nf_conntrack_snmp.h>
57 58
58MODULE_LICENSE("GPL"); 59MODULE_LICENSE("GPL");
59MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); 60MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
1310{ 1311{
1311 int ret = 0; 1312 int ret = 0;
1312 1313
1313 ret = nf_conntrack_helper_register(&snmp_helper); 1314 BUG_ON(nf_nat_snmp_hook != NULL);
1314 if (ret < 0) 1315 rcu_assign_pointer(nf_nat_snmp_hook, help);
1315 return ret; 1316
1316 ret = nf_conntrack_helper_register(&snmp_trap_helper); 1317 ret = nf_conntrack_helper_register(&snmp_trap_helper);
1317 if (ret < 0) { 1318 if (ret < 0) {
1318 nf_conntrack_helper_unregister(&snmp_helper); 1319 nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
1323 1324
1324static void __exit nf_nat_snmp_basic_fini(void) 1325static void __exit nf_nat_snmp_basic_fini(void)
1325{ 1326{
1326 nf_conntrack_helper_unregister(&snmp_helper); 1327 rcu_assign_pointer(nf_nat_snmp_hook, NULL);
1327 nf_conntrack_helper_unregister(&snmp_trap_helper); 1328 nf_conntrack_helper_unregister(&snmp_trap_helper);
1328} 1329}
1329 1330
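(Sketch of the consumer side, which lives in the generic nf_conntrack SNMP helper rather than in this file: after the change above, nf_nat_snmp_basic no longer registers its own snmp_helper but publishes an RCU-protected function pointer, nf_nat_snmp_hook, declared in the newly included linux/netfilter/nf_conntrack_snmp.h. The exact hook prototype is assumed here for illustration.)

#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_snmp.h>
#include <net/netfilter/nf_conntrack.h>

static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
			       struct nf_conn *ct,
			       enum ip_conntrack_info ctinfo)
{
	int (*nat_hook)(struct sk_buff *skb, unsigned int protoff,
			struct nf_conn *ct, enum ip_conntrack_info ctinfo);
	int ret = NF_ACCEPT;

	rcu_read_lock();
	nat_hook = rcu_dereference(nf_nat_snmp_hook);
	if (nat_hook)		/* nf_nat_snmp_basic is loaded and registered */
		ret = nat_hook(skb, protoff, ct, ctinfo);
	rcu_read_unlock();
	return ret;
}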
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 95481fee8bdb..7317bdf1d457 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -31,6 +31,7 @@
31#ifdef CONFIG_XFRM 31#ifdef CONFIG_XFRM
32static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) 32static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
33{ 33{
34 struct flowi4 *fl4 = &fl->u.ip4;
34 const struct nf_conn *ct; 35 const struct nf_conn *ct;
35 const struct nf_conntrack_tuple *t; 36 const struct nf_conntrack_tuple *t;
36 enum ip_conntrack_info ctinfo; 37 enum ip_conntrack_info ctinfo;
@@ -49,25 +50,25 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
49 statusbit = IPS_SRC_NAT; 50 statusbit = IPS_SRC_NAT;
50 51
51 if (ct->status & statusbit) { 52 if (ct->status & statusbit) {
52 fl->fl4_dst = t->dst.u3.ip; 53 fl4->daddr = t->dst.u3.ip;
53 if (t->dst.protonum == IPPROTO_TCP || 54 if (t->dst.protonum == IPPROTO_TCP ||
54 t->dst.protonum == IPPROTO_UDP || 55 t->dst.protonum == IPPROTO_UDP ||
55 t->dst.protonum == IPPROTO_UDPLITE || 56 t->dst.protonum == IPPROTO_UDPLITE ||
56 t->dst.protonum == IPPROTO_DCCP || 57 t->dst.protonum == IPPROTO_DCCP ||
57 t->dst.protonum == IPPROTO_SCTP) 58 t->dst.protonum == IPPROTO_SCTP)
58 fl->fl_ip_dport = t->dst.u.tcp.port; 59 fl4->fl4_dport = t->dst.u.tcp.port;
59 } 60 }
60 61
61 statusbit ^= IPS_NAT_MASK; 62 statusbit ^= IPS_NAT_MASK;
62 63
63 if (ct->status & statusbit) { 64 if (ct->status & statusbit) {
64 fl->fl4_src = t->src.u3.ip; 65 fl4->saddr = t->src.u3.ip;
65 if (t->dst.protonum == IPPROTO_TCP || 66 if (t->dst.protonum == IPPROTO_TCP ||
66 t->dst.protonum == IPPROTO_UDP || 67 t->dst.protonum == IPPROTO_UDP ||
67 t->dst.protonum == IPPROTO_UDPLITE || 68 t->dst.protonum == IPPROTO_UDPLITE ||
68 t->dst.protonum == IPPROTO_DCCP || 69 t->dst.protonum == IPPROTO_DCCP ||
69 t->dst.protonum == IPPROTO_SCTP) 70 t->dst.protonum == IPPROTO_SCTP)
70 fl->fl_ip_sport = t->src.u.tcp.port; 71 fl4->fl4_sport = t->src.u.tcp.port;
71 } 72 }
72} 73}
73#endif 74#endif
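(The rewrite of nat_decode_session() above is a mechanical rename from the old flat struct flowi members to the IPv4-specific struct flowi4. The mapping is spelled out below as a small hypothetical helper for the destination half of the tuple; the function name is invented for illustration.)

/* old struct flowi member     new struct flowi4 member
 *   fl->fl4_dst          ->     fl4->daddr
 *   fl->fl4_src          ->     fl4->saddr
 *   fl->fl_ip_dport      ->     fl4->fl4_dport
 *   fl->fl_ip_sport      ->     fl4->fl4_sport
 */
#include <net/flow.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static void tuple_dst_to_flowi4(const struct nf_conntrack_tuple *t,
				struct flowi4 *fl4)
{
	fl4->daddr = t->dst.u3.ip;
	if (t->dst.protonum == IPPROTO_TCP ||
	    t->dst.protonum == IPPROTO_UDP)
		fl4->fl4_dport = t->dst.u.tcp.port;	/* port union shares storage */
}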
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
new file mode 100644
index 000000000000..1f3bb11490c9
--- /dev/null
+++ b/net/ipv4/ping.c
@@ -0,0 +1,935 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * "Ping" sockets
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * Based on ipv4/udp.c code.
14 *
15 * Authors: Vasiliy Kulikov / Openwall (for Linux 2.6),
16 * Pavel Kankovsky (for Linux 2.4.32)
17 *
18 * Pavel gave all rights to bugs to Vasiliy,
19 * none of the bugs are Pavel's now.
20 *
21 */
22
23#include <asm/system.h>
24#include <linux/uaccess.h>
25#include <linux/types.h>
26#include <linux/fcntl.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/in.h>
30#include <linux/errno.h>
31#include <linux/timer.h>
32#include <linux/mm.h>
33#include <linux/inet.h>
34#include <linux/netdevice.h>
35#include <net/snmp.h>
36#include <net/ip.h>
37#include <net/ipv6.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <linux/skbuff.h>
41#include <linux/proc_fs.h>
42#include <net/sock.h>
43#include <net/ping.h>
44#include <net/icmp.h>
45#include <net/udp.h>
46#include <net/route.h>
47#include <net/inet_common.h>
48#include <net/checksum.h>
49
50
51static struct ping_table ping_table;
52
53static u16 ping_port_rover;
54
55static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask)
56{
57 int res = (num + net_hash_mix(net)) & mask;
58 pr_debug("hash(%d) = %d\n", num, res);
59 return res;
60}
61
62static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
63 struct net *net, unsigned num)
64{
65 return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
66}
67
68static int ping_v4_get_port(struct sock *sk, unsigned short ident)
69{
70 struct hlist_nulls_node *node;
71 struct hlist_nulls_head *hlist;
72 struct inet_sock *isk, *isk2;
73 struct sock *sk2 = NULL;
74
75 isk = inet_sk(sk);
76 write_lock_bh(&ping_table.lock);
77 if (ident == 0) {
78 u32 i;
79 u16 result = ping_port_rover + 1;
80
81 for (i = 0; i < (1L << 16); i++, result++) {
82 if (!result)
83 result++; /* avoid zero */
84 hlist = ping_hashslot(&ping_table, sock_net(sk),
85 result);
86 ping_portaddr_for_each_entry(sk2, node, hlist) {
87 isk2 = inet_sk(sk2);
88
89 if (isk2->inet_num == result)
90 goto next_port;
91 }
92
93 /* found */
94 ping_port_rover = ident = result;
95 break;
96next_port:
97 ;
98 }
99 if (i >= (1L << 16))
100 goto fail;
101 } else {
102 hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
103 ping_portaddr_for_each_entry(sk2, node, hlist) {
104 isk2 = inet_sk(sk2);
105
106 if ((isk2->inet_num == ident) &&
107 (sk2 != sk) &&
108 (!sk2->sk_reuse || !sk->sk_reuse))
109 goto fail;
110 }
111 }
112
113 pr_debug("found port/ident = %d\n", ident);
114 isk->inet_num = ident;
115 if (sk_unhashed(sk)) {
116 pr_debug("was not hashed\n");
117 sock_hold(sk);
118 hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
119 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
120 }
121 write_unlock_bh(&ping_table.lock);
122 return 0;
123
124fail:
125 write_unlock_bh(&ping_table.lock);
126 return 1;
127}
128
129static void ping_v4_hash(struct sock *sk)
130{
131 pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
132 BUG(); /* "Please do not press this button again." */
133}
134
135static void ping_v4_unhash(struct sock *sk)
136{
137 struct inet_sock *isk = inet_sk(sk);
138 pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
139 if (sk_hashed(sk)) {
140 struct hlist_nulls_head *hslot;
141
142 hslot = ping_hashslot(&ping_table, sock_net(sk), isk->inet_num);
143 write_lock_bh(&ping_table.lock);
144 hlist_nulls_del(&sk->sk_nulls_node);
145 sock_put(sk);
146 isk->inet_num = isk->inet_sport = 0;
147 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
148 write_unlock_bh(&ping_table.lock);
149 }
150}
151
152static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr,
153 u16 ident, int dif)
154{
155 struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
156 struct sock *sk = NULL;
157 struct inet_sock *isk;
158 struct hlist_nulls_node *hnode;
159
160 pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n",
161 (int)ident, (unsigned long)daddr, dif);
162 read_lock_bh(&ping_table.lock);
163
164 ping_portaddr_for_each_entry(sk, hnode, hslot) {
165 isk = inet_sk(sk);
166
167 pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk,
168 (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr,
169 sk->sk_bound_dev_if);
170
171 pr_debug("iterate\n");
172 if (isk->inet_num != ident)
173 continue;
174 if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
175 continue;
176 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
177 continue;
178
179 sock_hold(sk);
180 goto exit;
181 }
182
183 sk = NULL;
184exit:
185 read_unlock_bh(&ping_table.lock);
186
187 return sk;
188}
189
190static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
191 gid_t *high)
192{
193 gid_t *data = net->ipv4.sysctl_ping_group_range;
194 unsigned seq;
195 do {
196 seq = read_seqbegin(&sysctl_local_ports.lock);
197
198 *low = data[0];
199 *high = data[1];
200 } while (read_seqretry(&sysctl_local_ports.lock, seq));
201}
202
203
204static int ping_init_sock(struct sock *sk)
205{
206 struct net *net = sock_net(sk);
207 gid_t group = current_egid();
208 gid_t range[2];
209 struct group_info *group_info = get_current_groups();
210 int i, j, count = group_info->ngroups;
211
212 inet_get_ping_group_range_net(net, range, range+1);
213 if (range[0] <= group && group <= range[1])
214 return 0;
215
216 for (i = 0; i < group_info->nblocks; i++) {
217 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
218
219 for (j = 0; j < cp_count; j++) {
220 group = group_info->blocks[i][j];
221 if (range[0] <= group && group <= range[1])
222 return 0;
223 }
224
225 count -= cp_count;
226 }
227
228 return -EACCES;
229}
230
231static void ping_close(struct sock *sk, long timeout)
232{
233 pr_debug("ping_close(sk=%p,sk->num=%u)\n",
234 inet_sk(sk), inet_sk(sk)->inet_num);
235 pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
236
237 sk_common_release(sk);
238}
239
240/*
241 * We need our own bind because there are no privileged id's == local ports.
242 * Moreover, we don't allow binding to multi- and broadcast addresses.
243 */
244
245static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
246{
247 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
248 struct inet_sock *isk = inet_sk(sk);
249 unsigned short snum;
250 int chk_addr_ret;
251 int err;
252
253 if (addr_len < sizeof(struct sockaddr_in))
254 return -EINVAL;
255
256 pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
257 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
258
259 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
260 if (addr->sin_addr.s_addr == INADDR_ANY)
261 chk_addr_ret = RTN_LOCAL;
262
263 if ((sysctl_ip_nonlocal_bind == 0 &&
264 isk->freebind == 0 && isk->transparent == 0 &&
265 chk_addr_ret != RTN_LOCAL) ||
266 chk_addr_ret == RTN_MULTICAST ||
267 chk_addr_ret == RTN_BROADCAST)
268 return -EADDRNOTAVAIL;
269
270 lock_sock(sk);
271
272 err = -EINVAL;
273 if (isk->inet_num != 0)
274 goto out;
275
276 err = -EADDRINUSE;
277 isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
278 snum = ntohs(addr->sin_port);
279 if (ping_v4_get_port(sk, snum) != 0) {
280 isk->inet_saddr = isk->inet_rcv_saddr = 0;
281 goto out;
282 }
283
284 pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n",
285 (int)isk->inet_num,
286 (unsigned long) isk->inet_rcv_saddr,
287 (int)sk->sk_bound_dev_if);
288
289 err = 0;
290 if (isk->inet_rcv_saddr)
291 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
292 if (snum)
293 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
294 isk->inet_sport = htons(isk->inet_num);
295 isk->inet_daddr = 0;
296 isk->inet_dport = 0;
297 sk_dst_reset(sk);
298out:
299 release_sock(sk);
300 pr_debug("ping_v4_bind -> %d\n", err);
301 return err;
302}
303
304/*
305 * Is this a supported type of ICMP message?
306 */
307
308static inline int ping_supported(int type, int code)
309{
310 if (type == ICMP_ECHO && code == 0)
311 return 1;
312 return 0;
313}
314
315/*
316 * This routine is called by the ICMP module when it gets some
317 * sort of error condition.
318 */
319
320static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
321
322void ping_err(struct sk_buff *skb, u32 info)
323{
324 struct iphdr *iph = (struct iphdr *)skb->data;
325 struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
326 struct inet_sock *inet_sock;
327 int type = icmph->type;
328 int code = icmph->code;
329 struct net *net = dev_net(skb->dev);
330 struct sock *sk;
331 int harderr;
332 int err;
333
334 /* We assume the packet has already been checked by icmp_unreach */
335
336 if (!ping_supported(icmph->type, icmph->code))
337 return;
338
339 pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
340 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
341
342 sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
343 ntohs(icmph->un.echo.id), skb->dev->ifindex);
344 if (sk == NULL) {
345 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
346 pr_debug("no socket, dropping\n");
347 return; /* No socket for error */
348 }
349 pr_debug("err on socket %p\n", sk);
350
351 err = 0;
352 harderr = 0;
353 inet_sock = inet_sk(sk);
354
355 switch (type) {
356 default:
357 case ICMP_TIME_EXCEEDED:
358 err = EHOSTUNREACH;
359 break;
360 case ICMP_SOURCE_QUENCH:
361 /* This is not a real error but ping wants to see it.
362 * Report it with some fake errno. */
363 err = EREMOTEIO;
364 break;
365 case ICMP_PARAMETERPROB:
366 err = EPROTO;
367 harderr = 1;
368 break;
369 case ICMP_DEST_UNREACH:
370 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
371 if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
372 err = EMSGSIZE;
373 harderr = 1;
374 break;
375 }
376 goto out;
377 }
378 err = EHOSTUNREACH;
379 if (code <= NR_ICMP_UNREACH) {
380 harderr = icmp_err_convert[code].fatal;
381 err = icmp_err_convert[code].errno;
382 }
383 break;
384 case ICMP_REDIRECT:
385 /* See ICMP_SOURCE_QUENCH */
386 err = EREMOTEIO;
387 break;
388 }
389
390 /*
391 * RFC1122: OK. Passes ICMP errors back to application, as per
392 * 4.1.3.3.
393 */
394 if (!inet_sock->recverr) {
395 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
396 goto out;
397 } else {
398 ip_icmp_error(sk, skb, err, 0 /* no remote port */,
399 info, (u8 *)icmph);
400 }
401 sk->sk_err = err;
402 sk->sk_error_report(sk);
403out:
404 sock_put(sk);
405}
406
407/*
408 * Copy and checksum an ICMP Echo packet from user space into a buffer.
409 */
410
411struct pingfakehdr {
412 struct icmphdr icmph;
413 struct iovec *iov;
414 u32 wcheck;
415};
416
417static int ping_getfrag(void *from, char * to,
418 int offset, int fraglen, int odd, struct sk_buff *skb)
419{
420 struct pingfakehdr *pfh = (struct pingfakehdr *)from;
421
422 if (offset == 0) {
423 if (fraglen < sizeof(struct icmphdr))
424 BUG();
425 if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr),
426 pfh->iov, 0, fraglen - sizeof(struct icmphdr),
427 &pfh->wcheck))
428 return -EFAULT;
429
430 return 0;
431 }
432 if (offset < sizeof(struct icmphdr))
433 BUG();
434 if (csum_partial_copy_fromiovecend
435 (to, pfh->iov, offset - sizeof(struct icmphdr),
436 fraglen, &pfh->wcheck))
437 return -EFAULT;
438 return 0;
439}
440
441static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
442 struct flowi4 *fl4)
443{
444 struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
445
446 pfh->wcheck = csum_partial((char *)&pfh->icmph,
447 sizeof(struct icmphdr), pfh->wcheck);
448 pfh->icmph.checksum = csum_fold(pfh->wcheck);
449 memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
450 skb->ip_summed = CHECKSUM_NONE;
451 return ip_push_pending_frames(sk, fl4);
452}
453
454static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
455 size_t len)
456{
457 struct net *net = sock_net(sk);
458 struct flowi4 fl4;
459 struct inet_sock *inet = inet_sk(sk);
460 struct ipcm_cookie ipc;
461 struct icmphdr user_icmph;
462 struct pingfakehdr pfh;
463 struct rtable *rt = NULL;
464 struct ip_options_data opt_copy;
465 int free = 0;
466 u32 saddr, daddr, faddr;
467 u8 tos;
468 int err;
469
470 pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
471
472
473 if (len > 0xFFFF)
474 return -EMSGSIZE;
475
476 /*
477 * Check the flags.
478 */
479
480 /* Mirror BSD error message compatibility */
481 if (msg->msg_flags & MSG_OOB)
482 return -EOPNOTSUPP;
483
484 /*
485 * Fetch the ICMP header provided by the userland.
486 * iovec is modified!
487 */
488
489 if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
490 sizeof(struct icmphdr)))
491 return -EFAULT;
492 if (!ping_supported(user_icmph.type, user_icmph.code))
493 return -EINVAL;
494
495 /*
496 * Get and verify the address.
497 */
498
499 if (msg->msg_name) {
500 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
501 if (msg->msg_namelen < sizeof(*usin))
502 return -EINVAL;
503 if (usin->sin_family != AF_INET)
504 return -EINVAL;
505 daddr = usin->sin_addr.s_addr;
506 /* no remote port */
507 } else {
508 if (sk->sk_state != TCP_ESTABLISHED)
509 return -EDESTADDRREQ;
510 daddr = inet->inet_daddr;
511 /* no remote port */
512 }
513
514 ipc.addr = inet->inet_saddr;
515 ipc.opt = NULL;
516 ipc.oif = sk->sk_bound_dev_if;
517 ipc.tx_flags = 0;
518 err = sock_tx_timestamp(sk, &ipc.tx_flags);
519 if (err)
520 return err;
521
522 if (msg->msg_controllen) {
523 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
524 if (err)
525 return err;
526 if (ipc.opt)
527 free = 1;
528 }
529 if (!ipc.opt) {
530 struct ip_options_rcu *inet_opt;
531
532 rcu_read_lock();
533 inet_opt = rcu_dereference(inet->inet_opt);
534 if (inet_opt) {
535 memcpy(&opt_copy, inet_opt,
536 sizeof(*inet_opt) + inet_opt->opt.optlen);
537 ipc.opt = &opt_copy.opt;
538 }
539 rcu_read_unlock();
540 }
541
542 saddr = ipc.addr;
543 ipc.addr = faddr = daddr;
544
545 if (ipc.opt && ipc.opt->opt.srr) {
546 if (!daddr)
547 return -EINVAL;
548 faddr = ipc.opt->opt.faddr;
549 }
550 tos = RT_TOS(inet->tos);
551 if (sock_flag(sk, SOCK_LOCALROUTE) ||
552 (msg->msg_flags & MSG_DONTROUTE) ||
553 (ipc.opt && ipc.opt->opt.is_strictroute)) {
554 tos |= RTO_ONLINK;
555 }
556
557 if (ipv4_is_multicast(daddr)) {
558 if (!ipc.oif)
559 ipc.oif = inet->mc_index;
560 if (!saddr)
561 saddr = inet->mc_addr;
562 }
563
564 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
565 RT_SCOPE_UNIVERSE, sk->sk_protocol,
566 inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
567
568 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
569 rt = ip_route_output_flow(net, &fl4, sk);
570 if (IS_ERR(rt)) {
571 err = PTR_ERR(rt);
572 rt = NULL;
573 if (err == -ENETUNREACH)
574 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
575 goto out;
576 }
577
578 err = -EACCES;
579 if ((rt->rt_flags & RTCF_BROADCAST) &&
580 !sock_flag(sk, SOCK_BROADCAST))
581 goto out;
582
583 if (msg->msg_flags & MSG_CONFIRM)
584 goto do_confirm;
585back_from_confirm:
586
587 if (!ipc.addr)
588 ipc.addr = fl4.daddr;
589
590 lock_sock(sk);
591
592 pfh.icmph.type = user_icmph.type; /* already checked */
593 pfh.icmph.code = user_icmph.code; /* ditto */
594 pfh.icmph.checksum = 0;
595 pfh.icmph.un.echo.id = inet->inet_sport;
596 pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
597 pfh.iov = msg->msg_iov;
598 pfh.wcheck = 0;
599
600 err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
601 0, &ipc, &rt, msg->msg_flags);
602 if (err)
603 ip_flush_pending_frames(sk);
604 else
605 err = ping_push_pending_frames(sk, &pfh, &fl4);
606 release_sock(sk);
607
608out:
609 ip_rt_put(rt);
610 if (free)
611 kfree(ipc.opt);
612 if (!err) {
613 icmp_out_count(sock_net(sk), user_icmph.type);
614 return len;
615 }
616 return err;
617
618do_confirm:
619 dst_confirm(&rt->dst);
620 if (!(msg->msg_flags & MSG_PROBE) || len)
621 goto back_from_confirm;
622 err = 0;
623 goto out;
624}
625
626static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
627 size_t len, int noblock, int flags, int *addr_len)
628{
629 struct inet_sock *isk = inet_sk(sk);
630 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
631 struct sk_buff *skb;
632 int copied, err;
633
634 pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
635
636 if (flags & MSG_OOB)
637 goto out;
638
639 if (addr_len)
640 *addr_len = sizeof(*sin);
641
642 if (flags & MSG_ERRQUEUE)
643 return ip_recv_error(sk, msg, len);
644
645 skb = skb_recv_datagram(sk, flags, noblock, &err);
646 if (!skb)
647 goto out;
648
649 copied = skb->len;
650 if (copied > len) {
651 msg->msg_flags |= MSG_TRUNC;
652 copied = len;
653 }
654
655 /* Don't bother checking the checksum */
656 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
657 if (err)
658 goto done;
659
660 sock_recv_timestamp(msg, sk, skb);
661
662 /* Copy the address. */
663 if (sin) {
664 sin->sin_family = AF_INET;
665 sin->sin_port = 0 /* skb->h.uh->source */;
666 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
667 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
668 }
669 if (isk->cmsg_flags)
670 ip_cmsg_recv(msg, skb);
671 err = copied;
672
673done:
674 skb_free_datagram(sk, skb);
675out:
676 pr_debug("ping_recvmsg -> %d\n", err);
677 return err;
678}
679
680static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
681{
682 pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
683 inet_sk(sk), inet_sk(sk)->inet_num, skb);
684 if (sock_queue_rcv_skb(sk, skb) < 0) {
685 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS);
686 kfree_skb(skb);
687 pr_debug("ping_queue_rcv_skb -> failed\n");
688 return -1;
689 }
690 return 0;
691}
692
693
694/*
695 * All we need to do is get the socket.
696 */
697
698void ping_rcv(struct sk_buff *skb)
699{
700 struct sock *sk;
701 struct net *net = dev_net(skb->dev);
702 struct iphdr *iph = ip_hdr(skb);
703 struct icmphdr *icmph = icmp_hdr(skb);
704 u32 saddr = iph->saddr;
705 u32 daddr = iph->daddr;
706
707 /* We assume the packet has already been checked by icmp_rcv */
708
709 pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
710 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
711
712 /* Push ICMP header back */
713 skb_push(skb, skb->data - (u8 *)icmph);
714
715 sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
716 skb->dev->ifindex);
717 if (sk != NULL) {
718 pr_debug("rcv on socket %p\n", sk);
719 ping_queue_rcv_skb(sk, skb_get(skb));
720 sock_put(sk);
721 return;
722 }
723 pr_debug("no socket, dropping\n");
724
725 /* We're called from icmp_rcv(). kfree_skb() is done there. */
726}
727
728struct proto ping_prot = {
729 .name = "PING",
730 .owner = THIS_MODULE,
731 .init = ping_init_sock,
732 .close = ping_close,
733 .connect = ip4_datagram_connect,
734 .disconnect = udp_disconnect,
735 .setsockopt = ip_setsockopt,
736 .getsockopt = ip_getsockopt,
737 .sendmsg = ping_sendmsg,
738 .recvmsg = ping_recvmsg,
739 .bind = ping_bind,
740 .backlog_rcv = ping_queue_rcv_skb,
741 .hash = ping_v4_hash,
742 .unhash = ping_v4_unhash,
743 .get_port = ping_v4_get_port,
744 .obj_size = sizeof(struct inet_sock),
745};
746EXPORT_SYMBOL(ping_prot);
747
748#ifdef CONFIG_PROC_FS
749
750static struct sock *ping_get_first(struct seq_file *seq, int start)
751{
752 struct sock *sk;
753 struct ping_iter_state *state = seq->private;
754 struct net *net = seq_file_net(seq);
755
756 for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
757 ++state->bucket) {
758 struct hlist_nulls_node *node;
759 struct hlist_nulls_head *hslot;
760
761 hslot = &ping_table.hash[state->bucket];
762
763 if (hlist_nulls_empty(hslot))
764 continue;
765
766 sk_nulls_for_each(sk, node, hslot) {
767 if (net_eq(sock_net(sk), net))
768 goto found;
769 }
770 }
771 sk = NULL;
772found:
773 return sk;
774}
775
776static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
777{
778 struct ping_iter_state *state = seq->private;
779 struct net *net = seq_file_net(seq);
780
781 do {
782 sk = sk_nulls_next(sk);
783 } while (sk && (!net_eq(sock_net(sk), net)));
784
785 if (!sk)
786 return ping_get_first(seq, state->bucket + 1);
787 return sk;
788}
789
790static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
791{
792 struct sock *sk = ping_get_first(seq, 0);
793
794 if (sk)
795 while (pos && (sk = ping_get_next(seq, sk)) != NULL)
796 --pos;
797 return pos ? NULL : sk;
798}
799
800static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
801{
802 struct ping_iter_state *state = seq->private;
803 state->bucket = 0;
804
805 read_lock_bh(&ping_table.lock);
806
807 return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
808}
809
810static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
811{
812 struct sock *sk;
813
814 if (v == SEQ_START_TOKEN)
815 sk = ping_get_idx(seq, 0);
816 else
817 sk = ping_get_next(seq, v);
818
819 ++*pos;
820 return sk;
821}
822
823static void ping_seq_stop(struct seq_file *seq, void *v)
824{
825 read_unlock_bh(&ping_table.lock);
826}
827
828static void ping_format_sock(struct sock *sp, struct seq_file *f,
829 int bucket, int *len)
830{
831 struct inet_sock *inet = inet_sk(sp);
832 __be32 dest = inet->inet_daddr;
833 __be32 src = inet->inet_rcv_saddr;
834 __u16 destp = ntohs(inet->inet_dport);
835 __u16 srcp = ntohs(inet->inet_sport);
836
837 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
838 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
839 bucket, src, srcp, dest, destp, sp->sk_state,
840 sk_wmem_alloc_get(sp),
841 sk_rmem_alloc_get(sp),
842 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
843 atomic_read(&sp->sk_refcnt), sp,
844 atomic_read(&sp->sk_drops), len);
845}
846
847static int ping_seq_show(struct seq_file *seq, void *v)
848{
849 if (v == SEQ_START_TOKEN)
850 seq_printf(seq, "%-127s\n",
851 " sl local_address rem_address st tx_queue "
852 "rx_queue tr tm->when retrnsmt uid timeout "
853 "inode ref pointer drops");
854 else {
855 struct ping_iter_state *state = seq->private;
856 int len;
857
858 ping_format_sock(v, seq, state->bucket, &len);
859 seq_printf(seq, "%*s\n", 127 - len, "");
860 }
861 return 0;
862}
863
864static const struct seq_operations ping_seq_ops = {
865 .show = ping_seq_show,
866 .start = ping_seq_start,
867 .next = ping_seq_next,
868 .stop = ping_seq_stop,
869};
870
871static int ping_seq_open(struct inode *inode, struct file *file)
872{
873 return seq_open_net(inode, file, &ping_seq_ops,
874 sizeof(struct ping_iter_state));
875}
876
877static const struct file_operations ping_seq_fops = {
878 .open = ping_seq_open,
879 .read = seq_read,
880 .llseek = seq_lseek,
881 .release = seq_release_net,
882};
883
884static int ping_proc_register(struct net *net)
885{
886 struct proc_dir_entry *p;
887 int rc = 0;
888
889 p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops);
890 if (!p)
891 rc = -ENOMEM;
892 return rc;
893}
894
895static void ping_proc_unregister(struct net *net)
896{
897 proc_net_remove(net, "icmp");
898}
899
900
901static int __net_init ping_proc_init_net(struct net *net)
902{
903 return ping_proc_register(net);
904}
905
906static void __net_exit ping_proc_exit_net(struct net *net)
907{
908 ping_proc_unregister(net);
909}
910
911static struct pernet_operations ping_net_ops = {
912 .init = ping_proc_init_net,
913 .exit = ping_proc_exit_net,
914};
915
916int __init ping_proc_init(void)
917{
918 return register_pernet_subsys(&ping_net_ops);
919}
920
921void ping_proc_exit(void)
922{
923 unregister_pernet_subsys(&ping_net_ops);
924}
925
926#endif
927
928void __init ping_init(void)
929{
930 int i;
931
932 for (i = 0; i < PING_HTABLE_SIZE; i++)
933 INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
934 rwlock_init(&ping_table.lock);
935}
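(What the new file provides from userspace, as a minimal sketch: a process whose gid falls inside the net.ipv4.ping_group_range sysctl checked by ping_init_sock() above can open an ICMP datagram socket and send an echo request without CAP_NET_RAW. ping_supported() only accepts ICMP_ECHO with code 0, and ping_sendmsg() overwrites the echo id with the socket's ident and computes the checksum, so both may be left zero. Error handling is trimmed for brevity.)

#include <arpa/inet.h>
#include <netinet/ip_icmp.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int send_one_echo(const char *dst_ip)
{
	struct sockaddr_in dst = { .sin_family = AF_INET };
	struct icmphdr icmp;
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
	if (fd < 0)
		return -1;	/* gid outside ping_group_range, or kernel lacks ping sockets */

	inet_pton(AF_INET, dst_ip, &dst.sin_addr);

	memset(&icmp, 0, sizeof(icmp));
	icmp.type = ICMP_ECHO;			/* code stays 0 */
	icmp.un.echo.sequence = htons(1);
	/* id and checksum stay 0: the kernel fills in both on transmit */

	if (sendto(fd, &icmp, sizeof(icmp), 0,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}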
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 6390ba299b3d..11e1780455f2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -154,7 +154,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
154 * RFC 1122: SHOULD pass TOS value up to the transport layer. 154 * RFC 1122: SHOULD pass TOS value up to the transport layer.
155 * -> It does. And not only TOS, but all IP header. 155 * -> It does. And not only TOS, but all IP header.
156 */ 156 */
157static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) 157static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
158{ 158{
159 struct sock *sk; 159 struct sock *sk;
160 struct hlist_head *head; 160 struct hlist_head *head;
@@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
247 } 247 }
248 248
249 if (inet->recverr) { 249 if (inet->recverr) {
250 struct iphdr *iph = (struct iphdr *)skb->data; 250 const struct iphdr *iph = (const struct iphdr *)skb->data;
251 u8 *payload = skb->data + (iph->ihl << 2); 251 u8 *payload = skb->data + (iph->ihl << 2);
252 252
253 if (inet->hdrincl) 253 if (inet->hdrincl)
@@ -265,7 +265,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
265{ 265{
266 int hash; 266 int hash;
267 struct sock *raw_sk; 267 struct sock *raw_sk;
268 struct iphdr *iph; 268 const struct iphdr *iph;
269 struct net *net; 269 struct net *net;
270 270
271 hash = protocol & (RAW_HTABLE_SIZE - 1); 271 hash = protocol & (RAW_HTABLE_SIZE - 1);
@@ -273,7 +273,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
273 read_lock(&raw_v4_hashinfo.lock); 273 read_lock(&raw_v4_hashinfo.lock);
274 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); 274 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
275 if (raw_sk != NULL) { 275 if (raw_sk != NULL) {
276 iph = (struct iphdr *)skb->data; 276 iph = (const struct iphdr *)skb->data;
277 net = dev_net(skb->dev); 277 net = dev_net(skb->dev);
278 278
279 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, 279 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
@@ -281,7 +281,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
281 skb->dev->ifindex)) != NULL) { 281 skb->dev->ifindex)) != NULL) {
282 raw_err(raw_sk, skb, info); 282 raw_err(raw_sk, skb, info);
283 raw_sk = sk_next(raw_sk); 283 raw_sk = sk_next(raw_sk);
284 iph = (struct iphdr *)skb->data; 284 iph = (const struct iphdr *)skb->data;
285 } 285 }
286 } 286 }
287 read_unlock(&raw_v4_hashinfo.lock); 287 read_unlock(&raw_v4_hashinfo.lock);
@@ -314,9 +314,10 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
314 return 0; 314 return 0;
315} 315}
316 316
317static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, 317static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
318 struct rtable **rtp, 318 void *from, size_t length,
319 unsigned int flags) 319 struct rtable **rtp,
320 unsigned int flags)
320{ 321{
321 struct inet_sock *inet = inet_sk(sk); 322 struct inet_sock *inet = inet_sk(sk);
322 struct net *net = sock_net(sk); 323 struct net *net = sock_net(sk);
@@ -327,7 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
327 struct rtable *rt = *rtp; 328 struct rtable *rt = *rtp;
328 329
329 if (length > rt->dst.dev->mtu) { 330 if (length > rt->dst.dev->mtu) {
330 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 331 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
331 rt->dst.dev->mtu); 332 rt->dst.dev->mtu);
332 return -EMSGSIZE; 333 return -EMSGSIZE;
333 } 334 }
@@ -372,7 +373,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
372 373
373 if (iphlen >= sizeof(*iph)) { 374 if (iphlen >= sizeof(*iph)) {
374 if (!iph->saddr) 375 if (!iph->saddr)
375 iph->saddr = rt->rt_src; 376 iph->saddr = fl4->saddr;
376 iph->check = 0; 377 iph->check = 0;
377 iph->tot_len = htons(length); 378 iph->tot_len = htons(length);
378 if (!iph->id) 379 if (!iph->id)
@@ -402,7 +403,7 @@ error:
402 return err; 403 return err;
403} 404}
404 405
405static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) 406static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg)
406{ 407{
407 struct iovec *iov; 408 struct iovec *iov;
408 u8 __user *type = NULL; 409 u8 __user *type = NULL;
@@ -418,7 +419,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
418 if (!iov) 419 if (!iov)
419 continue; 420 continue;
420 421
421 switch (fl->proto) { 422 switch (fl4->flowi4_proto) {
422 case IPPROTO_ICMP: 423 case IPPROTO_ICMP:
423 /* check if one-byte field is readable or not. */ 424 /* check if one-byte field is readable or not. */
424 if (iov->iov_base && iov->iov_len < 1) 425 if (iov->iov_base && iov->iov_len < 1)
@@ -433,8 +434,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
433 code = iov->iov_base; 434 code = iov->iov_base;
434 435
435 if (type && code) { 436 if (type && code) {
436 if (get_user(fl->fl_icmp_type, type) || 437 if (get_user(fl4->fl4_icmp_type, type) ||
437 get_user(fl->fl_icmp_code, code)) 438 get_user(fl4->fl4_icmp_code, code))
438 return -EFAULT; 439 return -EFAULT;
439 probed = 1; 440 probed = 1;
440 } 441 }
@@ -455,11 +456,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
455 struct inet_sock *inet = inet_sk(sk); 456 struct inet_sock *inet = inet_sk(sk);
456 struct ipcm_cookie ipc; 457 struct ipcm_cookie ipc;
457 struct rtable *rt = NULL; 458 struct rtable *rt = NULL;
459 struct flowi4 fl4;
458 int free = 0; 460 int free = 0;
459 __be32 daddr; 461 __be32 daddr;
460 __be32 saddr; 462 __be32 saddr;
461 u8 tos; 463 u8 tos;
462 int err; 464 int err;
465 struct ip_options_data opt_copy;
463 466
464 err = -EMSGSIZE; 467 err = -EMSGSIZE;
465 if (len > 0xFFFF) 468 if (len > 0xFFFF)
@@ -520,8 +523,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
520 saddr = ipc.addr; 523 saddr = ipc.addr;
521 ipc.addr = daddr; 524 ipc.addr = daddr;
522 525
523 if (!ipc.opt) 526 if (!ipc.opt) {
524 ipc.opt = inet->opt; 527 struct ip_options_rcu *inet_opt;
528
529 rcu_read_lock();
530 inet_opt = rcu_dereference(inet->inet_opt);
531 if (inet_opt) {
532 memcpy(&opt_copy, inet_opt,
533 sizeof(*inet_opt) + inet_opt->opt.optlen);
534 ipc.opt = &opt_copy.opt;
535 }
536 rcu_read_unlock();
537 }
525 538
526 if (ipc.opt) { 539 if (ipc.opt) {
527 err = -EINVAL; 540 err = -EINVAL;
@@ -530,10 +543,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
530 */ 543 */
531 if (inet->hdrincl) 544 if (inet->hdrincl)
532 goto done; 545 goto done;
533 if (ipc.opt->srr) { 546 if (ipc.opt->opt.srr) {
534 if (!daddr) 547 if (!daddr)
535 goto done; 548 goto done;
536 daddr = ipc.opt->faddr; 549 daddr = ipc.opt->opt.faddr;
537 } 550 }
538 } 551 }
539 tos = RT_CONN_FLAGS(sk); 552 tos = RT_CONN_FLAGS(sk);
@@ -547,26 +560,24 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
547 saddr = inet->mc_addr; 560 saddr = inet->mc_addr;
548 } 561 }
549 562
550 { 563 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
551 struct flowi fl = { .oif = ipc.oif, 564 RT_SCOPE_UNIVERSE,
552 .mark = sk->sk_mark, 565 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
553 .fl4_dst = daddr, 566 FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
554 .fl4_src = saddr,
555 .fl4_tos = tos,
556 .proto = inet->hdrincl ? IPPROTO_RAW :
557 sk->sk_protocol,
558 };
559 if (!inet->hdrincl) {
560 err = raw_probe_proto_opt(&fl, msg);
561 if (err)
562 goto done;
563 }
564 567
565 security_sk_classify_flow(sk, &fl); 568 if (!inet->hdrincl) {
566 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); 569 err = raw_probe_proto_opt(&fl4, msg);
570 if (err)
571 goto done;
567 } 572 }
568 if (err) 573
574 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
575 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
576 if (IS_ERR(rt)) {
577 err = PTR_ERR(rt);
578 rt = NULL;
569 goto done; 579 goto done;
580 }
570 581
571 err = -EACCES; 582 err = -EACCES;
572 if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) 583 if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
@@ -577,19 +588,20 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
577back_from_confirm: 588back_from_confirm:
578 589
579 if (inet->hdrincl) 590 if (inet->hdrincl)
580 err = raw_send_hdrinc(sk, msg->msg_iov, len, 591 err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len,
581 &rt, msg->msg_flags); 592 &rt, msg->msg_flags);
582 593
583 else { 594 else {
584 if (!ipc.addr) 595 if (!ipc.addr)
585 ipc.addr = rt->rt_dst; 596 ipc.addr = fl4.daddr;
586 lock_sock(sk); 597 lock_sock(sk);
587 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, 598 err = ip_append_data(sk, &fl4, ip_generic_getfrag,
588 &ipc, &rt, msg->msg_flags); 599 msg->msg_iov, len, 0,
600 &ipc, &rt, msg->msg_flags);
589 if (err) 601 if (err)
590 ip_flush_pending_frames(sk); 602 ip_flush_pending_frames(sk);
591 else if (!(msg->msg_flags & MSG_MORE)) { 603 else if (!(msg->msg_flags & MSG_MORE)) {
592 err = ip_push_pending_frames(sk); 604 err = ip_push_pending_frames(sk, &fl4);
593 if (err == -ENOBUFS && !inet->recverr) 605 if (err == -ENOBUFS && !inet->recverr)
594 err = 0; 606 err = 0;
595 } 607 }
@@ -616,7 +628,7 @@ do_confirm:
616static void raw_close(struct sock *sk, long timeout) 628static void raw_close(struct sock *sk, long timeout)
617{ 629{
618 /* 630 /*
619 * Raw sockets may have direct kernel refereneces. Kill them. 631 * Raw sockets may have direct kernel references. Kill them.
620 */ 632 */
621 ip_ra_control(sk, 0, NULL); 633 ip_ra_control(sk, 0, NULL);
622 634
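(Shape of the new output-route lookup used in raw_sendmsg() above, condensed into a hypothetical helper: the on-stack struct flowi designated initializer is replaced by flowi4_init_output(), and ip_route_output_flow() now returns the rtable, or an ERR_PTR, instead of filling a pointer passed by reference.)

static struct rtable *example_output_route(struct sock *sk, __be32 daddr,
					   __be32 saddr, u8 tos)
{
	struct flowi4 fl4;

	flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, tos,
			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
			   FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);

	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
	return ip_route_output_flow(sock_net(sk), &fl4, sk);	/* check with IS_ERR() */
}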
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ed6603c2f6d..b24d58e6bbcd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,8 +109,8 @@
109#include <linux/sysctl.h> 109#include <linux/sysctl.h>
110#endif 110#endif
111 111
112#define RT_FL_TOS(oldflp) \ 112#define RT_FL_TOS(oldflp4) \
113 ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) 113 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
114 114
115#define IP_MAX_MTU 0xFFF0 115#define IP_MAX_MTU 0xFFF0
116 116
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
131static int ip_rt_min_advmss __read_mostly = 256; 131static int ip_rt_min_advmss __read_mostly = 256;
132static int rt_chain_length_max __read_mostly = 20; 132static int rt_chain_length_max __read_mostly = 20;
133 133
134static struct delayed_work expires_work;
135static unsigned long expires_ljiffies;
136
137/* 134/*
138 * Interface to generic destination cache. 135 * Interface to generic destination cache.
139 */ 136 */
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
152{ 149{
153} 150}
154 151
152static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
153{
154 struct rtable *rt = (struct rtable *) dst;
155 struct inet_peer *peer;
156 u32 *p = NULL;
157
158 if (!rt->peer)
159 rt_bind_peer(rt, rt->rt_dst, 1);
160
161 peer = rt->peer;
162 if (peer) {
163 u32 *old_p = __DST_METRICS_PTR(old);
164 unsigned long prev, new;
165
166 p = peer->metrics;
167 if (inet_metrics_new(peer))
168 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
169
170 new = (unsigned long) p;
171 prev = cmpxchg(&dst->_metrics, old, new);
172
173 if (prev != old) {
174 p = __DST_METRICS_PTR(prev);
175 if (prev & DST_METRICS_READ_ONLY)
176 p = NULL;
177 } else {
178 if (rt->fi) {
179 fib_info_put(rt->fi);
180 rt->fi = NULL;
181 }
182 }
183 }
184 return p;
185}
186
155static struct dst_ops ipv4_dst_ops = { 187static struct dst_ops ipv4_dst_ops = {
156 .family = AF_INET, 188 .family = AF_INET,
157 .protocol = cpu_to_be16(ETH_P_IP), 189 .protocol = cpu_to_be16(ETH_P_IP),
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = {
159 .check = ipv4_dst_check, 191 .check = ipv4_dst_check,
160 .default_advmss = ipv4_default_advmss, 192 .default_advmss = ipv4_default_advmss,
161 .default_mtu = ipv4_default_mtu, 193 .default_mtu = ipv4_default_mtu,
194 .cow_metrics = ipv4_cow_metrics,
162 .destroy = ipv4_dst_destroy, 195 .destroy = ipv4_dst_destroy,
163 .ifdown = ipv4_dst_ifdown, 196 .ifdown = ipv4_dst_ifdown,
164 .negative_advice = ipv4_negative_advice, 197 .negative_advice = ipv4_negative_advice,
@@ -171,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = {
171 204
172const __u8 ip_tos2prio[16] = { 205const __u8 ip_tos2prio[16] = {
173 TC_PRIO_BESTEFFORT, 206 TC_PRIO_BESTEFFORT,
174 ECN_OR_COST(FILLER), 207 ECN_OR_COST(BESTEFFORT),
175 TC_PRIO_BESTEFFORT, 208 TC_PRIO_BESTEFFORT,
176 ECN_OR_COST(BESTEFFORT), 209 ECN_OR_COST(BESTEFFORT),
177 TC_PRIO_BULK, 210 TC_PRIO_BULK,
@@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
391 dst_metric(&r->dst, RTAX_WINDOW), 424 dst_metric(&r->dst, RTAX_WINDOW),
392 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + 425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
393 dst_metric(&r->dst, RTAX_RTTVAR)), 426 dst_metric(&r->dst, RTAX_RTTVAR)),
394 r->fl.fl4_tos, 427 r->rt_key_tos,
395 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, 428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
396 r->dst.hh ? (r->dst.hh->hh_output == 429 r->dst.hh ? (r->dst.hh->hh_output ==
397 dev_queue_xmit) : 0, 430 dev_queue_xmit) : 0,
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = {
514 .release = seq_release, 547 .release = seq_release,
515}; 548};
516 549
517#ifdef CONFIG_NET_CLS_ROUTE 550#ifdef CONFIG_IP_ROUTE_CLASSID
518static int rt_acct_proc_show(struct seq_file *m, void *v) 551static int rt_acct_proc_show(struct seq_file *m, void *v)
519{ 552{
520 struct ip_rt_acct *dst, *src; 553 struct ip_rt_acct *dst, *src;
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
567 if (!pde) 600 if (!pde)
568 goto err2; 601 goto err2;
569 602
570#ifdef CONFIG_NET_CLS_ROUTE 603#ifdef CONFIG_IP_ROUTE_CLASSID
571 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); 604 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
572 if (!pde) 605 if (!pde)
573 goto err3; 606 goto err3;
574#endif 607#endif
575 return 0; 608 return 0;
576 609
577#ifdef CONFIG_NET_CLS_ROUTE 610#ifdef CONFIG_IP_ROUTE_CLASSID
578err3: 611err3:
579 remove_proc_entry("rt_cache", net->proc_net_stat); 612 remove_proc_entry("rt_cache", net->proc_net_stat);
580#endif 613#endif
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
588{ 621{
589 remove_proc_entry("rt_cache", net->proc_net_stat); 622 remove_proc_entry("rt_cache", net->proc_net_stat);
590 remove_proc_entry("rt_cache", net->proc_net); 623 remove_proc_entry("rt_cache", net->proc_net);
591#ifdef CONFIG_NET_CLS_ROUTE 624#ifdef CONFIG_IP_ROUTE_CLASSID
592 remove_proc_entry("rt_acct", net->proc_net); 625 remove_proc_entry("rt_acct", net->proc_net);
593#endif 626#endif
594} 627}
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth)
632static inline int rt_valuable(struct rtable *rth) 665static inline int rt_valuable(struct rtable *rth)
633{ 666{
634 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 667 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
635 rth->dst.expires; 668 (rth->peer && rth->peer->pmtu_expires);
636} 669}
637 670
638static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 671static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
643 if (atomic_read(&rth->dst.__refcnt)) 676 if (atomic_read(&rth->dst.__refcnt))
644 goto out; 677 goto out;
645 678
646 ret = 1;
647 if (rth->dst.expires &&
648 time_after_eq(jiffies, rth->dst.expires))
649 goto out;
650
651 age = jiffies - rth->dst.lastuse; 679 age = jiffies - rth->dst.lastuse;
652 ret = 0;
653 if ((age <= tmo1 && !rt_fast_clean(rth)) || 680 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
654 (age <= tmo2 && rt_valuable(rth))) 681 (age <= tmo2 && rt_valuable(rth)))
655 goto out; 682 goto out;
@@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net)
684 net->ipv4.sysctl_rt_cache_rebuild_count; 711 net->ipv4.sysctl_rt_cache_rebuild_count;
685} 712}
686 713
687static inline bool compare_hash_inputs(const struct flowi *fl1, 714static inline bool compare_hash_inputs(const struct rtable *rt1,
688 const struct flowi *fl2) 715 const struct rtable *rt2)
689{ 716{
690 return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | 717 return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
691 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | 718 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
692 (fl1->iif ^ fl2->iif)) == 0); 719 (rt1->rt_iif ^ rt2->rt_iif)) == 0);
693} 720}
694 721
695static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 722static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
696{ 723{
697 return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | 724 return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
698 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | 725 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
699 (fl1->mark ^ fl2->mark) | 726 (rt1->rt_mark ^ rt2->rt_mark) |
700 (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | 727 (rt1->rt_key_tos ^ rt2->rt_key_tos) |
701 (fl1->oif ^ fl2->oif) | 728 (rt1->rt_oif ^ rt2->rt_oif) |
702 (fl1->iif ^ fl2->iif)) == 0; 729 (rt1->rt_iif ^ rt2->rt_iif)) == 0;
703} 730}
704 731
705static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 732static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -786,106 +813,15 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
786 const struct rtable *aux = head; 813 const struct rtable *aux = head;
787 814
788 while (aux != rth) { 815 while (aux != rth) {
789 if (compare_hash_inputs(&aux->fl, &rth->fl)) 816 if (compare_hash_inputs(aux, rth))
790 return 0; 817 return 0;
791 aux = rcu_dereference_protected(aux->dst.rt_next, 1); 818 aux = rcu_dereference_protected(aux->dst.rt_next, 1);
792 } 819 }
793 return ONE; 820 return ONE;
794} 821}
795 822
796static void rt_check_expire(void)
797{
798 static unsigned int rover;
799 unsigned int i = rover, goal;
800 struct rtable *rth;
801 struct rtable __rcu **rthp;
802 unsigned long samples = 0;
803 unsigned long sum = 0, sum2 = 0;
804 unsigned long delta;
805 u64 mult;
806
807 delta = jiffies - expires_ljiffies;
808 expires_ljiffies = jiffies;
809 mult = ((u64)delta) << rt_hash_log;
810 if (ip_rt_gc_timeout > 1)
811 do_div(mult, ip_rt_gc_timeout);
812 goal = (unsigned int)mult;
813 if (goal > rt_hash_mask)
814 goal = rt_hash_mask + 1;
815 for (; goal > 0; goal--) {
816 unsigned long tmo = ip_rt_gc_timeout;
817 unsigned long length;
818
819 i = (i + 1) & rt_hash_mask;
820 rthp = &rt_hash_table[i].chain;
821
822 if (need_resched())
823 cond_resched();
824
825 samples++;
826
827 if (rcu_dereference_raw(*rthp) == NULL)
828 continue;
829 length = 0;
830 spin_lock_bh(rt_hash_lock_addr(i));
831 while ((rth = rcu_dereference_protected(*rthp,
832 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
833 prefetch(rth->dst.rt_next);
834 if (rt_is_expired(rth)) {
835 *rthp = rth->dst.rt_next;
836 rt_free(rth);
837 continue;
838 }
839 if (rth->dst.expires) {
840 /* Entry is expired even if it is in use */
841 if (time_before_eq(jiffies, rth->dst.expires)) {
842nofree:
843 tmo >>= 1;
844 rthp = &rth->dst.rt_next;
845 /*
846 * We only count entries on
847 * a chain with equal hash inputs once
848 * so that entries for different QOS
849 * levels, and other non-hash input
850 * attributes don't unfairly skew
851 * the length computation
852 */
853 length += has_noalias(rt_hash_table[i].chain, rth);
854 continue;
855 }
856 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
857 goto nofree;
858
859 /* Cleanup aged off entries. */
860 *rthp = rth->dst.rt_next;
861 rt_free(rth);
862 }
863 spin_unlock_bh(rt_hash_lock_addr(i));
864 sum += length;
865 sum2 += length*length;
866 }
867 if (samples) {
868 unsigned long avg = sum / samples;
869 unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
870 rt_chain_length_max = max_t(unsigned long,
871 ip_rt_gc_elasticity,
872 (avg + 4*sd) >> FRACT_BITS);
873 }
874 rover = i;
875}
876
877/* 823/*
878 * rt_worker_func() is run in process context. 824 * Perturbation of rt_genid by a small quantity [1..256]
879 * we call rt_check_expire() to scan part of the hash table
880 */
881static void rt_worker_func(struct work_struct *work)
882{
883 rt_check_expire();
884 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
885}
886
887/*
888 * Pertubation of rt_genid by a small quantity [1..256]
889 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() 825 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
890 * many times (2^24) without giving recent rt_genid. 826 * many times (2^24) without giving recent rt_genid.
 891 * Jenkins hash is strong enough that little changes of rt_genid are OK. 827 * Jenkins hash is strong enough that little changes of rt_genid are OK.
@@ -1032,10 +968,6 @@ static int rt_garbage_collect(struct dst_ops *ops)
1032 break; 968 break;
1033 969
1034 expire >>= 1; 970 expire >>= 1;
1035#if RT_CACHE_DEBUG >= 2
1036 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
1037 dst_entries_get_fast(&ipv4_dst_ops), goal, i);
1038#endif
1039 971
1040 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) 972 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1041 goto out; 973 goto out;
@@ -1056,10 +988,6 @@ work_done:
1056 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || 988 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
1057 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) 989 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
1058 expire = ip_rt_gc_timeout; 990 expire = ip_rt_gc_timeout;
1059#if RT_CACHE_DEBUG >= 2
1060 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
1061 dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
1062#endif
1063out: return 0; 991out: return 0;
1064} 992}
1065 993
@@ -1078,8 +1006,8 @@ static int slow_chain_length(const struct rtable *head)
1078 return length >> FRACT_BITS; 1006 return length >> FRACT_BITS;
1079} 1007}
1080 1008
1081static int rt_intern_hash(unsigned hash, struct rtable *rt, 1009static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
1082 struct rtable **rp, struct sk_buff *skb, int ifindex) 1010 struct sk_buff *skb, int ifindex)
1083{ 1011{
1084 struct rtable *rth, *cand; 1012 struct rtable *rth, *cand;
1085 struct rtable __rcu **rthp, **candp; 1013 struct rtable __rcu **rthp, **candp;
@@ -1120,7 +1048,7 @@ restart:
1120 printk(KERN_WARNING 1048 printk(KERN_WARNING
1121 "Neighbour table failure & not caching routes.\n"); 1049 "Neighbour table failure & not caching routes.\n");
1122 ip_rt_put(rt); 1050 ip_rt_put(rt);
1123 return err; 1051 return ERR_PTR(err);
1124 } 1052 }
1125 } 1053 }
1126 1054
@@ -1137,7 +1065,7 @@ restart:
1137 rt_free(rth); 1065 rt_free(rth);
1138 continue; 1066 continue;
1139 } 1067 }
1140 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 1068 if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
1141 /* Put it first */ 1069 /* Put it first */
1142 *rthp = rth->dst.rt_next; 1070 *rthp = rth->dst.rt_next;
1143 /* 1071 /*
@@ -1157,11 +1085,9 @@ restart:
1157 spin_unlock_bh(rt_hash_lock_addr(hash)); 1085 spin_unlock_bh(rt_hash_lock_addr(hash));
1158 1086
1159 rt_drop(rt); 1087 rt_drop(rt);
1160 if (rp) 1088 if (skb)
1161 *rp = rth;
1162 else
1163 skb_dst_set(skb, &rth->dst); 1089 skb_dst_set(skb, &rth->dst);
1164 return 0; 1090 return rth;
1165 } 1091 }
1166 1092
1167 if (!atomic_read(&rth->dst.__refcnt)) { 1093 if (!atomic_read(&rth->dst.__refcnt)) {
@@ -1202,7 +1128,7 @@ restart:
1202 rt_emergency_hash_rebuild(net); 1128 rt_emergency_hash_rebuild(net);
1203 spin_unlock_bh(rt_hash_lock_addr(hash)); 1129 spin_unlock_bh(rt_hash_lock_addr(hash));
1204 1130
1205 hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1131 hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1206 ifindex, rt_genid(net)); 1132 ifindex, rt_genid(net));
1207 goto restart; 1133 goto restart;
1208 } 1134 }
@@ -1218,7 +1144,7 @@ restart:
1218 1144
1219 if (err != -ENOBUFS) { 1145 if (err != -ENOBUFS) {
1220 rt_drop(rt); 1146 rt_drop(rt);
1221 return err; 1147 return ERR_PTR(err);
1222 } 1148 }
1223 1149
1224 /* Neighbour tables are full and nothing 1150 /* Neighbour tables are full and nothing
@@ -1239,25 +1165,15 @@ restart:
1239 if (net_ratelimit()) 1165 if (net_ratelimit())
1240 printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); 1166 printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
1241 rt_drop(rt); 1167 rt_drop(rt);
1242 return -ENOBUFS; 1168 return ERR_PTR(-ENOBUFS);
1243 } 1169 }
1244 } 1170 }
1245 1171
1246 rt->dst.rt_next = rt_hash_table[hash].chain; 1172 rt->dst.rt_next = rt_hash_table[hash].chain;
1247 1173
1248#if RT_CACHE_DEBUG >= 2
1249 if (rt->dst.rt_next) {
1250 struct rtable *trt;
1251 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1252 hash, &rt->rt_dst);
1253 for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1254 printk(" . %pI4", &trt->rt_dst);
1255 printk("\n");
1256 }
1257#endif
1258 /* 1174 /*
1259 * Since lookup is lockfree, we must make sure 1175 * Since lookup is lockfree, we must make sure
1260 * previous writes to rt are comitted to memory 1176 * previous writes to rt are committed to memory
1261 * before making rt visible to other CPUS. 1177 * before making rt visible to other CPUS.
1262 */ 1178 */
1263 rcu_assign_pointer(rt_hash_table[hash].chain, rt); 1179 rcu_assign_pointer(rt_hash_table[hash].chain, rt);
@@ -1265,21 +1181,28 @@ restart:
1265 spin_unlock_bh(rt_hash_lock_addr(hash)); 1181 spin_unlock_bh(rt_hash_lock_addr(hash));
1266 1182
1267skip_hashing: 1183skip_hashing:
1268 if (rp) 1184 if (skb)
1269 *rp = rt;
1270 else
1271 skb_dst_set(skb, &rt->dst); 1185 skb_dst_set(skb, &rt->dst);
1272 return 0; 1186 return rt;
1187}
1188
1189static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
1190
1191static u32 rt_peer_genid(void)
1192{
1193 return atomic_read(&__rt_peer_genid);
1273} 1194}
1274 1195
1275void rt_bind_peer(struct rtable *rt, int create) 1196void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
1276{ 1197{
1277 struct inet_peer *peer; 1198 struct inet_peer *peer;
1278 1199
1279 peer = inet_getpeer_v4(rt->rt_dst, create); 1200 peer = inet_getpeer_v4(daddr, create);
1280 1201
1281 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) 1202 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1282 inet_putpeer(peer); 1203 inet_putpeer(peer);
1204 else
1205 rt->rt_peer_genid = rt_peer_genid();
1283} 1206}
1284 1207
1285/* 1208/*
@@ -1308,7 +1231,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1308 1231
1309 if (rt) { 1232 if (rt) {
1310 if (rt->peer == NULL) 1233 if (rt->peer == NULL)
1311 rt_bind_peer(rt, 1); 1234 rt_bind_peer(rt, rt->rt_dst, 1);
1312 1235
1313 /* If peer is attached to destination, it is never detached, 1236 /* If peer is attached to destination, it is never detached,
1314 so that we need not to grab a lock to dereference it. 1237 so that we need not to grab a lock to dereference it.
@@ -1349,13 +1272,8 @@ static void rt_del(unsigned hash, struct rtable *rt)
1349void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1272void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1350 __be32 saddr, struct net_device *dev) 1273 __be32 saddr, struct net_device *dev)
1351{ 1274{
1352 int i, k;
1353 struct in_device *in_dev = __in_dev_get_rcu(dev); 1275 struct in_device *in_dev = __in_dev_get_rcu(dev);
1354 struct rtable *rth; 1276 struct inet_peer *peer;
1355 struct rtable __rcu **rthp;
1356 __be32 skeys[2] = { saddr, 0 };
1357 int ikeys[2] = { dev->ifindex, 0 };
1358 struct netevent_redirect netevent;
1359 struct net *net; 1277 struct net *net;
1360 1278
1361 if (!in_dev) 1279 if (!in_dev)
@@ -1367,9 +1285,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1367 ipv4_is_zeronet(new_gw)) 1285 ipv4_is_zeronet(new_gw))
1368 goto reject_redirect; 1286 goto reject_redirect;
1369 1287
1370 if (!rt_caching(net))
1371 goto reject_redirect;
1372
1373 if (!IN_DEV_SHARED_MEDIA(in_dev)) { 1288 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1374 if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 1289 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1375 goto reject_redirect; 1290 goto reject_redirect;
@@ -1380,91 +1295,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1380 goto reject_redirect; 1295 goto reject_redirect;
1381 } 1296 }
1382 1297
1383 for (i = 0; i < 2; i++) { 1298 peer = inet_getpeer_v4(daddr, 1);
1384 for (k = 0; k < 2; k++) { 1299 if (peer) {
1385 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1300 peer->redirect_learned.a4 = new_gw;
1386 rt_genid(net));
1387
1388 rthp = &rt_hash_table[hash].chain;
1389
1390 while ((rth = rcu_dereference(*rthp)) != NULL) {
1391 struct rtable *rt;
1392
1393 if (rth->fl.fl4_dst != daddr ||
1394 rth->fl.fl4_src != skeys[i] ||
1395 rth->fl.oif != ikeys[k] ||
1396 rt_is_input_route(rth) ||
1397 rt_is_expired(rth) ||
1398 !net_eq(dev_net(rth->dst.dev), net)) {
1399 rthp = &rth->dst.rt_next;
1400 continue;
1401 }
1402
1403 if (rth->rt_dst != daddr ||
1404 rth->rt_src != saddr ||
1405 rth->dst.error ||
1406 rth->rt_gateway != old_gw ||
1407 rth->dst.dev != dev)
1408 break;
1409
1410 dst_hold(&rth->dst);
1411
1412 rt = dst_alloc(&ipv4_dst_ops);
1413 if (rt == NULL) {
1414 ip_rt_put(rth);
1415 return;
1416 }
1417
1418 /* Copy all the information. */
1419 *rt = *rth;
1420 rt->dst.__use = 1;
1421 atomic_set(&rt->dst.__refcnt, 1);
1422 rt->dst.child = NULL;
1423 if (rt->dst.dev)
1424 dev_hold(rt->dst.dev);
1425 rt->dst.obsolete = -1;
1426 rt->dst.lastuse = jiffies;
1427 rt->dst.path = &rt->dst;
1428 rt->dst.neighbour = NULL;
1429 rt->dst.hh = NULL;
1430#ifdef CONFIG_XFRM
1431 rt->dst.xfrm = NULL;
1432#endif
1433 rt->rt_genid = rt_genid(net);
1434 rt->rt_flags |= RTCF_REDIRECTED;
1435
1436 /* Gateway is different ... */
1437 rt->rt_gateway = new_gw;
1438
1439 /* Redirect received -> path was valid */
1440 dst_confirm(&rth->dst);
1441
1442 if (rt->peer)
1443 atomic_inc(&rt->peer->refcnt);
1444
1445 if (arp_bind_neighbour(&rt->dst) ||
1446 !(rt->dst.neighbour->nud_state &
1447 NUD_VALID)) {
1448 if (rt->dst.neighbour)
1449 neigh_event_send(rt->dst.neighbour, NULL);
1450 ip_rt_put(rth);
1451 rt_drop(rt);
1452 goto do_next;
1453 }
1454 1301
1455 netevent.old = &rth->dst; 1302 inet_putpeer(peer);
1456 netevent.new = &rt->dst;
1457 call_netevent_notifiers(NETEVENT_REDIRECT,
1458 &netevent);
1459 1303
1460 rt_del(hash, rth); 1304 atomic_inc(&__rt_peer_genid);
1461 if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
1462 ip_rt_put(rt);
1463 goto do_next;
1464 }
1465 do_next:
1466 ;
1467 }
1468 } 1305 }
1469 return; 1306 return;
1470 1307
@@ -1488,18 +1325,20 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1488 if (dst->obsolete > 0) { 1325 if (dst->obsolete > 0) {
1489 ip_rt_put(rt); 1326 ip_rt_put(rt);
1490 ret = NULL; 1327 ret = NULL;
1491 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1328 } else if (rt->rt_flags & RTCF_REDIRECTED) {
1492 (rt->dst.expires && 1329 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1493 time_after_eq(jiffies, rt->dst.expires))) { 1330 rt->rt_oif,
1494 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1495 rt->fl.oif,
1496 rt_genid(dev_net(dst->dev))); 1331 rt_genid(dev_net(dst->dev)));
1497#if RT_CACHE_DEBUG >= 1
1498 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
1499 &rt->rt_dst, rt->fl.fl4_tos);
1500#endif
1501 rt_del(hash, rt); 1332 rt_del(hash, rt);
1502 ret = NULL; 1333 ret = NULL;
1334 } else if (rt->peer &&
1335 rt->peer->pmtu_expires &&
1336 time_after_eq(jiffies, rt->peer->pmtu_expires)) {
1337 unsigned long orig = rt->peer->pmtu_expires;
1338
1339 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1340 dst_metric_set(dst, RTAX_MTU,
1341 rt->peer->pmtu_orig);
1503 } 1342 }
1504 } 1343 }
1505 return ret; 1344 return ret;
@@ -1525,6 +1364,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1525{ 1364{
1526 struct rtable *rt = skb_rtable(skb); 1365 struct rtable *rt = skb_rtable(skb);
1527 struct in_device *in_dev; 1366 struct in_device *in_dev;
1367 struct inet_peer *peer;
1528 int log_martians; 1368 int log_martians;
1529 1369
1530 rcu_read_lock(); 1370 rcu_read_lock();
@@ -1536,36 +1376,44 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1536 log_martians = IN_DEV_LOG_MARTIANS(in_dev); 1376 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1537 rcu_read_unlock(); 1377 rcu_read_unlock();
1538 1378
1379 if (!rt->peer)
1380 rt_bind_peer(rt, rt->rt_dst, 1);
1381 peer = rt->peer;
1382 if (!peer) {
1383 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1384 return;
1385 }
1386
1539 /* No redirected packets during ip_rt_redirect_silence; 1387 /* No redirected packets during ip_rt_redirect_silence;
1540 * reset the algorithm. 1388 * reset the algorithm.
1541 */ 1389 */
1542 if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) 1390 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
1543 rt->dst.rate_tokens = 0; 1391 peer->rate_tokens = 0;
1544 1392
1545 /* Too many ignored redirects; do not send anything 1393 /* Too many ignored redirects; do not send anything
1546 * set dst.rate_last to the last seen redirected packet. 1394 * set dst.rate_last to the last seen redirected packet.
1547 */ 1395 */
1548 if (rt->dst.rate_tokens >= ip_rt_redirect_number) { 1396 if (peer->rate_tokens >= ip_rt_redirect_number) {
1549 rt->dst.rate_last = jiffies; 1397 peer->rate_last = jiffies;
1550 return; 1398 return;
1551 } 1399 }
1552 1400
1553 /* Check for load limit; set rate_last to the latest sent 1401 /* Check for load limit; set rate_last to the latest sent
1554 * redirect. 1402 * redirect.
1555 */ 1403 */
1556 if (rt->dst.rate_tokens == 0 || 1404 if (peer->rate_tokens == 0 ||
1557 time_after(jiffies, 1405 time_after(jiffies,
1558 (rt->dst.rate_last + 1406 (peer->rate_last +
1559 (ip_rt_redirect_load << rt->dst.rate_tokens)))) { 1407 (ip_rt_redirect_load << peer->rate_tokens)))) {
1560 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1408 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1561 rt->dst.rate_last = jiffies; 1409 peer->rate_last = jiffies;
1562 ++rt->dst.rate_tokens; 1410 ++peer->rate_tokens;
1563#ifdef CONFIG_IP_ROUTE_VERBOSE 1411#ifdef CONFIG_IP_ROUTE_VERBOSE
1564 if (log_martians && 1412 if (log_martians &&
1565 rt->dst.rate_tokens == ip_rt_redirect_number && 1413 peer->rate_tokens == ip_rt_redirect_number &&
1566 net_ratelimit()) 1414 net_ratelimit())
1567 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1415 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1568 &rt->rt_src, rt->rt_iif, 1416 &ip_hdr(skb)->saddr, rt->rt_iif,
1569 &rt->rt_dst, &rt->rt_gateway); 1417 &rt->rt_dst, &rt->rt_gateway);
1570#endif 1418#endif
1571 } 1419 }
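
The hunk above moves the redirect rate-limit state (rate_last, rate_tokens) off the
dst_entry and onto the shared inet_peer, so every cached route to the same destination
now backs off together. The policy itself is unchanged: each redirect the host keeps
ignoring doubles the wait before the next one (ip_rt_redirect_load << rate_tokens),
sending stops after ip_rt_redirect_number attempts, and a long quiet period resets the
counter. A minimal standalone sketch of that exponential backoff, with invented
constants and a fake tick counter standing in for jiffies:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative stand-ins for jiffies and the ip_rt_redirect_* sysctls. */
static unsigned long now;                      /* fake clock, in "ticks"      */
#define REDIRECT_LOAD     2UL                  /* base gap between redirects  */
#define REDIRECT_NUMBER   9UL                  /* give up after this many     */
#define REDIRECT_SILENCE  (REDIRECT_LOAD << (REDIRECT_NUMBER + 1))

struct peer { unsigned long rate_last, rate_tokens; };

static bool may_send_redirect(struct peer *p)
{
        if (now - p->rate_last > REDIRECT_SILENCE)
                p->rate_tokens = 0;                    /* quiet spell: start over   */
        if (p->rate_tokens >= REDIRECT_NUMBER) {       /* host ignores us: stop     */
                p->rate_last = now;
                return false;
        }
        if (p->rate_tokens == 0 ||
            now > p->rate_last + (REDIRECT_LOAD << p->rate_tokens)) {
                p->rate_last = now;
                ++p->rate_tokens;                      /* next gap is twice as long */
                return true;
        }
        return false;
}

int main(void)
{
        struct peer p = { 0, 0 };

        for (now = 1; now <= 64; now++)
                if (may_send_redirect(&p))
                        printf("tick %3lu: send redirect (tokens=%lu)\n",
                               now, p.rate_tokens);
        return 0;
}

Compiled on its own this prints sends at ticks 1, 6, 15 and 32 and then goes quiet,
which is the intended "stop bothering hosts that ignore us" behaviour.
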
@@ -1574,7 +1422,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1574static int ip_error(struct sk_buff *skb) 1422static int ip_error(struct sk_buff *skb)
1575{ 1423{
1576 struct rtable *rt = skb_rtable(skb); 1424 struct rtable *rt = skb_rtable(skb);
1425 struct inet_peer *peer;
1577 unsigned long now; 1426 unsigned long now;
1427 bool send;
1578 int code; 1428 int code;
1579 1429
1580 switch (rt->dst.error) { 1430 switch (rt->dst.error) {
@@ -1594,15 +1444,24 @@ static int ip_error(struct sk_buff *skb)
1594 break; 1444 break;
1595 } 1445 }
1596 1446
1597 now = jiffies; 1447 if (!rt->peer)
1598 rt->dst.rate_tokens += now - rt->dst.rate_last; 1448 rt_bind_peer(rt, rt->rt_dst, 1);
1599 if (rt->dst.rate_tokens > ip_rt_error_burst) 1449 peer = rt->peer;
1600 rt->dst.rate_tokens = ip_rt_error_burst; 1450
1601 rt->dst.rate_last = now; 1451 send = true;
1602 if (rt->dst.rate_tokens >= ip_rt_error_cost) { 1452 if (peer) {
1603 rt->dst.rate_tokens -= ip_rt_error_cost; 1453 now = jiffies;
1604 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1454 peer->rate_tokens += now - peer->rate_last;
1455 if (peer->rate_tokens > ip_rt_error_burst)
1456 peer->rate_tokens = ip_rt_error_burst;
1457 peer->rate_last = now;
1458 if (peer->rate_tokens >= ip_rt_error_cost)
1459 peer->rate_tokens -= ip_rt_error_cost;
1460 else
1461 send = false;
1605 } 1462 }
1463 if (send)
1464 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1606 1465
1607out: kfree_skb(skb); 1466out: kfree_skb(skb);
1608 return 0; 1467 return 0;
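
ip_error() gets the same treatment: the token bucket that throttles ICMP_DEST_UNREACH
now lives on the inet_peer, refills with elapsed time, is capped at ip_rt_error_burst,
and is charged ip_rt_error_cost per message; when no peer can be bound the error is
simply always sent. A rough standalone model of that refill-and-spend step, with demo
constants in place of the real sysctls:

#include <stdio.h>
#include <stdbool.h>

#define ERROR_BURST  50UL    /* demo stand-in for ip_rt_error_burst */
#define ERROR_COST   10UL    /* demo stand-in for ip_rt_error_cost  */

struct peer { unsigned long rate_last, rate_tokens; };

/* Refill the bucket with elapsed time, cap it at the burst size, then try to
 * spend ERROR_COST tokens; the message goes out only if the spend succeeds. */
static bool may_send_error(struct peer *p, unsigned long now)
{
        p->rate_tokens += now - p->rate_last;
        if (p->rate_tokens > ERROR_BURST)
                p->rate_tokens = ERROR_BURST;
        p->rate_last = now;
        if (p->rate_tokens >= ERROR_COST) {
                p->rate_tokens -= ERROR_COST;
                return true;
        }
        return false;
}

int main(void)
{
        struct peer p = { 0, 0 };

        /* A burst of errors at one instant drains the bucket quickly ...    */
        for (int i = 0; i < 8; i++)
                printf("t=100 error %d: %s\n", i,
                       may_send_error(&p, 100) ? "sent" : "suppressed");
        /* ... and a short pause refills enough tokens for one more message. */
        printf("t=130 error  : %s\n", may_send_error(&p, 130) ? "sent" : "suppressed");
        return 0;
}
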
@@ -1626,92 +1485,144 @@ static inline unsigned short guess_mtu(unsigned short old_mtu)
1626 return 68; 1485 return 68;
1627} 1486}
1628 1487
1629unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, 1488unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
1630 unsigned short new_mtu, 1489 unsigned short new_mtu,
1631 struct net_device *dev) 1490 struct net_device *dev)
1632{ 1491{
1633 int i, k;
1634 unsigned short old_mtu = ntohs(iph->tot_len); 1492 unsigned short old_mtu = ntohs(iph->tot_len);
1635 struct rtable *rth;
1636 int ikeys[2] = { dev->ifindex, 0 };
1637 __be32 skeys[2] = { iph->saddr, 0, };
1638 __be32 daddr = iph->daddr;
1639 unsigned short est_mtu = 0; 1493 unsigned short est_mtu = 0;
1494 struct inet_peer *peer;
1640 1495
1641 for (k = 0; k < 2; k++) { 1496 peer = inet_getpeer_v4(iph->daddr, 1);
1642 for (i = 0; i < 2; i++) { 1497 if (peer) {
1643 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1498 unsigned short mtu = new_mtu;
1644 rt_genid(net));
1645
1646 rcu_read_lock();
1647 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1648 rth = rcu_dereference(rth->dst.rt_next)) {
1649 unsigned short mtu = new_mtu;
1650
1651 if (rth->fl.fl4_dst != daddr ||
1652 rth->fl.fl4_src != skeys[i] ||
1653 rth->rt_dst != daddr ||
1654 rth->rt_src != iph->saddr ||
1655 rth->fl.oif != ikeys[k] ||
1656 rt_is_input_route(rth) ||
1657 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1658 !net_eq(dev_net(rth->dst.dev), net) ||
1659 rt_is_expired(rth))
1660 continue;
1661 1499
1662 if (new_mtu < 68 || new_mtu >= old_mtu) { 1500 if (new_mtu < 68 || new_mtu >= old_mtu) {
1501 /* BSD 4.2 derived systems incorrectly adjust
1502 * tot_len by the IP header length, and report
1503 * a zero MTU in the ICMP message.
1504 */
1505 if (mtu == 0 &&
1506 old_mtu >= 68 + (iph->ihl << 2))
1507 old_mtu -= iph->ihl << 2;
1508 mtu = guess_mtu(old_mtu);
1509 }
1663 1510
1664 /* BSD 4.2 compatibility hack :-( */ 1511 if (mtu < ip_rt_min_pmtu)
1665 if (mtu == 0 && 1512 mtu = ip_rt_min_pmtu;
1666 old_mtu >= dst_mtu(&rth->dst) && 1513 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
1667 old_mtu >= 68 + (iph->ihl << 2)) 1514 unsigned long pmtu_expires;
1668 old_mtu -= iph->ihl << 2;
1669 1515
1670 mtu = guess_mtu(old_mtu); 1516 pmtu_expires = jiffies + ip_rt_mtu_expires;
1671 } 1517 if (!pmtu_expires)
1672 if (mtu <= dst_mtu(&rth->dst)) { 1518 pmtu_expires = 1UL;
1673 if (mtu < dst_mtu(&rth->dst)) { 1519
1674 dst_confirm(&rth->dst); 1520 est_mtu = mtu;
1675 if (mtu < ip_rt_min_pmtu) { 1521 peer->pmtu_learned = mtu;
1676 u32 lock = dst_metric(&rth->dst, 1522 peer->pmtu_expires = pmtu_expires;
1677 RTAX_LOCK);
1678 mtu = ip_rt_min_pmtu;
1679 lock |= (1 << RTAX_MTU);
1680 dst_metric_set(&rth->dst, RTAX_LOCK,
1681 lock);
1682 }
1683 dst_metric_set(&rth->dst, RTAX_MTU, mtu);
1684 dst_set_expires(&rth->dst,
1685 ip_rt_mtu_expires);
1686 }
1687 est_mtu = mtu;
1688 }
1689 }
1690 rcu_read_unlock();
1691 } 1523 }
1524
1525 inet_putpeer(peer);
1526
1527 atomic_inc(&__rt_peer_genid);
1692 } 1528 }
1693 return est_mtu ? : new_mtu; 1529 return est_mtu ? : new_mtu;
1694} 1530}
1695 1531
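
ip_rt_frag_needed() still falls back on guess_mtu() when the reported MTU is below 68
or not actually smaller than the packet that triggered the error; guess_mtu() (outside
this hunk) walks a plateau table and returns the next value below the old packet size,
bottoming out at the IPv4 minimum of 68. A hedged sketch of that lookup; the plateau
values below are the classic RFC 1191 ones, used purely for illustration rather than
copied from the kernel's own table:

#include <stdio.h>

/* RFC 1191 "plateau" MTUs (illustrative; the kernel keeps its own list). */
static const unsigned short plateaus[] = {
        32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296, 68
};

/* Return the largest plateau strictly smaller than old_mtu, or 68. */
static unsigned short guess_mtu(unsigned short old_mtu)
{
        for (unsigned i = 0; i < sizeof(plateaus) / sizeof(plateaus[0]); i++)
                if (plateaus[i] < old_mtu)
                        return plateaus[i];
        return 68;
}

int main(void)
{
        unsigned short probes[] = { 65535, 1500, 1492, 576, 69 };

        for (unsigned i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
                printf("old_mtu %5hu -> guess %5hu\n", probes[i], guess_mtu(probes[i]));
        return 0;
}
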
1532static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
1533{
1534 unsigned long expires = peer->pmtu_expires;
1535
1536 if (time_before(jiffies, expires)) {
1537 u32 orig_dst_mtu = dst_mtu(dst);
1538 if (peer->pmtu_learned < orig_dst_mtu) {
1539 if (!peer->pmtu_orig)
1540 peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
1541 dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
1542 }
1543 } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
1544 dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
1545}
1546
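
check_peer_pmtu() above, together with the matching expiry paths in
ipv4_negative_advice() and ipv4_link_failure(), leans on one lock-free trick:
pmtu_expires is cleared with cmpxchg(), so when the learned PMTU times out exactly one
of the racing callers wins and restores pmtu_orig (and elsewhere a computed expiry of
0 is nudged to 1UL, apparently so that 0 can keep meaning "nothing learned"). A
self-contained illustration of that one-shot reset, using C11 atomics in place of the
kernel's cmpxchg():

#include <stdatomic.h>
#include <stdio.h>
#include <stdbool.h>

struct peer_pmtu {
        _Atomic unsigned long expires;   /* 0 means "no learned PMTU"   */
        unsigned int learned;            /* MTU reported by the router  */
        unsigned int orig;               /* metric to restore on expiry */
};

/* Return true only for the caller that actually performs the restore. */
static bool expire_pmtu(struct peer_pmtu *p, unsigned long now)
{
        unsigned long exp = atomic_load(&p->expires);

        if (!exp || now < exp)
                return false;            /* nothing learned, or still valid */
        /* Only the compare-and-swap that still sees the old value wins. */
        return atomic_compare_exchange_strong(&p->expires, &exp, 0UL);
}

int main(void)
{
        struct peer_pmtu p = { .learned = 1400, .orig = 1500 };

        atomic_store(&p.expires, 100);
        /* Two checks past the deadline: only the first would write p.orig
         * back into the route's MTU metric, the second finds 0 and skips. */
        printf("first  check: %s\n", expire_pmtu(&p, 150) ? "restore" : "skip");
        printf("second check: %s\n", expire_pmtu(&p, 150) ? "restore" : "skip");
        return 0;
}
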
1696static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1547static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1697{ 1548{
1698 if (dst_mtu(dst) > mtu && mtu >= 68 && 1549 struct rtable *rt = (struct rtable *) dst;
1699 !(dst_metric_locked(dst, RTAX_MTU))) { 1550 struct inet_peer *peer;
1700 if (mtu < ip_rt_min_pmtu) { 1551
1701 u32 lock = dst_metric(dst, RTAX_LOCK); 1552 dst_confirm(dst);
1553
1554 if (!rt->peer)
1555 rt_bind_peer(rt, rt->rt_dst, 1);
1556 peer = rt->peer;
1557 if (peer) {
1558 if (mtu < ip_rt_min_pmtu)
1702 mtu = ip_rt_min_pmtu; 1559 mtu = ip_rt_min_pmtu;
1703 dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); 1560 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
1561 unsigned long pmtu_expires;
1562
1563 pmtu_expires = jiffies + ip_rt_mtu_expires;
1564 if (!pmtu_expires)
1565 pmtu_expires = 1UL;
1566
1567 peer->pmtu_learned = mtu;
1568 peer->pmtu_expires = pmtu_expires;
1569
1570 atomic_inc(&__rt_peer_genid);
1571 rt->rt_peer_genid = rt_peer_genid();
1704 } 1572 }
1705 dst_metric_set(dst, RTAX_MTU, mtu); 1573 check_peer_pmtu(dst, peer);
1706 dst_set_expires(dst, ip_rt_mtu_expires);
1707 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1708 } 1574 }
1709} 1575}
1710 1576
1577static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1578{
1579 struct rtable *rt = (struct rtable *) dst;
1580 __be32 orig_gw = rt->rt_gateway;
1581
1582 dst_confirm(&rt->dst);
1583
1584 neigh_release(rt->dst.neighbour);
1585 rt->dst.neighbour = NULL;
1586
1587 rt->rt_gateway = peer->redirect_learned.a4;
1588 if (arp_bind_neighbour(&rt->dst) ||
1589 !(rt->dst.neighbour->nud_state & NUD_VALID)) {
1590 if (rt->dst.neighbour)
1591 neigh_event_send(rt->dst.neighbour, NULL);
1592 rt->rt_gateway = orig_gw;
1593 return -EAGAIN;
1594 } else {
1595 rt->rt_flags |= RTCF_REDIRECTED;
1596 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
1597 rt->dst.neighbour);
1598 }
1599 return 0;
1600}
1601
1711static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1602static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1712{ 1603{
1713 if (rt_is_expired((struct rtable *)dst)) 1604 struct rtable *rt = (struct rtable *) dst;
1605
1606 if (rt_is_expired(rt))
1714 return NULL; 1607 return NULL;
1608 if (rt->rt_peer_genid != rt_peer_genid()) {
1609 struct inet_peer *peer;
1610
1611 if (!rt->peer)
1612 rt_bind_peer(rt, rt->rt_dst, 0);
1613
1614 peer = rt->peer;
1615 if (peer && peer->pmtu_expires)
1616 check_peer_pmtu(dst, peer);
1617
1618 if (peer && peer->redirect_learned.a4 &&
1619 peer->redirect_learned.a4 != rt->rt_gateway) {
1620 if (check_peer_redir(dst, peer))
1621 return NULL;
1622 }
1623
1624 rt->rt_peer_genid = rt_peer_genid();
1625 }
1715 return dst; 1626 return dst;
1716} 1627}
1717 1628
@@ -1720,6 +1631,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
1720 struct rtable *rt = (struct rtable *) dst; 1631 struct rtable *rt = (struct rtable *) dst;
1721 struct inet_peer *peer = rt->peer; 1632 struct inet_peer *peer = rt->peer;
1722 1633
1634 if (rt->fi) {
1635 fib_info_put(rt->fi);
1636 rt->fi = NULL;
1637 }
1723 if (peer) { 1638 if (peer) {
1724 rt->peer = NULL; 1639 rt->peer = NULL;
1725 inet_putpeer(peer); 1640 inet_putpeer(peer);
@@ -1734,8 +1649,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
1734 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1649 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1735 1650
1736 rt = skb_rtable(skb); 1651 rt = skb_rtable(skb);
1737 if (rt) 1652 if (rt &&
1738 dst_set_expires(&rt->dst, 0); 1653 rt->peer &&
1654 rt->peer->pmtu_expires) {
1655 unsigned long orig = rt->peer->pmtu_expires;
1656
1657 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1658 dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
1659 }
1739} 1660}
1740 1661
1741static int ip_rt_bug(struct sk_buff *skb) 1662static int ip_rt_bug(struct sk_buff *skb)
@@ -1756,17 +1677,30 @@ static int ip_rt_bug(struct sk_buff *skb)
1756 in IP options! 1677 in IP options!
1757 */ 1678 */
1758 1679
1759void ip_rt_get_source(u8 *addr, struct rtable *rt) 1680void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
1760{ 1681{
1761 __be32 src; 1682 __be32 src;
1762 struct fib_result res;
1763 1683
1764 if (rt_is_output_route(rt)) 1684 if (rt_is_output_route(rt))
1765 src = rt->rt_src; 1685 src = ip_hdr(skb)->saddr;
1766 else { 1686 else {
1687 struct fib_result res;
1688 struct flowi4 fl4;
1689 struct iphdr *iph;
1690
1691 iph = ip_hdr(skb);
1692
1693 memset(&fl4, 0, sizeof(fl4));
1694 fl4.daddr = iph->daddr;
1695 fl4.saddr = iph->saddr;
1696 fl4.flowi4_tos = iph->tos;
1697 fl4.flowi4_oif = rt->dst.dev->ifindex;
1698 fl4.flowi4_iif = skb->dev->ifindex;
1699 fl4.flowi4_mark = skb->mark;
1700
1767 rcu_read_lock(); 1701 rcu_read_lock();
1768 if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) 1702 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
1769 src = FIB_RES_PREFSRC(res); 1703 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
1770 else 1704 else
1771 src = inet_select_addr(rt->dst.dev, rt->rt_gateway, 1705 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1772 RT_SCOPE_UNIVERSE); 1706 RT_SCOPE_UNIVERSE);
@@ -1775,7 +1709,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1775 memcpy(addr, &src, 4); 1709 memcpy(addr, &src, 4);
1776} 1710}
1777 1711
1778#ifdef CONFIG_NET_CLS_ROUTE 1712#ifdef CONFIG_IP_ROUTE_CLASSID
1779static void set_class_tag(struct rtable *rt, u32 tag) 1713static void set_class_tag(struct rtable *rt, u32 tag)
1780{ 1714{
1781 if (!(rt->dst.tclassid & 0xFFFF)) 1715 if (!(rt->dst.tclassid & 0xFFFF))
@@ -1815,17 +1749,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
1815 return mtu; 1749 return mtu;
1816} 1750}
1817 1751
1818static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) 1752static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1753 struct fib_info *fi)
1754{
1755 struct inet_peer *peer;
1756 int create = 0;
1757
1758 /* If a peer entry exists for this destination, we must hook
1759 * it up in order to get at cached metrics.
1760 */
1761 if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
1762 create = 1;
1763
1764 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
1765 if (peer) {
1766 rt->rt_peer_genid = rt_peer_genid();
1767 if (inet_metrics_new(peer))
1768 memcpy(peer->metrics, fi->fib_metrics,
1769 sizeof(u32) * RTAX_MAX);
1770 dst_init_metrics(&rt->dst, peer->metrics, false);
1771
1772 if (peer->pmtu_expires)
1773 check_peer_pmtu(&rt->dst, peer);
1774 if (peer->redirect_learned.a4 &&
1775 peer->redirect_learned.a4 != rt->rt_gateway) {
1776 rt->rt_gateway = peer->redirect_learned.a4;
1777 rt->rt_flags |= RTCF_REDIRECTED;
1778 }
1779 } else {
1780 if (fi->fib_metrics != (u32 *) dst_default_metrics) {
1781 rt->fi = fi;
1782 atomic_inc(&fi->fib_clntref);
1783 }
1784 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1785 }
1786}
1787
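
rt_init_metrics() above points the route's metric block either at the inet_peer, where
it is shared and writable by every route to that destination, or at the fib_info
template marked read-only via dst_init_metrics(); in the read-only case the first
metric write has to take a private copy, which is what the per-ops cow_metrics hook
(stubbed out to return NULL for the blackhole ops later in this diff) appears to exist
for. A small sketch of that copy-on-write arrangement, with invented names and a plain
malloc() standing in for the kernel's allocation:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#define METRIC_MAX 4
#define METRIC_MTU 0

/* Shared template metrics: many routes may reference these read-only. */
static const unsigned int default_metrics[METRIC_MAX] = { 1500, 0, 0, 0 };

struct route {
        unsigned int *metrics;
        bool metrics_read_only;
};

/* Write one metric, taking a private copy first if we only hold a
 * read-only reference (the copy-on-write step). */
static int route_metric_set(struct route *rt, int idx, unsigned int val)
{
        if (rt->metrics_read_only) {
                unsigned int *copy = malloc(sizeof(default_metrics));

                if (!copy)
                        return -1;
                memcpy(copy, rt->metrics, sizeof(default_metrics));
                rt->metrics = copy;
                rt->metrics_read_only = false;
        }
        rt->metrics[idx] = val;
        return 0;
}

int main(void)
{
        struct route rt = { (unsigned int *)default_metrics, true };

        route_metric_set(&rt, METRIC_MTU, 1400);   /* triggers the copy      */
        printf("route mtu=%u, template mtu=%u\n",
               rt.metrics[METRIC_MTU], default_metrics[METRIC_MTU]);
        free(rt.metrics);                          /* frees the private copy */
        return 0;
}
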
1788static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
1789 const struct fib_result *res,
1790 struct fib_info *fi, u16 type, u32 itag)
1819{ 1791{
1820 struct dst_entry *dst = &rt->dst; 1792 struct dst_entry *dst = &rt->dst;
1821 struct fib_info *fi = res->fi;
1822 1793
1823 if (fi) { 1794 if (fi) {
1824 if (FIB_RES_GW(*res) && 1795 if (FIB_RES_GW(*res) &&
1825 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1796 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1826 rt->rt_gateway = FIB_RES_GW(*res); 1797 rt->rt_gateway = FIB_RES_GW(*res);
1827 dst_import_metrics(dst, fi->fib_metrics); 1798 rt_init_metrics(rt, fl4, fi);
1828#ifdef CONFIG_NET_CLS_ROUTE 1799#ifdef CONFIG_IP_ROUTE_CLASSID
1829 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1800 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1830#endif 1801#endif
1831 } 1802 }
@@ -1835,13 +1806,21 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1835 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) 1806 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
1836 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); 1807 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
1837 1808
1838#ifdef CONFIG_NET_CLS_ROUTE 1809#ifdef CONFIG_IP_ROUTE_CLASSID
1839#ifdef CONFIG_IP_MULTIPLE_TABLES 1810#ifdef CONFIG_IP_MULTIPLE_TABLES
1840 set_class_tag(rt, fib_rules_tclass(res)); 1811 set_class_tag(rt, fib_rules_tclass(res));
1841#endif 1812#endif
1842 set_class_tag(rt, itag); 1813 set_class_tag(rt, itag);
1843#endif 1814#endif
1844 rt->rt_type = res->type; 1815}
1816
1817static struct rtable *rt_dst_alloc(struct net_device *dev,
1818 bool nopolicy, bool noxfrm)
1819{
1820 return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
1821 DST_HOST |
1822 (nopolicy ? DST_NOPOLICY : 0) |
1823 (noxfrm ? DST_NOXFRM : 0));
1845} 1824}
1846 1825
1847/* called in rcu_read_lock() section */ 1826/* called in rcu_read_lock() section */
@@ -1869,41 +1848,38 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1869 goto e_inval; 1848 goto e_inval;
1870 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1849 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1871 } else { 1850 } else {
1872 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 1851 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
1873 &itag, 0); 1852 &itag);
1874 if (err < 0) 1853 if (err < 0)
1875 goto e_err; 1854 goto e_err;
1876 } 1855 }
1877 rth = dst_alloc(&ipv4_dst_ops); 1856 rth = rt_dst_alloc(init_net.loopback_dev,
1857 IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
1878 if (!rth) 1858 if (!rth)
1879 goto e_nobufs; 1859 goto e_nobufs;
1880 1860
1861#ifdef CONFIG_IP_ROUTE_CLASSID
1862 rth->dst.tclassid = itag;
1863#endif
1881 rth->dst.output = ip_rt_bug; 1864 rth->dst.output = ip_rt_bug;
1882 rth->dst.obsolete = -1;
1883 1865
1884 atomic_set(&rth->dst.__refcnt, 1); 1866 rth->rt_key_dst = daddr;
1885 rth->dst.flags= DST_HOST; 1867 rth->rt_key_src = saddr;
1886 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 1868 rth->rt_genid = rt_genid(dev_net(dev));
1887 rth->dst.flags |= DST_NOPOLICY; 1869 rth->rt_flags = RTCF_MULTICAST;
1888 rth->fl.fl4_dst = daddr; 1870 rth->rt_type = RTN_MULTICAST;
1871 rth->rt_key_tos = tos;
1889 rth->rt_dst = daddr; 1872 rth->rt_dst = daddr;
1890 rth->fl.fl4_tos = tos;
1891 rth->fl.mark = skb->mark;
1892 rth->fl.fl4_src = saddr;
1893 rth->rt_src = saddr; 1873 rth->rt_src = saddr;
1894#ifdef CONFIG_NET_CLS_ROUTE 1874 rth->rt_route_iif = dev->ifindex;
1895 rth->dst.tclassid = itag; 1875 rth->rt_iif = dev->ifindex;
1896#endif 1876 rth->rt_oif = 0;
1897 rth->rt_iif = 1877 rth->rt_mark = skb->mark;
1898 rth->fl.iif = dev->ifindex;
1899 rth->dst.dev = init_net.loopback_dev;
1900 dev_hold(rth->dst.dev);
1901 rth->fl.oif = 0;
1902 rth->rt_gateway = daddr; 1878 rth->rt_gateway = daddr;
1903 rth->rt_spec_dst= spec_dst; 1879 rth->rt_spec_dst= spec_dst;
1904 rth->rt_genid = rt_genid(dev_net(dev)); 1880 rth->rt_peer_genid = 0;
1905 rth->rt_flags = RTCF_MULTICAST; 1881 rth->peer = NULL;
1906 rth->rt_type = RTN_MULTICAST; 1882 rth->fi = NULL;
1907 if (our) { 1883 if (our) {
1908 rth->dst.input= ip_local_deliver; 1884 rth->dst.input= ip_local_deliver;
1909 rth->rt_flags |= RTCF_LOCAL; 1885 rth->rt_flags |= RTCF_LOCAL;
@@ -1916,7 +1892,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1916 RT_CACHE_STAT_INC(in_slow_mc); 1892 RT_CACHE_STAT_INC(in_slow_mc);
1917 1893
1918 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1894 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1919 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); 1895 rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
1896 err = 0;
1897 if (IS_ERR(rth))
1898 err = PTR_ERR(rth);
1920 1899
1921e_nobufs: 1900e_nobufs:
1922 return -ENOBUFS; 1901 return -ENOBUFS;
@@ -1959,7 +1938,7 @@ static void ip_handle_martian_source(struct net_device *dev,
1959 1938
1960/* called in rcu_read_lock() section */ 1939/* called in rcu_read_lock() section */
1961static int __mkroute_input(struct sk_buff *skb, 1940static int __mkroute_input(struct sk_buff *skb,
1962 struct fib_result *res, 1941 const struct fib_result *res,
1963 struct in_device *in_dev, 1942 struct in_device *in_dev,
1964 __be32 daddr, __be32 saddr, u32 tos, 1943 __be32 daddr, __be32 saddr, u32 tos,
1965 struct rtable **result) 1944 struct rtable **result)
@@ -1981,8 +1960,8 @@ static int __mkroute_input(struct sk_buff *skb,
1981 } 1960 }
1982 1961
1983 1962
1984 err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), 1963 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
1985 in_dev->dev, &spec_dst, &itag, skb->mark); 1964 in_dev->dev, &spec_dst, &itag);
1986 if (err < 0) { 1965 if (err < 0) {
1987 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1966 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1988 saddr); 1967 saddr);
@@ -2013,41 +1992,36 @@ static int __mkroute_input(struct sk_buff *skb,
2013 } 1992 }
2014 } 1993 }
2015 1994
2016 1995 rth = rt_dst_alloc(out_dev->dev,
2017 rth = dst_alloc(&ipv4_dst_ops); 1996 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1997 IN_DEV_CONF_GET(out_dev, NOXFRM));
2018 if (!rth) { 1998 if (!rth) {
2019 err = -ENOBUFS; 1999 err = -ENOBUFS;
2020 goto cleanup; 2000 goto cleanup;
2021 } 2001 }
2022 2002
2023 atomic_set(&rth->dst.__refcnt, 1); 2003 rth->rt_key_dst = daddr;
2024 rth->dst.flags= DST_HOST; 2004 rth->rt_key_src = saddr;
2025 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2005 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2026 rth->dst.flags |= DST_NOPOLICY; 2006 rth->rt_flags = flags;
2027 if (IN_DEV_CONF_GET(out_dev, NOXFRM)) 2007 rth->rt_type = res->type;
2028 rth->dst.flags |= DST_NOXFRM; 2008 rth->rt_key_tos = tos;
2029 rth->fl.fl4_dst = daddr;
2030 rth->rt_dst = daddr; 2009 rth->rt_dst = daddr;
2031 rth->fl.fl4_tos = tos;
2032 rth->fl.mark = skb->mark;
2033 rth->fl.fl4_src = saddr;
2034 rth->rt_src = saddr; 2010 rth->rt_src = saddr;
2011 rth->rt_route_iif = in_dev->dev->ifindex;
2012 rth->rt_iif = in_dev->dev->ifindex;
2013 rth->rt_oif = 0;
2014 rth->rt_mark = skb->mark;
2035 rth->rt_gateway = daddr; 2015 rth->rt_gateway = daddr;
2036 rth->rt_iif =
2037 rth->fl.iif = in_dev->dev->ifindex;
2038 rth->dst.dev = (out_dev)->dev;
2039 dev_hold(rth->dst.dev);
2040 rth->fl.oif = 0;
2041 rth->rt_spec_dst= spec_dst; 2016 rth->rt_spec_dst= spec_dst;
2017 rth->rt_peer_genid = 0;
2018 rth->peer = NULL;
2019 rth->fi = NULL;
2042 2020
2043 rth->dst.obsolete = -1;
2044 rth->dst.input = ip_forward; 2021 rth->dst.input = ip_forward;
2045 rth->dst.output = ip_output; 2022 rth->dst.output = ip_output;
2046 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2047
2048 rt_set_nexthop(rth, res, itag);
2049 2023
2050 rth->rt_flags = flags; 2024 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
2051 2025
2052 *result = rth; 2026 *result = rth;
2053 err = 0; 2027 err = 0;
@@ -2057,7 +2031,7 @@ static int __mkroute_input(struct sk_buff *skb,
2057 2031
2058static int ip_mkroute_input(struct sk_buff *skb, 2032static int ip_mkroute_input(struct sk_buff *skb,
2059 struct fib_result *res, 2033 struct fib_result *res,
2060 const struct flowi *fl, 2034 const struct flowi4 *fl4,
2061 struct in_device *in_dev, 2035 struct in_device *in_dev,
2062 __be32 daddr, __be32 saddr, u32 tos) 2036 __be32 daddr, __be32 saddr, u32 tos)
2063{ 2037{
@@ -2066,8 +2040,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
2066 unsigned hash; 2040 unsigned hash;
2067 2041
2068#ifdef CONFIG_IP_ROUTE_MULTIPATH 2042#ifdef CONFIG_IP_ROUTE_MULTIPATH
2069 if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) 2043 if (res->fi && res->fi->fib_nhs > 1)
2070 fib_select_multipath(fl, res); 2044 fib_select_multipath(res);
2071#endif 2045#endif
2072 2046
2073 /* create a routing cache entry */ 2047 /* create a routing cache entry */
@@ -2076,9 +2050,12 @@ static int ip_mkroute_input(struct sk_buff *skb,
2076 return err; 2050 return err;
2077 2051
2078 /* put it into the cache */ 2052 /* put it into the cache */
2079 hash = rt_hash(daddr, saddr, fl->iif, 2053 hash = rt_hash(daddr, saddr, fl4->flowi4_iif,
2080 rt_genid(dev_net(rth->dst.dev))); 2054 rt_genid(dev_net(rth->dst.dev)));
2081 return rt_intern_hash(hash, rth, NULL, skb, fl->iif); 2055 rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif);
2056 if (IS_ERR(rth))
2057 return PTR_ERR(rth);
2058 return 0;
2082} 2059}
2083 2060
2084/* 2061/*
@@ -2097,12 +2074,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2097{ 2074{
2098 struct fib_result res; 2075 struct fib_result res;
2099 struct in_device *in_dev = __in_dev_get_rcu(dev); 2076 struct in_device *in_dev = __in_dev_get_rcu(dev);
2100 struct flowi fl = { .fl4_dst = daddr, 2077 struct flowi4 fl4;
2101 .fl4_src = saddr,
2102 .fl4_tos = tos,
2103 .fl4_scope = RT_SCOPE_UNIVERSE,
2104 .mark = skb->mark,
2105 .iif = dev->ifindex };
2106 unsigned flags = 0; 2078 unsigned flags = 0;
2107 u32 itag = 0; 2079 u32 itag = 0;
2108 struct rtable * rth; 2080 struct rtable * rth;
@@ -2139,7 +2111,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2139 /* 2111 /*
2140 * Now we are ready to route packet. 2112 * Now we are ready to route packet.
2141 */ 2113 */
2142 err = fib_lookup(net, &fl, &res); 2114 fl4.flowi4_oif = 0;
2115 fl4.flowi4_iif = dev->ifindex;
2116 fl4.flowi4_mark = skb->mark;
2117 fl4.flowi4_tos = tos;
2118 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
2119 fl4.daddr = daddr;
2120 fl4.saddr = saddr;
2121 err = fib_lookup(net, &fl4, &res);
2143 if (err != 0) { 2122 if (err != 0) {
2144 if (!IN_DEV_FORWARD(in_dev)) 2123 if (!IN_DEV_FORWARD(in_dev))
2145 goto e_hostunreach; 2124 goto e_hostunreach;
@@ -2152,9 +2131,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2152 goto brd_input; 2131 goto brd_input;
2153 2132
2154 if (res.type == RTN_LOCAL) { 2133 if (res.type == RTN_LOCAL) {
2155 err = fib_validate_source(saddr, daddr, tos, 2134 err = fib_validate_source(skb, saddr, daddr, tos,
2156 net->loopback_dev->ifindex, 2135 net->loopback_dev->ifindex,
2157 dev, &spec_dst, &itag, skb->mark); 2136 dev, &spec_dst, &itag);
2158 if (err < 0) 2137 if (err < 0)
2159 goto martian_source_keep_err; 2138 goto martian_source_keep_err;
2160 if (err) 2139 if (err)
@@ -2168,7 +2147,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2168 if (res.type != RTN_UNICAST) 2147 if (res.type != RTN_UNICAST)
2169 goto martian_destination; 2148 goto martian_destination;
2170 2149
2171 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2150 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
2172out: return err; 2151out: return err;
2173 2152
2174brd_input: 2153brd_input:
@@ -2178,8 +2157,8 @@ brd_input:
2178 if (ipv4_is_zeronet(saddr)) 2157 if (ipv4_is_zeronet(saddr))
2179 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 2158 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
2180 else { 2159 else {
2181 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2160 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
2182 &itag, skb->mark); 2161 &itag);
2183 if (err < 0) 2162 if (err < 0)
2184 goto martian_source_keep_err; 2163 goto martian_source_keep_err;
2185 if (err) 2164 if (err)
@@ -2190,43 +2169,47 @@ brd_input:
2190 RT_CACHE_STAT_INC(in_brd); 2169 RT_CACHE_STAT_INC(in_brd);
2191 2170
2192local_input: 2171local_input:
2193 rth = dst_alloc(&ipv4_dst_ops); 2172 rth = rt_dst_alloc(net->loopback_dev,
2173 IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
2194 if (!rth) 2174 if (!rth)
2195 goto e_nobufs; 2175 goto e_nobufs;
2196 2176
2177 rth->dst.input= ip_local_deliver;
2197 rth->dst.output= ip_rt_bug; 2178 rth->dst.output= ip_rt_bug;
2198 rth->dst.obsolete = -1; 2179#ifdef CONFIG_IP_ROUTE_CLASSID
2199 rth->rt_genid = rt_genid(net); 2180 rth->dst.tclassid = itag;
2181#endif
2200 2182
2201 atomic_set(&rth->dst.__refcnt, 1); 2183 rth->rt_key_dst = daddr;
2202 rth->dst.flags= DST_HOST; 2184 rth->rt_key_src = saddr;
2203 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2185 rth->rt_genid = rt_genid(net);
2204 rth->dst.flags |= DST_NOPOLICY; 2186 rth->rt_flags = flags|RTCF_LOCAL;
2205 rth->fl.fl4_dst = daddr; 2187 rth->rt_type = res.type;
2188 rth->rt_key_tos = tos;
2206 rth->rt_dst = daddr; 2189 rth->rt_dst = daddr;
2207 rth->fl.fl4_tos = tos;
2208 rth->fl.mark = skb->mark;
2209 rth->fl.fl4_src = saddr;
2210 rth->rt_src = saddr; 2190 rth->rt_src = saddr;
2211#ifdef CONFIG_NET_CLS_ROUTE 2191#ifdef CONFIG_IP_ROUTE_CLASSID
2212 rth->dst.tclassid = itag; 2192 rth->dst.tclassid = itag;
2213#endif 2193#endif
2214 rth->rt_iif = 2194 rth->rt_route_iif = dev->ifindex;
2215 rth->fl.iif = dev->ifindex; 2195 rth->rt_iif = dev->ifindex;
2216 rth->dst.dev = net->loopback_dev; 2196 rth->rt_oif = 0;
2217 dev_hold(rth->dst.dev); 2197 rth->rt_mark = skb->mark;
2218 rth->rt_gateway = daddr; 2198 rth->rt_gateway = daddr;
2219 rth->rt_spec_dst= spec_dst; 2199 rth->rt_spec_dst= spec_dst;
2220 rth->dst.input= ip_local_deliver; 2200 rth->rt_peer_genid = 0;
2221 rth->rt_flags = flags|RTCF_LOCAL; 2201 rth->peer = NULL;
2202 rth->fi = NULL;
2222 if (res.type == RTN_UNREACHABLE) { 2203 if (res.type == RTN_UNREACHABLE) {
2223 rth->dst.input= ip_error; 2204 rth->dst.input= ip_error;
2224 rth->dst.error= -err; 2205 rth->dst.error= -err;
2225 rth->rt_flags &= ~RTCF_LOCAL; 2206 rth->rt_flags &= ~RTCF_LOCAL;
2226 } 2207 }
2227 rth->rt_type = res.type; 2208 hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
2228 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); 2209 rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
2229 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); 2210 err = 0;
2211 if (IS_ERR(rth))
2212 err = PTR_ERR(rth);
2230 goto out; 2213 goto out;
2231 2214
2232no_route: 2215no_route:
@@ -2288,12 +2271,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2288 2271
2289 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2272 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2290 rth = rcu_dereference(rth->dst.rt_next)) { 2273 rth = rcu_dereference(rth->dst.rt_next)) {
2291 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | 2274 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
2292 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | 2275 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
2293 (rth->fl.iif ^ iif) | 2276 (rth->rt_iif ^ iif) |
2294 rth->fl.oif | 2277 rth->rt_oif |
2295 (rth->fl.fl4_tos ^ tos)) == 0 && 2278 (rth->rt_key_tos ^ tos)) == 0 &&
2296 rth->fl.mark == skb->mark && 2279 rth->rt_mark == skb->mark &&
2297 net_eq(dev_net(rth->dst.dev), net) && 2280 net_eq(dev_net(rth->dst.dev), net) &&
2298 !rt_is_expired(rth)) { 2281 !rt_is_expired(rth)) {
2299 if (noref) { 2282 if (noref) {
@@ -2326,8 +2309,8 @@ skip_cache:
2326 struct in_device *in_dev = __in_dev_get_rcu(dev); 2309 struct in_device *in_dev = __in_dev_get_rcu(dev);
2327 2310
2328 if (in_dev) { 2311 if (in_dev) {
2329 int our = ip_check_mc(in_dev, daddr, saddr, 2312 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
2330 ip_hdr(skb)->protocol); 2313 ip_hdr(skb)->protocol);
2331 if (our 2314 if (our
2332#ifdef CONFIG_IP_MROUTE 2315#ifdef CONFIG_IP_MROUTE
2333 || 2316 ||
@@ -2351,98 +2334,94 @@ skip_cache:
2351EXPORT_SYMBOL(ip_route_input_common); 2334EXPORT_SYMBOL(ip_route_input_common);
2352 2335
2353/* called with rcu_read_lock() */ 2336/* called with rcu_read_lock() */
2354static int __mkroute_output(struct rtable **result, 2337static struct rtable *__mkroute_output(const struct fib_result *res,
2355 struct fib_result *res, 2338 const struct flowi4 *fl4,
2356 const struct flowi *fl, 2339 __be32 orig_daddr, __be32 orig_saddr,
2357 const struct flowi *oldflp, 2340 int orig_oif, struct net_device *dev_out,
2358 struct net_device *dev_out, 2341 unsigned int flags)
2359 unsigned flags)
2360{ 2342{
2361 struct rtable *rth; 2343 struct fib_info *fi = res->fi;
2344 u32 tos = RT_FL_TOS(fl4);
2362 struct in_device *in_dev; 2345 struct in_device *in_dev;
2363 u32 tos = RT_FL_TOS(oldflp); 2346 u16 type = res->type;
2347 struct rtable *rth;
2364 2348
2365 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) 2349 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
2366 return -EINVAL; 2350 return ERR_PTR(-EINVAL);
2367 2351
2368 if (ipv4_is_lbcast(fl->fl4_dst)) 2352 if (ipv4_is_lbcast(fl4->daddr))
2369 res->type = RTN_BROADCAST; 2353 type = RTN_BROADCAST;
2370 else if (ipv4_is_multicast(fl->fl4_dst)) 2354 else if (ipv4_is_multicast(fl4->daddr))
2371 res->type = RTN_MULTICAST; 2355 type = RTN_MULTICAST;
2372 else if (ipv4_is_zeronet(fl->fl4_dst)) 2356 else if (ipv4_is_zeronet(fl4->daddr))
2373 return -EINVAL; 2357 return ERR_PTR(-EINVAL);
2374 2358
2375 if (dev_out->flags & IFF_LOOPBACK) 2359 if (dev_out->flags & IFF_LOOPBACK)
2376 flags |= RTCF_LOCAL; 2360 flags |= RTCF_LOCAL;
2377 2361
2378 in_dev = __in_dev_get_rcu(dev_out); 2362 in_dev = __in_dev_get_rcu(dev_out);
2379 if (!in_dev) 2363 if (!in_dev)
2380 return -EINVAL; 2364 return ERR_PTR(-EINVAL);
2381 2365
2382 if (res->type == RTN_BROADCAST) { 2366 if (type == RTN_BROADCAST) {
2383 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2367 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2384 res->fi = NULL; 2368 fi = NULL;
2385 } else if (res->type == RTN_MULTICAST) { 2369 } else if (type == RTN_MULTICAST) {
2386 flags |= RTCF_MULTICAST | RTCF_LOCAL; 2370 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2387 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2371 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2388 oldflp->proto)) 2372 fl4->flowi4_proto))
2389 flags &= ~RTCF_LOCAL; 2373 flags &= ~RTCF_LOCAL;
2390 /* If multicast route do not exist use 2374 /* If multicast route do not exist use
2391 * default one, but do not gateway in this case. 2375 * default one, but do not gateway in this case.
2392 * Yes, it is hack. 2376 * Yes, it is hack.
2393 */ 2377 */
2394 if (res->fi && res->prefixlen < 4) 2378 if (fi && res->prefixlen < 4)
2395 res->fi = NULL; 2379 fi = NULL;
2396 } 2380 }
2397 2381
2398 2382 rth = rt_dst_alloc(dev_out,
2399 rth = dst_alloc(&ipv4_dst_ops); 2383 IN_DEV_CONF_GET(in_dev, NOPOLICY),
2384 IN_DEV_CONF_GET(in_dev, NOXFRM));
2400 if (!rth) 2385 if (!rth)
2401 return -ENOBUFS; 2386 return ERR_PTR(-ENOBUFS);
2402 2387
2403 atomic_set(&rth->dst.__refcnt, 1); 2388 rth->dst.output = ip_output;
2404 rth->dst.flags= DST_HOST; 2389
2405 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2390 rth->rt_key_dst = orig_daddr;
2406 rth->dst.flags |= DST_NOXFRM; 2391 rth->rt_key_src = orig_saddr;
2407 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2408 rth->dst.flags |= DST_NOPOLICY;
2409
2410 rth->fl.fl4_dst = oldflp->fl4_dst;
2411 rth->fl.fl4_tos = tos;
2412 rth->fl.fl4_src = oldflp->fl4_src;
2413 rth->fl.oif = oldflp->oif;
2414 rth->fl.mark = oldflp->mark;
2415 rth->rt_dst = fl->fl4_dst;
2416 rth->rt_src = fl->fl4_src;
2417 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2418 /* get references to the devices that are to be hold by the routing
2419 cache entry */
2420 rth->dst.dev = dev_out;
2421 dev_hold(dev_out);
2422 rth->rt_gateway = fl->fl4_dst;
2423 rth->rt_spec_dst= fl->fl4_src;
2424
2425 rth->dst.output=ip_output;
2426 rth->dst.obsolete = -1;
2427 rth->rt_genid = rt_genid(dev_net(dev_out)); 2392 rth->rt_genid = rt_genid(dev_net(dev_out));
2393 rth->rt_flags = flags;
2394 rth->rt_type = type;
2395 rth->rt_key_tos = tos;
2396 rth->rt_dst = fl4->daddr;
2397 rth->rt_src = fl4->saddr;
2398 rth->rt_route_iif = 0;
2399 rth->rt_iif = orig_oif ? : dev_out->ifindex;
2400 rth->rt_oif = orig_oif;
2401 rth->rt_mark = fl4->flowi4_mark;
2402 rth->rt_gateway = fl4->daddr;
2403 rth->rt_spec_dst= fl4->saddr;
2404 rth->rt_peer_genid = 0;
2405 rth->peer = NULL;
2406 rth->fi = NULL;
2428 2407
2429 RT_CACHE_STAT_INC(out_slow_tot); 2408 RT_CACHE_STAT_INC(out_slow_tot);
2430 2409
2431 if (flags & RTCF_LOCAL) { 2410 if (flags & RTCF_LOCAL) {
2432 rth->dst.input = ip_local_deliver; 2411 rth->dst.input = ip_local_deliver;
2433 rth->rt_spec_dst = fl->fl4_dst; 2412 rth->rt_spec_dst = fl4->daddr;
2434 } 2413 }
2435 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2414 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2436 rth->rt_spec_dst = fl->fl4_src; 2415 rth->rt_spec_dst = fl4->saddr;
2437 if (flags & RTCF_LOCAL && 2416 if (flags & RTCF_LOCAL &&
2438 !(dev_out->flags & IFF_LOOPBACK)) { 2417 !(dev_out->flags & IFF_LOOPBACK)) {
2439 rth->dst.output = ip_mc_output; 2418 rth->dst.output = ip_mc_output;
2440 RT_CACHE_STAT_INC(out_slow_mc); 2419 RT_CACHE_STAT_INC(out_slow_mc);
2441 } 2420 }
2442#ifdef CONFIG_IP_MROUTE 2421#ifdef CONFIG_IP_MROUTE
2443 if (res->type == RTN_MULTICAST) { 2422 if (type == RTN_MULTICAST) {
2444 if (IN_DEV_MFORWARD(in_dev) && 2423 if (IN_DEV_MFORWARD(in_dev) &&
2445 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2424 !ipv4_is_local_multicast(fl4->daddr)) {
2446 rth->dst.input = ip_mr_input; 2425 rth->dst.input = ip_mr_input;
2447 rth->dst.output = ip_mc_output; 2426 rth->dst.output = ip_mc_output;
2448 } 2427 }
@@ -2450,31 +2429,9 @@ static int __mkroute_output(struct rtable **result,
2450#endif 2429#endif
2451 } 2430 }
2452 2431
2453 rt_set_nexthop(rth, res, 0); 2432 rt_set_nexthop(rth, fl4, res, fi, type, 0);
2454 2433
2455 rth->rt_flags = flags; 2434 return rth;
2456 *result = rth;
2457 return 0;
2458}
2459
2460/* called with rcu_read_lock() */
2461static int ip_mkroute_output(struct rtable **rp,
2462 struct fib_result *res,
2463 const struct flowi *fl,
2464 const struct flowi *oldflp,
2465 struct net_device *dev_out,
2466 unsigned flags)
2467{
2468 struct rtable *rth = NULL;
2469 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2470 unsigned hash;
2471 if (err == 0) {
2472 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2473 rt_genid(dev_net(dev_out)));
2474 err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
2475 }
2476
2477 return err;
2478} 2435}
2479 2436
2480/* 2437/*
@@ -2482,34 +2439,37 @@ static int ip_mkroute_output(struct rtable **rp,
2482 * called with rcu_read_lock(); 2439 * called with rcu_read_lock();
2483 */ 2440 */
2484 2441
2485static int ip_route_output_slow(struct net *net, struct rtable **rp, 2442static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2486 const struct flowi *oldflp) 2443{
2487{
2488 u32 tos = RT_FL_TOS(oldflp);
2489 struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
2490 .fl4_src = oldflp->fl4_src,
2491 .fl4_tos = tos & IPTOS_RT_MASK,
2492 .fl4_scope = ((tos & RTO_ONLINK) ?
2493 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
2494 .mark = oldflp->mark,
2495 .iif = net->loopback_dev->ifindex,
2496 .oif = oldflp->oif };
2497 struct fib_result res;
2498 unsigned int flags = 0;
2499 struct net_device *dev_out = NULL; 2444 struct net_device *dev_out = NULL;
2500 int err; 2445 u32 tos = RT_FL_TOS(fl4);
2501 2446 unsigned int flags = 0;
2447 struct fib_result res;
2448 struct rtable *rth;
2449 __be32 orig_daddr;
2450 __be32 orig_saddr;
2451 int orig_oif;
2502 2452
2503 res.fi = NULL; 2453 res.fi = NULL;
2504#ifdef CONFIG_IP_MULTIPLE_TABLES 2454#ifdef CONFIG_IP_MULTIPLE_TABLES
2505 res.r = NULL; 2455 res.r = NULL;
2506#endif 2456#endif
2507 2457
2508 if (oldflp->fl4_src) { 2458 orig_daddr = fl4->daddr;
2509 err = -EINVAL; 2459 orig_saddr = fl4->saddr;
2510 if (ipv4_is_multicast(oldflp->fl4_src) || 2460 orig_oif = fl4->flowi4_oif;
2511 ipv4_is_lbcast(oldflp->fl4_src) || 2461
2512 ipv4_is_zeronet(oldflp->fl4_src)) 2462 fl4->flowi4_iif = net->loopback_dev->ifindex;
2463 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2464 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2465 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
2466
2467 rcu_read_lock();
2468 if (fl4->saddr) {
2469 rth = ERR_PTR(-EINVAL);
2470 if (ipv4_is_multicast(fl4->saddr) ||
2471 ipv4_is_lbcast(fl4->saddr) ||
2472 ipv4_is_zeronet(fl4->saddr))
2513 goto out; 2473 goto out;
2514 2474
2515 /* I removed check for oif == dev_out->oif here. 2475 /* I removed check for oif == dev_out->oif here.
@@ -2520,11 +2480,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2520 of another iface. --ANK 2480 of another iface. --ANK
2521 */ 2481 */
2522 2482
2523 if (oldflp->oif == 0 && 2483 if (fl4->flowi4_oif == 0 &&
2524 (ipv4_is_multicast(oldflp->fl4_dst) || 2484 (ipv4_is_multicast(fl4->daddr) ||
2525 ipv4_is_lbcast(oldflp->fl4_dst))) { 2485 ipv4_is_lbcast(fl4->daddr))) {
2526 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2486 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2527 dev_out = __ip_dev_find(net, oldflp->fl4_src, false); 2487 dev_out = __ip_dev_find(net, fl4->saddr, false);
2528 if (dev_out == NULL) 2488 if (dev_out == NULL)
2529 goto out; 2489 goto out;
2530 2490
@@ -2543,60 +2503,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2543 Luckily, this hack is good workaround. 2503 Luckily, this hack is good workaround.
2544 */ 2504 */
2545 2505
2546 fl.oif = dev_out->ifindex; 2506 fl4->flowi4_oif = dev_out->ifindex;
2547 goto make_route; 2507 goto make_route;
2548 } 2508 }
2549 2509
2550 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { 2510 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2551 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2511 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2552 if (!__ip_dev_find(net, oldflp->fl4_src, false)) 2512 if (!__ip_dev_find(net, fl4->saddr, false))
2553 goto out; 2513 goto out;
2554 } 2514 }
2555 } 2515 }
2556 2516
2557 2517
2558 if (oldflp->oif) { 2518 if (fl4->flowi4_oif) {
2559 dev_out = dev_get_by_index_rcu(net, oldflp->oif); 2519 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
2560 err = -ENODEV; 2520 rth = ERR_PTR(-ENODEV);
2561 if (dev_out == NULL) 2521 if (dev_out == NULL)
2562 goto out; 2522 goto out;
2563 2523
2564 /* RACE: Check return value of inet_select_addr instead. */ 2524 /* RACE: Check return value of inet_select_addr instead. */
2565 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { 2525 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
2566 err = -ENETUNREACH; 2526 rth = ERR_PTR(-ENETUNREACH);
2567 goto out; 2527 goto out;
2568 } 2528 }
2569 if (ipv4_is_local_multicast(oldflp->fl4_dst) || 2529 if (ipv4_is_local_multicast(fl4->daddr) ||
2570 ipv4_is_lbcast(oldflp->fl4_dst)) { 2530 ipv4_is_lbcast(fl4->daddr)) {
2571 if (!fl.fl4_src) 2531 if (!fl4->saddr)
2572 fl.fl4_src = inet_select_addr(dev_out, 0, 2532 fl4->saddr = inet_select_addr(dev_out, 0,
2573 RT_SCOPE_LINK); 2533 RT_SCOPE_LINK);
2574 goto make_route; 2534 goto make_route;
2575 } 2535 }
2576 if (!fl.fl4_src) { 2536 if (fl4->saddr) {
2577 if (ipv4_is_multicast(oldflp->fl4_dst)) 2537 if (ipv4_is_multicast(fl4->daddr))
2578 fl.fl4_src = inet_select_addr(dev_out, 0, 2538 fl4->saddr = inet_select_addr(dev_out, 0,
2579 fl.fl4_scope); 2539 fl4->flowi4_scope);
2580 else if (!oldflp->fl4_dst) 2540 else if (!fl4->daddr)
2581 fl.fl4_src = inet_select_addr(dev_out, 0, 2541 fl4->saddr = inet_select_addr(dev_out, 0,
2582 RT_SCOPE_HOST); 2542 RT_SCOPE_HOST);
2583 } 2543 }
2584 } 2544 }
2585 2545
2586 if (!fl.fl4_dst) { 2546 if (!fl4->daddr) {
2587 fl.fl4_dst = fl.fl4_src; 2547 fl4->daddr = fl4->saddr;
2588 if (!fl.fl4_dst) 2548 if (!fl4->daddr)
2589 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2549 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
2590 dev_out = net->loopback_dev; 2550 dev_out = net->loopback_dev;
2591 fl.oif = net->loopback_dev->ifindex; 2551 fl4->flowi4_oif = net->loopback_dev->ifindex;
2592 res.type = RTN_LOCAL; 2552 res.type = RTN_LOCAL;
2593 flags |= RTCF_LOCAL; 2553 flags |= RTCF_LOCAL;
2594 goto make_route; 2554 goto make_route;
2595 } 2555 }
2596 2556
2597 if (fib_lookup(net, &fl, &res)) { 2557 if (fib_lookup(net, fl4, &res)) {
2598 res.fi = NULL; 2558 res.fi = NULL;
2599 if (oldflp->oif) { 2559 if (fl4->flowi4_oif) {
2600 /* Apparently, routing tables are wrong. Assume, 2560 /* Apparently, routing tables are wrong. Assume,
2601 that the destination is on link. 2561 that the destination is on link.
2602 2562
@@ -2615,90 +2575,100 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2615 likely IPv6, but we do not. 2575 likely IPv6, but we do not.
2616 */ 2576 */
2617 2577
2618 if (fl.fl4_src == 0) 2578 if (fl4->saddr == 0)
2619 fl.fl4_src = inet_select_addr(dev_out, 0, 2579 fl4->saddr = inet_select_addr(dev_out, 0,
2620 RT_SCOPE_LINK); 2580 RT_SCOPE_LINK);
2621 res.type = RTN_UNICAST; 2581 res.type = RTN_UNICAST;
2622 goto make_route; 2582 goto make_route;
2623 } 2583 }
2624 err = -ENETUNREACH; 2584 rth = ERR_PTR(-ENETUNREACH);
2625 goto out; 2585 goto out;
2626 } 2586 }
2627 2587
2628 if (res.type == RTN_LOCAL) { 2588 if (res.type == RTN_LOCAL) {
2629 if (!fl.fl4_src) { 2589 if (!fl4->saddr) {
2630 if (res.fi->fib_prefsrc) 2590 if (res.fi->fib_prefsrc)
2631 fl.fl4_src = res.fi->fib_prefsrc; 2591 fl4->saddr = res.fi->fib_prefsrc;
2632 else 2592 else
2633 fl.fl4_src = fl.fl4_dst; 2593 fl4->saddr = fl4->daddr;
2634 } 2594 }
2635 dev_out = net->loopback_dev; 2595 dev_out = net->loopback_dev;
2636 fl.oif = dev_out->ifindex; 2596 fl4->flowi4_oif = dev_out->ifindex;
2637 res.fi = NULL; 2597 res.fi = NULL;
2638 flags |= RTCF_LOCAL; 2598 flags |= RTCF_LOCAL;
2639 goto make_route; 2599 goto make_route;
2640 } 2600 }
2641 2601
2642#ifdef CONFIG_IP_ROUTE_MULTIPATH 2602#ifdef CONFIG_IP_ROUTE_MULTIPATH
2643 if (res.fi->fib_nhs > 1 && fl.oif == 0) 2603 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
2644 fib_select_multipath(&fl, &res); 2604 fib_select_multipath(&res);
2645 else 2605 else
2646#endif 2606#endif
2647 if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) 2607 if (!res.prefixlen &&
2648 fib_select_default(net, &fl, &res); 2608 res.table->tb_num_default > 1 &&
2609 res.type == RTN_UNICAST && !fl4->flowi4_oif)
2610 fib_select_default(&res);
2649 2611
2650 if (!fl.fl4_src) 2612 if (!fl4->saddr)
2651 fl.fl4_src = FIB_RES_PREFSRC(res); 2613 fl4->saddr = FIB_RES_PREFSRC(net, res);
2652 2614
2653 dev_out = FIB_RES_DEV(res); 2615 dev_out = FIB_RES_DEV(res);
2654 fl.oif = dev_out->ifindex; 2616 fl4->flowi4_oif = dev_out->ifindex;
2655 2617
2656 2618
2657make_route: 2619make_route:
2658 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2620 rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
2621 dev_out, flags);
2622 if (!IS_ERR(rth)) {
2623 unsigned int hash;
2659 2624
2660out: return err; 2625 hash = rt_hash(orig_daddr, orig_saddr, orig_oif,
2626 rt_genid(dev_net(dev_out)));
2627 rth = rt_intern_hash(hash, rth, NULL, orig_oif);
2628 }
2629
2630out:
2631 rcu_read_unlock();
2632 return rth;
2661} 2633}
2662 2634
2663int __ip_route_output_key(struct net *net, struct rtable **rp, 2635struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
2664 const struct flowi *flp)
2665{ 2636{
2666 unsigned int hash;
2667 int res;
2668 struct rtable *rth; 2637 struct rtable *rth;
2638 unsigned int hash;
2669 2639
2670 if (!rt_caching(net)) 2640 if (!rt_caching(net))
2671 goto slow_output; 2641 goto slow_output;
2672 2642
2673 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); 2643 hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));
2674 2644
2675 rcu_read_lock_bh(); 2645 rcu_read_lock_bh();
2676 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; 2646 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2677 rth = rcu_dereference_bh(rth->dst.rt_next)) { 2647 rth = rcu_dereference_bh(rth->dst.rt_next)) {
2678 if (rth->fl.fl4_dst == flp->fl4_dst && 2648 if (rth->rt_key_dst == flp4->daddr &&
2679 rth->fl.fl4_src == flp->fl4_src && 2649 rth->rt_key_src == flp4->saddr &&
2680 rt_is_output_route(rth) && 2650 rt_is_output_route(rth) &&
2681 rth->fl.oif == flp->oif && 2651 rth->rt_oif == flp4->flowi4_oif &&
2682 rth->fl.mark == flp->mark && 2652 rth->rt_mark == flp4->flowi4_mark &&
2683 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2653 !((rth->rt_key_tos ^ flp4->flowi4_tos) &
2684 (IPTOS_RT_MASK | RTO_ONLINK)) && 2654 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2685 net_eq(dev_net(rth->dst.dev), net) && 2655 net_eq(dev_net(rth->dst.dev), net) &&
2686 !rt_is_expired(rth)) { 2656 !rt_is_expired(rth)) {
2687 dst_use(&rth->dst, jiffies); 2657 dst_use(&rth->dst, jiffies);
2688 RT_CACHE_STAT_INC(out_hit); 2658 RT_CACHE_STAT_INC(out_hit);
2689 rcu_read_unlock_bh(); 2659 rcu_read_unlock_bh();
2690 *rp = rth; 2660 if (!flp4->saddr)
2691 return 0; 2661 flp4->saddr = rth->rt_src;
2662 if (!flp4->daddr)
2663 flp4->daddr = rth->rt_dst;
2664 return rth;
2692 } 2665 }
2693 RT_CACHE_STAT_INC(out_hlist_search); 2666 RT_CACHE_STAT_INC(out_hlist_search);
2694 } 2667 }
2695 rcu_read_unlock_bh(); 2668 rcu_read_unlock_bh();
2696 2669
2697slow_output: 2670slow_output:
2698 rcu_read_lock(); 2671 return ip_route_output_slow(net, flp4);
2699 res = ip_route_output_slow(net, rp, flp);
2700 rcu_read_unlock();
2701 return res;
2702} 2672}
2703EXPORT_SYMBOL_GPL(__ip_route_output_key); 2673EXPORT_SYMBOL_GPL(__ip_route_output_key);
2704 2674
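
The cache probes in ip_route_input_common() and __ip_route_output_key() keep the old
idiom of folding every key field into one word with XOR and OR, so the common miss is
decided by a single compare; only the field names change, from rth->fl.* to the new
rt_key_* members. The idiom in isolation, outside any kernel context:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct flow_key { uint32_t daddr, saddr; int iif, oif; uint8_t tos; };

/* XOR each pair of fields and OR the results: the result is zero only when
 * every field matches, so one comparison decides the whole key. */
static bool keys_match(const struct flow_key *a, const struct flow_key *b)
{
        return ((a->daddr ^ b->daddr) |
                (a->saddr ^ b->saddr) |
                (uint32_t)(a->iif ^ b->iif) |
                (uint32_t)(a->oif ^ b->oif) |
                (uint32_t)(a->tos ^ b->tos)) == 0;
}

int main(void)
{
        struct flow_key a = { 0x0a000001, 0x0a000002, 3, 0, 0x10 };
        struct flow_key b = a;

        printf("identical keys: %s\n", keys_match(&a, &b) ? "hit" : "miss");
        b.tos = 0x00;
        printf("tos differs  : %s\n", keys_match(&a, &b) ? "hit" : "miss");
        return 0;
}
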
@@ -2716,6 +2686,12 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
2716{ 2686{
2717} 2687}
2718 2688
2689static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2690 unsigned long old)
2691{
2692 return NULL;
2693}
2694
2719static struct dst_ops ipv4_dst_blackhole_ops = { 2695static struct dst_ops ipv4_dst_blackhole_ops = {
2720 .family = AF_INET, 2696 .family = AF_INET,
2721 .protocol = cpu_to_be16(ETH_P_IP), 2697 .protocol = cpu_to_be16(ETH_P_IP),
@@ -2724,19 +2700,17 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2724 .default_mtu = ipv4_blackhole_default_mtu, 2700 .default_mtu = ipv4_blackhole_default_mtu,
2725 .default_advmss = ipv4_default_advmss, 2701 .default_advmss = ipv4_default_advmss,
2726 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2702 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2703 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
2727}; 2704};
2728 2705
2729 2706struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2730static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
2731{ 2707{
2732 struct rtable *ort = *rp; 2708 struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0);
2733 struct rtable *rt = (struct rtable *) 2709 struct rtable *ort = (struct rtable *) dst_orig;
2734 dst_alloc(&ipv4_dst_blackhole_ops);
2735 2710
2736 if (rt) { 2711 if (rt) {
2737 struct dst_entry *new = &rt->dst; 2712 struct dst_entry *new = &rt->dst;
2738 2713
2739 atomic_set(&new->__refcnt, 1);
2740 new->__use = 1; 2714 new->__use = 1;
2741 new->input = dst_discard; 2715 new->input = dst_discard;
2742 new->output = dst_discard; 2716 new->output = dst_discard;
@@ -2746,59 +2720,53 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2746 if (new->dev) 2720 if (new->dev)
2747 dev_hold(new->dev); 2721 dev_hold(new->dev);
2748 2722
2749 rt->fl = ort->fl; 2723 rt->rt_key_dst = ort->rt_key_dst;
2724 rt->rt_key_src = ort->rt_key_src;
2725 rt->rt_key_tos = ort->rt_key_tos;
2726 rt->rt_route_iif = ort->rt_route_iif;
2727 rt->rt_iif = ort->rt_iif;
2728 rt->rt_oif = ort->rt_oif;
2729 rt->rt_mark = ort->rt_mark;
2750 2730
2751 rt->rt_genid = rt_genid(net); 2731 rt->rt_genid = rt_genid(net);
2752 rt->rt_flags = ort->rt_flags; 2732 rt->rt_flags = ort->rt_flags;
2753 rt->rt_type = ort->rt_type; 2733 rt->rt_type = ort->rt_type;
2754 rt->rt_dst = ort->rt_dst; 2734 rt->rt_dst = ort->rt_dst;
2755 rt->rt_src = ort->rt_src; 2735 rt->rt_src = ort->rt_src;
2756 rt->rt_iif = ort->rt_iif;
2757 rt->rt_gateway = ort->rt_gateway; 2736 rt->rt_gateway = ort->rt_gateway;
2758 rt->rt_spec_dst = ort->rt_spec_dst; 2737 rt->rt_spec_dst = ort->rt_spec_dst;
2759 rt->peer = ort->peer; 2738 rt->peer = ort->peer;
2760 if (rt->peer) 2739 if (rt->peer)
2761 atomic_inc(&rt->peer->refcnt); 2740 atomic_inc(&rt->peer->refcnt);
2741 rt->fi = ort->fi;
2742 if (rt->fi)
2743 atomic_inc(&rt->fi->fib_clntref);
2762 2744
2763 dst_free(new); 2745 dst_free(new);
2764 } 2746 }
2765 2747
2766 dst_release(&(*rp)->dst); 2748 dst_release(dst_orig);
2767 *rp = rt; 2749
2768 return rt ? 0 : -ENOMEM; 2750 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
2769} 2751}
2770 2752
2771int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, 2753struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
2772 struct sock *sk, int flags) 2754 struct sock *sk)
2773{ 2755{
2774 int err; 2756 struct rtable *rt = __ip_route_output_key(net, flp4);
2775
2776 if ((err = __ip_route_output_key(net, rp, flp)) != 0)
2777 return err;
2778 2757
2779 if (flp->proto) { 2758 if (IS_ERR(rt))
2780 if (!flp->fl4_src) 2759 return rt;
2781 flp->fl4_src = (*rp)->rt_src;
2782 if (!flp->fl4_dst)
2783 flp->fl4_dst = (*rp)->rt_dst;
2784 err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
2785 flags ? XFRM_LOOKUP_WAIT : 0);
2786 if (err == -EREMOTE)
2787 err = ipv4_dst_blackhole(net, rp, flp);
2788 2760
2789 return err; 2761 if (flp4->flowi4_proto)
2790 } 2762 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2763 flowi4_to_flowi(flp4),
2764 sk, 0);
2791 2765
2792 return 0; 2766 return rt;
2793} 2767}
2794EXPORT_SYMBOL_GPL(ip_route_output_flow); 2768EXPORT_SYMBOL_GPL(ip_route_output_flow);
2795 2769
2796int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2797{
2798 return ip_route_output_flow(net, rp, flp, NULL, 0);
2799}
2800EXPORT_SYMBOL(ip_route_output_key);
2801
2802static int rt_fill_info(struct net *net, 2770static int rt_fill_info(struct net *net,
2803 struct sk_buff *skb, u32 pid, u32 seq, int event, 2771 struct sk_buff *skb, u32 pid, u32 seq, int event,
2804 int nowait, unsigned int flags) 2772 int nowait, unsigned int flags)
@@ -2817,7 +2785,7 @@ static int rt_fill_info(struct net *net,
2817 r->rtm_family = AF_INET; 2785 r->rtm_family = AF_INET;
2818 r->rtm_dst_len = 32; 2786 r->rtm_dst_len = 32;
2819 r->rtm_src_len = 0; 2787 r->rtm_src_len = 0;
2820 r->rtm_tos = rt->fl.fl4_tos; 2788 r->rtm_tos = rt->rt_key_tos;
2821 r->rtm_table = RT_TABLE_MAIN; 2789 r->rtm_table = RT_TABLE_MAIN;
2822 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 2790 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2823 r->rtm_type = rt->rt_type; 2791 r->rtm_type = rt->rt_type;
@@ -2829,19 +2797,19 @@ static int rt_fill_info(struct net *net,
2829 2797
2830 NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); 2798 NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
2831 2799
2832 if (rt->fl.fl4_src) { 2800 if (rt->rt_key_src) {
2833 r->rtm_src_len = 32; 2801 r->rtm_src_len = 32;
2834 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); 2802 NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
2835 } 2803 }
2836 if (rt->dst.dev) 2804 if (rt->dst.dev)
2837 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); 2805 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2838#ifdef CONFIG_NET_CLS_ROUTE 2806#ifdef CONFIG_IP_ROUTE_CLASSID
2839 if (rt->dst.tclassid) 2807 if (rt->dst.tclassid)
2840 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); 2808 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2841#endif 2809#endif
2842 if (rt_is_input_route(rt)) 2810 if (rt_is_input_route(rt))
2843 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2811 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2844 else if (rt->rt_src != rt->fl.fl4_src) 2812 else if (rt->rt_src != rt->rt_key_src)
2845 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); 2813 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
2846 2814
2847 if (rt->rt_dst != rt->rt_gateway) 2815 if (rt->rt_dst != rt->rt_gateway)
@@ -2850,11 +2818,12 @@ static int rt_fill_info(struct net *net,
2850 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2818 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2851 goto nla_put_failure; 2819 goto nla_put_failure;
2852 2820
2853 if (rt->fl.mark) 2821 if (rt->rt_mark)
2854 NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); 2822 NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
2855 2823
2856 error = rt->dst.error; 2824 error = rt->dst.error;
2857 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; 2825 expires = (rt->peer && rt->peer->pmtu_expires) ?
2826 rt->peer->pmtu_expires - jiffies : 0;
2858 if (rt->peer) { 2827 if (rt->peer) {
2859 inet_peer_refcheck(rt->peer); 2828 inet_peer_refcheck(rt->peer);
2860 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2829 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
@@ -2870,7 +2839,9 @@ static int rt_fill_info(struct net *net,
2870 2839
2871 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && 2840 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2872 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { 2841 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2873 int err = ipmr_get_route(net, skb, r, nowait); 2842 int err = ipmr_get_route(net, skb,
2843 rt->rt_src, rt->rt_dst,
2844 r, nowait);
2874 if (err <= 0) { 2845 if (err <= 0) {
2875 if (!nowait) { 2846 if (!nowait) {
2876 if (err == 0) 2847 if (err == 0)
@@ -2884,7 +2855,7 @@ static int rt_fill_info(struct net *net,
2884 } 2855 }
2885 } else 2856 } else
2886#endif 2857#endif
2887 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); 2858 NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
2888 } 2859 }
2889 2860
2890 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, 2861 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
@@ -2958,14 +2929,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2958 if (err == 0 && rt->dst.error) 2929 if (err == 0 && rt->dst.error)
2959 err = -rt->dst.error; 2930 err = -rt->dst.error;
2960 } else { 2931 } else {
2961 struct flowi fl = { 2932 struct flowi4 fl4 = {
2962 .fl4_dst = dst, 2933 .daddr = dst,
2963 .fl4_src = src, 2934 .saddr = src,
2964 .fl4_tos = rtm->rtm_tos, 2935 .flowi4_tos = rtm->rtm_tos,
2965 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2936 .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2966 .mark = mark, 2937 .flowi4_mark = mark,
2967 }; 2938 };
2968 err = ip_route_output_key(net, &rt, &fl); 2939 rt = ip_route_output_key(net, &fl4);
2940
2941 err = 0;
2942 if (IS_ERR(rt))
2943 err = PTR_ERR(rt);
2969 } 2944 }
2970 2945
2971 if (err) 2946 if (err)
@@ -3248,6 +3223,8 @@ static __net_init int rt_genid_init(struct net *net)
3248{ 3223{
3249 get_random_bytes(&net->ipv4.rt_genid, 3224 get_random_bytes(&net->ipv4.rt_genid,
3250 sizeof(net->ipv4.rt_genid)); 3225 sizeof(net->ipv4.rt_genid));
3226 get_random_bytes(&net->ipv4.dev_addr_genid,
3227 sizeof(net->ipv4.dev_addr_genid));
3251 return 0; 3228 return 0;
3252} 3229}
3253 3230
@@ -3256,9 +3233,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
3256}; 3233};
3257 3234
3258 3235
3259#ifdef CONFIG_NET_CLS_ROUTE 3236#ifdef CONFIG_IP_ROUTE_CLASSID
3260struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3237struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3261#endif /* CONFIG_NET_CLS_ROUTE */ 3238#endif /* CONFIG_IP_ROUTE_CLASSID */
3262 3239
3263static __initdata unsigned long rhash_entries; 3240static __initdata unsigned long rhash_entries;
3264static int __init set_rhash_entries(char *str) 3241static int __init set_rhash_entries(char *str)
@@ -3274,7 +3251,7 @@ int __init ip_rt_init(void)
3274{ 3251{
3275 int rc = 0; 3252 int rc = 0;
3276 3253
3277#ifdef CONFIG_NET_CLS_ROUTE 3254#ifdef CONFIG_IP_ROUTE_CLASSID
3278 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 3255 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
3279 if (!ip_rt_acct) 3256 if (!ip_rt_acct)
3280 panic("IP: failed to allocate ip_rt_acct\n"); 3257 panic("IP: failed to allocate ip_rt_acct\n");
@@ -3311,14 +3288,6 @@ int __init ip_rt_init(void)
3311 devinet_init(); 3288 devinet_init();
3312 ip_fib_init(); 3289 ip_fib_init();
3313 3290
3314 /* All the timers, started at system startup tend
3315 to synchronize. Perturb it a bit.
3316 */
3317 INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
3318 expires_ljiffies = jiffies;
3319 schedule_delayed_work(&expires_work,
3320 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3321
3322 if (ip_rt_proc_init()) 3291 if (ip_rt_proc_init())
3323 printk(KERN_ERR "Unable to create route proc files\n"); 3292 printk(KERN_ERR "Unable to create route proc files\n");
3324#ifdef CONFIG_XFRM 3293#ifdef CONFIG_XFRM
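
The route.c hunks above convert the IPv4 output-route API from the old int-returning style (struct flowi plus a struct rtable ** out-parameter) to calls that take a struct flowi4 and return a struct rtable * or an ERR_PTR(). A minimal sketch of what an in-kernel caller looks like after this change; the function name example_lookup and the address values are illustrative, only ip_route_output_key(), the flowi4 field names, and the IS_ERR()/PTR_ERR() handling come from the diff:

/* Sketch only: caller of the flowi4-based output lookup after this patch. */
#include <linux/err.h>
#include <net/flow.h>
#include <net/route.h>

static int example_lookup(struct net *net, __be32 daddr, __be32 saddr)
{
	struct flowi4 fl4 = {
		.daddr      = daddr,	/* was .fl4_dst */
		.saddr      = saddr,	/* was .fl4_src */
		.flowi4_tos = 0,	/* was .fl4_tos */
	};
	struct rtable *rt;

	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);	/* errors now come back as ERR_PTR, not int */

	/* ... use rt->dst ... */
	ip_rt_put(rt);
	return 0;
}

The syncookies.c and tcp_ipv4.c hunks below apply the same conversion to their callers.
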
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 47519205a014..26461492a847 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -321,10 +321,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 321 * the ACK carries the same options again (see RFC1122 4.2.3.8)
322 */ 322 */
323 if (opt && opt->optlen) { 323 if (opt && opt->optlen) {
324 int opt_size = sizeof(struct ip_options) + opt->optlen; 324 int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;
325 325
326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC); 326 ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
327 if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) { 327 if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
328 kfree(ireq->opt); 328 kfree(ireq->opt);
329 ireq->opt = NULL; 329 ireq->opt = NULL;
330 } 330 }
@@ -345,17 +345,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
345 * no easy way to do this. 345 * no easy way to do this.
346 */ 346 */
347 { 347 {
348 struct flowi fl = { .mark = sk->sk_mark, 348 struct flowi4 fl4;
349 .fl4_dst = ((opt && opt->srr) ? 349
350 opt->faddr : ireq->rmt_addr), 350 flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
351 .fl4_src = ireq->loc_addr, 351 RT_SCOPE_UNIVERSE, IPPROTO_TCP,
352 .fl4_tos = RT_CONN_FLAGS(sk), 352 inet_sk_flowi_flags(sk),
353 .proto = IPPROTO_TCP, 353 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
354 .flags = inet_sk_flowi_flags(sk), 354 ireq->loc_addr, th->source, th->dest);
355 .fl_ip_sport = th->dest, 355 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
356 .fl_ip_dport = th->source }; 356 rt = ip_route_output_key(sock_net(sk), &fl4);
357 security_req_classify_flow(req, &fl); 357 if (IS_ERR(rt)) {
358 if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
359 reqsk_free(req); 358 reqsk_free(req);
360 goto out; 359 goto out;
361 } 360 }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1a456652086b..57d0752e239a 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,6 +13,7 @@
13#include <linux/seqlock.h> 13#include <linux/seqlock.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/nsproxy.h>
16#include <net/snmp.h> 17#include <net/snmp.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/ip.h> 19#include <net/ip.h>
@@ -21,6 +22,7 @@
21#include <net/udp.h> 22#include <net/udp.h>
22#include <net/cipso_ipv4.h> 23#include <net/cipso_ipv4.h>
23#include <net/inet_frag.h> 24#include <net/inet_frag.h>
25#include <net/ping.h>
24 26
25static int zero; 27static int zero;
26static int tcp_retr1_max = 255; 28static int tcp_retr1_max = 255;
@@ -30,6 +32,8 @@ static int tcp_adv_win_scale_min = -31;
30static int tcp_adv_win_scale_max = 31; 32static int tcp_adv_win_scale_max = 31;
31static int ip_ttl_min = 1; 33static int ip_ttl_min = 1;
32static int ip_ttl_max = 255; 34static int ip_ttl_max = 255;
35static int ip_ping_group_range_min[] = { 0, 0 };
36static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
33 37
34/* Update system visible IP port range */ 38/* Update system visible IP port range */
35static void set_local_port_range(int range[2]) 39static void set_local_port_range(int range[2])
@@ -68,6 +72,53 @@ static int ipv4_local_port_range(ctl_table *table, int write,
68 return ret; 72 return ret;
69} 73}
70 74
75
76void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
77{
78 gid_t *data = table->data;
79 unsigned seq;
80 do {
81 seq = read_seqbegin(&sysctl_local_ports.lock);
82
83 *low = data[0];
84 *high = data[1];
85 } while (read_seqretry(&sysctl_local_ports.lock, seq));
86}
87
88/* Update system visible IP port range */
89static void set_ping_group_range(struct ctl_table *table, int range[2])
90{
91 gid_t *data = table->data;
92 write_seqlock(&sysctl_local_ports.lock);
93 data[0] = range[0];
94 data[1] = range[1];
95 write_sequnlock(&sysctl_local_ports.lock);
96}
97
98/* Validate changes from /proc interface. */
99static int ipv4_ping_group_range(ctl_table *table, int write,
100 void __user *buffer,
101 size_t *lenp, loff_t *ppos)
102{
103 int ret;
104 gid_t range[2];
105 ctl_table tmp = {
106 .data = &range,
107 .maxlen = sizeof(range),
108 .mode = table->mode,
109 .extra1 = &ip_ping_group_range_min,
110 .extra2 = &ip_ping_group_range_max,
111 };
112
113 inet_get_ping_group_range_table(table, range, range + 1);
114 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
115
116 if (write && ret == 0)
117 set_ping_group_range(table, range);
118
119 return ret;
120}
121
71static int proc_tcp_congestion_control(ctl_table *ctl, int write, 122static int proc_tcp_congestion_control(ctl_table *ctl, int write,
72 void __user *buffer, size_t *lenp, loff_t *ppos) 123 void __user *buffer, size_t *lenp, loff_t *ppos)
73{ 124{
@@ -311,7 +362,6 @@ static struct ctl_table ipv4_table[] = {
311 .mode = 0644, 362 .mode = 0644,
312 .proc_handler = proc_do_large_bitmap, 363 .proc_handler = proc_do_large_bitmap,
313 }, 364 },
314#ifdef CONFIG_IP_MULTICAST
315 { 365 {
316 .procname = "igmp_max_memberships", 366 .procname = "igmp_max_memberships",
317 .data = &sysctl_igmp_max_memberships, 367 .data = &sysctl_igmp_max_memberships,
@@ -319,8 +369,6 @@ static struct ctl_table ipv4_table[] = {
319 .mode = 0644, 369 .mode = 0644,
320 .proc_handler = proc_dointvec 370 .proc_handler = proc_dointvec
321 }, 371 },
322
323#endif
324 { 372 {
325 .procname = "igmp_max_msf", 373 .procname = "igmp_max_msf",
326 .data = &sysctl_igmp_max_msf, 374 .data = &sysctl_igmp_max_msf,
@@ -680,6 +728,13 @@ static struct ctl_table ipv4_net_table[] = {
680 .mode = 0644, 728 .mode = 0644,
681 .proc_handler = proc_dointvec 729 .proc_handler = proc_dointvec
682 }, 730 },
731 {
732 .procname = "ping_group_range",
733 .data = &init_net.ipv4.sysctl_ping_group_range,
734 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
735 .mode = 0644,
736 .proc_handler = ipv4_ping_group_range,
737 },
683 { } 738 { }
684}; 739};
685 740
@@ -714,8 +769,18 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
714 &net->ipv4.sysctl_icmp_ratemask; 769 &net->ipv4.sysctl_icmp_ratemask;
715 table[6].data = 770 table[6].data =
716 &net->ipv4.sysctl_rt_cache_rebuild_count; 771 &net->ipv4.sysctl_rt_cache_rebuild_count;
772 table[7].data =
773 &net->ipv4.sysctl_ping_group_range;
774
717 } 775 }
718 776
777 /*
778 * Sane defaults - nobody may create ping sockets.
779 * Boot scripts should set this to distro-specific group.
780 */
781 net->ipv4.sysctl_ping_group_range[0] = 1;
782 net->ipv4.sysctl_ping_group_range[1] = 0;
783
719 net->ipv4.sysctl_rt_cache_rebuild_count = 4; 784 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
720 785
721 net->ipv4.ipv4_hdr = register_net_sysctl_table(net, 786 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
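
The sysctl hunks above register a per-namespace "ping_group_range" entry (two gids, defaulting to "1 0" so that no group may create ping sockets) under the ipv4 table, i.e. /proc/sys/net/ipv4/ping_group_range. A hedged userspace sketch of enabling the feature for one group; the gid value 100 is arbitrary and only the proc path and the "low high" format are taken from the diff:

/* Illustrative only: allow gid 100 (and only gid 100) to create ping sockets. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/ping_group_range", "w");

	if (!f) {
		perror("ping_group_range");	/* absent on kernels without this patch */
		return 1;
	}
	/* two gids: lowest and highest group permitted to open ping sockets */
	fprintf(f, "100 100\n");
	fclose(f);
	return 0;
}

Boot scripts would normally do the equivalent write, as the "Sane defaults" comment in the hunk suggests.
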
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6c11eece262c..054a59d21eb0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
505 else 505 else
506 answ = tp->write_seq - tp->snd_una; 506 answ = tp->write_seq - tp->snd_una;
507 break; 507 break;
508 case SIOCOUTQNSD:
509 if (sk->sk_state == TCP_LISTEN)
510 return -EINVAL;
511
512 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
513 answ = 0;
514 else
515 answ = tp->write_seq - tp->snd_nxt;
516 break;
508 default: 517 default:
509 return -ENOIOCTLCMD; 518 return -ENOIOCTLCMD;
510 } 519 }
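
The hunk above adds a SIOCOUTQNSD ioctl to tcp_ioctl(), reporting bytes queued but not yet sent (write_seq - snd_nxt), complementing the existing SIOCOUTQ. A short userspace sketch of querying it; the helper name print_unsent and the connected-socket setup are assumed, while SIOCOUTQNSD itself is the ioctl introduced by this series and is exposed via <linux/sockios.h>:

/* Sketch: read the not-yet-sent byte count on a connected TCP socket. */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

int print_unsent(int tcp_fd)
{
	int unsent = 0;

	if (ioctl(tcp_fd, SIOCOUTQNSD, &unsent) < 0) {
		perror("SIOCOUTQNSD");	/* fails on kernels without this change */
		return -1;
	}
	printf("queued but unsent: %d bytes\n", unsent);
	return 0;
}
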
@@ -873,9 +882,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
873 flags); 882 flags);
874 883
875 lock_sock(sk); 884 lock_sock(sk);
876 TCP_CHECK_TIMER(sk);
877 res = do_tcp_sendpages(sk, &page, offset, size, flags); 885 res = do_tcp_sendpages(sk, &page, offset, size, flags);
878 TCP_CHECK_TIMER(sk);
879 release_sock(sk); 886 release_sock(sk);
880 return res; 887 return res;
881} 888}
@@ -916,7 +923,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
916 long timeo; 923 long timeo;
917 924
918 lock_sock(sk); 925 lock_sock(sk);
919 TCP_CHECK_TIMER(sk);
920 926
921 flags = msg->msg_flags; 927 flags = msg->msg_flags;
922 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 928 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -993,7 +999,8 @@ new_segment:
993 /* We have some space in skb head. Superb! */ 999 /* We have some space in skb head. Superb! */
994 if (copy > skb_tailroom(skb)) 1000 if (copy > skb_tailroom(skb))
995 copy = skb_tailroom(skb); 1001 copy = skb_tailroom(skb);
996 if ((err = skb_add_data(skb, from, copy)) != 0) 1002 err = skb_add_data_nocache(sk, skb, from, copy);
1003 if (err)
997 goto do_fault; 1004 goto do_fault;
998 } else { 1005 } else {
999 int merge = 0; 1006 int merge = 0;
@@ -1036,8 +1043,8 @@ new_segment:
1036 1043
1037 /* Time to copy data. We are close to 1044 /* Time to copy data. We are close to
1038 * the end! */ 1045 * the end! */
1039 err = skb_copy_to_page(sk, from, skb, page, 1046 err = skb_copy_to_page_nocache(sk, from, skb,
1040 off, copy); 1047 page, off, copy);
1041 if (err) { 1048 if (err) {
1042 /* If this page was new, give it to the 1049 /* If this page was new, give it to the
1043 * socket so it does not get leaked. 1050 * socket so it does not get leaked.
@@ -1104,7 +1111,6 @@ wait_for_memory:
1104out: 1111out:
1105 if (copied) 1112 if (copied)
1106 tcp_push(sk, flags, mss_now, tp->nonagle); 1113 tcp_push(sk, flags, mss_now, tp->nonagle);
1107 TCP_CHECK_TIMER(sk);
1108 release_sock(sk); 1114 release_sock(sk);
1109 return copied; 1115 return copied;
1110 1116
@@ -1123,7 +1129,6 @@ do_error:
1123 goto out; 1129 goto out;
1124out_err: 1130out_err:
1125 err = sk_stream_error(sk, flags, err); 1131 err = sk_stream_error(sk, flags, err);
1126 TCP_CHECK_TIMER(sk);
1127 release_sock(sk); 1132 release_sock(sk);
1128 return err; 1133 return err;
1129} 1134}
@@ -1415,8 +1420,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1415 1420
1416 lock_sock(sk); 1421 lock_sock(sk);
1417 1422
1418 TCP_CHECK_TIMER(sk);
1419
1420 err = -ENOTCONN; 1423 err = -ENOTCONN;
1421 if (sk->sk_state == TCP_LISTEN) 1424 if (sk->sk_state == TCP_LISTEN)
1422 goto out; 1425 goto out;
@@ -1767,12 +1770,10 @@ skip_copy:
1767 /* Clean up data we have read: This will do ACK frames. */ 1770 /* Clean up data we have read: This will do ACK frames. */
1768 tcp_cleanup_rbuf(sk, copied); 1771 tcp_cleanup_rbuf(sk, copied);
1769 1772
1770 TCP_CHECK_TIMER(sk);
1771 release_sock(sk); 1773 release_sock(sk);
1772 return copied; 1774 return copied;
1773 1775
1774out: 1776out:
1775 TCP_CHECK_TIMER(sk);
1776 release_sock(sk); 1777 release_sock(sk);
1777 return err; 1778 return err;
1778 1779
@@ -2653,7 +2654,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2653EXPORT_SYMBOL(compat_tcp_getsockopt); 2654EXPORT_SYMBOL(compat_tcp_getsockopt);
2654#endif 2655#endif
2655 2656
2656struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) 2657struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
2657{ 2658{
2658 struct sk_buff *segs = ERR_PTR(-EINVAL); 2659 struct sk_buff *segs = ERR_PTR(-EINVAL);
2659 struct tcphdr *th; 2660 struct tcphdr *th;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 3b53fd1af23f..6187eb4d1dcf 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
209} 209}
210 210
211 211
212static struct tcp_congestion_ops bictcp = { 212static struct tcp_congestion_ops bictcp __read_mostly = {
213 .init = bictcp_init, 213 .init = bictcp_init,
214 .ssthresh = bictcp_recalc_ssthresh, 214 .ssthresh = bictcp_recalc_ssthresh,
215 .cong_avoid = bictcp_cong_avoid, 215 .cong_avoid = bictcp_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 71d5f2f29fa6..f376b05cca81 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -39,7 +39,7 @@
39 39
40/* Number of delay samples for detecting the increase of delay */ 40/* Number of delay samples for detecting the increase of delay */
41#define HYSTART_MIN_SAMPLES 8 41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (2U<<3) 42#define HYSTART_DELAY_MIN (4U<<3)
43#define HYSTART_DELAY_MAX (16U<<3) 43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) 44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45 45
@@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1;
52static int hystart __read_mostly = 1; 52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; 53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16; 54static int hystart_low_window __read_mostly = 16;
55static int hystart_ack_delta __read_mostly = 2;
55 56
56static u32 cube_rtt_scale __read_mostly; 57static u32 cube_rtt_scale __read_mostly;
57static u32 beta_scale __read_mostly; 58static u32 beta_scale __read_mostly;
@@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
75 " 1: packet-train 2: delay 3: both packet-train and delay"); 76 " 1: packet-train 2: delay 3: both packet-train and delay");
76module_param(hystart_low_window, int, 0644); 77module_param(hystart_low_window, int, 0644);
77MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); 78MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
79module_param(hystart_ack_delta, int, 0644);
80MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
78 81
79/* BIC TCP Parameters */ 82/* BIC TCP Parameters */
80struct bictcp { 83struct bictcp {
@@ -85,17 +88,18 @@ struct bictcp {
85 u32 last_time; /* time when updated last_cwnd */ 88 u32 last_time; /* time when updated last_cwnd */
86 u32 bic_origin_point;/* origin point of bic function */ 89 u32 bic_origin_point;/* origin point of bic function */
87 u32 bic_K; /* time to origin point from the beginning of the current epoch */ 90 u32 bic_K; /* time to origin point from the beginning of the current epoch */
88 u32 delay_min; /* min delay */ 91 u32 delay_min; /* min delay (msec << 3) */
89 u32 epoch_start; /* beginning of an epoch */ 92 u32 epoch_start; /* beginning of an epoch */
90 u32 ack_cnt; /* number of acks */ 93 u32 ack_cnt; /* number of acks */
91 u32 tcp_cwnd; /* estimated tcp cwnd */ 94 u32 tcp_cwnd; /* estimated tcp cwnd */
92#define ACK_RATIO_SHIFT 4 95#define ACK_RATIO_SHIFT 4
96#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
93 u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ 97 u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
94 u8 sample_cnt; /* number of samples to decide curr_rtt */ 98 u8 sample_cnt; /* number of samples to decide curr_rtt */
95 u8 found; /* the exit point is found? */ 99 u8 found; /* the exit point is found? */
96 u32 round_start; /* beginning of each round */ 100 u32 round_start; /* beginning of each round */
97 u32 end_seq; /* end_seq of the round */ 101 u32 end_seq; /* end_seq of the round */
98 u32 last_jiffies; /* last time when the ACK spacing is close */ 102 u32 last_ack; /* last time when the ACK spacing is close */
99 u32 curr_rtt; /* the minimum rtt of current round */ 103 u32 curr_rtt; /* the minimum rtt of current round */
100}; 104};
101 105
@@ -116,12 +120,21 @@ static inline void bictcp_reset(struct bictcp *ca)
116 ca->found = 0; 120 ca->found = 0;
117} 121}
118 122
123static inline u32 bictcp_clock(void)
124{
125#if HZ < 1000
126 return ktime_to_ms(ktime_get_real());
127#else
128 return jiffies_to_msecs(jiffies);
129#endif
130}
131
119static inline void bictcp_hystart_reset(struct sock *sk) 132static inline void bictcp_hystart_reset(struct sock *sk)
120{ 133{
121 struct tcp_sock *tp = tcp_sk(sk); 134 struct tcp_sock *tp = tcp_sk(sk);
122 struct bictcp *ca = inet_csk_ca(sk); 135 struct bictcp *ca = inet_csk_ca(sk);
123 136
124 ca->round_start = ca->last_jiffies = jiffies; 137 ca->round_start = ca->last_ack = bictcp_clock();
125 ca->end_seq = tp->snd_nxt; 138 ca->end_seq = tp->snd_nxt;
126 ca->curr_rtt = 0; 139 ca->curr_rtt = 0;
127 ca->sample_cnt = 0; 140 ca->sample_cnt = 0;
@@ -236,8 +249,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
236 */ 249 */
237 250
238 /* change the unit from HZ to bictcp_HZ */ 251 /* change the unit from HZ to bictcp_HZ */
239 t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) 252 t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
240 << BICTCP_HZ) / HZ; 253 - ca->epoch_start) << BICTCP_HZ) / HZ;
241 254
242 if (t < ca->bic_K) /* t - K */ 255 if (t < ca->bic_K) /* t - K */
243 offs = ca->bic_K - t; 256 offs = ca->bic_K - t;
@@ -258,6 +271,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
258 ca->cnt = 100 * cwnd; /* very small increment*/ 271 ca->cnt = 100 * cwnd; /* very small increment*/
259 } 272 }
260 273
274 /*
275 * The initial growth of cubic function may be too conservative
276 * when the available bandwidth is still unknown.
277 */
278 if (ca->loss_cwnd == 0 && ca->cnt > 20)
279 ca->cnt = 20; /* increase cwnd 5% per RTT */
280
261 /* TCP Friendly */ 281 /* TCP Friendly */
262 if (tcp_friendliness) { 282 if (tcp_friendliness) {
263 u32 scale = beta_scale; 283 u32 scale = beta_scale;
@@ -339,12 +359,12 @@ static void hystart_update(struct sock *sk, u32 delay)
339 struct bictcp *ca = inet_csk_ca(sk); 359 struct bictcp *ca = inet_csk_ca(sk);
340 360
341 if (!(ca->found & hystart_detect)) { 361 if (!(ca->found & hystart_detect)) {
342 u32 curr_jiffies = jiffies; 362 u32 now = bictcp_clock();
343 363
344 /* first detection parameter - ack-train detection */ 364 /* first detection parameter - ack-train detection */
345 if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { 365 if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
346 ca->last_jiffies = curr_jiffies; 366 ca->last_ack = now;
347 if (curr_jiffies - ca->round_start >= ca->delay_min>>4) 367 if ((s32)(now - ca->round_start) > ca->delay_min >> 4)
348 ca->found |= HYSTART_ACK_TRAIN; 368 ca->found |= HYSTART_ACK_TRAIN;
349 } 369 }
350 370
@@ -379,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
379 u32 delay; 399 u32 delay;
380 400
381 if (icsk->icsk_ca_state == TCP_CA_Open) { 401 if (icsk->icsk_ca_state == TCP_CA_Open) {
382 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; 402 u32 ratio = ca->delayed_ack;
383 ca->delayed_ack += cnt; 403
404 ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
405 ratio += cnt;
406
407 ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
384 } 408 }
385 409
386 /* Some calls are for duplicates without timetamps */ 410 /* Some calls are for duplicates without timetamps */
@@ -391,7 +415,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
391 if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) 415 if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
392 return; 416 return;
393 417
394 delay = usecs_to_jiffies(rtt_us) << 3; 418 delay = (rtt_us << 3) / USEC_PER_MSEC;
395 if (delay == 0) 419 if (delay == 0)
396 delay = 1; 420 delay = 1;
397 421
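
The bictcp_acked() change above switches CUBIC's delay samples from jiffies << 3 to milliseconds << 3, which is why HYSTART_DELAY_MIN becomes (4U<<3) and the ack-train spacing is compared against hystart_ack_delta in milliseconds. A small illustrative calculation of the new unit, not kernel code; USEC_PER_MSEC and the 5 ms sample are the only inputs, taken from the hunk:

/* Worked example of the new "ms << 3" fixed point used for delay_min/curr_rtt. */
#include <stdio.h>

#define USEC_PER_MSEC 1000U

int main(void)
{
	unsigned int rtt_us = 5000;	/* a 5 ms RTT sample */
	unsigned int delay = (rtt_us << 3) / USEC_PER_MSEC;

	/* delay == 40, i.e. 5 ms in ms<<3 units; the new HYSTART_DELAY_MIN
	 * of (4U<<3) == 32 therefore means 4 ms, replacing the old
	 * jiffies-based minimum of (2U<<3). */
	printf("delay = %u\n", delay);
	return 0;
}
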
@@ -405,7 +429,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
405 hystart_update(sk, delay); 429 hystart_update(sk, delay);
406} 430}
407 431
408static struct tcp_congestion_ops cubictcp = { 432static struct tcp_congestion_ops cubictcp __read_mostly = {
409 .init = bictcp_init, 433 .init = bictcp_init,
410 .ssthresh = bictcp_recalc_ssthresh, 434 .ssthresh = bictcp_recalc_ssthresh,
411 .cong_avoid = bictcp_cong_avoid, 435 .cong_avoid = bictcp_cong_avoid,
@@ -447,6 +471,10 @@ static int __init cubictcp_register(void)
447 /* divide by bic_scale and by constant Srtt (100ms) */ 471 /* divide by bic_scale and by constant Srtt (100ms) */
448 do_div(cube_factor, bic_scale * 10); 472 do_div(cube_factor, bic_scale * 10);
449 473
474 /* hystart needs ms clock resolution */
475 if (hystart && HZ < 1000)
476 cubictcp.flags |= TCP_CONG_RTT_STAMP;
477
450 return tcp_register_congestion_control(&cubictcp); 478 return tcp_register_congestion_control(&cubictcp);
451} 479}
452 480
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b6caaf75bb9..30f27f6b3655 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
158} 158}
159 159
160 160
161static struct tcp_congestion_ops tcp_highspeed = { 161static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
162 .init = hstcp_init, 162 .init = hstcp_init,
163 .ssthresh = hstcp_ssthresh, 163 .ssthresh = hstcp_ssthresh,
164 .cong_avoid = hstcp_cong_avoid, 164 .cong_avoid = hstcp_cong_avoid,
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 7c94a4955416..c1a8175361e8 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state)
284 } 284 }
285} 285}
286 286
287static struct tcp_congestion_ops htcp = { 287static struct tcp_congestion_ops htcp __read_mostly = {
288 .init = htcp_init, 288 .init = htcp_init,
289 .ssthresh = htcp_recalc_ssthresh, 289 .ssthresh = htcp_recalc_ssthresh,
290 .cong_avoid = htcp_cong_avoid, 290 .cong_avoid = htcp_cong_avoid,
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 377bc9349371..fe3ecf484b44 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
162 tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); 162 tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
163} 163}
164 164
165static struct tcp_congestion_ops tcp_hybla = { 165static struct tcp_congestion_ops tcp_hybla __read_mostly = {
166 .init = hybla_init, 166 .init = hybla_init,
167 .ssthresh = tcp_reno_ssthresh, 167 .ssthresh = tcp_reno_ssthresh,
168 .min_cwnd = tcp_reno_min_cwnd, 168 .min_cwnd = tcp_reno_min_cwnd,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 00ca688d8964..813b43a76fec 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
322 } 322 }
323} 323}
324 324
325static struct tcp_congestion_ops tcp_illinois = { 325static struct tcp_congestion_ops tcp_illinois __read_mostly = {
326 .flags = TCP_CONG_RTT_STAMP, 326 .flags = TCP_CONG_RTT_STAMP,
327 .init = tcp_illinois_init, 327 .init = tcp_illinois_init,
328 .ssthresh = tcp_illinois_ssthresh, 328 .ssthresh = tcp_illinois_ssthresh,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65f6c0406245..bef9f04c22ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
817 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); 817 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
818 818
819 if (!cwnd) 819 if (!cwnd)
820 cwnd = rfc3390_bytes_to_packets(tp->mss_cache); 820 cwnd = TCP_INIT_CWND;
821 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 821 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
822} 822}
823 823
@@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2659#define DBGUNDO(x...) do { } while (0) 2659#define DBGUNDO(x...) do { } while (0)
2660#endif 2660#endif
2661 2661
2662static void tcp_undo_cwr(struct sock *sk, const int undo) 2662static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2663{ 2663{
2664 struct tcp_sock *tp = tcp_sk(sk); 2664 struct tcp_sock *tp = tcp_sk(sk);
2665 2665
@@ -2671,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
2671 else 2671 else
2672 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); 2672 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2673 2673
2674 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { 2674 if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
2675 tp->snd_ssthresh = tp->prior_ssthresh; 2675 tp->snd_ssthresh = tp->prior_ssthresh;
2676 TCP_ECN_withdraw_cwr(tp); 2676 TCP_ECN_withdraw_cwr(tp);
2677 } 2677 }
2678 } else { 2678 } else {
2679 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); 2679 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2680 } 2680 }
2681 tcp_moderate_cwnd(tp);
2682 tp->snd_cwnd_stamp = tcp_time_stamp; 2681 tp->snd_cwnd_stamp = tcp_time_stamp;
2683} 2682}
2684 2683
@@ -2699,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
2699 * or our original transmission succeeded. 2698 * or our original transmission succeeded.
2700 */ 2699 */
2701 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); 2700 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2702 tcp_undo_cwr(sk, 1); 2701 tcp_undo_cwr(sk, true);
2703 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) 2702 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2704 mib_idx = LINUX_MIB_TCPLOSSUNDO; 2703 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2705 else 2704 else
@@ -2726,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
2726 2725
2727 if (tp->undo_marker && !tp->undo_retrans) { 2726 if (tp->undo_marker && !tp->undo_retrans) {
2728 DBGUNDO(sk, "D-SACK"); 2727 DBGUNDO(sk, "D-SACK");
2729 tcp_undo_cwr(sk, 1); 2728 tcp_undo_cwr(sk, true);
2730 tp->undo_marker = 0; 2729 tp->undo_marker = 0;
2731 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); 2730 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2732 } 2731 }
@@ -2779,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2779 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); 2778 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2780 2779
2781 DBGUNDO(sk, "Hoe"); 2780 DBGUNDO(sk, "Hoe");
2782 tcp_undo_cwr(sk, 0); 2781 tcp_undo_cwr(sk, false);
2783 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); 2782 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2784 2783
2785 /* So... Do not make Hoe's retransmit yet. 2784 /* So... Do not make Hoe's retransmit yet.
@@ -2808,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk)
2808 2807
2809 DBGUNDO(sk, "partial loss"); 2808 DBGUNDO(sk, "partial loss");
2810 tp->lost_out = 0; 2809 tp->lost_out = 0;
2811 tcp_undo_cwr(sk, 1); 2810 tcp_undo_cwr(sk, true);
2812 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); 2811 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2813 inet_csk(sk)->icsk_retransmits = 0; 2812 inet_csk(sk)->icsk_retransmits = 0;
2814 tp->undo_marker = 0; 2813 tp->undo_marker = 0;
@@ -2822,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk)
2822static inline void tcp_complete_cwr(struct sock *sk) 2821static inline void tcp_complete_cwr(struct sock *sk)
2823{ 2822{
2824 struct tcp_sock *tp = tcp_sk(sk); 2823 struct tcp_sock *tp = tcp_sk(sk);
2825 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); 2824 /* Do not moderate cwnd if it's already undone in cwr or recovery */
2826 tp->snd_cwnd_stamp = tcp_time_stamp; 2825 if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
2826 tp->snd_cwnd = tp->snd_ssthresh;
2827 tp->snd_cwnd_stamp = tcp_time_stamp;
2828 }
2827 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2829 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2828} 2830}
2829 2831
@@ -3350,7 +3352,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3350 net_invalid_timestamp())) 3352 net_invalid_timestamp()))
3351 rtt_us = ktime_us_delta(ktime_get_real(), 3353 rtt_us = ktime_us_delta(ktime_get_real(),
3352 last_ackt); 3354 last_ackt);
3353 else if (ca_seq_rtt > 0) 3355 else if (ca_seq_rtt >= 0)
3354 rtt_us = jiffies_to_usecs(ca_seq_rtt); 3356 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3355 } 3357 }
3356 3358
@@ -3494,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3494 if (flag & FLAG_ECE) 3496 if (flag & FLAG_ECE)
3495 tcp_ratehalving_spur_to_response(sk); 3497 tcp_ratehalving_spur_to_response(sk);
3496 else 3498 else
3497 tcp_undo_cwr(sk, 1); 3499 tcp_undo_cwr(sk, true);
3498} 3500}
3499 3501
3500/* F-RTO spurious RTO detection algorithm (RFC4138) 3502/* F-RTO spurious RTO detection algorithm (RFC4138)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 02f583b3744a..3c8d9b6f1ea4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146/* This will initiate an outgoing connection. */ 146/* This will initiate an outgoing connection. */
147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148{ 148{
149 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_sock *inet = inet_sk(sk);
150 struct tcp_sock *tp = tcp_sk(sk); 151 struct tcp_sock *tp = tcp_sk(sk);
151 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 152 __be16 orig_sport, orig_dport;
152 struct rtable *rt;
153 __be32 daddr, nexthop; 153 __be32 daddr, nexthop;
154 int tmp; 154 struct flowi4 *fl4;
155 struct rtable *rt;
155 int err; 156 int err;
157 struct ip_options_rcu *inet_opt;
156 158
157 if (addr_len < sizeof(struct sockaddr_in)) 159 if (addr_len < sizeof(struct sockaddr_in))
158 return -EINVAL; 160 return -EINVAL;
@@ -161,20 +163,26 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
161 return -EAFNOSUPPORT; 163 return -EAFNOSUPPORT;
162 164
163 nexthop = daddr = usin->sin_addr.s_addr; 165 nexthop = daddr = usin->sin_addr.s_addr;
164 if (inet->opt && inet->opt->srr) { 166 inet_opt = rcu_dereference_protected(inet->inet_opt,
167 sock_owned_by_user(sk));
168 if (inet_opt && inet_opt->opt.srr) {
165 if (!daddr) 169 if (!daddr)
166 return -EINVAL; 170 return -EINVAL;
167 nexthop = inet->opt->faddr; 171 nexthop = inet_opt->opt.faddr;
168 } 172 }
169 173
170 tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, 174 orig_sport = inet->inet_sport;
171 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 175 orig_dport = usin->sin_port;
172 IPPROTO_TCP, 176 fl4 = &inet->cork.fl.u.ip4;
173 inet->inet_sport, usin->sin_port, sk, 1); 177 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
174 if (tmp < 0) { 178 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 if (tmp == -ENETUNREACH) 179 IPPROTO_TCP,
180 orig_sport, orig_dport, sk, true);
181 if (IS_ERR(rt)) {
182 err = PTR_ERR(rt);
183 if (err == -ENETUNREACH)
176 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 184 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
177 return tmp; 185 return err;
178 } 186 }
179 187
180 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 188 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
@@ -182,11 +190,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
182 return -ENETUNREACH; 190 return -ENETUNREACH;
183 } 191 }
184 192
185 if (!inet->opt || !inet->opt->srr) 193 if (!inet_opt || !inet_opt->opt.srr)
186 daddr = rt->rt_dst; 194 daddr = fl4->daddr;
187 195
188 if (!inet->inet_saddr) 196 if (!inet->inet_saddr)
189 inet->inet_saddr = rt->rt_src; 197 inet->inet_saddr = fl4->saddr;
190 inet->inet_rcv_saddr = inet->inet_saddr; 198 inet->inet_rcv_saddr = inet->inet_saddr;
191 199
192 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 200 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -197,8 +205,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
197 } 205 }
198 206
199 if (tcp_death_row.sysctl_tw_recycle && 207 if (tcp_death_row.sysctl_tw_recycle &&
200 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { 208 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
201 struct inet_peer *peer = rt_get_peer(rt); 209 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
202 /* 210 /*
203 * VJ's idea. We save last timestamp seen from 211 * VJ's idea. We save last timestamp seen from
204 * the destination in peer table, when entering state 212 * the destination in peer table, when entering state
@@ -218,8 +226,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
218 inet->inet_daddr = daddr; 226 inet->inet_daddr = daddr;
219 227
220 inet_csk(sk)->icsk_ext_hdr_len = 0; 228 inet_csk(sk)->icsk_ext_hdr_len = 0;
221 if (inet->opt) 229 if (inet_opt)
222 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 230 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
223 231
224 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 232 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
225 233
@@ -233,11 +241,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
233 if (err) 241 if (err)
234 goto failure; 242 goto failure;
235 243
236 err = ip_route_newports(&rt, IPPROTO_TCP, 244 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
237 inet->inet_sport, inet->inet_dport, sk); 245 inet->inet_sport, inet->inet_dport, sk);
238 if (err) 246 if (IS_ERR(rt)) {
247 err = PTR_ERR(rt);
248 rt = NULL;
239 goto failure; 249 goto failure;
240 250 }
241 /* OK, now commit destination to socket. */ 251 /* OK, now commit destination to socket. */
242 sk->sk_gso_type = SKB_GSO_TCPV4; 252 sk->sk_gso_type = SKB_GSO_TCPV4;
243 sk_setup_caps(sk, &rt->dst); 253 sk_setup_caps(sk, &rt->dst);
@@ -273,7 +283,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
273/* 283/*
274 * This routine does path mtu discovery as defined in RFC1191. 284 * This routine does path mtu discovery as defined in RFC1191.
275 */ 285 */
276static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) 286static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
277{ 287{
278 struct dst_entry *dst; 288 struct dst_entry *dst;
279 struct inet_sock *inet = inet_sk(sk); 289 struct inet_sock *inet = inet_sk(sk);
@@ -335,7 +345,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
335 345
336void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 346void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
337{ 347{
338 struct iphdr *iph = (struct iphdr *)icmp_skb->data; 348 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
339 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 349 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
340 struct inet_connection_sock *icsk; 350 struct inet_connection_sock *icsk;
341 struct tcp_sock *tp; 351 struct tcp_sock *tp;
@@ -641,7 +651,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
641 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 651 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
642 652
643 net = dev_net(skb_dst(skb)->dev); 653 net = dev_net(skb_dst(skb)->dev);
644 ip_send_reply(net->ipv4.tcp_sock, skb, 654 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
645 &arg, arg.iov[0].iov_len); 655 &arg, arg.iov[0].iov_len);
646 656
647 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 657 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -716,7 +726,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
716 if (oif) 726 if (oif)
717 arg.bound_dev_if = oif; 727 arg.bound_dev_if = oif;
718 728
719 ip_send_reply(net->ipv4.tcp_sock, skb, 729 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
720 &arg, arg.iov[0].iov_len); 730 &arg, arg.iov[0].iov_len);
721 731
722 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 732 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -759,11 +769,12 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
759 struct request_values *rvp) 769 struct request_values *rvp)
760{ 770{
761 const struct inet_request_sock *ireq = inet_rsk(req); 771 const struct inet_request_sock *ireq = inet_rsk(req);
772 struct flowi4 fl4;
762 int err = -1; 773 int err = -1;
763 struct sk_buff * skb; 774 struct sk_buff * skb;
764 775
765 /* First, grab a route. */ 776 /* First, grab a route. */
766 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 777 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
767 return -1; 778 return -1;
768 779
769 skb = tcp_make_synack(sk, dst, req, rvp); 780 skb = tcp_make_synack(sk, dst, req, rvp);
@@ -814,17 +825,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
814/* 825/*
815 * Save and compile IPv4 options into the request_sock if needed. 826 * Save and compile IPv4 options into the request_sock if needed.
816 */ 827 */
817static struct ip_options *tcp_v4_save_options(struct sock *sk, 828static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
818 struct sk_buff *skb) 829 struct sk_buff *skb)
819{ 830{
820 struct ip_options *opt = &(IPCB(skb)->opt); 831 const struct ip_options *opt = &(IPCB(skb)->opt);
821 struct ip_options *dopt = NULL; 832 struct ip_options_rcu *dopt = NULL;
822 833
823 if (opt && opt->optlen) { 834 if (opt && opt->optlen) {
824 int opt_size = optlength(opt); 835 int opt_size = sizeof(*dopt) + opt->optlen;
836
825 dopt = kmalloc(opt_size, GFP_ATOMIC); 837 dopt = kmalloc(opt_size, GFP_ATOMIC);
826 if (dopt) { 838 if (dopt) {
827 if (ip_options_echo(dopt, skb)) { 839 if (ip_options_echo(&dopt->opt, skb)) {
828 kfree(dopt); 840 kfree(dopt);
829 dopt = NULL; 841 dopt = NULL;
830 } 842 }
@@ -1327,6 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1327 req->cookie_ts = tmp_opt.tstamp_ok; 1339 req->cookie_ts = tmp_opt.tstamp_ok;
1328 } else if (!isn) { 1340 } else if (!isn) {
1329 struct inet_peer *peer = NULL; 1341 struct inet_peer *peer = NULL;
1342 struct flowi4 fl4;
1330 1343
1331 /* VJ's idea. We save last timestamp seen 1344 /* VJ's idea. We save last timestamp seen
1332 * from the destination in peer table, when entering 1345 * from the destination in peer table, when entering
@@ -1339,9 +1352,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1339 */ 1352 */
1340 if (tmp_opt.saw_tstamp && 1353 if (tmp_opt.saw_tstamp &&
1341 tcp_death_row.sysctl_tw_recycle && 1354 tcp_death_row.sysctl_tw_recycle &&
1342 (dst = inet_csk_route_req(sk, req)) != NULL && 1355 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1343 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1356 fl4.daddr == saddr &&
1344 peer->daddr.a4 == saddr) { 1357 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1345 inet_peer_refcheck(peer); 1358 inet_peer_refcheck(peer);
1346 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1359 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1347 (s32)(peer->tcp_ts - req->ts_recent) > 1360 (s32)(peer->tcp_ts - req->ts_recent) >
@@ -1405,19 +1418,16 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1405#ifdef CONFIG_TCP_MD5SIG 1418#ifdef CONFIG_TCP_MD5SIG
1406 struct tcp_md5sig_key *key; 1419 struct tcp_md5sig_key *key;
1407#endif 1420#endif
1421 struct ip_options_rcu *inet_opt;
1408 1422
1409 if (sk_acceptq_is_full(sk)) 1423 if (sk_acceptq_is_full(sk))
1410 goto exit_overflow; 1424 goto exit_overflow;
1411 1425
1412 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1413 goto exit;
1414
1415 newsk = tcp_create_openreq_child(sk, req, skb); 1426 newsk = tcp_create_openreq_child(sk, req, skb);
1416 if (!newsk) 1427 if (!newsk)
1417 goto exit_nonewsk; 1428 goto exit_nonewsk;
1418 1429
1419 newsk->sk_gso_type = SKB_GSO_TCPV4; 1430 newsk->sk_gso_type = SKB_GSO_TCPV4;
1420 sk_setup_caps(newsk, dst);
1421 1431
1422 newtp = tcp_sk(newsk); 1432 newtp = tcp_sk(newsk);
1423 newinet = inet_sk(newsk); 1433 newinet = inet_sk(newsk);
@@ -1425,15 +1435,21 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1425 newinet->inet_daddr = ireq->rmt_addr; 1435 newinet->inet_daddr = ireq->rmt_addr;
1426 newinet->inet_rcv_saddr = ireq->loc_addr; 1436 newinet->inet_rcv_saddr = ireq->loc_addr;
1427 newinet->inet_saddr = ireq->loc_addr; 1437 newinet->inet_saddr = ireq->loc_addr;
1428 newinet->opt = ireq->opt; 1438 inet_opt = ireq->opt;
1439 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1429 ireq->opt = NULL; 1440 ireq->opt = NULL;
1430 newinet->mc_index = inet_iif(skb); 1441 newinet->mc_index = inet_iif(skb);
1431 newinet->mc_ttl = ip_hdr(skb)->ttl; 1442 newinet->mc_ttl = ip_hdr(skb)->ttl;
1432 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1443 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1433 if (newinet->opt) 1444 if (inet_opt)
1434 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1445 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1435 newinet->inet_id = newtp->write_seq ^ jiffies; 1446 newinet->inet_id = newtp->write_seq ^ jiffies;
1436 1447
1448 if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1449 goto put_and_exit;
1450
1451 sk_setup_caps(newsk, dst);
1452
1437 tcp_mtup_init(newsk); 1453 tcp_mtup_init(newsk);
1438 tcp_sync_mss(newsk, dst_mtu(dst)); 1454 tcp_sync_mss(newsk, dst_mtu(dst));
1439 newtp->advmss = dst_metric_advmss(dst); 1455 newtp->advmss = dst_metric_advmss(dst);
@@ -1461,10 +1477,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1461 } 1477 }
1462#endif 1478#endif
1463 1479
1464 if (__inet_inherit_port(sk, newsk) < 0) { 1480 if (__inet_inherit_port(sk, newsk) < 0)
1465 sock_put(newsk); 1481 goto put_and_exit;
1466 goto exit;
1467 }
1468 __inet_hash_nolisten(newsk, NULL); 1482 __inet_hash_nolisten(newsk, NULL);
1469 1483
1470 return newsk; 1484 return newsk;
@@ -1476,6 +1490,9 @@ exit_nonewsk:
1476exit: 1490exit:
1477 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1491 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1478 return NULL; 1492 return NULL;
1493put_and_exit:
1494 sock_put(newsk);
1495 goto exit;
1479} 1496}
1480EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1497EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1481 1498
@@ -1556,12 +1573,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1556 1573
1557 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1574 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1558 sock_rps_save_rxhash(sk, skb->rxhash); 1575 sock_rps_save_rxhash(sk, skb->rxhash);
1559 TCP_CHECK_TIMER(sk);
1560 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1576 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1561 rsk = sk; 1577 rsk = sk;
1562 goto reset; 1578 goto reset;
1563 } 1579 }
1564 TCP_CHECK_TIMER(sk);
1565 return 0; 1580 return 0;
1566 } 1581 }
1567 1582
@@ -1583,13 +1598,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1583 } else 1598 } else
1584 sock_rps_save_rxhash(sk, skb->rxhash); 1599 sock_rps_save_rxhash(sk, skb->rxhash);
1585 1600
1586
1587 TCP_CHECK_TIMER(sk);
1588 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1601 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1589 rsk = sk; 1602 rsk = sk;
1590 goto reset; 1603 goto reset;
1591 } 1604 }
1592 TCP_CHECK_TIMER(sk);
1593 return 0; 1605 return 0;
1594 1606
1595reset: 1607reset:
@@ -1763,12 +1775,13 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1763 struct inet_sock *inet = inet_sk(sk); 1775 struct inet_sock *inet = inet_sk(sk);
1764 struct inet_peer *peer; 1776 struct inet_peer *peer;
1765 1777
1766 if (!rt || rt->rt_dst != inet->inet_daddr) { 1778 if (!rt ||
1779 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1767 peer = inet_getpeer_v4(inet->inet_daddr, 1); 1780 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1768 *release_it = true; 1781 *release_it = true;
1769 } else { 1782 } else {
1770 if (!rt->peer) 1783 if (!rt->peer)
1771 rt_bind_peer(rt, 1); 1784 rt_bind_peer(rt, inet->inet_daddr, 1);
1772 peer = rt->peer; 1785 peer = rt->peer;
1773 *release_it = false; 1786 *release_it = false;
1774 } 1787 }
@@ -2526,7 +2539,7 @@ void tcp4_proc_exit(void)
2526 2539
2527struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2540struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2528{ 2541{
2529 struct iphdr *iph = skb_gro_network_header(skb); 2542 const struct iphdr *iph = skb_gro_network_header(skb);
2530 2543
2531 switch (skb->ip_summed) { 2544 switch (skb->ip_summed) {
2532 case CHECKSUM_COMPLETE: 2545 case CHECKSUM_COMPLETE:
@@ -2547,7 +2560,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2547 2560
2548int tcp4_gro_complete(struct sk_buff *skb) 2561int tcp4_gro_complete(struct sk_buff *skb)
2549{ 2562{
2550 struct iphdr *iph = ip_hdr(skb); 2563 const struct iphdr *iph = ip_hdr(skb);
2551 struct tcphdr *th = tcp_hdr(skb); 2564 struct tcphdr *th = tcp_hdr(skb);
2552 2565
2553 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 2566 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index de870377fbba..72f7218b03f5 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -12,7 +12,7 @@
12 * within cong_avoid. 12 * within cong_avoid.
13 * o Error correcting in remote HZ, therefore remote HZ will be keeped 13 * o Error correcting in remote HZ, therefore remote HZ will be keeped
14 * on checking and updating. 14 * on checking and updating.
15 * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne 15 * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
16 * OWD have a similar meaning as RTT. Also correct the buggy formular. 16 * OWD have a similar meaning as RTT. Also correct the buggy formular.
17 * o Handle reaction for Early Congestion Indication (ECI) within 17 * o Handle reaction for Early Congestion Indication (ECI) within
18 * pkts_acked, as mentioned within pseudo code. 18 * pkts_acked, as mentioned within pseudo code.
@@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
313 lp->last_drop = tcp_time_stamp; 313 lp->last_drop = tcp_time_stamp;
314} 314}
315 315
316static struct tcp_congestion_ops tcp_lp = { 316static struct tcp_congestion_ops tcp_lp __read_mostly = {
317 .flags = TCP_CONG_RTT_STAMP, 317 .flags = TCP_CONG_RTT_STAMP,
318 .init = tcp_lp_init, 318 .init = tcp_lp_init,
319 .ssthresh = tcp_reno_ssthresh, 319 .ssthresh = tcp_reno_ssthresh,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dfa5beb0c1c8..882e0b0964d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -73,7 +73,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
73 tcp_advance_send_head(sk, skb); 73 tcp_advance_send_head(sk, skb);
74 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 74 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
75 75
76 /* Don't override Nagle indefinately with F-RTO */ 76 /* Don't override Nagle indefinitely with F-RTO */
77 if (tp->frto_counter == 2) 77 if (tp->frto_counter == 2)
78 tp->frto_counter = 3; 78 tp->frto_counter = 3;
79 79
@@ -899,7 +899,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
899 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 899 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
900 tcp_skb_pcount(skb)); 900 tcp_skb_pcount(skb));
901 901
902 err = icsk->icsk_af_ops->queue_xmit(skb); 902 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
903 if (likely(err <= 0)) 903 if (likely(err <= 0))
904 return err; 904 return err;
905 905
@@ -1003,7 +1003,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1003 int nlen; 1003 int nlen;
1004 u8 flags; 1004 u8 flags;
1005 1005
1006 BUG_ON(len > skb->len); 1006 if (WARN_ON(len > skb->len))
1007 return -EINVAL;
1007 1008
1008 nsize = skb_headlen(skb) - len; 1009 nsize = skb_headlen(skb) - len;
1009 if (nsize < 0) 1010 if (nsize < 0)
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index a76513779e2b..8ce55b8aaec8 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
35} 35}
36 36
37 37
38static struct tcp_congestion_ops tcp_scalable = { 38static struct tcp_congestion_ops tcp_scalable __read_mostly = {
39 .ssthresh = tcp_scalable_ssthresh, 39 .ssthresh = tcp_scalable_ssthresh,
40 .cong_avoid = tcp_scalable_cong_avoid, 40 .cong_avoid = tcp_scalable_cong_avoid,
41 .min_cwnd = tcp_reno_min_cwnd, 41 .min_cwnd = tcp_reno_min_cwnd,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74a6aa003657..ecd44b0c45f1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data)
259 tcp_send_ack(sk); 259 tcp_send_ack(sk);
260 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); 260 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
261 } 261 }
262 TCP_CHECK_TIMER(sk);
263 262
264out: 263out:
265 if (tcp_memory_pressure) 264 if (tcp_memory_pressure)
@@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data)
481 tcp_probe_timer(sk); 480 tcp_probe_timer(sk);
482 break; 481 break;
483 } 482 }
484 TCP_CHECK_TIMER(sk);
485 483
486out: 484out:
487 sk_mem_reclaim(sk); 485 sk_mem_reclaim(sk);
@@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data)
589 elapsed = keepalive_time_when(tp) - elapsed; 587 elapsed = keepalive_time_when(tp) - elapsed;
590 } 588 }
591 589
592 TCP_CHECK_TIMER(sk);
593 sk_mem_reclaim(sk); 590 sk_mem_reclaim(sk);
594 591
595resched: 592resched:
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c6743eec9b7d..80fa2bfd7ede 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
304} 304}
305EXPORT_SYMBOL_GPL(tcp_vegas_get_info); 305EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
306 306
307static struct tcp_congestion_ops tcp_vegas = { 307static struct tcp_congestion_ops tcp_vegas __read_mostly = {
308 .flags = TCP_CONG_RTT_STAMP, 308 .flags = TCP_CONG_RTT_STAMP,
309 .init = tcp_vegas_init, 309 .init = tcp_vegas_init,
310 .ssthresh = tcp_reno_ssthresh, 310 .ssthresh = tcp_reno_ssthresh,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 38bc0b52d745..ac43cd747bce 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
201 return max(tp->snd_cwnd >> 1U, 2U); 201 return max(tp->snd_cwnd >> 1U, 2U);
202} 202}
203 203
204static struct tcp_congestion_ops tcp_veno = { 204static struct tcp_congestion_ops tcp_veno __read_mostly = {
205 .flags = TCP_CONG_RTT_STAMP, 205 .flags = TCP_CONG_RTT_STAMP,
206 .init = tcp_veno_init, 206 .init = tcp_veno_init,
207 .ssthresh = tcp_veno_ssthresh, 207 .ssthresh = tcp_veno_ssthresh,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index a534dda5456e..1b91bf48e277 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
272} 272}
273 273
274 274
275static struct tcp_congestion_ops tcp_westwood = { 275static struct tcp_congestion_ops tcp_westwood __read_mostly = {
276 .init = tcp_westwood_init, 276 .init = tcp_westwood_init,
277 .ssthresh = tcp_reno_ssthresh, 277 .ssthresh = tcp_reno_ssthresh,
278 .cong_avoid = tcp_reno_cong_avoid, 278 .cong_avoid = tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index a0f240358892..05c3b6f0e8e1 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -20,7 +20,7 @@
20#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss 20#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss
21#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion 21#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion
22#define TCP_YEAH_PHY 8 //lin maximum delta from base 22#define TCP_YEAH_PHY 8 //lin maximum delta from base
23#define TCP_YEAH_RHO 16 //lin minumum number of consecutive rtt to consider competition on loss 23#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss
24#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count 24#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count
25 25
26#define TCP_SCALABLE_AI_CNT 100U 26#define TCP_SCALABLE_AI_CNT 100U
@@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
225 return tp->snd_cwnd - reduction; 225 return tp->snd_cwnd - reduction;
226} 226}
227 227
228static struct tcp_congestion_ops tcp_yeah = { 228static struct tcp_congestion_ops tcp_yeah __read_mostly = {
229 .flags = TCP_CONG_RTT_STAMP, 229 .flags = TCP_CONG_RTT_STAMP,
230 .init = tcp_yeah_init, 230 .init = tcp_yeah_init,
231 .ssthresh = tcp_yeah_ssthresh, 231 .ssthresh = tcp_yeah_ssthresh,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8157b17959ee..599374f65c76 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -189,7 +189,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
189 * @sk: socket struct in question 189 * @sk: socket struct in question
190 * @snum: port number to look up 190 * @snum: port number to look up
191 * @saddr_comp: AF-dependent comparison of bound local IP addresses 191 * @saddr_comp: AF-dependent comparison of bound local IP addresses
192 * @hash2_nulladdr: AF-dependant hash value in secondary hash chains, 192 * @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
193 * with NULL address 193 * with NULL address
194 */ 194 */
195int udp_lib_get_port(struct sock *sk, unsigned short snum, 195int udp_lib_get_port(struct sock *sk, unsigned short snum,
@@ -578,7 +578,7 @@ found:
578void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 578void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
579{ 579{
580 struct inet_sock *inet; 580 struct inet_sock *inet;
581 struct iphdr *iph = (struct iphdr *)skb->data; 581 const struct iphdr *iph = (const struct iphdr *)skb->data;
582 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); 582 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
583 const int type = icmp_hdr(skb)->type; 583 const int type = icmp_hdr(skb)->type;
584 const int code = icmp_hdr(skb)->code; 584 const int code = icmp_hdr(skb)->code;
@@ -663,75 +663,71 @@ void udp_flush_pending_frames(struct sock *sk)
663EXPORT_SYMBOL(udp_flush_pending_frames); 663EXPORT_SYMBOL(udp_flush_pending_frames);
664 664
665/** 665/**
666 * udp4_hwcsum_outgoing - handle outgoing HW checksumming 666 * udp4_hwcsum - handle outgoing HW checksumming
667 * @sk: socket we are sending on
668 * @skb: sk_buff containing the filled-in UDP header 667 * @skb: sk_buff containing the filled-in UDP header
669 * (checksum field must be zeroed out) 668 * (checksum field must be zeroed out)
669 * @src: source IP address
670 * @dst: destination IP address
670 */ 671 */
671static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, 672static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
672 __be32 src, __be32 dst, int len)
673{ 673{
674 unsigned int offset;
675 struct udphdr *uh = udp_hdr(skb); 674 struct udphdr *uh = udp_hdr(skb);
675 struct sk_buff *frags = skb_shinfo(skb)->frag_list;
676 int offset = skb_transport_offset(skb);
677 int len = skb->len - offset;
678 int hlen = len;
676 __wsum csum = 0; 679 __wsum csum = 0;
677 680
678 if (skb_queue_len(&sk->sk_write_queue) == 1) { 681 if (!frags) {
679 /* 682 /*
680 * Only one fragment on the socket. 683 * Only one fragment on the socket.
681 */ 684 */
682 skb->csum_start = skb_transport_header(skb) - skb->head; 685 skb->csum_start = skb_transport_header(skb) - skb->head;
683 skb->csum_offset = offsetof(struct udphdr, check); 686 skb->csum_offset = offsetof(struct udphdr, check);
684 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); 687 uh->check = ~csum_tcpudp_magic(src, dst, len,
688 IPPROTO_UDP, 0);
685 } else { 689 } else {
686 /* 690 /*
687 * HW-checksum won't work as there are two or more 691 * HW-checksum won't work as there are two or more
688 * fragments on the socket so that all csums of sk_buffs 692 * fragments on the socket so that all csums of sk_buffs
689 * should be together 693 * should be together
690 */ 694 */
691 offset = skb_transport_offset(skb); 695 do {
692 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 696 csum = csum_add(csum, frags->csum);
697 hlen -= frags->len;
698 } while ((frags = frags->next));
693 699
700 csum = skb_checksum(skb, offset, hlen, csum);
694 skb->ip_summed = CHECKSUM_NONE; 701 skb->ip_summed = CHECKSUM_NONE;
695 702
696 skb_queue_walk(&sk->sk_write_queue, skb) {
697 csum = csum_add(csum, skb->csum);
698 }
699
700 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); 703 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
701 if (uh->check == 0) 704 if (uh->check == 0)
702 uh->check = CSUM_MANGLED_0; 705 uh->check = CSUM_MANGLED_0;
703 } 706 }
704} 707}
705 708
706/* 709static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
707 * Push out all pending data as one UDP datagram. Socket is locked.
708 */
709static int udp_push_pending_frames(struct sock *sk)
710{ 710{
711 struct udp_sock *up = udp_sk(sk); 711 struct sock *sk = skb->sk;
712 struct inet_sock *inet = inet_sk(sk); 712 struct inet_sock *inet = inet_sk(sk);
713 struct flowi *fl = &inet->cork.fl;
714 struct sk_buff *skb;
715 struct udphdr *uh; 713 struct udphdr *uh;
716 int err = 0; 714 int err = 0;
717 int is_udplite = IS_UDPLITE(sk); 715 int is_udplite = IS_UDPLITE(sk);
716 int offset = skb_transport_offset(skb);
717 int len = skb->len - offset;
718 __wsum csum = 0; 718 __wsum csum = 0;
719 719
720 /* Grab the skbuff where UDP header space exists. */
721 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
722 goto out;
723
724 /* 720 /*
725 * Create a UDP header 721 * Create a UDP header
726 */ 722 */
727 uh = udp_hdr(skb); 723 uh = udp_hdr(skb);
728 uh->source = fl->fl_ip_sport; 724 uh->source = inet->inet_sport;
729 uh->dest = fl->fl_ip_dport; 725 uh->dest = fl4->fl4_dport;
730 uh->len = htons(up->len); 726 uh->len = htons(len);
731 uh->check = 0; 727 uh->check = 0;
732 728
733 if (is_udplite) /* UDP-Lite */ 729 if (is_udplite) /* UDP-Lite */
734 csum = udplite_csum_outgoing(sk, skb); 730 csum = udplite_csum(skb);
735 731
736 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ 732 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
737 733
@@ -740,20 +736,20 @@ static int udp_push_pending_frames(struct sock *sk)
740 736
741 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 737 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
742 738
743 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); 739 udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
744 goto send; 740 goto send;
745 741
746 } else /* `normal' UDP */ 742 } else
747 csum = udp_csum_outgoing(sk, skb); 743 csum = udp_csum(skb);
748 744
749 /* add protocol-dependent pseudo-header */ 745 /* add protocol-dependent pseudo-header */
750 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, 746 uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
751 sk->sk_protocol, csum); 747 sk->sk_protocol, csum);
752 if (uh->check == 0) 748 if (uh->check == 0)
753 uh->check = CSUM_MANGLED_0; 749 uh->check = CSUM_MANGLED_0;
754 750
755send: 751send:
756 err = ip_push_pending_frames(sk); 752 err = ip_send_skb(skb);
757 if (err) { 753 if (err) {
758 if (err == -ENOBUFS && !inet->recverr) { 754 if (err == -ENOBUFS && !inet->recverr) {
759 UDP_INC_STATS_USER(sock_net(sk), 755 UDP_INC_STATS_USER(sock_net(sk),
@@ -763,6 +759,26 @@ send:
763 } else 759 } else
764 UDP_INC_STATS_USER(sock_net(sk), 760 UDP_INC_STATS_USER(sock_net(sk),
765 UDP_MIB_OUTDATAGRAMS, is_udplite); 761 UDP_MIB_OUTDATAGRAMS, is_udplite);
762 return err;
763}
764
765/*
766 * Push out all pending data as one UDP datagram. Socket is locked.
767 */
768static int udp_push_pending_frames(struct sock *sk)
769{
770 struct udp_sock *up = udp_sk(sk);
771 struct inet_sock *inet = inet_sk(sk);
772 struct flowi4 *fl4 = &inet->cork.fl.u.ip4;
773 struct sk_buff *skb;
774 int err = 0;
775
776 skb = ip_finish_skb(sk, fl4);
777 if (!skb)
778 goto out;
779
780 err = udp_send_skb(skb, fl4);
781
766out: 782out:
767 up->len = 0; 783 up->len = 0;
768 up->pending = 0; 784 up->pending = 0;
@@ -774,6 +790,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
774{ 790{
775 struct inet_sock *inet = inet_sk(sk); 791 struct inet_sock *inet = inet_sk(sk);
776 struct udp_sock *up = udp_sk(sk); 792 struct udp_sock *up = udp_sk(sk);
793 struct flowi4 fl4_stack;
794 struct flowi4 *fl4;
777 int ulen = len; 795 int ulen = len;
778 struct ipcm_cookie ipc; 796 struct ipcm_cookie ipc;
779 struct rtable *rt = NULL; 797 struct rtable *rt = NULL;
@@ -785,6 +803,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
785 int err, is_udplite = IS_UDPLITE(sk); 803 int err, is_udplite = IS_UDPLITE(sk);
786 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 804 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
787 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 805 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
806 struct sk_buff *skb;
807 struct ip_options_data opt_copy;
788 808
789 if (len > 0xFFFF) 809 if (len > 0xFFFF)
790 return -EMSGSIZE; 810 return -EMSGSIZE;
@@ -799,6 +819,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
799 ipc.opt = NULL; 819 ipc.opt = NULL;
800 ipc.tx_flags = 0; 820 ipc.tx_flags = 0;
801 821
822 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
823
824 fl4 = &inet->cork.fl.u.ip4;
802 if (up->pending) { 825 if (up->pending) {
803 /* 826 /*
804 * There are pending frames. 827 * There are pending frames.
@@ -856,22 +879,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
856 free = 1; 879 free = 1;
857 connected = 0; 880 connected = 0;
858 } 881 }
859 if (!ipc.opt) 882 if (!ipc.opt) {
860 ipc.opt = inet->opt; 883 struct ip_options_rcu *inet_opt;
884
885 rcu_read_lock();
886 inet_opt = rcu_dereference(inet->inet_opt);
887 if (inet_opt) {
888 memcpy(&opt_copy, inet_opt,
889 sizeof(*inet_opt) + inet_opt->opt.optlen);
890 ipc.opt = &opt_copy.opt;
891 }
892 rcu_read_unlock();
893 }
861 894
862 saddr = ipc.addr; 895 saddr = ipc.addr;
863 ipc.addr = faddr = daddr; 896 ipc.addr = faddr = daddr;
864 897
865 if (ipc.opt && ipc.opt->srr) { 898 if (ipc.opt && ipc.opt->opt.srr) {
866 if (!daddr) 899 if (!daddr)
867 return -EINVAL; 900 return -EINVAL;
868 faddr = ipc.opt->faddr; 901 faddr = ipc.opt->opt.faddr;
869 connected = 0; 902 connected = 0;
870 } 903 }
871 tos = RT_TOS(inet->tos); 904 tos = RT_TOS(inet->tos);
872 if (sock_flag(sk, SOCK_LOCALROUTE) || 905 if (sock_flag(sk, SOCK_LOCALROUTE) ||
873 (msg->msg_flags & MSG_DONTROUTE) || 906 (msg->msg_flags & MSG_DONTROUTE) ||
874 (ipc.opt && ipc.opt->is_strictroute)) { 907 (ipc.opt && ipc.opt->opt.is_strictroute)) {
875 tos |= RTO_ONLINK; 908 tos |= RTO_ONLINK;
876 connected = 0; 909 connected = 0;
877 } 910 }
@@ -888,20 +921,19 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
888 rt = (struct rtable *)sk_dst_check(sk, 0); 921 rt = (struct rtable *)sk_dst_check(sk, 0);
889 922
890 if (rt == NULL) { 923 if (rt == NULL) {
891 struct flowi fl = { .oif = ipc.oif,
892 .mark = sk->sk_mark,
893 .fl4_dst = faddr,
894 .fl4_src = saddr,
895 .fl4_tos = tos,
896 .proto = sk->sk_protocol,
897 .flags = inet_sk_flowi_flags(sk),
898 .fl_ip_sport = inet->inet_sport,
899 .fl_ip_dport = dport };
900 struct net *net = sock_net(sk); 924 struct net *net = sock_net(sk);
901 925
902 security_sk_classify_flow(sk, &fl); 926 fl4 = &fl4_stack;
903 err = ip_route_output_flow(net, &rt, &fl, sk, 1); 927 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
904 if (err) { 928 RT_SCOPE_UNIVERSE, sk->sk_protocol,
929 inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
930 faddr, saddr, dport, inet->inet_sport);
931
932 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
933 rt = ip_route_output_flow(net, fl4, sk);
934 if (IS_ERR(rt)) {
935 err = PTR_ERR(rt);
936 rt = NULL;
905 if (err == -ENETUNREACH) 937 if (err == -ENETUNREACH)
906 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 938 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
907 goto out; 939 goto out;
@@ -919,9 +951,20 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
919 goto do_confirm; 951 goto do_confirm;
920back_from_confirm: 952back_from_confirm:
921 953
922 saddr = rt->rt_src; 954 saddr = fl4->saddr;
923 if (!ipc.addr) 955 if (!ipc.addr)
924 daddr = ipc.addr = rt->rt_dst; 956 daddr = ipc.addr = fl4->daddr;
957
958 /* Lockless fast path for the non-corking case. */
959 if (!corkreq) {
960 skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
961 sizeof(struct udphdr), &ipc, &rt,
962 msg->msg_flags);
963 err = PTR_ERR(skb);
964 if (skb && !IS_ERR(skb))
965 err = udp_send_skb(skb, fl4);
966 goto out;
967 }
925 968
926 lock_sock(sk); 969 lock_sock(sk);
927 if (unlikely(up->pending)) { 970 if (unlikely(up->pending)) {
@@ -936,18 +979,18 @@ back_from_confirm:
936 /* 979 /*
937 * Now cork the socket to pend data. 980 * Now cork the socket to pend data.
938 */ 981 */
939 inet->cork.fl.fl4_dst = daddr; 982 fl4 = &inet->cork.fl.u.ip4;
940 inet->cork.fl.fl_ip_dport = dport; 983 fl4->daddr = daddr;
941 inet->cork.fl.fl4_src = saddr; 984 fl4->saddr = saddr;
942 inet->cork.fl.fl_ip_sport = inet->inet_sport; 985 fl4->fl4_dport = dport;
986 fl4->fl4_sport = inet->inet_sport;
943 up->pending = AF_INET; 987 up->pending = AF_INET;
944 988
945do_append_data: 989do_append_data:
946 up->len += ulen; 990 up->len += ulen;
947 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 991 err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
948 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, 992 sizeof(struct udphdr), &ipc, &rt,
949 sizeof(struct udphdr), &ipc, &rt, 993 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
950 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
951 if (err) 994 if (err)
952 udp_flush_pending_frames(sk); 995 udp_flush_pending_frames(sk);
953 else if (!corkreq) 996 else if (!corkreq)
@@ -987,6 +1030,7 @@ EXPORT_SYMBOL(udp_sendmsg);
987int udp_sendpage(struct sock *sk, struct page *page, int offset, 1030int udp_sendpage(struct sock *sk, struct page *page, int offset,
988 size_t size, int flags) 1031 size_t size, int flags)
989{ 1032{
1033 struct inet_sock *inet = inet_sk(sk);
990 struct udp_sock *up = udp_sk(sk); 1034 struct udp_sock *up = udp_sk(sk);
991 int ret; 1035 int ret;
992 1036
@@ -1011,7 +1055,8 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
1011 return -EINVAL; 1055 return -EINVAL;
1012 } 1056 }
1013 1057
1014 ret = ip_append_page(sk, page, offset, size, flags); 1058 ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
1059 page, offset, size, flags);
1015 if (ret == -EOPNOTSUPP) { 1060 if (ret == -EOPNOTSUPP) {
1016 release_sock(sk); 1061 release_sock(sk);
1017 return sock_no_sendpage(sk->sk_socket, page, offset, 1062 return sock_no_sendpage(sk->sk_socket, page, offset,
@@ -2199,7 +2244,7 @@ int udp4_ufo_send_check(struct sk_buff *skb)
2199 return 0; 2244 return 0;
2200} 2245}
2201 2246
2202struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) 2247struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
2203{ 2248{
2204 struct sk_buff *segs = ERR_PTR(-EINVAL); 2249 struct sk_buff *segs = ERR_PTR(-EINVAL);
2205 unsigned int mss; 2250 unsigned int mss;
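
Beyond the flowi4 conversion, the udp.c hunks above split the old udp_push_pending_frames() into a reusable udp_send_skb() helper plus a lockless fast path: when the socket is not corked, the whole datagram is built in one pass with ip_make_skb() and handed to udp_send_skb(), so lock_sock() is only taken for the corked/MSG_MORE case. A condensed sketch of the new control flow inside udp_sendmsg(), assembled from the calls shown in the hunks (setup and error handling omitted; not a drop-in replacement):

        /* Routing done: fl4 describes the flow, rt the chosen route. */
        if (!corkreq) {
                /* Lockless fast path: build and transmit one skb directly. */
                skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
                                  sizeof(struct udphdr), &ipc, &rt,
                                  msg->msg_flags);
                err = PTR_ERR(skb);
                if (skb && !IS_ERR(skb))
                        err = udp_send_skb(skb, fl4);   /* UDP header, checksum, ip_send_skb() */
                goto out;
        }

        /* Corked path: take the socket lock and append; a later
         * udp_push_pending_frames() finalizes the queue with
         * ip_finish_skb() and pushes it through the same udp_send_skb(). */
        lock_sock(sk);
        err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
                             sizeof(struct udphdr), &ipc, &rt,
                             corkreq ? msg->msg_flags | MSG_MORE : msg->msg_flags);

Because udp_send_skb() is also what udp_push_pending_frames() now calls, checksum handling (udp4_hwcsum() walking skb_shinfo(skb)->frag_list) is shared by both paths.
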
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 571aa96a175c..2d51840e53a1 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
69} 69}
70EXPORT_SYMBOL(xfrm4_prepare_output); 70EXPORT_SYMBOL(xfrm4_prepare_output);
71 71
72static int xfrm4_output_finish(struct sk_buff *skb) 72int xfrm4_output_finish(struct sk_buff *skb)
73{ 73{
74#ifdef CONFIG_NETFILTER 74#ifdef CONFIG_NETFILTER
75 if (!skb_dst(skb)->xfrm) { 75 if (!skb_dst(skb)->xfrm) {
@@ -86,7 +86,11 @@ static int xfrm4_output_finish(struct sk_buff *skb)
86 86
87int xfrm4_output(struct sk_buff *skb) 87int xfrm4_output(struct sk_buff *skb)
88{ 88{
89 struct dst_entry *dst = skb_dst(skb);
90 struct xfrm_state *x = dst->xfrm;
91
89 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, 92 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
90 NULL, skb_dst(skb)->dev, xfrm4_output_finish, 93 NULL, dst->dev,
94 x->outer_mode->afinfo->output_finish,
91 !(IPCB(skb)->flags & IPSKB_REROUTED)); 95 !(IPCB(skb)->flags & IPSKB_REROUTED));
92} 96}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b057d40addec..981e43eaf704 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -18,47 +18,53 @@
18 18
19static struct xfrm_policy_afinfo xfrm4_policy_afinfo; 19static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
20 20
21static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, 21static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
22 xfrm_address_t *saddr, 22 int tos,
23 xfrm_address_t *daddr) 23 const xfrm_address_t *saddr,
24 const xfrm_address_t *daddr)
24{ 25{
25 struct flowi fl = {
26 .fl4_dst = daddr->a4,
27 .fl4_tos = tos,
28 };
29 struct dst_entry *dst;
30 struct rtable *rt; 26 struct rtable *rt;
31 int err;
32 27
28 memset(fl4, 0, sizeof(*fl4));
29 fl4->daddr = daddr->a4;
30 fl4->flowi4_tos = tos;
33 if (saddr) 31 if (saddr)
34 fl.fl4_src = saddr->a4; 32 fl4->saddr = saddr->a4;
33
34 rt = __ip_route_output_key(net, fl4);
35 if (!IS_ERR(rt))
36 return &rt->dst;
35 37
36 err = __ip_route_output_key(net, &rt, &fl); 38 return ERR_CAST(rt);
37 dst = &rt->dst; 39}
38 if (err) 40
39 dst = ERR_PTR(err); 41static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
40 return dst; 42 const xfrm_address_t *saddr,
43 const xfrm_address_t *daddr)
44{
45 struct flowi4 fl4;
46
47 return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);
41} 48}
42 49
43static int xfrm4_get_saddr(struct net *net, 50static int xfrm4_get_saddr(struct net *net,
44 xfrm_address_t *saddr, xfrm_address_t *daddr) 51 xfrm_address_t *saddr, xfrm_address_t *daddr)
45{ 52{
46 struct dst_entry *dst; 53 struct dst_entry *dst;
47 struct rtable *rt; 54 struct flowi4 fl4;
48 55
49 dst = xfrm4_dst_lookup(net, 0, NULL, daddr); 56 dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);
50 if (IS_ERR(dst)) 57 if (IS_ERR(dst))
51 return -EHOSTUNREACH; 58 return -EHOSTUNREACH;
52 59
53 rt = (struct rtable *)dst; 60 saddr->a4 = fl4.saddr;
54 saddr->a4 = rt->rt_src;
55 dst_release(dst); 61 dst_release(dst);
56 return 0; 62 return 0;
57} 63}
58 64
59static int xfrm4_get_tos(struct flowi *fl) 65static int xfrm4_get_tos(const struct flowi *fl)
60{ 66{
61 return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ 67 return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */
62} 68}
63 69
64static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, 70static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
@@ -68,11 +74,18 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
68} 74}
69 75
70static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 76static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
71 struct flowi *fl) 77 const struct flowi *fl)
72{ 78{
73 struct rtable *rt = (struct rtable *)xdst->route; 79 struct rtable *rt = (struct rtable *)xdst->route;
80 const struct flowi4 *fl4 = &fl->u.ip4;
74 81
75 xdst->u.rt.fl = *fl; 82 rt->rt_key_dst = fl4->daddr;
83 rt->rt_key_src = fl4->saddr;
84 rt->rt_key_tos = fl4->flowi4_tos;
85 rt->rt_route_iif = fl4->flowi4_iif;
86 rt->rt_iif = fl4->flowi4_iif;
87 rt->rt_oif = fl4->flowi4_oif;
88 rt->rt_mark = fl4->flowi4_mark;
76 89
77 xdst->u.dst.dev = dev; 90 xdst->u.dst.dev = dev;
78 dev_hold(dev); 91 dev_hold(dev);
@@ -97,11 +110,12 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
97static void 110static void
98_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) 111_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
99{ 112{
100 struct iphdr *iph = ip_hdr(skb); 113 const struct iphdr *iph = ip_hdr(skb);
101 u8 *xprth = skb_network_header(skb) + iph->ihl * 4; 114 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
115 struct flowi4 *fl4 = &fl->u.ip4;
102 116
103 memset(fl, 0, sizeof(struct flowi)); 117 memset(fl4, 0, sizeof(struct flowi4));
104 fl->mark = skb->mark; 118 fl4->flowi4_mark = skb->mark;
105 119
106 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 120 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
107 switch (iph->protocol) { 121 switch (iph->protocol) {
@@ -114,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
114 pskb_may_pull(skb, xprth + 4 - skb->data)) { 128 pskb_may_pull(skb, xprth + 4 - skb->data)) {
115 __be16 *ports = (__be16 *)xprth; 129 __be16 *ports = (__be16 *)xprth;
116 130
117 fl->fl_ip_sport = ports[!!reverse]; 131 fl4->fl4_sport = ports[!!reverse];
118 fl->fl_ip_dport = ports[!reverse]; 132 fl4->fl4_dport = ports[!reverse];
119 } 133 }
120 break; 134 break;
121 135
@@ -123,8 +137,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
123 if (pskb_may_pull(skb, xprth + 2 - skb->data)) { 137 if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
124 u8 *icmp = xprth; 138 u8 *icmp = xprth;
125 139
126 fl->fl_icmp_type = icmp[0]; 140 fl4->fl4_icmp_type = icmp[0];
127 fl->fl_icmp_code = icmp[1]; 141 fl4->fl4_icmp_code = icmp[1];
128 } 142 }
129 break; 143 break;
130 144
@@ -132,7 +146,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
132 if (pskb_may_pull(skb, xprth + 4 - skb->data)) { 146 if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
133 __be32 *ehdr = (__be32 *)xprth; 147 __be32 *ehdr = (__be32 *)xprth;
134 148
135 fl->fl_ipsec_spi = ehdr[0]; 149 fl4->fl4_ipsec_spi = ehdr[0];
136 } 150 }
137 break; 151 break;
138 152
@@ -140,7 +154,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
140 if (pskb_may_pull(skb, xprth + 8 - skb->data)) { 154 if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
141 __be32 *ah_hdr = (__be32*)xprth; 155 __be32 *ah_hdr = (__be32*)xprth;
142 156
143 fl->fl_ipsec_spi = ah_hdr[1]; 157 fl4->fl4_ipsec_spi = ah_hdr[1];
144 } 158 }
145 break; 159 break;
146 160
@@ -148,7 +162,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
148 if (pskb_may_pull(skb, xprth + 4 - skb->data)) { 162 if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
149 __be16 *ipcomp_hdr = (__be16 *)xprth; 163 __be16 *ipcomp_hdr = (__be16 *)xprth;
150 164
151 fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); 165 fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
152 } 166 }
153 break; 167 break;
154 168
@@ -160,20 +174,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
160 if (greflags[0] & GRE_KEY) { 174 if (greflags[0] & GRE_KEY) {
161 if (greflags[0] & GRE_CSUM) 175 if (greflags[0] & GRE_CSUM)
162 gre_hdr++; 176 gre_hdr++;
163 fl->fl_gre_key = gre_hdr[1]; 177 fl4->fl4_gre_key = gre_hdr[1];
164 } 178 }
165 } 179 }
166 break; 180 break;
167 181
168 default: 182 default:
169 fl->fl_ipsec_spi = 0; 183 fl4->fl4_ipsec_spi = 0;
170 break; 184 break;
171 } 185 }
172 } 186 }
173 fl->proto = iph->protocol; 187 fl4->flowi4_proto = iph->protocol;
174 fl->fl4_dst = reverse ? iph->saddr : iph->daddr; 188 fl4->daddr = reverse ? iph->saddr : iph->daddr;
175 fl->fl4_src = reverse ? iph->daddr : iph->saddr; 189 fl4->saddr = reverse ? iph->daddr : iph->saddr;
176 fl->fl4_tos = iph->tos; 190 fl4->flowi4_tos = iph->tos;
177} 191}
178 192
179static inline int xfrm4_garbage_collect(struct dst_ops *ops) 193static inline int xfrm4_garbage_collect(struct dst_ops *ops)
@@ -196,8 +210,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
196{ 210{
197 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 211 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
198 212
213 dst_destroy_metrics_generic(dst);
214
199 if (likely(xdst->u.rt.peer)) 215 if (likely(xdst->u.rt.peer))
200 inet_putpeer(xdst->u.rt.peer); 216 inet_putpeer(xdst->u.rt.peer);
217
201 xfrm_dst_destroy(xdst); 218 xfrm_dst_destroy(xdst);
202} 219}
203 220
@@ -215,6 +232,7 @@ static struct dst_ops xfrm4_dst_ops = {
215 .protocol = cpu_to_be16(ETH_P_IP), 232 .protocol = cpu_to_be16(ETH_P_IP),
216 .gc = xfrm4_garbage_collect, 233 .gc = xfrm4_garbage_collect,
217 .update_pmtu = xfrm4_update_pmtu, 234 .update_pmtu = xfrm4_update_pmtu,
235 .cow_metrics = dst_cow_metrics_generic,
218 .destroy = xfrm4_dst_destroy, 236 .destroy = xfrm4_dst_destroy,
219 .ifdown = xfrm4_dst_ifdown, 237 .ifdown = xfrm4_dst_ifdown,
220 .local_out = __ip_local_out, 238 .local_out = __ip_local_out,
@@ -230,6 +248,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
230 .get_tos = xfrm4_get_tos, 248 .get_tos = xfrm4_get_tos,
231 .init_path = xfrm4_init_path, 249 .init_path = xfrm4_init_path,
232 .fill_dst = xfrm4_fill_dst, 250 .fill_dst = xfrm4_fill_dst,
251 .blackhole_route = ipv4_blackhole_route,
233}; 252};
234 253
235#ifdef CONFIG_SYSCTL 254#ifdef CONFIG_SYSCTL
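
The xfrm4_policy.c hunks show two idioms that recur throughout this series: routing lookups now return the route (or an ERR_PTR-encoded error) instead of filling an output parameter, and the caller-owned struct flowi4 doubles as a result buffer, which is why xfrm4_get_saddr() can simply read the source address the lookup chose out of fl4.saddr. A small hedged sketch of that idiom, built around a hypothetical helper (example_pick_saddr is not in the patch):

#include <linux/err.h>
#include <net/route.h>

/* Hypothetical helper: route daddr and report the source address the
 * IPv4 routing code selected for it.  Sketch only. */
static int example_pick_saddr(struct net *net, __be32 daddr, __be32 *saddr)
{
        struct flowi4 fl4;
        struct rtable *rt;

        memset(&fl4, 0, sizeof(fl4));
        fl4.daddr = daddr;

        rt = __ip_route_output_key(net, &fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);     /* the error is encoded in the pointer */

        *saddr = fl4.saddr;             /* filled in by the lookup */
        ip_rt_put(rt);
        return 0;
}
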
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 47947624eccc..d9ac0a0058b5 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -21,24 +21,26 @@ static int xfrm4_init_flags(struct xfrm_state *x)
21} 21}
22 22
23static void 23static void
24__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) 24__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
25{ 25{
26 sel->daddr.a4 = fl->fl4_dst; 26 const struct flowi4 *fl4 = &fl->u.ip4;
27 sel->saddr.a4 = fl->fl4_src; 27
28 sel->dport = xfrm_flowi_dport(fl); 28 sel->daddr.a4 = fl4->daddr;
29 sel->saddr.a4 = fl4->saddr;
30 sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
29 sel->dport_mask = htons(0xffff); 31 sel->dport_mask = htons(0xffff);
30 sel->sport = xfrm_flowi_sport(fl); 32 sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
31 sel->sport_mask = htons(0xffff); 33 sel->sport_mask = htons(0xffff);
32 sel->family = AF_INET; 34 sel->family = AF_INET;
33 sel->prefixlen_d = 32; 35 sel->prefixlen_d = 32;
34 sel->prefixlen_s = 32; 36 sel->prefixlen_s = 32;
35 sel->proto = fl->proto; 37 sel->proto = fl4->flowi4_proto;
36 sel->ifindex = fl->oif; 38 sel->ifindex = fl4->flowi4_oif;
37} 39}
38 40
39static void 41static void
40xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, 42xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
41 xfrm_address_t *daddr, xfrm_address_t *saddr) 43 const xfrm_address_t *daddr, const xfrm_address_t *saddr)
42{ 44{
43 x->id = tmpl->id; 45 x->id = tmpl->id;
44 if (x->id.daddr.a4 == 0) 46 if (x->id.daddr.a4 == 0)
@@ -53,7 +55,7 @@ xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
53 55
54int xfrm4_extract_header(struct sk_buff *skb) 56int xfrm4_extract_header(struct sk_buff *skb)
55{ 57{
56 struct iphdr *iph = ip_hdr(skb); 58 const struct iphdr *iph = ip_hdr(skb);
57 59
58 XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); 60 XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
59 XFRM_MODE_SKB_CB(skb)->id = iph->id; 61 XFRM_MODE_SKB_CB(skb)->id = iph->id;
@@ -76,6 +78,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
76 .init_tempsel = __xfrm4_init_tempsel, 78 .init_tempsel = __xfrm4_init_tempsel,
77 .init_temprop = xfrm4_init_temprop, 79 .init_temprop = xfrm4_init_temprop,
78 .output = xfrm4_output, 80 .output = xfrm4_output,
81 .output_finish = xfrm4_output_finish,
79 .extract_input = xfrm4_extract_input, 82 .extract_input = xfrm4_extract_input,
80 .extract_output = xfrm4_extract_output, 83 .extract_output = xfrm4_extract_output,
81 .transport_finish = xfrm4_transport_finish, 84 .transport_finish = xfrm4_transport_finish,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd6782e3a038..498b927f68be 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -289,19 +289,19 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
289 sizeof(struct ipstats_mib), 289 sizeof(struct ipstats_mib),
290 __alignof__(struct ipstats_mib)) < 0) 290 __alignof__(struct ipstats_mib)) < 0)
291 goto err_ip; 291 goto err_ip;
292 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6, 292 idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
293 sizeof(struct icmpv6_mib), 293 GFP_KERNEL);
294 __alignof__(struct icmpv6_mib)) < 0) 294 if (!idev->stats.icmpv6dev)
295 goto err_icmp; 295 goto err_icmp;
296 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg, 296 idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device),
297 sizeof(struct icmpv6msg_mib), 297 GFP_KERNEL);
298 __alignof__(struct icmpv6msg_mib)) < 0) 298 if (!idev->stats.icmpv6msgdev)
299 goto err_icmpmsg; 299 goto err_icmpmsg;
300 300
301 return 0; 301 return 0;
302 302
303err_icmpmsg: 303err_icmpmsg:
304 snmp_mib_free((void __percpu **)idev->stats.icmpv6); 304 kfree(idev->stats.icmpv6dev);
305err_icmp: 305err_icmp:
306 snmp_mib_free((void __percpu **)idev->stats.ipv6); 306 snmp_mib_free((void __percpu **)idev->stats.ipv6);
307err_ip: 307err_ip:
@@ -310,19 +310,13 @@ err_ip:
310 310
311static void snmp6_free_dev(struct inet6_dev *idev) 311static void snmp6_free_dev(struct inet6_dev *idev)
312{ 312{
313 snmp_mib_free((void __percpu **)idev->stats.icmpv6msg); 313 kfree(idev->stats.icmpv6msgdev);
314 snmp_mib_free((void __percpu **)idev->stats.icmpv6); 314 kfree(idev->stats.icmpv6dev);
315 snmp_mib_free((void __percpu **)idev->stats.ipv6); 315 snmp_mib_free((void __percpu **)idev->stats.ipv6);
316} 316}
317 317
318/* Nobody refers to this device, we may destroy it. */ 318/* Nobody refers to this device, we may destroy it. */
319 319
320static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
321{
322 struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
323 kfree(idev);
324}
325
326void in6_dev_finish_destroy(struct inet6_dev *idev) 320void in6_dev_finish_destroy(struct inet6_dev *idev)
327{ 321{
328 struct net_device *dev = idev->dev; 322 struct net_device *dev = idev->dev;
@@ -339,7 +333,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
339 return; 333 return;
340 } 334 }
341 snmp6_free_dev(idev); 335 snmp6_free_dev(idev);
342 call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); 336 kfree_rcu(idev, rcu);
343} 337}
344 338
345EXPORT_SYMBOL(in6_dev_finish_destroy); 339EXPORT_SYMBOL(in6_dev_finish_destroy);
@@ -535,12 +529,6 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
535} 529}
536#endif 530#endif
537 531
538static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
539{
540 struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
541 kfree(ifp);
542}
543
544/* Nobody refers to this ifaddr, destroy it */ 532/* Nobody refers to this ifaddr, destroy it */
545void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) 533void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
546{ 534{
@@ -561,7 +549,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
561 } 549 }
562 dst_release(&ifp->rt->dst); 550 dst_release(&ifp->rt->dst);
563 551
564 call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); 552 kfree_rcu(ifp, rcu);
565} 553}
566 554
567static void 555static void
@@ -718,12 +706,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
718 struct inet6_ifaddr *ifa, *ifn; 706 struct inet6_ifaddr *ifa, *ifn;
719 struct inet6_dev *idev = ifp->idev; 707 struct inet6_dev *idev = ifp->idev;
720 int state; 708 int state;
721 int hash;
722 int deleted = 0, onlink = 0; 709 int deleted = 0, onlink = 0;
723 unsigned long expires = jiffies; 710 unsigned long expires = jiffies;
724 711
725 hash = ipv6_addr_hash(&ifp->addr);
726
727 spin_lock_bh(&ifp->state_lock); 712 spin_lock_bh(&ifp->state_lock);
728 state = ifp->state; 713 state = ifp->state;
729 ifp->state = INET6_IFADDR_STATE_DEAD; 714 ifp->state = INET6_IFADDR_STATE_DEAD;
@@ -828,6 +813,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
828 dst_release(&rt->dst); 813 dst_release(&rt->dst);
829 } 814 }
830 815
816 /* clean up prefsrc entries */
817 rt6_remove_prefsrc(ifp);
831out: 818out:
832 in6_ifa_put(ifp); 819 in6_ifa_put(ifp);
833} 820}
@@ -1087,7 +1074,7 @@ static int ipv6_get_saddr_eval(struct net *net,
1087 case IPV6_SADDR_RULE_PRIVACY: 1074 case IPV6_SADDR_RULE_PRIVACY:
1088 { 1075 {
1089 /* Rule 7: Prefer public address 1076 /* Rule 7: Prefer public address
1090 * Note: prefer temprary address if use_tempaddr >= 2 1077 * Note: prefer temporary address if use_tempaddr >= 2
1091 */ 1078 */
1092 int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? 1079 int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
1093 !!(dst->prefs & IPV6_PREFER_SRC_TMP) : 1080 !!(dst->prefs & IPV6_PREFER_SRC_TMP) :
@@ -1284,7 +1271,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
1284 return cnt; 1271 return cnt;
1285} 1272}
1286 1273
1287int ipv6_chk_addr(struct net *net, struct in6_addr *addr, 1274int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
1288 struct net_device *dev, int strict) 1275 struct net_device *dev, int strict)
1289{ 1276{
1290 struct inet6_ifaddr *ifp; 1277 struct inet6_ifaddr *ifp;
@@ -1327,7 +1314,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1327 return false; 1314 return false;
1328} 1315}
1329 1316
1330int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) 1317int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
1331{ 1318{
1332 struct inet6_dev *idev; 1319 struct inet6_dev *idev;
1333 struct inet6_ifaddr *ifa; 1320 struct inet6_ifaddr *ifa;
@@ -1458,7 +1445,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1458 1445
1459/* Join to solicited addr multicast group. */ 1446/* Join to solicited addr multicast group. */
1460 1447
1461void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) 1448void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
1462{ 1449{
1463 struct in6_addr maddr; 1450 struct in6_addr maddr;
1464 1451
@@ -1469,7 +1456,7 @@ void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
1469 ipv6_dev_mc_inc(dev, &maddr); 1456 ipv6_dev_mc_inc(dev, &maddr);
1470} 1457}
1471 1458
1472void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) 1459void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
1473{ 1460{
1474 struct in6_addr maddr; 1461 struct in6_addr maddr;
1475 1462
@@ -1971,7 +1958,7 @@ ok:
1971 * to the stored lifetime since we'll 1958 * to the stored lifetime since we'll
1972 * be updating the timestamp below, 1959 * be updating the timestamp below,
1973 * else we'll set it back to the 1960 * else we'll set it back to the
1974 * minumum. 1961 * minimum.
1975 */ 1962 */
1976 if (prefered_lft != ifp->prefered_lft) { 1963 if (prefered_lft != ifp->prefered_lft) {
1977 valid_lft = stored_lft; 1964 valid_lft = stored_lft;
@@ -2114,7 +2101,7 @@ err_exit:
2114/* 2101/*
2115 * Manual configuration of address on an interface 2102 * Manual configuration of address on an interface
2116 */ 2103 */
2117static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, 2104static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx,
2118 unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, 2105 unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
2119 __u32 valid_lft) 2106 __u32 valid_lft)
2120{ 2107{
@@ -2188,7 +2175,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
2188 return PTR_ERR(ifp); 2175 return PTR_ERR(ifp);
2189} 2176}
2190 2177
2191static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, 2178static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
2192 unsigned int plen) 2179 unsigned int plen)
2193{ 2180{
2194 struct inet6_ifaddr *ifp; 2181 struct inet6_ifaddr *ifp;
@@ -2351,7 +2338,7 @@ static void init_loopback(struct net_device *dev)
2351 add_addr(idev, &in6addr_loopback, 128, IFA_HOST); 2338 add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
2352} 2339}
2353 2340
2354static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) 2341static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
2355{ 2342{
2356 struct inet6_ifaddr * ifp; 2343 struct inet6_ifaddr * ifp;
2357 u32 addr_flags = IFA_F_PERMANENT; 2344 u32 addr_flags = IFA_F_PERMANENT;
@@ -3122,7 +3109,7 @@ void if6_proc_exit(void)
3122 3109
3123#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 3110#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
3124/* Check if address is a home address configured on any interface. */ 3111/* Check if address is a home address configured on any interface. */
3125int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) 3112int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
3126{ 3113{
3127 int ret = 0; 3114 int ret = 0;
3128 struct inet6_ifaddr *ifp = NULL; 3115 struct inet6_ifaddr *ifp = NULL;
@@ -3839,7 +3826,7 @@ static inline size_t inet6_if_nlmsg_size(void)
3839 + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */ 3826 + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
3840} 3827}
3841 3828
3842static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, 3829static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
3843 int items, int bytes) 3830 int items, int bytes)
3844{ 3831{
3845 int i; 3832 int i;
@@ -3849,7 +3836,7 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
3849 /* Use put_unaligned() because stats may not be aligned for u64. */ 3836 /* Use put_unaligned() because stats may not be aligned for u64. */
3850 put_unaligned(items, &stats[0]); 3837 put_unaligned(items, &stats[0]);
3851 for (i = 1; i < items; i++) 3838 for (i = 1; i < items; i++)
3852 put_unaligned(snmp_fold_field(mib, i), &stats[i]); 3839 put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
3853 3840
3854 memset(&stats[items], 0, pad); 3841 memset(&stats[items], 0, pad);
3855} 3842}
@@ -3878,7 +3865,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
3878 IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); 3865 IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
3879 break; 3866 break;
3880 case IFLA_INET6_ICMP6STATS: 3867 case IFLA_INET6_ICMP6STATS:
3881 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); 3868 __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
3882 break; 3869 break;
3883 } 3870 }
3884} 3871}
@@ -4540,7 +4527,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
4540 4527
4541 t = p->sysctl; 4528 t = p->sysctl;
4542 p->sysctl = NULL; 4529 p->sysctl = NULL;
4543 unregister_sysctl_table(t->sysctl_header); 4530 unregister_net_sysctl_table(t->sysctl_header);
4544 kfree(t->dev_name); 4531 kfree(t->dev_name);
4545 kfree(t); 4532 kfree(t);
4546} 4533}
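
Two of the addrconf.c hunks replace call_rcu() callbacks whose only job was kfree() with kfree_rcu(), which takes the object and the name of its struct rcu_head member and frees it after a grace period without a dedicated callback. A minimal sketch of the conversion on a hypothetical structure (example_item is not part of the patch):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_item {
        int value;
        struct rcu_head rcu;            /* required by both variants */
};

/* Old style: a callback that exists only to call kfree(). */
static void example_item_free_rcu(struct rcu_head *head)
{
        struct example_item *item = container_of(head, struct example_item, rcu);

        kfree(item);
}

static void example_item_release_old(struct example_item *item)
{
        call_rcu(&item->rcu, example_item_free_rcu);
}

/* New style: same effect, no callback needed. */
static void example_item_release(struct example_item *item)
{
        kfree_rcu(item, rcu);           /* second argument names the rcu_head member */
}
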
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 978e80e2c4a8..b7919f901fbf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -644,41 +644,34 @@ EXPORT_SYMBOL(inet6_unregister_protosw);
644 644
645int inet6_sk_rebuild_header(struct sock *sk) 645int inet6_sk_rebuild_header(struct sock *sk)
646{ 646{
647 int err;
648 struct dst_entry *dst;
649 struct ipv6_pinfo *np = inet6_sk(sk); 647 struct ipv6_pinfo *np = inet6_sk(sk);
648 struct dst_entry *dst;
650 649
651 dst = __sk_dst_check(sk, np->dst_cookie); 650 dst = __sk_dst_check(sk, np->dst_cookie);
652 651
653 if (dst == NULL) { 652 if (dst == NULL) {
654 struct inet_sock *inet = inet_sk(sk); 653 struct inet_sock *inet = inet_sk(sk);
655 struct in6_addr *final_p, final; 654 struct in6_addr *final_p, final;
656 struct flowi fl; 655 struct flowi6 fl6;
657 656
658 memset(&fl, 0, sizeof(fl)); 657 memset(&fl6, 0, sizeof(fl6));
659 fl.proto = sk->sk_protocol; 658 fl6.flowi6_proto = sk->sk_protocol;
660 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 659 ipv6_addr_copy(&fl6.daddr, &np->daddr);
661 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 660 ipv6_addr_copy(&fl6.saddr, &np->saddr);
662 fl.fl6_flowlabel = np->flow_label; 661 fl6.flowlabel = np->flow_label;
663 fl.oif = sk->sk_bound_dev_if; 662 fl6.flowi6_oif = sk->sk_bound_dev_if;
664 fl.mark = sk->sk_mark; 663 fl6.flowi6_mark = sk->sk_mark;
665 fl.fl_ip_dport = inet->inet_dport; 664 fl6.fl6_dport = inet->inet_dport;
666 fl.fl_ip_sport = inet->inet_sport; 665 fl6.fl6_sport = inet->inet_sport;
667 security_sk_classify_flow(sk, &fl); 666 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
668 667
669 final_p = fl6_update_dst(&fl, np->opt, &final); 668 final_p = fl6_update_dst(&fl6, np->opt, &final);
670 669
671 err = ip6_dst_lookup(sk, &dst, &fl); 670 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
672 if (err) { 671 if (IS_ERR(dst)) {
673 sk->sk_route_caps = 0; 672 sk->sk_route_caps = 0;
674 return err; 673 sk->sk_err_soft = -PTR_ERR(dst);
675 } 674 return PTR_ERR(dst);
676 if (final_p)
677 ipv6_addr_copy(&fl.fl6_dst, final_p);
678
679 if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
680 sk->sk_err_soft = -err;
681 return err;
682 } 675 }
683 676
684 __ip6_dst_store(sk, dst, NULL, NULL); 677 __ip6_dst_store(sk, dst, NULL, NULL);
@@ -747,7 +740,7 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
747 740
748static int ipv6_gso_send_check(struct sk_buff *skb) 741static int ipv6_gso_send_check(struct sk_buff *skb)
749{ 742{
750 struct ipv6hdr *ipv6h; 743 const struct ipv6hdr *ipv6h;
751 const struct inet6_protocol *ops; 744 const struct inet6_protocol *ops;
752 int err = -EINVAL; 745 int err = -EINVAL;
753 746
@@ -772,7 +765,7 @@ out:
772 return err; 765 return err;
773} 766}
774 767
775static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) 768static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
776{ 769{
777 struct sk_buff *segs = ERR_PTR(-EINVAL); 770 struct sk_buff *segs = ERR_PTR(-EINVAL);
778 struct ipv6hdr *ipv6h; 771 struct ipv6hdr *ipv6h;
@@ -1120,7 +1113,7 @@ static int __init inet6_init(void)
1120 /* 1113 /*
1121 * ipngwg API draft makes clear that the correct semantics 1114 * ipngwg API draft makes clear that the correct semantics
1122 * for TCP and UDP is to consider one TCP and UDP instance 1115 * for TCP and UDP is to consider one TCP and UDP instance
1123 * in a host availiable by both INET and INET6 APIs and 1116 * in a host available by both INET and INET6 APIs and
1124 * able to communicate via both network protocols. 1117 * able to communicate via both network protocols.
1125 */ 1118 */
1126 1119
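
This file, like datagram.c further down, moves from the generic struct flowi (fl6_dst/fl6_src/fl_ip_* members) to the dedicated struct flowi6 and lets ip6_dst_lookup_flow() perform the xfrm lookup internally, returning either a dst_entry or an ERR_PTR-encoded error. A condensed sketch of the new flow setup for a connected IPv6 socket, using only the fields and calls that appear in the hunks:

        struct flowi6 fl6;

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_proto = sk->sk_protocol;
        ipv6_addr_copy(&fl6.daddr, &np->daddr);
        ipv6_addr_copy(&fl6.saddr, &np->saddr);
        fl6.flowlabel    = np->flow_label;
        fl6.flowi6_oif   = sk->sk_bound_dev_if;
        fl6.flowi6_mark  = sk->sk_mark;
        fl6.fl6_dport    = inet->inet_dport;
        fl6.fl6_sport    = inet->inet_sport;
        security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

        final_p = fl6_update_dst(&fl6, np->opt, &final);
        dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
        if (IS_ERR(dst))
                return PTR_ERR(dst);    /* xfrm lookup happens inside ip6_dst_lookup_flow() */

        __ip6_dst_store(sk, dst, NULL, NULL);
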
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 1aba54ae53c4..2195ae651923 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -409,7 +409,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
409 409
410 ah->reserved = 0; 410 ah->reserved = 0;
411 ah->spi = x->id.spi; 411 ah->spi = x->id.spi;
412 ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); 412 ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
413 413
414 sg_init_table(sg, nfrags); 414 sg_init_table(sg, nfrags);
415 skb_to_sgvec(skb, sg, 0, skb->len); 415 skb_to_sgvec(skb, sg, 0, skb->len);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0e5e943446f0..674255f5e6b7 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,7 +44,7 @@
44 44
45#include <net/checksum.h> 45#include <net/checksum.h>
46 46
47static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr); 47static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
48 48
49/* Big ac list lock for all the sockets */ 49/* Big ac list lock for all the sockets */
50static DEFINE_RWLOCK(ipv6_sk_ac_lock); 50static DEFINE_RWLOCK(ipv6_sk_ac_lock);
@@ -54,7 +54,7 @@ static DEFINE_RWLOCK(ipv6_sk_ac_lock);
54 * socket join an anycast group 54 * socket join an anycast group
55 */ 55 */
56 56
57int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) 57int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
58{ 58{
59 struct ipv6_pinfo *np = inet6_sk(sk); 59 struct ipv6_pinfo *np = inet6_sk(sk);
60 struct net_device *dev = NULL; 60 struct net_device *dev = NULL;
@@ -145,7 +145,7 @@ error:
145/* 145/*
146 * socket leave an anycast group 146 * socket leave an anycast group
147 */ 147 */
148int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) 148int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
149{ 149{
150 struct ipv6_pinfo *np = inet6_sk(sk); 150 struct ipv6_pinfo *np = inet6_sk(sk);
151 struct net_device *dev; 151 struct net_device *dev;
@@ -252,7 +252,7 @@ static void aca_put(struct ifacaddr6 *ac)
252/* 252/*
253 * device anycast group inc (add if not found) 253 * device anycast group inc (add if not found)
254 */ 254 */
255int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) 255int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
256{ 256{
257 struct ifacaddr6 *aca; 257 struct ifacaddr6 *aca;
258 struct inet6_dev *idev; 258 struct inet6_dev *idev;
@@ -324,7 +324,7 @@ out:
324/* 324/*
325 * device anycast group decrement 325 * device anycast group decrement
326 */ 326 */
327int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) 327int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
328{ 328{
329 struct ifacaddr6 *aca, *prev_aca; 329 struct ifacaddr6 *aca, *prev_aca;
330 330
@@ -358,7 +358,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
358} 358}
359 359
360/* called with rcu_read_lock() */ 360/* called with rcu_read_lock() */
361static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) 361static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
362{ 362{
363 struct inet6_dev *idev = __in6_dev_get(dev); 363 struct inet6_dev *idev = __in6_dev_get(dev);
364 364
@@ -371,7 +371,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
371 * check if the interface has this anycast address 371 * check if the interface has this anycast address
372 * called with rcu_read_lock() 372 * called with rcu_read_lock()
373 */ 373 */
374static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) 374static int ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
375{ 375{
376 struct inet6_dev *idev; 376 struct inet6_dev *idev;
377 struct ifacaddr6 *aca; 377 struct ifacaddr6 *aca;
@@ -392,7 +392,7 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
392 * check if given interface (or any, if dev==0) has this anycast address 392 * check if given interface (or any, if dev==0) has this anycast address
393 */ 393 */
394int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, 394int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
395 struct in6_addr *addr) 395 const struct in6_addr *addr)
396{ 396{
397 int found = 0; 397 int found = 0;
398 398
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 320bdb877eed..16560336eb72 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,7 +40,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
40 struct ipv6_pinfo *np = inet6_sk(sk); 40 struct ipv6_pinfo *np = inet6_sk(sk);
41 struct in6_addr *daddr, *final_p, final; 41 struct in6_addr *daddr, *final_p, final;
42 struct dst_entry *dst; 42 struct dst_entry *dst;
43 struct flowi fl; 43 struct flowi6 fl6;
44 struct ip6_flowlabel *flowlabel = NULL; 44 struct ip6_flowlabel *flowlabel = NULL;
45 struct ipv6_txoptions *opt; 45 struct ipv6_txoptions *opt;
46 int addr_type; 46 int addr_type;
@@ -59,11 +59,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
59 if (usin->sin6_family != AF_INET6) 59 if (usin->sin6_family != AF_INET6)
60 return -EAFNOSUPPORT; 60 return -EAFNOSUPPORT;
61 61
62 memset(&fl, 0, sizeof(fl)); 62 memset(&fl6, 0, sizeof(fl6));
63 if (np->sndflow) { 63 if (np->sndflow) {
64 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 64 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
65 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 65 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
66 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 66 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
67 if (flowlabel == NULL) 67 if (flowlabel == NULL)
68 return -EINVAL; 68 return -EINVAL;
69 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); 69 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
@@ -137,7 +137,7 @@ ipv4_connected:
137 } 137 }
138 138
139 ipv6_addr_copy(&np->daddr, daddr); 139 ipv6_addr_copy(&np->daddr, daddr);
140 np->flow_label = fl.fl6_flowlabel; 140 np->flow_label = fl6.flowlabel;
141 141
142 inet->inet_dport = usin->sin6_port; 142 inet->inet_dport = usin->sin6_port;
143 143
@@ -146,53 +146,46 @@ ipv4_connected:
146 * destination cache for it. 146 * destination cache for it.
147 */ 147 */
148 148
149 fl.proto = sk->sk_protocol; 149 fl6.flowi6_proto = sk->sk_protocol;
150 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 150 ipv6_addr_copy(&fl6.daddr, &np->daddr);
151 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 151 ipv6_addr_copy(&fl6.saddr, &np->saddr);
152 fl.oif = sk->sk_bound_dev_if; 152 fl6.flowi6_oif = sk->sk_bound_dev_if;
153 fl.mark = sk->sk_mark; 153 fl6.flowi6_mark = sk->sk_mark;
154 fl.fl_ip_dport = inet->inet_dport; 154 fl6.fl6_dport = inet->inet_dport;
155 fl.fl_ip_sport = inet->inet_sport; 155 fl6.fl6_sport = inet->inet_sport;
156 156
157 if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) 157 if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
158 fl.oif = np->mcast_oif; 158 fl6.flowi6_oif = np->mcast_oif;
159 159
160 security_sk_classify_flow(sk, &fl); 160 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
161 161
162 opt = flowlabel ? flowlabel->opt : np->opt; 162 opt = flowlabel ? flowlabel->opt : np->opt;
163 final_p = fl6_update_dst(&fl, opt, &final); 163 final_p = fl6_update_dst(&fl6, opt, &final);
164 164
165 err = ip6_dst_lookup(sk, &dst, &fl); 165 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
166 if (err) 166 err = 0;
167 if (IS_ERR(dst)) {
168 err = PTR_ERR(dst);
167 goto out; 169 goto out;
168 if (final_p)
169 ipv6_addr_copy(&fl.fl6_dst, final_p);
170
171 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
172 if (err < 0) {
173 if (err == -EREMOTE)
174 err = ip6_dst_blackhole(sk, &dst, &fl);
175 if (err < 0)
176 goto out;
177 } 170 }
178 171
179 /* source address lookup done in ip6_dst_lookup */ 172 /* source address lookup done in ip6_dst_lookup */
180 173
181 if (ipv6_addr_any(&np->saddr)) 174 if (ipv6_addr_any(&np->saddr))
182 ipv6_addr_copy(&np->saddr, &fl.fl6_src); 175 ipv6_addr_copy(&np->saddr, &fl6.saddr);
183 176
184 if (ipv6_addr_any(&np->rcv_saddr)) { 177 if (ipv6_addr_any(&np->rcv_saddr)) {
185 ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); 178 ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr);
186 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 179 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
187 if (sk->sk_prot->rehash) 180 if (sk->sk_prot->rehash)
188 sk->sk_prot->rehash(sk); 181 sk->sk_prot->rehash(sk);
189 } 182 }
190 183
191 ip6_dst_store(sk, dst, 184 ip6_dst_store(sk, dst,
192 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? 185 ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
193 &np->daddr : NULL, 186 &np->daddr : NULL,
194#ifdef CONFIG_IPV6_SUBTREES 187#ifdef CONFIG_IPV6_SUBTREES
195 ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? 188 ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
196 &np->saddr : 189 &np->saddr :
197#endif 190#endif
198 NULL); 191 NULL);
@@ -238,7 +231,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
238 kfree_skb(skb); 231 kfree_skb(skb);
239} 232}
240 233
241void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) 234void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
242{ 235{
243 struct ipv6_pinfo *np = inet6_sk(sk); 236 struct ipv6_pinfo *np = inet6_sk(sk);
244 struct sock_exterr_skb *serr; 237 struct sock_exterr_skb *serr;
@@ -257,7 +250,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
257 skb_put(skb, sizeof(struct ipv6hdr)); 250 skb_put(skb, sizeof(struct ipv6hdr));
258 skb_reset_network_header(skb); 251 skb_reset_network_header(skb);
259 iph = ipv6_hdr(skb); 252 iph = ipv6_hdr(skb);
260 ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); 253 ipv6_addr_copy(&iph->daddr, &fl6->daddr);
261 254
262 serr = SKB_EXT_ERR(skb); 255 serr = SKB_EXT_ERR(skb);
263 serr->ee.ee_errno = err; 256 serr->ee.ee_errno = err;
@@ -268,7 +261,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
268 serr->ee.ee_info = info; 261 serr->ee.ee_info = info;
269 serr->ee.ee_data = 0; 262 serr->ee.ee_data = 0;
270 serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); 263 serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
271 serr->port = fl->fl_ip_dport; 264 serr->port = fl6->fl6_dport;
272 265
273 __skb_pull(skb, skb_tail_pointer(skb) - skb->data); 266 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
274 skb_reset_transport_header(skb); 267 skb_reset_transport_header(skb);
@@ -277,7 +270,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
277 kfree_skb(skb); 270 kfree_skb(skb);
278} 271}
279 272
280void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) 273void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
281{ 274{
282 struct ipv6_pinfo *np = inet6_sk(sk); 275 struct ipv6_pinfo *np = inet6_sk(sk);
283 struct ipv6hdr *iph; 276 struct ipv6hdr *iph;
@@ -294,7 +287,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
294 skb_put(skb, sizeof(struct ipv6hdr)); 287 skb_put(skb, sizeof(struct ipv6hdr));
295 skb_reset_network_header(skb); 288 skb_reset_network_header(skb);
296 iph = ipv6_hdr(skb); 289 iph = ipv6_hdr(skb);
297 ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); 290 ipv6_addr_copy(&iph->daddr, &fl6->daddr);
298 291
299 mtu_info = IP6CBMTU(skb); 292 mtu_info = IP6CBMTU(skb);
300 if (!mtu_info) { 293 if (!mtu_info) {
@@ -306,7 +299,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
306 mtu_info->ip6m_addr.sin6_family = AF_INET6; 299 mtu_info->ip6m_addr.sin6_family = AF_INET6;
307 mtu_info->ip6m_addr.sin6_port = 0; 300 mtu_info->ip6m_addr.sin6_port = 0;
308 mtu_info->ip6m_addr.sin6_flowinfo = 0; 301 mtu_info->ip6m_addr.sin6_flowinfo = 0;
309 mtu_info->ip6m_addr.sin6_scope_id = fl->oif; 302 mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
310 ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); 303 ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
311 304
312 __skb_pull(skb, skb_tail_pointer(skb) - skb->data); 305 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
@@ -600,7 +593,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
600} 593}
601 594
602int datagram_send_ctl(struct net *net, 595int datagram_send_ctl(struct net *net,
603 struct msghdr *msg, struct flowi *fl, 596 struct msghdr *msg, struct flowi6 *fl6,
604 struct ipv6_txoptions *opt, 597 struct ipv6_txoptions *opt,
605 int *hlimit, int *tclass, int *dontfrag) 598 int *hlimit, int *tclass, int *dontfrag)
606{ 599{
@@ -636,16 +629,17 @@ int datagram_send_ctl(struct net *net,
636 src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); 629 src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
637 630
638 if (src_info->ipi6_ifindex) { 631 if (src_info->ipi6_ifindex) {
639 if (fl->oif && src_info->ipi6_ifindex != fl->oif) 632 if (fl6->flowi6_oif &&
633 src_info->ipi6_ifindex != fl6->flowi6_oif)
640 return -EINVAL; 634 return -EINVAL;
641 fl->oif = src_info->ipi6_ifindex; 635 fl6->flowi6_oif = src_info->ipi6_ifindex;
642 } 636 }
643 637
644 addr_type = __ipv6_addr_type(&src_info->ipi6_addr); 638 addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
645 639
646 rcu_read_lock(); 640 rcu_read_lock();
647 if (fl->oif) { 641 if (fl6->flowi6_oif) {
648 dev = dev_get_by_index_rcu(net, fl->oif); 642 dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
649 if (!dev) { 643 if (!dev) {
650 rcu_read_unlock(); 644 rcu_read_unlock();
651 return -ENODEV; 645 return -ENODEV;
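For context, the ipi6_ifindex validated in this hunk arrives from userspace as IPV6_PKTINFO ancillary data. A minimal sketch of the sending side that produces such a control message; the interface index and buffer handling are made-up examples, not kernel API:

#define _GNU_SOURCE             /* struct in6_pktinfo in <netinet/in.h> (glibc) */
#include <string.h>
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* Attach an IPV6_PKTINFO block naming the outgoing interface; this is the
 * sender-side counterpart of the ipi6_ifindex parsing in the hunk above. */
static void set_pktinfo(struct msghdr *msg, void *cbuf, size_t cbuflen,
                        unsigned int ifindex, const struct in6_addr *src)
{
        struct cmsghdr *cmsg;
        struct in6_pktinfo pi;

        memset(&pi, 0, sizeof(pi));
        pi.ipi6_ifindex = ifindex;
        if (src)
                pi.ipi6_addr = *src;

        msg->msg_control = cbuf;
        msg->msg_controllen = cbuflen;
        cmsg = CMSG_FIRSTHDR(msg);
        cmsg->cmsg_level = IPPROTO_IPV6;
        cmsg->cmsg_type = IPV6_PKTINFO;
        cmsg->cmsg_len = CMSG_LEN(sizeof(pi));
        memcpy(CMSG_DATA(cmsg), &pi, sizeof(pi));
        msg->msg_controllen = CMSG_SPACE(sizeof(pi));
}

int main(void)
{
        struct msghdr msg;
        char cbuf[CMSG_SPACE(sizeof(struct in6_pktinfo))];

        memset(&msg, 0, sizeof(msg));
        set_pktinfo(&msg, cbuf, sizeof(cbuf), 2 /* assumed ifindex */, NULL);
        printf("controllen = %zu\n", (size_t)msg.msg_controllen);
        return 0;
}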
@@ -661,7 +655,7 @@ int datagram_send_ctl(struct net *net,
661 strict ? dev : NULL, 0)) 655 strict ? dev : NULL, 0))
662 err = -EINVAL; 656 err = -EINVAL;
663 else 657 else
664 ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); 658 ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr);
665 } 659 }
666 660
667 rcu_read_unlock(); 661 rcu_read_unlock();
@@ -678,13 +672,13 @@ int datagram_send_ctl(struct net *net,
678 goto exit_f; 672 goto exit_f;
679 } 673 }
680 674
681 if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) { 675 if (fl6->flowlabel&IPV6_FLOWINFO_MASK) {
682 if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { 676 if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
683 err = -EINVAL; 677 err = -EINVAL;
684 goto exit_f; 678 goto exit_f;
685 } 679 }
686 } 680 }
687 fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); 681 fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
688 break; 682 break;
689 683
690 case IPV6_2292HOPOPTS: 684 case IPV6_2292HOPOPTS:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1b5c9825743b..1ac7938dd9ec 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -54,16 +54,20 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
54/* 54/*
55 * Allocate an AEAD request structure with extra space for SG and IV. 55 * Allocate an AEAD request structure with extra space for SG and IV.
56 * 56 *
 57 * For alignment considerations the IV is placed at the front, followed 57 * For alignment considerations the upper 32 bits of the sequence number are
 58 * by the request and finally the SG list. 58 * placed at the front, if present, followed by the IV, the request and finally
 59 * the SG list.
59 * 60 *
60 * TODO: Use spare space in skb for this where possible. 61 * TODO: Use spare space in skb for this where possible.
61 */ 62 */
62static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) 63static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
63{ 64{
64 unsigned int len; 65 unsigned int len;
65 66
66 len = crypto_aead_ivsize(aead); 67 len = seqihlen;
68
69 len += crypto_aead_ivsize(aead);
70
67 if (len) { 71 if (len) {
68 len += crypto_aead_alignmask(aead) & 72 len += crypto_aead_alignmask(aead) &
69 ~(crypto_tfm_ctx_alignment() - 1); 73 ~(crypto_tfm_ctx_alignment() - 1);
@@ -78,10 +82,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
78 return kmalloc(len, GFP_ATOMIC); 82 return kmalloc(len, GFP_ATOMIC);
79} 83}
80 84
81static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) 85static inline __be32 *esp_tmp_seqhi(void *tmp)
86{
87 return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
88}
89
90static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
82{ 91{
83 return crypto_aead_ivsize(aead) ? 92 return crypto_aead_ivsize(aead) ?
84 PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; 93 PTR_ALIGN((u8 *)tmp + seqhilen,
94 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
85} 95}
86 96
87static inline struct aead_givcrypt_request *esp_tmp_givreq( 97static inline struct aead_givcrypt_request *esp_tmp_givreq(
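A rough sketch of the temporary-buffer layout the comment above describes (high sequence bits first, then IV, request and SG list, each aligned). Every size and alignment below is an assumed stand-in for the corresponding crypto_aead_*() value, chosen only to make the offset arithmetic visible:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for the kernel's pointer-alignment helpers. */
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

int main(void)
{
        size_t seqhilen = 4;   /* upper 32 bits of the ESN, when enabled */
        size_t ivlen    = 8;   /* assumed 8-byte IV */
        size_t ivalign  = 8;   /* crypto_aead_alignmask(aead) + 1, assumed */
        size_t reqalign = sizeof(void *);  /* crypto_tfm_ctx_alignment(), assumed */
        size_t reqlen   = 64;  /* request struct + crypto_aead_reqsize(), assumed */
        size_t nsg      = 3;   /* nfrags + sglists scatterlist entries */
        size_t sgentry  = 32;  /* sizeof(struct scatterlist), assumed */

        uintptr_t seqhi = 0;   /* offsets relative to the kmalloc'ed block */
        uintptr_t iv    = ALIGN_UP(seqhi + seqhilen, ivalign);
        uintptr_t req   = ALIGN_UP(iv + ivlen, reqalign);
        uintptr_t sg    = ALIGN_UP(req + reqlen, __alignof__(uint64_t));
        size_t total    = sg + nsg * sgentry;

        printf("seqhi@%zu iv@%zu req@%zu sg@%zu total=%zu\n",
               (size_t)seqhi, (size_t)iv, (size_t)req, (size_t)sg, total);
        return 0;
}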
@@ -145,8 +155,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
145 int plen; 155 int plen;
146 int tfclen; 156 int tfclen;
147 int nfrags; 157 int nfrags;
158 int assoclen;
159 int sglists;
160 int seqhilen;
148 u8 *iv; 161 u8 *iv;
149 u8 *tail; 162 u8 *tail;
163 __be32 *seqhi;
150 struct esp_data *esp = x->data; 164 struct esp_data *esp = x->data;
151 165
152 /* skb is pure payload to encrypt */ 166 /* skb is pure payload to encrypt */
@@ -175,14 +189,25 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
175 goto error; 189 goto error;
176 nfrags = err; 190 nfrags = err;
177 191
178 tmp = esp_alloc_tmp(aead, nfrags + 1); 192 assoclen = sizeof(*esph);
193 sglists = 1;
194 seqhilen = 0;
195
196 if (x->props.flags & XFRM_STATE_ESN) {
197 sglists += 2;
198 seqhilen += sizeof(__be32);
199 assoclen += seqhilen;
200 }
201
202 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
179 if (!tmp) 203 if (!tmp)
180 goto error; 204 goto error;
181 205
182 iv = esp_tmp_iv(aead, tmp); 206 seqhi = esp_tmp_seqhi(tmp);
207 iv = esp_tmp_iv(aead, tmp, seqhilen);
183 req = esp_tmp_givreq(aead, iv); 208 req = esp_tmp_givreq(aead, iv);
184 asg = esp_givreq_sg(aead, req); 209 asg = esp_givreq_sg(aead, req);
185 sg = asg + 1; 210 sg = asg + sglists;
186 211
187 /* Fill padding... */ 212 /* Fill padding... */
188 tail = skb_tail_pointer(trailer); 213 tail = skb_tail_pointer(trailer);
@@ -204,19 +229,27 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
204 *skb_mac_header(skb) = IPPROTO_ESP; 229 *skb_mac_header(skb) = IPPROTO_ESP;
205 230
206 esph->spi = x->id.spi; 231 esph->spi = x->id.spi;
207 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); 232 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
208 233
209 sg_init_table(sg, nfrags); 234 sg_init_table(sg, nfrags);
210 skb_to_sgvec(skb, sg, 235 skb_to_sgvec(skb, sg,
211 esph->enc_data + crypto_aead_ivsize(aead) - skb->data, 236 esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
212 clen + alen); 237 clen + alen);
213 sg_init_one(asg, esph, sizeof(*esph)); 238
239 if ((x->props.flags & XFRM_STATE_ESN)) {
240 sg_init_table(asg, 3);
241 sg_set_buf(asg, &esph->spi, sizeof(__be32));
242 *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
243 sg_set_buf(asg + 1, seqhi, seqhilen);
244 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
245 } else
246 sg_init_one(asg, esph, sizeof(*esph));
214 247
215 aead_givcrypt_set_callback(req, 0, esp_output_done, skb); 248 aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
216 aead_givcrypt_set_crypt(req, sg, sg, clen, iv); 249 aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
217 aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); 250 aead_givcrypt_set_assoc(req, asg, assoclen);
218 aead_givcrypt_set_giv(req, esph->enc_data, 251 aead_givcrypt_set_giv(req, esph->enc_data,
219 XFRM_SKB_CB(skb)->seq.output); 252 XFRM_SKB_CB(skb)->seq.output.low);
220 253
221 ESP_SKB_CB(skb)->tmp = tmp; 254 ESP_SKB_CB(skb)->tmp = tmp;
222 err = crypto_aead_givencrypt(req); 255 err = crypto_aead_givencrypt(req);
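With XFRM_STATE_ESN the associated data authenticated by the AEAD is SPI || seq_hi || seq_lo rather than just the 8-byte ESP header, which is what the three-entry scatterlist above expresses. A hedged userspace sketch of that 12-byte layout, with arbitrary example values:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
        uint32_t spi    = htonl(0x00001234);      /* example SA identifier */
        uint64_t seq    = 0x0000000100000005ULL;  /* example 64-bit ESN counter */
        uint32_t seq_hi = htonl((uint32_t)(seq >> 32));
        uint32_t seq_lo = htonl((uint32_t)(seq & 0xffffffffULL));
        unsigned char aad[12];

        /* Same ordering as the scatterlist above: spi, seq_hi, seq_lo. */
        memcpy(aad + 0, &spi,    4);
        memcpy(aad + 4, &seq_hi, 4);
        memcpy(aad + 8, &seq_lo, 4);

        for (size_t i = 0; i < sizeof(aad); i++)
                printf("%02x%s", aad[i], (i + 1) % 4 ? "" : " ");
        printf("\n");
        return 0;
}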
@@ -292,8 +325,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
292 struct sk_buff *trailer; 325 struct sk_buff *trailer;
293 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); 326 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
294 int nfrags; 327 int nfrags;
328 int assoclen;
329 int sglists;
330 int seqhilen;
295 int ret = 0; 331 int ret = 0;
296 void *tmp; 332 void *tmp;
333 __be32 *seqhi;
297 u8 *iv; 334 u8 *iv;
298 struct scatterlist *sg; 335 struct scatterlist *sg;
299 struct scatterlist *asg; 336 struct scatterlist *asg;
@@ -314,15 +351,27 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
314 } 351 }
315 352
316 ret = -ENOMEM; 353 ret = -ENOMEM;
317 tmp = esp_alloc_tmp(aead, nfrags + 1); 354
355 assoclen = sizeof(*esph);
356 sglists = 1;
357 seqhilen = 0;
358
359 if (x->props.flags & XFRM_STATE_ESN) {
360 sglists += 2;
361 seqhilen += sizeof(__be32);
362 assoclen += seqhilen;
363 }
364
365 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
318 if (!tmp) 366 if (!tmp)
319 goto out; 367 goto out;
320 368
321 ESP_SKB_CB(skb)->tmp = tmp; 369 ESP_SKB_CB(skb)->tmp = tmp;
322 iv = esp_tmp_iv(aead, tmp); 370 seqhi = esp_tmp_seqhi(tmp);
371 iv = esp_tmp_iv(aead, tmp, seqhilen);
323 req = esp_tmp_req(aead, iv); 372 req = esp_tmp_req(aead, iv);
324 asg = esp_req_sg(aead, req); 373 asg = esp_req_sg(aead, req);
325 sg = asg + 1; 374 sg = asg + sglists;
326 375
327 skb->ip_summed = CHECKSUM_NONE; 376 skb->ip_summed = CHECKSUM_NONE;
328 377
@@ -333,11 +382,19 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
333 382
334 sg_init_table(sg, nfrags); 383 sg_init_table(sg, nfrags);
335 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); 384 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
336 sg_init_one(asg, esph, sizeof(*esph)); 385
386 if ((x->props.flags & XFRM_STATE_ESN)) {
387 sg_init_table(asg, 3);
388 sg_set_buf(asg, &esph->spi, sizeof(__be32));
389 *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
390 sg_set_buf(asg + 1, seqhi, seqhilen);
391 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
392 } else
393 sg_init_one(asg, esph, sizeof(*esph));
337 394
338 aead_request_set_callback(req, 0, esp_input_done, skb); 395 aead_request_set_callback(req, 0, esp_input_done, skb);
339 aead_request_set_crypt(req, sg, sg, elen, iv); 396 aead_request_set_crypt(req, sg, sg, elen, iv);
340 aead_request_set_assoc(req, asg, sizeof(*esph)); 397 aead_request_set_assoc(req, asg, assoclen);
341 398
342 ret = crypto_aead_decrypt(req); 399 ret = crypto_aead_decrypt(req);
343 if (ret == -EINPROGRESS) 400 if (ret == -EINPROGRESS)
@@ -373,7 +430,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
373 u8 type, u8 code, int offset, __be32 info) 430 u8 type, u8 code, int offset, __be32 info)
374{ 431{
375 struct net *net = dev_net(skb->dev); 432 struct net *net = dev_net(skb->dev);
376 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; 433 const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
377 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); 434 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
378 struct xfrm_state *x; 435 struct xfrm_state *x;
379 436
@@ -381,7 +438,8 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
381 type != ICMPV6_PKT_TOOBIG) 438 type != ICMPV6_PKT_TOOBIG)
382 return; 439 return;
383 440
384 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); 441 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
442 esph->spi, IPPROTO_ESP, AF_INET6);
385 if (!x) 443 if (!x)
386 return; 444 return;
387 printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", 445 printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n",
@@ -443,10 +501,20 @@ static int esp_init_authenc(struct xfrm_state *x)
443 goto error; 501 goto error;
444 502
445 err = -ENAMETOOLONG; 503 err = -ENAMETOOLONG;
446 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", 504
447 x->aalg ? x->aalg->alg_name : "digest_null", 505 if ((x->props.flags & XFRM_STATE_ESN)) {
448 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) 506 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
449 goto error; 507 "authencesn(%s,%s)",
508 x->aalg ? x->aalg->alg_name : "digest_null",
509 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
510 goto error;
511 } else {
512 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
513 "authenc(%s,%s)",
514 x->aalg ? x->aalg->alg_name : "digest_null",
515 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
516 goto error;
517 }
450 518
451 aead = crypto_alloc_aead(authenc_name, 0, 0); 519 aead = crypto_alloc_aead(authenc_name, 0, 0);
452 err = PTR_ERR(aead); 520 err = PTR_ERR(aead);
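The setup-time difference for ESN is only the crypto template name, "authencesn(...)" instead of "authenc(...)". A standalone sketch of the same bounded snprintf pattern, with a placeholder standing in for CRYPTO_MAX_ALG_NAME:

#include <stdio.h>

#define NAME_MAX_LEN 64   /* stand-in for CRYPTO_MAX_ALG_NAME */

/* Build the AEAD template name; returns 0, or -1 if it would not fit,
 * mirroring the ">= CRYPTO_MAX_ALG_NAME" truncation check above. */
static int build_aead_name(char *buf, int esn, const char *auth, const char *enc)
{
        int n = snprintf(buf, NAME_MAX_LEN, "%s(%s,%s)",
                         esn ? "authencesn" : "authenc",
                         auth ? auth : "digest_null", enc);
        return (n >= NAME_MAX_LEN) ? -1 : 0;
}

int main(void)
{
        char name[NAME_MAX_LEN];

        if (!build_aead_name(name, 1, "hmac(sha1)", "cbc(aes)"))
                printf("%s\n", name);   /* authencesn(hmac(sha1),cbc(aes)) */
        return 0;
}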
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 262f105d23b9..79a485e8a700 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -876,22 +876,22 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
876 * fl6_update_dst - update flowi destination address with info given 876 * fl6_update_dst - update flowi destination address with info given
877 * by srcrt option, if any. 877 * by srcrt option, if any.
878 * 878 *
879 * @fl: flowi for which fl6_dst is to be updated 879 * @fl6: flowi6 for which daddr is to be updated
880 * @opt: struct ipv6_txoptions in which to look for srcrt opt 880 * @opt: struct ipv6_txoptions in which to look for srcrt opt
881 * @orig: copy of original fl6_dst address if modified 881 * @orig: copy of original daddr address if modified
882 * 882 *
883 * Returns NULL if no txoptions or no srcrt, otherwise returns orig 883 * Returns NULL if no txoptions or no srcrt, otherwise returns orig
884 * and initial value of fl->fl6_dst set in orig 884 * and initial value of fl6->daddr set in orig
885 */ 885 */
886struct in6_addr *fl6_update_dst(struct flowi *fl, 886struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
887 const struct ipv6_txoptions *opt, 887 const struct ipv6_txoptions *opt,
888 struct in6_addr *orig) 888 struct in6_addr *orig)
889{ 889{
890 if (!opt || !opt->srcrt) 890 if (!opt || !opt->srcrt)
891 return NULL; 891 return NULL;
892 892
893 ipv6_addr_copy(orig, &fl->fl6_dst); 893 ipv6_addr_copy(orig, &fl6->daddr);
894 ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); 894 ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr);
895 return orig; 895 return orig;
896} 896}
897 897
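Callers of fl6_update_dst() (inet6_csk_route_req() and inet6_csk_xmit() later in this patch, for instance) route towards the srcrt first hop and then restore the real destination once the route is chosen. A toy illustration of that save/restore pattern, using simplified stand-in types rather than the kernel's flowi6/ipv6_txoptions:

#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Illustrative stand-ins: a flow key and a source-routing option that
 * names the first hop. These are not the kernel structures. */
struct toy_flow  { struct in6_addr daddr; };
struct toy_srcrt { struct in6_addr first_hop; };

/* Mirror of the helper above: save the original destination in *orig,
 * point the flow at the first hop, and return orig (NULL if no srcrt). */
static struct in6_addr *toy_update_dst(struct toy_flow *fl,
                                       const struct toy_srcrt *srcrt,
                                       struct in6_addr *orig)
{
        if (!srcrt)
                return NULL;
        *orig = fl->daddr;
        fl->daddr = srcrt->first_hop;
        return orig;
}

int main(void)
{
        struct toy_flow fl;
        struct toy_srcrt rt;
        struct in6_addr final, *final_p;
        char buf[INET6_ADDRSTRLEN];

        inet_pton(AF_INET6, "2001:db8::1", &fl.daddr);      /* final destination */
        inet_pton(AF_INET6, "2001:db8::fe", &rt.first_hop);  /* routing-header hop */

        final_p = toy_update_dst(&fl, &rt, &final);
        printf("route towards %s\n", inet_ntop(AF_INET6, &fl.daddr, buf, sizeof(buf)));
        if (final_p)   /* restore the real destination once the route is chosen */
                fl.daddr = *final_p;
        printf("packet sent to %s\n", inet_ntop(AF_INET6, &fl.daddr, buf, sizeof(buf)));
        return 0;
}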
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d829874d8946..34d244df907d 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -29,7 +29,7 @@ struct fib6_rule
29 u8 tclass; 29 u8 tclass;
30}; 30};
31 31
32struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, 32struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
33 int flags, pol_lookup_t lookup) 33 int flags, pol_lookup_t lookup)
34{ 34{
35 struct fib_lookup_arg arg = { 35 struct fib_lookup_arg arg = {
@@ -37,7 +37,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
37 .flags = FIB_LOOKUP_NOREF, 37 .flags = FIB_LOOKUP_NOREF,
38 }; 38 };
39 39
40 fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); 40 fib_rules_lookup(net->ipv6.fib6_rules_ops,
41 flowi6_to_flowi(fl6), flags, &arg);
41 42
42 if (arg.result) 43 if (arg.result)
43 return arg.result; 44 return arg.result;
@@ -49,6 +50,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
49static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, 50static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
50 int flags, struct fib_lookup_arg *arg) 51 int flags, struct fib_lookup_arg *arg)
51{ 52{
53 struct flowi6 *flp6 = &flp->u.ip6;
52 struct rt6_info *rt = NULL; 54 struct rt6_info *rt = NULL;
53 struct fib6_table *table; 55 struct fib6_table *table;
54 struct net *net = rule->fr_net; 56 struct net *net = rule->fr_net;
@@ -71,7 +73,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
71 73
72 table = fib6_get_table(net, rule->table); 74 table = fib6_get_table(net, rule->table);
73 if (table) 75 if (table)
74 rt = lookup(net, table, flp, flags); 76 rt = lookup(net, table, flp6, flags);
75 77
76 if (rt != net->ipv6.ip6_null_entry) { 78 if (rt != net->ipv6.ip6_null_entry) {
77 struct fib6_rule *r = (struct fib6_rule *)rule; 79 struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -86,14 +88,14 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
86 88
87 if (ipv6_dev_get_saddr(net, 89 if (ipv6_dev_get_saddr(net,
88 ip6_dst_idev(&rt->dst)->dev, 90 ip6_dst_idev(&rt->dst)->dev,
89 &flp->fl6_dst, 91 &flp6->daddr,
90 rt6_flags2srcprefs(flags), 92 rt6_flags2srcprefs(flags),
91 &saddr)) 93 &saddr))
92 goto again; 94 goto again;
93 if (!ipv6_prefix_equal(&saddr, &r->src.addr, 95 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
94 r->src.plen)) 96 r->src.plen))
95 goto again; 97 goto again;
96 ipv6_addr_copy(&flp->fl6_src, &saddr); 98 ipv6_addr_copy(&flp6->saddr, &saddr);
97 } 99 }
98 goto out; 100 goto out;
99 } 101 }
@@ -113,9 +115,10 @@ out:
113static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 115static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
114{ 116{
115 struct fib6_rule *r = (struct fib6_rule *) rule; 117 struct fib6_rule *r = (struct fib6_rule *) rule;
118 struct flowi6 *fl6 = &fl->u.ip6;
116 119
117 if (r->dst.plen && 120 if (r->dst.plen &&
118 !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) 121 !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen))
119 return 0; 122 return 0;
120 123
121 /* 124 /*
@@ -125,14 +128,14 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
125 */ 128 */
126 if (r->src.plen) { 129 if (r->src.plen) {
127 if (flags & RT6_LOOKUP_F_HAS_SADDR) { 130 if (flags & RT6_LOOKUP_F_HAS_SADDR) {
128 if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, 131 if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr,
129 r->src.plen)) 132 r->src.plen))
130 return 0; 133 return 0;
131 } else if (!(r->common.flags & FIB_RULE_FIND_SADDR)) 134 } else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
132 return 0; 135 return 0;
133 } 136 }
134 137
135 if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) 138 if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff))
136 return 0; 139 return 0;
137 140
138 return 1; 141 return 1;
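The tclass comparison above extracts the traffic class from the flow-information word, which the flow key keeps in network byte order with the DSCP/ECN byte in bits 20-27 and the flow label in the low 20 bits. A quick standalone check of that extraction:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
        /* flowinfo as carried in the flow key: network byte order, version
         * nibble clear, traffic class 0x2e (EF), flow label 0x12345. */
        uint32_t flowinfo = htonl((0x2eU << 20) | 0x12345U);

        uint8_t  tclass = (ntohl(flowinfo) >> 20) & 0xff;   /* same shift as above */
        uint32_t label  = ntohl(flowinfo) & 0xfffff;

        printf("tclass=0x%02x flow label=0x%05x\n", tclass, label);
        return 0;
}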
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 03e62f94ff8e..11900417b1cc 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -157,32 +157,32 @@ static int is_ineligible(struct sk_buff *skb)
157/* 157/*
158 * Check the ICMP output rate limit 158 * Check the ICMP output rate limit
159 */ 159 */
160static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, 160static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
161 struct flowi *fl) 161 struct flowi6 *fl6)
162{ 162{
163 struct dst_entry *dst; 163 struct dst_entry *dst;
164 struct net *net = sock_net(sk); 164 struct net *net = sock_net(sk);
165 int res = 0; 165 bool res = false;
166 166
167 /* Informational messages are not limited. */ 167 /* Informational messages are not limited. */
168 if (type & ICMPV6_INFOMSG_MASK) 168 if (type & ICMPV6_INFOMSG_MASK)
169 return 1; 169 return true;
170 170
171 /* Do not limit pmtu discovery, it would break it. */ 171 /* Do not limit pmtu discovery, it would break it. */
172 if (type == ICMPV6_PKT_TOOBIG) 172 if (type == ICMPV6_PKT_TOOBIG)
173 return 1; 173 return true;
174 174
175 /* 175 /*
176 * Look up the output route. 176 * Look up the output route.
177 * XXX: perhaps the expire for routing entries cloned by 177 * XXX: perhaps the expire for routing entries cloned by
178 * this lookup should be more aggressive (not longer than timeout). 178 * this lookup should be more aggressive (not longer than timeout).
179 */ 179 */
180 dst = ip6_route_output(net, sk, fl); 180 dst = ip6_route_output(net, sk, fl6);
181 if (dst->error) { 181 if (dst->error) {
182 IP6_INC_STATS(net, ip6_dst_idev(dst), 182 IP6_INC_STATS(net, ip6_dst_idev(dst),
183 IPSTATS_MIB_OUTNOROUTES); 183 IPSTATS_MIB_OUTNOROUTES);
184 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { 184 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
185 res = 1; 185 res = true;
186 } else { 186 } else {
187 struct rt6_info *rt = (struct rt6_info *)dst; 187 struct rt6_info *rt = (struct rt6_info *)dst;
188 int tmo = net->ipv6.sysctl.icmpv6_time; 188 int tmo = net->ipv6.sysctl.icmpv6_time;
@@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
191 if (rt->rt6i_dst.plen < 128) 191 if (rt->rt6i_dst.plen < 128)
192 tmo >>= ((128 - rt->rt6i_dst.plen)>>5); 192 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
193 193
194 res = xrlim_allow(dst, tmo); 194 if (!rt->rt6i_peer)
195 rt6_bind_peer(rt, 1);
196 res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
195 } 197 }
196 dst_release(dst); 198 dst_release(dst);
197 return res; 199 return res;
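The token interval used above is the icmpv6_time sysctl scaled down for short prefixes: shifting right by ((128 - plen) >> 5) leaves a /128 host route with the full interval and a default route with a sixteenth of it. A worked example, assuming the usual 1000 ms default:

#include <stdio.h>

int main(void)
{
        int icmpv6_time = 1000;   /* sysctl default, in milliseconds (assumed) */
        int plens[] = { 128, 96, 64, 32, 0 };

        for (unsigned i = 0; i < sizeof(plens) / sizeof(plens[0]); i++) {
                int plen = plens[i];
                int tmo = icmpv6_time;

                if (plen < 128)
                        tmo >>= ((128 - plen) >> 5);   /* same scaling as above */
                printf("/%d -> %d ms between ICMPv6 errors\n", plen, tmo);
        }
        return 0;
}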
@@ -215,7 +217,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
215 return (*op & 0xC0) == 0x80; 217 return (*op & 0xC0) == 0x80;
216} 218}
217 219
218static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len) 220static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
219{ 221{
220 struct sk_buff *skb; 222 struct sk_buff *skb;
221 struct icmp6hdr *icmp6h; 223 struct icmp6hdr *icmp6h;
@@ -231,9 +233,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
231 if (skb_queue_len(&sk->sk_write_queue) == 1) { 233 if (skb_queue_len(&sk->sk_write_queue) == 1) {
232 skb->csum = csum_partial(icmp6h, 234 skb->csum = csum_partial(icmp6h,
233 sizeof(struct icmp6hdr), skb->csum); 235 sizeof(struct icmp6hdr), skb->csum);
234 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, 236 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
235 &fl->fl6_dst, 237 &fl6->daddr,
236 len, fl->proto, 238 len, fl6->flowi6_proto,
237 skb->csum); 239 skb->csum);
238 } else { 240 } else {
239 __wsum tmp_csum = 0; 241 __wsum tmp_csum = 0;
@@ -244,9 +246,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
244 246
245 tmp_csum = csum_partial(icmp6h, 247 tmp_csum = csum_partial(icmp6h,
246 sizeof(struct icmp6hdr), tmp_csum); 248 sizeof(struct icmp6hdr), tmp_csum);
247 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, 249 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
248 &fl->fl6_dst, 250 &fl6->daddr,
249 len, fl->proto, 251 len, fl6->flowi6_proto,
250 tmp_csum); 252 tmp_csum);
251 } 253 }
252 ip6_push_pending_frames(sk); 254 ip6_push_pending_frames(sk);
@@ -298,6 +300,68 @@ static void mip6_addr_swap(struct sk_buff *skb)
298static inline void mip6_addr_swap(struct sk_buff *skb) {} 300static inline void mip6_addr_swap(struct sk_buff *skb) {}
299#endif 301#endif
300 302
303static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
304 struct sock *sk, struct flowi6 *fl6)
305{
306 struct dst_entry *dst, *dst2;
307 struct flowi6 fl2;
308 int err;
309
310 err = ip6_dst_lookup(sk, &dst, fl6);
311 if (err)
312 return ERR_PTR(err);
313
314 /*
315 * We won't send icmp if the destination is known
316 * anycast.
317 */
318 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
319 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
320 dst_release(dst);
321 return ERR_PTR(-EINVAL);
322 }
323
324 /* No need to clone since we're just using its address. */
325 dst2 = dst;
326
327 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
328 if (!IS_ERR(dst)) {
329 if (dst != dst2)
330 return dst;
331 } else {
332 if (PTR_ERR(dst) == -EPERM)
333 dst = NULL;
334 else
335 return dst;
336 }
337
338 err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
339 if (err)
340 goto relookup_failed;
341
342 err = ip6_dst_lookup(sk, &dst2, &fl2);
343 if (err)
344 goto relookup_failed;
345
346 dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
347 if (!IS_ERR(dst2)) {
348 dst_release(dst);
349 dst = dst2;
350 } else {
351 err = PTR_ERR(dst2);
352 if (err == -EPERM) {
353 dst_release(dst);
354 return dst2;
355 } else
356 goto relookup_failed;
357 }
358
359relookup_failed:
360 if (dst)
361 return dst;
362 return ERR_PTR(err);
363}
364
301/* 365/*
302 * Send an ICMP message in response to a packet in error 366 * Send an ICMP message in response to a packet in error
303 */ 367 */
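icmpv6_route_lookup() above, like the new ip6_dst_lookup_flow(), hands back either a usable dst or a pointer-encoded errno, so callers test IS_ERR() instead of carrying a separate status variable. A self-contained sketch of that convention, re-implementing the <linux/err.h> helpers in userspace purely for illustration:

#include <stdio.h>
#include <errno.h>

/* Minimal userspace re-implementation of the kernel's ERR_PTR helpers. */
#define MAX_ERRNO 4095
static inline void *ERR_PTR(long err)      { return (void *)err; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline int   IS_ERR(const void *p)
{
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

struct route { int id; };

static struct route *route_lookup(int want_error)
{
        static struct route r = { .id = 42 };

        if (want_error)
                return ERR_PTR(-EINVAL);   /* the error travels in the pointer */
        return &r;
}

int main(void)
{
        struct route *rt = route_lookup(1);

        if (IS_ERR(rt))
                printf("lookup failed: %ld\n", PTR_ERR(rt));
        else
                printf("route id %d\n", rt->id);
        return 0;
}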
@@ -308,12 +372,10 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
308 struct ipv6hdr *hdr = ipv6_hdr(skb); 372 struct ipv6hdr *hdr = ipv6_hdr(skb);
309 struct sock *sk; 373 struct sock *sk;
310 struct ipv6_pinfo *np; 374 struct ipv6_pinfo *np;
311 struct in6_addr *saddr = NULL; 375 const struct in6_addr *saddr = NULL;
312 struct dst_entry *dst; 376 struct dst_entry *dst;
313 struct dst_entry *dst2;
314 struct icmp6hdr tmp_hdr; 377 struct icmp6hdr tmp_hdr;
315 struct flowi fl; 378 struct flowi6 fl6;
316 struct flowi fl2;
317 struct icmpv6_msg msg; 379 struct icmpv6_msg msg;
318 int iif = 0; 380 int iif = 0;
319 int addr_type = 0; 381 int addr_type = 0;
@@ -380,22 +442,22 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
380 442
381 mip6_addr_swap(skb); 443 mip6_addr_swap(skb);
382 444
383 memset(&fl, 0, sizeof(fl)); 445 memset(&fl6, 0, sizeof(fl6));
384 fl.proto = IPPROTO_ICMPV6; 446 fl6.flowi6_proto = IPPROTO_ICMPV6;
385 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); 447 ipv6_addr_copy(&fl6.daddr, &hdr->saddr);
386 if (saddr) 448 if (saddr)
387 ipv6_addr_copy(&fl.fl6_src, saddr); 449 ipv6_addr_copy(&fl6.saddr, saddr);
388 fl.oif = iif; 450 fl6.flowi6_oif = iif;
389 fl.fl_icmp_type = type; 451 fl6.fl6_icmp_type = type;
390 fl.fl_icmp_code = code; 452 fl6.fl6_icmp_code = code;
391 security_skb_classify_flow(skb, &fl); 453 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
392 454
393 sk = icmpv6_xmit_lock(net); 455 sk = icmpv6_xmit_lock(net);
394 if (sk == NULL) 456 if (sk == NULL)
395 return; 457 return;
396 np = inet6_sk(sk); 458 np = inet6_sk(sk);
397 459
398 if (!icmpv6_xrlim_allow(sk, type, &fl)) 460 if (!icmpv6_xrlim_allow(sk, type, &fl6))
399 goto out; 461 goto out;
400 462
401 tmp_hdr.icmp6_type = type; 463 tmp_hdr.icmp6_type = type;
@@ -403,61 +465,14 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
403 tmp_hdr.icmp6_cksum = 0; 465 tmp_hdr.icmp6_cksum = 0;
404 tmp_hdr.icmp6_pointer = htonl(info); 466 tmp_hdr.icmp6_pointer = htonl(info);
405 467
406 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 468 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
407 fl.oif = np->mcast_oif; 469 fl6.flowi6_oif = np->mcast_oif;
408 470
409 err = ip6_dst_lookup(sk, &dst, &fl); 471 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
410 if (err) 472 if (IS_ERR(dst))
411 goto out; 473 goto out;
412 474
413 /* 475 if (ipv6_addr_is_multicast(&fl6.daddr))
414 * We won't send icmp if the destination is known
415 * anycast.
416 */
417 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
418 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
419 goto out_dst_release;
420 }
421
422 /* No need to clone since we're just using its address. */
423 dst2 = dst;
424
425 err = xfrm_lookup(net, &dst, &fl, sk, 0);
426 switch (err) {
427 case 0:
428 if (dst != dst2)
429 goto route_done;
430 break;
431 case -EPERM:
432 dst = NULL;
433 break;
434 default:
435 goto out;
436 }
437
438 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
439 goto relookup_failed;
440
441 if (ip6_dst_lookup(sk, &dst2, &fl2))
442 goto relookup_failed;
443
444 err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
445 switch (err) {
446 case 0:
447 dst_release(dst);
448 dst = dst2;
449 break;
450 case -EPERM:
451 goto out_dst_release;
452 default:
453relookup_failed:
454 if (!dst)
455 goto out;
456 break;
457 }
458
459route_done:
460 if (ipv6_addr_is_multicast(&fl.fl6_dst))
461 hlimit = np->mcast_hops; 476 hlimit = np->mcast_hops;
462 else 477 else
463 hlimit = np->hop_limit; 478 hlimit = np->hop_limit;
@@ -480,14 +495,14 @@ route_done:
480 err = ip6_append_data(sk, icmpv6_getfrag, &msg, 495 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
481 len + sizeof(struct icmp6hdr), 496 len + sizeof(struct icmp6hdr),
482 sizeof(struct icmp6hdr), hlimit, 497 sizeof(struct icmp6hdr), hlimit,
483 np->tclass, NULL, &fl, (struct rt6_info*)dst, 498 np->tclass, NULL, &fl6, (struct rt6_info*)dst,
484 MSG_DONTWAIT, np->dontfrag); 499 MSG_DONTWAIT, np->dontfrag);
485 if (err) { 500 if (err) {
486 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); 501 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
487 ip6_flush_pending_frames(sk); 502 ip6_flush_pending_frames(sk);
488 goto out_put; 503 goto out_put;
489 } 504 }
490 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr)); 505 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr));
491 506
492out_put: 507out_put:
493 if (likely(idev != NULL)) 508 if (likely(idev != NULL))
@@ -506,10 +521,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
506 struct sock *sk; 521 struct sock *sk;
507 struct inet6_dev *idev; 522 struct inet6_dev *idev;
508 struct ipv6_pinfo *np; 523 struct ipv6_pinfo *np;
509 struct in6_addr *saddr = NULL; 524 const struct in6_addr *saddr = NULL;
510 struct icmp6hdr *icmph = icmp6_hdr(skb); 525 struct icmp6hdr *icmph = icmp6_hdr(skb);
511 struct icmp6hdr tmp_hdr; 526 struct icmp6hdr tmp_hdr;
512 struct flowi fl; 527 struct flowi6 fl6;
513 struct icmpv6_msg msg; 528 struct icmpv6_msg msg;
514 struct dst_entry *dst; 529 struct dst_entry *dst;
515 int err = 0; 530 int err = 0;
@@ -523,30 +538,31 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
523 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); 538 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
524 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; 539 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
525 540
526 memset(&fl, 0, sizeof(fl)); 541 memset(&fl6, 0, sizeof(fl6));
527 fl.proto = IPPROTO_ICMPV6; 542 fl6.flowi6_proto = IPPROTO_ICMPV6;
528 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); 543 ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
529 if (saddr) 544 if (saddr)
530 ipv6_addr_copy(&fl.fl6_src, saddr); 545 ipv6_addr_copy(&fl6.saddr, saddr);
531 fl.oif = skb->dev->ifindex; 546 fl6.flowi6_oif = skb->dev->ifindex;
532 fl.fl_icmp_type = ICMPV6_ECHO_REPLY; 547 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
533 security_skb_classify_flow(skb, &fl); 548 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
534 549
535 sk = icmpv6_xmit_lock(net); 550 sk = icmpv6_xmit_lock(net);
536 if (sk == NULL) 551 if (sk == NULL)
537 return; 552 return;
538 np = inet6_sk(sk); 553 np = inet6_sk(sk);
539 554
540 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 555 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
541 fl.oif = np->mcast_oif; 556 fl6.flowi6_oif = np->mcast_oif;
542 557
543 err = ip6_dst_lookup(sk, &dst, &fl); 558 err = ip6_dst_lookup(sk, &dst, &fl6);
544 if (err) 559 if (err)
545 goto out; 560 goto out;
546 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) 561 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
562 if (IS_ERR(dst))
547 goto out; 563 goto out;
548 564
549 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 565 if (ipv6_addr_is_multicast(&fl6.daddr))
550 hlimit = np->mcast_hops; 566 hlimit = np->mcast_hops;
551 else 567 else
552 hlimit = np->hop_limit; 568 hlimit = np->hop_limit;
@@ -560,7 +576,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
560 msg.type = ICMPV6_ECHO_REPLY; 576 msg.type = ICMPV6_ECHO_REPLY;
561 577
562 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), 578 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
563 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, 579 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
564 (struct rt6_info*)dst, MSG_DONTWAIT, 580 (struct rt6_info*)dst, MSG_DONTWAIT,
565 np->dontfrag); 581 np->dontfrag);
566 582
@@ -569,7 +585,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
569 ip6_flush_pending_frames(sk); 585 ip6_flush_pending_frames(sk);
570 goto out_put; 586 goto out_put;
571 } 587 }
572 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); 588 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
573 589
574out_put: 590out_put:
575 if (likely(idev != NULL)) 591 if (likely(idev != NULL))
@@ -629,8 +645,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
629{ 645{
630 struct net_device *dev = skb->dev; 646 struct net_device *dev = skb->dev;
631 struct inet6_dev *idev = __in6_dev_get(dev); 647 struct inet6_dev *idev = __in6_dev_get(dev);
632 struct in6_addr *saddr, *daddr; 648 const struct in6_addr *saddr, *daddr;
633 struct ipv6hdr *orig_hdr; 649 const struct ipv6hdr *orig_hdr;
634 struct icmp6hdr *hdr; 650 struct icmp6hdr *hdr;
635 u8 type; 651 u8 type;
636 652
@@ -768,20 +784,20 @@ drop_no_count:
768 return 0; 784 return 0;
769} 785}
770 786
771void icmpv6_flow_init(struct sock *sk, struct flowi *fl, 787void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
772 u8 type, 788 u8 type,
773 const struct in6_addr *saddr, 789 const struct in6_addr *saddr,
774 const struct in6_addr *daddr, 790 const struct in6_addr *daddr,
775 int oif) 791 int oif)
776{ 792{
777 memset(fl, 0, sizeof(*fl)); 793 memset(fl6, 0, sizeof(*fl6));
778 ipv6_addr_copy(&fl->fl6_src, saddr); 794 ipv6_addr_copy(&fl6->saddr, saddr);
779 ipv6_addr_copy(&fl->fl6_dst, daddr); 795 ipv6_addr_copy(&fl6->daddr, daddr);
780 fl->proto = IPPROTO_ICMPV6; 796 fl6->flowi6_proto = IPPROTO_ICMPV6;
781 fl->fl_icmp_type = type; 797 fl6->fl6_icmp_type = type;
782 fl->fl_icmp_code = 0; 798 fl6->fl6_icmp_code = 0;
783 fl->oif = oif; 799 fl6->flowi6_oif = oif;
784 security_sk_classify_flow(sk, fl); 800 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
785} 801}
786 802
787/* 803/*
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d144e629d2b4..8a58e8cf6646 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
44 !sk2->sk_bound_dev_if || 44 !sk2->sk_bound_dev_if ||
45 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && 45 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
46 (!sk->sk_reuse || !sk2->sk_reuse || 46 (!sk->sk_reuse || !sk2->sk_reuse ||
47 ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) && 47 sk2->sk_state == TCP_LISTEN) &&
48 ipv6_rcv_saddr_equal(sk, sk2)) 48 ipv6_rcv_saddr_equal(sk, sk2))
49 break; 49 break;
50 } 50 }
@@ -61,26 +61,21 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
61 struct ipv6_pinfo *np = inet6_sk(sk); 61 struct ipv6_pinfo *np = inet6_sk(sk);
62 struct in6_addr *final_p, final; 62 struct in6_addr *final_p, final;
63 struct dst_entry *dst; 63 struct dst_entry *dst;
64 struct flowi fl; 64 struct flowi6 fl6;
65 65
66 memset(&fl, 0, sizeof(fl)); 66 memset(&fl6, 0, sizeof(fl6));
67 fl.proto = IPPROTO_TCP; 67 fl6.flowi6_proto = IPPROTO_TCP;
68 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 68 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
69 final_p = fl6_update_dst(&fl, np->opt, &final); 69 final_p = fl6_update_dst(&fl6, np->opt, &final);
70 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); 70 ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
71 fl.oif = sk->sk_bound_dev_if; 71 fl6.flowi6_oif = sk->sk_bound_dev_if;
72 fl.mark = sk->sk_mark; 72 fl6.flowi6_mark = sk->sk_mark;
73 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 73 fl6.fl6_dport = inet_rsk(req)->rmt_port;
74 fl.fl_ip_sport = inet_rsk(req)->loc_port; 74 fl6.fl6_sport = inet_rsk(req)->loc_port;
75 security_req_classify_flow(req, &fl); 75 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
76 76
77 if (ip6_dst_lookup(sk, &dst, &fl)) 77 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
78 return NULL; 78 if (IS_ERR(dst))
79
80 if (final_p)
81 ipv6_addr_copy(&fl.fl6_dst, final_p);
82
83 if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
84 return NULL; 79 return NULL;
85 80
86 return dst; 81 return dst;
@@ -208,47 +203,39 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
208 return dst; 203 return dst;
209} 204}
210 205
211int inet6_csk_xmit(struct sk_buff *skb) 206int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
212{ 207{
213 struct sock *sk = skb->sk; 208 struct sock *sk = skb->sk;
214 struct inet_sock *inet = inet_sk(sk); 209 struct inet_sock *inet = inet_sk(sk);
215 struct ipv6_pinfo *np = inet6_sk(sk); 210 struct ipv6_pinfo *np = inet6_sk(sk);
216 struct flowi fl; 211 struct flowi6 fl6;
217 struct dst_entry *dst; 212 struct dst_entry *dst;
218 struct in6_addr *final_p, final; 213 struct in6_addr *final_p, final;
219 214
220 memset(&fl, 0, sizeof(fl)); 215 memset(&fl6, 0, sizeof(fl6));
221 fl.proto = sk->sk_protocol; 216 fl6.flowi6_proto = sk->sk_protocol;
222 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 217 ipv6_addr_copy(&fl6.daddr, &np->daddr);
223 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 218 ipv6_addr_copy(&fl6.saddr, &np->saddr);
224 fl.fl6_flowlabel = np->flow_label; 219 fl6.flowlabel = np->flow_label;
225 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); 220 IP6_ECN_flow_xmit(sk, fl6.flowlabel);
226 fl.oif = sk->sk_bound_dev_if; 221 fl6.flowi6_oif = sk->sk_bound_dev_if;
227 fl.mark = sk->sk_mark; 222 fl6.flowi6_mark = sk->sk_mark;
228 fl.fl_ip_sport = inet->inet_sport; 223 fl6.fl6_sport = inet->inet_sport;
229 fl.fl_ip_dport = inet->inet_dport; 224 fl6.fl6_dport = inet->inet_dport;
230 security_sk_classify_flow(sk, &fl); 225 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
231 226
232 final_p = fl6_update_dst(&fl, np->opt, &final); 227 final_p = fl6_update_dst(&fl6, np->opt, &final);
233 228
234 dst = __inet6_csk_dst_check(sk, np->dst_cookie); 229 dst = __inet6_csk_dst_check(sk, np->dst_cookie);
235 230
236 if (dst == NULL) { 231 if (dst == NULL) {
237 int err = ip6_dst_lookup(sk, &dst, &fl); 232 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
238
239 if (err) {
240 sk->sk_err_soft = -err;
241 kfree_skb(skb);
242 return err;
243 }
244
245 if (final_p)
246 ipv6_addr_copy(&fl.fl6_dst, final_p);
247 233
248 if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { 234 if (IS_ERR(dst)) {
235 sk->sk_err_soft = -PTR_ERR(dst);
249 sk->sk_route_caps = 0; 236 sk->sk_route_caps = 0;
250 kfree_skb(skb); 237 kfree_skb(skb);
251 return err; 238 return PTR_ERR(dst);
252 } 239 }
253 240
254 __inet6_csk_dst_store(sk, dst, NULL, NULL); 241 __inet6_csk_dst_store(sk, dst, NULL, NULL);
@@ -257,9 +244,9 @@ int inet6_csk_xmit(struct sk_buff *skb)
257 skb_dst_set(skb, dst_clone(dst)); 244 skb_dst_set(skb, dst_clone(dst));
258 245
259 /* Restore final destination back after routing done */ 246 /* Restore final destination back after routing done */
260 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 247 ipv6_addr_copy(&fl6.daddr, &np->daddr);
261 248
262 return ip6_xmit(sk, skb, &fl, np->opt); 249 return ip6_xmit(sk, skb, &fl6, np->opt);
263} 250}
264 251
265EXPORT_SYMBOL_GPL(inet6_csk_xmit); 252EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 633a6c266136..b53197233709 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -124,7 +124,7 @@ out:
124} 124}
125EXPORT_SYMBOL(__inet6_lookup_established); 125EXPORT_SYMBOL(__inet6_lookup_established);
126 126
127static int inline compute_score(struct sock *sk, struct net *net, 127static inline int compute_score(struct sock *sk, struct net *net,
128 const unsigned short hnum, 128 const unsigned short hnum,
129 const struct in6_addr *daddr, 129 const struct in6_addr *daddr,
130 const int dif) 130 const int dif)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index de382114609b..4076a0b14b20 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -134,9 +134,9 @@ static __inline__ u32 fib6_new_sernum(void)
134# define BITOP_BE32_SWIZZLE 0 134# define BITOP_BE32_SWIZZLE 0
135#endif 135#endif
136 136
137static __inline__ __be32 addr_bit_set(void *token, int fn_bit) 137static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
138{ 138{
139 __be32 *addr = token; 139 const __be32 *addr = token;
140 /* 140 /*
141 * Here, 141 * Here,
142 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) 142 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
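addr_bit_set() tests bit fn_bit of the address, counted from the most significant bit, using 32-bit words so the swizzle constant folds the endianness away. An equivalent byte-wise formulation (the same bit, just not the kernel's word-based expression) for checking the logic outside the kernel:

#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Return nonzero if bit 'fn_bit' of the address is set, where bit 0 is
 * the most significant bit of the first byte (the fib trie convention). */
static int addr_bit_set(const struct in6_addr *addr, int fn_bit)
{
        return addr->s6_addr[fn_bit >> 3] & (0x80 >> (fn_bit & 7));
}

int main(void)
{
        struct in6_addr a;

        inet_pton(AF_INET6, "2001:db8::1", &a);
        /* First byte is 0x20 = 0010 0000b, so bit 2 is the first set bit. */
        for (int bit = 0; bit < 8; bit++)
                printf("bit %d = %d\n", bit, !!addr_bit_set(&a, bit));
        return 0;
}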
@@ -260,10 +260,10 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
260 return net->ipv6.fib6_main_tbl; 260 return net->ipv6.fib6_main_tbl;
261} 261}
262 262
263struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, 263struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
264 int flags, pol_lookup_t lookup) 264 int flags, pol_lookup_t lookup)
265{ 265{
266 return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags); 266 return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
267} 267}
268 268
269static void __net_init fib6_tables_init(struct net *net) 269static void __net_init fib6_tables_init(struct net *net)
@@ -394,10 +394,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
394 arg.net = net; 394 arg.net = net;
395 w->args = &arg; 395 w->args = &arg;
396 396
397 rcu_read_lock();
397 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { 398 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
398 e = 0; 399 e = 0;
399 head = &net->ipv6.fib_table_hash[h]; 400 head = &net->ipv6.fib_table_hash[h];
400 hlist_for_each_entry(tb, node, head, tb6_hlist) { 401 hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
401 if (e < s_e) 402 if (e < s_e)
402 goto next; 403 goto next;
403 res = fib6_dump_table(tb, skb, cb); 404 res = fib6_dump_table(tb, skb, cb);
@@ -408,6 +409,7 @@ next:
408 } 409 }
409 } 410 }
410out: 411out:
412 rcu_read_unlock();
411 cb->args[1] = e; 413 cb->args[1] = e;
412 cb->args[0] = h; 414 cb->args[0] = h;
413 415
@@ -822,7 +824,7 @@ st_failure:
822 824
823struct lookup_args { 825struct lookup_args {
824 int offset; /* key offset on rt6_info */ 826 int offset; /* key offset on rt6_info */
825 struct in6_addr *addr; /* search key */ 827 const struct in6_addr *addr; /* search key */
826}; 828};
827 829
828static struct fib6_node * fib6_lookup_1(struct fib6_node *root, 830static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
@@ -881,8 +883,8 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
881 return NULL; 883 return NULL;
882} 884}
883 885
884struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, 886struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
885 struct in6_addr *saddr) 887 const struct in6_addr *saddr)
886{ 888{
887 struct fib6_node *fn; 889 struct fib6_node *fn;
888 struct lookup_args args[] = { 890 struct lookup_args args[] = {
@@ -916,7 +918,7 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
916 918
917 919
918static struct fib6_node * fib6_locate_1(struct fib6_node *root, 920static struct fib6_node * fib6_locate_1(struct fib6_node *root,
919 struct in6_addr *addr, 921 const struct in6_addr *addr,
920 int plen, int offset) 922 int plen, int offset)
921{ 923{
922 struct fib6_node *fn; 924 struct fib6_node *fn;
@@ -946,8 +948,8 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
946} 948}
947 949
948struct fib6_node * fib6_locate(struct fib6_node *root, 950struct fib6_node * fib6_locate(struct fib6_node *root,
949 struct in6_addr *daddr, int dst_len, 951 const struct in6_addr *daddr, int dst_len,
950 struct in6_addr *saddr, int src_len) 952 const struct in6_addr *saddr, int src_len)
951{ 953{
952 struct fib6_node *fn; 954 struct fib6_node *fn;
953 955
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 13654686aeab..f3caf1b8d572 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -342,7 +342,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
342 342
343 if (olen > 0) { 343 if (olen > 0) {
344 struct msghdr msg; 344 struct msghdr msg;
345 struct flowi flowi; 345 struct flowi6 flowi6;
346 int junk; 346 int junk;
347 347
348 err = -ENOMEM; 348 err = -ENOMEM;
@@ -358,9 +358,9 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
358 358
359 msg.msg_controllen = olen; 359 msg.msg_controllen = olen;
360 msg.msg_control = (void*)(fl->opt+1); 360 msg.msg_control = (void*)(fl->opt+1);
361 flowi.oif = 0; 361 memset(&flowi6, 0, sizeof(flowi6));
362 362
363 err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, 363 err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk,
364 &junk, &junk); 364 &junk, &junk);
365 if (err) 365 if (err)
366 goto done; 366 goto done;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a83e9209cecc..027c7ff6f1e5 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -57,7 +57,7 @@ inline int ip6_rcv_finish( struct sk_buff *skb)
57 57
58int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 58int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
59{ 59{
60 struct ipv6hdr *hdr; 60 const struct ipv6hdr *hdr;
61 u32 pkt_len; 61 u32 pkt_len;
62 struct inet6_dev *idev; 62 struct inet6_dev *idev;
63 struct net *net = dev_net(skb->dev); 63 struct net *net = dev_net(skb->dev);
@@ -186,7 +186,7 @@ resubmit:
186 int ret; 186 int ret;
187 187
188 if (ipprot->flags & INET6_PROTO_FINAL) { 188 if (ipprot->flags & INET6_PROTO_FINAL) {
189 struct ipv6hdr *hdr; 189 const struct ipv6hdr *hdr;
190 190
191 /* Free reference early: we don't need it any more, 191 /* Free reference early: we don't need it any more,
192 and it may hold ip_conntrack module loaded 192 and it may hold ip_conntrack module loaded
@@ -242,7 +242,7 @@ int ip6_input(struct sk_buff *skb)
242 242
243int ip6_mc_input(struct sk_buff *skb) 243int ip6_mc_input(struct sk_buff *skb)
244{ 244{
245 struct ipv6hdr *hdr; 245 const struct ipv6hdr *hdr;
246 int deliver; 246 int deliver;
247 247
248 IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), 248 IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5f8d242be3f3..9d4b165837d6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -174,15 +174,15 @@ int ip6_output(struct sk_buff *skb)
174 * xmit an sk_buff (used by TCP, SCTP and DCCP) 174 * xmit an sk_buff (used by TCP, SCTP and DCCP)
175 */ 175 */
176 176
177int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 177int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
178 struct ipv6_txoptions *opt) 178 struct ipv6_txoptions *opt)
179{ 179{
180 struct net *net = sock_net(sk); 180 struct net *net = sock_net(sk);
181 struct ipv6_pinfo *np = inet6_sk(sk); 181 struct ipv6_pinfo *np = inet6_sk(sk);
182 struct in6_addr *first_hop = &fl->fl6_dst; 182 struct in6_addr *first_hop = &fl6->daddr;
183 struct dst_entry *dst = skb_dst(skb); 183 struct dst_entry *dst = skb_dst(skb);
184 struct ipv6hdr *hdr; 184 struct ipv6hdr *hdr;
185 u8 proto = fl->proto; 185 u8 proto = fl6->flowi6_proto;
186 int seg_len = skb->len; 186 int seg_len = skb->len;
187 int hlimit = -1; 187 int hlimit = -1;
188 int tclass = 0; 188 int tclass = 0;
@@ -230,13 +230,13 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
230 if (hlimit < 0) 230 if (hlimit < 0)
231 hlimit = ip6_dst_hoplimit(dst); 231 hlimit = ip6_dst_hoplimit(dst);
232 232
233 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; 233 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
234 234
235 hdr->payload_len = htons(seg_len); 235 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto; 236 hdr->nexthdr = proto;
237 hdr->hop_limit = hlimit; 237 hdr->hop_limit = hlimit;
238 238
239 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 239 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
240 ipv6_addr_copy(&hdr->daddr, first_hop); 240 ipv6_addr_copy(&hdr->daddr, first_hop);
241 241
242 skb->priority = sk->sk_priority; 242 skb->priority = sk->sk_priority;
@@ -274,13 +274,10 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
274{ 274{
275 struct ipv6_pinfo *np = inet6_sk(sk); 275 struct ipv6_pinfo *np = inet6_sk(sk);
276 struct ipv6hdr *hdr; 276 struct ipv6hdr *hdr;
277 int totlen;
278 277
279 skb->protocol = htons(ETH_P_IPV6); 278 skb->protocol = htons(ETH_P_IPV6);
280 skb->dev = dev; 279 skb->dev = dev;
281 280
282 totlen = len + sizeof(struct ipv6hdr);
283
284 skb_reset_network_header(skb); 281 skb_reset_network_header(skb);
285 skb_put(skb, sizeof(struct ipv6hdr)); 282 skb_put(skb, sizeof(struct ipv6hdr));
286 hdr = ipv6_hdr(skb); 283 hdr = ipv6_hdr(skb);
@@ -479,10 +476,13 @@ int ip6_forward(struct sk_buff *skb)
479 else 476 else
480 target = &hdr->daddr; 477 target = &hdr->daddr;
481 478
479 if (!rt->rt6i_peer)
480 rt6_bind_peer(rt, 1);
481
482 /* Limit redirects both by destination (here) 482 /* Limit redirects both by destination (here)
483 and by source (inside ndisc_send_redirect) 483 and by source (inside ndisc_send_redirect)
484 */ 484 */
485 if (xrlim_allow(dst, 1*HZ)) 485 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
486 ndisc_send_redirect(skb, n, target); 486 ndisc_send_redirect(skb, n, target);
487 } else { 487 } else {
488 int addrtype = ipv6_addr_type(&hdr->saddr); 488 int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -779,7 +779,7 @@ slow_path:
779 /* IF: it doesn't fit, use 'mtu' - the data space left */ 779 /* IF: it doesn't fit, use 'mtu' - the data space left */
780 if (len > mtu) 780 if (len > mtu)
781 len = mtu; 781 len = mtu;
782 /* IF: we are not sending upto and including the packet end 782 /* IF: we are not sending up to and including the packet end
783 then align the next start on an eight byte boundary */ 783 then align the next start on an eight byte boundary */
784 if (len < left) { 784 if (len < left) {
785 len &= ~7; 785 len &= ~7;
@@ -869,9 +869,9 @@ fail:
869 return err; 869 return err;
870} 870}
871 871
872static inline int ip6_rt_check(struct rt6key *rt_key, 872static inline int ip6_rt_check(const struct rt6key *rt_key,
873 struct in6_addr *fl_addr, 873 const struct in6_addr *fl_addr,
874 struct in6_addr *addr_cache) 874 const struct in6_addr *addr_cache)
875{ 875{
876 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 876 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); 877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
@@ -879,7 +879,7 @@ static inline int ip6_rt_check(struct rt6key *rt_key,
879 879
880static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 880static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881 struct dst_entry *dst, 881 struct dst_entry *dst,
882 struct flowi *fl) 882 const struct flowi6 *fl6)
883{ 883{
884 struct ipv6_pinfo *np = inet6_sk(sk); 884 struct ipv6_pinfo *np = inet6_sk(sk);
885 struct rt6_info *rt = (struct rt6_info *)dst; 885 struct rt6_info *rt = (struct rt6_info *)dst;
@@ -904,11 +904,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
904 * sockets. 904 * sockets.
905 * 2. oif also should be the same. 905 * 2. oif also should be the same.
906 */ 906 */
907 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || 907 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
908#ifdef CONFIG_IPV6_SUBTREES 908#ifdef CONFIG_IPV6_SUBTREES
909 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || 909 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
910#endif 910#endif
911 (fl->oif && fl->oif != dst->dev->ifindex)) { 911 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
912 dst_release(dst); 912 dst_release(dst);
913 dst = NULL; 913 dst = NULL;
914 } 914 }
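The cached-route test above boils down to ip6_rt_check(): the socket's dst survives only if the route is a /128 for exactly this destination, or the destination matches the address cached alongside it (and likewise for the source and oif). A small stand-alone version of that predicate, with the prefix length and addresses passed directly instead of rt6i_dst:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static int addr_equal(const struct in6_addr *a, const struct in6_addr *b)
{
        return memcmp(a, b, sizeof(*a)) == 0;
}

/* Same predicate as ip6_rt_check(): nonzero means the cached route can no
 * longer be trusted for this destination. */
static int rt_check(int plen, const struct in6_addr *rt_addr,
                    const struct in6_addr *fl_addr,
                    const struct in6_addr *addr_cache)
{
        return (plen != 128 || !addr_equal(fl_addr, rt_addr)) &&
               (addr_cache == NULL || !addr_equal(fl_addr, addr_cache));
}

int main(void)
{
        struct in6_addr dst, cached;

        inet_pton(AF_INET6, "2001:db8::1", &dst);
        inet_pton(AF_INET6, "2001:db8::1", &cached);

        /* Host route to the same address: cache still valid (prints 0). */
        printf("%d\n", rt_check(128, &cached, &dst, NULL));
        /* /64 route, but the socket cached this exact daddr: still valid. */
        printf("%d\n", rt_check(64, &cached, &dst, &cached));
        return 0;
}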
@@ -918,22 +918,22 @@ out:
918} 918}
919 919
920static int ip6_dst_lookup_tail(struct sock *sk, 920static int ip6_dst_lookup_tail(struct sock *sk,
921 struct dst_entry **dst, struct flowi *fl) 921 struct dst_entry **dst, struct flowi6 *fl6)
922{ 922{
923 int err; 923 int err;
924 struct net *net = sock_net(sk); 924 struct net *net = sock_net(sk);
925 925
926 if (*dst == NULL) 926 if (*dst == NULL)
927 *dst = ip6_route_output(net, sk, fl); 927 *dst = ip6_route_output(net, sk, fl6);
928 928
929 if ((err = (*dst)->error)) 929 if ((err = (*dst)->error))
930 goto out_err_release; 930 goto out_err_release;
931 931
932 if (ipv6_addr_any(&fl->fl6_src)) { 932 if (ipv6_addr_any(&fl6->saddr)) {
933 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, 933 struct rt6_info *rt = (struct rt6_info *) *dst;
934 &fl->fl6_dst, 934 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
935 sk ? inet6_sk(sk)->srcprefs : 0, 935 sk ? inet6_sk(sk)->srcprefs : 0,
936 &fl->fl6_src); 936 &fl6->saddr);
937 if (err) 937 if (err)
938 goto out_err_release; 938 goto out_err_release;
939 } 939 }
@@ -949,10 +949,10 @@ static int ip6_dst_lookup_tail(struct sock *sk,
949 */ 949 */
950 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { 950 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
951 struct inet6_ifaddr *ifp; 951 struct inet6_ifaddr *ifp;
952 struct flowi fl_gw; 952 struct flowi6 fl_gw6;
953 int redirect; 953 int redirect;
954 954
955 ifp = ipv6_get_ifaddr(net, &fl->fl6_src, 955 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
956 (*dst)->dev, 1); 956 (*dst)->dev, 1);
957 957
958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
@@ -965,9 +965,9 @@ static int ip6_dst_lookup_tail(struct sock *sk,
965 * default router instead 965 * default router instead
966 */ 966 */
967 dst_release(*dst); 967 dst_release(*dst);
968 memcpy(&fl_gw, fl, sizeof(struct flowi)); 968 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
969 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); 969 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
970 *dst = ip6_route_output(net, sk, &fl_gw); 970 *dst = ip6_route_output(net, sk, &fl_gw6);
971 if ((err = (*dst)->error)) 971 if ((err = (*dst)->error))
972 goto out_err_release; 972 goto out_err_release;
973 } 973 }
@@ -988,43 +988,85 @@ out_err_release:
988 * ip6_dst_lookup - perform route lookup on flow 988 * ip6_dst_lookup - perform route lookup on flow
989 * @sk: socket which provides route info 989 * @sk: socket which provides route info
990 * @dst: pointer to dst_entry * for result 990 * @dst: pointer to dst_entry * for result
991 * @fl: flow to lookup 991 * @fl6: flow to lookup
992 * 992 *
993 * This function performs a route lookup on the given flow. 993 * This function performs a route lookup on the given flow.
994 * 994 *
995 * It returns zero on success, or a standard errno code on error. 995 * It returns zero on success, or a standard errno code on error.
996 */ 996 */
997int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 997int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
998{ 998{
999 *dst = NULL; 999 *dst = NULL;
1000 return ip6_dst_lookup_tail(sk, dst, fl); 1000 return ip6_dst_lookup_tail(sk, dst, fl6);
1001} 1001}
1002EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1002EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1003 1003
1004/** 1004/**
1005 * ip6_sk_dst_lookup - perform socket cached route lookup on flow 1005 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1006 * @sk: socket which provides route info
1007 * @fl6: flow to lookup
1008 * @final_dst: final destination address for ipsec lookup
1009 * @can_sleep: we are in a sleepable context
1010 *
1011 * This function performs a route lookup on the given flow.
1012 *
1013 * It returns a valid dst pointer on success, or a pointer encoded
1014 * error code.
1015 */
1016struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1017 const struct in6_addr *final_dst,
1018 bool can_sleep)
1019{
1020 struct dst_entry *dst = NULL;
1021 int err;
1022
1023 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1024 if (err)
1025 return ERR_PTR(err);
1026 if (final_dst)
1027 ipv6_addr_copy(&fl6->daddr, final_dst);
1028 if (can_sleep)
1029 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1030
1031 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1032}
1033EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1034
1035/**
1036 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1006 * @sk: socket which provides the dst cache and route info 1037 * @sk: socket which provides the dst cache and route info
1007 * @dst: pointer to dst_entry * for result 1038 * @fl6: flow to lookup
1008 * @fl: flow to lookup 1039 * @final_dst: final destination address for ipsec lookup
1040 * @can_sleep: we are in a sleepable context
1009 * 1041 *
1010 * This function performs a route lookup on the given flow with the 1042 * This function performs a route lookup on the given flow with the
1011 * possibility of using the cached route in the socket if it is valid. 1043 * possibility of using the cached route in the socket if it is valid.
1012 * It will take the socket dst lock when operating on the dst cache. 1044 * It will take the socket dst lock when operating on the dst cache.
1013 * As a result, this function can only be used in process context. 1045 * As a result, this function can only be used in process context.
1014 * 1046 *
1015 * It returns zero on success, or a standard errno code on error. 1047 * It returns a valid dst pointer on success, or a pointer encoded
1048 * error code.
1016 */ 1049 */
1017int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1050struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1051 const struct in6_addr *final_dst,
1052 bool can_sleep)
1018{ 1053{
1019 *dst = NULL; 1054 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1020 if (sk) { 1055 int err;
1021 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1056
1022 *dst = ip6_sk_dst_check(sk, *dst, fl); 1057 dst = ip6_sk_dst_check(sk, dst, fl6);
1023 } 1058
1059 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1060 if (err)
1061 return ERR_PTR(err);
1062 if (final_dst)
1063 ipv6_addr_copy(&fl6->daddr, final_dst);
1064 if (can_sleep)
1065 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1024 1066
1025 return ip6_dst_lookup_tail(sk, dst, fl); 1067 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1026} 1068}
1027EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); 1069EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1028 1070
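Note: both exported helpers above switch from the old "fill a dst pointer, return an errno" convention to returning either a valid dst or an ERR_PTR()-encoded error, with the xfrm/IPsec lookup folded in. A minimal caller sketch under the new convention; everything except the exported helper itself is hypothetical:

static int example_route_and_send(struct sock *sk, struct flowi6 *fl6,
				  const struct in6_addr *final_dst)
{
	struct dst_entry *dst;

	/* old style:  err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) ... */
	dst = ip6_sk_dst_lookup_flow(sk, fl6, final_dst, true /* can_sleep */);
	if (IS_ERR(dst))
		return PTR_ERR(dst);	/* error is encoded in the pointer */

	/* ... build and transmit packets using dst ... */
	dst_release(dst);
	return 0;
}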
1029static inline int ip6_ufo_append_data(struct sock *sk, 1071static inline int ip6_ufo_append_data(struct sock *sk,
1030 int getfrag(void *from, char *to, int offset, int len, 1072 int getfrag(void *from, char *to, int offset, int len,
@@ -1061,7 +1103,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1061 1103
1062 skb->ip_summed = CHECKSUM_PARTIAL; 1104 skb->ip_summed = CHECKSUM_PARTIAL;
1063 skb->csum = 0; 1105 skb->csum = 0;
1064 sk->sk_sndmsg_off = 0;
1065 } 1106 }
1066 1107
1067 err = skb_append_datato_frags(sk,skb, getfrag, from, 1108 err = skb_append_datato_frags(sk,skb, getfrag, from,
@@ -1104,11 +1145,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1104int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1145int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1105 int offset, int len, int odd, struct sk_buff *skb), 1146 int offset, int len, int odd, struct sk_buff *skb),
1106 void *from, int length, int transhdrlen, 1147 void *from, int length, int transhdrlen,
1107 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, 1148 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1108 struct rt6_info *rt, unsigned int flags, int dontfrag) 1149 struct rt6_info *rt, unsigned int flags, int dontfrag)
1109{ 1150{
1110 struct inet_sock *inet = inet_sk(sk); 1151 struct inet_sock *inet = inet_sk(sk);
1111 struct ipv6_pinfo *np = inet6_sk(sk); 1152 struct ipv6_pinfo *np = inet6_sk(sk);
1153 struct inet_cork *cork;
1112 struct sk_buff *skb; 1154 struct sk_buff *skb;
1113 unsigned int maxfraglen, fragheaderlen; 1155 unsigned int maxfraglen, fragheaderlen;
1114 int exthdrlen; 1156 int exthdrlen;
@@ -1118,9 +1160,11 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1118 int err; 1160 int err;
1119 int offset = 0; 1161 int offset = 0;
1120 int csummode = CHECKSUM_NONE; 1162 int csummode = CHECKSUM_NONE;
1163 __u8 tx_flags = 0;
1121 1164
1122 if (flags&MSG_PROBE) 1165 if (flags&MSG_PROBE)
1123 return 0; 1166 return 0;
1167 cork = &inet->cork.base;
1124 if (skb_queue_empty(&sk->sk_write_queue)) { 1168 if (skb_queue_empty(&sk->sk_write_queue)) {
1125 /* 1169 /*
1126 * setup for corking 1170 * setup for corking
@@ -1160,8 +1204,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1160 /* need source address above miyazawa*/ 1204 /* need source address above miyazawa*/
1161 } 1205 }
1162 dst_hold(&rt->dst); 1206 dst_hold(&rt->dst);
1163 inet->cork.dst = &rt->dst; 1207 cork->dst = &rt->dst;
1164 inet->cork.fl = *fl; 1208 inet->cork.fl.u.ip6 = *fl6;
1165 np->cork.hop_limit = hlimit; 1209 np->cork.hop_limit = hlimit;
1166 np->cork.tclass = tclass; 1210 np->cork.tclass = tclass;
1167 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1211 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
@@ -1170,10 +1214,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1170 if (np->frag_size) 1214 if (np->frag_size)
1171 mtu = np->frag_size; 1215 mtu = np->frag_size;
1172 } 1216 }
1173 inet->cork.fragsize = mtu; 1217 cork->fragsize = mtu;
1174 if (dst_allfrag(rt->dst.path)) 1218 if (dst_allfrag(rt->dst.path))
1175 inet->cork.flags |= IPCORK_ALLFRAG; 1219 cork->flags |= IPCORK_ALLFRAG;
1176 inet->cork.length = 0; 1220 cork->length = 0;
1177 sk->sk_sndmsg_page = NULL; 1221 sk->sk_sndmsg_page = NULL;
1178 sk->sk_sndmsg_off = 0; 1222 sk->sk_sndmsg_off = 0;
1179 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - 1223 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
@@ -1181,12 +1225,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1181 length += exthdrlen; 1225 length += exthdrlen;
1182 transhdrlen += exthdrlen; 1226 transhdrlen += exthdrlen;
1183 } else { 1227 } else {
1184 rt = (struct rt6_info *)inet->cork.dst; 1228 rt = (struct rt6_info *)cork->dst;
1185 fl = &inet->cork.fl; 1229 fl6 = &inet->cork.fl.u.ip6;
1186 opt = np->cork.opt; 1230 opt = np->cork.opt;
1187 transhdrlen = 0; 1231 transhdrlen = 0;
1188 exthdrlen = 0; 1232 exthdrlen = 0;
1189 mtu = inet->cork.fragsize; 1233 mtu = cork->fragsize;
1190 } 1234 }
1191 1235
1192 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1236 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
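Note: the cork accesses in the hunks above go through a local "cork" pointer because inet->cork is no longer the cork state itself but a wrapper that pairs it with the saved flow key. A rough sketch of the layout this code relies on (member names as used in this diff; the wrapper's real type name is omitted and the field list is not exhaustive):

struct inet_cork_wrapper_sketch {
	struct inet_cork base;		/* dst, fragsize, length, flags, ... */
	struct flowi     fl;		/* flow union; IPv6 key is fl.u.ip6  */
};

/* hence inet->cork.base replaces the old inet->cork, and
 * inet->cork.fl.u.ip6 is the flowi6 saved when corking started. */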
@@ -1196,12 +1240,19 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1196 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1240 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1197 1241
1198 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1242 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1199 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1243 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1200 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); 1244 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1201 return -EMSGSIZE; 1245 return -EMSGSIZE;
1202 } 1246 }
1203 } 1247 }
1204 1248
1249 /* For UDP, check if TX timestamp is enabled */
1250 if (sk->sk_type == SOCK_DGRAM) {
1251 err = sock_tx_timestamp(sk, &tx_flags);
1252 if (err)
1253 goto error;
1254 }
1255
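Note: sock_tx_timestamp() above copies the socket's transmit-timestamping flags into tx_flags so that, for datagram sockets, the first fragment of the message can carry them. For reference, a userspace sketch of how a UDP socket would request software TX timestamps; this is standard SO_TIMESTAMPING usage, not part of this patch, and the stamps are read back from the error queue with recvmsg(MSG_ERRQUEUE):

#include <sys/socket.h>
#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* flags */

static int enable_tx_timestamps(int fd)
{
	int flags = SOF_TIMESTAMPING_TX_SOFTWARE |	/* stamp on transmit */
		    SOF_TIMESTAMPING_SOFTWARE;		/* report sw stamps  */

	/* SO_TIMESTAMPING itself comes from the kernel's socket headers */
	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}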
1205 /* 1256 /*
1206 * Let's try using as much space as possible. 1257 * Let's try using as much space as possible.
1207 * Use MTU if total length of the message fits into the MTU. 1258 * Use MTU if total length of the message fits into the MTU.
@@ -1218,11 +1269,11 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1218 * --yoshfuji 1269 * --yoshfuji
1219 */ 1270 */
1220 1271
1221 inet->cork.length += length; 1272 cork->length += length;
1222 if (length > mtu) { 1273 if (length > mtu) {
1223 int proto = sk->sk_protocol; 1274 int proto = sk->sk_protocol;
1224 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 1275 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1225 ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); 1276 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1226 return -EMSGSIZE; 1277 return -EMSGSIZE;
1227 } 1278 }
1228 1279
@@ -1243,7 +1294,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1243 1294
1244 while (length > 0) { 1295 while (length > 0) {
1245 /* Check if the remaining data fits into current packet. */ 1296 /* Check if the remaining data fits into current packet. */
1246 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1297 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1247 if (copy < length) 1298 if (copy < length)
1248 copy = maxfraglen - skb->len; 1299 copy = maxfraglen - skb->len;
1249 1300
@@ -1268,7 +1319,7 @@ alloc_new_skb:
1268 * we know we need more fragment(s). 1319 * we know we need more fragment(s).
1269 */ 1320 */
1270 datalen = length + fraggap; 1321 datalen = length + fraggap;
1271 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1322 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1272 datalen = maxfraglen - fragheaderlen; 1323 datalen = maxfraglen - fragheaderlen;
1273 1324
1274 fraglen = datalen + fragheaderlen; 1325 fraglen = datalen + fragheaderlen;
@@ -1306,6 +1357,12 @@ alloc_new_skb:
1306 sk->sk_allocation); 1357 sk->sk_allocation);
1307 if (unlikely(skb == NULL)) 1358 if (unlikely(skb == NULL))
1308 err = -ENOBUFS; 1359 err = -ENOBUFS;
1360 else {
1361 /* Only the initial fragment
1362 * is time stamped.
1363 */
1364 tx_flags = 0;
1365 }
1309 } 1366 }
1310 if (skb == NULL) 1367 if (skb == NULL)
1311 goto error; 1368 goto error;
@@ -1317,6 +1374,9 @@ alloc_new_skb:
1317 /* reserve for fragmentation */ 1374 /* reserve for fragmentation */
1318 skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 1375 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1319 1376
1377 if (sk->sk_type == SOCK_DGRAM)
1378 skb_shinfo(skb)->tx_flags = tx_flags;
1379
1320 /* 1380 /*
1321 * Find where to start putting bytes 1381 * Find where to start putting bytes
1322 */ 1382 */
@@ -1423,7 +1483,7 @@ alloc_new_skb:
1423 } 1483 }
1424 return 0; 1484 return 0;
1425error: 1485error:
1426 inet->cork.length -= length; 1486 cork->length -= length;
1427 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1487 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1428 return err; 1488 return err;
1429} 1489}
@@ -1439,10 +1499,10 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1439 np->cork.opt = NULL; 1499 np->cork.opt = NULL;
1440 } 1500 }
1441 1501
1442 if (inet->cork.dst) { 1502 if (inet->cork.base.dst) {
1443 dst_release(inet->cork.dst); 1503 dst_release(inet->cork.base.dst);
1444 inet->cork.dst = NULL; 1504 inet->cork.base.dst = NULL;
1445 inet->cork.flags &= ~IPCORK_ALLFRAG; 1505 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1446 } 1506 }
1447 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1507 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1448} 1508}
@@ -1457,9 +1517,9 @@ int ip6_push_pending_frames(struct sock *sk)
1457 struct net *net = sock_net(sk); 1517 struct net *net = sock_net(sk);
1458 struct ipv6hdr *hdr; 1518 struct ipv6hdr *hdr;
1459 struct ipv6_txoptions *opt = np->cork.opt; 1519 struct ipv6_txoptions *opt = np->cork.opt;
1460 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; 1520 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1461 struct flowi *fl = &inet->cork.fl; 1521 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1462 unsigned char proto = fl->proto; 1522 unsigned char proto = fl6->flowi6_proto;
1463 int err = 0; 1523 int err = 0;
1464 1524
1465 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1525 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
@@ -1484,7 +1544,7 @@ int ip6_push_pending_frames(struct sock *sk)
1484 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1544 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1485 skb->local_df = 1; 1545 skb->local_df = 1;
1486 1546
1487 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1547 ipv6_addr_copy(final_dst, &fl6->daddr);
1488 __skb_pull(skb, skb_network_header_len(skb)); 1548 __skb_pull(skb, skb_network_header_len(skb));
1489 if (opt && opt->opt_flen) 1549 if (opt && opt->opt_flen)
1490 ipv6_push_frag_opts(skb, opt, &proto); 1550 ipv6_push_frag_opts(skb, opt, &proto);
@@ -1495,12 +1555,12 @@ int ip6_push_pending_frames(struct sock *sk)
1495 skb_reset_network_header(skb); 1555 skb_reset_network_header(skb);
1496 hdr = ipv6_hdr(skb); 1556 hdr = ipv6_hdr(skb);
1497 1557
1498 *(__be32*)hdr = fl->fl6_flowlabel | 1558 *(__be32*)hdr = fl6->flowlabel |
1499 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1559 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1500 1560
1501 hdr->hop_limit = np->cork.hop_limit; 1561 hdr->hop_limit = np->cork.hop_limit;
1502 hdr->nexthdr = proto; 1562 hdr->nexthdr = proto;
1503 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1563 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1504 ipv6_addr_copy(&hdr->daddr, final_dst); 1564 ipv6_addr_copy(&hdr->daddr, final_dst);
1505 1565
1506 skb->priority = sk->sk_priority; 1566 skb->priority = sk->sk_priority;
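Note: the header construction above writes the whole first 32-bit word of the IPv6 header in one store: version (4 bits), traffic class (8 bits) and flow label (20 bits). A small worked example of the packing, with arbitrary values:

/* first word = version(4) | traffic class(8) | flow label(20), big-endian.
 * e.g. traffic class 0x2e and flow label 0x12345:                         */
__be32 word = htonl(0x60000000		/* version 6 in the top nibble      */
		    | (0x2e << 20)	/* traffic class in bits 27..20     */
		    | 0x12345);		/* flow label in bits 19..0         */

/* in the kernel code above, fl6->flowlabel is already big-endian, which is
 * why it is OR-ed in outside the htonl() while the tclass is shifted inside. */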
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697bd..36c2842a86b2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
57MODULE_AUTHOR("Ville Nuorvala"); 57MODULE_AUTHOR("Ville Nuorvala");
58MODULE_DESCRIPTION("IPv6 tunneling device"); 58MODULE_DESCRIPTION("IPv6 tunneling device");
59MODULE_LICENSE("GPL"); 59MODULE_LICENSE("GPL");
60MODULE_ALIAS_NETDEV("ip6tnl0");
60 61
61#ifdef IP6_TNL_DEBUG 62#ifdef IP6_TNL_DEBUG
62#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) 63#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
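Note: the added MODULE_ALIAS_NETDEV("ip6tnl0") lets the module be demand-loaded the first time the ip6tnl0 device is referenced. The mechanics below are a sketch based on the module.h and dev_load() behaviour of this era, not something shown in this patch:

/* roughly, linux/module.h provides:
 *     #define MODULE_ALIAS_NETDEV(dev)  MODULE_ALIAS("netdev-" dev)
 * and the interface-ioctl path, when the named device does not exist yet,
 * does something like:
 *     request_module("netdev-%s", name);
 * which the alias above resolves to this module.
 */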
@@ -161,7 +162,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
161 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 162 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
162 163
163static struct ip6_tnl * 164static struct ip6_tnl *
164ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) 165ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
165{ 166{
166 unsigned int h0 = HASH(remote); 167 unsigned int h0 = HASH(remote);
167 unsigned int h1 = HASH(local); 168 unsigned int h1 = HASH(local);
@@ -193,10 +194,10 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
193 **/ 194 **/
194 195
195static struct ip6_tnl __rcu ** 196static struct ip6_tnl __rcu **
196ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) 197ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
197{ 198{
198 struct in6_addr *remote = &p->raddr; 199 const struct in6_addr *remote = &p->raddr;
199 struct in6_addr *local = &p->laddr; 200 const struct in6_addr *local = &p->laddr;
200 unsigned h = 0; 201 unsigned h = 0;
201 int prio = 0; 202 int prio = 0;
202 203
@@ -279,11 +280,6 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
279 280
280 dev_net_set(dev, net); 281 dev_net_set(dev, net);
281 282
282 if (strchr(name, '%')) {
283 if (dev_alloc_name(dev, name) < 0)
284 goto failed_free;
285 }
286
287 t = netdev_priv(dev); 283 t = netdev_priv(dev);
288 t->parms = *p; 284 t->parms = *p;
289 err = ip6_tnl_dev_init(dev); 285 err = ip6_tnl_dev_init(dev);
@@ -320,8 +316,8 @@ failed:
320static struct ip6_tnl *ip6_tnl_locate(struct net *net, 316static struct ip6_tnl *ip6_tnl_locate(struct net *net,
321 struct ip6_tnl_parm *p, int create) 317 struct ip6_tnl_parm *p, int create)
322{ 318{
323 struct in6_addr *remote = &p->raddr; 319 const struct in6_addr *remote = &p->raddr;
324 struct in6_addr *local = &p->laddr; 320 const struct in6_addr *local = &p->laddr;
325 struct ip6_tnl __rcu **tp; 321 struct ip6_tnl __rcu **tp;
326 struct ip6_tnl *t; 322 struct ip6_tnl *t;
327 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 323 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -373,7 +369,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
373static __u16 369static __u16
374parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) 370parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
375{ 371{
376 struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw; 372 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
377 __u8 nexthdr = ipv6h->nexthdr; 373 __u8 nexthdr = ipv6h->nexthdr;
378 __u16 off = sizeof (*ipv6h); 374 __u16 off = sizeof (*ipv6h);
379 375
@@ -434,7 +430,7 @@ static int
434ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, 430ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
435 u8 *type, u8 *code, int *msg, __u32 *info, int offset) 431 u8 *type, u8 *code, int *msg, __u32 *info, int offset)
436{ 432{
437 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; 433 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
438 struct ip6_tnl *t; 434 struct ip6_tnl *t;
439 int rel_msg = 0; 435 int rel_msg = 0;
440 u8 rel_type = ICMPV6_DEST_UNREACH; 436 u8 rel_type = ICMPV6_DEST_UNREACH;
@@ -534,9 +530,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
534 __u32 rel_info = ntohl(info); 530 __u32 rel_info = ntohl(info);
535 int err; 531 int err;
536 struct sk_buff *skb2; 532 struct sk_buff *skb2;
537 struct iphdr *eiph; 533 const struct iphdr *eiph;
538 struct flowi fl;
539 struct rtable *rt; 534 struct rtable *rt;
535 struct flowi4 fl4;
540 536
541 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, 537 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
542 &rel_msg, &rel_info, offset); 538 &rel_msg, &rel_info, offset);
@@ -577,11 +573,11 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
577 eiph = ip_hdr(skb2); 573 eiph = ip_hdr(skb2);
578 574
579 /* Try to guess incoming interface */ 575 /* Try to guess incoming interface */
580 memset(&fl, 0, sizeof(fl)); 576 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
581 fl.fl4_dst = eiph->saddr; 577 eiph->saddr, 0,
582 fl.fl4_tos = RT_TOS(eiph->tos); 578 0, 0,
583 fl.proto = IPPROTO_IPIP; 579 IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
584 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) 580 if (IS_ERR(rt))
585 goto out; 581 goto out;
586 582
587 skb2->dev = rt->dst.dev; 583 skb2->dev = rt->dst.dev;
@@ -590,15 +586,18 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
590 if (rt->rt_flags & RTCF_LOCAL) { 586 if (rt->rt_flags & RTCF_LOCAL) {
591 ip_rt_put(rt); 587 ip_rt_put(rt);
592 rt = NULL; 588 rt = NULL;
593 fl.fl4_dst = eiph->daddr; 589 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
594 fl.fl4_src = eiph->saddr; 590 eiph->daddr, eiph->saddr,
595 fl.fl4_tos = eiph->tos; 591 0, 0,
596 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || 592 IPPROTO_IPIP,
593 RT_TOS(eiph->tos), 0);
594 if (IS_ERR(rt) ||
597 rt->dst.dev->type != ARPHRD_TUNNEL) { 595 rt->dst.dev->type != ARPHRD_TUNNEL) {
598 ip_rt_put(rt); 596 if (!IS_ERR(rt))
597 ip_rt_put(rt);
599 goto out; 598 goto out;
600 } 599 }
601 skb_dst_set(skb2, (struct dst_entry *)rt); 600 skb_dst_set(skb2, &rt->dst);
602 } else { 601 } else {
603 ip_rt_put(rt); 602 ip_rt_put(rt);
604 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, 603 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
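Note: the hunk above replaces ip_route_output_key(), which filled an rtable pointer and returned an errno, with ip_route_output_ports(), which takes the flow parameters directly, fills the caller's struct flowi4 key as a side effect, and returns either the route or an ERR_PTR()-encoded error. A minimal sketch of the new calling convention; the surrounding variables are illustrative:

struct flowi4 fl4;
struct rtable *rt;

rt = ip_route_output_ports(net, &fl4, NULL /* sk */,
			   daddr, saddr,
			   0 /* dport */, 0 /* sport */,
			   IPPROTO_IPIP, RT_TOS(tos), 0 /* oif */);
if (IS_ERR(rt))
	return PTR_ERR(rt);
/* ... use rt, then ip_rt_put(rt) ... */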
@@ -666,8 +665,8 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
666 return 0; 665 return 0;
667} 666}
668 667
669static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, 668static void ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
670 struct ipv6hdr *ipv6h, 669 const struct ipv6hdr *ipv6h,
671 struct sk_buff *skb) 670 struct sk_buff *skb)
672{ 671{
673 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; 672 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
@@ -679,8 +678,8 @@ static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
679 IP_ECN_set_ce(ip_hdr(skb)); 678 IP_ECN_set_ce(ip_hdr(skb));
680} 679}
681 680
682static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, 681static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
683 struct ipv6hdr *ipv6h, 682 const struct ipv6hdr *ipv6h,
684 struct sk_buff *skb) 683 struct sk_buff *skb)
685{ 684{
686 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 685 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
@@ -723,12 +722,12 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
723 722
724static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, 723static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
725 __u8 ipproto, 724 __u8 ipproto,
726 void (*dscp_ecn_decapsulate)(struct ip6_tnl *t, 725 void (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
727 struct ipv6hdr *ipv6h, 726 const struct ipv6hdr *ipv6h,
728 struct sk_buff *skb)) 727 struct sk_buff *skb))
729{ 728{
730 struct ip6_tnl *t; 729 struct ip6_tnl *t;
731 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 730 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
732 731
733 rcu_read_lock(); 732 rcu_read_lock();
734 733
@@ -825,7 +824,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
825 **/ 824 **/
826 825
827static inline int 826static inline int
828ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) 827ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
829{ 828{
830 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); 829 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
831} 830}
@@ -881,7 +880,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
881static int ip6_tnl_xmit2(struct sk_buff *skb, 880static int ip6_tnl_xmit2(struct sk_buff *skb,
882 struct net_device *dev, 881 struct net_device *dev,
883 __u8 dsfield, 882 __u8 dsfield,
884 struct flowi *fl, 883 struct flowi6 *fl6,
885 int encap_limit, 884 int encap_limit,
886 __u32 *pmtu) 885 __u32 *pmtu)
887{ 886{
@@ -901,10 +900,16 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
901 if ((dst = ip6_tnl_dst_check(t)) != NULL) 900 if ((dst = ip6_tnl_dst_check(t)) != NULL)
902 dst_hold(dst); 901 dst_hold(dst);
903 else { 902 else {
904 dst = ip6_route_output(net, NULL, fl); 903 dst = ip6_route_output(net, NULL, fl6);
905 904
906 if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) 905 if (dst->error)
907 goto tx_err_link_failure; 906 goto tx_err_link_failure;
907 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
908 if (IS_ERR(dst)) {
909 err = PTR_ERR(dst);
910 dst = NULL;
911 goto tx_err_link_failure;
912 }
908 } 913 }
909 914
910 tdev = dst->dev; 915 tdev = dst->dev;
@@ -954,7 +959,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
954 959
955 skb->transport_header = skb->network_header; 960 skb->transport_header = skb->network_header;
956 961
957 proto = fl->proto; 962 proto = fl6->flowi6_proto;
958 if (encap_limit >= 0) { 963 if (encap_limit >= 0) {
959 init_tel_txopt(&opt, encap_limit); 964 init_tel_txopt(&opt, encap_limit);
960 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 965 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
@@ -962,13 +967,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
962 skb_push(skb, sizeof(struct ipv6hdr)); 967 skb_push(skb, sizeof(struct ipv6hdr));
963 skb_reset_network_header(skb); 968 skb_reset_network_header(skb);
964 ipv6h = ipv6_hdr(skb); 969 ipv6h = ipv6_hdr(skb);
965 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); 970 *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000);
966 dsfield = INET_ECN_encapsulate(0, dsfield); 971 dsfield = INET_ECN_encapsulate(0, dsfield);
967 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); 972 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
968 ipv6h->hop_limit = t->parms.hop_limit; 973 ipv6h->hop_limit = t->parms.hop_limit;
969 ipv6h->nexthdr = proto; 974 ipv6h->nexthdr = proto;
970 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); 975 ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr);
971 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); 976 ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr);
972 nf_reset(skb); 977 nf_reset(skb);
973 pkt_len = skb->len; 978 pkt_len = skb->len;
974 err = ip6_local_out(skb); 979 err = ip6_local_out(skb);
@@ -996,9 +1001,9 @@ static inline int
996ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 1001ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
997{ 1002{
998 struct ip6_tnl *t = netdev_priv(dev); 1003 struct ip6_tnl *t = netdev_priv(dev);
999 struct iphdr *iph = ip_hdr(skb); 1004 const struct iphdr *iph = ip_hdr(skb);
1000 int encap_limit = -1; 1005 int encap_limit = -1;
1001 struct flowi fl; 1006 struct flowi6 fl6;
1002 __u8 dsfield; 1007 __u8 dsfield;
1003 __u32 mtu; 1008 __u32 mtu;
1004 int err; 1009 int err;
@@ -1010,16 +1015,16 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1010 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1015 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1011 encap_limit = t->parms.encap_limit; 1016 encap_limit = t->parms.encap_limit;
1012 1017
1013 memcpy(&fl, &t->fl, sizeof (fl)); 1018 memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
1014 fl.proto = IPPROTO_IPIP; 1019 fl6.flowi6_proto = IPPROTO_IPIP;
1015 1020
1016 dsfield = ipv4_get_dsfield(iph); 1021 dsfield = ipv4_get_dsfield(iph);
1017 1022
1018 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) 1023 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1019 fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) 1024 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
1020 & IPV6_TCLASS_MASK; 1025 & IPV6_TCLASS_MASK;
1021 1026
1022 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); 1027 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1023 if (err != 0) { 1028 if (err != 0) {
1024 /* XXX: send ICMP error even if DF is not set. */ 1029 /* XXX: send ICMP error even if DF is not set. */
1025 if (err == -EMSGSIZE) 1030 if (err == -EMSGSIZE)
@@ -1038,7 +1043,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1038 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 1043 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1039 int encap_limit = -1; 1044 int encap_limit = -1;
1040 __u16 offset; 1045 __u16 offset;
1041 struct flowi fl; 1046 struct flowi6 fl6;
1042 __u8 dsfield; 1047 __u8 dsfield;
1043 __u32 mtu; 1048 __u32 mtu;
1044 int err; 1049 int err;
@@ -1060,16 +1065,16 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1060 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1065 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1061 encap_limit = t->parms.encap_limit; 1066 encap_limit = t->parms.encap_limit;
1062 1067
1063 memcpy(&fl, &t->fl, sizeof (fl)); 1068 memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
1064 fl.proto = IPPROTO_IPV6; 1069 fl6.flowi6_proto = IPPROTO_IPV6;
1065 1070
1066 dsfield = ipv6_get_dsfield(ipv6h); 1071 dsfield = ipv6_get_dsfield(ipv6h);
1067 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) 1072 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1068 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); 1073 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1069 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1074 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
1070 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); 1075 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1071 1076
1072 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); 1077 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1073 if (err != 0) { 1078 if (err != 0) {
1074 if (err == -EMSGSIZE) 1079 if (err == -EMSGSIZE)
1075 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1080 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1132,21 +1137,21 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1132{ 1137{
1133 struct net_device *dev = t->dev; 1138 struct net_device *dev = t->dev;
1134 struct ip6_tnl_parm *p = &t->parms; 1139 struct ip6_tnl_parm *p = &t->parms;
1135 struct flowi *fl = &t->fl; 1140 struct flowi6 *fl6 = &t->fl.u.ip6;
1136 1141
1137 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 1142 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1138 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); 1143 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1139 1144
1140 /* Set up flowi template */ 1145 /* Set up flowi template */
1141 ipv6_addr_copy(&fl->fl6_src, &p->laddr); 1146 ipv6_addr_copy(&fl6->saddr, &p->laddr);
1142 ipv6_addr_copy(&fl->fl6_dst, &p->raddr); 1147 ipv6_addr_copy(&fl6->daddr, &p->raddr);
1143 fl->oif = p->link; 1148 fl6->flowi6_oif = p->link;
1144 fl->fl6_flowlabel = 0; 1149 fl6->flowlabel = 0;
1145 1150
1146 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) 1151 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1147 fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; 1152 fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1148 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1153 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1149 fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; 1154 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1150 1155
1151 ip6_tnl_set_cap(t); 1156 ip6_tnl_set_cap(t);
1152 1157
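Note: this file shows the mechanical part of the series in one place: struct flowi becomes the per-family struct flowi6 and the fields lose their old prefixes. The same template written as a designated initializer with the new names; oif, iif, mark, src and dst below are placeholders, and the comments give the old spellings:

struct flowi6 fl6 = {
	.flowi6_oif   = oif,		/* was fl.oif           */
	.flowi6_iif   = iif,		/* was fl.iif           */
	.flowi6_mark  = mark,		/* was fl.mark          */
	.flowi6_proto = IPPROTO_UDP,	/* was fl.proto         */
	.saddr        = src,		/* was fl.fl6_src       */
	.daddr        = dst,		/* was fl.fl6_dst       */
	.flowlabel    = 0,		/* was fl.fl6_flowlabel */
};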
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0e1d53bcf1e0..82a809901f8e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -135,14 +135,15 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
135 return NULL; 135 return NULL;
136} 136}
137 137
138static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, 138static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
139 struct mr6_table **mrt) 139 struct mr6_table **mrt)
140{ 140{
141 struct ip6mr_result res; 141 struct ip6mr_result res;
142 struct fib_lookup_arg arg = { .result = &res, }; 142 struct fib_lookup_arg arg = { .result = &res, };
143 int err; 143 int err;
144 144
145 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); 145 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
146 flowi6_to_flowi(flp6), 0, &arg);
146 if (err < 0) 147 if (err < 0)
147 return err; 148 return err;
148 *mrt = res.mrt; 149 *mrt = res.mrt;
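Note: fib_rules_lookup() still takes the generic struct flowi, so the IPv6 key is passed through flowi6_to_flowi(). Since the flowi6 is embedded in the flowi union, the conversion is pure pointer arithmetic; a sketch, assuming the union member is named u.ip6 as elsewhere in this diff:

static inline struct flowi *flowi6_to_flowi_sketch(struct flowi6 *fl6)
{
	/* no copy: step back from the embedded member to its container */
	return container_of(fl6, struct flowi, u.ip6);
}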
@@ -270,7 +271,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
270 return net->ipv6.mrt6; 271 return net->ipv6.mrt6;
271} 272}
272 273
273static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, 274static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
274 struct mr6_table **mrt) 275 struct mr6_table **mrt)
275{ 276{
276 *mrt = net->ipv6.mrt6; 277 *mrt = net->ipv6.mrt6;
@@ -617,9 +618,9 @@ static int pim6_rcv(struct sk_buff *skb)
617 struct net_device *reg_dev = NULL; 618 struct net_device *reg_dev = NULL;
618 struct net *net = dev_net(skb->dev); 619 struct net *net = dev_net(skb->dev);
619 struct mr6_table *mrt; 620 struct mr6_table *mrt;
620 struct flowi fl = { 621 struct flowi6 fl6 = {
621 .iif = skb->dev->ifindex, 622 .flowi6_iif = skb->dev->ifindex,
622 .mark = skb->mark, 623 .flowi6_mark = skb->mark,
623 }; 624 };
624 int reg_vif_num; 625 int reg_vif_num;
625 626
@@ -644,7 +645,7 @@ static int pim6_rcv(struct sk_buff *skb)
644 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 645 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
645 goto drop; 646 goto drop;
646 647
647 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) 648 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
648 goto drop; 649 goto drop;
649 reg_vif_num = mrt->mroute_reg_vif_num; 650 reg_vif_num = mrt->mroute_reg_vif_num;
650 651
@@ -662,7 +663,7 @@ static int pim6_rcv(struct sk_buff *skb)
662 skb_pull(skb, (u8 *)encap - skb->data); 663 skb_pull(skb, (u8 *)encap - skb->data);
663 skb_reset_network_header(skb); 664 skb_reset_network_header(skb);
664 skb->protocol = htons(ETH_P_IPV6); 665 skb->protocol = htons(ETH_P_IPV6);
665 skb->ip_summed = 0; 666 skb->ip_summed = CHECKSUM_NONE;
666 skb->pkt_type = PACKET_HOST; 667 skb->pkt_type = PACKET_HOST;
667 668
668 skb_tunnel_rx(skb, reg_dev); 669 skb_tunnel_rx(skb, reg_dev);
@@ -687,14 +688,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
687{ 688{
688 struct net *net = dev_net(dev); 689 struct net *net = dev_net(dev);
689 struct mr6_table *mrt; 690 struct mr6_table *mrt;
690 struct flowi fl = { 691 struct flowi6 fl6 = {
691 .oif = dev->ifindex, 692 .flowi6_oif = dev->ifindex,
692 .iif = skb->skb_iif, 693 .flowi6_iif = skb->skb_iif,
693 .mark = skb->mark, 694 .flowi6_mark = skb->mark,
694 }; 695 };
695 int err; 696 int err;
696 697
697 err = ip6mr_fib_lookup(net, &fl, &mrt); 698 err = ip6mr_fib_lookup(net, &fl6, &mrt);
698 if (err < 0) 699 if (err < 0)
699 return err; 700 return err;
700 701
@@ -988,8 +989,8 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
988} 989}
989 990
990static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, 991static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
991 struct in6_addr *origin, 992 const struct in6_addr *origin,
992 struct in6_addr *mcastgrp) 993 const struct in6_addr *mcastgrp)
993{ 994{
994 int line = MFC6_HASH(mcastgrp, origin); 995 int line = MFC6_HASH(mcastgrp, origin);
995 struct mfc6_cache *c; 996 struct mfc6_cache *c;
@@ -1039,7 +1040,6 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1039 1040
1040 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 1041 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1041 if (ipv6_hdr(skb)->version == 0) { 1042 if (ipv6_hdr(skb)->version == 0) {
1042 int err;
1043 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); 1043 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1044 1044
1045 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 1045 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -1050,7 +1050,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1050 skb_trim(skb, nlh->nlmsg_len); 1050 skb_trim(skb, nlh->nlmsg_len);
1051 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; 1051 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1052 } 1052 }
1053 err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 1053 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1054 } else 1054 } else
1055 ip6_mr_forward(net, mrt, skb, c); 1055 ip6_mr_forward(net, mrt, skb, c);
1056 } 1056 }
@@ -1548,13 +1548,13 @@ int ip6mr_sk_done(struct sock *sk)
1548struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) 1548struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1549{ 1549{
1550 struct mr6_table *mrt; 1550 struct mr6_table *mrt;
1551 struct flowi fl = { 1551 struct flowi6 fl6 = {
1552 .iif = skb->skb_iif, 1552 .flowi6_iif = skb->skb_iif,
1553 .oif = skb->dev->ifindex, 1553 .flowi6_oif = skb->dev->ifindex,
1554 .mark = skb->mark, 1554 .flowi6_mark = skb->mark,
1555 }; 1555 };
1556 1556
1557 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) 1557 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1558 return NULL; 1558 return NULL;
1559 1559
1560 return mrt->mroute6_sk; 1560 return mrt->mroute6_sk;
@@ -1898,7 +1898,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1898 struct mif_device *vif = &mrt->vif6_table[vifi]; 1898 struct mif_device *vif = &mrt->vif6_table[vifi];
1899 struct net_device *dev; 1899 struct net_device *dev;
1900 struct dst_entry *dst; 1900 struct dst_entry *dst;
1901 struct flowi fl; 1901 struct flowi6 fl6;
1902 1902
1903 if (vif->dev == NULL) 1903 if (vif->dev == NULL)
1904 goto out_free; 1904 goto out_free;
@@ -1916,12 +1916,12 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1916 1916
1917 ipv6h = ipv6_hdr(skb); 1917 ipv6h = ipv6_hdr(skb);
1918 1918
1919 fl = (struct flowi) { 1919 fl6 = (struct flowi6) {
1920 .oif = vif->link, 1920 .flowi6_oif = vif->link,
1921 .fl6_dst = ipv6h->daddr, 1921 .daddr = ipv6h->daddr,
1922 }; 1922 };
1923 1923
1924 dst = ip6_route_output(net, NULL, &fl); 1924 dst = ip6_route_output(net, NULL, &fl6);
1925 if (!dst) 1925 if (!dst)
1926 goto out_free; 1926 goto out_free;
1927 1927
@@ -2044,13 +2044,13 @@ int ip6_mr_input(struct sk_buff *skb)
2044 struct mfc6_cache *cache; 2044 struct mfc6_cache *cache;
2045 struct net *net = dev_net(skb->dev); 2045 struct net *net = dev_net(skb->dev);
2046 struct mr6_table *mrt; 2046 struct mr6_table *mrt;
2047 struct flowi fl = { 2047 struct flowi6 fl6 = {
2048 .iif = skb->dev->ifindex, 2048 .flowi6_iif = skb->dev->ifindex,
2049 .mark = skb->mark, 2049 .flowi6_mark = skb->mark,
2050 }; 2050 };
2051 int err; 2051 int err;
2052 2052
2053 err = ip6mr_fib_lookup(net, &fl, &mrt); 2053 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2054 if (err < 0) 2054 if (err < 0)
2055 return err; 2055 return err;
2056 2056
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 85cccd6ed0b7..bba658d9a03c 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -55,7 +55,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
55{ 55{
56 struct net *net = dev_net(skb->dev); 56 struct net *net = dev_net(skb->dev);
57 __be32 spi; 57 __be32 spi;
58 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; 58 const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
59 struct ip_comp_hdr *ipcomph = 59 struct ip_comp_hdr *ipcomph =
60 (struct ip_comp_hdr *)(skb->data + offset); 60 (struct ip_comp_hdr *)(skb->data + offset);
61 struct xfrm_state *x; 61 struct xfrm_state *x;
@@ -64,7 +64,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
64 return; 64 return;
65 65
66 spi = htonl(ntohs(ipcomph->cpi)); 66 spi = htonl(ntohs(ipcomph->cpi));
67 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); 67 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
68 spi, IPPROTO_COMP, AF_INET6);
68 if (!x) 69 if (!x)
69 return; 70 return;
70 71
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d1770e061c08..9cb191ecaba8 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -444,12 +444,12 @@ sticky_done:
444 { 444 {
445 struct ipv6_txoptions *opt = NULL; 445 struct ipv6_txoptions *opt = NULL;
446 struct msghdr msg; 446 struct msghdr msg;
447 struct flowi fl; 447 struct flowi6 fl6;
448 int junk; 448 int junk;
449 449
450 fl.fl6_flowlabel = 0; 450 memset(&fl6, 0, sizeof(fl6));
451 fl.oif = sk->sk_bound_dev_if; 451 fl6.flowi6_oif = sk->sk_bound_dev_if;
452 fl.mark = sk->sk_mark; 452 fl6.flowi6_mark = sk->sk_mark;
453 453
454 if (optlen == 0) 454 if (optlen == 0)
455 goto update; 455 goto update;
@@ -475,7 +475,7 @@ sticky_done:
475 msg.msg_controllen = optlen; 475 msg.msg_controllen = optlen;
476 msg.msg_control = (void*)(opt+1); 476 msg.msg_control = (void*)(opt+1);
477 477
478 retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, 478 retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk,
479 &junk); 479 &junk);
480 if (retv) 480 if (retv)
481 goto done; 481 goto done;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 49f986d626a0..3e6ebcdb4779 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -92,16 +92,16 @@ static void mld_gq_timer_expire(unsigned long data);
92static void mld_ifc_timer_expire(unsigned long data); 92static void mld_ifc_timer_expire(unsigned long data);
93static void mld_ifc_event(struct inet6_dev *idev); 93static void mld_ifc_event(struct inet6_dev *idev);
94static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); 94static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
95static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *addr); 95static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
96static void mld_clear_delrec(struct inet6_dev *idev); 96static void mld_clear_delrec(struct inet6_dev *idev);
97static int sf_setstate(struct ifmcaddr6 *pmc); 97static int sf_setstate(struct ifmcaddr6 *pmc);
98static void sf_markstate(struct ifmcaddr6 *pmc); 98static void sf_markstate(struct ifmcaddr6 *pmc);
99static void ip6_mc_clear_src(struct ifmcaddr6 *pmc); 99static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
100static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca, 100static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
101 int sfmode, int sfcount, struct in6_addr *psfsrc, 101 int sfmode, int sfcount, const struct in6_addr *psfsrc,
102 int delta); 102 int delta);
103static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca, 103static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
104 int sfmode, int sfcount, struct in6_addr *psfsrc, 104 int sfmode, int sfcount, const struct in6_addr *psfsrc,
105 int delta); 105 int delta);
106static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, 106static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
107 struct inet6_dev *idev); 107 struct inet6_dev *idev);
@@ -201,10 +201,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
201 return 0; 201 return 0;
202} 202}
203 203
204static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
205{
206 kfree(container_of(head, struct ipv6_mc_socklist, rcu));
207}
208/* 204/*
209 * socket leave on multicast group 205 * socket leave on multicast group
210 */ 206 */
@@ -239,7 +235,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
239 (void) ip6_mc_leave_src(sk, mc_lst, NULL); 235 (void) ip6_mc_leave_src(sk, mc_lst, NULL);
240 rcu_read_unlock(); 236 rcu_read_unlock();
241 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); 237 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
242 call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); 238 kfree_rcu(mc_lst, rcu);
243 return 0; 239 return 0;
244 } 240 }
245 } 241 }
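Note: kfree_rcu(ptr, field) frees the object after an RCU grace period without a dedicated callback; the second argument names the struct's rcu_head member, which is why ipv6_mc_socklist_reclaim() could be deleted above. A minimal sketch with a hypothetical structure (struct foo is not from this patch):

struct foo {
	int data;
	struct rcu_head rcu;	/* named by the second kfree_rcu() argument */
};

static void drop_foo(struct foo *f)
{
	/* old pattern:  call_rcu(&f->rcu, foo_reclaim);  with a callback
	 * whose only job was to kfree() the containing object.           */
	kfree_rcu(f, rcu);
}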
@@ -250,7 +246,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
250 246
251/* called with rcu_read_lock() */ 247/* called with rcu_read_lock() */
252static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, 248static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
253 struct in6_addr *group, 249 const struct in6_addr *group,
254 int ifindex) 250 int ifindex)
255{ 251{
256 struct net_device *dev = NULL; 252 struct net_device *dev = NULL;
@@ -307,7 +303,7 @@ void ipv6_sock_mc_close(struct sock *sk)
307 rcu_read_unlock(); 303 rcu_read_unlock();
308 304
309 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); 305 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
310 call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); 306 kfree_rcu(mc_lst, rcu);
311 307
312 spin_lock(&ipv6_sk_mc_lock); 308 spin_lock(&ipv6_sk_mc_lock);
313 } 309 }
@@ -319,7 +315,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
319{ 315{
320 struct in6_addr *source, *group; 316 struct in6_addr *source, *group;
321 struct ipv6_mc_socklist *pmc; 317 struct ipv6_mc_socklist *pmc;
322 struct net_device *dev;
323 struct inet6_dev *idev; 318 struct inet6_dev *idev;
324 struct ipv6_pinfo *inet6 = inet6_sk(sk); 319 struct ipv6_pinfo *inet6 = inet6_sk(sk);
325 struct ip6_sf_socklist *psl; 320 struct ip6_sf_socklist *psl;
@@ -341,7 +336,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
341 rcu_read_unlock(); 336 rcu_read_unlock();
342 return -ENODEV; 337 return -ENODEV;
343 } 338 }
344 dev = idev->dev;
345 339
346 err = -EADDRNOTAVAIL; 340 err = -EADDRNOTAVAIL;
347 341
@@ -453,9 +447,8 @@ done:
453 447
454int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) 448int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
455{ 449{
456 struct in6_addr *group; 450 const struct in6_addr *group;
457 struct ipv6_mc_socklist *pmc; 451 struct ipv6_mc_socklist *pmc;
458 struct net_device *dev;
459 struct inet6_dev *idev; 452 struct inet6_dev *idev;
460 struct ipv6_pinfo *inet6 = inet6_sk(sk); 453 struct ipv6_pinfo *inet6 = inet6_sk(sk);
461 struct ip6_sf_socklist *newpsl, *psl; 454 struct ip6_sf_socklist *newpsl, *psl;
@@ -478,7 +471,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
478 rcu_read_unlock(); 471 rcu_read_unlock();
479 return -ENODEV; 472 return -ENODEV;
480 } 473 }
481 dev = idev->dev;
482 474
483 err = 0; 475 err = 0;
484 476
@@ -546,10 +538,9 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
546 struct group_filter __user *optval, int __user *optlen) 538 struct group_filter __user *optval, int __user *optlen)
547{ 539{
548 int err, i, count, copycount; 540 int err, i, count, copycount;
549 struct in6_addr *group; 541 const struct in6_addr *group;
550 struct ipv6_mc_socklist *pmc; 542 struct ipv6_mc_socklist *pmc;
551 struct inet6_dev *idev; 543 struct inet6_dev *idev;
552 struct net_device *dev;
553 struct ipv6_pinfo *inet6 = inet6_sk(sk); 544 struct ipv6_pinfo *inet6 = inet6_sk(sk);
554 struct ip6_sf_socklist *psl; 545 struct ip6_sf_socklist *psl;
555 struct net *net = sock_net(sk); 546 struct net *net = sock_net(sk);
@@ -566,7 +557,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
566 rcu_read_unlock(); 557 rcu_read_unlock();
567 return -ENODEV; 558 return -ENODEV;
568 } 559 }
569 dev = idev->dev;
570 560
571 err = -EADDRNOTAVAIL; 561 err = -EADDRNOTAVAIL;
572 /* 562 /*
@@ -758,7 +748,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
758 spin_unlock_bh(&idev->mc_lock); 748 spin_unlock_bh(&idev->mc_lock);
759} 749}
760 750
761static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca) 751static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
762{ 752{
763 struct ifmcaddr6 *pmc, *pmc_prev; 753 struct ifmcaddr6 *pmc, *pmc_prev;
764 struct ip6_sf_list *psf, *psf_next; 754 struct ip6_sf_list *psf, *psf_next;
@@ -1058,7 +1048,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
1058 1048
1059/* mark EXCLUDE-mode sources */ 1049/* mark EXCLUDE-mode sources */
1060static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, 1050static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
1061 struct in6_addr *srcs) 1051 const struct in6_addr *srcs)
1062{ 1052{
1063 struct ip6_sf_list *psf; 1053 struct ip6_sf_list *psf;
1064 int i, scount; 1054 int i, scount;
@@ -1086,7 +1076,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
1086} 1076}
1087 1077
1088static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, 1078static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1089 struct in6_addr *srcs) 1079 const struct in6_addr *srcs)
1090{ 1080{
1091 struct ip6_sf_list *psf; 1081 struct ip6_sf_list *psf;
1092 int i, scount; 1082 int i, scount;
@@ -1121,7 +1111,7 @@ int igmp6_event_query(struct sk_buff *skb)
1121{ 1111{
1122 struct mld2_query *mlh2 = NULL; 1112 struct mld2_query *mlh2 = NULL;
1123 struct ifmcaddr6 *ma; 1113 struct ifmcaddr6 *ma;
1124 struct in6_addr *group; 1114 const struct in6_addr *group;
1125 unsigned long max_delay; 1115 unsigned long max_delay;
1126 struct inet6_dev *idev; 1116 struct inet6_dev *idev;
1127 struct mld_msg *mld; 1117 struct mld_msg *mld;
@@ -1402,7 +1392,7 @@ static void mld_sendpack(struct sk_buff *skb)
1402 struct inet6_dev *idev; 1392 struct inet6_dev *idev;
1403 struct net *net = dev_net(skb->dev); 1393 struct net *net = dev_net(skb->dev);
1404 int err; 1394 int err;
1405 struct flowi fl; 1395 struct flowi6 fl6;
1406 struct dst_entry *dst; 1396 struct dst_entry *dst;
1407 1397
1408 rcu_read_lock(); 1398 rcu_read_lock();
@@ -1425,11 +1415,16 @@ static void mld_sendpack(struct sk_buff *skb)
1425 goto err_out; 1415 goto err_out;
1426 } 1416 }
1427 1417
1428 icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT, 1418 icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
1429 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 1419 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
1430 skb->dev->ifindex); 1420 skb->dev->ifindex);
1431 1421
1432 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 1422 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1423 err = 0;
1424 if (IS_ERR(dst)) {
1425 err = PTR_ERR(dst);
1426 dst = NULL;
1427 }
1433 skb_dst_set(skb, dst); 1428 skb_dst_set(skb, dst);
1434 if (err) 1429 if (err)
1435 goto err_out; 1430 goto err_out;
@@ -1732,7 +1727,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1732 u8 ra[8] = { IPPROTO_ICMPV6, 0, 1727 u8 ra[8] = { IPPROTO_ICMPV6, 0,
1733 IPV6_TLV_ROUTERALERT, 2, 0, 0, 1728 IPV6_TLV_ROUTERALERT, 2, 0, 0,
1734 IPV6_TLV_PADN, 0 }; 1729 IPV6_TLV_PADN, 0 };
1735 struct flowi fl; 1730 struct flowi6 fl6;
1736 struct dst_entry *dst; 1731 struct dst_entry *dst;
1737 1732
1738 if (type == ICMPV6_MGM_REDUCTION) 1733 if (type == ICMPV6_MGM_REDUCTION)
@@ -1792,13 +1787,15 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1792 goto err_out; 1787 goto err_out;
1793 } 1788 }
1794 1789
1795 icmpv6_flow_init(sk, &fl, type, 1790 icmpv6_flow_init(sk, &fl6, type,
1796 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 1791 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
1797 skb->dev->ifindex); 1792 skb->dev->ifindex);
1798 1793
1799 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 1794 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1800 if (err) 1795 if (IS_ERR(dst)) {
1796 err = PTR_ERR(dst);
1801 goto err_out; 1797 goto err_out;
1798 }
1802 1799
1803 skb_dst_set(skb, dst); 1800 skb_dst_set(skb, dst);
1804 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, 1801 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
@@ -1820,7 +1817,7 @@ err_out:
1820} 1817}
1821 1818
1822static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, 1819static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
1823 struct in6_addr *psfsrc) 1820 const struct in6_addr *psfsrc)
1824{ 1821{
1825 struct ip6_sf_list *psf, *psf_prev; 1822 struct ip6_sf_list *psf, *psf_prev;
1826 int rv = 0; 1823 int rv = 0;
@@ -1856,8 +1853,8 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
1856 return rv; 1853 return rv;
1857} 1854}
1858 1855
1859static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca, 1856static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
1860 int sfmode, int sfcount, struct in6_addr *psfsrc, 1857 int sfmode, int sfcount, const struct in6_addr *psfsrc,
1861 int delta) 1858 int delta)
1862{ 1859{
1863 struct ifmcaddr6 *pmc; 1860 struct ifmcaddr6 *pmc;
@@ -1917,7 +1914,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
1917 * Add multicast single-source filter to the interface list 1914 * Add multicast single-source filter to the interface list
1918 */ 1915 */
1919static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, 1916static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
1920 struct in6_addr *psfsrc, int delta) 1917 const struct in6_addr *psfsrc, int delta)
1921{ 1918{
1922 struct ip6_sf_list *psf, *psf_prev; 1919 struct ip6_sf_list *psf, *psf_prev;
1923 1920
@@ -2020,8 +2017,8 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
2020/* 2017/*
2021 * Add multicast source filter list to the interface list 2018 * Add multicast source filter list to the interface list
2022 */ 2019 */
2023static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca, 2020static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
2024 int sfmode, int sfcount, struct in6_addr *psfsrc, 2021 int sfmode, int sfcount, const struct in6_addr *psfsrc,
2025 int delta) 2022 int delta)
2026{ 2023{
2027 struct ifmcaddr6 *pmc; 2024 struct ifmcaddr6 *pmc;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index d6e9599d0705..43242e6e6103 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -126,7 +126,7 @@ static struct mip6_report_rate_limiter mip6_report_rl = {
126 126
127static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) 127static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
128{ 128{
129 struct ipv6hdr *iph = ipv6_hdr(skb); 129 const struct ipv6hdr *iph = ipv6_hdr(skb);
130 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; 130 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
131 int err = destopt->nexthdr; 131 int err = destopt->nexthdr;
132 132
@@ -181,8 +181,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
181} 181}
182 182
183static inline int mip6_report_rl_allow(struct timeval *stamp, 183static inline int mip6_report_rl_allow(struct timeval *stamp,
184 struct in6_addr *dst, 184 const struct in6_addr *dst,
185 struct in6_addr *src, int iif) 185 const struct in6_addr *src, int iif)
186{ 186{
187 int allow = 0; 187 int allow = 0;
188 188
@@ -203,18 +203,20 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
203 return allow; 203 return allow;
204} 204}
205 205
206static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) 206static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
207 const struct flowi *fl)
207{ 208{
208 struct net *net = xs_net(x); 209 struct net *net = xs_net(x);
209 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; 210 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
211 const struct flowi6 *fl6 = &fl->u.ip6;
210 struct ipv6_destopt_hao *hao = NULL; 212 struct ipv6_destopt_hao *hao = NULL;
211 struct xfrm_selector sel; 213 struct xfrm_selector sel;
212 int offset; 214 int offset;
213 struct timeval stamp; 215 struct timeval stamp;
214 int err = 0; 216 int err = 0;
215 217
216 if (unlikely(fl->proto == IPPROTO_MH && 218 if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
217 fl->fl_mh_type <= IP6_MH_TYPE_MAX)) 219 fl6->fl6_mh_type <= IP6_MH_TYPE_MAX))
218 goto out; 220 goto out;
219 221
220 if (likely(opt->dsthao)) { 222 if (likely(opt->dsthao)) {
@@ -239,14 +241,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
239 sizeof(sel.saddr)); 241 sizeof(sel.saddr));
240 sel.prefixlen_s = 128; 242 sel.prefixlen_s = 128;
241 sel.family = AF_INET6; 243 sel.family = AF_INET6;
242 sel.proto = fl->proto; 244 sel.proto = fl6->flowi6_proto;
243 sel.dport = xfrm_flowi_dport(fl); 245 sel.dport = xfrm_flowi_dport(fl, &fl6->uli);
244 if (sel.dport) 246 if (sel.dport)
245 sel.dport_mask = htons(~0); 247 sel.dport_mask = htons(~0);
246 sel.sport = xfrm_flowi_sport(fl); 248 sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
247 if (sel.sport) 249 if (sel.sport)
248 sel.sport_mask = htons(~0); 250 sel.sport_mask = htons(~0);
249 sel.ifindex = fl->oif; 251 sel.ifindex = fl6->flowi6_oif;
250 252
251 err = km_report(net, IPPROTO_DSTOPTS, &sel, 253 err = km_report(net, IPPROTO_DSTOPTS, &sel,
252 (hao ? (xfrm_address_t *)&hao->addr : NULL)); 254 (hao ? (xfrm_address_t *)&hao->addr : NULL));
@@ -347,7 +349,7 @@ static const struct xfrm_type mip6_destopt_type =
347 349
348static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) 350static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
349{ 351{
350 struct ipv6hdr *iph = ipv6_hdr(skb); 352 const struct ipv6hdr *iph = ipv6_hdr(skb);
351 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; 353 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
352 int err = rt2->rt_hdr.nexthdr; 354 int err = rt2->rt_hdr.nexthdr;
353 355
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2342545a5ee9..7596f071d308 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -324,7 +324,7 @@ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
324 return lladdr + prepad; 324 return lladdr + prepad;
325} 325}
326 326
327int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) 327int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
328{ 328{
329 switch (dev->type) { 329 switch (dev->type) {
330 case ARPHRD_ETHER: 330 case ARPHRD_ETHER:
@@ -341,6 +341,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
341 case ARPHRD_INFINIBAND: 341 case ARPHRD_INFINIBAND:
342 ipv6_ib_mc_map(addr, dev->broadcast, buf); 342 ipv6_ib_mc_map(addr, dev->broadcast, buf);
343 return 0; 343 return 0;
344 case ARPHRD_IPGRE:
345 return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
344 default: 346 default:
345 if (dir) { 347 if (dir) {
346 memcpy(buf, dev->broadcast, dev->addr_len); 348 memcpy(buf, dev->broadcast, dev->addr_len);
@@ -511,7 +513,7 @@ void ndisc_send_skb(struct sk_buff *skb,
511 const struct in6_addr *saddr, 513 const struct in6_addr *saddr,
512 struct icmp6hdr *icmp6h) 514 struct icmp6hdr *icmp6h)
513{ 515{
514 struct flowi fl; 516 struct flowi6 fl6;
515 struct dst_entry *dst; 517 struct dst_entry *dst;
516 struct net *net = dev_net(dev); 518 struct net *net = dev_net(dev);
517 struct sock *sk = net->ipv6.ndisc_sk; 519 struct sock *sk = net->ipv6.ndisc_sk;
@@ -521,7 +523,7 @@ void ndisc_send_skb(struct sk_buff *skb,
521 523
522 type = icmp6h->icmp6_type; 524 type = icmp6h->icmp6_type;
523 525
524 icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); 526 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
525 527
526 dst = icmp6_dst_alloc(dev, neigh, daddr); 528 dst = icmp6_dst_alloc(dev, neigh, daddr);
527 if (!dst) { 529 if (!dst) {
@@ -529,8 +531,8 @@ void ndisc_send_skb(struct sk_buff *skb,
529 return; 531 return;
530 } 532 }
531 533
532 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 534 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
533 if (err < 0) { 535 if (IS_ERR(dst)) {
534 kfree_skb(skb); 536 kfree_skb(skb);
535 return; 537 return;
536 } 538 }
@@ -609,6 +611,29 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
609 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); 611 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
610} 612}
611 613
614static void ndisc_send_unsol_na(struct net_device *dev)
615{
616 struct inet6_dev *idev;
617 struct inet6_ifaddr *ifa;
618 struct in6_addr mcaddr;
619
620 idev = in6_dev_get(dev);
621 if (!idev)
622 return;
623
624 read_lock_bh(&idev->lock);
625 list_for_each_entry(ifa, &idev->addr_list, if_list) {
626 addrconf_addr_solict_mult(&ifa->addr, &mcaddr);
627 ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr,
628 /*router=*/ !!idev->cnf.forwarding,
629 /*solicited=*/ false, /*override=*/ true,
630 /*inc_opt=*/ true);
631 }
632 read_unlock_bh(&idev->lock);
633
634 in6_dev_put(idev);
635}
636
612void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, 637void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
613 const struct in6_addr *solicit, 638 const struct in6_addr *solicit,
614 const struct in6_addr *daddr, const struct in6_addr *saddr) 639 const struct in6_addr *daddr, const struct in6_addr *saddr)
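ndisc_send_unsol_na() walks the interface's address list and advertises each address to its solicited-node multicast group, as derived by addrconf_addr_solict_mult(). A user-space sketch of that derivation (RFC 4291, section 2.7.1), assuming an illustrative helper name rather than the kernel function:

#include <string.h>
#include <netinet/in.h>

/* Solicited-node multicast address: ff02::1:ff00:0/104 with the low 24 bits
 * copied from the unicast address. */
static void solicited_node_mcast(const struct in6_addr *addr,
				 struct in6_addr *mcaddr)
{
	memset(mcaddr, 0, sizeof(*mcaddr));
	mcaddr->s6_addr[0]  = 0xff;
	mcaddr->s6_addr[1]  = 0x02;
	mcaddr->s6_addr[11] = 0x01;
	mcaddr->s6_addr[12] = 0xff;
	memcpy(&mcaddr->s6_addr[13], &addr->s6_addr[13], 3);
}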
@@ -723,8 +748,8 @@ static int pndisc_is_router(const void *pkey,
723static void ndisc_recv_ns(struct sk_buff *skb) 748static void ndisc_recv_ns(struct sk_buff *skb)
724{ 749{
725 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); 750 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
726 struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; 751 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
727 struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; 752 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
728 u8 *lladdr = NULL; 753 u8 *lladdr = NULL;
729 u32 ndoptlen = skb->tail - (skb->transport_header + 754 u32 ndoptlen = skb->tail - (skb->transport_header +
730 offsetof(struct nd_msg, opt)); 755 offsetof(struct nd_msg, opt));
@@ -899,8 +924,8 @@ out:
899static void ndisc_recv_na(struct sk_buff *skb) 924static void ndisc_recv_na(struct sk_buff *skb)
900{ 925{
901 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); 926 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
902 struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; 927 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
903 struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; 928 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
904 u8 *lladdr = NULL; 929 u8 *lladdr = NULL;
905 u32 ndoptlen = skb->tail - (skb->transport_header + 930 u32 ndoptlen = skb->tail - (skb->transport_header +
906 offsetof(struct nd_msg, opt)); 931 offsetof(struct nd_msg, opt));
@@ -943,9 +968,10 @@ static void ndisc_recv_na(struct sk_buff *skb)
943 } 968 }
944 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); 969 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
945 if (ifp) { 970 if (ifp) {
946 if (ifp->flags & IFA_F_TENTATIVE) { 971 if (skb->pkt_type != PACKET_LOOPBACK
947 addrconf_dad_failure(ifp); 972 && (ifp->flags & IFA_F_TENTATIVE)) {
948 return; 973 addrconf_dad_failure(ifp);
974 return;
949 } 975 }
950 /* What should we make now? The advertisement 976 /* What should we make now? The advertisement
951 is invalid, but ndisc specs say nothing 977 is invalid, but ndisc specs say nothing
@@ -1012,7 +1038,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
1012 unsigned long ndoptlen = skb->len - sizeof(*rs_msg); 1038 unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
1013 struct neighbour *neigh; 1039 struct neighbour *neigh;
1014 struct inet6_dev *idev; 1040 struct inet6_dev *idev;
1015 struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; 1041 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1016 struct ndisc_options ndopts; 1042 struct ndisc_options ndopts;
1017 u8 *lladdr = NULL; 1043 u8 *lladdr = NULL;
1018 1044
@@ -1409,8 +1435,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1409{ 1435{
1410 struct inet6_dev *in6_dev; 1436 struct inet6_dev *in6_dev;
1411 struct icmp6hdr *icmph; 1437 struct icmp6hdr *icmph;
1412 struct in6_addr *dest; 1438 const struct in6_addr *dest;
1413 struct in6_addr *target; /* new first hop to destination */ 1439 const struct in6_addr *target; /* new first hop to destination */
1414 struct neighbour *neigh; 1440 struct neighbour *neigh;
1415 int on_link = 0; 1441 int on_link = 0;
1416 struct ndisc_options ndopts; 1442 struct ndisc_options ndopts;
@@ -1443,7 +1469,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1443 } 1469 }
1444 1470
1445 icmph = icmp6_hdr(skb); 1471 icmph = icmp6_hdr(skb);
1446 target = (struct in6_addr *) (icmph + 1); 1472 target = (const struct in6_addr *) (icmph + 1);
1447 dest = target + 1; 1473 dest = target + 1;
1448 1474
1449 if (ipv6_addr_is_multicast(dest)) { 1475 if (ipv6_addr_is_multicast(dest)) {
@@ -1515,7 +1541,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1515 struct rt6_info *rt; 1541 struct rt6_info *rt;
1516 struct dst_entry *dst; 1542 struct dst_entry *dst;
1517 struct inet6_dev *idev; 1543 struct inet6_dev *idev;
1518 struct flowi fl; 1544 struct flowi6 fl6;
1519 u8 *opt; 1545 u8 *opt;
1520 int rd_len; 1546 int rd_len;
1521 int err; 1547 int err;
@@ -1535,15 +1561,15 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1535 return; 1561 return;
1536 } 1562 }
1537 1563
1538 icmpv6_flow_init(sk, &fl, NDISC_REDIRECT, 1564 icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1539 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); 1565 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1540 1566
1541 dst = ip6_route_output(net, NULL, &fl); 1567 dst = ip6_route_output(net, NULL, &fl6);
1542 if (dst == NULL) 1568 if (dst == NULL)
1543 return; 1569 return;
1544 1570
1545 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 1571 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1546 if (err) 1572 if (IS_ERR(dst))
1547 return; 1573 return;
1548 1574
1549 rt = (struct rt6_info *) dst; 1575 rt = (struct rt6_info *) dst;
@@ -1553,7 +1579,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1553 "ICMPv6 Redirect: destination is not a neighbour.\n"); 1579 "ICMPv6 Redirect: destination is not a neighbour.\n");
1554 goto release; 1580 goto release;
1555 } 1581 }
1556 if (!xrlim_allow(dst, 1*HZ)) 1582 if (!rt->rt6i_peer)
1583 rt6_bind_peer(rt, 1);
 1584		if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1557 goto release; 1585 goto release;
1558 1586
1559 if (dev->addr_len) { 1587 if (dev->addr_len) {
@@ -1718,6 +1746,9 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
1718 neigh_ifdown(&nd_tbl, dev); 1746 neigh_ifdown(&nd_tbl, dev);
1719 fib6_run_gc(~0UL, net); 1747 fib6_run_gc(~0UL, net);
1720 break; 1748 break;
1749 case NETDEV_NOTIFY_PEERS:
1750 ndisc_send_unsol_na(dev);
1751 break;
1721 default: 1752 default:
1722 break; 1753 break;
1723 } 1754 }
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 35915e8617f0..30fcee465448 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -13,16 +13,16 @@
13int ip6_route_me_harder(struct sk_buff *skb) 13int ip6_route_me_harder(struct sk_buff *skb)
14{ 14{
15 struct net *net = dev_net(skb_dst(skb)->dev); 15 struct net *net = dev_net(skb_dst(skb)->dev);
16 struct ipv6hdr *iph = ipv6_hdr(skb); 16 const struct ipv6hdr *iph = ipv6_hdr(skb);
17 struct dst_entry *dst; 17 struct dst_entry *dst;
18 struct flowi fl = { 18 struct flowi6 fl6 = {
19 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 19 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
20 .mark = skb->mark, 20 .flowi6_mark = skb->mark,
21 .fl6_dst = iph->daddr, 21 .daddr = iph->daddr,
22 .fl6_src = iph->saddr, 22 .saddr = iph->saddr,
23 }; 23 };
24 24
25 dst = ip6_route_output(net, skb->sk, &fl); 25 dst = ip6_route_output(net, skb->sk, &fl6);
26 if (dst->error) { 26 if (dst->error) {
27 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 27 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
28 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); 28 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
@@ -37,9 +37,10 @@ int ip6_route_me_harder(struct sk_buff *skb)
37 37
38#ifdef CONFIG_XFRM 38#ifdef CONFIG_XFRM
39 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 39 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
40 xfrm_decode_session(skb, &fl, AF_INET6) == 0) { 40 xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
41 skb_dst_set(skb, NULL); 41 skb_dst_set(skb, NULL);
42 if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) 42 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
43 if (IS_ERR(dst))
43 return -1; 44 return -1;
44 skb_dst_set(skb, dst); 45 skb_dst_set(skb, dst);
45 } 46 }
@@ -66,7 +67,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
66 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); 67 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
67 68
68 if (entry->hook == NF_INET_LOCAL_OUT) { 69 if (entry->hook == NF_INET_LOCAL_OUT) {
69 struct ipv6hdr *iph = ipv6_hdr(skb); 70 const struct ipv6hdr *iph = ipv6_hdr(skb);
70 71
71 rt_info->daddr = iph->daddr; 72 rt_info->daddr = iph->daddr;
72 rt_info->saddr = iph->saddr; 73 rt_info->saddr = iph->saddr;
@@ -80,7 +81,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
80 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); 81 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
81 82
82 if (entry->hook == NF_INET_LOCAL_OUT) { 83 if (entry->hook == NF_INET_LOCAL_OUT) {
83 struct ipv6hdr *iph = ipv6_hdr(skb); 84 const struct ipv6hdr *iph = ipv6_hdr(skb);
84 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || 85 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
85 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || 86 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
86 skb->mark != rt_info->mark) 87 skb->mark != rt_info->mark)
@@ -89,16 +90,25 @@ static int nf_ip6_reroute(struct sk_buff *skb,
89 return 0; 90 return 0;
90} 91}
91 92
92static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) 93static int nf_ip6_route(struct net *net, struct dst_entry **dst,
94 struct flowi *fl, bool strict)
93{ 95{
94 *dst = ip6_route_output(&init_net, NULL, fl); 96 static const struct ipv6_pinfo fake_pinfo;
97 static const struct inet_sock fake_sk = {
98 /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
99 .sk.sk_bound_dev_if = 1,
100 .pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
101 };
102 const void *sk = strict ? &fake_sk : NULL;
103
104 *dst = ip6_route_output(net, sk, &fl->u.ip6);
95 return (*dst)->error; 105 return (*dst)->error;
96} 106}
97 107
98__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 108__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
99 unsigned int dataoff, u_int8_t protocol) 109 unsigned int dataoff, u_int8_t protocol)
100{ 110{
101 struct ipv6hdr *ip6h = ipv6_hdr(skb); 111 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
102 __sum16 csum = 0; 112 __sum16 csum = 0;
103 113
104 switch (skb->ip_summed) { 114 switch (skb->ip_summed) {
@@ -132,7 +142,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
132 unsigned int dataoff, unsigned int len, 142 unsigned int dataoff, unsigned int len,
133 u_int8_t protocol) 143 u_int8_t protocol)
134{ 144{
135 struct ipv6hdr *ip6h = ipv6_hdr(skb); 145 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
136 __wsum hsum; 146 __wsum hsum;
137 __sum16 csum = 0; 147 __sum16 csum = 0;
138 148
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d227c644f72..94874b0bdcdc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb,
340 unsigned int *stackptr, origptr, cpu; 340 unsigned int *stackptr, origptr, cpu;
341 const struct xt_table_info *private; 341 const struct xt_table_info *private;
342 struct xt_action_param acpar; 342 struct xt_action_param acpar;
343 unsigned int addend;
343 344
344 /* Initialization */ 345 /* Initialization */
345 indev = in ? in->name : nulldevname; 346 indev = in ? in->name : nulldevname;
@@ -358,7 +359,8 @@ ip6t_do_table(struct sk_buff *skb,
358 359
359 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 360 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
360 361
361 xt_info_rdlock_bh(); 362 local_bh_disable();
363 addend = xt_write_recseq_begin();
362 private = table->private; 364 private = table->private;
363 cpu = smp_processor_id(); 365 cpu = smp_processor_id();
364 table_base = private->entries[cpu]; 366 table_base = private->entries[cpu];
@@ -410,7 +412,7 @@ ip6t_do_table(struct sk_buff *skb,
410 verdict = (unsigned)(-v) - 1; 412 verdict = (unsigned)(-v) - 1;
411 break; 413 break;
412 } 414 }
413 if (*stackptr == 0) 415 if (*stackptr <= origptr)
414 e = get_entry(table_base, 416 e = get_entry(table_base,
415 private->underflow[hook]); 417 private->underflow[hook]);
416 else 418 else
@@ -441,9 +443,11 @@ ip6t_do_table(struct sk_buff *skb,
441 break; 443 break;
442 } while (!acpar.hotdrop); 444 } while (!acpar.hotdrop);
443 445
444 xt_info_rdunlock_bh();
445 *stackptr = origptr; 446 *stackptr = origptr;
446 447
448 xt_write_recseq_end(addend);
449 local_bh_enable();
450
447#ifdef DEBUG_ALLOW_ALL 451#ifdef DEBUG_ALLOW_ALL
448 return NF_ACCEPT; 452 return NF_ACCEPT;
449#else 453#else
@@ -899,7 +903,7 @@ get_counters(const struct xt_table_info *t,
899 unsigned int i; 903 unsigned int i;
900 904
901 for_each_possible_cpu(cpu) { 905 for_each_possible_cpu(cpu) {
902 seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; 906 seqcount_t *s = &per_cpu(xt_recseq, cpu);
903 907
904 i = 0; 908 i = 0;
905 xt_entry_foreach(iter, t->entries[cpu], t->size) { 909 xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -907,10 +911,10 @@ get_counters(const struct xt_table_info *t,
907 unsigned int start; 911 unsigned int start;
908 912
909 do { 913 do {
910 start = read_seqbegin(lock); 914 start = read_seqcount_begin(s);
911 bcnt = iter->counters.bcnt; 915 bcnt = iter->counters.bcnt;
912 pcnt = iter->counters.pcnt; 916 pcnt = iter->counters.pcnt;
913 } while (read_seqretry(lock, start)); 917 } while (read_seqcount_retry(s, start));
914 918
915 ADD_COUNTER(counters[i], bcnt, pcnt); 919 ADD_COUNTER(counters[i], bcnt, pcnt);
916 ++i; 920 ++i;
@@ -1076,6 +1080,7 @@ static int compat_table_info(const struct xt_table_info *info,
1076 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1080 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1077 newinfo->initial_entries = 0; 1081 newinfo->initial_entries = 0;
1078 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1082 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1083 xt_compat_init_offsets(AF_INET6, info->number);
1079 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1084 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1080 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1085 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1081 if (ret != 0) 1086 if (ret != 0)
@@ -1274,6 +1279,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1274 /* overflow check */ 1279 /* overflow check */
1275 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1280 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1276 return -ENOMEM; 1281 return -ENOMEM;
1282 tmp.name[sizeof(tmp.name)-1] = 0;
1277 1283
1278 newinfo = xt_alloc_table_info(tmp.size); 1284 newinfo = xt_alloc_table_info(tmp.size);
1279 if (!newinfo) 1285 if (!newinfo)
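tmp.name is copied verbatim from user space, so nothing guarantees a terminating NUL before it is later handed to string helpers; the added line clamps it, and compat_do_replace() and the revision ioctl below get the same treatment. A hedged user-space illustration of the pattern (hypothetical struct, not the xtables code):

#include <stdio.h>
#include <string.h>

struct request {
	char name[32];		/* fixed-size field filled by an untrusted caller */
};

static void handle_request(struct request *req)
{
	req->name[sizeof(req->name) - 1] = '\0';	/* force termination */
	printf("table: %s\n", req->name);		/* now safe for %s / strlen */
}

int main(void)
{
	struct request req;

	memset(req.name, 'A', sizeof(req.name));	/* no NUL anywhere */
	handle_request(&req);
	return 0;
}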
@@ -1323,6 +1329,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1323 int ret = 0; 1329 int ret = 0;
1324 const void *loc_cpu_entry; 1330 const void *loc_cpu_entry;
1325 struct ip6t_entry *iter; 1331 struct ip6t_entry *iter;
1332 unsigned int addend;
1326#ifdef CONFIG_COMPAT 1333#ifdef CONFIG_COMPAT
1327 struct compat_xt_counters_info compat_tmp; 1334 struct compat_xt_counters_info compat_tmp;
1328 1335
@@ -1379,13 +1386,13 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1379 i = 0; 1386 i = 0;
1380 /* Choose the copy that is on our node */ 1387 /* Choose the copy that is on our node */
1381 curcpu = smp_processor_id(); 1388 curcpu = smp_processor_id();
1382 xt_info_wrlock(curcpu); 1389 addend = xt_write_recseq_begin();
1383 loc_cpu_entry = private->entries[curcpu]; 1390 loc_cpu_entry = private->entries[curcpu];
1384 xt_entry_foreach(iter, loc_cpu_entry, private->size) { 1391 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1385 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); 1392 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1386 ++i; 1393 ++i;
1387 } 1394 }
1388 xt_info_wrunlock(curcpu); 1395 xt_write_recseq_end(addend);
1389 1396
1390 unlock_up_free: 1397 unlock_up_free:
1391 local_bh_enable(); 1398 local_bh_enable();
@@ -1576,7 +1583,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
1576 struct xt_table_info *newinfo, unsigned char *base) 1583 struct xt_table_info *newinfo, unsigned char *base)
1577{ 1584{
1578 struct xt_entry_target *t; 1585 struct xt_entry_target *t;
1579 struct xt_target *target;
1580 struct ip6t_entry *de; 1586 struct ip6t_entry *de;
1581 unsigned int origsize; 1587 unsigned int origsize;
1582 int ret, h; 1588 int ret, h;
@@ -1598,7 +1604,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
1598 } 1604 }
1599 de->target_offset = e->target_offset - (origsize - *size); 1605 de->target_offset = e->target_offset - (origsize - *size);
1600 t = compat_ip6t_get_target(e); 1606 t = compat_ip6t_get_target(e);
1601 target = t->u.kernel.target;
1602 xt_compat_target_from_user(t, dstptr, size); 1607 xt_compat_target_from_user(t, dstptr, size);
1603 1608
1604 de->next_offset = e->next_offset - (origsize - *size); 1609 de->next_offset = e->next_offset - (origsize - *size);
@@ -1679,6 +1684,7 @@ translate_compat_table(struct net *net,
1679 duprintf("translate_compat_table: size %u\n", info->size); 1684 duprintf("translate_compat_table: size %u\n", info->size);
1680 j = 0; 1685 j = 0;
1681 xt_compat_lock(AF_INET6); 1686 xt_compat_lock(AF_INET6);
1687 xt_compat_init_offsets(AF_INET6, number);
1682 /* Walk through entries, checking offsets. */ 1688 /* Walk through entries, checking offsets. */
1683 xt_entry_foreach(iter0, entry0, total_size) { 1689 xt_entry_foreach(iter0, entry0, total_size) {
1684 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1690 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
@@ -1820,6 +1826,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1820 return -ENOMEM; 1826 return -ENOMEM;
1821 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1827 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1822 return -ENOMEM; 1828 return -ENOMEM;
1829 tmp.name[sizeof(tmp.name)-1] = 0;
1823 1830
1824 newinfo = xt_alloc_table_info(tmp.size); 1831 newinfo = xt_alloc_table_info(tmp.size);
1825 if (!newinfo) 1832 if (!newinfo)
@@ -2049,6 +2056,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2049 ret = -EFAULT; 2056 ret = -EFAULT;
2050 break; 2057 break;
2051 } 2058 }
2059 rev.name[sizeof(rev.name)-1] = 0;
2052 2060
2053 if (cmd == IP6T_SO_GET_REVISION_TARGET) 2061 if (cmd == IP6T_SO_GET_REVISION_TARGET)
2054 target = 1; 2062 target = 1;
@@ -2243,7 +2251,7 @@ static int __init ip6_tables_init(void)
2243 if (ret < 0) 2251 if (ret < 0)
2244 goto err1; 2252 goto err1;
2245 2253
2246 /* Noone else will be downing sem now, so we won't sleep */ 2254 /* No one else will be downing sem now, so we won't sleep */
2247 ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); 2255 ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
2248 if (ret < 0) 2256 if (ret < 0)
2249 goto err2; 2257 goto err2;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index de338037a736..e6af8d72f26b 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
452 in ? in->name : "", 452 in ? in->name : "",
453 out ? out->name : ""); 453 out ? out->name : "");
454 454
455 /* MAC logging for input path only. */ 455 if (in != NULL)
456 if (in && !out)
457 dump_mac_header(m, loginfo, skb); 456 dump_mac_header(m, loginfo, skb);
458 457
459 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); 458 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index bf998feac14e..a5a4c5dd5396 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -45,9 +45,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
45 int tcphoff, needs_ack; 45 int tcphoff, needs_ack;
46 const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); 46 const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
47 struct ipv6hdr *ip6h; 47 struct ipv6hdr *ip6h;
48#define DEFAULT_TOS_VALUE 0x0U
49 const __u8 tclass = DEFAULT_TOS_VALUE;
48 struct dst_entry *dst = NULL; 50 struct dst_entry *dst = NULL;
49 u8 proto; 51 u8 proto;
50 struct flowi fl; 52 struct flowi6 fl6;
51 53
52 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || 54 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
53 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { 55 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
@@ -89,19 +91,20 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
89 return; 91 return;
90 } 92 }
91 93
92 memset(&fl, 0, sizeof(fl)); 94 memset(&fl6, 0, sizeof(fl6));
93 fl.proto = IPPROTO_TCP; 95 fl6.flowi6_proto = IPPROTO_TCP;
94 ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); 96 ipv6_addr_copy(&fl6.saddr, &oip6h->daddr);
95 ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); 97 ipv6_addr_copy(&fl6.daddr, &oip6h->saddr);
96 fl.fl_ip_sport = otcph.dest; 98 fl6.fl6_sport = otcph.dest;
97 fl.fl_ip_dport = otcph.source; 99 fl6.fl6_dport = otcph.source;
98 security_skb_classify_flow(oldskb, &fl); 100 security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
99 dst = ip6_route_output(net, NULL, &fl); 101 dst = ip6_route_output(net, NULL, &fl6);
100 if (dst == NULL || dst->error) { 102 if (dst == NULL || dst->error) {
101 dst_release(dst); 103 dst_release(dst);
102 return; 104 return;
103 } 105 }
104 if (xfrm_lookup(net, &dst, &fl, NULL, 0)) 106 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
107 if (IS_ERR(dst))
105 return; 108 return;
106 109
107 hh_len = (dst->dev->hard_header_len + 15)&~15; 110 hh_len = (dst->dev->hard_header_len + 15)&~15;
@@ -123,7 +126,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
123 skb_put(nskb, sizeof(struct ipv6hdr)); 126 skb_put(nskb, sizeof(struct ipv6hdr));
124 skb_reset_network_header(nskb); 127 skb_reset_network_header(nskb);
125 ip6h = ipv6_hdr(nskb); 128 ip6h = ipv6_hdr(nskb);
126 ip6h->version = 6; 129 *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20));
127 ip6h->hop_limit = ip6_dst_hoplimit(dst); 130 ip6h->hop_limit = ip6_dst_hoplimit(dst);
128 ip6h->nexthdr = IPPROTO_TCP; 131 ip6h->nexthdr = IPPROTO_TCP;
129 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); 132 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
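Writing *(__be32 *)ip6h sets version, traffic class and flow label in one go instead of only ip6h->version. A small sketch of that 32-bit layout, assuming host-order assembly followed by htonl() as in the hunk:

#include <stdint.h>
#include <arpa/inet.h>

/* First word of an IPv6 header: 4-bit version, 8-bit traffic class,
 * 20-bit flow label.  The REJECT target above uses tclass = 0 and
 * flowlabel = 0, i.e. htonl(0x60000000). */
static uint32_t ipv6_first_word(uint8_t tclass, uint32_t flowlabel)
{
	return htonl(0x60000000u |
		     ((uint32_t)tclass << 20) |
		     (flowlabel & 0xfffffu));
}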
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 679a0a3b7b3c..00d19173db7e 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -64,7 +64,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
64 (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || 64 (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
65 memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || 65 memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
66 skb->mark != mark || 66 skb->mark != mark ||
67 ipv6_hdr(skb)->hop_limit != hop_limit)) 67 ipv6_hdr(skb)->hop_limit != hop_limit ||
68 flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
68 return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; 69 return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
69 70
70 return ret; 71 return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 79d43aa8fa8d..085727263812 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -45,6 +45,7 @@
45#include <linux/netfilter_ipv6.h> 45#include <linux/netfilter_ipv6.h>
46#include <linux/kernel.h> 46#include <linux/kernel.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
48 49
49 50
50struct nf_ct_frag6_skb_cb 51struct nf_ct_frag6_skb_cb
@@ -73,7 +74,7 @@ static struct inet_frags nf_frags;
73static struct netns_frags nf_init_frags; 74static struct netns_frags nf_init_frags;
74 75
75#ifdef CONFIG_SYSCTL 76#ifdef CONFIG_SYSCTL
76struct ctl_table nf_ct_frag6_sysctl_table[] = { 77static struct ctl_table nf_ct_frag6_sysctl_table[] = {
77 { 78 {
78 .procname = "nf_conntrack_frag6_timeout", 79 .procname = "nf_conntrack_frag6_timeout",
79 .data = &nf_init_frags.timeout, 80 .data = &nf_init_frags.timeout,
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 97c5b21b9674..cdd6d045e42e 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -71,7 +71,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
71 if (reasm == NULL) 71 if (reasm == NULL)
72 return NF_STOLEN; 72 return NF_STOLEN;
73 73
74 /* error occured or not fragmented */ 74 /* error occurred or not fragmented */
75 if (reasm == skb) 75 if (reasm == skb)
76 return NF_ACCEPT; 76 return NF_ACCEPT;
77 77
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 24b3558b8e67..18ff5df7ec02 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -141,7 +141,11 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
141 SNMP_MIB_SENTINEL 141 SNMP_MIB_SENTINEL
142}; 142};
143 143
144static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib) 144/* can be called either with percpu mib (pcpumib != NULL),
145 * or shared one (smib != NULL)
146 */
147static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **pcpumib,
148 atomic_long_t *smib)
145{ 149{
146 char name[32]; 150 char name[32];
147 int i; 151 int i;
@@ -158,14 +162,14 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib)
158 snprintf(name, sizeof(name), "Icmp6%s%s", 162 snprintf(name, sizeof(name), "Icmp6%s%s",
159 i & 0x100 ? "Out" : "In", p); 163 i & 0x100 ? "Out" : "In", p);
160 seq_printf(seq, "%-32s\t%lu\n", name, 164 seq_printf(seq, "%-32s\t%lu\n", name,
161 snmp_fold_field(mib, i)); 165 pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i));
162 } 166 }
163 167
164 /* print by number (nonzero only) - ICMPMsgStat format */ 168 /* print by number (nonzero only) - ICMPMsgStat format */
165 for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { 169 for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
166 unsigned long val; 170 unsigned long val;
167 171
168 val = snmp_fold_field(mib, i); 172 val = pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i);
169 if (!val) 173 if (!val)
170 continue; 174 continue;
171 snprintf(name, sizeof(name), "Icmp6%sType%u", 175 snprintf(name, sizeof(name), "Icmp6%sType%u",
@@ -174,14 +178,22 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib)
174 } 178 }
175} 179}
176 180
177static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib, 181/* can be called either with percpu mib (pcpumib != NULL),
182 * or shared one (smib != NULL)
183 */
184static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
185 atomic_long_t *smib,
178 const struct snmp_mib *itemlist) 186 const struct snmp_mib *itemlist)
179{ 187{
180 int i; 188 int i;
189 unsigned long val;
181 190
182 for (i = 0; itemlist[i].name; i++) 191 for (i = 0; itemlist[i].name; i++) {
183 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, 192 val = pcpumib ?
184 snmp_fold_field(mib, itemlist[i].entry)); 193 snmp_fold_field(pcpumib, itemlist[i].entry) :
194 atomic_long_read(smib + itemlist[i].entry);
195 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
196 }
185} 197}
186 198
187static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, 199static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
@@ -201,13 +213,13 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
201 snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, 213 snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
202 snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); 214 snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
203 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, 215 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
204 snmp6_icmp6_list); 216 NULL, snmp6_icmp6_list);
205 snmp6_seq_show_icmpv6msg(seq, 217 snmp6_seq_show_icmpv6msg(seq,
206 (void __percpu **)net->mib.icmpv6msg_statistics); 218 (void __percpu **)net->mib.icmpv6msg_statistics, NULL);
207 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, 219 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
208 snmp6_udp6_list); 220 NULL, snmp6_udp6_list);
209 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, 221 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
210 snmp6_udplite6_list); 222 NULL, snmp6_udplite6_list);
211 return 0; 223 return 0;
212} 224}
213 225
@@ -229,11 +241,11 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
229 struct inet6_dev *idev = (struct inet6_dev *)seq->private; 241 struct inet6_dev *idev = (struct inet6_dev *)seq->private;
230 242
231 seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); 243 seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
232 snmp6_seq_show_item(seq, (void __percpu **)idev->stats.ipv6, 244 snmp6_seq_show_item(seq, (void __percpu **)idev->stats.ipv6, NULL,
233 snmp6_ipstats_list); 245 snmp6_ipstats_list);
234 snmp6_seq_show_item(seq, (void __percpu **)idev->stats.icmpv6, 246 snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
235 snmp6_icmp6_list); 247 snmp6_icmp6_list);
236 snmp6_seq_show_icmpv6msg(seq, (void __percpu **)idev->stats.icmpv6msg); 248 snmp6_seq_show_icmpv6msg(seq, NULL, idev->stats.icmpv6msgdev->mibs);
237 return 0; 249 return 0;
238} 250}
239 251
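The printers now accept either a per-CPU MIB that has to be folded or a shared array of atomic counters read directly, which is what the pcpumib/smib pair expresses. A plain-C stand-in for the two access patterns (illustrative only, not the kernel snmp_fold_field()/atomic_long_read() helpers):

#include <stdint.h>

/* Fold a per-CPU counter array for one item... */
static uint64_t fold_percpu(const uint64_t *percpu, int ncpus, int nitems,
			    int item)
{
	uint64_t sum = 0;
	int cpu;

	for (cpu = 0; cpu < ncpus; cpu++)
		sum += percpu[cpu * nitems + item];
	return sum;
}

/* ...or read the single shared counter the per-device MIBs now use. */
static uint64_t read_shared(const uint64_t *shared, int item)
{
	return shared[item];
}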
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index c5b0915d106b..ae64984f81aa 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -67,8 +67,8 @@ static struct raw_hashinfo raw_v6_hashinfo = {
67}; 67};
68 68
69static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, 69static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
70 unsigned short num, struct in6_addr *loc_addr, 70 unsigned short num, const struct in6_addr *loc_addr,
71 struct in6_addr *rmt_addr, int dif) 71 const struct in6_addr *rmt_addr, int dif)
72{ 72{
73 struct hlist_node *node; 73 struct hlist_node *node;
74 int is_multicast = ipv6_addr_is_multicast(loc_addr); 74 int is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -124,18 +124,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
124} 124}
125 125
126#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 126#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
127static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); 127typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
128 128
129int rawv6_mh_filter_register(int (*filter)(struct sock *sock, 129static mh_filter_t __rcu *mh_filter __read_mostly;
130 struct sk_buff *skb)) 130
131int rawv6_mh_filter_register(mh_filter_t filter)
131{ 132{
132 rcu_assign_pointer(mh_filter, filter); 133 rcu_assign_pointer(mh_filter, filter);
133 return 0; 134 return 0;
134} 135}
135EXPORT_SYMBOL(rawv6_mh_filter_register); 136EXPORT_SYMBOL(rawv6_mh_filter_register);
136 137
137int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, 138int rawv6_mh_filter_unregister(mh_filter_t filter)
138 struct sk_buff *skb))
139{ 139{
140 rcu_assign_pointer(mh_filter, NULL); 140 rcu_assign_pointer(mh_filter, NULL);
141 synchronize_rcu(); 141 synchronize_rcu();
@@ -154,8 +154,8 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
154 */ 154 */
155static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) 155static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
156{ 156{
157 struct in6_addr *saddr; 157 const struct in6_addr *saddr;
158 struct in6_addr *daddr; 158 const struct in6_addr *daddr;
159 struct sock *sk; 159 struct sock *sk;
160 int delivered = 0; 160 int delivered = 0;
161 __u8 hash; 161 __u8 hash;
@@ -193,10 +193,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
193 * policy is placed in rawv6_rcv() because it is 193 * policy is placed in rawv6_rcv() because it is
194 * required for each socket. 194 * required for each socket.
195 */ 195 */
196 int (*filter)(struct sock *sock, struct sk_buff *skb); 196 mh_filter_t *filter;
197 197
198 filter = rcu_dereference(mh_filter); 198 filter = rcu_dereference(mh_filter);
199 filtered = filter ? filter(sk, skb) : 0; 199 filtered = filter ? (*filter)(sk, skb) : 0;
200 break; 200 break;
201 } 201 }
202#endif 202#endif
@@ -348,7 +348,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
348{ 348{
349 struct sock *sk; 349 struct sock *sk;
350 int hash; 350 int hash;
351 struct in6_addr *saddr, *daddr; 351 const struct in6_addr *saddr, *daddr;
352 struct net *net; 352 struct net *net;
353 353
354 hash = nexthdr & (RAW_HTABLE_SIZE - 1); 354 hash = nexthdr & (RAW_HTABLE_SIZE - 1);
@@ -357,7 +357,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
357 sk = sk_head(&raw_v6_hashinfo.ht[hash]); 357 sk = sk_head(&raw_v6_hashinfo.ht[hash]);
358 if (sk != NULL) { 358 if (sk != NULL) {
359 /* Note: ipv6_hdr(skb) != skb->data */ 359 /* Note: ipv6_hdr(skb) != skb->data */
360 struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data; 360 const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
361 saddr = &ip6h->saddr; 361 saddr = &ip6h->saddr;
362 daddr = &ip6h->daddr; 362 daddr = &ip6h->daddr;
363 net = dev_net(skb->dev); 363 net = dev_net(skb->dev);
@@ -524,7 +524,7 @@ csum_copy_err:
524 goto out; 524 goto out;
525} 525}
526 526
527static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, 527static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
528 struct raw6_sock *rp) 528 struct raw6_sock *rp)
529{ 529{
530 struct sk_buff *skb; 530 struct sk_buff *skb;
@@ -542,8 +542,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
542 goto out; 542 goto out;
543 543
544 offset = rp->offset; 544 offset = rp->offset;
545 total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) - 545 total_len = inet_sk(sk)->cork.base.length - (skb_network_header(skb) -
546 skb->data); 546 skb->data);
547 if (offset >= total_len - 1) { 547 if (offset >= total_len - 1) {
548 err = -EINVAL; 548 err = -EINVAL;
549 ip6_flush_pending_frames(sk); 549 ip6_flush_pending_frames(sk);
@@ -586,11 +586,10 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
586 if (unlikely(csum)) 586 if (unlikely(csum))
587 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); 587 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
588 588
589 csum = csum_ipv6_magic(&fl->fl6_src, 589 csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
590 &fl->fl6_dst, 590 total_len, fl6->flowi6_proto, tmp_csum);
591 total_len, fl->proto, tmp_csum);
592 591
593 if (csum == 0 && fl->proto == IPPROTO_UDP) 592 if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
594 csum = CSUM_MANGLED_0; 593 csum = CSUM_MANGLED_0;
595 594
596 if (skb_store_bits(skb, offset, &csum, 2)) 595 if (skb_store_bits(skb, offset, &csum, 2))
@@ -603,7 +602,7 @@ out:
603} 602}
604 603
605static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, 604static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
606 struct flowi *fl, struct dst_entry **dstp, 605 struct flowi6 *fl6, struct dst_entry **dstp,
607 unsigned int flags) 606 unsigned int flags)
608{ 607{
609 struct ipv6_pinfo *np = inet6_sk(sk); 608 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -613,7 +612,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
613 struct rt6_info *rt = (struct rt6_info *)*dstp; 612 struct rt6_info *rt = (struct rt6_info *)*dstp;
614 613
615 if (length > rt->dst.dev->mtu) { 614 if (length > rt->dst.dev->mtu) {
616 ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); 615 ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
617 return -EMSGSIZE; 616 return -EMSGSIZE;
618 } 617 }
619 if (flags&MSG_PROBE) 618 if (flags&MSG_PROBE)
@@ -662,7 +661,7 @@ error:
662 return err; 661 return err;
663} 662}
664 663
665static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) 664static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
666{ 665{
667 struct iovec *iov; 666 struct iovec *iov;
668 u8 __user *type = NULL; 667 u8 __user *type = NULL;
@@ -679,7 +678,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
679 if (!iov) 678 if (!iov)
680 continue; 679 continue;
681 680
682 switch (fl->proto) { 681 switch (fl6->flowi6_proto) {
683 case IPPROTO_ICMPV6: 682 case IPPROTO_ICMPV6:
684 /* check if one-byte field is readable or not. */ 683 /* check if one-byte field is readable or not. */
685 if (iov->iov_base && iov->iov_len < 1) 684 if (iov->iov_base && iov->iov_len < 1)
@@ -694,8 +693,8 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
694 code = iov->iov_base; 693 code = iov->iov_base;
695 694
696 if (type && code) { 695 if (type && code) {
697 if (get_user(fl->fl_icmp_type, type) || 696 if (get_user(fl6->fl6_icmp_type, type) ||
698 get_user(fl->fl_icmp_code, code)) 697 get_user(fl6->fl6_icmp_code, code))
699 return -EFAULT; 698 return -EFAULT;
700 probed = 1; 699 probed = 1;
701 } 700 }
@@ -706,7 +705,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
706 /* check if type field is readable or not. */ 705 /* check if type field is readable or not. */
707 if (iov->iov_len > 2 - len) { 706 if (iov->iov_len > 2 - len) {
708 u8 __user *p = iov->iov_base; 707 u8 __user *p = iov->iov_base;
709 if (get_user(fl->fl_mh_type, &p[2 - len])) 708 if (get_user(fl6->fl6_mh_type, &p[2 - len]))
710 return -EFAULT; 709 return -EFAULT;
711 probed = 1; 710 probed = 1;
712 } else 711 } else
@@ -735,7 +734,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
735 struct ipv6_txoptions *opt = NULL; 734 struct ipv6_txoptions *opt = NULL;
736 struct ip6_flowlabel *flowlabel = NULL; 735 struct ip6_flowlabel *flowlabel = NULL;
737 struct dst_entry *dst = NULL; 736 struct dst_entry *dst = NULL;
738 struct flowi fl; 737 struct flowi6 fl6;
739 int addr_len = msg->msg_namelen; 738 int addr_len = msg->msg_namelen;
740 int hlimit = -1; 739 int hlimit = -1;
741 int tclass = -1; 740 int tclass = -1;
@@ -756,9 +755,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
756 /* 755 /*
757 * Get and verify the address. 756 * Get and verify the address.
758 */ 757 */
759 memset(&fl, 0, sizeof(fl)); 758 memset(&fl6, 0, sizeof(fl6));
760 759
761 fl.mark = sk->sk_mark; 760 fl6.flowi6_mark = sk->sk_mark;
762 761
763 if (sin6) { 762 if (sin6) {
764 if (addr_len < SIN6_LEN_RFC2133) 763 if (addr_len < SIN6_LEN_RFC2133)
@@ -780,9 +779,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
780 779
781 daddr = &sin6->sin6_addr; 780 daddr = &sin6->sin6_addr;
782 if (np->sndflow) { 781 if (np->sndflow) {
783 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; 782 fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
784 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 783 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
785 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 784 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
786 if (flowlabel == NULL) 785 if (flowlabel == NULL)
787 return -EINVAL; 786 return -EINVAL;
788 daddr = &flowlabel->dst; 787 daddr = &flowlabel->dst;
@@ -800,32 +799,32 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
800 if (addr_len >= sizeof(struct sockaddr_in6) && 799 if (addr_len >= sizeof(struct sockaddr_in6) &&
801 sin6->sin6_scope_id && 800 sin6->sin6_scope_id &&
802 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 801 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
803 fl.oif = sin6->sin6_scope_id; 802 fl6.flowi6_oif = sin6->sin6_scope_id;
804 } else { 803 } else {
805 if (sk->sk_state != TCP_ESTABLISHED) 804 if (sk->sk_state != TCP_ESTABLISHED)
806 return -EDESTADDRREQ; 805 return -EDESTADDRREQ;
807 806
808 proto = inet->inet_num; 807 proto = inet->inet_num;
809 daddr = &np->daddr; 808 daddr = &np->daddr;
810 fl.fl6_flowlabel = np->flow_label; 809 fl6.flowlabel = np->flow_label;
811 } 810 }
812 811
813 if (fl.oif == 0) 812 if (fl6.flowi6_oif == 0)
814 fl.oif = sk->sk_bound_dev_if; 813 fl6.flowi6_oif = sk->sk_bound_dev_if;
815 814
816 if (msg->msg_controllen) { 815 if (msg->msg_controllen) {
817 opt = &opt_space; 816 opt = &opt_space;
818 memset(opt, 0, sizeof(struct ipv6_txoptions)); 817 memset(opt, 0, sizeof(struct ipv6_txoptions));
819 opt->tot_len = sizeof(struct ipv6_txoptions); 818 opt->tot_len = sizeof(struct ipv6_txoptions);
820 819
821 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, 820 err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
822 &tclass, &dontfrag); 821 &tclass, &dontfrag);
823 if (err < 0) { 822 if (err < 0) {
824 fl6_sock_release(flowlabel); 823 fl6_sock_release(flowlabel);
825 return err; 824 return err;
826 } 825 }
827 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { 826 if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
828 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 827 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
829 if (flowlabel == NULL) 828 if (flowlabel == NULL)
830 return -EINVAL; 829 return -EINVAL;
831 } 830 }
@@ -838,40 +837,31 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
838 opt = fl6_merge_options(&opt_space, flowlabel, opt); 837 opt = fl6_merge_options(&opt_space, flowlabel, opt);
839 opt = ipv6_fixup_options(&opt_space, opt); 838 opt = ipv6_fixup_options(&opt_space, opt);
840 839
841 fl.proto = proto; 840 fl6.flowi6_proto = proto;
842 err = rawv6_probe_proto_opt(&fl, msg); 841 err = rawv6_probe_proto_opt(&fl6, msg);
843 if (err) 842 if (err)
844 goto out; 843 goto out;
845 844
846 if (!ipv6_addr_any(daddr)) 845 if (!ipv6_addr_any(daddr))
847 ipv6_addr_copy(&fl.fl6_dst, daddr); 846 ipv6_addr_copy(&fl6.daddr, daddr);
848 else 847 else
849 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 848 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
850 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 849 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
851 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 850 ipv6_addr_copy(&fl6.saddr, &np->saddr);
852 851
853 final_p = fl6_update_dst(&fl, opt, &final); 852 final_p = fl6_update_dst(&fl6, opt, &final);
854 853
855 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 854 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
856 fl.oif = np->mcast_oif; 855 fl6.flowi6_oif = np->mcast_oif;
857 security_sk_classify_flow(sk, &fl); 856 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
858 857
859 err = ip6_dst_lookup(sk, &dst, &fl); 858 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
860 if (err) 859 if (IS_ERR(dst)) {
860 err = PTR_ERR(dst);
861 goto out; 861 goto out;
862 if (final_p)
863 ipv6_addr_copy(&fl.fl6_dst, final_p);
864
865 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
866 if (err < 0) {
867 if (err == -EREMOTE)
868 err = ip6_dst_blackhole(sk, &dst, &fl);
869 if (err < 0)
870 goto out;
871 } 862 }
872
873 if (hlimit < 0) { 863 if (hlimit < 0) {
874 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 864 if (ipv6_addr_is_multicast(&fl6.daddr))
875 hlimit = np->mcast_hops; 865 hlimit = np->mcast_hops;
876 else 866 else
877 hlimit = np->hop_limit; 867 hlimit = np->hop_limit;
@@ -890,17 +880,17 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
890 880
891back_from_confirm: 881back_from_confirm:
892 if (inet->hdrincl) 882 if (inet->hdrincl)
893 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); 883 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
894 else { 884 else {
895 lock_sock(sk); 885 lock_sock(sk);
896 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, 886 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
897 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, 887 len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst,
898 msg->msg_flags, dontfrag); 888 msg->msg_flags, dontfrag);
899 889
900 if (err) 890 if (err)
901 ip6_flush_pending_frames(sk); 891 ip6_flush_pending_frames(sk);
902 else if (!(msg->msg_flags & MSG_MORE)) 892 else if (!(msg->msg_flags & MSG_MORE))
903 err = rawv6_push_pending_frames(sk, &fl, rp); 893 err = rawv6_push_pending_frames(sk, &fl6, rp);
904 release_sock(sk); 894 release_sock(sk);
905 } 895 }
906done: 896done:
@@ -1241,7 +1231,7 @@ struct proto rawv6_prot = {
1241static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 1231static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1242{ 1232{
1243 struct ipv6_pinfo *np = inet6_sk(sp); 1233 struct ipv6_pinfo *np = inet6_sk(sp);
1244 struct in6_addr *dest, *src; 1234 const struct in6_addr *dest, *src;
1245 __u16 destp, srcp; 1235 __u16 destp, srcp;
1246 1236
1247 dest = &np->daddr; 1237 dest = &np->daddr;
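rawv6_push_pending_frames() now feeds fl6->saddr, fl6->daddr and fl6->flowi6_proto into csum_ipv6_magic(), which completes the one's-complement sum over the IPv6 pseudo-header (RFC 2460, section 8.1). A self-contained user-space sketch of that computation; the kernel folds a precomputed partial payload sum instead of walking the data as done here:

#include <stdint.h>
#include <stddef.h>
#include <netinet/in.h>

static uint32_t sum_be16(const void *data, size_t len, uint32_t sum)
{
	const uint8_t *p = data;

	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;	/* odd trailing byte */
	return sum;
}

static uint16_t ipv6_l4_csum(const struct in6_addr *saddr,
			     const struct in6_addr *daddr,
			     uint32_t len, uint8_t proto,
			     const void *payload)
{
	uint32_t sum = 0;

	sum = sum_be16(saddr, 16, sum);		/* source address */
	sum = sum_be16(daddr, 16, sum);		/* destination address */
	sum += len;				/* upper-layer packet length */
	sum += proto;				/* zero padding + next header */
	sum = sum_be16(payload, len, sum);	/* the payload itself */
	while (sum >> 16)			/* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

For UDP a computed value of zero must go on the wire as 0xffff, which is the CSUM_MANGLED_0 special case kept by the hunk above.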
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 07beeb06f752..7b954e2539d0 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -224,7 +224,7 @@ out:
224} 224}
225 225
226static __inline__ struct frag_queue * 226static __inline__ struct frag_queue *
227fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst) 227fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst)
228{ 228{
229 struct inet_frag_queue *q; 229 struct inet_frag_queue *q;
230 struct ip6_create_arg arg; 230 struct ip6_create_arg arg;
@@ -535,7 +535,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
535{ 535{
536 struct frag_hdr *fhdr; 536 struct frag_hdr *fhdr;
537 struct frag_queue *fq; 537 struct frag_queue *fq;
538 struct ipv6hdr *hdr = ipv6_hdr(skb); 538 const struct ipv6hdr *hdr = ipv6_hdr(skb);
539 struct net *net = dev_net(skb_dst(skb)->dev); 539 struct net *net = dev_net(skb_dst(skb)->dev);
540 540
541 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); 541 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a998db6e7895..de2b1decd786 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -89,14 +89,44 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89 89
90#ifdef CONFIG_IPV6_ROUTE_INFO 90#ifdef CONFIG_IPV6_ROUTE_INFO
91static struct rt6_info *rt6_add_route_info(struct net *net, 91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen, 92 const struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex, 93 const struct in6_addr *gwaddr, int ifindex,
94 unsigned pref); 94 unsigned pref);
95static struct rt6_info *rt6_get_route_info(struct net *net, 95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen, 96 const struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex); 97 const struct in6_addr *gwaddr, int ifindex);
98#endif 98#endif
99 99
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
100static struct dst_ops ip6_dst_ops_template = { 130static struct dst_ops ip6_dst_ops_template = {
101 .family = AF_INET6, 131 .family = AF_INET6,
102 .protocol = cpu_to_be16(ETH_P_IPV6), 132 .protocol = cpu_to_be16(ETH_P_IPV6),
@@ -105,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = {
105 .check = ip6_dst_check, 135 .check = ip6_dst_check,
106 .default_advmss = ip6_default_advmss, 136 .default_advmss = ip6_default_advmss,
107 .default_mtu = ip6_default_mtu, 137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
108 .destroy = ip6_dst_destroy, 139 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown, 140 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice, 141 .negative_advice = ip6_negative_advice,
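ipv6_cow_metrics() copies the template metrics into the peer's array and publishes the pointer with cmpxchg(), adopting whatever copy won if another CPU got there first. A stripped-down user-space sketch of that publish-or-adopt pattern using C11 atomics (hypothetical names; the DST_METRICS_READ_ONLY handling is omitted):

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define NMETRICS 16

static uint32_t *cow_metrics(_Atomic(uint32_t *) *slot, uint32_t *old)
{
	uint32_t *expected = old;
	uint32_t *copy = malloc(NMETRICS * sizeof(*copy));

	if (!copy)
		return NULL;
	memcpy(copy, old, NMETRICS * sizeof(*copy));

	if (atomic_compare_exchange_strong(slot, &expected, copy))
		return copy;	/* we won: our writable copy is published */

	free(copy);		/* lost the race: adopt the winner's copy */
	return expected;
}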
@@ -122,6 +153,12 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
122{ 153{
123} 154}
124 155
156static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157 unsigned long old)
158{
159 return NULL;
160}
161
125static struct dst_ops ip6_dst_blackhole_ops = { 162static struct dst_ops ip6_dst_blackhole_ops = {
126 .family = AF_INET6, 163 .family = AF_INET6,
127 .protocol = cpu_to_be16(ETH_P_IPV6), 164 .protocol = cpu_to_be16(ETH_P_IPV6),
@@ -130,6 +167,11 @@ static struct dst_ops ip6_dst_blackhole_ops = {
130 .default_mtu = ip6_blackhole_default_mtu, 167 .default_mtu = ip6_blackhole_default_mtu,
131 .default_advmss = ip6_default_advmss, 168 .default_advmss = ip6_default_advmss,
132 .update_pmtu = ip6_rt_blackhole_update_pmtu, 169 .update_pmtu = ip6_rt_blackhole_update_pmtu,
170 .cow_metrics = ip6_rt_blackhole_cow_metrics,
171};
172
173static const u32 ip6_template_metrics[RTAX_MAX] = {
174 [RTAX_HOPLIMIT - 1] = 255,
133}; 175};
134 176
135static struct rt6_info ip6_null_entry_template = { 177static struct rt6_info ip6_null_entry_template = {
@@ -185,9 +227,14 @@ static struct rt6_info ip6_blk_hole_entry_template = {
185#endif 227#endif
186 228
187/* allocate dst with ip6_dst_ops */ 229/* allocate dst with ip6_dst_ops */
188static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) 230static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
231 struct net_device *dev)
189{ 232{
190 return (struct rt6_info *)dst_alloc(ops); 233 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0);
234
235 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
236
237 return rt;
191} 238}
192 239
193static void ip6_dst_destroy(struct dst_entry *dst) 240static void ip6_dst_destroy(struct dst_entry *dst)
@@ -206,6 +253,13 @@ static void ip6_dst_destroy(struct dst_entry *dst)
206 } 253 }
207} 254}
208 255
256static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
257
258static u32 rt6_peer_genid(void)
259{
260 return atomic_read(&__rt6_peer_genid);
261}
262
209void rt6_bind_peer(struct rt6_info *rt, int create) 263void rt6_bind_peer(struct rt6_info *rt, int create)
210{ 264{
211 struct inet_peer *peer; 265 struct inet_peer *peer;
@@ -213,6 +267,8 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
213 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); 267 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
214 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) 268 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
215 inet_putpeer(peer); 269 inet_putpeer(peer);
270 else
271 rt->rt6i_peer_genid = rt6_peer_genid();
216} 272}
217 273
218static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 274static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -239,7 +295,7 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
239 time_after(jiffies, rt->rt6i_expires); 295 time_after(jiffies, rt->rt6i_expires);
240} 296}
241 297
242static inline int rt6_need_strict(struct in6_addr *daddr) 298static inline int rt6_need_strict(const struct in6_addr *daddr)
243{ 299{
244 return ipv6_addr_type(daddr) & 300 return ipv6_addr_type(daddr) &
245 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 301 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
@@ -251,7 +307,7 @@ static inline int rt6_need_strict(struct in6_addr *daddr)
251 307
252static inline struct rt6_info *rt6_device_match(struct net *net, 308static inline struct rt6_info *rt6_device_match(struct net *net,
253 struct rt6_info *rt, 309 struct rt6_info *rt,
254 struct in6_addr *saddr, 310 const struct in6_addr *saddr,
255 int oif, 311 int oif,
256 int flags) 312 int flags)
257{ 313{
@@ -463,7 +519,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
463 519
464#ifdef CONFIG_IPV6_ROUTE_INFO 520#ifdef CONFIG_IPV6_ROUTE_INFO
465int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 521int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
466 struct in6_addr *gwaddr) 522 const struct in6_addr *gwaddr)
467{ 523{
468 struct net *net = dev_net(dev); 524 struct net *net = dev_net(dev);
469 struct route_info *rinfo = (struct route_info *) opt; 525 struct route_info *rinfo = (struct route_info *) opt;
@@ -555,17 +611,17 @@ do { \
555 611
556static struct rt6_info *ip6_pol_route_lookup(struct net *net, 612static struct rt6_info *ip6_pol_route_lookup(struct net *net,
557 struct fib6_table *table, 613 struct fib6_table *table,
558 struct flowi *fl, int flags) 614 struct flowi6 *fl6, int flags)
559{ 615{
560 struct fib6_node *fn; 616 struct fib6_node *fn;
561 struct rt6_info *rt; 617 struct rt6_info *rt;
562 618
563 read_lock_bh(&table->tb6_lock); 619 read_lock_bh(&table->tb6_lock);
564 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 620 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
565restart: 621restart:
566 rt = fn->leaf; 622 rt = fn->leaf;
567 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); 623 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
568 BACKTRACK(net, &fl->fl6_src); 624 BACKTRACK(net, &fl6->saddr);
569out: 625out:
570 dst_use(&rt->dst, jiffies); 626 dst_use(&rt->dst, jiffies);
571 read_unlock_bh(&table->tb6_lock); 627 read_unlock_bh(&table->tb6_lock);
@@ -576,19 +632,19 @@ out:
576struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 632struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
577 const struct in6_addr *saddr, int oif, int strict) 633 const struct in6_addr *saddr, int oif, int strict)
578{ 634{
579 struct flowi fl = { 635 struct flowi6 fl6 = {
580 .oif = oif, 636 .flowi6_oif = oif,
581 .fl6_dst = *daddr, 637 .daddr = *daddr,
582 }; 638 };
583 struct dst_entry *dst; 639 struct dst_entry *dst;
584 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 640 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
585 641
586 if (saddr) { 642 if (saddr) {
587 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 643 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
588 flags |= RT6_LOOKUP_F_HAS_SADDR; 644 flags |= RT6_LOOKUP_F_HAS_SADDR;
589 } 645 }
590 646
591 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); 647 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
592 if (dst->error == 0) 648 if (dst->error == 0)
593 return (struct rt6_info *) dst; 649 return (struct rt6_info *) dst;
594 650
@@ -626,8 +682,8 @@ int ip6_ins_rt(struct rt6_info *rt)
626 return __ip6_ins_rt(rt, &info); 682 return __ip6_ins_rt(rt, &info);
627} 683}
628 684
629static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 685static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr,
630 struct in6_addr *saddr) 686 const struct in6_addr *saddr)
631{ 687{
632 struct rt6_info *rt; 688 struct rt6_info *rt;
633 689
@@ -695,7 +751,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
695 return rt; 751 return rt;
696} 752}
697 753
698static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 754static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr)
699{ 755{
700 struct rt6_info *rt = ip6_rt_copy(ort); 756 struct rt6_info *rt = ip6_rt_copy(ort);
701 if (rt) { 757 if (rt) {
@@ -709,7 +765,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
709} 765}
710 766
711static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 767static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
712 struct flowi *fl, int flags) 768 struct flowi6 *fl6, int flags)
713{ 769{
714 struct fib6_node *fn; 770 struct fib6_node *fn;
715 struct rt6_info *rt, *nrt; 771 struct rt6_info *rt, *nrt;
@@ -724,12 +780,12 @@ relookup:
724 read_lock_bh(&table->tb6_lock); 780 read_lock_bh(&table->tb6_lock);
725 781
726restart_2: 782restart_2:
727 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 783 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
728 784
729restart: 785restart:
730 rt = rt6_select(fn, oif, strict | reachable); 786 rt = rt6_select(fn, oif, strict | reachable);
731 787
732 BACKTRACK(net, &fl->fl6_src); 788 BACKTRACK(net, &fl6->saddr);
733 if (rt == net->ipv6.ip6_null_entry || 789 if (rt == net->ipv6.ip6_null_entry ||
734 rt->rt6i_flags & RTF_CACHE) 790 rt->rt6i_flags & RTF_CACHE)
735 goto out; 791 goto out;
@@ -738,9 +794,11 @@ restart:
738 read_unlock_bh(&table->tb6_lock); 794 read_unlock_bh(&table->tb6_lock);
739 795
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 796 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 797 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
798 else if (!(rt->dst.flags & DST_HOST))
799 nrt = rt6_alloc_clone(rt, &fl6->daddr);
742 else 800 else
743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 801 goto out2;
744 802
745 dst_release(&rt->dst); 803 dst_release(&rt->dst);
746 rt = nrt ? : net->ipv6.ip6_null_entry; 804 rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -777,74 +835,71 @@ out2:
777} 835}
778 836
779static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 837static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
780 struct flowi *fl, int flags) 838 struct flowi6 *fl6, int flags)
781{ 839{
782 return ip6_pol_route(net, table, fl->iif, fl, flags); 840 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
783} 841}
784 842
785void ip6_route_input(struct sk_buff *skb) 843void ip6_route_input(struct sk_buff *skb)
786{ 844{
787 struct ipv6hdr *iph = ipv6_hdr(skb); 845 const struct ipv6hdr *iph = ipv6_hdr(skb);
788 struct net *net = dev_net(skb->dev); 846 struct net *net = dev_net(skb->dev);
789 int flags = RT6_LOOKUP_F_HAS_SADDR; 847 int flags = RT6_LOOKUP_F_HAS_SADDR;
790 struct flowi fl = { 848 struct flowi6 fl6 = {
791 .iif = skb->dev->ifindex, 849 .flowi6_iif = skb->dev->ifindex,
792 .fl6_dst = iph->daddr, 850 .daddr = iph->daddr,
793 .fl6_src = iph->saddr, 851 .saddr = iph->saddr,
794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 852 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
795 .mark = skb->mark, 853 .flowi6_mark = skb->mark,
796 .proto = iph->nexthdr, 854 .flowi6_proto = iph->nexthdr,
797 }; 855 };
798 856
799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 857 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
800 flags |= RT6_LOOKUP_F_IFACE; 858 flags |= RT6_LOOKUP_F_IFACE;
801 859
802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); 860 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
803} 861}
804 862
805static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 863static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
806 struct flowi *fl, int flags) 864 struct flowi6 *fl6, int flags)
807{ 865{
808 return ip6_pol_route(net, table, fl->oif, fl, flags); 866 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
809} 867}
810 868
811struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 869struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
812 struct flowi *fl) 870 struct flowi6 *fl6)
813{ 871{
814 int flags = 0; 872 int flags = 0;
815 873
816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) 874 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
817 flags |= RT6_LOOKUP_F_IFACE; 875 flags |= RT6_LOOKUP_F_IFACE;
818 876
819 if (!ipv6_addr_any(&fl->fl6_src)) 877 if (!ipv6_addr_any(&fl6->saddr))
820 flags |= RT6_LOOKUP_F_HAS_SADDR; 878 flags |= RT6_LOOKUP_F_HAS_SADDR;
821 else if (sk) 879 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 880 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
823 881
824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 882 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
825} 883}
826 884
827EXPORT_SYMBOL(ip6_route_output); 885EXPORT_SYMBOL(ip6_route_output);
828 886
829int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 887struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
830{ 888{
831 struct rt6_info *ort = (struct rt6_info *) *dstp; 889 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
832 struct rt6_info *rt = (struct rt6_info *)
833 dst_alloc(&ip6_dst_blackhole_ops);
834 struct dst_entry *new = NULL; 890 struct dst_entry *new = NULL;
835 891
892 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
836 if (rt) { 893 if (rt) {
894 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
895
837 new = &rt->dst; 896 new = &rt->dst;
838 897
839 atomic_set(&new->__refcnt, 1);
840 new->__use = 1; 898 new->__use = 1;
841 new->input = dst_discard; 899 new->input = dst_discard;
842 new->output = dst_discard; 900 new->output = dst_discard;
843 901
844 dst_copy_metrics(new, &ort->dst); 902 dst_copy_metrics(new, &ort->dst);
845 new->dev = ort->dst.dev;
846 if (new->dev)
847 dev_hold(new->dev);
848 rt->rt6i_idev = ort->rt6i_idev; 903 rt->rt6i_idev = ort->rt6i_idev;
849 if (rt->rt6i_idev) 904 if (rt->rt6i_idev)
850 in6_dev_hold(rt->rt6i_idev); 905 in6_dev_hold(rt->rt6i_idev);
@@ -862,11 +917,9 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
862 dst_free(new); 917 dst_free(new);
863 } 918 }
864 919
865 dst_release(*dstp); 920 dst_release(dst_orig);
866 *dstp = new; 921 return new ? new : ERR_PTR(-ENOMEM);
867 return new ? 0 : -ENOMEM;
868} 922}
869EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
870 923
871/* 924/*
872 * Destination cache support functions 925 * Destination cache support functions
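
Aside: the rework above replaces the int-returning ip6_dst_blackhole(sk, &dstp, fl) with ip6_blackhole_route(), which always consumes dst_orig and reports failure as ERR_PTR(-ENOMEM). A hedged sketch of the new calling convention only (the real call sites are outside this hunk):

        dst = ip6_blackhole_route(net, dst);
        if (IS_ERR(dst))
                return PTR_ERR(dst);    /* -ENOMEM; the original dst is already released */
        /* dst now discards anything sent through it but keeps metrics and idev */
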
@@ -878,9 +931,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
878 931
879 rt = (struct rt6_info *) dst; 932 rt = (struct rt6_info *) dst;
880 933
881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 934 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
935 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
936 if (!rt->rt6i_peer)
937 rt6_bind_peer(rt, 0);
938 rt->rt6i_peer_genid = rt6_peer_genid();
939 }
882 return dst; 940 return dst;
883 941 }
884 return NULL; 942 return NULL;
885} 943}
886 944
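
Aside: the added block keeps a cached dst usable across inetpeer invalidations by stamping it with the peer generation it was validated against and re-binding when rt6_peer_genid() has moved on. A generic sketch of that generation-counter pattern, with purely illustrative names (nothing below is a kernel API):

        struct cached_ref {
                void *obj;              /* lazily bound reference */
                int   genid;            /* generation it was validated against */
        };

        static void revalidate(struct cached_ref *c, int current_genid)
        {
                if (c->genid == current_genid)
                        return;                 /* still fresh */
                if (!c->obj)
                        c->obj = bind_obj();    /* hypothetical binder */
                c->genid = current_genid;
        }
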
@@ -931,7 +989,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
931 dst_metric_set(dst, RTAX_FEATURES, features); 989 dst_metric_set(dst, RTAX_FEATURES, features);
932 } 990 }
933 dst_metric_set(dst, RTAX_MTU, mtu); 991 dst_metric_set(dst, RTAX_MTU, mtu);
934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
935 } 992 }
936} 993}
937 994
@@ -985,13 +1042,12 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
985 if (unlikely(idev == NULL)) 1042 if (unlikely(idev == NULL))
986 return NULL; 1043 return NULL;
987 1044
988 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1045 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
989 if (unlikely(rt == NULL)) { 1046 if (unlikely(rt == NULL)) {
990 in6_dev_put(idev); 1047 in6_dev_put(idev);
991 goto out; 1048 goto out;
992 } 1049 }
993 1050
994 dev_hold(dev);
995 if (neigh) 1051 if (neigh)
996 neigh_hold(neigh); 1052 neigh_hold(neigh);
997 else { 1053 else {
@@ -1000,7 +1056,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1000 neigh = NULL; 1056 neigh = NULL;
1001 } 1057 }
1002 1058
1003 rt->rt6i_dev = dev;
1004 rt->rt6i_idev = idev; 1059 rt->rt6i_idev = idev;
1005 rt->rt6i_nexthop = neigh; 1060 rt->rt6i_nexthop = neigh;
1006 atomic_set(&rt->dst.__refcnt, 1); 1061 atomic_set(&rt->dst.__refcnt, 1);
@@ -1028,11 +1083,9 @@ out:
1028 1083
1029int icmp6_dst_gc(void) 1084int icmp6_dst_gc(void)
1030{ 1085{
1031 struct dst_entry *dst, *next, **pprev; 1086 struct dst_entry *dst, **pprev;
1032 int more = 0; 1087 int more = 0;
1033 1088
1034 next = NULL;
1035
1036 spin_lock_bh(&icmp6_dst_lock); 1089 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list; 1090 pprev = &icmp6_dst_gc_list;
1038 1091
@@ -1161,7 +1214,7 @@ int ip6_route_add(struct fib6_config *cfg)
1161 goto out; 1214 goto out;
1162 } 1215 }
1163 1216
1164 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1217 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
1165 1218
1166 if (rt == NULL) { 1219 if (rt == NULL) {
1167 err = -ENOMEM; 1220 err = -ENOMEM;
@@ -1228,7 +1281,7 @@ int ip6_route_add(struct fib6_config *cfg)
1228 } 1281 }
1229 1282
1230 if (cfg->fc_flags & RTF_GATEWAY) { 1283 if (cfg->fc_flags & RTF_GATEWAY) {
1231 struct in6_addr *gw_addr; 1284 const struct in6_addr *gw_addr;
1232 int gwa_type; 1285 int gwa_type;
1233 1286
1234 gw_addr = &cfg->fc_gateway; 1287 gw_addr = &cfg->fc_gateway;
@@ -1281,6 +1334,16 @@ int ip6_route_add(struct fib6_config *cfg)
1281 if (dev == NULL) 1334 if (dev == NULL)
1282 goto out; 1335 goto out;
1283 1336
1337 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1338 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1339 err = -EINVAL;
1340 goto out;
1341 }
1342 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1343 rt->rt6i_prefsrc.plen = 128;
1344 } else
1345 rt->rt6i_prefsrc.plen = 0;
1346
1284 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1347 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1285 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1348 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1286 if (IS_ERR(rt->rt6i_nexthop)) { 1349 if (IS_ERR(rt->rt6i_nexthop)) {
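
Aside: the new block wires preferred-source support into route insertion: fc_prefsrc must already be an address configured on the selected device, otherwise the add fails with -EINVAL, and the address is stored as a /128 rt6key in rt6i_prefsrc. A minimal in-kernel sketch of adding such a route, assuming ordinary route.c context; net, oif, plen, prefix, local_addr and err are placeholders, not from this patch:

        struct fib6_config cfg = {
                .fc_table          = RT6_TABLE_MAIN,
                .fc_metric         = IP6_RT_PRIO_USER,
                .fc_ifindex        = oif,
                .fc_dst_len        = plen,
                .fc_flags          = RTF_UP,
                .fc_nlinfo.nl_net  = net,
        };
        int err;

        ipv6_addr_copy(&cfg.fc_dst, &prefix);
        /* rejected with -EINVAL unless local_addr is configured on oif */
        ipv6_addr_copy(&cfg.fc_prefsrc, &local_addr);
        err = ip6_route_add(&cfg);

From userspace the same field is carried by the RTA_PREFSRC attribute (see the rtm_to_fib6_config hunk further down), which corresponds to the "src" selector of ip -6 route add.
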
@@ -1400,16 +1463,16 @@ static int ip6_route_del(struct fib6_config *cfg)
1400 * Handle redirects 1463 * Handle redirects
1401 */ 1464 */
1402struct ip6rd_flowi { 1465struct ip6rd_flowi {
1403 struct flowi fl; 1466 struct flowi6 fl6;
1404 struct in6_addr gateway; 1467 struct in6_addr gateway;
1405}; 1468};
1406 1469
1407static struct rt6_info *__ip6_route_redirect(struct net *net, 1470static struct rt6_info *__ip6_route_redirect(struct net *net,
1408 struct fib6_table *table, 1471 struct fib6_table *table,
1409 struct flowi *fl, 1472 struct flowi6 *fl6,
1410 int flags) 1473 int flags)
1411{ 1474{
1412 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1475 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1413 struct rt6_info *rt; 1476 struct rt6_info *rt;
1414 struct fib6_node *fn; 1477 struct fib6_node *fn;
1415 1478
@@ -1425,7 +1488,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
1425 */ 1488 */
1426 1489
1427 read_lock_bh(&table->tb6_lock); 1490 read_lock_bh(&table->tb6_lock);
1428 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1491 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1429restart: 1492restart:
1430 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1493 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1431 /* 1494 /*
@@ -1440,7 +1503,7 @@ restart:
1440 continue; 1503 continue;
1441 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1504 if (!(rt->rt6i_flags & RTF_GATEWAY))
1442 continue; 1505 continue;
1443 if (fl->oif != rt->rt6i_dev->ifindex) 1506 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1444 continue; 1507 continue;
1445 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1508 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1446 continue; 1509 continue;
@@ -1449,7 +1512,7 @@ restart:
1449 1512
1450 if (!rt) 1513 if (!rt)
1451 rt = net->ipv6.ip6_null_entry; 1514 rt = net->ipv6.ip6_null_entry;
1452 BACKTRACK(net, &fl->fl6_src); 1515 BACKTRACK(net, &fl6->saddr);
1453out: 1516out:
1454 dst_hold(&rt->dst); 1517 dst_hold(&rt->dst);
1455 1518
@@ -1458,18 +1521,18 @@ out:
1458 return rt; 1521 return rt;
1459}; 1522};
1460 1523
1461static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, 1524static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1462 struct in6_addr *src, 1525 const struct in6_addr *src,
1463 struct in6_addr *gateway, 1526 const struct in6_addr *gateway,
1464 struct net_device *dev) 1527 struct net_device *dev)
1465{ 1528{
1466 int flags = RT6_LOOKUP_F_HAS_SADDR; 1529 int flags = RT6_LOOKUP_F_HAS_SADDR;
1467 struct net *net = dev_net(dev); 1530 struct net *net = dev_net(dev);
1468 struct ip6rd_flowi rdfl = { 1531 struct ip6rd_flowi rdfl = {
1469 .fl = { 1532 .fl6 = {
1470 .oif = dev->ifindex, 1533 .flowi6_oif = dev->ifindex,
1471 .fl6_dst = *dest, 1534 .daddr = *dest,
1472 .fl6_src = *src, 1535 .saddr = *src,
1473 }, 1536 },
1474 }; 1537 };
1475 1538
@@ -1478,12 +1541,12 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1478 if (rt6_need_strict(dest)) 1541 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE; 1542 flags |= RT6_LOOKUP_F_IFACE;
1480 1543
1481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, 1544 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1482 flags, __ip6_route_redirect); 1545 flags, __ip6_route_redirect);
1483} 1546}
1484 1547
1485void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, 1548void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1486 struct in6_addr *saddr, 1549 const struct in6_addr *saddr,
1487 struct neighbour *neigh, u8 *lladdr, int on_link) 1550 struct neighbour *neigh, u8 *lladdr, int on_link)
1488{ 1551{
1489 struct rt6_info *rt, *nrt = NULL; 1552 struct rt6_info *rt, *nrt = NULL;
@@ -1557,7 +1620,7 @@ out:
1557 * i.e. Path MTU discovery 1620 * i.e. Path MTU discovery
1558 */ 1621 */
1559 1622
1560static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, 1623static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1561 struct net *net, u32 pmtu, int ifindex) 1624 struct net *net, u32 pmtu, int ifindex)
1562{ 1625{
1563 struct rt6_info *rt, *nrt; 1626 struct rt6_info *rt, *nrt;
@@ -1642,7 +1705,7 @@ out:
1642 dst_release(&rt->dst); 1705 dst_release(&rt->dst);
1643} 1706}
1644 1707
1645void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1708void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1646 struct net_device *dev, u32 pmtu) 1709 struct net_device *dev, u32 pmtu)
1647{ 1710{
1648 struct net *net = dev_net(dev); 1711 struct net *net = dev_net(dev);
@@ -1670,7 +1733,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1670static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1733static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1671{ 1734{
1672 struct net *net = dev_net(ort->rt6i_dev); 1735 struct net *net = dev_net(ort->rt6i_dev);
1673 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1736 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1737 ort->dst.dev);
1674 1738
1675 if (rt) { 1739 if (rt) {
1676 rt->dst.input = ort->dst.input; 1740 rt->dst.input = ort->dst.input;
@@ -1678,9 +1742,6 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1678 1742
1679 dst_copy_metrics(&rt->dst, &ort->dst); 1743 dst_copy_metrics(&rt->dst, &ort->dst);
1680 rt->dst.error = ort->dst.error; 1744 rt->dst.error = ort->dst.error;
1681 rt->dst.dev = ort->dst.dev;
1682 if (rt->dst.dev)
1683 dev_hold(rt->dst.dev);
1684 rt->rt6i_idev = ort->rt6i_idev; 1745 rt->rt6i_idev = ort->rt6i_idev;
1685 if (rt->rt6i_idev) 1746 if (rt->rt6i_idev)
1686 in6_dev_hold(rt->rt6i_idev); 1747 in6_dev_hold(rt->rt6i_idev);
@@ -1695,6 +1756,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1695#ifdef CONFIG_IPV6_SUBTREES 1756#ifdef CONFIG_IPV6_SUBTREES
1696 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1757 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1697#endif 1758#endif
1759 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1698 rt->rt6i_table = ort->rt6i_table; 1760 rt->rt6i_table = ort->rt6i_table;
1699 } 1761 }
1700 return rt; 1762 return rt;
@@ -1702,8 +1764,8 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1702 1764
1703#ifdef CONFIG_IPV6_ROUTE_INFO 1765#ifdef CONFIG_IPV6_ROUTE_INFO
1704static struct rt6_info *rt6_get_route_info(struct net *net, 1766static struct rt6_info *rt6_get_route_info(struct net *net,
1705 struct in6_addr *prefix, int prefixlen, 1767 const struct in6_addr *prefix, int prefixlen,
1706 struct in6_addr *gwaddr, int ifindex) 1768 const struct in6_addr *gwaddr, int ifindex)
1707{ 1769{
1708 struct fib6_node *fn; 1770 struct fib6_node *fn;
1709 struct rt6_info *rt = NULL; 1771 struct rt6_info *rt = NULL;
@@ -1734,8 +1796,8 @@ out:
1734} 1796}
1735 1797
1736static struct rt6_info *rt6_add_route_info(struct net *net, 1798static struct rt6_info *rt6_add_route_info(struct net *net,
1737 struct in6_addr *prefix, int prefixlen, 1799 const struct in6_addr *prefix, int prefixlen,
1738 struct in6_addr *gwaddr, int ifindex, 1800 const struct in6_addr *gwaddr, int ifindex,
1739 unsigned pref) 1801 unsigned pref)
1740{ 1802{
1741 struct fib6_config cfg = { 1803 struct fib6_config cfg = {
@@ -1763,7 +1825,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
1763} 1825}
1764#endif 1826#endif
1765 1827
1766struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1828struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1767{ 1829{
1768 struct rt6_info *rt; 1830 struct rt6_info *rt;
1769 struct fib6_table *table; 1831 struct fib6_table *table;
@@ -1785,7 +1847,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
1785 return rt; 1847 return rt;
1786} 1848}
1787 1849
1788struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1850struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1789 struct net_device *dev, 1851 struct net_device *dev,
1790 unsigned int pref) 1852 unsigned int pref)
1791{ 1853{
@@ -1950,7 +2012,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1950 int anycast) 2012 int anycast)
1951{ 2013{
1952 struct net *net = dev_net(idev->dev); 2014 struct net *net = dev_net(idev->dev);
1953 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 2015 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2016 net->loopback_dev);
1954 struct neighbour *neigh; 2017 struct neighbour *neigh;
1955 2018
1956 if (rt == NULL) { 2019 if (rt == NULL) {
@@ -1960,15 +2023,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1960 return ERR_PTR(-ENOMEM); 2023 return ERR_PTR(-ENOMEM);
1961 } 2024 }
1962 2025
1963 dev_hold(net->loopback_dev);
1964 in6_dev_hold(idev); 2026 in6_dev_hold(idev);
1965 2027
1966 rt->dst.flags = DST_HOST; 2028 rt->dst.flags = DST_HOST;
1967 rt->dst.input = ip6_input; 2029 rt->dst.input = ip6_input;
1968 rt->dst.output = ip6_output; 2030 rt->dst.output = ip6_output;
1969 rt->rt6i_dev = net->loopback_dev;
1970 rt->rt6i_idev = idev; 2031 rt->rt6i_idev = idev;
1971 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
1972 rt->dst.obsolete = -1; 2032 rt->dst.obsolete = -1;
1973 2033
1974 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2034 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
@@ -1980,12 +2040,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1980 if (IS_ERR(neigh)) { 2040 if (IS_ERR(neigh)) {
1981 dst_free(&rt->dst); 2041 dst_free(&rt->dst);
1982 2042
1983 /* We are casting this because that is the return 2043 return ERR_CAST(neigh);
1984 * value type. But an errno encoded pointer is the
1985 * same regardless of the underlying pointer type,
1986 * and that's what we are returning. So this is OK.
1987 */
1988 return (struct rt6_info *) neigh;
1989 } 2044 }
1990 rt->rt6i_nexthop = neigh; 2045 rt->rt6i_nexthop = neigh;
1991 2046
@@ -1998,6 +2053,55 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1998 return rt; 2053 return rt;
1999} 2054}
2000 2055
2056int ip6_route_get_saddr(struct net *net,
2057 struct rt6_info *rt,
2058 const struct in6_addr *daddr,
2059 unsigned int prefs,
2060 struct in6_addr *saddr)
2061{
2062 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2063 int err = 0;
2064 if (rt->rt6i_prefsrc.plen)
2065 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2066 else
2067 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2068 daddr, prefs, saddr);
2069 return err;
2070}
2071
2072/* remove deleted ip from prefsrc entries */
2073struct arg_dev_net_ip {
2074 struct net_device *dev;
2075 struct net *net;
2076 struct in6_addr *addr;
2077};
2078
2079static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2080{
2081 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2082 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2083 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2084
2085 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2086 rt != net->ipv6.ip6_null_entry &&
2087 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2088 /* remove prefsrc entry */
2089 rt->rt6i_prefsrc.plen = 0;
2090 }
2091 return 0;
2092}
2093
2094void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2095{
2096 struct net *net = dev_net(ifp->idev->dev);
2097 struct arg_dev_net_ip adni = {
2098 .dev = ifp->idev->dev,
2099 .net = net,
2100 .addr = &ifp->addr,
2101 };
2102 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2103}
2104
2001struct arg_dev_net { 2105struct arg_dev_net {
2002 struct net_device *dev; 2106 struct net_device *dev;
2003 struct net *net; 2107 struct net *net;
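
Aside: ip6_route_get_saddr() gives later callers one place to honour a per-route preferred source: a configured prefsrc wins, otherwise selection falls back to ipv6_dev_get_saddr() on the route's device. A sketch of a consumer, assuming a resolved rt and a partially filled fl6 as in the output path above (the surrounding variables are illustrative):

        struct in6_addr saddr;
        int err;

        err = ip6_route_get_saddr(net, rt, &fl6.daddr,
                                  sk ? inet6_sk(sk)->srcprefs : 0, &saddr);
        if (err)
                return err;             /* no usable source address */
        ipv6_addr_copy(&fl6.saddr, &saddr);
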
@@ -2144,6 +2248,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2144 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2248 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2145 } 2249 }
2146 2250
2251 if (tb[RTA_PREFSRC])
2252 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2253
2147 if (tb[RTA_OIF]) 2254 if (tb[RTA_OIF])
2148 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2255 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2149 2256
@@ -2286,13 +2393,17 @@ static int rt6_fill_node(struct net *net,
2286#endif 2393#endif
2287 NLA_PUT_U32(skb, RTA_IIF, iif); 2394 NLA_PUT_U32(skb, RTA_IIF, iif);
2288 } else if (dst) { 2395 } else if (dst) {
2289 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2290 struct in6_addr saddr_buf; 2396 struct in6_addr saddr_buf;
2291 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2397 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2292 dst, 0, &saddr_buf) == 0)
2293 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2398 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2294 } 2399 }
2295 2400
2401 if (rt->rt6i_prefsrc.plen) {
2402 struct in6_addr saddr_buf;
2403 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2404 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2405 }
2406
2296 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2407 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2297 goto nla_put_failure; 2408 goto nla_put_failure;
2298 2409
@@ -2346,7 +2457,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2346 struct rt6_info *rt; 2457 struct rt6_info *rt;
2347 struct sk_buff *skb; 2458 struct sk_buff *skb;
2348 struct rtmsg *rtm; 2459 struct rtmsg *rtm;
2349 struct flowi fl; 2460 struct flowi6 fl6;
2350 int err, iif = 0; 2461 int err, iif = 0;
2351 2462
2352 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2463 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
@@ -2354,27 +2465,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2354 goto errout; 2465 goto errout;
2355 2466
2356 err = -EINVAL; 2467 err = -EINVAL;
2357 memset(&fl, 0, sizeof(fl)); 2468 memset(&fl6, 0, sizeof(fl6));
2358 2469
2359 if (tb[RTA_SRC]) { 2470 if (tb[RTA_SRC]) {
2360 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2471 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2361 goto errout; 2472 goto errout;
2362 2473
2363 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2474 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2364 } 2475 }
2365 2476
2366 if (tb[RTA_DST]) { 2477 if (tb[RTA_DST]) {
2367 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2478 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2368 goto errout; 2479 goto errout;
2369 2480
2370 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2481 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2371 } 2482 }
2372 2483
2373 if (tb[RTA_IIF]) 2484 if (tb[RTA_IIF])
2374 iif = nla_get_u32(tb[RTA_IIF]); 2485 iif = nla_get_u32(tb[RTA_IIF]);
2375 2486
2376 if (tb[RTA_OIF]) 2487 if (tb[RTA_OIF])
2377 fl.oif = nla_get_u32(tb[RTA_OIF]); 2488 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2378 2489
2379 if (iif) { 2490 if (iif) {
2380 struct net_device *dev; 2491 struct net_device *dev;
@@ -2397,10 +2508,10 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2397 skb_reset_mac_header(skb); 2508 skb_reset_mac_header(skb);
2398 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2509 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2399 2510
2400 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); 2511 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2401 skb_dst_set(skb, &rt->dst); 2512 skb_dst_set(skb, &rt->dst);
2402 2513
2403 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2514 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2404 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2515 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2405 nlh->nlmsg_seq, 0, 0, 0); 2516 nlh->nlmsg_seq, 0, 0, 0);
2406 if (err < 0) { 2517 if (err < 0) {
@@ -2557,14 +2668,16 @@ static
2557int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2668int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2558 void __user *buffer, size_t *lenp, loff_t *ppos) 2669 void __user *buffer, size_t *lenp, loff_t *ppos)
2559{ 2670{
2560 struct net *net = current->nsproxy->net_ns; 2671 struct net *net;
2561 int delay = net->ipv6.sysctl.flush_delay; 2672 int delay;
2562 if (write) { 2673 if (!write)
2563 proc_dointvec(ctl, write, buffer, lenp, ppos);
2564 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2565 return 0;
2566 } else
2567 return -EINVAL; 2674 return -EINVAL;
2675
2676 net = (struct net *)ctl->extra1;
2677 delay = net->ipv6.sysctl.flush_delay;
2678 proc_dointvec(ctl, write, buffer, lenp, ppos);
2679 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2680 return 0;
2568} 2681}
2569 2682
2570ctl_table ipv6_route_table_template[] = { 2683ctl_table ipv6_route_table_template[] = {
@@ -2651,6 +2764,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2651 2764
2652 if (table) { 2765 if (table) {
2653 table[0].data = &net->ipv6.sysctl.flush_delay; 2766 table[0].data = &net->ipv6.sysctl.flush_delay;
2767 table[0].extra1 = net;
2654 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2768 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2655 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2769 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2656 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2770 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
@@ -2684,7 +2798,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2684 net->ipv6.ip6_null_entry->dst.path = 2798 net->ipv6.ip6_null_entry->dst.path =
2685 (struct dst_entry *)net->ipv6.ip6_null_entry; 2799 (struct dst_entry *)net->ipv6.ip6_null_entry;
2686 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2800 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2687 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); 2801 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2802 ip6_template_metrics, true);
2688 2803
2689#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2804#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2690 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2805 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2695,7 +2810,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2695 net->ipv6.ip6_prohibit_entry->dst.path = 2810 net->ipv6.ip6_prohibit_entry->dst.path =
2696 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2811 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2697 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2812 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2698 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); 2813 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2814 ip6_template_metrics, true);
2699 2815
2700 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2816 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2701 sizeof(*net->ipv6.ip6_blk_hole_entry), 2817 sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2705,7 +2821,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2705 net->ipv6.ip6_blk_hole_entry->dst.path = 2821 net->ipv6.ip6_blk_hole_entry->dst.path =
2706 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2822 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2707 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2823 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2708 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); 2824 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2825 ip6_template_metrics, true);
2709#endif 2826#endif
2710 2827
2711 net->ipv6.sysctl.flush_delay = 0; 2828 net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a547..1cca5761aea9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -250,11 +250,6 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
250 250
251 dev_net_set(dev, net); 251 dev_net_set(dev, net);
252 252
253 if (strchr(name, '%')) {
254 if (dev_alloc_name(dev, name) < 0)
255 goto failed_free;
256 }
257
258 nt = netdev_priv(dev); 253 nt = netdev_priv(dev);
259 254
260 nt->parms = *parms; 255 nt->parms = *parms;
@@ -401,18 +396,13 @@ out:
401 return err; 396 return err;
402} 397}
403 398
404static void prl_entry_destroy_rcu(struct rcu_head *head)
405{
406 kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head));
407}
408
409static void prl_list_destroy_rcu(struct rcu_head *head) 399static void prl_list_destroy_rcu(struct rcu_head *head)
410{ 400{
411 struct ip_tunnel_prl_entry *p, *n; 401 struct ip_tunnel_prl_entry *p, *n;
412 402
413 p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); 403 p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
414 do { 404 do {
415 n = p->next; 405 n = rcu_dereference_protected(p->next, 1);
416 kfree(p); 406 kfree(p);
417 p = n; 407 p = n;
418 } while (p); 408 } while (p);
@@ -421,26 +411,28 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
421static int 411static int
422ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) 412ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
423{ 413{
424 struct ip_tunnel_prl_entry *x, **p; 414 struct ip_tunnel_prl_entry *x;
415 struct ip_tunnel_prl_entry __rcu **p;
425 int err = 0; 416 int err = 0;
426 417
427 ASSERT_RTNL(); 418 ASSERT_RTNL();
428 419
429 if (a && a->addr != htonl(INADDR_ANY)) { 420 if (a && a->addr != htonl(INADDR_ANY)) {
430 for (p = &t->prl; *p; p = &(*p)->next) { 421 for (p = &t->prl;
431 if ((*p)->addr == a->addr) { 422 (x = rtnl_dereference(*p)) != NULL;
432 x = *p; 423 p = &x->next) {
424 if (x->addr == a->addr) {
433 *p = x->next; 425 *p = x->next;
434 call_rcu(&x->rcu_head, prl_entry_destroy_rcu); 426 kfree_rcu(x, rcu_head);
435 t->prl_count--; 427 t->prl_count--;
436 goto out; 428 goto out;
437 } 429 }
438 } 430 }
439 err = -ENXIO; 431 err = -ENXIO;
440 } else { 432 } else {
441 if (t->prl) { 433 x = rtnl_dereference(t->prl);
434 if (x) {
442 t->prl_count = 0; 435 t->prl_count = 0;
443 x = t->prl;
444 call_rcu(&x->rcu_head, prl_list_destroy_rcu); 436 call_rcu(&x->rcu_head, prl_list_destroy_rcu);
445 t->prl = NULL; 437 t->prl = NULL;
446 } 438 }
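
Aside: prl_entry_destroy_rcu() existed only to kfree() the container of the rcu_head, which is exactly what kfree_rcu() encodes, so the per-entry callback can be dropped. A minimal sketch of the before/after idiom, using an illustrative struct rather than the real ip_tunnel_prl_entry:

        struct entry {
                struct entry __rcu *next;
                struct rcu_head     rcu_head;
        };

        /* before: call_rcu() plus a callback that only did
         *   kfree(container_of(head, struct entry, rcu_head));
         * after: */
        static void entry_unlink(struct entry __rcu **pp, struct entry *e)
        {
                *pp = e->next;          /* writers serialized (RTNL in this file) */
                kfree_rcu(e, rcu_head); /* freed after a grace period */
        }
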
@@ -450,7 +442,7 @@ out:
450} 442}
451 443
452static int 444static int
453isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) 445isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
454{ 446{
455 struct ip_tunnel_prl_entry *p; 447 struct ip_tunnel_prl_entry *p;
456 int ok = 1; 448 int ok = 1;
@@ -463,7 +455,8 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
463 else 455 else
464 skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; 456 skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
465 } else { 457 } else {
466 struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr; 458 const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
459
467 if (ipv6_addr_is_isatap(addr6) && 460 if (ipv6_addr_is_isatap(addr6) &&
468 (addr6->s6_addr32[3] == iph->saddr) && 461 (addr6->s6_addr32[3] == iph->saddr) &&
469 ipv6_chk_prefix(addr6, t->dev)) 462 ipv6_chk_prefix(addr6, t->dev))
@@ -497,7 +490,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
497 8 bytes of packet payload. It means, that precise relaying of 490 8 bytes of packet payload. It means, that precise relaying of
498 ICMP in the real Internet is absolutely infeasible. 491 ICMP in the real Internet is absolutely infeasible.
499 */ 492 */
500 struct iphdr *iph = (struct iphdr*)skb->data; 493 const struct iphdr *iph = (const struct iphdr *)skb->data;
501 const int type = icmp_hdr(skb)->type; 494 const int type = icmp_hdr(skb)->type;
502 const int code = icmp_hdr(skb)->code; 495 const int code = icmp_hdr(skb)->code;
503 struct ip_tunnel *t; 496 struct ip_tunnel *t;
@@ -555,7 +548,7 @@ out:
555 return err; 548 return err;
556} 549}
557 550
558static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 551static inline void ipip6_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
559{ 552{
560 if (INET_ECN_is_ce(iph->tos)) 553 if (INET_ECN_is_ce(iph->tos))
561 IP6_ECN_set_ce(ipv6_hdr(skb)); 554 IP6_ECN_set_ce(ipv6_hdr(skb));
@@ -563,7 +556,7 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
563 556
564static int ipip6_rcv(struct sk_buff *skb) 557static int ipip6_rcv(struct sk_buff *skb)
565{ 558{
566 struct iphdr *iph; 559 const struct iphdr *iph;
567 struct ip_tunnel *tunnel; 560 struct ip_tunnel *tunnel;
568 561
569 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 562 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -619,7 +612,7 @@ out:
619 * comes from 6rd / 6to4 (RFC 3056) addr space. 612 * comes from 6rd / 6to4 (RFC 3056) addr space.
620 */ 613 */
621static inline 614static inline
622__be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel) 615__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel)
623{ 616{
624 __be32 dst = 0; 617 __be32 dst = 0;
625 618
@@ -662,8 +655,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
662{ 655{
663 struct ip_tunnel *tunnel = netdev_priv(dev); 656 struct ip_tunnel *tunnel = netdev_priv(dev);
664 struct pcpu_tstats *tstats; 657 struct pcpu_tstats *tstats;
665 struct iphdr *tiph = &tunnel->parms.iph; 658 const struct iphdr *tiph = &tunnel->parms.iph;
666 struct ipv6hdr *iph6 = ipv6_hdr(skb); 659 const struct ipv6hdr *iph6 = ipv6_hdr(skb);
667 u8 tos = tunnel->parms.iph.tos; 660 u8 tos = tunnel->parms.iph.tos;
668 __be16 df = tiph->frag_off; 661 __be16 df = tiph->frag_off;
669 struct rtable *rt; /* Route to the other host */ 662 struct rtable *rt; /* Route to the other host */
@@ -671,8 +664,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
671 struct iphdr *iph; /* Our new IP header */ 664 struct iphdr *iph; /* Our new IP header */
672 unsigned int max_headroom; /* The extra header space needed */ 665 unsigned int max_headroom; /* The extra header space needed */
673 __be32 dst = tiph->daddr; 666 __be32 dst = tiph->daddr;
667 struct flowi4 fl4;
674 int mtu; 668 int mtu;
675 struct in6_addr *addr6; 669 const struct in6_addr *addr6;
676 int addr_type; 670 int addr_type;
677 671
678 if (skb->protocol != htons(ETH_P_IPV6)) 672 if (skb->protocol != htons(ETH_P_IPV6))
@@ -691,7 +685,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
691 goto tx_error; 685 goto tx_error;
692 } 686 }
693 687
694 addr6 = (struct in6_addr*)&neigh->primary_key; 688 addr6 = (const struct in6_addr*)&neigh->primary_key;
695 addr_type = ipv6_addr_type(addr6); 689 addr_type = ipv6_addr_type(addr6);
696 690
697 if ((addr_type & IPV6_ADDR_UNICAST) && 691 if ((addr_type & IPV6_ADDR_UNICAST) &&
@@ -716,7 +710,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
716 goto tx_error; 710 goto tx_error;
717 } 711 }
718 712
719 addr6 = (struct in6_addr*)&neigh->primary_key; 713 addr6 = (const struct in6_addr*)&neigh->primary_key;
720 addr_type = ipv6_addr_type(addr6); 714 addr_type = ipv6_addr_type(addr6);
721 715
722 if (addr_type == IPV6_ADDR_ANY) { 716 if (addr_type == IPV6_ADDR_ANY) {
@@ -730,16 +724,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
730 dst = addr6->s6_addr32[3]; 724 dst = addr6->s6_addr32[3];
731 } 725 }
732 726
733 { 727 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
734 struct flowi fl = { .fl4_dst = dst, 728 dst, tiph->saddr,
735 .fl4_src = tiph->saddr, 729 0, 0,
736 .fl4_tos = RT_TOS(tos), 730 IPPROTO_IPV6, RT_TOS(tos),
737 .oif = tunnel->parms.link, 731 tunnel->parms.link);
738 .proto = IPPROTO_IPV6 }; 732 if (IS_ERR(rt)) {
739 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 733 dev->stats.tx_carrier_errors++;
740 dev->stats.tx_carrier_errors++; 734 goto tx_error_icmp;
741 goto tx_error_icmp;
742 }
743 } 735 }
744 if (rt->rt_type != RTN_UNICAST) { 736 if (rt->rt_type != RTN_UNICAST) {
745 ip_rt_put(rt); 737 ip_rt_put(rt);
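
Aside: the open-coded flowi plus ip_route_output_key() pair becomes ip_route_output_ports(), which fills the on-stack flowi4 itself and returns either the route or an ERR_PTR. Because fl4 carries back the addresses the lookup actually chose, the encapsulation header below can use fl4.saddr/fl4.daddr instead of rt->rt_src/rt_dst. A hedged sketch of the idiom; net, daddr, saddr, tos and oif are placeholders:

        struct flowi4 fl4;
        struct rtable *rt;

        rt = ip_route_output_ports(net, &fl4, NULL,
                                   daddr, saddr,        /* outer IPv4 endpoints */
                                   0, 0,                /* no ports for IPPROTO_IPV6 */
                                   IPPROTO_IPV6, RT_TOS(tos), oif);
        if (IS_ERR(rt))
                return PTR_ERR(rt);
        /* ... build the outer header from fl4.saddr / fl4.daddr ... */
        ip_rt_put(rt);
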
@@ -826,8 +818,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
826 iph->frag_off = df; 818 iph->frag_off = df;
827 iph->protocol = IPPROTO_IPV6; 819 iph->protocol = IPPROTO_IPV6;
828 iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); 820 iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
829 iph->daddr = rt->rt_dst; 821 iph->daddr = fl4.daddr;
830 iph->saddr = rt->rt_src; 822 iph->saddr = fl4.saddr;
831 823
832 if ((iph->ttl = tiph->ttl) == 0) 824 if ((iph->ttl = tiph->ttl) == 0)
833 iph->ttl = iph6->hop_limit; 825 iph->ttl = iph6->hop_limit;
@@ -849,19 +841,21 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
849{ 841{
850 struct net_device *tdev = NULL; 842 struct net_device *tdev = NULL;
851 struct ip_tunnel *tunnel; 843 struct ip_tunnel *tunnel;
852 struct iphdr *iph; 844 const struct iphdr *iph;
845 struct flowi4 fl4;
853 846
854 tunnel = netdev_priv(dev); 847 tunnel = netdev_priv(dev);
855 iph = &tunnel->parms.iph; 848 iph = &tunnel->parms.iph;
856 849
857 if (iph->daddr) { 850 if (iph->daddr) {
858 struct flowi fl = { .fl4_dst = iph->daddr, 851 struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
859 .fl4_src = iph->saddr, 852 iph->daddr, iph->saddr,
860 .fl4_tos = RT_TOS(iph->tos), 853 0, 0,
861 .oif = tunnel->parms.link, 854 IPPROTO_IPV6,
862 .proto = IPPROTO_IPV6 }; 855 RT_TOS(iph->tos),
863 struct rtable *rt; 856 tunnel->parms.link);
864 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 857
858 if (!IS_ERR(rt)) {
865 tdev = rt->dst.dev; 859 tdev = rt->dst.dev;
866 ip_rt_put(rt); 860 ip_rt_put(rt);
867 } 861 }
@@ -1179,7 +1173,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1179 if (!dev->tstats) 1173 if (!dev->tstats)
1180 return -ENOMEM; 1174 return -ENOMEM;
1181 dev_hold(dev); 1175 dev_hold(dev);
1182 sitn->tunnels_wc[0] = tunnel; 1176 rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
1183 return 0; 1177 return 0;
1184} 1178}
1185 1179
@@ -1196,11 +1190,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
1196 for (prio = 1; prio < 4; prio++) { 1190 for (prio = 1; prio < 4; prio++) {
1197 int h; 1191 int h;
1198 for (h = 0; h < HASH_SIZE; h++) { 1192 for (h = 0; h < HASH_SIZE; h++) {
1199 struct ip_tunnel *t = sitn->tunnels[prio][h]; 1193 struct ip_tunnel *t;
1200 1194
1195 t = rtnl_dereference(sitn->tunnels[prio][h]);
1201 while (t != NULL) { 1196 while (t != NULL) {
1202 unregister_netdevice_queue(t->dev, head); 1197 unregister_netdevice_queue(t->dev, head);
1203 t = t->next; 1198 t = rtnl_dereference(t->next);
1204 } 1199 }
1205 } 1200 }
1206 } 1201 }
@@ -1290,4 +1285,4 @@ static int __init sit_init(void)
1290module_init(sit_init); 1285module_init(sit_init);
1291module_exit(sit_cleanup); 1286module_exit(sit_cleanup);
1292MODULE_LICENSE("GPL"); 1287MODULE_LICENSE("GPL");
1293MODULE_ALIAS("sit0"); 1288MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 09fd34f0dbf2..8b9644a8b697 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -66,7 +66,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
66static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], 66static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
67 ipv6_cookie_scratch); 67 ipv6_cookie_scratch);
68 68
69static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr, 69static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
70 __be16 sport, __be16 dport, u32 count, int c) 70 __be16 sport, __be16 dport, u32 count, int c)
71{ 71{
72 __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch); 72 __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);
@@ -86,7 +86,8 @@ static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
86 return tmp[17]; 86 return tmp[17];
87} 87}
88 88
89static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr, struct in6_addr *daddr, 89static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
90 const struct in6_addr *daddr,
90 __be16 sport, __be16 dport, __u32 sseq, 91 __be16 sport, __be16 dport, __u32 sseq,
91 __u32 count, __u32 data) 92 __u32 count, __u32 data)
92{ 93{
@@ -96,8 +97,8 @@ static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr, struct in6_addr *dadd
96 & COOKIEMASK)); 97 & COOKIEMASK));
97} 98}
98 99
99static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr, 100static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
100 struct in6_addr *daddr, __be16 sport, 101 const struct in6_addr *daddr, __be16 sport,
101 __be16 dport, __u32 sseq, __u32 count, 102 __be16 dport, __u32 sseq, __u32 count,
102 __u32 maxdiff) 103 __u32 maxdiff)
103{ 104{
@@ -116,7 +117,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr,
116 117
117__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) 118__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
118{ 119{
119 struct ipv6hdr *iph = ipv6_hdr(skb); 120 const struct ipv6hdr *iph = ipv6_hdr(skb);
120 const struct tcphdr *th = tcp_hdr(skb); 121 const struct tcphdr *th = tcp_hdr(skb);
121 int mssind; 122 int mssind;
122 const __u16 mss = *mssp; 123 const __u16 mss = *mssp;
@@ -138,7 +139,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
138 139
139static inline int cookie_check(struct sk_buff *skb, __u32 cookie) 140static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
140{ 141{
141 struct ipv6hdr *iph = ipv6_hdr(skb); 142 const struct ipv6hdr *iph = ipv6_hdr(skb);
142 const struct tcphdr *th = tcp_hdr(skb); 143 const struct tcphdr *th = tcp_hdr(skb);
143 __u32 seq = ntohl(th->seq) - 1; 144 __u32 seq = ntohl(th->seq) - 1;
144 __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, 145 __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
@@ -232,23 +233,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
232 */ 233 */
233 { 234 {
234 struct in6_addr *final_p, final; 235 struct in6_addr *final_p, final;
235 struct flowi fl; 236 struct flowi6 fl6;
236 memset(&fl, 0, sizeof(fl)); 237 memset(&fl6, 0, sizeof(fl6));
237 fl.proto = IPPROTO_TCP; 238 fl6.flowi6_proto = IPPROTO_TCP;
238 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 239 ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
239 final_p = fl6_update_dst(&fl, np->opt, &final); 240 final_p = fl6_update_dst(&fl6, np->opt, &final);
240 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 241 ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
241 fl.oif = sk->sk_bound_dev_if; 242 fl6.flowi6_oif = sk->sk_bound_dev_if;
242 fl.mark = sk->sk_mark; 243 fl6.flowi6_mark = sk->sk_mark;
243 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 244 fl6.fl6_dport = inet_rsk(req)->rmt_port;
244 fl.fl_ip_sport = inet_sk(sk)->inet_sport; 245 fl6.fl6_sport = inet_sk(sk)->inet_sport;
245 security_req_classify_flow(req, &fl); 246 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
246 if (ip6_dst_lookup(sk, &dst, &fl)) 247
247 goto out_free; 248 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
248 249 if (IS_ERR(dst))
249 if (final_p)
250 ipv6_addr_copy(&fl.fl6_dst, final_p);
251 if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
252 goto out_free; 250 goto out_free;
253 } 251 }
254 252
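
Aside: the syncookie path, like the TCP connect and error paths converted below, now leans on ip6_dst_lookup_flow(), which folds the old three-step sequence - ip6_dst_lookup(), the optional copy of the routing-header final hop into fl6.daddr, and xfrm_lookup() - into a single call that returns the dst or an ERR_PTR. A condensed sketch of the replacement idiom, assuming the fl6/np/sk context already set up as in the hunk above:

        struct in6_addr *final_p, final;
        struct dst_entry *dst;

        final_p = fl6_update_dst(&fl6, np->opt, &final);
        dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);    /* false: may not sleep */
        if (IS_ERR(dst))
                goto out_free;          /* PTR_ERR(dst) carries the error */
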
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7cb65ef79f9c..6dcf5e7d661b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,16 @@
17 17
18static struct ctl_table empty[1]; 18static struct ctl_table empty[1];
19 19
20static ctl_table ipv6_static_skeleton[] = {
21 {
22 .procname = "neigh",
23 .maxlen = 0,
24 .mode = 0555,
25 .child = empty,
26 },
27 { }
28};
29
20static ctl_table ipv6_table_template[] = { 30static ctl_table ipv6_table_template[] = {
21 { 31 {
22 .procname = "route", 32 .procname = "route",
@@ -37,12 +47,6 @@ static ctl_table ipv6_table_template[] = {
37 .mode = 0644, 47 .mode = 0644,
38 .proc_handler = proc_dointvec 48 .proc_handler = proc_dointvec
39 }, 49 },
40 {
41 .procname = "neigh",
42 .maxlen = 0,
43 .mode = 0555,
44 .child = empty,
45 },
46 { } 50 { }
47}; 51};
48 52
@@ -160,7 +164,7 @@ static struct ctl_table_header *ip6_base;
160 164
161int ipv6_static_sysctl_register(void) 165int ipv6_static_sysctl_register(void)
162{ 166{
163 ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); 167 ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
164 if (ip6_base == NULL) 168 if (ip6_base == NULL)
165 return -ENOMEM; 169 return -ENOMEM;
166 return 0; 170 return 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 20aa95e37359..868366470b4a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -76,8 +76,8 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
76 76
77static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 77static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78static void __tcp_v6_send_check(struct sk_buff *skb, 78static void __tcp_v6_send_check(struct sk_buff *skb,
79 struct in6_addr *saddr, 79 const struct in6_addr *saddr,
80 struct in6_addr *daddr); 80 const struct in6_addr *daddr);
81 81
82static const struct inet_connection_sock_af_ops ipv6_mapped; 82static const struct inet_connection_sock_af_ops ipv6_mapped;
83static const struct inet_connection_sock_af_ops ipv6_specific; 83static const struct inet_connection_sock_af_ops ipv6_specific;
@@ -86,7 +86,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 86static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
87#else 87#else
88static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, 88static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
89 struct in6_addr *addr) 89 const struct in6_addr *addr)
90{ 90{
91 return NULL; 91 return NULL;
92} 92}
@@ -106,8 +106,8 @@ static void tcp_v6_hash(struct sock *sk)
106} 106}
107 107
108static __inline__ __sum16 tcp_v6_check(int len, 108static __inline__ __sum16 tcp_v6_check(int len,
109 struct in6_addr *saddr, 109 const struct in6_addr *saddr,
110 struct in6_addr *daddr, 110 const struct in6_addr *daddr,
111 __wsum base) 111 __wsum base)
112{ 112{
113 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); 113 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
@@ -131,7 +131,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
131 struct tcp_sock *tp = tcp_sk(sk); 131 struct tcp_sock *tp = tcp_sk(sk);
132 struct in6_addr *saddr = NULL, *final_p, final; 132 struct in6_addr *saddr = NULL, *final_p, final;
133 struct rt6_info *rt; 133 struct rt6_info *rt;
134 struct flowi fl; 134 struct flowi6 fl6;
135 struct dst_entry *dst; 135 struct dst_entry *dst;
136 int addr_type; 136 int addr_type;
137 int err; 137 int err;
@@ -142,14 +142,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
142 if (usin->sin6_family != AF_INET6) 142 if (usin->sin6_family != AF_INET6)
143 return -EAFNOSUPPORT; 143 return -EAFNOSUPPORT;
144 144
145 memset(&fl, 0, sizeof(fl)); 145 memset(&fl6, 0, sizeof(fl6));
146 146
147 if (np->sndflow) { 147 if (np->sndflow) {
148 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 148 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
149 IP6_ECN_flow_init(fl.fl6_flowlabel); 149 IP6_ECN_flow_init(fl6.flowlabel);
150 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 150 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
151 struct ip6_flowlabel *flowlabel; 151 struct ip6_flowlabel *flowlabel;
152 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 152 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
153 if (flowlabel == NULL) 153 if (flowlabel == NULL)
154 return -EINVAL; 154 return -EINVAL;
155 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); 155 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
@@ -195,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
195 } 195 }
196 196
197 ipv6_addr_copy(&np->daddr, &usin->sin6_addr); 197 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
198 np->flow_label = fl.fl6_flowlabel; 198 np->flow_label = fl6.flowlabel;
199 199
200 /* 200 /*
201 * TCP over IPv4 201 * TCP over IPv4
@@ -242,35 +242,27 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
242 if (!ipv6_addr_any(&np->rcv_saddr)) 242 if (!ipv6_addr_any(&np->rcv_saddr))
243 saddr = &np->rcv_saddr; 243 saddr = &np->rcv_saddr;
244 244
245 fl.proto = IPPROTO_TCP; 245 fl6.flowi6_proto = IPPROTO_TCP;
246 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 246 ipv6_addr_copy(&fl6.daddr, &np->daddr);
247 ipv6_addr_copy(&fl.fl6_src, 247 ipv6_addr_copy(&fl6.saddr,
248 (saddr ? saddr : &np->saddr)); 248 (saddr ? saddr : &np->saddr));
249 fl.oif = sk->sk_bound_dev_if; 249 fl6.flowi6_oif = sk->sk_bound_dev_if;
250 fl.mark = sk->sk_mark; 250 fl6.flowi6_mark = sk->sk_mark;
251 fl.fl_ip_dport = usin->sin6_port; 251 fl6.fl6_dport = usin->sin6_port;
252 fl.fl_ip_sport = inet->inet_sport; 252 fl6.fl6_sport = inet->inet_sport;
253 253
254 final_p = fl6_update_dst(&fl, np->opt, &final); 254 final_p = fl6_update_dst(&fl6, np->opt, &final);
255 255
256 security_sk_classify_flow(sk, &fl); 256 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
257 257
258 err = ip6_dst_lookup(sk, &dst, &fl); 258 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
259 if (err) 259 if (IS_ERR(dst)) {
260 err = PTR_ERR(dst);
260 goto failure; 261 goto failure;
261 if (final_p)
262 ipv6_addr_copy(&fl.fl6_dst, final_p);
263
264 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
265 if (err < 0) {
266 if (err == -EREMOTE)
267 err = ip6_dst_blackhole(sk, &dst, &fl);
268 if (err < 0)
269 goto failure;
270 } 262 }
271 263
272 if (saddr == NULL) { 264 if (saddr == NULL) {
273 saddr = &fl.fl6_src; 265 saddr = &fl6.saddr;
274 ipv6_addr_copy(&np->rcv_saddr, saddr); 266 ipv6_addr_copy(&np->rcv_saddr, saddr);
275 } 267 }
276 268
@@ -339,7 +331,7 @@ failure:
339static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 331static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
340 u8 type, u8 code, int offset, __be32 info) 332 u8 type, u8 code, int offset, __be32 info)
341{ 333{
342 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; 334 const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
343 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 335 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
344 struct ipv6_pinfo *np; 336 struct ipv6_pinfo *np;
345 struct sock *sk; 337 struct sock *sk;
@@ -385,7 +377,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
385 np = inet6_sk(sk); 377 np = inet6_sk(sk);
386 378
387 if (type == ICMPV6_PKT_TOOBIG) { 379 if (type == ICMPV6_PKT_TOOBIG) {
388 struct dst_entry *dst = NULL; 380 struct dst_entry *dst;
389 381
390 if (sock_owned_by_user(sk)) 382 if (sock_owned_by_user(sk))
391 goto out; 383 goto out;
@@ -397,29 +389,25 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
397 389
398 if (dst == NULL) { 390 if (dst == NULL) {
399 struct inet_sock *inet = inet_sk(sk); 391 struct inet_sock *inet = inet_sk(sk);
400 struct flowi fl; 392 struct flowi6 fl6;
401 393
402 /* BUGGG_FUTURE: Again, it is not clear how 394 /* BUGGG_FUTURE: Again, it is not clear how
403 to handle rthdr case. Ignore this complexity 395 to handle rthdr case. Ignore this complexity
404 for now. 396 for now.
405 */ 397 */
406 memset(&fl, 0, sizeof(fl)); 398 memset(&fl6, 0, sizeof(fl6));
407 fl.proto = IPPROTO_TCP; 399 fl6.flowi6_proto = IPPROTO_TCP;
408 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 400 ipv6_addr_copy(&fl6.daddr, &np->daddr);
409 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 401 ipv6_addr_copy(&fl6.saddr, &np->saddr);
410 fl.oif = sk->sk_bound_dev_if; 402 fl6.flowi6_oif = sk->sk_bound_dev_if;
411 fl.mark = sk->sk_mark; 403 fl6.flowi6_mark = sk->sk_mark;
412 fl.fl_ip_dport = inet->inet_dport; 404 fl6.fl6_dport = inet->inet_dport;
413 fl.fl_ip_sport = inet->inet_sport; 405 fl6.fl6_sport = inet->inet_sport;
414 security_skb_classify_flow(skb, &fl); 406 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
415 407
416 if ((err = ip6_dst_lookup(sk, &dst, &fl))) { 408 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
417 sk->sk_err_soft = -err; 409 if (IS_ERR(dst)) {
418 goto out; 410 sk->sk_err_soft = -PTR_ERR(dst);
419 }
420
421 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
422 sk->sk_err_soft = -err;
423 goto out; 411 goto out;
424 } 412 }
425 413
@@ -494,38 +482,37 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
494 struct sk_buff * skb; 482 struct sk_buff * skb;
495 struct ipv6_txoptions *opt = NULL; 483 struct ipv6_txoptions *opt = NULL;
496 struct in6_addr * final_p, final; 484 struct in6_addr * final_p, final;
497 struct flowi fl; 485 struct flowi6 fl6;
498 struct dst_entry *dst; 486 struct dst_entry *dst;
499 int err = -1; 487 int err;
500 488
501 memset(&fl, 0, sizeof(fl)); 489 memset(&fl6, 0, sizeof(fl6));
502 fl.proto = IPPROTO_TCP; 490 fl6.flowi6_proto = IPPROTO_TCP;
503 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 491 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
504 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); 492 ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
505 fl.fl6_flowlabel = 0; 493 fl6.flowlabel = 0;
506 fl.oif = treq->iif; 494 fl6.flowi6_oif = treq->iif;
507 fl.mark = sk->sk_mark; 495 fl6.flowi6_mark = sk->sk_mark;
508 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 496 fl6.fl6_dport = inet_rsk(req)->rmt_port;
509 fl.fl_ip_sport = inet_rsk(req)->loc_port; 497 fl6.fl6_sport = inet_rsk(req)->loc_port;
510 security_req_classify_flow(req, &fl); 498 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
511 499
512 opt = np->opt; 500 opt = np->opt;
513 final_p = fl6_update_dst(&fl, opt, &final); 501 final_p = fl6_update_dst(&fl6, opt, &final);
514 502
515 err = ip6_dst_lookup(sk, &dst, &fl); 503 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
516 if (err) 504 if (IS_ERR(dst)) {
505 err = PTR_ERR(dst);
506 dst = NULL;
517 goto done; 507 goto done;
518 if (final_p) 508 }
519 ipv6_addr_copy(&fl.fl6_dst, final_p);
520 if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
521 goto done;
522
523 skb = tcp_make_synack(sk, dst, req, rvp); 509 skb = tcp_make_synack(sk, dst, req, rvp);
510 err = -ENOMEM;
524 if (skb) { 511 if (skb) {
525 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); 512 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
526 513
527 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 514 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
528 err = ip6_xmit(sk, skb, &fl, opt); 515 err = ip6_xmit(sk, skb, &fl6, opt);
529 err = net_xmit_eval(err); 516 err = net_xmit_eval(err);
530 } 517 }
531 518
@@ -564,7 +551,7 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
564 551
565#ifdef CONFIG_TCP_MD5SIG 552#ifdef CONFIG_TCP_MD5SIG
566static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, 553static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
567 struct in6_addr *addr) 554 const struct in6_addr *addr)
568{ 555{
569 struct tcp_sock *tp = tcp_sk(sk); 556 struct tcp_sock *tp = tcp_sk(sk);
570 int i; 557 int i;
@@ -593,7 +580,7 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
593 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); 580 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
594} 581}
595 582
596static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, 583static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
597 char *newkey, u8 newkeylen) 584 char *newkey, u8 newkeylen)
598{ 585{
599 /* Add key to the list */ 586 /* Add key to the list */
@@ -658,7 +645,7 @@ static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
658 newkey, newkeylen); 645 newkey, newkeylen);
659} 646}
660 647
661static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer) 648static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
662{ 649{
663 struct tcp_sock *tp = tcp_sk(sk); 650 struct tcp_sock *tp = tcp_sk(sk);
664 int i; 651 int i;
@@ -766,8 +753,8 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
766} 753}
767 754
768static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 755static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
769 struct in6_addr *daddr, 756 const struct in6_addr *daddr,
770 struct in6_addr *saddr, int nbytes) 757 const struct in6_addr *saddr, int nbytes)
771{ 758{
772 struct tcp6_pseudohdr *bp; 759 struct tcp6_pseudohdr *bp;
773 struct scatterlist sg; 760 struct scatterlist sg;
@@ -784,7 +771,7 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
784} 771}
785 772
786static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, 773static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
787 struct in6_addr *daddr, struct in6_addr *saddr, 774 const struct in6_addr *daddr, struct in6_addr *saddr,
788 struct tcphdr *th) 775 struct tcphdr *th)
789{ 776{
790 struct tcp_md5sig_pool *hp; 777 struct tcp_md5sig_pool *hp;
@@ -820,7 +807,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
820 struct sock *sk, struct request_sock *req, 807 struct sock *sk, struct request_sock *req,
821 struct sk_buff *skb) 808 struct sk_buff *skb)
822{ 809{
823 struct in6_addr *saddr, *daddr; 810 const struct in6_addr *saddr, *daddr;
824 struct tcp_md5sig_pool *hp; 811 struct tcp_md5sig_pool *hp;
825 struct hash_desc *desc; 812 struct hash_desc *desc;
826 struct tcphdr *th = tcp_hdr(skb); 813 struct tcphdr *th = tcp_hdr(skb);
@@ -832,7 +819,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
832 saddr = &inet6_rsk(req)->loc_addr; 819 saddr = &inet6_rsk(req)->loc_addr;
833 daddr = &inet6_rsk(req)->rmt_addr; 820 daddr = &inet6_rsk(req)->rmt_addr;
834 } else { 821 } else {
835 struct ipv6hdr *ip6h = ipv6_hdr(skb); 822 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
836 saddr = &ip6h->saddr; 823 saddr = &ip6h->saddr;
837 daddr = &ip6h->daddr; 824 daddr = &ip6h->daddr;
838 } 825 }
@@ -870,7 +857,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
870{ 857{
871 __u8 *hash_location = NULL; 858 __u8 *hash_location = NULL;
872 struct tcp_md5sig_key *hash_expected; 859 struct tcp_md5sig_key *hash_expected;
873 struct ipv6hdr *ip6h = ipv6_hdr(skb); 860 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
874 struct tcphdr *th = tcp_hdr(skb); 861 struct tcphdr *th = tcp_hdr(skb);
875 int genhash; 862 int genhash;
876 u8 newhash[16]; 863 u8 newhash[16];
@@ -928,7 +915,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
928#endif 915#endif
929 916
930static void __tcp_v6_send_check(struct sk_buff *skb, 917static void __tcp_v6_send_check(struct sk_buff *skb,
931 struct in6_addr *saddr, struct in6_addr *daddr) 918 const struct in6_addr *saddr, const struct in6_addr *daddr)
932{ 919{
933 struct tcphdr *th = tcp_hdr(skb); 920 struct tcphdr *th = tcp_hdr(skb);
934 921
@@ -952,7 +939,7 @@ static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
952 939
953static int tcp_v6_gso_send_check(struct sk_buff *skb) 940static int tcp_v6_gso_send_check(struct sk_buff *skb)
954{ 941{
955 struct ipv6hdr *ipv6h; 942 const struct ipv6hdr *ipv6h;
956 struct tcphdr *th; 943 struct tcphdr *th;
957 944
958 if (!pskb_may_pull(skb, sizeof(*th))) 945 if (!pskb_may_pull(skb, sizeof(*th)))
@@ -970,7 +957,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
970static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, 957static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
971 struct sk_buff *skb) 958 struct sk_buff *skb)
972{ 959{
973 struct ipv6hdr *iph = skb_gro_network_header(skb); 960 const struct ipv6hdr *iph = skb_gro_network_header(skb);
974 961
975 switch (skb->ip_summed) { 962 switch (skb->ip_summed) {
976 case CHECKSUM_COMPLETE: 963 case CHECKSUM_COMPLETE:
@@ -991,7 +978,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
991 978
992static int tcp6_gro_complete(struct sk_buff *skb) 979static int tcp6_gro_complete(struct sk_buff *skb)
993{ 980{
994 struct ipv6hdr *iph = ipv6_hdr(skb); 981 const struct ipv6hdr *iph = ipv6_hdr(skb);
995 struct tcphdr *th = tcp_hdr(skb); 982 struct tcphdr *th = tcp_hdr(skb);
996 983
997 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), 984 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
@@ -1006,7 +993,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
1006{ 993{
1007 struct tcphdr *th = tcp_hdr(skb), *t1; 994 struct tcphdr *th = tcp_hdr(skb), *t1;
1008 struct sk_buff *buff; 995 struct sk_buff *buff;
1009 struct flowi fl; 996 struct flowi6 fl6;
1010 struct net *net = dev_net(skb_dst(skb)->dev); 997 struct net *net = dev_net(skb_dst(skb)->dev);
1011 struct sock *ctl_sk = net->ipv6.tcp_sk; 998 struct sock *ctl_sk = net->ipv6.tcp_sk;
1012 unsigned int tot_len = sizeof(struct tcphdr); 999 unsigned int tot_len = sizeof(struct tcphdr);
@@ -1060,34 +1047,33 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
1060 } 1047 }
1061#endif 1048#endif
1062 1049
1063 memset(&fl, 0, sizeof(fl)); 1050 memset(&fl6, 0, sizeof(fl6));
1064 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); 1051 ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
1065 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); 1052 ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);
1066 1053
1067 buff->ip_summed = CHECKSUM_PARTIAL; 1054 buff->ip_summed = CHECKSUM_PARTIAL;
1068 buff->csum = 0; 1055 buff->csum = 0;
1069 1056
1070 __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); 1057 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
1071 1058
1072 fl.proto = IPPROTO_TCP; 1059 fl6.flowi6_proto = IPPROTO_TCP;
1073 fl.oif = inet6_iif(skb); 1060 fl6.flowi6_oif = inet6_iif(skb);
1074 fl.fl_ip_dport = t1->dest; 1061 fl6.fl6_dport = t1->dest;
1075 fl.fl_ip_sport = t1->source; 1062 fl6.fl6_sport = t1->source;
1076 security_skb_classify_flow(skb, &fl); 1063 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
1077 1064
1078 /* Pass a socket to ip6_dst_lookup either it is for RST 1065 /* Pass a socket to ip6_dst_lookup either it is for RST
1079 * Underlying function will use this to retrieve the network 1066 * Underlying function will use this to retrieve the network
1080 * namespace 1067 * namespace
1081 */ 1068 */
1082 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { 1069 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
1083 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { 1070 if (!IS_ERR(dst)) {
1084 skb_dst_set(buff, dst); 1071 skb_dst_set(buff, dst);
1085 ip6_xmit(ctl_sk, buff, &fl, NULL); 1072 ip6_xmit(ctl_sk, buff, &fl6, NULL);
1086 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 1073 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1087 if (rst) 1074 if (rst)
1088 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 1075 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
1089 return; 1076 return;
1090 }
1091 } 1077 }
1092 1078
1093 kfree_skb(buff); 1079 kfree_skb(buff);
@@ -1323,7 +1309,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1323 tcp_death_row.sysctl_tw_recycle && 1309 tcp_death_row.sysctl_tw_recycle &&
1324 (dst = inet6_csk_route_req(sk, req)) != NULL && 1310 (dst = inet6_csk_route_req(sk, req)) != NULL &&
1325 (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && 1311 (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
1326 ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, 1312 ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
1327 &treq->rmt_addr)) { 1313 &treq->rmt_addr)) {
1328 inet_peer_refcheck(peer); 1314 inet_peer_refcheck(peer);
1329 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1315 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
@@ -1483,7 +1469,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1483 1469
1484 First: no IPv4 options. 1470 First: no IPv4 options.
1485 */ 1471 */
1486 newinet->opt = NULL; 1472 newinet->inet_opt = NULL;
1487 newnp->ipv6_fl_list = NULL; 1473 newnp->ipv6_fl_list = NULL;
1488 1474
1489 /* Clone RX bits */ 1475 /* Clone RX bits */
@@ -1636,10 +1622,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1636 opt_skb = skb_clone(skb, GFP_ATOMIC); 1622 opt_skb = skb_clone(skb, GFP_ATOMIC);
1637 1623
1638 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1624 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1639 TCP_CHECK_TIMER(sk); 1625 sock_rps_save_rxhash(sk, skb->rxhash);
1640 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) 1626 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1641 goto reset; 1627 goto reset;
1642 TCP_CHECK_TIMER(sk);
1643 if (opt_skb) 1628 if (opt_skb)
1644 goto ipv6_pktoptions; 1629 goto ipv6_pktoptions;
1645 return 0; 1630 return 0;
@@ -1665,12 +1650,11 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1665 __kfree_skb(opt_skb); 1650 __kfree_skb(opt_skb);
1666 return 0; 1651 return 0;
1667 } 1652 }
1668 } 1653 } else
1654 sock_rps_save_rxhash(sk, skb->rxhash);
1669 1655
1670 TCP_CHECK_TIMER(sk);
1671 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) 1656 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1672 goto reset; 1657 goto reset;
1673 TCP_CHECK_TIMER(sk);
1674 if (opt_skb) 1658 if (opt_skb)
1675 goto ipv6_pktoptions; 1659 goto ipv6_pktoptions;
1676 return 0; 1660 return 0;
@@ -1718,7 +1702,7 @@ ipv6_pktoptions:
1718static int tcp_v6_rcv(struct sk_buff *skb) 1702static int tcp_v6_rcv(struct sk_buff *skb)
1719{ 1703{
1720 struct tcphdr *th; 1704 struct tcphdr *th;
1721 struct ipv6hdr *hdr; 1705 const struct ipv6hdr *hdr;
1722 struct sock *sk; 1706 struct sock *sk;
1723 int ret; 1707 int ret;
1724 struct net *net = dev_net(skb->dev); 1708 struct net *net = dev_net(skb->dev);
@@ -2044,8 +2028,8 @@ static void get_openreq6(struct seq_file *seq,
2044 struct sock *sk, struct request_sock *req, int i, int uid) 2028 struct sock *sk, struct request_sock *req, int i, int uid)
2045{ 2029{
2046 int ttd = req->expires - jiffies; 2030 int ttd = req->expires - jiffies;
2047 struct in6_addr *src = &inet6_rsk(req)->loc_addr; 2031 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
2048 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; 2032 const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
2049 2033
2050 if (ttd < 0) 2034 if (ttd < 0)
2051 ttd = 0; 2035 ttd = 0;
@@ -2073,7 +2057,7 @@ static void get_openreq6(struct seq_file *seq,
2073 2057
2074static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 2058static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2075{ 2059{
2076 struct in6_addr *dest, *src; 2060 const struct in6_addr *dest, *src;
2077 __u16 destp, srcp; 2061 __u16 destp, srcp;
2078 int timer_active; 2062 int timer_active;
2079 unsigned long timer_expires; 2063 unsigned long timer_expires;
@@ -2130,7 +2114,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2130static void get_timewait6_sock(struct seq_file *seq, 2114static void get_timewait6_sock(struct seq_file *seq,
2131 struct inet_timewait_sock *tw, int i) 2115 struct inet_timewait_sock *tw, int i)
2132{ 2116{
2133 struct in6_addr *dest, *src; 2117 const struct in6_addr *dest, *src;
2134 __u16 destp, srcp; 2118 __u16 destp, srcp;
2135 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); 2119 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
2136 int ttd = tw->tw_ttd - jiffies; 2120 int ttd = tw->tw_ttd - jiffies;
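
The tcp_ipv6.c hunks above all replace the two-step ip6_dst_lookup() + xfrm_lookup() sequence with the combined ip6_dst_lookup_flow() helper, which performs the xfrm lookup internally and reports failure through ERR_PTR(). A minimal sketch of the converted pattern, based only on what the hunks show (the wrapper name example_route_lookup is illustrative, not part of the patch):

	static struct dst_entry *example_route_lookup(struct sock *sk,
						      struct flowi6 *fl6, int *err)
	{
		struct dst_entry *dst;

		/* combined route + xfrm lookup; errors come back as ERR_PTR */
		dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
		if (IS_ERR(dst)) {
			*err = PTR_ERR(dst);
			return NULL;
		}
		return dst;
	}
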
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9a009c66c8a3..fc0c42a88e54 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -311,7 +311,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
311 struct udp_table *udptable) 311 struct udp_table *udptable)
312{ 312{
313 struct sock *sk; 313 struct sock *sk;
314 struct ipv6hdr *iph = ipv6_hdr(skb); 314 const struct ipv6hdr *iph = ipv6_hdr(skb);
315 315
316 if (unlikely(sk = skb_steal_sock(skb))) 316 if (unlikely(sk = skb_steal_sock(skb)))
317 return sk; 317 return sk;
@@ -463,9 +463,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
463 struct udp_table *udptable) 463 struct udp_table *udptable)
464{ 464{
465 struct ipv6_pinfo *np; 465 struct ipv6_pinfo *np;
466 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; 466 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
467 struct in6_addr *saddr = &hdr->saddr; 467 const struct in6_addr *saddr = &hdr->saddr;
468 struct in6_addr *daddr = &hdr->daddr; 468 const struct in6_addr *daddr = &hdr->daddr;
469 struct udphdr *uh = (struct udphdr*)(skb->data+offset); 469 struct udphdr *uh = (struct udphdr*)(skb->data+offset);
470 struct sock *sk; 470 struct sock *sk;
471 int err; 471 int err;
@@ -505,6 +505,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
505 int rc; 505 int rc;
506 int is_udplite = IS_UDPLITE(sk); 506 int is_udplite = IS_UDPLITE(sk);
507 507
508 if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
509 sock_rps_save_rxhash(sk, skb->rxhash);
510
508 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 511 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
509 goto drop; 512 goto drop;
510 513
@@ -550,8 +553,8 @@ drop_no_sk_drops_inc:
550} 553}
551 554
552static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, 555static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
553 __be16 loc_port, struct in6_addr *loc_addr, 556 __be16 loc_port, const struct in6_addr *loc_addr,
554 __be16 rmt_port, struct in6_addr *rmt_addr, 557 __be16 rmt_port, const struct in6_addr *rmt_addr,
555 int dif) 558 int dif)
556{ 559{
557 struct hlist_nulls_node *node; 560 struct hlist_nulls_node *node;
@@ -630,7 +633,7 @@ drop:
630 * so we don't need to lock the hashes. 633 * so we don't need to lock the hashes.
631 */ 634 */
632static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 635static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
633 struct in6_addr *saddr, struct in6_addr *daddr, 636 const struct in6_addr *saddr, const struct in6_addr *daddr,
634 struct udp_table *udptable) 637 struct udp_table *udptable)
635{ 638{
636 struct sock *sk, *stack[256 / sizeof(struct sock *)]; 639 struct sock *sk, *stack[256 / sizeof(struct sock *)];
@@ -713,7 +716,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
713 struct net *net = dev_net(skb->dev); 716 struct net *net = dev_net(skb->dev);
714 struct sock *sk; 717 struct sock *sk;
715 struct udphdr *uh; 718 struct udphdr *uh;
716 struct in6_addr *saddr, *daddr; 719 const struct in6_addr *saddr, *daddr;
717 u32 ulen = 0; 720 u32 ulen = 0;
718 721
719 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 722 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -886,7 +889,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
886 struct udphdr *uh; 889 struct udphdr *uh;
887 struct udp_sock *up = udp_sk(sk); 890 struct udp_sock *up = udp_sk(sk);
888 struct inet_sock *inet = inet_sk(sk); 891 struct inet_sock *inet = inet_sk(sk);
889 struct flowi *fl = &inet->cork.fl; 892 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
890 int err = 0; 893 int err = 0;
891 int is_udplite = IS_UDPLITE(sk); 894 int is_udplite = IS_UDPLITE(sk);
892 __wsum csum = 0; 895 __wsum csum = 0;
@@ -899,23 +902,23 @@ static int udp_v6_push_pending_frames(struct sock *sk)
899 * Create a UDP header 902 * Create a UDP header
900 */ 903 */
901 uh = udp_hdr(skb); 904 uh = udp_hdr(skb);
902 uh->source = fl->fl_ip_sport; 905 uh->source = fl6->fl6_sport;
903 uh->dest = fl->fl_ip_dport; 906 uh->dest = fl6->fl6_dport;
904 uh->len = htons(up->len); 907 uh->len = htons(up->len);
905 uh->check = 0; 908 uh->check = 0;
906 909
907 if (is_udplite) 910 if (is_udplite)
908 csum = udplite_csum_outgoing(sk, skb); 911 csum = udplite_csum_outgoing(sk, skb);
909 else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 912 else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
910 udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, 913 udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
911 up->len); 914 up->len);
912 goto send; 915 goto send;
913 } else 916 } else
914 csum = udp_csum_outgoing(sk, skb); 917 csum = udp_csum_outgoing(sk, skb);
915 918
916 /* add protocol-dependent pseudo-header */ 919 /* add protocol-dependent pseudo-header */
917 uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, 920 uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
918 up->len, fl->proto, csum ); 921 up->len, fl6->flowi6_proto, csum);
919 if (uh->check == 0) 922 if (uh->check == 0)
920 uh->check = CSUM_MANGLED_0; 923 uh->check = CSUM_MANGLED_0;
921 924
@@ -947,7 +950,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
947 struct in6_addr *daddr, *final_p, final; 950 struct in6_addr *daddr, *final_p, final;
948 struct ipv6_txoptions *opt = NULL; 951 struct ipv6_txoptions *opt = NULL;
949 struct ip6_flowlabel *flowlabel = NULL; 952 struct ip6_flowlabel *flowlabel = NULL;
950 struct flowi fl; 953 struct flowi6 fl6;
951 struct dst_entry *dst; 954 struct dst_entry *dst;
952 int addr_len = msg->msg_namelen; 955 int addr_len = msg->msg_namelen;
953 int ulen = len; 956 int ulen = len;
@@ -1030,19 +1033,19 @@ do_udp_sendmsg:
1030 } 1033 }
1031 ulen += sizeof(struct udphdr); 1034 ulen += sizeof(struct udphdr);
1032 1035
1033 memset(&fl, 0, sizeof(fl)); 1036 memset(&fl6, 0, sizeof(fl6));
1034 1037
1035 if (sin6) { 1038 if (sin6) {
1036 if (sin6->sin6_port == 0) 1039 if (sin6->sin6_port == 0)
1037 return -EINVAL; 1040 return -EINVAL;
1038 1041
1039 fl.fl_ip_dport = sin6->sin6_port; 1042 fl6.fl6_dport = sin6->sin6_port;
1040 daddr = &sin6->sin6_addr; 1043 daddr = &sin6->sin6_addr;
1041 1044
1042 if (np->sndflow) { 1045 if (np->sndflow) {
1043 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; 1046 fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
1044 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 1047 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
1045 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 1048 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
1046 if (flowlabel == NULL) 1049 if (flowlabel == NULL)
1047 return -EINVAL; 1050 return -EINVAL;
1048 daddr = &flowlabel->dst; 1051 daddr = &flowlabel->dst;
@@ -1060,38 +1063,38 @@ do_udp_sendmsg:
1060 if (addr_len >= sizeof(struct sockaddr_in6) && 1063 if (addr_len >= sizeof(struct sockaddr_in6) &&
1061 sin6->sin6_scope_id && 1064 sin6->sin6_scope_id &&
1062 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 1065 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
1063 fl.oif = sin6->sin6_scope_id; 1066 fl6.flowi6_oif = sin6->sin6_scope_id;
1064 } else { 1067 } else {
1065 if (sk->sk_state != TCP_ESTABLISHED) 1068 if (sk->sk_state != TCP_ESTABLISHED)
1066 return -EDESTADDRREQ; 1069 return -EDESTADDRREQ;
1067 1070
1068 fl.fl_ip_dport = inet->inet_dport; 1071 fl6.fl6_dport = inet->inet_dport;
1069 daddr = &np->daddr; 1072 daddr = &np->daddr;
1070 fl.fl6_flowlabel = np->flow_label; 1073 fl6.flowlabel = np->flow_label;
1071 connected = 1; 1074 connected = 1;
1072 } 1075 }
1073 1076
1074 if (!fl.oif) 1077 if (!fl6.flowi6_oif)
1075 fl.oif = sk->sk_bound_dev_if; 1078 fl6.flowi6_oif = sk->sk_bound_dev_if;
1076 1079
1077 if (!fl.oif) 1080 if (!fl6.flowi6_oif)
1078 fl.oif = np->sticky_pktinfo.ipi6_ifindex; 1081 fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
1079 1082
1080 fl.mark = sk->sk_mark; 1083 fl6.flowi6_mark = sk->sk_mark;
1081 1084
1082 if (msg->msg_controllen) { 1085 if (msg->msg_controllen) {
1083 opt = &opt_space; 1086 opt = &opt_space;
1084 memset(opt, 0, sizeof(struct ipv6_txoptions)); 1087 memset(opt, 0, sizeof(struct ipv6_txoptions));
1085 opt->tot_len = sizeof(*opt); 1088 opt->tot_len = sizeof(*opt);
1086 1089
1087 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, 1090 err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
1088 &tclass, &dontfrag); 1091 &tclass, &dontfrag);
1089 if (err < 0) { 1092 if (err < 0) {
1090 fl6_sock_release(flowlabel); 1093 fl6_sock_release(flowlabel);
1091 return err; 1094 return err;
1092 } 1095 }
1093 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { 1096 if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
1094 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 1097 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
1095 if (flowlabel == NULL) 1098 if (flowlabel == NULL)
1096 return -EINVAL; 1099 return -EINVAL;
1097 } 1100 }
@@ -1105,42 +1108,35 @@ do_udp_sendmsg:
1105 opt = fl6_merge_options(&opt_space, flowlabel, opt); 1108 opt = fl6_merge_options(&opt_space, flowlabel, opt);
1106 opt = ipv6_fixup_options(&opt_space, opt); 1109 opt = ipv6_fixup_options(&opt_space, opt);
1107 1110
1108 fl.proto = sk->sk_protocol; 1111 fl6.flowi6_proto = sk->sk_protocol;
1109 if (!ipv6_addr_any(daddr)) 1112 if (!ipv6_addr_any(daddr))
1110 ipv6_addr_copy(&fl.fl6_dst, daddr); 1113 ipv6_addr_copy(&fl6.daddr, daddr);
1111 else 1114 else
1112 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 1115 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
1113 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 1116 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
1114 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 1117 ipv6_addr_copy(&fl6.saddr, &np->saddr);
1115 fl.fl_ip_sport = inet->inet_sport; 1118 fl6.fl6_sport = inet->inet_sport;
1116 1119
1117 final_p = fl6_update_dst(&fl, opt, &final); 1120 final_p = fl6_update_dst(&fl6, opt, &final);
1118 if (final_p) 1121 if (final_p)
1119 connected = 0; 1122 connected = 0;
1120 1123
1121 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { 1124 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
1122 fl.oif = np->mcast_oif; 1125 fl6.flowi6_oif = np->mcast_oif;
1123 connected = 0; 1126 connected = 0;
1124 } 1127 }
1125 1128
1126 security_sk_classify_flow(sk, &fl); 1129 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
1127 1130
1128 err = ip6_sk_dst_lookup(sk, &dst, &fl); 1131 dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true);
1129 if (err) 1132 if (IS_ERR(dst)) {
1133 err = PTR_ERR(dst);
1134 dst = NULL;
1130 goto out; 1135 goto out;
1131 if (final_p)
1132 ipv6_addr_copy(&fl.fl6_dst, final_p);
1133
1134 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
1135 if (err < 0) {
1136 if (err == -EREMOTE)
1137 err = ip6_dst_blackhole(sk, &dst, &fl);
1138 if (err < 0)
1139 goto out;
1140 } 1136 }
1141 1137
1142 if (hlimit < 0) { 1138 if (hlimit < 0) {
1143 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 1139 if (ipv6_addr_is_multicast(&fl6.daddr))
1144 hlimit = np->mcast_hops; 1140 hlimit = np->mcast_hops;
1145 else 1141 else
1146 hlimit = np->hop_limit; 1142 hlimit = np->hop_limit;
@@ -1175,7 +1171,7 @@ do_append_data:
1175 up->len += ulen; 1171 up->len += ulen;
1176 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 1172 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
1177 err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, 1173 err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
1178 sizeof(struct udphdr), hlimit, tclass, opt, &fl, 1174 sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
1179 (struct rt6_info*)dst, 1175 (struct rt6_info*)dst,
1180 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); 1176 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
1181 if (err) 1177 if (err)
@@ -1188,10 +1184,10 @@ do_append_data:
1188 if (dst) { 1184 if (dst) {
1189 if (connected) { 1185 if (connected) {
1190 ip6_dst_store(sk, dst, 1186 ip6_dst_store(sk, dst,
1191 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? 1187 ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
1192 &np->daddr : NULL, 1188 &np->daddr : NULL,
1193#ifdef CONFIG_IPV6_SUBTREES 1189#ifdef CONFIG_IPV6_SUBTREES
1194 ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? 1190 ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
1195 &np->saddr : 1191 &np->saddr :
1196#endif 1192#endif
1197 NULL); 1193 NULL);
@@ -1282,7 +1278,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
1282 1278
1283static int udp6_ufo_send_check(struct sk_buff *skb) 1279static int udp6_ufo_send_check(struct sk_buff *skb)
1284{ 1280{
1285 struct ipv6hdr *ipv6h; 1281 const struct ipv6hdr *ipv6h;
1286 struct udphdr *uh; 1282 struct udphdr *uh;
1287 1283
1288 if (!pskb_may_pull(skb, sizeof(*uh))) 1284 if (!pskb_may_pull(skb, sizeof(*uh)))
@@ -1299,7 +1295,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
1299 return 0; 1295 return 0;
1300} 1296}
1301 1297
1302static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) 1298static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
1303{ 1299{
1304 struct sk_buff *segs = ERR_PTR(-EINVAL); 1300 struct sk_buff *segs = ERR_PTR(-EINVAL);
1305 unsigned int mss; 1301 unsigned int mss;
@@ -1332,14 +1328,14 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features)
1332 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot 1328 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
1333 * do checksum of UDP packets sent as multiple IP fragments. 1329 * do checksum of UDP packets sent as multiple IP fragments.
1334 */ 1330 */
1335 offset = skb->csum_start - skb_headroom(skb); 1331 offset = skb_checksum_start_offset(skb);
1336 csum = skb_checksum(skb, offset, skb->len- offset, 0); 1332 csum = skb_checksum(skb, offset, skb->len- offset, 0);
1337 offset += skb->csum_offset; 1333 offset += skb->csum_offset;
1338 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 1334 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1339 skb->ip_summed = CHECKSUM_NONE; 1335 skb->ip_summed = CHECKSUM_NONE;
1340 1336
1341 /* Check if there is enough headroom to insert fragment header. */ 1337 /* Check if there is enough headroom to insert fragment header. */
1342 if ((skb_headroom(skb) < frag_hdr_sz) && 1338 if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
1343 pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) 1339 pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
1344 goto out; 1340 goto out;
1345 1341
@@ -1386,7 +1382,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
1386{ 1382{
1387 struct inet_sock *inet = inet_sk(sp); 1383 struct inet_sock *inet = inet_sk(sp);
1388 struct ipv6_pinfo *np = inet6_sk(sp); 1384 struct ipv6_pinfo *np = inet6_sk(sp);
1389 struct in6_addr *dest, *src; 1385 const struct in6_addr *dest, *src;
1390 __u16 destp, srcp; 1386 __u16 destp, srcp;
1391 1387
1392 dest = &np->daddr; 1388 dest = &np->daddr;
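
The udp.c conversion above is mostly a mechanical renaming of struct flowi members to their struct flowi6 equivalents. A short summary of the mapping, using the locals (np, inet, daddr) from udpv6_sendmsg() as they appear in the hunks:

	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = sk->sk_protocol;     /* was fl.proto         */
	fl6.flowi6_oif   = sk->sk_bound_dev_if; /* was fl.oif           */
	fl6.flowi6_mark  = sk->sk_mark;         /* was fl.mark          */
	fl6.flowlabel    = np->flow_label;      /* was fl.fl6_flowlabel */
	fl6.fl6_dport    = inet->inet_dport;    /* was fl.fl_ip_dport   */
	fl6.fl6_sport    = inet->inet_sport;    /* was fl.fl_ip_sport   */
	ipv6_addr_copy(&fl6.daddr, daddr);      /* was fl.fl6_dst       */
	ipv6_addr_copy(&fl6.saddr, &np->saddr); /* was fl.fl6_src       */
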
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index bbd48b101bae..3437d7d4eed6 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -41,10 +41,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
41{ 41{
42 struct ipv6hdr *top_iph; 42 struct ipv6hdr *top_iph;
43 struct ip_beet_phdr *ph; 43 struct ip_beet_phdr *ph;
44 struct iphdr *iphv4;
45 int optlen, hdr_len; 44 int optlen, hdr_len;
46 45
47 iphv4 = ip_hdr(skb);
48 hdr_len = 0; 46 hdr_len = 0;
49 optlen = XFRM_MODE_SKB_CB(skb)->optlen; 47 optlen = XFRM_MODE_SKB_CB(skb)->optlen;
50 if (unlikely(optlen)) 48 if (unlikely(optlen))
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 645cb968d450..4d6edff0498f 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -20,7 +20,7 @@
20 20
21static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) 21static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
22{ 22{
23 struct ipv6hdr *outer_iph = ipv6_hdr(skb); 23 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
24 struct ipv6hdr *inner_iph = ipipv6_hdr(skb); 24 struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
25 25
26 if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) 26 if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
@@ -55,8 +55,8 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
55 dsfield &= ~INET_ECN_MASK; 55 dsfield &= ~INET_ECN_MASK;
56 ipv6_change_dsfield(top_iph, 0, dsfield); 56 ipv6_change_dsfield(top_iph, 0, dsfield);
57 top_iph->hop_limit = ip6_dst_hoplimit(dst->child); 57 top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
58 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); 58 ipv6_addr_copy(&top_iph->saddr, (const struct in6_addr *)&x->props.saddr);
59 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); 59 ipv6_addr_copy(&top_iph->daddr, (const struct in6_addr *)&x->id.daddr);
60 return 0; 60 return 0;
61} 61}
62 62
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8e688b3de9ab..49a91c5f5623 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -79,7 +79,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
79} 79}
80EXPORT_SYMBOL(xfrm6_prepare_output); 80EXPORT_SYMBOL(xfrm6_prepare_output);
81 81
82static int xfrm6_output_finish(struct sk_buff *skb) 82int xfrm6_output_finish(struct sk_buff *skb)
83{ 83{
84#ifdef CONFIG_NETFILTER 84#ifdef CONFIG_NETFILTER
85 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; 85 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
@@ -97,9 +97,9 @@ static int __xfrm6_output(struct sk_buff *skb)
97 if ((x && x->props.mode == XFRM_MODE_TUNNEL) && 97 if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
98 ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 98 ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
99 dst_allfrag(skb_dst(skb)))) { 99 dst_allfrag(skb_dst(skb)))) {
100 return ip6_fragment(skb, xfrm6_output_finish); 100 return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
101 } 101 }
102 return xfrm6_output_finish(skb); 102 return x->outer_mode->afinfo->output_finish(skb);
103} 103}
104 104
105int xfrm6_output(struct sk_buff *skb) 105int xfrm6_output(struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index da87428681cc..d879f7efbd10 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -27,18 +27,19 @@
27static struct xfrm_policy_afinfo xfrm6_policy_afinfo; 27static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
28 28
29static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, 29static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
30 xfrm_address_t *saddr, 30 const xfrm_address_t *saddr,
31 xfrm_address_t *daddr) 31 const xfrm_address_t *daddr)
32{ 32{
33 struct flowi fl = {}; 33 struct flowi6 fl6;
34 struct dst_entry *dst; 34 struct dst_entry *dst;
35 int err; 35 int err;
36 36
37 memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst)); 37 memset(&fl6, 0, sizeof(fl6));
38 memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
38 if (saddr) 39 if (saddr)
39 memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); 40 memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
40 41
41 dst = ip6_route_output(net, NULL, &fl); 42 dst = ip6_route_output(net, NULL, &fl6);
42 43
43 err = dst->error; 44 err = dst->error;
44 if (dst->error) { 45 if (dst->error) {
@@ -67,7 +68,7 @@ static int xfrm6_get_saddr(struct net *net,
67 return 0; 68 return 0;
68} 69}
69 70
70static int xfrm6_get_tos(struct flowi *fl) 71static int xfrm6_get_tos(const struct flowi *fl)
71{ 72{
72 return 0; 73 return 0;
73} 74}
@@ -87,7 +88,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
87} 88}
88 89
89static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 90static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
90 struct flowi *fl) 91 const struct flowi *fl)
91{ 92{
92 struct rt6_info *rt = (struct rt6_info*)xdst->route; 93 struct rt6_info *rt = (struct rt6_info*)xdst->route;
93 94
@@ -120,18 +121,19 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
120static inline void 121static inline void
121_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) 122_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
122{ 123{
124 struct flowi6 *fl6 = &fl->u.ip6;
123 int onlyproto = 0; 125 int onlyproto = 0;
124 u16 offset = skb_network_header_len(skb); 126 u16 offset = skb_network_header_len(skb);
125 struct ipv6hdr *hdr = ipv6_hdr(skb); 127 const struct ipv6hdr *hdr = ipv6_hdr(skb);
126 struct ipv6_opt_hdr *exthdr; 128 struct ipv6_opt_hdr *exthdr;
127 const unsigned char *nh = skb_network_header(skb); 129 const unsigned char *nh = skb_network_header(skb);
128 u8 nexthdr = nh[IP6CB(skb)->nhoff]; 130 u8 nexthdr = nh[IP6CB(skb)->nhoff];
129 131
130 memset(fl, 0, sizeof(struct flowi)); 132 memset(fl6, 0, sizeof(struct flowi6));
131 fl->mark = skb->mark; 133 fl6->flowi6_mark = skb->mark;
132 134
133 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); 135 ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr);
134 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); 136 ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr);
135 137
136 while (nh + offset + 1 < skb->data || 138 while (nh + offset + 1 < skb->data ||
137 pskb_may_pull(skb, nh + offset + 1 - skb->data)) { 139 pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
@@ -158,20 +160,20 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
158 pskb_may_pull(skb, nh + offset + 4 - skb->data))) { 160 pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
159 __be16 *ports = (__be16 *)exthdr; 161 __be16 *ports = (__be16 *)exthdr;
160 162
161 fl->fl_ip_sport = ports[!!reverse]; 163 fl6->fl6_sport = ports[!!reverse];
162 fl->fl_ip_dport = ports[!reverse]; 164 fl6->fl6_dport = ports[!reverse];
163 } 165 }
164 fl->proto = nexthdr; 166 fl6->flowi6_proto = nexthdr;
165 return; 167 return;
166 168
167 case IPPROTO_ICMPV6: 169 case IPPROTO_ICMPV6:
168 if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { 170 if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
169 u8 *icmp = (u8 *)exthdr; 171 u8 *icmp = (u8 *)exthdr;
170 172
171 fl->fl_icmp_type = icmp[0]; 173 fl6->fl6_icmp_type = icmp[0];
172 fl->fl_icmp_code = icmp[1]; 174 fl6->fl6_icmp_code = icmp[1];
173 } 175 }
174 fl->proto = nexthdr; 176 fl6->flowi6_proto = nexthdr;
175 return; 177 return;
176 178
177#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 179#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -180,9 +182,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
180 struct ip6_mh *mh; 182 struct ip6_mh *mh;
181 mh = (struct ip6_mh *)exthdr; 183 mh = (struct ip6_mh *)exthdr;
182 184
183 fl->fl_mh_type = mh->ip6mh_type; 185 fl6->fl6_mh_type = mh->ip6mh_type;
184 } 186 }
185 fl->proto = nexthdr; 187 fl6->flowi6_proto = nexthdr;
186 return; 188 return;
187#endif 189#endif
188 190
@@ -191,8 +193,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
191 case IPPROTO_ESP: 193 case IPPROTO_ESP:
192 case IPPROTO_COMP: 194 case IPPROTO_COMP:
193 default: 195 default:
194 fl->fl_ipsec_spi = 0; 196 fl6->fl6_ipsec_spi = 0;
195 fl->proto = nexthdr; 197 fl6->flowi6_proto = nexthdr;
196 return; 198 return;
197 } 199 }
198 } 200 }
@@ -220,6 +222,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
220 222
221 if (likely(xdst->u.rt6.rt6i_idev)) 223 if (likely(xdst->u.rt6.rt6i_idev))
222 in6_dev_put(xdst->u.rt6.rt6i_idev); 224 in6_dev_put(xdst->u.rt6.rt6i_idev);
225 dst_destroy_metrics_generic(dst);
223 if (likely(xdst->u.rt6.rt6i_peer)) 226 if (likely(xdst->u.rt6.rt6i_peer))
224 inet_putpeer(xdst->u.rt6.rt6i_peer); 227 inet_putpeer(xdst->u.rt6.rt6i_peer);
225 xfrm_dst_destroy(xdst); 228 xfrm_dst_destroy(xdst);
@@ -257,6 +260,7 @@ static struct dst_ops xfrm6_dst_ops = {
257 .protocol = cpu_to_be16(ETH_P_IPV6), 260 .protocol = cpu_to_be16(ETH_P_IPV6),
258 .gc = xfrm6_garbage_collect, 261 .gc = xfrm6_garbage_collect,
259 .update_pmtu = xfrm6_update_pmtu, 262 .update_pmtu = xfrm6_update_pmtu,
263 .cow_metrics = dst_cow_metrics_generic,
260 .destroy = xfrm6_dst_destroy, 264 .destroy = xfrm6_dst_destroy,
261 .ifdown = xfrm6_dst_ifdown, 265 .ifdown = xfrm6_dst_ifdown,
262 .local_out = __ip6_local_out, 266 .local_out = __ip6_local_out,
@@ -272,6 +276,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
272 .get_tos = xfrm6_get_tos, 276 .get_tos = xfrm6_get_tos,
273 .init_path = xfrm6_init_path, 277 .init_path = xfrm6_init_path,
274 .fill_dst = xfrm6_fill_dst, 278 .fill_dst = xfrm6_fill_dst,
279 .blackhole_route = ip6_blackhole_route,
275}; 280};
276 281
277static int __init xfrm6_policy_init(void) 282static int __init xfrm6_policy_init(void)
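
In the xfrm6_policy.c hunk above, _decode_session6() keeps its struct flowi * argument but now fills only the IPv6 member of the flow union. A minimal sketch of that access pattern, assuming only what the hunks show (example_decode is an illustrative name):

	static void example_decode(struct sk_buff *skb, struct flowi *fl)
	{
		struct flowi6 *fl6 = &fl->u.ip6;   /* IPv6 view of the flowi union */
		const struct ipv6hdr *hdr = ipv6_hdr(skb);

		memset(fl6, 0, sizeof(*fl6));
		fl6->flowi6_mark = skb->mark;
		ipv6_addr_copy(&fl6->daddr, &hdr->daddr);
		ipv6_addr_copy(&fl6->saddr, &hdr->saddr);
	}
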
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index a67575d472a3..248f0b2a7ee9 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -20,26 +20,28 @@
20#include <net/addrconf.h> 20#include <net/addrconf.h>
21 21
22static void 22static void
23__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) 23__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
24{ 24{
25 const struct flowi6 *fl6 = &fl->u.ip6;
26
25 /* Initialize temporary selector matching only 27 /* Initialize temporary selector matching only
26 * to current session. */ 28 * to current session. */
27 ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); 29 ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr);
28 ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); 30 ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr);
29 sel->dport = xfrm_flowi_dport(fl); 31 sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
30 sel->dport_mask = htons(0xffff); 32 sel->dport_mask = htons(0xffff);
31 sel->sport = xfrm_flowi_sport(fl); 33 sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
32 sel->sport_mask = htons(0xffff); 34 sel->sport_mask = htons(0xffff);
33 sel->family = AF_INET6; 35 sel->family = AF_INET6;
34 sel->prefixlen_d = 128; 36 sel->prefixlen_d = 128;
35 sel->prefixlen_s = 128; 37 sel->prefixlen_s = 128;
36 sel->proto = fl->proto; 38 sel->proto = fl6->flowi6_proto;
37 sel->ifindex = fl->oif; 39 sel->ifindex = fl6->flowi6_oif;
38} 40}
39 41
40static void 42static void
41xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, 43xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
42 xfrm_address_t *daddr, xfrm_address_t *saddr) 44 const xfrm_address_t *daddr, const xfrm_address_t *saddr)
43{ 45{
44 x->id = tmpl->id; 46 x->id = tmpl->id;
45 if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) 47 if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
@@ -176,6 +178,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
176 .tmpl_sort = __xfrm6_tmpl_sort, 178 .tmpl_sort = __xfrm6_tmpl_sort,
177 .state_sort = __xfrm6_state_sort, 179 .state_sort = __xfrm6_state_sort,
178 .output = xfrm6_output, 180 .output = xfrm6_output,
181 .output_finish = xfrm6_output_finish,
179 .extract_input = xfrm6_extract_input, 182 .extract_input = xfrm6_extract_input,
180 .extract_output = xfrm6_extract_output, 183 .extract_output = xfrm6_extract_output,
181 .transport_finish = xfrm6_transport_finish, 184 .transport_finish = xfrm6_transport_finish,
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2969cad408de..a6770a04e3bd 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -68,7 +68,7 @@ static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock);
68 68
69static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; 69static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
70 70
71static inline unsigned xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr) 71static inline unsigned xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr)
72{ 72{
73 unsigned h; 73 unsigned h;
74 74
@@ -85,7 +85,7 @@ static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi)
85 return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE; 85 return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
86} 86}
87 87
88static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) 88static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
89{ 89{
90 struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); 90 struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
91 struct xfrm6_tunnel_spi *x6spi; 91 struct xfrm6_tunnel_spi *x6spi;
@@ -101,7 +101,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, xfrm_
101 return NULL; 101 return NULL;
102} 102}
103 103
104__be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) 104__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
105{ 105{
106 struct xfrm6_tunnel_spi *x6spi; 106 struct xfrm6_tunnel_spi *x6spi;
107 u32 spi; 107 u32 spi;
@@ -237,10 +237,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
237static int xfrm6_tunnel_rcv(struct sk_buff *skb) 237static int xfrm6_tunnel_rcv(struct sk_buff *skb)
238{ 238{
239 struct net *net = dev_net(skb->dev); 239 struct net *net = dev_net(skb->dev);
240 struct ipv6hdr *iph = ipv6_hdr(skb); 240 const struct ipv6hdr *iph = ipv6_hdr(skb);
241 __be32 spi; 241 __be32 spi;
242 242
243 spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&iph->saddr); 243 spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
244 return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0; 244 return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0;
245} 245}
246 246
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
index 02549cb2c328..e9ad0062fbb6 100644
--- a/net/ipx/Kconfig
+++ b/net/ipx/Kconfig
@@ -3,7 +3,6 @@
3# 3#
4config IPX 4config IPX
5 tristate "The IPX protocol" 5 tristate "The IPX protocol"
6 depends on BKL # should be fixable
7 select LLC 6 select LLC
8 ---help--- 7 ---help---
9 This is support for the Novell networking protocol, IPX, commonly 8 This is support for the Novell networking protocol, IPX, commonly
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index da3d21c41d90..9680226640ef 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -42,7 +42,6 @@
42#include <linux/uio.h> 42#include <linux/uio.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/skbuff.h> 44#include <linux/skbuff.h>
45#include <linux/smp_lock.h>
46#include <linux/socket.h> 45#include <linux/socket.h>
47#include <linux/sockios.h> 46#include <linux/sockios.h>
48#include <linux/string.h> 47#include <linux/string.h>
@@ -149,7 +148,6 @@ static void ipx_destroy_socket(struct sock *sk)
149 ipx_remove_socket(sk); 148 ipx_remove_socket(sk);
150 skb_queue_purge(&sk->sk_receive_queue); 149 skb_queue_purge(&sk->sk_receive_queue);
151 sk_refcnt_debug_dec(sk); 150 sk_refcnt_debug_dec(sk);
152 sock_put(sk);
153} 151}
154 152
155/* 153/*
@@ -1299,7 +1297,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname,
1299 int opt; 1297 int opt;
1300 int rc = -EINVAL; 1298 int rc = -EINVAL;
1301 1299
1302 lock_kernel(); 1300 lock_sock(sk);
1303 if (optlen != sizeof(int)) 1301 if (optlen != sizeof(int))
1304 goto out; 1302 goto out;
1305 1303
@@ -1314,7 +1312,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname,
1314 ipx_sk(sk)->type = opt; 1312 ipx_sk(sk)->type = opt;
1315 rc = 0; 1313 rc = 0;
1316out: 1314out:
1317 unlock_kernel(); 1315 release_sock(sk);
1318 return rc; 1316 return rc;
1319} 1317}
1320 1318
@@ -1326,7 +1324,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname,
1326 int len; 1324 int len;
1327 int rc = -ENOPROTOOPT; 1325 int rc = -ENOPROTOOPT;
1328 1326
1329 lock_kernel(); 1327 lock_sock(sk);
1330 if (!(level == SOL_IPX && optname == IPX_TYPE)) 1328 if (!(level == SOL_IPX && optname == IPX_TYPE))
1331 goto out; 1329 goto out;
1332 1330
@@ -1347,7 +1345,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname,
1347 1345
1348 rc = 0; 1346 rc = 0;
1349out: 1347out:
1350 unlock_kernel(); 1348 release_sock(sk);
1351 return rc; 1349 return rc;
1352} 1350}
1353 1351
@@ -1396,7 +1394,7 @@ static int ipx_release(struct socket *sock)
1396 if (!sk) 1394 if (!sk)
1397 goto out; 1395 goto out;
1398 1396
1399 lock_kernel(); 1397 lock_sock(sk);
1400 if (!sock_flag(sk, SOCK_DEAD)) 1398 if (!sock_flag(sk, SOCK_DEAD))
1401 sk->sk_state_change(sk); 1399 sk->sk_state_change(sk);
1402 1400
@@ -1404,7 +1402,8 @@ static int ipx_release(struct socket *sock)
1404 sock->sk = NULL; 1402 sock->sk = NULL;
1405 sk_refcnt_debug_release(sk); 1403 sk_refcnt_debug_release(sk);
1406 ipx_destroy_socket(sk); 1404 ipx_destroy_socket(sk);
1407 unlock_kernel(); 1405 release_sock(sk);
1406 sock_put(sk);
1408out: 1407out:
1409 return 0; 1408 return 0;
1410} 1409}
@@ -1530,11 +1529,12 @@ out:
1530 1529
1531static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1530static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1532{ 1531{
1532 struct sock *sk = sock->sk;
1533 int rc; 1533 int rc;
1534 1534
1535 lock_kernel(); 1535 lock_sock(sk);
1536 rc = __ipx_bind(sock, uaddr, addr_len); 1536 rc = __ipx_bind(sock, uaddr, addr_len);
1537 unlock_kernel(); 1537 release_sock(sk);
1538 1538
1539 return rc; 1539 return rc;
1540} 1540}
@@ -1551,7 +1551,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
1551 sk->sk_state = TCP_CLOSE; 1551 sk->sk_state = TCP_CLOSE;
1552 sock->state = SS_UNCONNECTED; 1552 sock->state = SS_UNCONNECTED;
1553 1553
1554 lock_kernel(); 1554 lock_sock(sk);
1555 if (addr_len != sizeof(*addr)) 1555 if (addr_len != sizeof(*addr))
1556 goto out; 1556 goto out;
1557 addr = (struct sockaddr_ipx *)uaddr; 1557 addr = (struct sockaddr_ipx *)uaddr;
@@ -1598,7 +1598,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
1598 ipxrtr_put(rt); 1598 ipxrtr_put(rt);
1599 rc = 0; 1599 rc = 0;
1600out: 1600out:
1601 unlock_kernel(); 1601 release_sock(sk);
1602 return rc; 1602 return rc;
1603} 1603}
1604 1604
@@ -1614,7 +1614,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
1614 1614
1615 *uaddr_len = sizeof(struct sockaddr_ipx); 1615 *uaddr_len = sizeof(struct sockaddr_ipx);
1616 1616
1617 lock_kernel(); 1617 lock_sock(sk);
1618 if (peer) { 1618 if (peer) {
1619 rc = -ENOTCONN; 1619 rc = -ENOTCONN;
1620 if (sk->sk_state != TCP_ESTABLISHED) 1620 if (sk->sk_state != TCP_ESTABLISHED)
@@ -1649,19 +1649,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
1649 1649
1650 rc = 0; 1650 rc = 0;
1651out: 1651out:
1652 unlock_kernel(); 1652 release_sock(sk);
1653 return rc;
1654}
1655
1656static unsigned int ipx_datagram_poll(struct file *file, struct socket *sock,
1657 poll_table *wait)
1658{
1659 int rc;
1660
1661 lock_kernel();
1662 rc = datagram_poll(file, sock, wait);
1663 unlock_kernel();
1664
1665 return rc; 1653 return rc;
1666} 1654}
1667 1655
@@ -1736,7 +1724,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
1736 int rc = -EINVAL; 1724 int rc = -EINVAL;
1737 int flags = msg->msg_flags; 1725 int flags = msg->msg_flags;
1738 1726
1739 lock_kernel(); 1727 lock_sock(sk);
1740 /* Socket gets bound below anyway */ 1728 /* Socket gets bound below anyway */
1741/* if (sk->sk_zapped) 1729/* if (sk->sk_zapped)
1742 return -EIO; */ /* Socket not bound */ 1730 return -EIO; */ /* Socket not bound */
@@ -1788,7 +1776,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
1788 if (rc >= 0) 1776 if (rc >= 0)
1789 rc = len; 1777 rc = len;
1790out: 1778out:
1791 unlock_kernel(); 1779 release_sock(sk);
1792 return rc; 1780 return rc;
1793} 1781}
1794 1782
@@ -1803,7 +1791,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
1803 struct sk_buff *skb; 1791 struct sk_buff *skb;
1804 int copied, rc; 1792 int copied, rc;
1805 1793
1806 lock_kernel(); 1794 lock_sock(sk);
1807 /* put the autobinding in */ 1795 /* put the autobinding in */
1808 if (!ipxs->port) { 1796 if (!ipxs->port) {
1809 struct sockaddr_ipx uaddr; 1797 struct sockaddr_ipx uaddr;
@@ -1862,7 +1850,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
1862out_free: 1850out_free:
1863 skb_free_datagram(sk, skb); 1851 skb_free_datagram(sk, skb);
1864out: 1852out:
1865 unlock_kernel(); 1853 release_sock(sk);
1866 return rc; 1854 return rc;
1867} 1855}
1868 1856
@@ -1874,7 +1862,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1874 struct sock *sk = sock->sk; 1862 struct sock *sk = sock->sk;
1875 void __user *argp = (void __user *)arg; 1863 void __user *argp = (void __user *)arg;
1876 1864
1877 lock_kernel(); 1865 lock_sock(sk);
1878 switch (cmd) { 1866 switch (cmd) {
1879 case TIOCOUTQ: 1867 case TIOCOUTQ:
1880 amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); 1868 amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
@@ -1937,7 +1925,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1937 rc = -ENOIOCTLCMD; 1925 rc = -ENOIOCTLCMD;
1938 break; 1926 break;
1939 } 1927 }
1940 unlock_kernel(); 1928 release_sock(sk);
1941 1929
1942 return rc; 1930 return rc;
1943} 1931}
@@ -1984,7 +1972,7 @@ static const struct proto_ops ipx_dgram_ops = {
1984 .socketpair = sock_no_socketpair, 1972 .socketpair = sock_no_socketpair,
1985 .accept = sock_no_accept, 1973 .accept = sock_no_accept,
1986 .getname = ipx_getname, 1974 .getname = ipx_getname,
1987 .poll = ipx_datagram_poll, 1975 .poll = datagram_poll,
1988 .ioctl = ipx_ioctl, 1976 .ioctl = ipx_ioctl,
1989#ifdef CONFIG_COMPAT 1977#ifdef CONFIG_COMPAT
1990 .compat_ioctl = ipx_compat_ioctl, 1978 .compat_ioctl = ipx_compat_ioctl,
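
The af_ipx.c changes above drop the Big Kernel Lock in favour of per-socket locking, and every converted handler follows the same shape. A minimal sketch of that shape (ipx_do_something() is a hypothetical stand-in for the per-operation body):

	static int example_ipx_op(struct socket *sock)
	{
		struct sock *sk = sock->sk;
		int rc;

		lock_sock(sk);                  /* was lock_kernel()   */
		rc = ipx_do_something(sk);      /* hypothetical body   */
		release_sock(sk);               /* was unlock_kernel() */

		return rc;
	}
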
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index c9890e25cd4c..cc616974a447 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1297,8 +1297,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1297 /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ 1297 /* Note : socket.c set MSG_EOR on SEQPACKET sockets */
1298 if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | 1298 if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT |
1299 MSG_NOSIGNAL)) { 1299 MSG_NOSIGNAL)) {
1300 err = -EINVAL; 1300 return -EINVAL;
1301 goto out;
1302 } 1301 }
1303 1302
1304 lock_sock(sk); 1303 lock_sock(sk);
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index e97082017f4f..52079f19bbbe 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -244,14 +244,8 @@ EXPORT_SYMBOL(ircomm_connect_request);
244void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb, 244void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb,
245 struct ircomm_info *info) 245 struct ircomm_info *info)
246{ 246{
247 int clen = 0;
248
249 IRDA_DEBUG(2, "%s()\n", __func__ ); 247 IRDA_DEBUG(2, "%s()\n", __func__ );
250 248
251 /* Check if the packet contains data on the control channel */
252 if (skb->len > 0)
253 clen = skb->data[0];
254
255 /* 249 /*
256 * If there are any data hiding in the control channel, we must 250 * If there are any data hiding in the control channel, we must
257 * deliver it first. The side effect is that the control channel 251 * deliver it first. The side effect is that the control channel
diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c
index 08fb54dc8c41..3b8095c771d4 100644
--- a/net/irda/ircomm/ircomm_lmp.c
+++ b/net/irda/ircomm/ircomm_lmp.c
@@ -75,7 +75,6 @@ static int ircomm_lmp_connect_response(struct ircomm_cb *self,
75 struct sk_buff *userdata) 75 struct sk_buff *userdata)
76{ 76{
77 struct sk_buff *tx_skb; 77 struct sk_buff *tx_skb;
78 int ret;
79 78
80 IRDA_DEBUG(0, "%s()\n", __func__ ); 79 IRDA_DEBUG(0, "%s()\n", __func__ );
81 80
@@ -100,9 +99,7 @@ static int ircomm_lmp_connect_response(struct ircomm_cb *self,
100 tx_skb = userdata; 99 tx_skb = userdata;
101 } 100 }
102 101
103 ret = irlmp_connect_response(self->lsap, tx_skb); 102 return irlmp_connect_response(self->lsap, tx_skb);
104
105 return 0;
106} 103}
107 104
108static int ircomm_lmp_disconnect_request(struct ircomm_cb *self, 105static int ircomm_lmp_disconnect_request(struct ircomm_cb *self,
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index a39cca8331df..b3cc8b3989a9 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -38,6 +38,7 @@
38#include <linux/seq_file.h> 38#include <linux/seq_file.h>
39#include <linux/termios.h> 39#include <linux/termios.h>
40#include <linux/tty.h> 40#include <linux/tty.h>
41#include <linux/tty_flip.h>
41#include <linux/interrupt.h> 42#include <linux/interrupt.h>
42#include <linux/device.h> /* for MODULE_ALIAS_CHARDEV_MAJOR */ 43#include <linux/device.h> /* for MODULE_ALIAS_CHARDEV_MAJOR */
43 44
@@ -1132,7 +1133,6 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
1132 struct sk_buff *skb) 1133 struct sk_buff *skb)
1133{ 1134{
1134 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; 1135 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
1135 struct tty_ldisc *ld;
1136 1136
1137 IRDA_DEBUG(2, "%s()\n", __func__ ); 1137 IRDA_DEBUG(2, "%s()\n", __func__ );
1138 1138
@@ -1161,15 +1161,11 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
1161 } 1161 }
1162 1162
1163 /* 1163 /*
1164 * Just give it over to the line discipline. There is no need to 1164 * Use flip buffer functions since the code may be called from interrupt
1165 * involve the flip buffers, since we are not running in an interrupt 1165 * context
1166 * handler
1167 */ 1166 */
1168 1167 tty_insert_flip_string(self->tty, skb->data, skb->len);
1169 ld = tty_ldisc_ref(self->tty); 1168 tty_flip_buffer_push(self->tty);
1170 if (ld)
1171 ld->ops->receive_buf(self->tty, skb->data, NULL, skb->len);
1172 tty_ldisc_deref(ld);
1173 1169
1174 /* No need to kfree_skb - see ircomm_ttp_data_indication() */ 1170 /* No need to kfree_skb - see ircomm_ttp_data_indication() */
1175 1171
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 24cb3aa2bbfb..77c5e6499f8f 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -189,12 +189,12 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
189} 189}
190 190
191/* 191/*
192 * Function ircomm_tty_tiocmget (tty, file) 192 * Function ircomm_tty_tiocmget (tty)
193 * 193 *
194 * 194 *
195 * 195 *
196 */ 196 */
197int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file) 197int ircomm_tty_tiocmget(struct tty_struct *tty)
198{ 198{
199 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; 199 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
200 unsigned int result; 200 unsigned int result;
@@ -214,12 +214,12 @@ int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file)
214} 214}
215 215
216/* 216/*
217 * Function ircomm_tty_tiocmset (tty, file, set, clear) 217 * Function ircomm_tty_tiocmset (tty, set, clear)
218 * 218 *
219 * 219 *
220 * 220 *
221 */ 221 */
222int ircomm_tty_tiocmset(struct tty_struct *tty, struct file *file, 222int ircomm_tty_tiocmset(struct tty_struct *tty,
223 unsigned int set, unsigned int clear) 223 unsigned int set, unsigned int clear)
224{ 224{
225 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; 225 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
@@ -365,12 +365,12 @@ static int ircomm_tty_set_serial_info(struct ircomm_tty_cb *self,
365} 365}
366 366
367/* 367/*
368 * Function ircomm_tty_ioctl (tty, file, cmd, arg) 368 * Function ircomm_tty_ioctl (tty, cmd, arg)
369 * 369 *
370 * 370 *
371 * 371 *
372 */ 372 */
373int ircomm_tty_ioctl(struct tty_struct *tty, struct file *file, 373int ircomm_tty_ioctl(struct tty_struct *tty,
374 unsigned int cmd, unsigned long arg) 374 unsigned int cmd, unsigned long arg)
375{ 375{
376 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; 376 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 5b743bdd89ba..36477538cea8 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -656,10 +656,16 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
656 n = 1; 656 n = 1;
657 657
658 name_len = fp[n++]; 658 name_len = fp[n++];
659
660 IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;);
661
659 memcpy(name, fp+n, name_len); n+=name_len; 662 memcpy(name, fp+n, name_len); n+=name_len;
660 name[name_len] = '\0'; 663 name[name_len] = '\0';
661 664
662 attr_len = fp[n++]; 665 attr_len = fp[n++];
666
667 IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;);
668
663 memcpy(attr, fp+n, attr_len); n+=attr_len; 669 memcpy(attr, fp+n, attr_len); n+=attr_len;
664 attr[attr_len] = '\0'; 670 attr[attr_len] = '\0';
665 671
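
The two IRDA_ASSERT lines added above bound-check the name and attribute lengths taken from the received frame before they are memcpy()'d into fixed-size buffers, closing a remotely triggerable overflow in the GetValueByClass handler. The same guard in plain C, with a hypothetical buffer size standing in for IAS_MAX_CLASSNAME + 1 / IAS_MAX_ATTRIBNAME + 1:

#include <string.h>

#define NAME_BUF_LEN 64         /* illustrative; models IAS_MAX_CLASSNAME + 1 */

/* Copy one length-prefixed field from an untrusted frame into a fixed
 * buffer, refusing anything that would not fit (sketch only). */
static int copy_len_prefixed(char *dst, const unsigned char *fp,
                             unsigned int *off)
{
        unsigned int n = *off;
        unsigned int len = fp[n++];

        if (len >= NAME_BUF_LEN)        /* mirrors the IRDA_ASSERT bound */
                return -1;

        memcpy(dst, fp + n, len);
        dst[len] = '\0';
        *off = n + len;
        return 0;
}
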
diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c
index 9ff7823abec7..7977be7caf0f 100644
--- a/net/irda/irlan/irlan_filter.c
+++ b/net/irda/irlan/irlan_filter.c
@@ -143,12 +143,8 @@ void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb)
143 */ 143 */
144void irlan_check_command_param(struct irlan_cb *self, char *param, char *value) 144void irlan_check_command_param(struct irlan_cb *self, char *param, char *value)
145{ 145{
146 __u8 *bytes;
147
148 IRDA_DEBUG(4, "%s()\n", __func__ ); 146 IRDA_DEBUG(4, "%s()\n", __func__ );
149 147
150 bytes = value;
151
152 IRDA_ASSERT(self != NULL, return;); 148 IRDA_ASSERT(self != NULL, return;);
153 IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); 149 IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
154 150
diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c
index 5cf5e6c872bb..b8af74ab8b68 100644
--- a/net/irda/irlan/irlan_provider.c
+++ b/net/irda/irlan/irlan_provider.c
@@ -128,7 +128,6 @@ static void irlan_provider_connect_indication(void *instance, void *sap,
128{ 128{
129 struct irlan_cb *self; 129 struct irlan_cb *self;
130 struct tsap_cb *tsap; 130 struct tsap_cb *tsap;
131 __u32 saddr, daddr;
132 131
133 IRDA_DEBUG(0, "%s()\n", __func__ ); 132 IRDA_DEBUG(0, "%s()\n", __func__ );
134 133
@@ -141,8 +140,6 @@ static void irlan_provider_connect_indication(void *instance, void *sap,
141 IRDA_ASSERT(tsap == self->provider.tsap_ctrl,return;); 140 IRDA_ASSERT(tsap == self->provider.tsap_ctrl,return;);
142 IRDA_ASSERT(self->provider.state == IRLAN_IDLE, return;); 141 IRDA_ASSERT(self->provider.state == IRLAN_IDLE, return;);
143 142
144 daddr = irttp_get_daddr(tsap);
145 saddr = irttp_get_saddr(tsap);
146 self->provider.max_sdu_size = max_sdu_size; 143 self->provider.max_sdu_size = max_sdu_size;
147 self->provider.max_header_size = max_header_size; 144 self->provider.max_header_size = max_header_size;
148 145
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index 783c5f367d29..005b424494a0 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -165,7 +165,7 @@ struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
165 165
166 irlap_apply_default_connection_parameters(self); 166 irlap_apply_default_connection_parameters(self);
167 167
168 self->N3 = 3; /* # connections attemts to try before giving up */ 168 self->N3 = 3; /* # connections attempts to try before giving up */
169 169
170 self->state = LAP_NDM; 170 self->state = LAP_NDM;
171 171
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index d434c8880745..ccd214f9d196 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -708,7 +708,7 @@ static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event,
708 708
709 self->frame_sent = TRUE; 709 self->frame_sent = TRUE;
710 } 710 }
711 /* Readjust our timer to accomodate devices 711 /* Readjust our timer to accommodate devices
712 * doing faster or slower discovery than us... 712 * doing faster or slower discovery than us...
713 * Jean II */ 713 * Jean II */
714 irlap_start_query_timer(self, info->S, info->s); 714 irlap_start_query_timer(self, info->S, info->s);
@@ -931,7 +931,7 @@ static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event,
931 irlap_send_rr_frame(self, CMD_FRAME); 931 irlap_send_rr_frame(self, CMD_FRAME);
932 932
933 /* The timer is set to half the normal timer to quickly 933 /* The timer is set to half the normal timer to quickly
934 * detect a failure to negociate the new connection 934 * detect a failure to negotiate the new connection
935 * parameters. IrLAP 6.11.3.2, note 3. 935 * parameters. IrLAP 6.11.3.2, note 3.
936 * Note that currently we don't process this failure 936 * Note that currently we don't process this failure
937 * properly, as we should do a quick disconnect. 937 * properly, as we should do a quick disconnect.
@@ -1052,7 +1052,7 @@ static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event,
1052 return -EPROTO; 1052 return -EPROTO;
1053 } 1053 }
1054 1054
1055 /* Substract space used by this skb */ 1055 /* Subtract space used by this skb */
1056 self->bytes_left -= skb->len; 1056 self->bytes_left -= skb->len;
1057#else /* CONFIG_IRDA_DYNAMIC_WINDOW */ 1057#else /* CONFIG_IRDA_DYNAMIC_WINDOW */
1058 /* Window has been adjusted for the max packet 1058 /* Window has been adjusted for the max packet
@@ -1808,7 +1808,7 @@ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event,
1808 1808
1809 return -EPROTO; /* Try again later */ 1809 return -EPROTO; /* Try again later */
1810 } 1810 }
1811 /* Substract space used by this skb */ 1811 /* Subtract space used by this skb */
1812 self->bytes_left -= skb->len; 1812 self->bytes_left -= skb->len;
1813#else /* CONFIG_IRDA_DYNAMIC_WINDOW */ 1813#else /* CONFIG_IRDA_DYNAMIC_WINDOW */
1814 /* Window has been adjusted for the max packet 1814 /* Window has been adjusted for the max packet
@@ -2227,8 +2227,6 @@ static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event,
2227static int irlap_state_sclose(struct irlap_cb *self, IRLAP_EVENT event, 2227static int irlap_state_sclose(struct irlap_cb *self, IRLAP_EVENT event,
2228 struct sk_buff *skb, struct irlap_info *info) 2228 struct sk_buff *skb, struct irlap_info *info)
2229{ 2229{
2230 int ret = 0;
2231
2232 IRDA_DEBUG(1, "%s()\n", __func__); 2230 IRDA_DEBUG(1, "%s()\n", __func__);
2233 2231
2234 IRDA_ASSERT(self != NULL, return -ENODEV;); 2232 IRDA_ASSERT(self != NULL, return -ENODEV;);
@@ -2289,7 +2287,6 @@ static int irlap_state_sclose(struct irlap_cb *self, IRLAP_EVENT event,
2289 IRDA_DEBUG(1, "%s(), Unknown event %d, (%s)\n", __func__, 2287 IRDA_DEBUG(1, "%s(), Unknown event %d, (%s)\n", __func__,
2290 event, irlap_event[event]); 2288 event, irlap_event[event]);
2291 2289
2292 ret = -EINVAL;
2293 break; 2290 break;
2294 } 2291 }
2295 2292
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 688222cbf55b..8c004161a843 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -848,7 +848,7 @@ void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb)
848 * though IrLAP is currently sending the *last* frame of the 848 * though IrLAP is currently sending the *last* frame of the
849 * tx-window, the driver most likely has only just started 849 * tx-window, the driver most likely has only just started
850 * sending the *first* frame of the same tx-window. 850 * sending the *first* frame of the same tx-window.
851 * I.e. we are always at the very begining of or Tx window. 851 * I.e. we are always at the very beginning of or Tx window.
852 * Now, we are supposed to set the final timer from the end 852 * Now, we are supposed to set the final timer from the end
853 * of our tx-window to let the other peer reply. So, we need 853 * of our tx-window to let the other peer reply. So, we need
854 * to add extra time to compensate for the fact that we 854 * to add extra time to compensate for the fact that we
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
index c1fb5db81042..9505a7d06f1a 100644
--- a/net/irda/irlmp_event.c
+++ b/net/irda/irlmp_event.c
@@ -498,7 +498,7 @@ static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event,
498 switch (event) { 498 switch (event) {
499#ifdef CONFIG_IRDA_ULTRA 499#ifdef CONFIG_IRDA_ULTRA
500 case LM_UDATA_INDICATION: 500 case LM_UDATA_INDICATION:
501 /* This is most bizzare. Those packets are aka unreliable 501 /* This is most bizarre. Those packets are aka unreliable
502 * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA. 502 * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA.
503 * Why do we pass them as Ultra ??? Jean II */ 503 * Why do we pass them as Ultra ??? Jean II */
504 irlmp_connless_data_indication(self, skb); 504 irlmp_connless_data_indication(self, skb);
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 0d82ff5aeff1..979ecb2435a7 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -73,7 +73,7 @@
73 * Infinite thanks to those brave souls for providing the infrastructure 73 * Infinite thanks to those brave souls for providing the infrastructure
74 * upon which IrNET is built. 74 * upon which IrNET is built.
75 * 75 *
76 * Thanks to all my collegues in HP for helping me. In particular, 76 * Thanks to all my colleagues in HP for helping me. In particular,
77 * thanks to Salil Pradhan and Bill Serra for W2k testing... 77 * thanks to Salil Pradhan and Bill Serra for W2k testing...
78 * Thanks to Luiz Magalhaes for irnetd and much testing... 78 * Thanks to Luiz Magalhaes for irnetd and much testing...
79 * 79 *
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7c567b8aa89a..2bb2beb6a373 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -105,6 +105,9 @@ irnet_ctrl_write(irnet_socket * ap,
105 while(isspace(start[length - 1])) 105 while(isspace(start[length - 1]))
106 length--; 106 length--;
107 107
108 DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5,
109 -EINVAL, CTRL_ERROR, "Invalid nickname.\n");
110
108 /* Copy the name for later reuse */ 111 /* Copy the name for later reuse */
109 memcpy(ap->rname, start + 5, length - 5); 112 memcpy(ap->rname, start + 5, length - 5);
110 ap->rname[length - 5] = '\0'; 113 ap->rname[length - 5] = '\0';
diff --git a/net/irda/irproc.c b/net/irda/irproc.c
index 318766e5dbdf..b9ac598e2116 100644
--- a/net/irda/irproc.c
+++ b/net/irda/irproc.c
@@ -65,15 +65,14 @@ static const struct irda_entry irda_dirs[] = {
65void __init irda_proc_register(void) 65void __init irda_proc_register(void)
66{ 66{
67 int i; 67 int i;
68 struct proc_dir_entry *d;
69 68
70 proc_irda = proc_mkdir("irda", init_net.proc_net); 69 proc_irda = proc_mkdir("irda", init_net.proc_net);
71 if (proc_irda == NULL) 70 if (proc_irda == NULL)
72 return; 71 return;
73 72
74 for (i = 0; i < ARRAY_SIZE(irda_dirs); i++) 73 for (i = 0; i < ARRAY_SIZE(irda_dirs); i++)
75 d = proc_create(irda_dirs[i].name, 0, proc_irda, 74 (void) proc_create(irda_dirs[i].name, 0, proc_irda,
76 irda_dirs[i].fops); 75 irda_dirs[i].fops);
77} 76}
78 77
79/* 78/*
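
The irproc change only removes a write-only proc_dir_entry pointer; the registration loop itself is unchanged and its return value is deliberately ignored. Sketched generically, with an illustrative entry table in place of irda_dirs:

#include <linux/proc_fs.h>
#include <linux/kernel.h>

struct my_proc_entry {                  /* models struct irda_entry */
        const char *name;
        const struct file_operations *fops;
};

/* Register a table of procfs files under "parent"; a failed entry is
 * not treated as fatal, hence the explicit (void) cast. */
static void my_proc_register(struct proc_dir_entry *parent,
                             const struct my_proc_entry *tbl, int n)
{
        int i;

        for (i = 0; i < n; i++)
                (void) proc_create(tbl[i].name, 0, parent, tbl[i].fops);
}
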
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 849aaf0dabb5..9715e6e5900b 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -40,7 +40,7 @@
40 * o the hash function for ints is pathetic (but could be changed) 40 * o the hash function for ints is pathetic (but could be changed)
41 * o locking is sometime suspicious (especially during enumeration) 41 * o locking is sometime suspicious (especially during enumeration)
42 * o most users have only a few elements (== overhead) 42 * o most users have only a few elements (== overhead)
43 * o most users never use seach, so don't benefit from hashing 43 * o most users never use search, so don't benefit from hashing
44 * Problem already fixed : 44 * Problem already fixed :
45 * o not 64 bit compliant (most users do hashv = (int) self) 45 * o not 64 bit compliant (most users do hashv = (int) self)
46 * o hashbin_remove() is broken => use hashbin_remove_this() 46 * o hashbin_remove() is broken => use hashbin_remove_this()
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index f6054f9ccbe3..9d9af4606970 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1193,7 +1193,7 @@ EXPORT_SYMBOL(irttp_connect_request);
1193/* 1193/*
1194 * Function irttp_connect_confirm (handle, qos, skb) 1194 * Function irttp_connect_confirm (handle, qos, skb)
1195 * 1195 *
1196 * Sevice user confirms TSAP connection with peer. 1196 * Service user confirms TSAP connection with peer.
1197 * 1197 *
1198 */ 1198 */
1199static void irttp_connect_confirm(void *instance, void *sap, 1199static void irttp_connect_confirm(void *instance, void *sap,
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 2b00974e5bae..1b51bcf42394 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -39,16 +39,16 @@
39#include <net/irda/irlap_frame.h> 39#include <net/irda/irlap_frame.h>
40 40
41/* 41/*
42 * Maximum values of the baud rate we negociate with the other end. 42 * Maximum values of the baud rate we negotiate with the other end.
43 * Most often, you don't have to change that, because Linux-IrDA will 43 * Most often, you don't have to change that, because Linux-IrDA will
44 * use the maximum offered by the link layer, which usually works fine. 44 * use the maximum offered by the link layer, which usually works fine.
45 * In some very rare cases, you may want to limit it to lower speeds... 45 * In some very rare cases, you may want to limit it to lower speeds...
46 */ 46 */
47int sysctl_max_baud_rate = 16000000; 47int sysctl_max_baud_rate = 16000000;
48/* 48/*
49 * Maximum value of the lap disconnect timer we negociate with the other end. 49 * Maximum value of the lap disconnect timer we negotiate with the other end.
50 * Most often, the value below represent the best compromise, but some user 50 * Most often, the value below represent the best compromise, but some user
51 * may want to keep the LAP alive longuer or shorter in case of link failure. 51 * may want to keep the LAP alive longer or shorter in case of link failure.
52 * Remember that the threshold time (early warning) is fixed to 3s... 52 * Remember that the threshold time (early warning) is fixed to 3s...
53 */ 53 */
54int sysctl_max_noreply_time = 12; 54int sysctl_max_noreply_time = 12;
@@ -411,7 +411,7 @@ static void irlap_adjust_qos_settings(struct qos_info *qos)
411 * Fix tx data size according to user limits - Jean II 411 * Fix tx data size according to user limits - Jean II
412 */ 412 */
413 if (qos->data_size.value > sysctl_max_tx_data_size) 413 if (qos->data_size.value > sysctl_max_tx_data_size)
414 /* Allow non discrete adjustement to avoid loosing capacity */ 414 /* Allow non discrete adjustement to avoid losing capacity */
415 qos->data_size.value = sysctl_max_tx_data_size; 415 qos->data_size.value = sysctl_max_tx_data_size;
416 /* 416 /*
417 * Override Tx window if user request it. - Jean II 417 * Override Tx window if user request it. - Jean II
diff --git a/net/irda/timer.c b/net/irda/timer.c
index 0335ba0cc593..f418cb2ad49c 100644
--- a/net/irda/timer.c
+++ b/net/irda/timer.c
@@ -59,7 +59,7 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s)
59 * slot time, plus add some extra time to properly receive the last 59 * slot time, plus add some extra time to properly receive the last
60 * discovery packet (which is longer due to extra discovery info), 60 * discovery packet (which is longer due to extra discovery info),
61 * to avoid messing with for incomming connections requests and 61 * to avoid messing with for incomming connections requests and
62 * to accomodate devices that perform discovery slower than us. 62 * to accommodate devices that perform discovery slower than us.
63 * Jean II */ 63 * Jean II */
64 timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s) 64 timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s)
65 + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT); 65 + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 9637e45744fa..e2013e434d03 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -190,7 +190,6 @@ static int afiucv_pm_freeze(struct device *dev)
190 */ 190 */
191static int afiucv_pm_restore_thaw(struct device *dev) 191static int afiucv_pm_restore_thaw(struct device *dev)
192{ 192{
193 struct iucv_sock *iucv;
194 struct sock *sk; 193 struct sock *sk;
195 struct hlist_node *node; 194 struct hlist_node *node;
196 195
@@ -199,7 +198,6 @@ static int afiucv_pm_restore_thaw(struct device *dev)
199#endif 198#endif
200 read_lock(&iucv_sk_list.lock); 199 read_lock(&iucv_sk_list.lock);
201 sk_for_each(sk, node, &iucv_sk_list.head) { 200 sk_for_each(sk, node, &iucv_sk_list.head) {
202 iucv = iucv_sk(sk);
203 switch (sk->sk_state) { 201 switch (sk->sk_state) {
204 case IUCV_CONNECTED: 202 case IUCV_CONNECTED:
205 sk->sk_err = EPIPE; 203 sk->sk_err = EPIPE;
@@ -250,7 +248,7 @@ static struct device *af_iucv_dev;
250 * PRMDATA[0..6] socket data (max 7 bytes); 248 * PRMDATA[0..6] socket data (max 7 bytes);
251 * PRMDATA[7] socket data length value (len is 0xff - PRMDATA[7]) 249 * PRMDATA[7] socket data length value (len is 0xff - PRMDATA[7])
252 * 250 *
253 * The socket data length is computed by substracting the socket data length 251 * The socket data length is computed by subtracting the socket data length
254 * value from 0xFF. 252 * value from 0xFF.
255 * If the socket data len is greater 7, then PRMDATA can be used for special 253 * If the socket data len is greater 7, then PRMDATA can be used for special
256 * notifications (see iucv_sock_shutdown); and further, 254 * notifications (see iucv_sock_shutdown); and further,
@@ -381,7 +379,6 @@ static void iucv_sock_close(struct sock *sk)
381{ 379{
382 unsigned char user_data[16]; 380 unsigned char user_data[16];
383 struct iucv_sock *iucv = iucv_sk(sk); 381 struct iucv_sock *iucv = iucv_sk(sk);
384 int err;
385 unsigned long timeo; 382 unsigned long timeo;
386 383
387 iucv_sock_clear_timer(sk); 384 iucv_sock_clear_timer(sk);
@@ -394,8 +391,6 @@ static void iucv_sock_close(struct sock *sk)
394 391
395 case IUCV_CONNECTED: 392 case IUCV_CONNECTED:
396 case IUCV_DISCONN: 393 case IUCV_DISCONN:
397 err = 0;
398
399 sk->sk_state = IUCV_CLOSING; 394 sk->sk_state = IUCV_CLOSING;
400 sk->sk_state_change(sk); 395 sk->sk_state_change(sk);
401 396
@@ -404,7 +399,7 @@ static void iucv_sock_close(struct sock *sk)
404 timeo = sk->sk_lingertime; 399 timeo = sk->sk_lingertime;
405 else 400 else
406 timeo = IUCV_DISCONN_TIMEOUT; 401 timeo = IUCV_DISCONN_TIMEOUT;
407 err = iucv_sock_wait(sk, 402 iucv_sock_wait(sk,
408 iucv_sock_in_state(sk, IUCV_CLOSED, 0), 403 iucv_sock_in_state(sk, IUCV_CLOSED, 0),
409 timeo); 404 timeo);
410 } 405 }
@@ -417,7 +412,7 @@ static void iucv_sock_close(struct sock *sk)
417 low_nmcpy(user_data, iucv->src_name); 412 low_nmcpy(user_data, iucv->src_name);
418 high_nmcpy(user_data, iucv->dst_name); 413 high_nmcpy(user_data, iucv->dst_name);
419 ASCEBC(user_data, sizeof(user_data)); 414 ASCEBC(user_data, sizeof(user_data));
420 err = iucv_path_sever(iucv->path, user_data); 415 iucv_path_sever(iucv->path, user_data);
421 iucv_path_free(iucv->path); 416 iucv_path_free(iucv->path);
422 iucv->path = NULL; 417 iucv->path = NULL;
423 } 418 }
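
The af_iucv hunks above strip unused error variables from the close path; the remaining logic is the usual linger handling: mark the socket closing, wait for IUCV_CLOSED for either the SO_LINGER time or a protocol default, then sever the path. The timeout selection in isolation, as a small sketch (the default value is a stand-in for IUCV_DISCONN_TIMEOUT):

#include <net/sock.h>

/* Pick the close timeout: honour SO_LINGER if the application set it,
 * otherwise fall back to the protocol default. */
static unsigned long close_timeout(struct sock *sk, unsigned long dflt)
{
        if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
                return sk->sk_lingertime;
        return dflt;
}
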
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1ee5dab3cfae..a15c01524959 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -128,8 +128,8 @@ struct iucv_irq_list {
128}; 128};
129 129
130static struct iucv_irq_data *iucv_irq_data[NR_CPUS]; 130static struct iucv_irq_data *iucv_irq_data[NR_CPUS];
131static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE; 131static cpumask_t iucv_buffer_cpumask = { CPU_BITS_NONE };
132static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE; 132static cpumask_t iucv_irq_cpumask = { CPU_BITS_NONE };
133 133
134/* 134/*
135 * Queue of interrupt buffers lock for delivery via the tasklet 135 * Queue of interrupt buffers lock for delivery via the tasklet
@@ -406,7 +406,7 @@ static void iucv_allow_cpu(void *data)
406 parm->set_mask.ipmask = 0xf8; 406 parm->set_mask.ipmask = 0xf8;
407 iucv_call_b2f0(IUCV_SETCONTROLMASK, parm); 407 iucv_call_b2f0(IUCV_SETCONTROLMASK, parm);
408 /* Set indication that iucv interrupts are allowed for this cpu. */ 408 /* Set indication that iucv interrupts are allowed for this cpu. */
409 cpu_set(cpu, iucv_irq_cpumask); 409 cpumask_set_cpu(cpu, &iucv_irq_cpumask);
410} 410}
411 411
412/** 412/**
@@ -426,7 +426,7 @@ static void iucv_block_cpu(void *data)
426 iucv_call_b2f0(IUCV_SETMASK, parm); 426 iucv_call_b2f0(IUCV_SETMASK, parm);
427 427
428 /* Clear indication that iucv interrupts are allowed for this cpu. */ 428 /* Clear indication that iucv interrupts are allowed for this cpu. */
429 cpu_clear(cpu, iucv_irq_cpumask); 429 cpumask_clear_cpu(cpu, &iucv_irq_cpumask);
430} 430}
431 431
432/** 432/**
@@ -451,7 +451,7 @@ static void iucv_block_cpu_almost(void *data)
451 iucv_call_b2f0(IUCV_SETCONTROLMASK, parm); 451 iucv_call_b2f0(IUCV_SETCONTROLMASK, parm);
452 452
453 /* Clear indication that iucv interrupts are allowed for this cpu. */ 453 /* Clear indication that iucv interrupts are allowed for this cpu. */
454 cpu_clear(cpu, iucv_irq_cpumask); 454 cpumask_clear_cpu(cpu, &iucv_irq_cpumask);
455} 455}
456 456
457/** 457/**
@@ -466,7 +466,7 @@ static void iucv_declare_cpu(void *data)
466 union iucv_param *parm; 466 union iucv_param *parm;
467 int rc; 467 int rc;
468 468
469 if (cpu_isset(cpu, iucv_buffer_cpumask)) 469 if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask))
470 return; 470 return;
471 471
472 /* Declare interrupt buffer. */ 472 /* Declare interrupt buffer. */
@@ -499,9 +499,9 @@ static void iucv_declare_cpu(void *data)
499 } 499 }
500 500
501 /* Set indication that an iucv buffer exists for this cpu. */ 501 /* Set indication that an iucv buffer exists for this cpu. */
502 cpu_set(cpu, iucv_buffer_cpumask); 502 cpumask_set_cpu(cpu, &iucv_buffer_cpumask);
503 503
504 if (iucv_nonsmp_handler == 0 || cpus_empty(iucv_irq_cpumask)) 504 if (iucv_nonsmp_handler == 0 || cpumask_empty(&iucv_irq_cpumask))
505 /* Enable iucv interrupts on this cpu. */ 505 /* Enable iucv interrupts on this cpu. */
506 iucv_allow_cpu(NULL); 506 iucv_allow_cpu(NULL);
507 else 507 else
@@ -520,7 +520,7 @@ static void iucv_retrieve_cpu(void *data)
520 int cpu = smp_processor_id(); 520 int cpu = smp_processor_id();
521 union iucv_param *parm; 521 union iucv_param *parm;
522 522
523 if (!cpu_isset(cpu, iucv_buffer_cpumask)) 523 if (!cpumask_test_cpu(cpu, &iucv_buffer_cpumask))
524 return; 524 return;
525 525
526 /* Block iucv interrupts. */ 526 /* Block iucv interrupts. */
@@ -531,7 +531,7 @@ static void iucv_retrieve_cpu(void *data)
531 iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm); 531 iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm);
532 532
533 /* Clear indication that an iucv buffer exists for this cpu. */ 533 /* Clear indication that an iucv buffer exists for this cpu. */
534 cpu_clear(cpu, iucv_buffer_cpumask); 534 cpumask_clear_cpu(cpu, &iucv_buffer_cpumask);
535} 535}
536 536
537/** 537/**
@@ -546,8 +546,8 @@ static void iucv_setmask_mp(void)
546 get_online_cpus(); 546 get_online_cpus();
547 for_each_online_cpu(cpu) 547 for_each_online_cpu(cpu)
548 /* Enable all cpus with a declared buffer. */ 548 /* Enable all cpus with a declared buffer. */
549 if (cpu_isset(cpu, iucv_buffer_cpumask) && 549 if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) &&
550 !cpu_isset(cpu, iucv_irq_cpumask)) 550 !cpumask_test_cpu(cpu, &iucv_irq_cpumask))
551 smp_call_function_single(cpu, iucv_allow_cpu, 551 smp_call_function_single(cpu, iucv_allow_cpu,
552 NULL, 1); 552 NULL, 1);
553 put_online_cpus(); 553 put_online_cpus();
@@ -564,9 +564,9 @@ static void iucv_setmask_up(void)
564 int cpu; 564 int cpu;
565 565
566 /* Disable all cpu but the first in cpu_irq_cpumask. */ 566 /* Disable all cpu but the first in cpu_irq_cpumask. */
567 cpumask = iucv_irq_cpumask; 567 cpumask_copy(&cpumask, &iucv_irq_cpumask);
568 cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); 568 cpumask_clear_cpu(cpumask_first(&iucv_irq_cpumask), &cpumask);
569 for_each_cpu_mask_nr(cpu, cpumask) 569 for_each_cpu(cpu, &cpumask)
570 smp_call_function_single(cpu, iucv_block_cpu, NULL, 1); 570 smp_call_function_single(cpu, iucv_block_cpu, NULL, 1);
571} 571}
572 572
@@ -593,7 +593,7 @@ static int iucv_enable(void)
593 rc = -EIO; 593 rc = -EIO;
594 for_each_online_cpu(cpu) 594 for_each_online_cpu(cpu)
595 smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); 595 smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
596 if (cpus_empty(iucv_buffer_cpumask)) 596 if (cpumask_empty(&iucv_buffer_cpumask))
597 /* No cpu could declare an iucv buffer. */ 597 /* No cpu could declare an iucv buffer. */
598 goto out; 598 goto out;
599 put_online_cpus(); 599 put_online_cpus();
@@ -675,15 +675,16 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
675 case CPU_DOWN_PREPARE_FROZEN: 675 case CPU_DOWN_PREPARE_FROZEN:
676 if (!iucv_path_table) 676 if (!iucv_path_table)
677 break; 677 break;
678 cpumask = iucv_buffer_cpumask; 678 cpumask_copy(&cpumask, &iucv_buffer_cpumask);
679 cpu_clear(cpu, cpumask); 679 cpumask_clear_cpu(cpu, &cpumask);
680 if (cpus_empty(cpumask)) 680 if (cpumask_empty(&cpumask))
681 /* Can't offline last IUCV enabled cpu. */ 681 /* Can't offline last IUCV enabled cpu. */
682 return notifier_from_errno(-EINVAL); 682 return notifier_from_errno(-EINVAL);
683 smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); 683 smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
684 if (cpus_empty(iucv_irq_cpumask)) 684 if (cpumask_empty(&iucv_irq_cpumask))
685 smp_call_function_single(first_cpu(iucv_buffer_cpumask), 685 smp_call_function_single(
686 iucv_allow_cpu, NULL, 1); 686 cpumask_first(&iucv_buffer_cpumask),
687 iucv_allow_cpu, NULL, 1);
687 break; 688 break;
688 } 689 }
689 return NOTIFY_OK; 690 return NOTIFY_OK;
@@ -735,7 +736,7 @@ static void iucv_cleanup_queue(void)
735 struct iucv_irq_list *p, *n; 736 struct iucv_irq_list *p, *n;
736 737
737 /* 738 /*
738 * When a path is severed, the pathid can be reused immediatly 739 * When a path is severed, the pathid can be reused immediately
739 * on a iucv connect or a connection pending interrupt. Remove 740 * on a iucv connect or a connection pending interrupt. Remove
740 * all entries from the task queue that refer to a stale pathid 741 * all entries from the task queue that refer to a stale pathid
741 * (iucv_path_table[ix] == NULL). Only then do the iucv connect 742 * (iucv_path_table[ix] == NULL). Only then do the iucv connect
@@ -807,7 +808,7 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
807 spin_lock_bh(&iucv_table_lock); 808 spin_lock_bh(&iucv_table_lock);
808 /* Remove handler from the iucv_handler_list. */ 809 /* Remove handler from the iucv_handler_list. */
809 list_del_init(&handler->list); 810 list_del_init(&handler->list);
810 /* Sever all pathids still refering to the handler. */ 811 /* Sever all pathids still referring to the handler. */
811 list_for_each_entry_safe(p, n, &handler->paths, list) { 812 list_for_each_entry_safe(p, n, &handler->paths, list) {
812 iucv_sever_pathid(p->pathid, NULL); 813 iucv_sever_pathid(p->pathid, NULL);
813 iucv_path_table[p->pathid] = NULL; 814 iucv_path_table[p->pathid] = NULL;
@@ -828,14 +829,14 @@ EXPORT_SYMBOL(iucv_unregister);
828static int iucv_reboot_event(struct notifier_block *this, 829static int iucv_reboot_event(struct notifier_block *this,
829 unsigned long event, void *ptr) 830 unsigned long event, void *ptr)
830{ 831{
831 int i, rc; 832 int i;
832 833
833 get_online_cpus(); 834 get_online_cpus();
834 on_each_cpu(iucv_block_cpu, NULL, 1); 835 on_each_cpu(iucv_block_cpu, NULL, 1);
835 preempt_disable(); 836 preempt_disable();
836 for (i = 0; i < iucv_max_pathid; i++) { 837 for (i = 0; i < iucv_max_pathid; i++) {
837 if (iucv_path_table[i]) 838 if (iucv_path_table[i])
838 rc = iucv_sever_pathid(i, NULL); 839 iucv_sever_pathid(i, NULL);
839 } 840 }
840 preempt_enable(); 841 preempt_enable();
841 put_online_cpus(); 842 put_online_cpus();
@@ -866,7 +867,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
866 int rc; 867 int rc;
867 868
868 local_bh_disable(); 869 local_bh_disable();
869 if (cpus_empty(iucv_buffer_cpumask)) { 870 if (cpumask_empty(&iucv_buffer_cpumask)) {
870 rc = -EIO; 871 rc = -EIO;
871 goto out; 872 goto out;
872 } 873 }
@@ -915,7 +916,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
915 916
916 spin_lock_bh(&iucv_table_lock); 917 spin_lock_bh(&iucv_table_lock);
917 iucv_cleanup_queue(); 918 iucv_cleanup_queue();
918 if (cpus_empty(iucv_buffer_cpumask)) { 919 if (cpumask_empty(&iucv_buffer_cpumask)) {
919 rc = -EIO; 920 rc = -EIO;
920 goto out; 921 goto out;
921 } 922 }
@@ -975,7 +976,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16])
975 int rc; 976 int rc;
976 977
977 local_bh_disable(); 978 local_bh_disable();
978 if (cpus_empty(iucv_buffer_cpumask)) { 979 if (cpumask_empty(&iucv_buffer_cpumask)) {
979 rc = -EIO; 980 rc = -EIO;
980 goto out; 981 goto out;
981 } 982 }
@@ -1007,7 +1008,7 @@ int iucv_path_resume(struct iucv_path *path, u8 userdata[16])
1007 int rc; 1008 int rc;
1008 1009
1009 local_bh_disable(); 1010 local_bh_disable();
1010 if (cpus_empty(iucv_buffer_cpumask)) { 1011 if (cpumask_empty(&iucv_buffer_cpumask)) {
1011 rc = -EIO; 1012 rc = -EIO;
1012 goto out; 1013 goto out;
1013 } 1014 }
@@ -1036,7 +1037,7 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16])
1036 int rc; 1037 int rc;
1037 1038
1038 preempt_disable(); 1039 preempt_disable();
1039 if (cpus_empty(iucv_buffer_cpumask)) { 1040 if (cpumask_empty(&iucv_buffer_cpumask)) {
1040 rc = -EIO; 1041 rc = -EIO;
1041 goto out; 1042 goto out;
1042 } 1043 }
@@ -1070,7 +1071,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
1070 int rc; 1071 int rc;
1071 1072
1072 local_bh_disable(); 1073 local_bh_disable();
1073 if (cpus_empty(iucv_buffer_cpumask)) { 1074 if (cpumask_empty(&iucv_buffer_cpumask)) {
1074 rc = -EIO; 1075 rc = -EIO;
1075 goto out; 1076 goto out;
1076 } 1077 }
@@ -1162,7 +1163,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
1162 if (msg->flags & IUCV_IPRMDATA) 1163 if (msg->flags & IUCV_IPRMDATA)
1163 return iucv_message_receive_iprmdata(path, msg, flags, 1164 return iucv_message_receive_iprmdata(path, msg, flags,
1164 buffer, size, residual); 1165 buffer, size, residual);
1165 if (cpus_empty(iucv_buffer_cpumask)) { 1166 if (cpumask_empty(&iucv_buffer_cpumask)) {
1166 rc = -EIO; 1167 rc = -EIO;
1167 goto out; 1168 goto out;
1168 } 1169 }
@@ -1235,7 +1236,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg)
1235 int rc; 1236 int rc;
1236 1237
1237 local_bh_disable(); 1238 local_bh_disable();
1238 if (cpus_empty(iucv_buffer_cpumask)) { 1239 if (cpumask_empty(&iucv_buffer_cpumask)) {
1239 rc = -EIO; 1240 rc = -EIO;
1240 goto out; 1241 goto out;
1241 } 1242 }
@@ -1274,7 +1275,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
1274 int rc; 1275 int rc;
1275 1276
1276 local_bh_disable(); 1277 local_bh_disable();
1277 if (cpus_empty(iucv_buffer_cpumask)) { 1278 if (cpumask_empty(&iucv_buffer_cpumask)) {
1278 rc = -EIO; 1279 rc = -EIO;
1279 goto out; 1280 goto out;
1280 } 1281 }
@@ -1324,7 +1325,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
1324 union iucv_param *parm; 1325 union iucv_param *parm;
1325 int rc; 1326 int rc;
1326 1327
1327 if (cpus_empty(iucv_buffer_cpumask)) { 1328 if (cpumask_empty(&iucv_buffer_cpumask)) {
1328 rc = -EIO; 1329 rc = -EIO;
1329 goto out; 1330 goto out;
1330 } 1331 }
@@ -1411,7 +1412,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
1411 int rc; 1412 int rc;
1412 1413
1413 local_bh_disable(); 1414 local_bh_disable();
1414 if (cpus_empty(iucv_buffer_cpumask)) { 1415 if (cpumask_empty(&iucv_buffer_cpumask)) {
1415 rc = -EIO; 1416 rc = -EIO;
1416 goto out; 1417 goto out;
1417 } 1418 }
@@ -1888,7 +1889,7 @@ static int iucv_pm_freeze(struct device *dev)
1888 printk(KERN_WARNING "iucv_pm_freeze\n"); 1889 printk(KERN_WARNING "iucv_pm_freeze\n");
1889#endif 1890#endif
1890 if (iucv_pm_state != IUCV_PM_FREEZING) { 1891 if (iucv_pm_state != IUCV_PM_FREEZING) {
1891 for_each_cpu_mask_nr(cpu, iucv_irq_cpumask) 1892 for_each_cpu(cpu, &iucv_irq_cpumask)
1892 smp_call_function_single(cpu, iucv_block_cpu_almost, 1893 smp_call_function_single(cpu, iucv_block_cpu_almost,
1893 NULL, 1); 1894 NULL, 1);
1894 cancel_work_sync(&iucv_work); 1895 cancel_work_sync(&iucv_work);
@@ -1928,7 +1929,7 @@ static int iucv_pm_thaw(struct device *dev)
1928 if (rc) 1929 if (rc)
1929 goto out; 1930 goto out;
1930 } 1931 }
1931 if (cpus_empty(iucv_irq_cpumask)) { 1932 if (cpumask_empty(&iucv_irq_cpumask)) {
1932 if (iucv_nonsmp_handler) 1933 if (iucv_nonsmp_handler)
1933 /* enable interrupts on one cpu */ 1934 /* enable interrupts on one cpu */
1934 iucv_allow_cpu(NULL); 1935 iucv_allow_cpu(NULL);
@@ -1961,7 +1962,7 @@ static int iucv_pm_restore(struct device *dev)
1961 pr_warning("Suspending Linux did not completely close all IUCV " 1962 pr_warning("Suspending Linux did not completely close all IUCV "
1962 "connections\n"); 1963 "connections\n");
1963 iucv_pm_state = IUCV_PM_RESTORING; 1964 iucv_pm_state = IUCV_PM_RESTORING;
1964 if (cpus_empty(iucv_irq_cpumask)) { 1965 if (cpumask_empty(&iucv_irq_cpumask)) {
1965 rc = iucv_query_maxconn(); 1966 rc = iucv_query_maxconn();
1966 rc = iucv_enable(); 1967 rc = iucv_enable();
1967 if (rc) 1968 if (rc)
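
The iucv.c hunks convert the old value-style cpumask operations (cpu_set, cpu_isset, cpu_clear, cpus_empty, first_cpu, struct assignment) to the pointer-based cpumask_* API, which keeps working when cpumasks become off-stack objects on large NR_CPUS configurations. A condensed before/after sketch of the idioms used above:

#include <linux/cpumask.h>

static cpumask_t my_mask = { CPU_BITS_NONE };   /* was CPU_MASK_NONE */

static void cpumask_api_sketch(int cpu)
{
        cpumask_t tmp;

        cpumask_set_cpu(cpu, &my_mask);         /* was cpu_set(cpu, my_mask) */

        if (cpumask_test_cpu(cpu, &my_mask))    /* was cpu_isset(...) */
                cpumask_clear_cpu(cpu, &my_mask);       /* was cpu_clear(...) */

        if (cpumask_empty(&my_mask))            /* was cpus_empty(my_mask) */
                return;

        cpumask_copy(&tmp, &my_mask);           /* was tmp = my_mask */
        cpumask_clear_cpu(cpumask_first(&tmp), &tmp);   /* was first_cpu() */

        for_each_cpu(cpu, &tmp)                 /* was for_each_cpu_mask_nr */
                ;                               /* act on each remaining cpu */
}
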
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d87c22df6f1e..d62401c25684 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -70,7 +70,7 @@ static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
70 return (struct pfkey_sock *)sk; 70 return (struct pfkey_sock *)sk;
71} 71}
72 72
73static int pfkey_can_dump(struct sock *sk) 73static int pfkey_can_dump(const struct sock *sk)
74{ 74{
75 if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf) 75 if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf)
76 return 1; 76 return 1;
@@ -303,12 +303,13 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
303 return rc; 303 return rc;
304} 304}
305 305
306static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) 306static inline void pfkey_hdr_dup(struct sadb_msg *new,
307 const struct sadb_msg *orig)
307{ 308{
308 *new = *orig; 309 *new = *orig;
309} 310}
310 311
311static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk) 312static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
312{ 313{
313 struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); 314 struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
314 struct sadb_msg *hdr; 315 struct sadb_msg *hdr;
@@ -369,13 +370,13 @@ static u8 sadb_ext_min_len[] = {
369}; 370};
370 371
371/* Verify sadb_address_{len,prefixlen} against sa_family. */ 372/* Verify sadb_address_{len,prefixlen} against sa_family. */
372static int verify_address_len(void *p) 373static int verify_address_len(const void *p)
373{ 374{
374 struct sadb_address *sp = p; 375 const struct sadb_address *sp = p;
375 struct sockaddr *addr = (struct sockaddr *)(sp + 1); 376 const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
376 struct sockaddr_in *sin; 377 const struct sockaddr_in *sin;
377#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 378#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
378 struct sockaddr_in6 *sin6; 379 const struct sockaddr_in6 *sin6;
379#endif 380#endif
380 int len; 381 int len;
381 382
@@ -411,16 +412,16 @@ static int verify_address_len(void *p)
411 return 0; 412 return 0;
412} 413}
413 414
414static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) 415static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
415{ 416{
416 return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + 417 return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
417 sec_ctx->sadb_x_ctx_len, 418 sec_ctx->sadb_x_ctx_len,
418 sizeof(uint64_t)); 419 sizeof(uint64_t));
419} 420}
420 421
421static inline int verify_sec_ctx_len(void *p) 422static inline int verify_sec_ctx_len(const void *p)
422{ 423{
423 struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; 424 const struct sadb_x_sec_ctx *sec_ctx = p;
424 int len = sec_ctx->sadb_x_ctx_len; 425 int len = sec_ctx->sadb_x_ctx_len;
425 426
426 if (len > PAGE_SIZE) 427 if (len > PAGE_SIZE)
@@ -434,7 +435,7 @@ static inline int verify_sec_ctx_len(void *p)
434 return 0; 435 return 0;
435} 436}
436 437
437static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx) 438static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx)
438{ 439{
439 struct xfrm_user_sec_ctx *uctx = NULL; 440 struct xfrm_user_sec_ctx *uctx = NULL;
440 int ctx_size = sec_ctx->sadb_x_ctx_len; 441 int ctx_size = sec_ctx->sadb_x_ctx_len;
@@ -455,16 +456,16 @@ static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb
455 return uctx; 456 return uctx;
456} 457}
457 458
458static int present_and_same_family(struct sadb_address *src, 459static int present_and_same_family(const struct sadb_address *src,
459 struct sadb_address *dst) 460 const struct sadb_address *dst)
460{ 461{
461 struct sockaddr *s_addr, *d_addr; 462 const struct sockaddr *s_addr, *d_addr;
462 463
463 if (!src || !dst) 464 if (!src || !dst)
464 return 0; 465 return 0;
465 466
466 s_addr = (struct sockaddr *)(src + 1); 467 s_addr = (const struct sockaddr *)(src + 1);
467 d_addr = (struct sockaddr *)(dst + 1); 468 d_addr = (const struct sockaddr *)(dst + 1);
468 if (s_addr->sa_family != d_addr->sa_family) 469 if (s_addr->sa_family != d_addr->sa_family)
469 return 0; 470 return 0;
470 if (s_addr->sa_family != AF_INET 471 if (s_addr->sa_family != AF_INET
@@ -477,15 +478,15 @@ static int present_and_same_family(struct sadb_address *src,
477 return 1; 478 return 1;
478} 479}
479 480
480static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 481static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs)
481{ 482{
482 char *p = (char *) hdr; 483 const char *p = (char *) hdr;
483 int len = skb->len; 484 int len = skb->len;
484 485
485 len -= sizeof(*hdr); 486 len -= sizeof(*hdr);
486 p += sizeof(*hdr); 487 p += sizeof(*hdr);
487 while (len > 0) { 488 while (len > 0) {
488 struct sadb_ext *ehdr = (struct sadb_ext *) p; 489 const struct sadb_ext *ehdr = (const struct sadb_ext *) p;
489 uint16_t ext_type; 490 uint16_t ext_type;
490 int ext_len; 491 int ext_len;
491 492
@@ -514,7 +515,7 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h
514 if (verify_sec_ctx_len(p)) 515 if (verify_sec_ctx_len(p))
515 return -EINVAL; 516 return -EINVAL;
516 } 517 }
517 ext_hdrs[ext_type-1] = p; 518 ext_hdrs[ext_type-1] = (void *) p;
518 } 519 }
519 p += ext_len; 520 p += ext_len;
520 len -= ext_len; 521 len -= ext_len;
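
The af_key hunks in this region add const throughout the message-parsing helpers so the incoming sadb message is never written through these paths; only the ext_hdrs[] slot stays non-const, hence the explicit (void *) cast when storing the pointer. The shape of such a const-correct extension walk, in miniature and with a hypothetical header type modelled on struct sadb_ext (lengths in 8-byte units):

#include <stdint.h>
#include <stddef.h>

struct ext_hdr {                /* hypothetical, models struct sadb_ext */
        uint16_t len;           /* length in 8-byte units */
        uint16_t type;
};

/* Walk a buffer of extension headers without modifying it; record each
 * header's location in a caller-supplied table (sketch of parse_exthdrs). */
static int walk_exts(const void *buf, size_t len,
                     const struct ext_hdr *tbl[], size_t tbl_len)
{
        const char *p = buf;

        while (len >= sizeof(struct ext_hdr)) {
                const struct ext_hdr *eh = (const struct ext_hdr *)p;
                size_t ext_len = (size_t)eh->len * 8;

                if (ext_len < sizeof(*eh) || ext_len > len)
                        return -1;              /* malformed */
                if (eh->type && eh->type <= tbl_len)
                        tbl[eh->type - 1] = eh;
                p += ext_len;
                len -= ext_len;
        }
        return len ? -1 : 0;
}
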
@@ -606,21 +607,21 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
606} 607}
607 608
608static 609static
609int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) 610int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr)
610{ 611{
611 return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), 612 return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1),
612 xaddr); 613 xaddr);
613} 614}
614 615
615static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_msg *hdr, void **ext_hdrs) 616static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs)
616{ 617{
617 struct sadb_sa *sa; 618 const struct sadb_sa *sa;
618 struct sadb_address *addr; 619 const struct sadb_address *addr;
619 uint16_t proto; 620 uint16_t proto;
620 unsigned short family; 621 unsigned short family;
621 xfrm_address_t *xaddr; 622 xfrm_address_t *xaddr;
622 623
623 sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; 624 sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
624 if (sa == NULL) 625 if (sa == NULL)
625 return NULL; 626 return NULL;
626 627
@@ -629,18 +630,18 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_
629 return NULL; 630 return NULL;
630 631
631 /* sadb_address_len should be checked by caller */ 632 /* sadb_address_len should be checked by caller */
632 addr = (struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1]; 633 addr = (const struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
633 if (addr == NULL) 634 if (addr == NULL)
634 return NULL; 635 return NULL;
635 636
636 family = ((struct sockaddr *)(addr + 1))->sa_family; 637 family = ((const struct sockaddr *)(addr + 1))->sa_family;
637 switch (family) { 638 switch (family) {
638 case AF_INET: 639 case AF_INET:
639 xaddr = (xfrm_address_t *)&((struct sockaddr_in *)(addr + 1))->sin_addr; 640 xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
640 break; 641 break;
641#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 642#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
642 case AF_INET6: 643 case AF_INET6:
643 xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(addr + 1))->sin6_addr; 644 xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
644 break; 645 break;
645#endif 646#endif
646 default: 647 default:
@@ -690,9 +691,9 @@ static inline int pfkey_mode_to_xfrm(int mode)
690 } 691 }
691} 692}
692 693
693static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port, 694static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port,
694 struct sockaddr *sa, 695 struct sockaddr *sa,
695 unsigned short family) 696 unsigned short family)
696{ 697{
697 switch (family) { 698 switch (family) {
698 case AF_INET: 699 case AF_INET:
@@ -711,7 +712,7 @@ static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port,
711 sin6->sin6_family = AF_INET6; 712 sin6->sin6_family = AF_INET6;
712 sin6->sin6_port = port; 713 sin6->sin6_port = port;
713 sin6->sin6_flowinfo = 0; 714 sin6->sin6_flowinfo = 0;
714 ipv6_addr_copy(&sin6->sin6_addr, (struct in6_addr *)xaddr->a6); 715 ipv6_addr_copy(&sin6->sin6_addr, (const struct in6_addr *)xaddr->a6);
715 sin6->sin6_scope_id = 0; 716 sin6->sin6_scope_id = 0;
716 return 128; 717 return 128;
717 } 718 }
@@ -720,7 +721,7 @@ static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port,
720 return 0; 721 return 0;
721} 722}
722 723
723static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, 724static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
724 int add_keys, int hsc) 725 int add_keys, int hsc)
725{ 726{
726 struct sk_buff *skb; 727 struct sk_buff *skb;
@@ -1010,7 +1011,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
1010} 1011}
1011 1012
1012 1013
1013static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) 1014static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x)
1014{ 1015{
1015 struct sk_buff *skb; 1016 struct sk_buff *skb;
1016 1017
@@ -1019,26 +1020,26 @@ static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x)
1019 return skb; 1020 return skb;
1020} 1021}
1021 1022
1022static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, 1023static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x,
1023 int hsc) 1024 int hsc)
1024{ 1025{
1025 return __pfkey_xfrm_state2msg(x, 0, hsc); 1026 return __pfkey_xfrm_state2msg(x, 0, hsc);
1026} 1027}
1027 1028
1028static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, 1029static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1029 struct sadb_msg *hdr, 1030 const struct sadb_msg *hdr,
1030 void **ext_hdrs) 1031 void * const *ext_hdrs)
1031{ 1032{
1032 struct xfrm_state *x; 1033 struct xfrm_state *x;
1033 struct sadb_lifetime *lifetime; 1034 const struct sadb_lifetime *lifetime;
1034 struct sadb_sa *sa; 1035 const struct sadb_sa *sa;
1035 struct sadb_key *key; 1036 const struct sadb_key *key;
1036 struct sadb_x_sec_ctx *sec_ctx; 1037 const struct sadb_x_sec_ctx *sec_ctx;
1037 uint16_t proto; 1038 uint16_t proto;
1038 int err; 1039 int err;
1039 1040
1040 1041
1041 sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; 1042 sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
1042 if (!sa || 1043 if (!sa ||
1043 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1044 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1044 ext_hdrs[SADB_EXT_ADDRESS_DST-1])) 1045 ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
@@ -1077,7 +1078,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1077 sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || 1078 sa->sadb_sa_encrypt > SADB_X_CALG_MAX) ||
1078 sa->sadb_sa_encrypt > SADB_EALG_MAX) 1079 sa->sadb_sa_encrypt > SADB_EALG_MAX)
1079 return ERR_PTR(-EINVAL); 1080 return ERR_PTR(-EINVAL);
1080 key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; 1081 key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
1081 if (key != NULL && 1082 if (key != NULL &&
1082 sa->sadb_sa_auth != SADB_X_AALG_NULL && 1083 sa->sadb_sa_auth != SADB_X_AALG_NULL &&
1083 ((key->sadb_key_bits+7) / 8 == 0 || 1084 ((key->sadb_key_bits+7) / 8 == 0 ||
@@ -1104,14 +1105,14 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1104 if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) 1105 if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
1105 x->props.flags |= XFRM_STATE_NOPMTUDISC; 1106 x->props.flags |= XFRM_STATE_NOPMTUDISC;
1106 1107
1107 lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; 1108 lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
1108 if (lifetime != NULL) { 1109 if (lifetime != NULL) {
1109 x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); 1110 x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
1110 x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); 1111 x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
1111 x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; 1112 x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
1112 x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; 1113 x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
1113 } 1114 }
1114 lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]; 1115 lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
1115 if (lifetime != NULL) { 1116 if (lifetime != NULL) {
1116 x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); 1117 x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
1117 x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); 1118 x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
@@ -1119,7 +1120,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1119 x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; 1120 x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
1120 } 1121 }
1121 1122
1122 sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; 1123 sec_ctx = (const struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
1123 if (sec_ctx != NULL) { 1124 if (sec_ctx != NULL) {
1124 struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); 1125 struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
1125 1126
@@ -1133,7 +1134,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1133 goto out; 1134 goto out;
1134 } 1135 }
1135 1136
1136 key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; 1137 key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
1137 if (sa->sadb_sa_auth) { 1138 if (sa->sadb_sa_auth) {
1138 int keysize = 0; 1139 int keysize = 0;
1139 struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); 1140 struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
@@ -1202,7 +1203,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1202 &x->id.daddr); 1203 &x->id.daddr);
1203 1204
1204 if (ext_hdrs[SADB_X_EXT_SA2-1]) { 1205 if (ext_hdrs[SADB_X_EXT_SA2-1]) {
1205 struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1]; 1206 const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1];
1206 int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); 1207 int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
1207 if (mode < 0) { 1208 if (mode < 0) {
1208 err = -EINVAL; 1209 err = -EINVAL;
@@ -1213,7 +1214,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1213 } 1214 }
1214 1215
1215 if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) { 1216 if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) {
1216 struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; 1217 const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];
1217 1218
1218 /* Nobody uses this, but we try. */ 1219 /* Nobody uses this, but we try. */
1219 x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr); 1220 x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr);
@@ -1224,7 +1225,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1224 x->sel.family = x->props.family; 1225 x->sel.family = x->props.family;
1225 1226
1226 if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { 1227 if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
1227 struct sadb_x_nat_t_type* n_type; 1228 const struct sadb_x_nat_t_type* n_type;
1228 struct xfrm_encap_tmpl *natt; 1229 struct xfrm_encap_tmpl *natt;
1229 1230
1230 x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); 1231 x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
@@ -1236,12 +1237,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1236 natt->encap_type = n_type->sadb_x_nat_t_type_type; 1237 natt->encap_type = n_type->sadb_x_nat_t_type_type;
1237 1238
1238 if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { 1239 if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
1239 struct sadb_x_nat_t_port* n_port = 1240 const struct sadb_x_nat_t_port *n_port =
1240 ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; 1241 ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
1241 natt->encap_sport = n_port->sadb_x_nat_t_port_port; 1242 natt->encap_sport = n_port->sadb_x_nat_t_port_port;
1242 } 1243 }
1243 if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) { 1244 if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) {
1244 struct sadb_x_nat_t_port* n_port = 1245 const struct sadb_x_nat_t_port *n_port =
1245 ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; 1246 ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
1246 natt->encap_dport = n_port->sadb_x_nat_t_port_port; 1247 natt->encap_dport = n_port->sadb_x_nat_t_port_port;
1247 } 1248 }
@@ -1261,12 +1262,12 @@ out:
1261 return ERR_PTR(err); 1262 return ERR_PTR(err);
1262} 1263}
1263 1264
1264static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1265static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1265{ 1266{
1266 return -EOPNOTSUPP; 1267 return -EOPNOTSUPP;
1267} 1268}
1268 1269
1269static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1270static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1270{ 1271{
1271 struct net *net = sock_net(sk); 1272 struct net *net = sock_net(sk);
1272 struct sk_buff *resp_skb; 1273 struct sk_buff *resp_skb;
@@ -1365,7 +1366,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1365 return 0; 1366 return 0;
1366} 1367}
1367 1368
1368static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1369static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1369{ 1370{
1370 struct net *net = sock_net(sk); 1371 struct net *net = sock_net(sk);
1371 struct xfrm_state *x; 1372 struct xfrm_state *x;
@@ -1429,7 +1430,7 @@ static inline int event2keytype(int event)
1429} 1430}
1430 1431
1431/* ADD/UPD/DEL */ 1432/* ADD/UPD/DEL */
1432static int key_notify_sa(struct xfrm_state *x, struct km_event *c) 1433static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
1433{ 1434{
1434 struct sk_buff *skb; 1435 struct sk_buff *skb;
1435 struct sadb_msg *hdr; 1436 struct sadb_msg *hdr;
@@ -1453,7 +1454,7 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
1453 return 0; 1454 return 0;
1454} 1455}
1455 1456
1456static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1457static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1457{ 1458{
1458 struct net *net = sock_net(sk); 1459 struct net *net = sock_net(sk);
1459 struct xfrm_state *x; 1460 struct xfrm_state *x;
@@ -1492,7 +1493,7 @@ out:
1492 return err; 1493 return err;
1493} 1494}
1494 1495
1495static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1496static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1496{ 1497{
1497 struct net *net = sock_net(sk); 1498 struct net *net = sock_net(sk);
1498 struct xfrm_state *x; 1499 struct xfrm_state *x;
@@ -1534,7 +1535,7 @@ out:
1534 return err; 1535 return err;
1535} 1536}
1536 1537
1537static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1538static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1538{ 1539{
1539 struct net *net = sock_net(sk); 1540 struct net *net = sock_net(sk);
1540 __u8 proto; 1541 __u8 proto;
@@ -1570,7 +1571,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1570 return 0; 1571 return 0;
1571} 1572}
1572 1573
1573static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, 1574static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
1574 gfp_t allocation) 1575 gfp_t allocation)
1575{ 1576{
1576 struct sk_buff *skb; 1577 struct sk_buff *skb;
@@ -1642,7 +1643,7 @@ out_put_algs:
1642 return skb; 1643 return skb;
1643} 1644}
1644 1645
1645static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1646static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1646{ 1647{
1647 struct pfkey_sock *pfk = pfkey_sk(sk); 1648 struct pfkey_sock *pfk = pfkey_sk(sk);
1648 struct sk_buff *supp_skb; 1649 struct sk_buff *supp_skb;
@@ -1671,7 +1672,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1671 return 0; 1672 return 0;
1672} 1673}
1673 1674
1674static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) 1675static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
1675{ 1676{
1676 struct sk_buff *skb; 1677 struct sk_buff *skb;
1677 struct sadb_msg *hdr; 1678 struct sadb_msg *hdr;
@@ -1688,7 +1689,7 @@ static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr)
1688 return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); 1689 return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
1689} 1690}
1690 1691
1691static int key_notify_sa_flush(struct km_event *c) 1692static int key_notify_sa_flush(const struct km_event *c)
1692{ 1693{
1693 struct sk_buff *skb; 1694 struct sk_buff *skb;
1694 struct sadb_msg *hdr; 1695 struct sadb_msg *hdr;
@@ -1710,7 +1711,7 @@ static int key_notify_sa_flush(struct km_event *c)
1710 return 0; 1711 return 0;
1711} 1712}
1712 1713
1713static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1714static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1714{ 1715{
1715 struct net *net = sock_net(sk); 1716 struct net *net = sock_net(sk);
1716 unsigned proto; 1717 unsigned proto;
@@ -1784,7 +1785,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
1784 xfrm_state_walk_done(&pfk->dump.u.state); 1785 xfrm_state_walk_done(&pfk->dump.u.state);
1785} 1786}
1786 1787
1787static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1788static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1788{ 1789{
1789 u8 proto; 1790 u8 proto;
1790 struct pfkey_sock *pfk = pfkey_sk(sk); 1791 struct pfkey_sock *pfk = pfkey_sk(sk);
@@ -1805,19 +1806,29 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr
1805 return pfkey_do_dump(pfk); 1806 return pfkey_do_dump(pfk);
1806} 1807}
1807 1808
1808static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1809static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1809{ 1810{
1810 struct pfkey_sock *pfk = pfkey_sk(sk); 1811 struct pfkey_sock *pfk = pfkey_sk(sk);
1811 int satype = hdr->sadb_msg_satype; 1812 int satype = hdr->sadb_msg_satype;
1813 bool reset_errno = false;
1812 1814
1813 if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { 1815 if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) {
1814 /* XXX we mangle packet... */ 1816 reset_errno = true;
1815 hdr->sadb_msg_errno = 0;
1816 if (satype != 0 && satype != 1) 1817 if (satype != 0 && satype != 1)
1817 return -EINVAL; 1818 return -EINVAL;
1818 pfk->promisc = satype; 1819 pfk->promisc = satype;
1819 } 1820 }
1820 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); 1821 if (reset_errno && skb_cloned(skb))
1822 skb = skb_copy(skb, GFP_KERNEL);
1823 else
1824 skb = skb_clone(skb, GFP_KERNEL);
1825
1826 if (reset_errno && skb) {
1827 struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data;
1828 new_hdr->sadb_msg_errno = 0;
1829 }
1830
1831 pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
1821 return 0; 1832 return 0;
1822} 1833}
1823 1834
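The pfkey_promisc() rework above stops editing a buffer that other contexts may still hold: it takes a private copy (skb_copy) when the skb is cloned, otherwise a plain clone, and only then clears sadb_msg_errno on its own copy. The fragment below is a minimal userspace sketch of that copy-before-modify rule; every name in it is invented for illustration and nothing here comes from af_key.c.

/* Userspace sketch of "make the buffer private before writing to it".
 * refcnt > 1 stands in for a cloned/shared skb. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct msg {
	uint8_t errno_field;
	int refcnt;              /* >1 means other holders still see this buffer */
	char payload[32];
};

static struct msg *make_private(struct msg *m)
{
	if (m->refcnt == 1)
		return m;                        /* sole owner: edit in place */
	struct msg *copy = malloc(sizeof(*copy));
	if (!copy)
		return NULL;
	memcpy(copy, m, sizeof(*copy));
	copy->refcnt = 1;
	m->refcnt--;                             /* drop our share of the original */
	return copy;
}

int main(void)
{
	struct msg shared = { .errno_field = 22, .refcnt = 2, .payload = "hello" };
	struct msg *mine = make_private(&shared);

	if (mine) {
		mine->errno_field = 0;           /* safe: only our copy changes */
		printf("original errno=%d, private errno=%d\n",
		       shared.errno_field, mine->errno_field);
		if (mine != &shared)
			free(mine);
	}
	return 0;
}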
@@ -1921,7 +1932,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
1921 return 0; 1932 return 0;
1922} 1933}
1923 1934
1924static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) 1935static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp)
1925{ 1936{
1926 struct xfrm_sec_ctx *xfrm_ctx = xp->security; 1937 struct xfrm_sec_ctx *xfrm_ctx = xp->security;
1927 1938
@@ -1933,9 +1944,9 @@ static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
1933 return 0; 1944 return 0;
1934} 1945}
1935 1946
1936static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) 1947static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp)
1937{ 1948{
1938 struct xfrm_tmpl *t; 1949 const struct xfrm_tmpl *t;
1939 int sockaddr_size = pfkey_sockaddr_size(xp->family); 1950 int sockaddr_size = pfkey_sockaddr_size(xp->family);
1940 int socklen = 0; 1951 int socklen = 0;
1941 int i; 1952 int i;
@@ -1955,7 +1966,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
1955 pfkey_xfrm_policy2sec_ctx_size(xp); 1966 pfkey_xfrm_policy2sec_ctx_size(xp);
1956} 1967}
1957 1968
1958static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) 1969static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp)
1959{ 1970{
1960 struct sk_buff *skb; 1971 struct sk_buff *skb;
1961 int size; 1972 int size;
@@ -1969,7 +1980,7 @@ static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
1969 return skb; 1980 return skb;
1970} 1981}
1971 1982
1972static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir) 1983static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir)
1973{ 1984{
1974 struct sadb_msg *hdr; 1985 struct sadb_msg *hdr;
1975 struct sadb_address *addr; 1986 struct sadb_address *addr;
@@ -2065,8 +2076,8 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
2065 pol->sadb_x_policy_priority = xp->priority; 2076 pol->sadb_x_policy_priority = xp->priority;
2066 2077
2067 for (i=0; i<xp->xfrm_nr; i++) { 2078 for (i=0; i<xp->xfrm_nr; i++) {
2079 const struct xfrm_tmpl *t = xp->xfrm_vec + i;
2068 struct sadb_x_ipsecrequest *rq; 2080 struct sadb_x_ipsecrequest *rq;
2069 struct xfrm_tmpl *t = xp->xfrm_vec + i;
2070 int req_size; 2081 int req_size;
2071 int mode; 2082 int mode;
2072 2083
@@ -2123,7 +2134,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
2123 return 0; 2134 return 0;
2124} 2135}
2125 2136
2126static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) 2137static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
2127{ 2138{
2128 struct sk_buff *out_skb; 2139 struct sk_buff *out_skb;
2129 struct sadb_msg *out_hdr; 2140 struct sadb_msg *out_hdr;
@@ -2152,7 +2163,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c
2152 2163
2153} 2164}
2154 2165
2155static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2166static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2156{ 2167{
2157 struct net *net = sock_net(sk); 2168 struct net *net = sock_net(sk);
2158 int err = 0; 2169 int err = 0;
@@ -2273,7 +2284,7 @@ out:
2273 return err; 2284 return err;
2274} 2285}
2275 2286
2276static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2287static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2277{ 2288{
2278 struct net *net = sock_net(sk); 2289 struct net *net = sock_net(sk);
2279 int err; 2290 int err;
@@ -2350,7 +2361,7 @@ out:
2350 return err; 2361 return err;
2351} 2362}
2352 2363
2353static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir) 2364static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir)
2354{ 2365{
2355 int err; 2366 int err;
2356 struct sk_buff *out_skb; 2367 struct sk_buff *out_skb;
@@ -2458,7 +2469,7 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
2458} 2469}
2459 2470
2460static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, 2471static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
2461 struct sadb_msg *hdr, void **ext_hdrs) 2472 const struct sadb_msg *hdr, void * const *ext_hdrs)
2462{ 2473{
2463 int i, len, ret, err = -EINVAL; 2474 int i, len, ret, err = -EINVAL;
2464 u8 dir; 2475 u8 dir;
@@ -2549,14 +2560,14 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
2549} 2560}
2550#else 2561#else
2551static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, 2562static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
2552 struct sadb_msg *hdr, void **ext_hdrs) 2563 const struct sadb_msg *hdr, void * const *ext_hdrs)
2553{ 2564{
2554 return -ENOPROTOOPT; 2565 return -ENOPROTOOPT;
2555} 2566}
2556#endif 2567#endif
2557 2568
2558 2569
2559static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2570static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2560{ 2571{
2561 struct net *net = sock_net(sk); 2572 struct net *net = sock_net(sk);
2562 unsigned int dir; 2573 unsigned int dir;
@@ -2644,7 +2655,7 @@ static void pfkey_dump_sp_done(struct pfkey_sock *pfk)
2644 xfrm_policy_walk_done(&pfk->dump.u.policy); 2655 xfrm_policy_walk_done(&pfk->dump.u.policy);
2645} 2656}
2646 2657
2647static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2658static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2648{ 2659{
2649 struct pfkey_sock *pfk = pfkey_sk(sk); 2660 struct pfkey_sock *pfk = pfkey_sk(sk);
2650 2661
@@ -2660,7 +2671,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
2660 return pfkey_do_dump(pfk); 2671 return pfkey_do_dump(pfk);
2661} 2672}
2662 2673
2663static int key_notify_policy_flush(struct km_event *c) 2674static int key_notify_policy_flush(const struct km_event *c)
2664{ 2675{
2665 struct sk_buff *skb_out; 2676 struct sk_buff *skb_out;
2666 struct sadb_msg *hdr; 2677 struct sadb_msg *hdr;
@@ -2680,7 +2691,7 @@ static int key_notify_policy_flush(struct km_event *c)
2680 2691
2681} 2692}
2682 2693
2683static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2694static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2684{ 2695{
2685 struct net *net = sock_net(sk); 2696 struct net *net = sock_net(sk);
2686 struct km_event c; 2697 struct km_event c;
@@ -2709,7 +2720,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2709} 2720}
2710 2721
2711typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, 2722typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
2712 struct sadb_msg *hdr, void **ext_hdrs); 2723 const struct sadb_msg *hdr, void * const *ext_hdrs);
2713static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { 2724static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
2714 [SADB_RESERVED] = pfkey_reserved, 2725 [SADB_RESERVED] = pfkey_reserved,
2715 [SADB_GETSPI] = pfkey_getspi, 2726 [SADB_GETSPI] = pfkey_getspi,
@@ -2736,7 +2747,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
2736 [SADB_X_MIGRATE] = pfkey_migrate, 2747 [SADB_X_MIGRATE] = pfkey_migrate,
2737}; 2748};
2738 2749
2739static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr) 2750static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr)
2740{ 2751{
2741 void *ext_hdrs[SADB_EXT_MAX]; 2752 void *ext_hdrs[SADB_EXT_MAX];
2742 int err; 2753 int err;
@@ -2781,7 +2792,8 @@ static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp)
2781 return hdr; 2792 return hdr;
2782} 2793}
2783 2794
2784static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) 2795static inline int aalg_tmpl_set(const struct xfrm_tmpl *t,
2796 const struct xfrm_algo_desc *d)
2785{ 2797{
2786 unsigned int id = d->desc.sadb_alg_id; 2798 unsigned int id = d->desc.sadb_alg_id;
2787 2799
@@ -2791,7 +2803,8 @@ static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
2791 return (t->aalgos >> id) & 1; 2803 return (t->aalgos >> id) & 1;
2792} 2804}
2793 2805
2794static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) 2806static inline int ealg_tmpl_set(const struct xfrm_tmpl *t,
2807 const struct xfrm_algo_desc *d)
2795{ 2808{
2796 unsigned int id = d->desc.sadb_alg_id; 2809 unsigned int id = d->desc.sadb_alg_id;
2797 2810
@@ -2801,12 +2814,12 @@ static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
2801 return (t->ealgos >> id) & 1; 2814 return (t->ealgos >> id) & 1;
2802} 2815}
2803 2816
2804static int count_ah_combs(struct xfrm_tmpl *t) 2817static int count_ah_combs(const struct xfrm_tmpl *t)
2805{ 2818{
2806 int i, sz = 0; 2819 int i, sz = 0;
2807 2820
2808 for (i = 0; ; i++) { 2821 for (i = 0; ; i++) {
2809 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); 2822 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
2810 if (!aalg) 2823 if (!aalg)
2811 break; 2824 break;
2812 if (aalg_tmpl_set(t, aalg) && aalg->available) 2825 if (aalg_tmpl_set(t, aalg) && aalg->available)
@@ -2815,12 +2828,12 @@ static int count_ah_combs(struct xfrm_tmpl *t)
2815 return sz + sizeof(struct sadb_prop); 2828 return sz + sizeof(struct sadb_prop);
2816} 2829}
2817 2830
2818static int count_esp_combs(struct xfrm_tmpl *t) 2831static int count_esp_combs(const struct xfrm_tmpl *t)
2819{ 2832{
2820 int i, k, sz = 0; 2833 int i, k, sz = 0;
2821 2834
2822 for (i = 0; ; i++) { 2835 for (i = 0; ; i++) {
2823 struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); 2836 const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
2824 if (!ealg) 2837 if (!ealg)
2825 break; 2838 break;
2826 2839
@@ -2828,7 +2841,7 @@ static int count_esp_combs(struct xfrm_tmpl *t)
2828 continue; 2841 continue;
2829 2842
2830 for (k = 1; ; k++) { 2843 for (k = 1; ; k++) {
2831 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); 2844 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
2832 if (!aalg) 2845 if (!aalg)
2833 break; 2846 break;
2834 2847
@@ -2839,7 +2852,7 @@ static int count_esp_combs(struct xfrm_tmpl *t)
2839 return sz + sizeof(struct sadb_prop); 2852 return sz + sizeof(struct sadb_prop);
2840} 2853}
2841 2854
2842static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) 2855static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
2843{ 2856{
2844 struct sadb_prop *p; 2857 struct sadb_prop *p;
2845 int i; 2858 int i;
@@ -2851,7 +2864,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2851 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); 2864 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
2852 2865
2853 for (i = 0; ; i++) { 2866 for (i = 0; ; i++) {
2854 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); 2867 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
2855 if (!aalg) 2868 if (!aalg)
2856 break; 2869 break;
2857 2870
@@ -2871,7 +2884,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2871 } 2884 }
2872} 2885}
2873 2886
2874static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) 2887static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
2875{ 2888{
2876 struct sadb_prop *p; 2889 struct sadb_prop *p;
2877 int i, k; 2890 int i, k;
@@ -2883,7 +2896,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2883 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); 2896 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
2884 2897
2885 for (i=0; ; i++) { 2898 for (i=0; ; i++) {
2886 struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); 2899 const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
2887 if (!ealg) 2900 if (!ealg)
2888 break; 2901 break;
2889 2902
@@ -2892,7 +2905,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2892 2905
2893 for (k = 1; ; k++) { 2906 for (k = 1; ; k++) {
2894 struct sadb_comb *c; 2907 struct sadb_comb *c;
2895 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); 2908 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
2896 if (!aalg) 2909 if (!aalg)
2897 break; 2910 break;
2898 if (!(aalg_tmpl_set(t, aalg) && aalg->available)) 2911 if (!(aalg_tmpl_set(t, aalg) && aalg->available))
@@ -2914,12 +2927,12 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2914 } 2927 }
2915} 2928}
2916 2929
2917static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c) 2930static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c)
2918{ 2931{
2919 return 0; 2932 return 0;
2920} 2933}
2921 2934
2922static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) 2935static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
2923{ 2936{
2924 struct sk_buff *out_skb; 2937 struct sk_buff *out_skb;
2925 struct sadb_msg *out_hdr; 2938 struct sadb_msg *out_hdr;
@@ -2949,7 +2962,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
2949 return 0; 2962 return 0;
2950} 2963}
2951 2964
2952static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) 2965static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c)
2953{ 2966{
2954 struct net *net = x ? xs_net(x) : c->net; 2967 struct net *net = x ? xs_net(x) : c->net;
2955 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); 2968 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -2976,7 +2989,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
2976 return 0; 2989 return 0;
2977} 2990}
2978 2991
2979static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) 2992static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
2980{ 2993{
2981 if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) 2994 if (xp && xp->type != XFRM_POLICY_TYPE_MAIN)
2982 return 0; 2995 return 0;
@@ -3318,7 +3331,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
3318 3331
3319#ifdef CONFIG_NET_KEY_MIGRATE 3332#ifdef CONFIG_NET_KEY_MIGRATE
3320static int set_sadb_address(struct sk_buff *skb, int sasize, int type, 3333static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
3321 struct xfrm_selector *sel) 3334 const struct xfrm_selector *sel)
3322{ 3335{
3323 struct sadb_address *addr; 3336 struct sadb_address *addr;
3324 addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); 3337 addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize);
@@ -3348,7 +3361,7 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
3348} 3361}
3349 3362
3350 3363
3351static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) 3364static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k)
3352{ 3365{
3353 struct sadb_x_kmaddress *kma; 3366 struct sadb_x_kmaddress *kma;
3354 u8 *sa; 3367 u8 *sa;
@@ -3376,7 +3389,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k)
3376static int set_ipsecrequest(struct sk_buff *skb, 3389static int set_ipsecrequest(struct sk_buff *skb,
3377 uint8_t proto, uint8_t mode, int level, 3390 uint8_t proto, uint8_t mode, int level,
3378 uint32_t reqid, uint8_t family, 3391 uint32_t reqid, uint8_t family,
3379 xfrm_address_t *src, xfrm_address_t *dst) 3392 const xfrm_address_t *src, const xfrm_address_t *dst)
3380{ 3393{
3381 struct sadb_x_ipsecrequest *rq; 3394 struct sadb_x_ipsecrequest *rq;
3382 u8 *sa; 3395 u8 *sa;
@@ -3404,9 +3417,9 @@ static int set_ipsecrequest(struct sk_buff *skb,
3404#endif 3417#endif
3405 3418
3406#ifdef CONFIG_NET_KEY_MIGRATE 3419#ifdef CONFIG_NET_KEY_MIGRATE
3407static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 3420static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3408 struct xfrm_migrate *m, int num_bundles, 3421 const struct xfrm_migrate *m, int num_bundles,
3409 struct xfrm_kmaddress *k) 3422 const struct xfrm_kmaddress *k)
3410{ 3423{
3411 int i; 3424 int i;
3412 int sasize_sel; 3425 int sasize_sel;
@@ -3415,7 +3428,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
3415 struct sk_buff *skb; 3428 struct sk_buff *skb;
3416 struct sadb_msg *hdr; 3429 struct sadb_msg *hdr;
3417 struct sadb_x_policy *pol; 3430 struct sadb_x_policy *pol;
3418 struct xfrm_migrate *mp; 3431 const struct xfrm_migrate *mp;
3419 3432
3420 if (type != XFRM_POLICY_TYPE_MAIN) 3433 if (type != XFRM_POLICY_TYPE_MAIN)
3421 return 0; 3434 return 0;
@@ -3513,9 +3526,9 @@ err:
3513 return -EINVAL; 3526 return -EINVAL;
3514} 3527}
3515#else 3528#else
3516static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 3529static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3517 struct xfrm_migrate *m, int num_bundles, 3530 const struct xfrm_migrate *m, int num_bundles,
3518 struct xfrm_kmaddress *k) 3531 const struct xfrm_kmaddress *k)
3519{ 3532{
3520 return -ENOPROTOOPT; 3533 return -ENOPROTOOPT;
3521} 3534}
@@ -3655,6 +3668,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
3655} 3668}
3656 3669
3657static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) 3670static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos)
3671 __acquires(rcu)
3658{ 3672{
3659 struct net *net = seq_file_net(f); 3673 struct net *net = seq_file_net(f);
3660 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); 3674 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -3672,6 +3686,7 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos)
3672} 3686}
3673 3687
3674static void pfkey_seq_stop(struct seq_file *f, void *v) 3688static void pfkey_seq_stop(struct seq_file *f, void *v)
3689 __releases(rcu)
3675{ 3690{
3676 rcu_read_unlock(); 3691 rcu_read_unlock();
3677} 3692}
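The new __acquires(rcu)/__releases(rcu) annotations on pfkey_seq_start() and pfkey_seq_stop() document for static checkers that the RCU read lock taken in the start callback is released only in the stop callback, so every iteration step in between runs under it. The following is only a rough userspace analogy of that paired start/stop locking, using a pthread rwlock and invented names:

/* Iterator whose start/stop callbacks bracket a read lock, analogous to
 * the annotated seq_file start/stop pair above. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static const int table[] = { 1, 2, 3 };

static const int *iter_start(size_t *pos)
{
	pthread_rwlock_rdlock(&table_lock);     /* what __acquires() documents */
	return *pos < 3 ? &table[*pos] : NULL;
}

static const int *iter_next(size_t *pos)
{
	++*pos;
	return *pos < 3 ? &table[*pos] : NULL;
}

static void iter_stop(void)
{
	pthread_rwlock_unlock(&table_lock);     /* what __releases() documents */
}

int main(void)
{
	size_t pos = 0;

	for (const int *v = iter_start(&pos); v; v = iter_next(&pos))
		printf("%d\n", *v);
	iter_stop();
	return 0;
}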
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index c64ce0a0bb03..ed8a2335442f 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -954,7 +954,7 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
954} 954}
955 955
956static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, 956static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
957 size_t data_len) 957 struct flowi *fl, size_t data_len)
958{ 958{
959 struct l2tp_tunnel *tunnel = session->tunnel; 959 struct l2tp_tunnel *tunnel = session->tunnel;
960 unsigned int len = skb->len; 960 unsigned int len = skb->len;
@@ -987,7 +987,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
987 987
988 /* Queue the packet to IP for output */ 988 /* Queue the packet to IP for output */
989 skb->local_df = 1; 989 skb->local_df = 1;
990 error = ip_queue_xmit(skb); 990 error = ip_queue_xmit(skb, fl);
991 991
992 /* Update stats */ 992 /* Update stats */
993 if (error >= 0) { 993 if (error >= 0) {
@@ -1028,6 +1028,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
1028 int data_len = skb->len; 1028 int data_len = skb->len;
1029 struct l2tp_tunnel *tunnel = session->tunnel; 1029 struct l2tp_tunnel *tunnel = session->tunnel;
1030 struct sock *sk = tunnel->sock; 1030 struct sock *sk = tunnel->sock;
1031 struct flowi *fl;
1031 struct udphdr *uh; 1032 struct udphdr *uh;
1032 struct inet_sock *inet; 1033 struct inet_sock *inet;
1033 __wsum csum; 1034 __wsum csum;
@@ -1060,14 +1061,21 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
1060 IPSKB_REROUTED); 1061 IPSKB_REROUTED);
1061 nf_reset(skb); 1062 nf_reset(skb);
1062 1063
1064 bh_lock_sock(sk);
1065 if (sock_owned_by_user(sk)) {
1066 dev_kfree_skb(skb);
1067 goto out_unlock;
1068 }
1069
1063 /* Get routing info from the tunnel socket */ 1070 /* Get routing info from the tunnel socket */
1064 skb_dst_drop(skb); 1071 skb_dst_drop(skb);
1065 skb_dst_set(skb, dst_clone(__sk_dst_get(sk))); 1072 skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
1066 1073
1074 inet = inet_sk(sk);
1075 fl = &inet->cork.fl;
1067 switch (tunnel->encap) { 1076 switch (tunnel->encap) {
1068 case L2TP_ENCAPTYPE_UDP: 1077 case L2TP_ENCAPTYPE_UDP:
1069 /* Setup UDP header */ 1078 /* Setup UDP header */
1070 inet = inet_sk(sk);
1071 __skb_push(skb, sizeof(*uh)); 1079 __skb_push(skb, sizeof(*uh));
1072 skb_reset_transport_header(skb); 1080 skb_reset_transport_header(skb);
1073 uh = udp_hdr(skb); 1081 uh = udp_hdr(skb);
@@ -1105,7 +1113,9 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
1105 1113
1106 l2tp_skb_set_owner_w(skb, sk); 1114 l2tp_skb_set_owner_w(skb, sk);
1107 1115
1108 l2tp_xmit_core(session, skb, data_len); 1116 l2tp_xmit_core(session, skb, fl, data_len);
1117out_unlock:
1118 bh_unlock_sock(sk);
1109 1119
1110abort: 1120abort:
1111 return 0; 1121 return 0;
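The l2tp_xmit_skb() change above takes the socket's bottom-half lock and simply frees the frame when user context currently owns the socket, instead of waiting in a path that must not sleep. The sketch below mirrors only the drop-rather-than-wait idea (the kernel code checks an owned flag under a spinlock, not a trylock); all names are illustrative:

/* Transmit path that drops when the owner lock is busy, never blocks. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long tx_drops;

static void xmit(const char *pkt)
{
	if (pthread_mutex_trylock(&sock_lock) != 0) {
		tx_drops++;                     /* owner busy: drop, don't sleep */
		return;
	}
	printf("sent: %s\n", pkt);
	pthread_mutex_unlock(&sock_lock);
}

int main(void)
{
	xmit("frame 1");
	pthread_mutex_lock(&sock_lock);         /* simulate user context owning the socket */
	xmit("frame 2");                        /* dropped */
	pthread_mutex_unlock(&sock_lock);
	printf("drops: %lu\n", tx_drops);
	return 0;
}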
@@ -1425,16 +1435,15 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
1425 1435
1426 /* Add tunnel to our list */ 1436 /* Add tunnel to our list */
1427 INIT_LIST_HEAD(&tunnel->list); 1437 INIT_LIST_HEAD(&tunnel->list);
1428 spin_lock_bh(&pn->l2tp_tunnel_list_lock);
1429 list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
1430 spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
1431 synchronize_rcu();
1432 atomic_inc(&l2tp_tunnel_count); 1438 atomic_inc(&l2tp_tunnel_count);
1433 1439
1434 /* Bump the reference count. The tunnel context is deleted 1440 /* Bump the reference count. The tunnel context is deleted
1435 * only when this drops to zero. 1441 * only when this drops to zero. Must be done before list insertion
1436 */ 1442 */
1437 l2tp_tunnel_inc_refcount(tunnel); 1443 l2tp_tunnel_inc_refcount(tunnel);
1444 spin_lock_bh(&pn->l2tp_tunnel_list_lock);
1445 list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
1446 spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
1438 1447
1439 err = 0; 1448 err = 0;
1440err: 1449err:
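In l2tp_tunnel_create() the reference count is now taken before the tunnel is linked onto the RCU-protected list, so a lockless reader can never find the object with a zero count, and the synchronize_rcu() after insertion is dropped. A small C11 sketch of that refcount-before-publish ordering, with invented types standing in for the kernel helpers:

/* Bump the count first, publish second; readers of tunnel_list then
 * always observe a non-zero refcnt. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct tunnel {
	atomic_int refcnt;
	struct tunnel *next;
};

static struct tunnel *_Atomic tunnel_list = NULL;

static void publish_tunnel(struct tunnel *t)
{
	atomic_fetch_add(&t->refcnt, 1);        /* hold the list's reference first */
	t->next = atomic_load(&tunnel_list);
	atomic_store(&tunnel_list, t);          /* only now visible to readers */
}

int main(void)
{
	struct tunnel *t = calloc(1, sizeof(*t));

	if (!t)
		return 1;
	atomic_init(&t->refcnt, 0);
	publish_tunnel(t);
	printf("refcnt after publish: %d\n", atomic_load(&t->refcnt));
	free(t);
	return 0;
}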
@@ -1626,7 +1635,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
1626 hlist_add_head_rcu(&session->global_hlist, 1635 hlist_add_head_rcu(&session->global_hlist,
1627 l2tp_session_id_hash_2(pn, session_id)); 1636 l2tp_session_id_hash_2(pn, session_id));
1628 spin_unlock_bh(&pn->l2tp_session_hlist_lock); 1637 spin_unlock_bh(&pn->l2tp_session_hlist_lock);
1629 synchronize_rcu();
1630 } 1638 }
1631 1639
1632 /* Ignore management session in session count value */ 1640 /* Ignore management session in session count value */
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8d9ce0accc98..a8193f52c13c 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -283,7 +283,7 @@ static __net_init int l2tp_eth_init_net(struct net *net)
283 return 0; 283 return 0;
284} 284}
285 285
286static __net_initdata struct pernet_operations l2tp_eth_net_ops = { 286static struct pernet_operations l2tp_eth_net_ops = {
287 .init = l2tp_eth_init_net, 287 .init = l2tp_eth_init_net,
288 .id = &l2tp_eth_net_id, 288 .id = &l2tp_eth_net_id,
289 .size = sizeof(struct l2tp_eth_net), 289 .size = sizeof(struct l2tp_eth_net),
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 110efb704c9b..b6466e71f5e1 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -296,12 +296,12 @@ out_in_use:
296 296
297static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 297static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
298{ 298{
299 int rc;
300 struct inet_sock *inet = inet_sk(sk);
301 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; 299 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
300 struct inet_sock *inet = inet_sk(sk);
301 struct flowi4 *fl4;
302 struct rtable *rt; 302 struct rtable *rt;
303 __be32 saddr; 303 __be32 saddr;
304 int oif; 304 int oif, rc;
305 305
306 rc = -EINVAL; 306 rc = -EINVAL;
307 if (addr_len < sizeof(*lsa)) 307 if (addr_len < sizeof(*lsa))
@@ -311,6 +311,8 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
311 if (lsa->l2tp_family != AF_INET) 311 if (lsa->l2tp_family != AF_INET)
312 goto out; 312 goto out;
313 313
314 lock_sock(sk);
315
314 sk_dst_reset(sk); 316 sk_dst_reset(sk);
315 317
316 oif = sk->sk_bound_dev_if; 318 oif = sk->sk_bound_dev_if;
@@ -320,11 +322,13 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
320 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) 322 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
321 goto out; 323 goto out;
322 324
323 rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr, 325 fl4 = &inet->cork.fl.u.ip4;
326 rt = ip_route_connect(fl4, lsa->l2tp_addr.s_addr, saddr,
324 RT_CONN_FLAGS(sk), oif, 327 RT_CONN_FLAGS(sk), oif,
325 IPPROTO_L2TP, 328 IPPROTO_L2TP,
326 0, 0, sk, 1); 329 0, 0, sk, true);
327 if (rc) { 330 if (IS_ERR(rt)) {
331 rc = PTR_ERR(rt);
328 if (rc == -ENETUNREACH) 332 if (rc == -ENETUNREACH)
329 IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); 333 IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
330 goto out; 334 goto out;
@@ -339,10 +343,10 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
339 l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; 343 l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
340 344
341 if (!inet->inet_saddr) 345 if (!inet->inet_saddr)
342 inet->inet_saddr = rt->rt_src; 346 inet->inet_saddr = fl4->saddr;
343 if (!inet->inet_rcv_saddr) 347 if (!inet->inet_rcv_saddr)
344 inet->inet_rcv_saddr = rt->rt_src; 348 inet->inet_rcv_saddr = fl4->saddr;
345 inet->inet_daddr = rt->rt_dst; 349 inet->inet_daddr = fl4->daddr;
346 sk->sk_state = TCP_ESTABLISHED; 350 sk->sk_state = TCP_ESTABLISHED;
347 inet->inet_id = jiffies; 351 inet->inet_id = jiffies;
348 352
@@ -355,6 +359,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
355 359
356 rc = 0; 360 rc = 0;
357out: 361out:
362 release_sock(sk);
358 return rc; 363 return rc;
359} 364}
360 365
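The connect path above now receives the route directly as ip_route_connect()'s return value and tests it with IS_ERR()/PTR_ERR() instead of a separate status integer. The snippet below re-creates that pointer-or-errno idiom in plain userspace C purely for illustration; the helpers are rewritten here, not taken from the kernel headers:

/* A single pointer return carries either a valid object or a small
 * negative errno encoded at the top of the address space. */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static int route_entry = 42;

static void *route_connect(int reachable)
{
	return reachable ? (void *)&route_entry : ERR_PTR(-ENETUNREACH);
}

int main(void)
{
	void *rt = route_connect(0);

	if (IS_ERR(rt))
		printf("routing failed: %ld\n", PTR_ERR(rt));   /* negative errno */

	rt = route_connect(1);
	if (!IS_ERR(rt))
		printf("route value: %d\n", *(int *)rt);        /* 42 */
	return 0;
}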
@@ -415,23 +420,28 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
415 int rc; 420 int rc;
416 struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk); 421 struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
417 struct inet_sock *inet = inet_sk(sk); 422 struct inet_sock *inet = inet_sk(sk);
418 struct ip_options *opt = inet->opt;
419 struct rtable *rt = NULL; 423 struct rtable *rt = NULL;
424 struct flowi4 *fl4;
420 int connected = 0; 425 int connected = 0;
421 __be32 daddr; 426 __be32 daddr;
422 427
428 lock_sock(sk);
429
430 rc = -ENOTCONN;
423 if (sock_flag(sk, SOCK_DEAD)) 431 if (sock_flag(sk, SOCK_DEAD))
424 return -ENOTCONN; 432 goto out;
425 433
426 /* Get and verify the address. */ 434 /* Get and verify the address. */
427 if (msg->msg_name) { 435 if (msg->msg_name) {
428 struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name; 436 struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name;
437 rc = -EINVAL;
429 if (msg->msg_namelen < sizeof(*lip)) 438 if (msg->msg_namelen < sizeof(*lip))
430 return -EINVAL; 439 goto out;
431 440
432 if (lip->l2tp_family != AF_INET) { 441 if (lip->l2tp_family != AF_INET) {
442 rc = -EAFNOSUPPORT;
433 if (lip->l2tp_family != AF_UNSPEC) 443 if (lip->l2tp_family != AF_UNSPEC)
434 return -EAFNOSUPPORT; 444 goto out;
435 } 445 }
436 446
437 daddr = lip->l2tp_addr.s_addr; 447 daddr = lip->l2tp_addr.s_addr;
@@ -466,38 +476,39 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
466 goto error; 476 goto error;
467 } 477 }
468 478
479 fl4 = &inet->cork.fl.u.ip4;
469 if (connected) 480 if (connected)
470 rt = (struct rtable *) __sk_dst_check(sk, 0); 481 rt = (struct rtable *) __sk_dst_check(sk, 0);
471 482
472 if (rt == NULL) { 483 if (rt == NULL) {
484 struct ip_options_rcu *inet_opt;
485
486 rcu_read_lock();
487 inet_opt = rcu_dereference(inet->inet_opt);
488
473 /* Use correct destination address if we have options. */ 489 /* Use correct destination address if we have options. */
474 if (opt && opt->srr) 490 if (inet_opt && inet_opt->opt.srr)
475 daddr = opt->faddr; 491 daddr = inet_opt->opt.faddr;
476 492
477 { 493 rcu_read_unlock();
478 struct flowi fl = { .oif = sk->sk_bound_dev_if, 494
479 .fl4_dst = daddr, 495 /* If this fails, retransmit mechanism of transport layer will
480 .fl4_src = inet->inet_saddr, 496 * keep trying until route appears or the connection times
481 .fl4_tos = RT_CONN_FLAGS(sk), 497 * itself out.
482 .proto = sk->sk_protocol, 498 */
483 .flags = inet_sk_flowi_flags(sk), 499 rt = ip_route_output_ports(sock_net(sk), fl4, sk,
484 .fl_ip_sport = inet->inet_sport, 500 daddr, inet->inet_saddr,
485 .fl_ip_dport = inet->inet_dport }; 501 inet->inet_dport, inet->inet_sport,
486 502 sk->sk_protocol, RT_CONN_FLAGS(sk),
487 /* If this fails, retransmit mechanism of transport layer will 503 sk->sk_bound_dev_if);
488 * keep trying until route appears or the connection times 504 if (IS_ERR(rt))
489 * itself out. 505 goto no_route;
490 */
491 security_sk_classify_flow(sk, &fl);
492 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
493 goto no_route;
494 }
495 sk_setup_caps(sk, &rt->dst); 506 sk_setup_caps(sk, &rt->dst);
496 } 507 }
497 skb_dst_set(skb, dst_clone(&rt->dst)); 508 skb_dst_set(skb, dst_clone(&rt->dst));
498 509
499 /* Queue the packet to IP for output */ 510 /* Queue the packet to IP for output */
500 rc = ip_queue_xmit(skb); 511 rc = ip_queue_xmit(skb, &inet->cork.fl);
501 512
502error: 513error:
503 /* Update stats */ 514 /* Update stats */
@@ -509,12 +520,15 @@ error:
509 lsa->tx_errors++; 520 lsa->tx_errors++;
510 } 521 }
511 522
523out:
524 release_sock(sk);
512 return rc; 525 return rc;
513 526
514no_route: 527no_route:
515 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 528 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
516 kfree_skb(skb); 529 kfree_skb(skb);
517 return -EHOSTUNREACH; 530 rc = -EHOSTUNREACH;
531 goto out;
518} 532}
519 533
520static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 534static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
@@ -673,7 +687,7 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
673MODULE_DESCRIPTION("L2TP over IP"); 687MODULE_DESCRIPTION("L2TP over IP");
674MODULE_VERSION("1.0"); 688MODULE_VERSION("1.0");
675 689
676/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like 690/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like
677 * enums 691 * enums
678 */ 692 */
679MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); 693MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 4c1e540732d7..93a41a09458b 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -795,11 +795,12 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops
795 goto out; 795 goto out;
796 796
797 l2tp_nl_cmd_ops[pw_type] = ops; 797 l2tp_nl_cmd_ops[pw_type] = ops;
798 ret = 0;
798 799
799out: 800out:
800 genl_unlock(); 801 genl_unlock();
801err: 802err:
802 return 0; 803 return ret;
803} 804}
804EXPORT_SYMBOL_GPL(l2tp_nl_register_ops); 805EXPORT_SYMBOL_GPL(l2tp_nl_register_ops);
805 806
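The l2tp_nl_register_ops() fix above makes the function return the status it recorded instead of an unconditional 0, so callers see the real error rather than success. A compact sketch of the same goto-style error handling, with invented names and error codes chosen only for the example:

/* Record the status in ret, unwind through the labels, and return ret. */
#include <errno.h>
#include <stdio.h>

static int ops_registered;

static int register_ops(int pw_type)
{
	int ret = -EINVAL;

	if (pw_type < 0)
		goto err;                       /* bad argument: nothing to undo */

	/* a lock would be taken here */
	if (ops_registered) {
		ret = -EBUSY;
		goto out;                       /* slot already claimed */
	}

	ops_registered = 1;
	ret = 0;
out:
	/* the lock would be dropped here */
err:
	return ret;                             /* propagate the real status */
}

int main(void)
{
	int a = register_ops(1);
	int b = register_ops(1);
	int c = register_ops(-1);

	printf("%d %d %d\n", a, b, c);          /* 0, then the two failures */
	return 0;
}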
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index f99687439139..903242111317 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -121,8 +121,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
121 s32 data_size = ntohs(pdulen) - llc_len; 121 s32 data_size = ntohs(pdulen) - llc_len;
122 122
123 if (data_size < 0 || 123 if (data_size < 0 ||
124 ((skb_tail_pointer(skb) - 124 !pskb_may_pull(skb, data_size))
125 (u8 *)pdu) - llc_len) < data_size)
126 return 0; 125 return 0;
127 if (unlikely(pskb_trim_rcsum(skb, data_size))) 126 if (unlikely(pskb_trim_rcsum(skb, data_size)))
128 return 0; 127 return 0;
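llc_fixup_skb() above now asks pskb_may_pull() whether the length claimed by the PDU header is actually present before trimming, instead of open-coding tail-pointer arithmetic. The userspace fragment below illustrates the same rule, validate a declared length against the bytes really available, using a made-up two-byte length header:

/* Trust a header's length field only after checking the buffer holds it. */
#include <stdio.h>

static int fixup_frame(const unsigned char *buf, size_t buf_len)
{
	if (buf_len < 2)
		return 0;                               /* no length field at all */

	size_t claimed = ((size_t)buf[0] << 8) | buf[1];  /* big-endian length */
	size_t avail = buf_len - 2;

	if (claimed > avail)
		return 0;                               /* header lies: drop frame */

	printf("ok: %zu payload bytes\n", claimed);
	return 1;
}

int main(void)
{
	unsigned char good[] = { 0x00, 0x03, 'a', 'b', 'c' };
	unsigned char bad[]  = { 0x00, 0x10, 'a' };     /* claims 16, has 1 */

	fixup_frame(good, sizeof(good));
	printf("bad accepted? %d\n", fixup_frame(bad, sizeof(bad)));
	return 0;
}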
@@ -181,25 +180,26 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
181 * LLC functionality 180 * LLC functionality
182 */ 181 */
183 rcv = rcu_dereference(sap->rcv_func); 182 rcv = rcu_dereference(sap->rcv_func);
184 if (rcv) {
185 struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);
186 if (cskb)
187 rcv(cskb, dev, pt, orig_dev);
188 }
189 dest = llc_pdu_type(skb); 183 dest = llc_pdu_type(skb);
190 if (unlikely(!dest || !llc_type_handlers[dest - 1])) 184 if (unlikely(!dest || !llc_type_handlers[dest - 1])) {
191 goto drop_put; 185 if (rcv)
192 llc_type_handlers[dest - 1](sap, skb); 186 rcv(skb, dev, pt, orig_dev);
193out_put: 187 else
188 kfree_skb(skb);
189 } else {
190 if (rcv) {
191 struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);
192 if (cskb)
193 rcv(cskb, dev, pt, orig_dev);
194 }
195 llc_type_handlers[dest - 1](sap, skb);
196 }
194 llc_sap_put(sap); 197 llc_sap_put(sap);
195out: 198out:
196 return 0; 199 return 0;
197drop: 200drop:
198 kfree_skb(skb); 201 kfree_skb(skb);
199 goto out; 202 goto out;
200drop_put:
201 kfree_skb(skb);
202 goto out_put;
203handle_station: 203handle_station:
204 if (!llc_station_handler) 204 if (!llc_station_handler)
205 goto drop; 205 goto drop;
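The reworked llc_rcv() above hands the original skb straight to the registered rcv_func when there is no type handler for the PDU, and clones only when both consumers need the buffer, so the old unconditional clone and the drop_put path disappear. Below is a loose userspace sketch of that ownership rule; the names and buffer handling are invented for the example:

/* Duplicate the buffer only when two consumers both need a copy. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void consume(const char *who, char *buf)
{
	printf("%s got: %s\n", who, buf);
	free(buf);
}

static void deliver(char *buf, int have_hook, int have_handler)
{
	if (have_hook && have_handler) {
		char *copy = strdup(buf);       /* both want it: duplicate once */
		if (copy)
			consume("hook", copy);
		consume("handler", buf);
	} else if (have_hook) {
		consume("hook", buf);           /* single consumer: hand it over */
	} else if (have_handler) {
		consume("handler", buf);
	} else {
		free(buf);                      /* nobody wants it: drop */
	}
}

int main(void)
{
	deliver(strdup("pdu-a"), 1, 1);
	deliver(strdup("pdu-b"), 1, 0);
	return 0;
}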
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index c766056d0488..f5fdfcbf552a 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -2,7 +2,6 @@ config MAC80211
2 tristate "Generic IEEE 802.11 Networking Stack (mac80211)" 2 tristate "Generic IEEE 802.11 Networking Stack (mac80211)"
3 depends on CFG80211 3 depends on CFG80211
4 select CRYPTO 4 select CRYPTO
5 select CRYPTO_ECB
6 select CRYPTO_ARC4 5 select CRYPTO_ARC4
7 select CRYPTO_AES 6 select CRYPTO_AES
8 select CRC32 7 select CRC32
@@ -17,7 +16,7 @@ comment "CFG80211 needs to be enabled for MAC80211"
17if MAC80211 != n 16if MAC80211 != n
18 17
19config MAC80211_HAS_RC 18config MAC80211_HAS_RC
20 def_bool n 19 bool
21 20
22config MAC80211_RC_PID 21config MAC80211_RC_PID
23 bool "PID controller based rate control algorithm" if EXPERT 22 bool "PID controller based rate control algorithm" if EXPERT
@@ -78,7 +77,7 @@ config MAC80211_RC_DEFAULT
78endif 77endif
79 78
80comment "Some wireless drivers require a rate control algorithm" 79comment "Some wireless drivers require a rate control algorithm"
81 depends on MAC80211_HAS_RC=n 80 depends on MAC80211 && MAC80211_HAS_RC=n
82 81
83config MAC80211_MESH 82config MAC80211_MESH
84 bool "Enable mac80211 mesh networking (pre-802.11s) support" 83 bool "Enable mac80211 mesh networking (pre-802.11s) support"
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 4bd6ef0be380..b9b595c08112 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -54,13 +54,12 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
54 u8 *cdata, u8 *mic) 54 u8 *cdata, u8 *mic)
55{ 55{
56 int i, j, last_len, num_blocks; 56 int i, j, last_len, num_blocks;
57 u8 *pos, *cpos, *b, *s_0, *e, *b_0, *aad; 57 u8 *pos, *cpos, *b, *s_0, *e, *b_0;
58 58
59 b = scratch; 59 b = scratch;
60 s_0 = scratch + AES_BLOCK_LEN; 60 s_0 = scratch + AES_BLOCK_LEN;
61 e = scratch + 2 * AES_BLOCK_LEN; 61 e = scratch + 2 * AES_BLOCK_LEN;
62 b_0 = scratch + 3 * AES_BLOCK_LEN; 62 b_0 = scratch + 3 * AES_BLOCK_LEN;
63 aad = scratch + 4 * AES_BLOCK_LEN;
64 63
65 num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); 64 num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
66 last_len = data_len % AES_BLOCK_LEN; 65 last_len = data_len % AES_BLOCK_LEN;
@@ -94,13 +93,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
94 u8 *cdata, size_t data_len, u8 *mic, u8 *data) 93 u8 *cdata, size_t data_len, u8 *mic, u8 *data)
95{ 94{
96 int i, j, last_len, num_blocks; 95 int i, j, last_len, num_blocks;
97 u8 *pos, *cpos, *b, *s_0, *a, *b_0, *aad; 96 u8 *pos, *cpos, *b, *s_0, *a, *b_0;
98 97
99 b = scratch; 98 b = scratch;
100 s_0 = scratch + AES_BLOCK_LEN; 99 s_0 = scratch + AES_BLOCK_LEN;
101 a = scratch + 2 * AES_BLOCK_LEN; 100 a = scratch + 2 * AES_BLOCK_LEN;
102 b_0 = scratch + 3 * AES_BLOCK_LEN; 101 b_0 = scratch + 3 * AES_BLOCK_LEN;
103 aad = scratch + 4 * AES_BLOCK_LEN;
104 102
105 num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); 103 num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
106 last_len = data_len % AES_BLOCK_LEN; 104 last_len = data_len % AES_BLOCK_LEN;
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 227ca82eef72..9c0d76cdca92 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -63,7 +63,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
63 63
64 lockdep_assert_held(&sta->ampdu_mlme.mtx); 64 lockdep_assert_held(&sta->ampdu_mlme.mtx);
65 65
66 tid_rx = sta->ampdu_mlme.tid_rx[tid]; 66 tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid],
67 lockdep_is_held(&sta->ampdu_mlme.mtx));
67 68
68 if (!tid_rx) 69 if (!tid_rx)
69 return; 70 return;
@@ -76,7 +77,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
76#endif /* CONFIG_MAC80211_HT_DEBUG */ 77#endif /* CONFIG_MAC80211_HT_DEBUG */
77 78
78 if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, 79 if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP,
79 &sta->sta, tid, NULL)) 80 &sta->sta, tid, NULL, 0))
80 printk(KERN_DEBUG "HW problem - can not stop rx " 81 printk(KERN_DEBUG "HW problem - can not stop rx "
81 "aggregation for tid %d\n", tid); 82 "aggregation for tid %d\n", tid);
82 83
@@ -232,6 +233,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
232 if (buf_size == 0) 233 if (buf_size == 0)
233 buf_size = IEEE80211_MAX_AMPDU_BUF; 234 buf_size = IEEE80211_MAX_AMPDU_BUF;
234 235
236 /* make sure the size doesn't exceed the maximum supported by the hw */
237 if (buf_size > local->hw.max_rx_aggregation_subframes)
238 buf_size = local->hw.max_rx_aggregation_subframes;
235 239
236 /* examine state machine */ 240 /* examine state machine */
237 mutex_lock(&sta->ampdu_mlme.mtx); 241 mutex_lock(&sta->ampdu_mlme.mtx);
@@ -287,7 +291,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
287 } 291 }
288 292
289 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, 293 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START,
290 &sta->sta, tid, &start_seq_num); 294 &sta->sta, tid, &start_seq_num, 0);
291#ifdef CONFIG_MAC80211_HT_DEBUG 295#ifdef CONFIG_MAC80211_HT_DEBUG
292 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); 296 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
293#endif /* CONFIG_MAC80211_HT_DEBUG */ 297#endif /* CONFIG_MAC80211_HT_DEBUG */
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9cc472c6a6a5..c8be8eff70da 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -136,12 +136,12 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1
136 ieee80211_tx_skb(sdata, skb); 136 ieee80211_tx_skb(sdata, skb);
137} 137}
138 138
139static void kfree_tid_tx(struct rcu_head *rcu_head) 139void ieee80211_assign_tid_tx(struct sta_info *sta, int tid,
140 struct tid_ampdu_tx *tid_tx)
140{ 141{
141 struct tid_ampdu_tx *tid_tx = 142 lockdep_assert_held(&sta->ampdu_mlme.mtx);
142 container_of(rcu_head, struct tid_ampdu_tx, rcu_head); 143 lockdep_assert_held(&sta->lock);
143 144 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx);
144 kfree(tid_tx);
145} 145}
146 146
147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
@@ -149,21 +149,24 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
149 bool tx) 149 bool tx)
150{ 150{
151 struct ieee80211_local *local = sta->local; 151 struct ieee80211_local *local = sta->local;
152 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; 152 struct tid_ampdu_tx *tid_tx;
153 int ret; 153 int ret;
154 154
155 lockdep_assert_held(&sta->ampdu_mlme.mtx); 155 lockdep_assert_held(&sta->ampdu_mlme.mtx);
156 156
157 if (!tid_tx)
158 return -ENOENT;
159
160 spin_lock_bh(&sta->lock); 157 spin_lock_bh(&sta->lock);
161 158
159 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
160 if (!tid_tx) {
161 spin_unlock_bh(&sta->lock);
162 return -ENOENT;
163 }
164
162 if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { 165 if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
163 /* not even started yet! */ 166 /* not even started yet! */
164 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); 167 ieee80211_assign_tid_tx(sta, tid, NULL);
165 spin_unlock_bh(&sta->lock); 168 spin_unlock_bh(&sta->lock);
166 call_rcu(&tid_tx->rcu_head, kfree_tid_tx); 169 kfree_rcu(tid_tx, rcu_head);
167 return 0; 170 return 0;
168 } 171 }
169 172
@@ -190,7 +193,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
190 193
191 ret = drv_ampdu_action(local, sta->sdata, 194 ret = drv_ampdu_action(local, sta->sdata,
192 IEEE80211_AMPDU_TX_STOP, 195 IEEE80211_AMPDU_TX_STOP,
193 &sta->sta, tid, NULL); 196 &sta->sta, tid, NULL, 0);
194 197
195 /* HW shall not deny going back to legacy */ 198 /* HW shall not deny going back to legacy */
196 if (WARN_ON(ret)) { 199 if (WARN_ON(ret)) {
@@ -283,13 +286,13 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid)
283 286
284void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) 287void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
285{ 288{
286 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; 289 struct tid_ampdu_tx *tid_tx;
287 struct ieee80211_local *local = sta->local; 290 struct ieee80211_local *local = sta->local;
288 struct ieee80211_sub_if_data *sdata = sta->sdata; 291 struct ieee80211_sub_if_data *sdata = sta->sdata;
289 u16 start_seq_num; 292 u16 start_seq_num;
290 int ret; 293 int ret;
291 294
292 lockdep_assert_held(&sta->ampdu_mlme.mtx); 295 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
293 296
294 /* 297 /*
295 * While we're asking the driver about the aggregation, 298 * While we're asking the driver about the aggregation,
@@ -311,18 +314,18 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
311 start_seq_num = sta->tid_seq[tid] >> 4; 314 start_seq_num = sta->tid_seq[tid] >> 4;
312 315
313 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, 316 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
314 &sta->sta, tid, &start_seq_num); 317 &sta->sta, tid, &start_seq_num, 0);
315 if (ret) { 318 if (ret) {
316#ifdef CONFIG_MAC80211_HT_DEBUG 319#ifdef CONFIG_MAC80211_HT_DEBUG
317 printk(KERN_DEBUG "BA request denied - HW unavailable for" 320 printk(KERN_DEBUG "BA request denied - HW unavailable for"
318 " tid %d\n", tid); 321 " tid %d\n", tid);
319#endif 322#endif
320 spin_lock_bh(&sta->lock); 323 spin_lock_bh(&sta->lock);
321 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); 324 ieee80211_assign_tid_tx(sta, tid, NULL);
322 spin_unlock_bh(&sta->lock); 325 spin_unlock_bh(&sta->lock);
323 326
324 ieee80211_wake_queue_agg(local, tid); 327 ieee80211_wake_queue_agg(local, tid);
325 call_rcu(&tid_tx->rcu_head, kfree_tid_tx); 328 kfree_rcu(tid_tx, rcu_head);
326 return; 329 return;
327 } 330 }
328 331
@@ -342,7 +345,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
342 /* send AddBA request */ 345 /* send AddBA request */
343 ieee80211_send_addba_request(sdata, sta->sta.addr, tid, 346 ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
344 tid_tx->dialog_token, start_seq_num, 347 tid_tx->dialog_token, start_seq_num,
345 0x40, tid_tx->timeout); 348 local->hw.max_tx_aggregation_subframes,
349 tid_tx->timeout);
346} 350}
347 351
348int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, 352int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
@@ -395,9 +399,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
395 goto err_unlock_sta; 399 goto err_unlock_sta;
396 } 400 }
397 401
398 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 402 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
399 /* check if the TID is not in aggregation flow already */ 403 /* check if the TID is not in aggregation flow already */
400 if (tid_tx) { 404 if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) {
401#ifdef CONFIG_MAC80211_HT_DEBUG 405#ifdef CONFIG_MAC80211_HT_DEBUG
402 printk(KERN_DEBUG "BA request denied - session is not " 406 printk(KERN_DEBUG "BA request denied - session is not "
403 "idle on tid %u\n", tid); 407 "idle on tid %u\n", tid);
@@ -432,8 +436,11 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
432 sta->ampdu_mlme.dialog_token_allocator++; 436 sta->ampdu_mlme.dialog_token_allocator++;
433 tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator; 437 tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator;
434 438
435 /* finally, assign it to the array */ 439 /*
436 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); 440 * Finally, assign it to the start array; the work item will
441 * collect it and move it to the normal array.
442 */
443 sta->ampdu_mlme.tid_start_tx[tid] = tid_tx;
437 444
438 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); 445 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
439 446
@@ -479,15 +486,19 @@ ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid)
479static void ieee80211_agg_tx_operational(struct ieee80211_local *local, 486static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
480 struct sta_info *sta, u16 tid) 487 struct sta_info *sta, u16 tid)
481{ 488{
489 struct tid_ampdu_tx *tid_tx;
490
482 lockdep_assert_held(&sta->ampdu_mlme.mtx); 491 lockdep_assert_held(&sta->ampdu_mlme.mtx);
483 492
493 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
494
484#ifdef CONFIG_MAC80211_HT_DEBUG 495#ifdef CONFIG_MAC80211_HT_DEBUG
485 printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid); 496 printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid);
486#endif 497#endif
487 498
488 drv_ampdu_action(local, sta->sdata, 499 drv_ampdu_action(local, sta->sdata,
489 IEEE80211_AMPDU_TX_OPERATIONAL, 500 IEEE80211_AMPDU_TX_OPERATIONAL,
490 &sta->sta, tid, NULL); 501 &sta->sta, tid, NULL, tid_tx->buf_size);
491 502
492 /* 503 /*
493 * synchronize with TX path, while splicing the TX path 504 * synchronize with TX path, while splicing the TX path
@@ -495,13 +506,13 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
495 */ 506 */
496 spin_lock_bh(&sta->lock); 507 spin_lock_bh(&sta->lock);
497 508
498 ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid); 509 ieee80211_agg_splice_packets(local, tid_tx, tid);
499 /* 510 /*
500 * Now mark as operational. This will be visible 511 * Now mark as operational. This will be visible
501 * in the TX path, and lets it go lock-free in 512 * in the TX path, and lets it go lock-free in
502 * the common case. 513 * the common case.
503 */ 514 */
504 set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state); 515 set_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
505 ieee80211_agg_splice_finish(local, tid); 516 ieee80211_agg_splice_finish(local, tid);
506 517
507 spin_unlock_bh(&sta->lock); 518 spin_unlock_bh(&sta->lock);
@@ -535,7 +546,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
535 } 546 }
536 547
537 mutex_lock(&sta->ampdu_mlme.mtx); 548 mutex_lock(&sta->ampdu_mlme.mtx);
538 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 549 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
539 550
540 if (WARN_ON(!tid_tx)) { 551 if (WARN_ON(!tid_tx)) {
541#ifdef CONFIG_MAC80211_HT_DEBUG 552#ifdef CONFIG_MAC80211_HT_DEBUG
@@ -613,7 +624,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
613 return -EINVAL; 624 return -EINVAL;
614 625
615 spin_lock_bh(&sta->lock); 626 spin_lock_bh(&sta->lock);
616 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 627 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
617 628
618 if (!tid_tx) { 629 if (!tid_tx) {
619 ret = -ENOENT; 630 ret = -ENOENT;
@@ -669,7 +680,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
669 680
670 mutex_lock(&sta->ampdu_mlme.mtx); 681 mutex_lock(&sta->ampdu_mlme.mtx);
671 spin_lock_bh(&sta->lock); 682 spin_lock_bh(&sta->lock);
672 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 683 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
673 684
674 if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { 685 if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
675#ifdef CONFIG_MAC80211_HT_DEBUG 686#ifdef CONFIG_MAC80211_HT_DEBUG
@@ -695,11 +706,11 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
695 ieee80211_agg_splice_packets(local, tid_tx, tid); 706 ieee80211_agg_splice_packets(local, tid_tx, tid);
696 707
697 /* future packets must not find the tid_tx struct any more */ 708 /* future packets must not find the tid_tx struct any more */
698 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); 709 ieee80211_assign_tid_tx(sta, tid, NULL);
699 710
700 ieee80211_agg_splice_finish(local, tid); 711 ieee80211_agg_splice_finish(local, tid);
701 712
702 call_rcu(&tid_tx->rcu_head, kfree_tid_tx); 713 kfree_rcu(tid_tx, rcu_head);
703 714
704 unlock_sta: 715 unlock_sta:
705 spin_unlock_bh(&sta->lock); 716 spin_unlock_bh(&sta->lock);
@@ -742,13 +753,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
742{ 753{
743 struct tid_ampdu_tx *tid_tx; 754 struct tid_ampdu_tx *tid_tx;
744 u16 capab, tid; 755 u16 capab, tid;
756 u8 buf_size;
745 757
746 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); 758 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
747 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; 759 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
760 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
748 761
749 mutex_lock(&sta->ampdu_mlme.mtx); 762 mutex_lock(&sta->ampdu_mlme.mtx);
750 763
751 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 764 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
752 if (!tid_tx) 765 if (!tid_tx)
753 goto out; 766 goto out;
754 767
@@ -767,12 +780,23 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
767 780
768 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) 781 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
769 == WLAN_STATUS_SUCCESS) { 782 == WLAN_STATUS_SUCCESS) {
783 /*
784 * IEEE 802.11-2007 7.3.1.14:
785 * In an ADDBA Response frame, when the Status Code field
786 * is set to 0, the Buffer Size subfield is set to a value
787 * of at least 1.
788 */
789 if (!buf_size)
790 goto out;
791
770 if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, 792 if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
771 &tid_tx->state)) { 793 &tid_tx->state)) {
772 /* ignore duplicate response */ 794 /* ignore duplicate response */
773 goto out; 795 goto out;
774 } 796 }
775 797
798 tid_tx->buf_size = buf_size;
799
776 if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) 800 if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
777 ieee80211_agg_tx_operational(local, sta, tid); 801 ieee80211_agg_tx_operational(local, sta, tid);
778 802
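In ieee80211_process_addba_resp() above, the peer's advertised buffer size is now pulled out of the 16-bit capability word with a mask and shift and stored in tid_tx->buf_size before the session goes operational. A standalone illustration of that extraction follows; the mask values mirror the 802.11 ADDBA parameter layout (TID in bits 2-5, buffer size in bits 6-15) and the sample input is invented:

/* Extract TID and buffer size from an ADDBA capability word. */
#include <stdint.h>
#include <stdio.h>

#define ADDBA_PARAM_TID_MASK      0x003C
#define ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0

int main(void)
{
	uint16_t capab = 0x4012;                       /* example capability word */
	uint16_t tid      = (capab & ADDBA_PARAM_TID_MASK) >> 2;
	uint16_t buf_size = (capab & ADDBA_PARAM_BUF_SIZE_MASK) >> 6;

	printf("tid=%u buf_size=%u\n", tid, buf_size); /* tid=4 buf_size=256 */
	return 0;
}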
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9cd73b11506e..be70c70d3f5b 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -136,7 +136,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
136 mutex_lock(&sdata->local->sta_mtx); 136 mutex_lock(&sdata->local->sta_mtx);
137 137
138 if (mac_addr) { 138 if (mac_addr) {
139 sta = sta_info_get_bss(sdata, mac_addr); 139 if (ieee80211_vif_is_mesh(&sdata->vif))
140 sta = sta_info_get(sdata, mac_addr);
141 else
142 sta = sta_info_get_bss(sdata, mac_addr);
140 if (!sta) { 143 if (!sta) {
141 ieee80211_key_free(sdata->local, key); 144 ieee80211_key_free(sdata->local, key);
142 err = -ENOENT; 145 err = -ENOENT;
@@ -157,13 +160,14 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
157static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, 160static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
158 u8 key_idx, bool pairwise, const u8 *mac_addr) 161 u8 key_idx, bool pairwise, const u8 *mac_addr)
159{ 162{
160 struct ieee80211_sub_if_data *sdata; 163 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
164 struct ieee80211_local *local = sdata->local;
161 struct sta_info *sta; 165 struct sta_info *sta;
166 struct ieee80211_key *key = NULL;
162 int ret; 167 int ret;
163 168
164 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 169 mutex_lock(&local->sta_mtx);
165 170 mutex_lock(&local->key_mtx);
166 mutex_lock(&sdata->local->sta_mtx);
167 171
168 if (mac_addr) { 172 if (mac_addr) {
169 ret = -ENOENT; 173 ret = -ENOENT;
@@ -172,33 +176,24 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
172 if (!sta) 176 if (!sta)
173 goto out_unlock; 177 goto out_unlock;
174 178
175 if (pairwise) { 179 if (pairwise)
176 if (sta->ptk) { 180 key = key_mtx_dereference(local, sta->ptk);
177 ieee80211_key_free(sdata->local, sta->ptk); 181 else
178 ret = 0; 182 key = key_mtx_dereference(local, sta->gtk[key_idx]);
179 } 183 } else
180 } else { 184 key = key_mtx_dereference(local, sdata->keys[key_idx]);
181 if (sta->gtk[key_idx]) {
182 ieee80211_key_free(sdata->local,
183 sta->gtk[key_idx]);
184 ret = 0;
185 }
186 }
187
188 goto out_unlock;
189 }
190 185
191 if (!sdata->keys[key_idx]) { 186 if (!key) {
192 ret = -ENOENT; 187 ret = -ENOENT;
193 goto out_unlock; 188 goto out_unlock;
194 } 189 }
195 190
196 ieee80211_key_free(sdata->local, sdata->keys[key_idx]); 191 __ieee80211_key_free(key);
197 WARN_ON(sdata->keys[key_idx]);
198 192
199 ret = 0; 193 ret = 0;
200 out_unlock: 194 out_unlock:
201 mutex_unlock(&sdata->local->sta_mtx); 195 mutex_unlock(&local->key_mtx);
196 mutex_unlock(&local->sta_mtx);
202 197
203 return ret; 198 return ret;
204} 199}
@@ -228,11 +223,11 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
228 goto out; 223 goto out;
229 224
230 if (pairwise) 225 if (pairwise)
231 key = sta->ptk; 226 key = rcu_dereference(sta->ptk);
232 else if (key_idx < NUM_DEFAULT_KEYS) 227 else if (key_idx < NUM_DEFAULT_KEYS)
233 key = sta->gtk[key_idx]; 228 key = rcu_dereference(sta->gtk[key_idx]);
234 } else 229 } else
235 key = sdata->keys[key_idx]; 230 key = rcu_dereference(sdata->keys[key_idx]);
236 231
237 if (!key) 232 if (!key)
238 goto out; 233 goto out;
@@ -316,9 +311,21 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
316 return 0; 311 return 0;
317} 312}
318 313
314static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, int idx)
315{
316 if (!(rate->flags & RATE_INFO_FLAGS_MCS)) {
317 struct ieee80211_supported_band *sband;
318 sband = sta->local->hw.wiphy->bands[
319 sta->local->hw.conf.channel->band];
320 rate->legacy = sband->bitrates[idx].bitrate;
321 } else
322 rate->mcs = idx;
323}
324
319static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) 325static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
320{ 326{
321 struct ieee80211_sub_if_data *sdata = sta->sdata; 327 struct ieee80211_sub_if_data *sdata = sta->sdata;
328 struct timespec uptime;
322 329
323 sinfo->generation = sdata->local->sta_generation; 330 sinfo->generation = sdata->local->sta_generation;
324 331
@@ -330,7 +337,13 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
330 STATION_INFO_TX_RETRIES | 337 STATION_INFO_TX_RETRIES |
331 STATION_INFO_TX_FAILED | 338 STATION_INFO_TX_FAILED |
332 STATION_INFO_TX_BITRATE | 339 STATION_INFO_TX_BITRATE |
333 STATION_INFO_RX_DROP_MISC; 340 STATION_INFO_RX_BITRATE |
341 STATION_INFO_RX_DROP_MISC |
342 STATION_INFO_BSS_PARAM |
343 STATION_INFO_CONNECTED_TIME;
344
345 do_posix_clock_monotonic_gettime(&uptime);
346 sinfo->connected_time = uptime.tv_sec - sta->last_connected;
334 347
335 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); 348 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
336 sinfo->rx_bytes = sta->rx_bytes; 349 sinfo->rx_bytes = sta->rx_bytes;
@@ -355,15 +368,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
355 sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; 368 sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
356 if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI) 369 if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI)
357 sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; 370 sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
371 rate_idx_to_bitrate(&sinfo->txrate, sta, sta->last_tx_rate.idx);
358 372
359 if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) { 373 sinfo->rxrate.flags = 0;
360 struct ieee80211_supported_band *sband; 374 if (sta->last_rx_rate_flag & RX_FLAG_HT)
361 sband = sta->local->hw.wiphy->bands[ 375 sinfo->rxrate.flags |= RATE_INFO_FLAGS_MCS;
362 sta->local->hw.conf.channel->band]; 376 if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
363 sinfo->txrate.legacy = 377 sinfo->rxrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
364 sband->bitrates[sta->last_tx_rate.idx].bitrate; 378 if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI)
365 } else 379 sinfo->rxrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
366 sinfo->txrate.mcs = sta->last_tx_rate.idx; 380 rate_idx_to_bitrate(&sinfo->rxrate, sta, sta->last_rx_rate_idx);
367 381
368 if (ieee80211_vif_is_mesh(&sdata->vif)) { 382 if (ieee80211_vif_is_mesh(&sdata->vif)) {
369#ifdef CONFIG_MAC80211_MESH 383#ifdef CONFIG_MAC80211_MESH
@@ -376,6 +390,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
376 sinfo->plink_state = sta->plink_state; 390 sinfo->plink_state = sta->plink_state;
377#endif 391#endif
378 } 392 }
393
394 sinfo->bss_param.flags = 0;
395 if (sdata->vif.bss_conf.use_cts_prot)
396 sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT;
397 if (sdata->vif.bss_conf.use_short_preamble)
398 sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE;
399 if (sdata->vif.bss_conf.use_short_slot)
400 sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME;
401 sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period;
402 sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int;
379} 403}
380 404
381 405
@@ -439,7 +463,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
439 int size; 463 int size;
440 int err = -EINVAL; 464 int err = -EINVAL;
441 465
442 old = sdata->u.ap.beacon; 466 old = rtnl_dereference(sdata->u.ap.beacon);
443 467
444 /* head must not be zero-length */ 468 /* head must not be zero-length */
445 if (params->head && !params->head_len) 469 if (params->head && !params->head_len)
@@ -534,8 +558,7 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
534 558
535 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 559 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
536 560
537 old = sdata->u.ap.beacon; 561 old = rtnl_dereference(sdata->u.ap.beacon);
538
539 if (old) 562 if (old)
540 return -EALREADY; 563 return -EALREADY;
541 564
@@ -550,8 +573,7 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
550 573
551 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 574 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
552 575
553 old = sdata->u.ap.beacon; 576 old = rtnl_dereference(sdata->u.ap.beacon);
554
555 if (!old) 577 if (!old)
556 return -ENOENT; 578 return -ENOENT;
557 579
@@ -565,8 +587,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
565 587
566 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 588 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
567 589
568 old = sdata->u.ap.beacon; 590 old = rtnl_dereference(sdata->u.ap.beacon);
569
570 if (!old) 591 if (!old)
571 return -ENOENT; 592 return -ENOENT;
572 593
@@ -662,6 +683,12 @@ static void sta_apply_parameters(struct ieee80211_local *local,
662 if (set & BIT(NL80211_STA_FLAG_MFP)) 683 if (set & BIT(NL80211_STA_FLAG_MFP))
663 sta->flags |= WLAN_STA_MFP; 684 sta->flags |= WLAN_STA_MFP;
664 } 685 }
686
687 if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) {
688 sta->flags &= ~WLAN_STA_AUTH;
689 if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED))
690 sta->flags |= WLAN_STA_AUTH;
691 }
665 spin_unlock_irqrestore(&sta->flaglock, flags); 692 spin_unlock_irqrestore(&sta->flaglock, flags);
666 693
667 /* 694 /*
@@ -699,15 +726,29 @@ static void sta_apply_parameters(struct ieee80211_local *local,
699 params->ht_capa, 726 params->ht_capa,
700 &sta->sta.ht_cap); 727 &sta->sta.ht_cap);
701 728
702 if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { 729 if (ieee80211_vif_is_mesh(&sdata->vif)) {
703 switch (params->plink_action) { 730#ifdef CONFIG_MAC80211_MESH
704 case PLINK_ACTION_OPEN: 731 if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED)
705 mesh_plink_open(sta); 732 switch (params->plink_state) {
706 break; 733 case NL80211_PLINK_LISTEN:
707 case PLINK_ACTION_BLOCK: 734 case NL80211_PLINK_ESTAB:
708 mesh_plink_block(sta); 735 case NL80211_PLINK_BLOCKED:
709 break; 736 sta->plink_state = params->plink_state;
710 } 737 break;
738 default:
739 /* nothing */
740 break;
741 }
742 else
743 switch (params->plink_action) {
744 case PLINK_ACTION_OPEN:
745 mesh_plink_open(sta);
746 break;
747 case PLINK_ACTION_BLOCK:
748 mesh_plink_block(sta);
749 break;
750 }
751#endif
711 } 752 }
712} 753}
713 754
@@ -821,6 +862,10 @@ static int ieee80211_change_station(struct wiphy *wiphy,
821 862
822 rcu_read_unlock(); 863 rcu_read_unlock();
823 864
865 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
866 params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))
867 ieee80211_recalc_ps(local, -1);
868
824 return 0; 869 return 0;
825} 870}
826 871
@@ -904,8 +949,10 @@ static int ieee80211_change_mpath(struct wiphy *wiphy,
904static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, 949static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
905 struct mpath_info *pinfo) 950 struct mpath_info *pinfo)
906{ 951{
907 if (mpath->next_hop) 952 struct sta_info *next_hop_sta = rcu_dereference(mpath->next_hop);
908 memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); 953
954 if (next_hop_sta)
955 memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN);
909 else 956 else
910 memset(next_hop, 0, ETH_ALEN); 957 memset(next_hop, 0, ETH_ALEN);
911 958
@@ -1006,26 +1053,30 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
1006 u8 *new_ie; 1053 u8 *new_ie;
1007 const u8 *old_ie; 1054 const u8 *old_ie;
1008 1055
1009 /* first allocate the new vendor information element */ 1056 /* allocate information elements */
1010 new_ie = NULL; 1057 new_ie = NULL;
1011 old_ie = ifmsh->vendor_ie; 1058 old_ie = ifmsh->ie;
1012 1059
1013 ifmsh->vendor_ie_len = setup->vendor_ie_len; 1060 if (setup->ie_len) {
1014 if (setup->vendor_ie_len) { 1061 new_ie = kmemdup(setup->ie, setup->ie_len,
1015 new_ie = kmemdup(setup->vendor_ie, setup->vendor_ie_len,
1016 GFP_KERNEL); 1062 GFP_KERNEL);
1017 if (!new_ie) 1063 if (!new_ie)
1018 return -ENOMEM; 1064 return -ENOMEM;
1019 } 1065 }
1066 ifmsh->ie_len = setup->ie_len;
1067 ifmsh->ie = new_ie;
1068 kfree(old_ie);
1020 1069
1021 /* now copy the rest of the setup parameters */ 1070 /* now copy the rest of the setup parameters */
1022 ifmsh->mesh_id_len = setup->mesh_id_len; 1071 ifmsh->mesh_id_len = setup->mesh_id_len;
1023 memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); 1072 memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len);
1024 ifmsh->mesh_pp_id = setup->path_sel_proto; 1073 ifmsh->mesh_pp_id = setup->path_sel_proto;
1025 ifmsh->mesh_pm_id = setup->path_metric; 1074 ifmsh->mesh_pm_id = setup->path_metric;
1026 ifmsh->vendor_ie = new_ie; 1075 ifmsh->security = IEEE80211_MESH_SEC_NONE;
1027 1076 if (setup->is_authenticated)
1028 kfree(old_ie); 1077 ifmsh->security |= IEEE80211_MESH_SEC_AUTHED;
1078 if (setup->is_secure)
1079 ifmsh->security |= IEEE80211_MESH_SEC_SECURED;
1029 1080
1030 return 0; 1081 return 0;
1031} 1082}
@@ -1215,6 +1266,9 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
1215{ 1266{
1216 struct ieee80211_local *local = wiphy_priv(wiphy); 1267 struct ieee80211_local *local = wiphy_priv(wiphy);
1217 struct ieee80211_sub_if_data *sdata = NULL; 1268 struct ieee80211_sub_if_data *sdata = NULL;
1269 struct ieee80211_channel *old_oper;
1270 enum nl80211_channel_type old_oper_type;
 1271 enum nl80211_channel_type old_vif_oper_type = NL80211_CHAN_NO_HT;
1218 1272
1219 if (netdev) 1273 if (netdev)
1220 sdata = IEEE80211_DEV_TO_SUB_IF(netdev); 1274 sdata = IEEE80211_DEV_TO_SUB_IF(netdev);
@@ -1232,22 +1286,33 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
1232 break; 1286 break;
1233 } 1287 }
1234 1288
1235 local->oper_channel = chan; 1289 if (sdata)
1290 old_vif_oper_type = sdata->vif.bss_conf.channel_type;
1291 old_oper_type = local->_oper_channel_type;
1236 1292
1237 if (!ieee80211_set_channel_type(local, sdata, channel_type)) 1293 if (!ieee80211_set_channel_type(local, sdata, channel_type))
1238 return -EBUSY; 1294 return -EBUSY;
1239 1295
1240 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 1296 old_oper = local->oper_channel;
1241 if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) 1297 local->oper_channel = chan;
1298
1299 /* Update driver if changes were actually made. */
1300 if ((old_oper != local->oper_channel) ||
1301 (old_oper_type != local->_oper_channel_type))
1302 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
1303
1304 if ((sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) &&
1305 old_vif_oper_type != sdata->vif.bss_conf.channel_type)
1242 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); 1306 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
1243 1307
1244 return 0; 1308 return 0;
1245} 1309}
1246 1310
1247#ifdef CONFIG_PM 1311#ifdef CONFIG_PM
1248static int ieee80211_suspend(struct wiphy *wiphy) 1312static int ieee80211_suspend(struct wiphy *wiphy,
1313 struct cfg80211_wowlan *wowlan)
1249{ 1314{
1250 return __ieee80211_suspend(wiphy_priv(wiphy)); 1315 return __ieee80211_suspend(wiphy_priv(wiphy), wowlan);
1251} 1316}
1252 1317
1253static int ieee80211_resume(struct wiphy *wiphy) 1318static int ieee80211_resume(struct wiphy *wiphy)
@@ -1274,8 +1339,11 @@ static int ieee80211_scan(struct wiphy *wiphy,
1274 case NL80211_IFTYPE_P2P_GO: 1339 case NL80211_IFTYPE_P2P_GO:
1275 if (sdata->local->ops->hw_scan) 1340 if (sdata->local->ops->hw_scan)
1276 break; 1341 break;
1277 /* FIXME: implement NoA while scanning in software */ 1342 /*
1278 return -EOPNOTSUPP; 1343 * FIXME: implement NoA while scanning in software,
1344 * for now fall through to allow scanning only when
1345 * beaconing hasn't been configured yet
1346 */
1279 case NL80211_IFTYPE_AP: 1347 case NL80211_IFTYPE_AP:
1280 if (sdata->u.ap.beacon) 1348 if (sdata->u.ap.beacon)
1281 return -EOPNOTSUPP; 1349 return -EOPNOTSUPP;
@@ -1287,6 +1355,30 @@ static int ieee80211_scan(struct wiphy *wiphy,
1287 return ieee80211_request_scan(sdata, req); 1355 return ieee80211_request_scan(sdata, req);
1288} 1356}
1289 1357
1358static int
1359ieee80211_sched_scan_start(struct wiphy *wiphy,
1360 struct net_device *dev,
1361 struct cfg80211_sched_scan_request *req)
1362{
1363 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1364
1365 if (!sdata->local->ops->sched_scan_start)
1366 return -EOPNOTSUPP;
1367
1368 return ieee80211_request_sched_scan_start(sdata, req);
1369}
1370
1371static int
1372ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev)
1373{
1374 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1375
1376 if (!sdata->local->ops->sched_scan_stop)
1377 return -EOPNOTSUPP;
1378
1379 return ieee80211_request_sched_scan_stop(sdata);
1380}
1381
1290static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, 1382static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
1291 struct cfg80211_auth_request *req) 1383 struct cfg80211_auth_request *req)
1292{ 1384{
@@ -1471,6 +1563,8 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
1471 enum ieee80211_smps_mode old_req; 1563 enum ieee80211_smps_mode old_req;
1472 int err; 1564 int err;
1473 1565
1566 lockdep_assert_held(&sdata->u.mgd.mtx);
1567
1474 old_req = sdata->u.mgd.req_smps; 1568 old_req = sdata->u.mgd.req_smps;
1475 sdata->u.mgd.req_smps = smps_mode; 1569 sdata->u.mgd.req_smps = smps_mode;
1476 1570
@@ -1576,16 +1670,13 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
1576{ 1670{
1577 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1671 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1578 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 1672 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1579 int i; 1673 int i, ret;
1580
1581 /*
1582 * This _could_ be supported by providing a hook for
1583 * drivers for this function, but at this point it
1584 * doesn't seem worth bothering.
1585 */
1586 if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
1587 return -EOPNOTSUPP;
1588 1674
1675 if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) {
1676 ret = drv_set_bitrate_mask(local, sdata, mask);
1677 if (ret)
1678 return ret;
1679 }
1589 1680
1590 for (i = 0; i < IEEE80211_NUM_BANDS; i++) 1681 for (i = 0; i < IEEE80211_NUM_BANDS; i++)
1591 sdata->rc_rateidx_mask[i] = mask->control[i].legacy; 1682 sdata->rc_rateidx_mask[i] = mask->control[i].legacy;
@@ -1784,6 +1875,33 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
1784 1875
1785 *cookie = (unsigned long) skb; 1876 *cookie = (unsigned long) skb;
1786 1877
1878 if (is_offchan && local->ops->offchannel_tx) {
1879 int ret;
1880
1881 IEEE80211_SKB_CB(skb)->band = chan->band;
1882
1883 mutex_lock(&local->mtx);
1884
1885 if (local->hw_offchan_tx_cookie) {
1886 mutex_unlock(&local->mtx);
1887 return -EBUSY;
1888 }
1889
1890 /* TODO: bitrate control, TX processing? */
1891 ret = drv_offchannel_tx(local, skb, chan, channel_type, wait);
1892
1893 if (ret == 0)
1894 local->hw_offchan_tx_cookie = *cookie;
1895 mutex_unlock(&local->mtx);
1896
1897 /*
1898 * Allow driver to return 1 to indicate it wants to have the
1899 * frame transmitted with a remain_on_channel + regular TX.
1900 */
1901 if (ret != 1)
1902 return ret;
1903 }
1904
1787 if (is_offchan && local->ops->remain_on_channel) { 1905 if (is_offchan && local->ops->remain_on_channel) {
1788 unsigned int duration; 1906 unsigned int duration;
1789 int ret; 1907 int ret;
@@ -1847,6 +1965,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
1847 1965
1848 wk->type = IEEE80211_WORK_OFFCHANNEL_TX; 1966 wk->type = IEEE80211_WORK_OFFCHANNEL_TX;
1849 wk->chan = chan; 1967 wk->chan = chan;
1968 wk->chan_type = channel_type;
1850 wk->sdata = sdata; 1969 wk->sdata = sdata;
1851 wk->done = ieee80211_offchan_tx_done; 1970 wk->done = ieee80211_offchan_tx_done;
1852 wk->offchan_tx.frame = skb; 1971 wk->offchan_tx.frame = skb;
@@ -1869,6 +1988,18 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
1869 1988
1870 mutex_lock(&local->mtx); 1989 mutex_lock(&local->mtx);
1871 1990
1991 if (local->ops->offchannel_tx_cancel_wait &&
1992 local->hw_offchan_tx_cookie == cookie) {
1993 ret = drv_offchannel_tx_cancel_wait(local);
1994
1995 if (!ret)
1996 local->hw_offchan_tx_cookie = 0;
1997
1998 mutex_unlock(&local->mtx);
1999
2000 return ret;
2001 }
2002
1872 if (local->ops->cancel_remain_on_channel) { 2003 if (local->ops->cancel_remain_on_channel) {
1873 cookie ^= 2; 2004 cookie ^= 2;
1874 ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); 2005 ret = ieee80211_cancel_remain_on_channel_hw(local, cookie);
@@ -1939,6 +2070,21 @@ static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
1939 return drv_get_antenna(local, tx_ant, rx_ant); 2070 return drv_get_antenna(local, tx_ant, rx_ant);
1940} 2071}
1941 2072
2073static int ieee80211_set_ringparam(struct wiphy *wiphy, u32 tx, u32 rx)
2074{
2075 struct ieee80211_local *local = wiphy_priv(wiphy);
2076
2077 return drv_set_ringparam(local, tx, rx);
2078}
2079
2080static void ieee80211_get_ringparam(struct wiphy *wiphy,
2081 u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max)
2082{
2083 struct ieee80211_local *local = wiphy_priv(wiphy);
2084
2085 drv_get_ringparam(local, tx, tx_max, rx, rx_max);
2086}
2087
1942struct cfg80211_ops mac80211_config_ops = { 2088struct cfg80211_ops mac80211_config_ops = {
1943 .add_virtual_intf = ieee80211_add_iface, 2089 .add_virtual_intf = ieee80211_add_iface,
1944 .del_virtual_intf = ieee80211_del_iface, 2090 .del_virtual_intf = ieee80211_del_iface,
@@ -1974,6 +2120,8 @@ struct cfg80211_ops mac80211_config_ops = {
1974 .suspend = ieee80211_suspend, 2120 .suspend = ieee80211_suspend,
1975 .resume = ieee80211_resume, 2121 .resume = ieee80211_resume,
1976 .scan = ieee80211_scan, 2122 .scan = ieee80211_scan,
2123 .sched_scan_start = ieee80211_sched_scan_start,
2124 .sched_scan_stop = ieee80211_sched_scan_stop,
1977 .auth = ieee80211_auth, 2125 .auth = ieee80211_auth,
1978 .assoc = ieee80211_assoc, 2126 .assoc = ieee80211_assoc,
1979 .deauth = ieee80211_deauth, 2127 .deauth = ieee80211_deauth,
@@ -1996,4 +2144,6 @@ struct cfg80211_ops mac80211_config_ops = {
1996 .mgmt_frame_register = ieee80211_mgmt_frame_register, 2144 .mgmt_frame_register = ieee80211_mgmt_frame_register,
1997 .set_antenna = ieee80211_set_antenna, 2145 .set_antenna = ieee80211_set_antenna,
1998 .get_antenna = ieee80211_get_antenna, 2146 .get_antenna = ieee80211_get_antenna,
2147 .set_ringparam = ieee80211_set_ringparam,
2148 .get_ringparam = ieee80211_get_ringparam,
1999}; 2149};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5b24740fc0b0..889c3e93e0f4 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -77,6 +77,9 @@ bool ieee80211_set_channel_type(struct ieee80211_local *local,
77 switch (tmp->vif.bss_conf.channel_type) { 77 switch (tmp->vif.bss_conf.channel_type) {
78 case NL80211_CHAN_NO_HT: 78 case NL80211_CHAN_NO_HT:
79 case NL80211_CHAN_HT20: 79 case NL80211_CHAN_HT20:
80 if (superchan > tmp->vif.bss_conf.channel_type)
81 break;
82
80 superchan = tmp->vif.bss_conf.channel_type; 83 superchan = tmp->vif.bss_conf.channel_type;
81 break; 84 break;
82 case NL80211_CHAN_HT40PLUS: 85 case NL80211_CHAN_HT40PLUS:
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1f02e599a318..186e02f7cc32 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -37,7 +37,7 @@ int mac80211_format_buffer(char __user *userbuf, size_t count,
37 return simple_read_from_buffer(userbuf, count, ppos, buf, res); 37 return simple_read_from_buffer(userbuf, count, ppos, buf, res);
38} 38}
39 39
40#define DEBUGFS_READONLY_FILE(name, fmt, value...) \ 40#define DEBUGFS_READONLY_FILE_FN(name, fmt, value...) \
41static ssize_t name## _read(struct file *file, char __user *userbuf, \ 41static ssize_t name## _read(struct file *file, char __user *userbuf, \
42 size_t count, loff_t *ppos) \ 42 size_t count, loff_t *ppos) \
43{ \ 43{ \
@@ -45,14 +45,19 @@ static ssize_t name## _read(struct file *file, char __user *userbuf, \
45 \ 45 \
46 return mac80211_format_buffer(userbuf, count, ppos, \ 46 return mac80211_format_buffer(userbuf, count, ppos, \
47 fmt "\n", ##value); \ 47 fmt "\n", ##value); \
48} \ 48}
49 \ 49
50#define DEBUGFS_READONLY_FILE_OPS(name) \
50static const struct file_operations name## _ops = { \ 51static const struct file_operations name## _ops = { \
51 .read = name## _read, \ 52 .read = name## _read, \
52 .open = mac80211_open_file_generic, \ 53 .open = mac80211_open_file_generic, \
53 .llseek = generic_file_llseek, \ 54 .llseek = generic_file_llseek, \
54}; 55};
55 56
57#define DEBUGFS_READONLY_FILE(name, fmt, value...) \
58 DEBUGFS_READONLY_FILE_FN(name, fmt, value) \
59 DEBUGFS_READONLY_FILE_OPS(name)
60
56#define DEBUGFS_ADD(name) \ 61#define DEBUGFS_ADD(name) \
57 debugfs_create_file(#name, 0400, phyd, local, &name## _ops); 62 debugfs_create_file(#name, 0400, phyd, local, &name## _ops);
58 63
@@ -60,6 +65,10 @@ static const struct file_operations name## _ops = { \
60 debugfs_create_file(#name, mode, phyd, local, &name## _ops); 65 debugfs_create_file(#name, mode, phyd, local, &name## _ops);
61 66
62 67
68DEBUGFS_READONLY_FILE(user_power, "%d",
69 local->user_power_level);
70DEBUGFS_READONLY_FILE(power, "%d",
71 local->hw.conf.power_level);
63DEBUGFS_READONLY_FILE(frequency, "%d", 72DEBUGFS_READONLY_FILE(frequency, "%d",
64 local->hw.conf.channel->center_freq); 73 local->hw.conf.channel->center_freq);
65DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", 74DEBUGFS_READONLY_FILE(total_ps_buffered, "%d",
@@ -126,7 +135,7 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
126 struct ieee80211_local *local = file->private_data; 135 struct ieee80211_local *local = file->private_data;
127 136
128 rtnl_lock(); 137 rtnl_lock();
129 __ieee80211_suspend(&local->hw); 138 __ieee80211_suspend(&local->hw, NULL);
130 __ieee80211_resume(&local->hw); 139 __ieee80211_resume(&local->hw);
131 rtnl_unlock(); 140 rtnl_unlock();
132 141
@@ -287,11 +296,70 @@ static ssize_t channel_type_read(struct file *file, char __user *user_buf,
287 return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); 296 return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
288} 297}
289 298
290static const struct file_operations channel_type_ops = { 299static ssize_t hwflags_read(struct file *file, char __user *user_buf,
291 .read = channel_type_read, 300 size_t count, loff_t *ppos)
292 .open = mac80211_open_file_generic, 301{
293 .llseek = default_llseek, 302 struct ieee80211_local *local = file->private_data;
294}; 303 int mxln = 500;
304 ssize_t rv;
305 char *buf = kzalloc(mxln, GFP_KERNEL);
306 int sf = 0; /* how many written so far */
307
308 sf += snprintf(buf, mxln - sf, "0x%x\n", local->hw.flags);
309 if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
310 sf += snprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n");
311 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
312 sf += snprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n");
313 if (local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)
314 sf += snprintf(buf + sf, mxln - sf,
315 "HOST_BCAST_PS_BUFFERING\n");
316 if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)
317 sf += snprintf(buf + sf, mxln - sf,
318 "2GHZ_SHORT_SLOT_INCAPABLE\n");
319 if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)
320 sf += snprintf(buf + sf, mxln - sf,
321 "2GHZ_SHORT_PREAMBLE_INCAPABLE\n");
322 if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
323 sf += snprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n");
324 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
325 sf += snprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n");
326 if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
327 sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_PERIOD\n");
328 if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)
329 sf += snprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n");
330 if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)
331 sf += snprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n");
332 if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS)
333 sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n");
334 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
335 sf += snprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n");
336 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
337 sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n");
338 if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE)
339 sf += snprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n");
340 if (local->hw.flags & IEEE80211_HW_BEACON_FILTER)
341 sf += snprintf(buf + sf, mxln - sf, "BEACON_FILTER\n");
342 if (local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS)
343 sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_STATIC_SMPS\n");
344 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS)
345 sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_SMPS\n");
346 if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)
347 sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n");
348 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
349 sf += snprintf(buf + sf, mxln - sf, "REPORTS_TX_ACK_STATUS\n");
350 if (local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
351 sf += snprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n");
352 if (local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)
353 sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_CQM_RSSI\n");
354 if (local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK)
355 sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n");
356 if (local->hw.flags & IEEE80211_HW_AP_LINK_PS)
357 sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n");
358
359 rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
360 kfree(buf);
361 return rv;
362}
295 363
296static ssize_t queues_read(struct file *file, char __user *user_buf, 364static ssize_t queues_read(struct file *file, char __user *user_buf,
297 size_t count, loff_t *ppos) 365 size_t count, loff_t *ppos)
@@ -311,11 +379,9 @@ static ssize_t queues_read(struct file *file, char __user *user_buf,
311 return simple_read_from_buffer(user_buf, count, ppos, buf, res); 379 return simple_read_from_buffer(user_buf, count, ppos, buf, res);
312} 380}
313 381
314static const struct file_operations queues_ops = { 382DEBUGFS_READONLY_FILE_OPS(hwflags);
315 .read = queues_read, 383DEBUGFS_READONLY_FILE_OPS(channel_type);
316 .open = mac80211_open_file_generic, 384DEBUGFS_READONLY_FILE_OPS(queues);
317 .llseek = default_llseek,
318};
319 385
320/* statistics stuff */ 386/* statistics stuff */
321 387
@@ -391,6 +457,9 @@ void debugfs_hw_add(struct ieee80211_local *local)
391 DEBUGFS_ADD(uapsd_queues); 457 DEBUGFS_ADD(uapsd_queues);
392 DEBUGFS_ADD(uapsd_max_sp_len); 458 DEBUGFS_ADD(uapsd_max_sp_len);
393 DEBUGFS_ADD(channel_type); 459 DEBUGFS_ADD(channel_type);
460 DEBUGFS_ADD(hwflags);
461 DEBUGFS_ADD(user_power);
462 DEBUGFS_ADD(power);
394 463
395 statsd = debugfs_create_dir("statistics", phyd); 464 statsd = debugfs_create_dir("statistics", phyd);
396 465
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index f7ef3477c24a..33c58b85c911 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -241,16 +241,12 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
241 if (!key->debugfs.dir) 241 if (!key->debugfs.dir)
242 return; 242 return;
243 243
244 rcu_read_lock(); 244 sta = key->sta;
245 sta = rcu_dereference(key->sta); 245 if (sta) {
246 if (sta)
247 sprintf(buf, "../../stations/%pM", sta->sta.addr); 246 sprintf(buf, "../../stations/%pM", sta->sta.addr);
248 rcu_read_unlock();
249
250 /* using sta as a boolean is fine outside RCU lock */
251 if (sta)
252 key->debugfs.stalink = 247 key->debugfs.stalink =
253 debugfs_create_symlink("station", key->debugfs.dir, buf); 248 debugfs_create_symlink("station", key->debugfs.dir, buf);
249 }
254 250
255 DEBUGFS_ADD(keylen); 251 DEBUGFS_ADD(keylen);
256 DEBUGFS_ADD(flags); 252 DEBUGFS_ADD(flags);
@@ -286,7 +282,8 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
286 lockdep_assert_held(&sdata->local->key_mtx); 282 lockdep_assert_held(&sdata->local->key_mtx);
287 283
288 if (sdata->default_unicast_key) { 284 if (sdata->default_unicast_key) {
289 key = sdata->default_unicast_key; 285 key = key_mtx_dereference(sdata->local,
286 sdata->default_unicast_key);
290 sprintf(buf, "../keys/%d", key->debugfs.cnt); 287 sprintf(buf, "../keys/%d", key->debugfs.cnt);
291 sdata->debugfs.default_unicast_key = 288 sdata->debugfs.default_unicast_key =
292 debugfs_create_symlink("default_unicast_key", 289 debugfs_create_symlink("default_unicast_key",
@@ -297,7 +294,8 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
297 } 294 }
298 295
299 if (sdata->default_multicast_key) { 296 if (sdata->default_multicast_key) {
300 key = sdata->default_multicast_key; 297 key = key_mtx_dereference(sdata->local,
298 sdata->default_multicast_key);
301 sprintf(buf, "../keys/%d", key->debugfs.cnt); 299 sprintf(buf, "../keys/%d", key->debugfs.cnt);
302 sdata->debugfs.default_multicast_key = 300 sdata->debugfs.default_multicast_key =
303 debugfs_create_symlink("default_multicast_key", 301 debugfs_create_symlink("default_multicast_key",
@@ -316,9 +314,8 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
316 if (!sdata->debugfs.dir) 314 if (!sdata->debugfs.dir)
317 return; 315 return;
318 316
319 /* this is running under the key lock */ 317 key = key_mtx_dereference(sdata->local,
320 318 sdata->default_mgmt_key);
321 key = sdata->default_mgmt_key;
322 if (key) { 319 if (key) {
323 sprintf(buf, "../keys/%d", key->debugfs.cnt); 320 sprintf(buf, "../keys/%d", key->debugfs.cnt);
324 sdata->debugfs.default_mgmt_key = 321 sdata->debugfs.default_mgmt_key =
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 2dabdf7680d0..9ea7c0d0103f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -36,7 +36,7 @@ static ssize_t ieee80211_if_read(
36 ret = (*format)(sdata, buf, sizeof(buf)); 36 ret = (*format)(sdata, buf, sizeof(buf));
37 read_unlock(&dev_base_lock); 37 read_unlock(&dev_base_lock);
38 38
39 if (ret != -EINVAL) 39 if (ret >= 0)
40 ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); 40 ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
41 41
42 return ret; 42 return ret;
@@ -81,6 +81,8 @@ static ssize_t ieee80211_if_fmt_##name( \
81 IEEE80211_IF_FMT(name, field, "%d\n") 81 IEEE80211_IF_FMT(name, field, "%d\n")
82#define IEEE80211_IF_FMT_HEX(name, field) \ 82#define IEEE80211_IF_FMT_HEX(name, field) \
83 IEEE80211_IF_FMT(name, field, "%#x\n") 83 IEEE80211_IF_FMT(name, field, "%#x\n")
84#define IEEE80211_IF_FMT_LHEX(name, field) \
85 IEEE80211_IF_FMT(name, field, "%#lx\n")
84#define IEEE80211_IF_FMT_SIZE(name, field) \ 86#define IEEE80211_IF_FMT_SIZE(name, field) \
85 IEEE80211_IF_FMT(name, field, "%zd\n") 87 IEEE80211_IF_FMT(name, field, "%zd\n")
86 88
@@ -145,6 +147,9 @@ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ],
145 HEX); 147 HEX);
146IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], 148IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ],
147 HEX); 149 HEX);
150IEEE80211_IF_FILE(flags, flags, HEX);
151IEEE80211_IF_FILE(state, state, LHEX);
152IEEE80211_IF_FILE(channel_type, vif.bss_conf.channel_type, DEC);
148 153
149/* STA attributes */ 154/* STA attributes */
150IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); 155IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
@@ -172,9 +177,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
172 if (sdata->vif.type != NL80211_IFTYPE_STATION) 177 if (sdata->vif.type != NL80211_IFTYPE_STATION)
173 return -EOPNOTSUPP; 178 return -EOPNOTSUPP;
174 179
175 mutex_lock(&local->iflist_mtx); 180 mutex_lock(&sdata->u.mgd.mtx);
176 err = __ieee80211_request_smps(sdata, smps_mode); 181 err = __ieee80211_request_smps(sdata, smps_mode);
177 mutex_unlock(&local->iflist_mtx); 182 mutex_unlock(&sdata->u.mgd.mtx);
178 183
179 return err; 184 return err;
180} 185}
@@ -216,6 +221,104 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata,
216 221
217__IEEE80211_IF_FILE_W(smps); 222__IEEE80211_IF_FILE_W(smps);
218 223
224static ssize_t ieee80211_if_fmt_tkip_mic_test(
225 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
226{
227 return -EOPNOTSUPP;
228}
229
230static int hwaddr_aton(const char *txt, u8 *addr)
231{
232 int i;
233
234 for (i = 0; i < ETH_ALEN; i++) {
235 int a, b;
236
237 a = hex_to_bin(*txt++);
238 if (a < 0)
239 return -1;
240 b = hex_to_bin(*txt++);
241 if (b < 0)
242 return -1;
243 *addr++ = (a << 4) | b;
244 if (i < 5 && *txt++ != ':')
245 return -1;
246 }
247
248 return 0;
249}
250
251static ssize_t ieee80211_if_parse_tkip_mic_test(
252 struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
253{
254 struct ieee80211_local *local = sdata->local;
255 u8 addr[ETH_ALEN];
256 struct sk_buff *skb;
257 struct ieee80211_hdr *hdr;
258 __le16 fc;
259
260 /*
261 * Assume colon-delimited MAC address with possible white space
262 * following.
263 */
264 if (buflen < 3 * ETH_ALEN - 1)
265 return -EINVAL;
266 if (hwaddr_aton(buf, addr) < 0)
267 return -EINVAL;
268
269 if (!ieee80211_sdata_running(sdata))
270 return -ENOTCONN;
271
272 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 100);
273 if (!skb)
274 return -ENOMEM;
275 skb_reserve(skb, local->hw.extra_tx_headroom);
276
277 hdr = (struct ieee80211_hdr *) skb_put(skb, 24);
278 memset(hdr, 0, 24);
279 fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
280
281 switch (sdata->vif.type) {
282 case NL80211_IFTYPE_AP:
283 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
284 /* DA BSSID SA */
285 memcpy(hdr->addr1, addr, ETH_ALEN);
286 memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
287 memcpy(hdr->addr3, sdata->vif.addr, ETH_ALEN);
288 break;
289 case NL80211_IFTYPE_STATION:
290 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
291 /* BSSID SA DA */
292 if (sdata->vif.bss_conf.bssid == NULL) {
293 dev_kfree_skb(skb);
294 return -ENOTCONN;
295 }
296 memcpy(hdr->addr1, sdata->vif.bss_conf.bssid, ETH_ALEN);
297 memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
298 memcpy(hdr->addr3, addr, ETH_ALEN);
299 break;
300 default:
301 dev_kfree_skb(skb);
302 return -EOPNOTSUPP;
303 }
304 hdr->frame_control = fc;
305
306 /*
 307 * Add some length to the test frame to make it look a bit more valid.
 308 * The exact contents do not matter since the recipient is required
309 * to drop this because of the Michael MIC failure.
310 */
311 memset(skb_put(skb, 50), 0, 50);
312
313 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_TKIP_MIC_FAILURE;
314
315 ieee80211_tx_skb(sdata, skb);
316
317 return buflen;
318}
319
320__IEEE80211_IF_FILE_W(tkip_mic_test);
321
219/* AP attributes */ 322/* AP attributes */
220IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); 323IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
221IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); 324IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
@@ -283,6 +386,9 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode,
283static void add_sta_files(struct ieee80211_sub_if_data *sdata) 386static void add_sta_files(struct ieee80211_sub_if_data *sdata)
284{ 387{
285 DEBUGFS_ADD(drop_unencrypted); 388 DEBUGFS_ADD(drop_unencrypted);
389 DEBUGFS_ADD(flags);
390 DEBUGFS_ADD(state);
391 DEBUGFS_ADD(channel_type);
286 DEBUGFS_ADD(rc_rateidx_mask_2ghz); 392 DEBUGFS_ADD(rc_rateidx_mask_2ghz);
287 DEBUGFS_ADD(rc_rateidx_mask_5ghz); 393 DEBUGFS_ADD(rc_rateidx_mask_5ghz);
288 394
@@ -291,22 +397,30 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
291 DEBUGFS_ADD(last_beacon); 397 DEBUGFS_ADD(last_beacon);
292 DEBUGFS_ADD(ave_beacon); 398 DEBUGFS_ADD(ave_beacon);
293 DEBUGFS_ADD_MODE(smps, 0600); 399 DEBUGFS_ADD_MODE(smps, 0600);
400 DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
294} 401}
295 402
296static void add_ap_files(struct ieee80211_sub_if_data *sdata) 403static void add_ap_files(struct ieee80211_sub_if_data *sdata)
297{ 404{
298 DEBUGFS_ADD(drop_unencrypted); 405 DEBUGFS_ADD(drop_unencrypted);
406 DEBUGFS_ADD(flags);
407 DEBUGFS_ADD(state);
408 DEBUGFS_ADD(channel_type);
299 DEBUGFS_ADD(rc_rateidx_mask_2ghz); 409 DEBUGFS_ADD(rc_rateidx_mask_2ghz);
300 DEBUGFS_ADD(rc_rateidx_mask_5ghz); 410 DEBUGFS_ADD(rc_rateidx_mask_5ghz);
301 411
302 DEBUGFS_ADD(num_sta_ps); 412 DEBUGFS_ADD(num_sta_ps);
303 DEBUGFS_ADD(dtim_count); 413 DEBUGFS_ADD(dtim_count);
304 DEBUGFS_ADD(num_buffered_multicast); 414 DEBUGFS_ADD(num_buffered_multicast);
415 DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
305} 416}
306 417
307static void add_wds_files(struct ieee80211_sub_if_data *sdata) 418static void add_wds_files(struct ieee80211_sub_if_data *sdata)
308{ 419{
309 DEBUGFS_ADD(drop_unencrypted); 420 DEBUGFS_ADD(drop_unencrypted);
421 DEBUGFS_ADD(flags);
422 DEBUGFS_ADD(state);
423 DEBUGFS_ADD(channel_type);
310 DEBUGFS_ADD(rc_rateidx_mask_2ghz); 424 DEBUGFS_ADD(rc_rateidx_mask_2ghz);
311 DEBUGFS_ADD(rc_rateidx_mask_5ghz); 425 DEBUGFS_ADD(rc_rateidx_mask_5ghz);
312 426
@@ -316,12 +430,18 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata)
316static void add_vlan_files(struct ieee80211_sub_if_data *sdata) 430static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
317{ 431{
318 DEBUGFS_ADD(drop_unencrypted); 432 DEBUGFS_ADD(drop_unencrypted);
433 DEBUGFS_ADD(flags);
434 DEBUGFS_ADD(state);
435 DEBUGFS_ADD(channel_type);
319 DEBUGFS_ADD(rc_rateidx_mask_2ghz); 436 DEBUGFS_ADD(rc_rateidx_mask_2ghz);
320 DEBUGFS_ADD(rc_rateidx_mask_5ghz); 437 DEBUGFS_ADD(rc_rateidx_mask_5ghz);
321} 438}
322 439
323static void add_monitor_files(struct ieee80211_sub_if_data *sdata) 440static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
324{ 441{
442 DEBUGFS_ADD(flags);
443 DEBUGFS_ADD(state);
444 DEBUGFS_ADD(channel_type);
325} 445}
326 446
327#ifdef CONFIG_MAC80211_MESH 447#ifdef CONFIG_MAC80211_MESH
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c04a1396cf8d..a01d2137fddc 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -92,6 +92,31 @@ static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf,
92} 92}
93STA_OPS(inactive_ms); 93STA_OPS(inactive_ms);
94 94
95
96static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf,
97 size_t count, loff_t *ppos)
98{
99 struct sta_info *sta = file->private_data;
100 struct timespec uptime;
101 struct tm result;
102 long connected_time_secs;
103 char buf[100];
104 int res;
105 do_posix_clock_monotonic_gettime(&uptime);
106 connected_time_secs = uptime.tv_sec - sta->last_connected;
107 time_to_tm(connected_time_secs, 0, &result);
108 result.tm_year -= 70;
109 result.tm_mday -= 1;
110 res = scnprintf(buf, sizeof(buf),
111 "years - %ld\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n",
112 result.tm_year, result.tm_mon, result.tm_mday,
113 result.tm_hour, result.tm_min, result.tm_sec);
114 return simple_read_from_buffer(userbuf, count, ppos, buf, res);
115}
116STA_OPS(connected_time);
117
118
119
95static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, 120static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
96 size_t count, loff_t *ppos) 121 size_t count, loff_t *ppos)
97{ 122{
@@ -324,6 +349,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
324 DEBUGFS_ADD(flags); 349 DEBUGFS_ADD(flags);
325 DEBUGFS_ADD(num_ps_buf_frames); 350 DEBUGFS_ADD(num_ps_buf_frames);
326 DEBUGFS_ADD(inactive_ms); 351 DEBUGFS_ADD(inactive_ms);
352 DEBUGFS_ADD(connected_time);
327 DEBUGFS_ADD(last_seq_ctrl); 353 DEBUGFS_ADD(last_seq_ctrl);
328 DEBUGFS_ADD(agg_status); 354 DEBUGFS_ADD(agg_status);
329 DEBUGFS_ADD(dev); 355 DEBUGFS_ADD(dev);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 98d589960a49..eebf7a67daf7 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -5,9 +5,9 @@
5#include "ieee80211_i.h" 5#include "ieee80211_i.h"
6#include "driver-trace.h" 6#include "driver-trace.h"
7 7
8static inline int drv_tx(struct ieee80211_local *local, struct sk_buff *skb) 8static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb)
9{ 9{
10 return local->ops->tx(&local->hw, skb); 10 local->ops->tx(&local->hw, skb);
11} 11}
12 12
13static inline int drv_start(struct ieee80211_local *local) 13static inline int drv_start(struct ieee80211_local *local)
@@ -41,6 +41,33 @@ static inline void drv_stop(struct ieee80211_local *local)
41 local->started = false; 41 local->started = false;
42} 42}
43 43
44#ifdef CONFIG_PM
45static inline int drv_suspend(struct ieee80211_local *local,
46 struct cfg80211_wowlan *wowlan)
47{
48 int ret;
49
50 might_sleep();
51
52 trace_drv_suspend(local);
53 ret = local->ops->suspend(&local->hw, wowlan);
54 trace_drv_return_int(local, ret);
55 return ret;
56}
57
58static inline int drv_resume(struct ieee80211_local *local)
59{
60 int ret;
61
62 might_sleep();
63
64 trace_drv_resume(local);
65 ret = local->ops->resume(&local->hw);
66 trace_drv_return_int(local, ret);
67 return ret;
68}
69#endif
70
44static inline int drv_add_interface(struct ieee80211_local *local, 71static inline int drv_add_interface(struct ieee80211_local *local,
45 struct ieee80211_vif *vif) 72 struct ieee80211_vif *vif)
46{ 73{
@@ -185,12 +212,39 @@ static inline int drv_hw_scan(struct ieee80211_local *local,
185 212
186 might_sleep(); 213 might_sleep();
187 214
188 trace_drv_hw_scan(local, sdata, req); 215 trace_drv_hw_scan(local, sdata);
189 ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); 216 ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
190 trace_drv_return_int(local, ret); 217 trace_drv_return_int(local, ret);
191 return ret; 218 return ret;
192} 219}
193 220
221static inline int
222drv_sched_scan_start(struct ieee80211_local *local,
223 struct ieee80211_sub_if_data *sdata,
224 struct cfg80211_sched_scan_request *req,
225 struct ieee80211_sched_scan_ies *ies)
226{
227 int ret;
228
229 might_sleep();
230
231 trace_drv_sched_scan_start(local, sdata);
232 ret = local->ops->sched_scan_start(&local->hw, &sdata->vif,
233 req, ies);
234 trace_drv_return_int(local, ret);
235 return ret;
236}
237
238static inline void drv_sched_scan_stop(struct ieee80211_local *local,
239 struct ieee80211_sub_if_data *sdata)
240{
241 might_sleep();
242
243 trace_drv_sched_scan_stop(local, sdata);
244 local->ops->sched_scan_stop(&local->hw, &sdata->vif);
245 trace_drv_return_void(local);
246}
247
194static inline void drv_sw_scan_start(struct ieee80211_local *local) 248static inline void drv_sw_scan_start(struct ieee80211_local *local)
195{ 249{
196 might_sleep(); 250 might_sleep();
@@ -382,17 +436,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
382 struct ieee80211_sub_if_data *sdata, 436 struct ieee80211_sub_if_data *sdata,
383 enum ieee80211_ampdu_mlme_action action, 437 enum ieee80211_ampdu_mlme_action action,
384 struct ieee80211_sta *sta, u16 tid, 438 struct ieee80211_sta *sta, u16 tid,
385 u16 *ssn) 439 u16 *ssn, u8 buf_size)
386{ 440{
387 int ret = -EOPNOTSUPP; 441 int ret = -EOPNOTSUPP;
388 442
389 might_sleep(); 443 might_sleep();
390 444
391 trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn); 445 trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
392 446
393 if (local->ops->ampdu_action) 447 if (local->ops->ampdu_action)
394 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, 448 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
395 sta, tid, ssn); 449 sta, tid, ssn, buf_size);
396 450
397 trace_drv_return_int(local, ret); 451 trace_drv_return_int(local, ret);
398 452
@@ -495,4 +549,92 @@ static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local)
495 return ret; 549 return ret;
496} 550}
497 551
552static inline int drv_offchannel_tx(struct ieee80211_local *local,
553 struct sk_buff *skb,
554 struct ieee80211_channel *chan,
555 enum nl80211_channel_type channel_type,
556 unsigned int wait)
557{
558 int ret;
559
560 might_sleep();
561
562 trace_drv_offchannel_tx(local, skb, chan, channel_type, wait);
563 ret = local->ops->offchannel_tx(&local->hw, skb, chan,
564 channel_type, wait);
565 trace_drv_return_int(local, ret);
566
567 return ret;
568}
569
570static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local)
571{
572 int ret;
573
574 might_sleep();
575
576 trace_drv_offchannel_tx_cancel_wait(local);
577 ret = local->ops->offchannel_tx_cancel_wait(&local->hw);
578 trace_drv_return_int(local, ret);
579
580 return ret;
581}
582
583static inline int drv_set_ringparam(struct ieee80211_local *local,
584 u32 tx, u32 rx)
585{
586 int ret = -ENOTSUPP;
587
588 might_sleep();
589
590 trace_drv_set_ringparam(local, tx, rx);
591 if (local->ops->set_ringparam)
592 ret = local->ops->set_ringparam(&local->hw, tx, rx);
593 trace_drv_return_int(local, ret);
594
595 return ret;
596}
597
598static inline void drv_get_ringparam(struct ieee80211_local *local,
599 u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max)
600{
601 might_sleep();
602
603 trace_drv_get_ringparam(local, tx, tx_max, rx, rx_max);
604 if (local->ops->get_ringparam)
605 local->ops->get_ringparam(&local->hw, tx, tx_max, rx, rx_max);
606 trace_drv_return_void(local);
607}
608
609static inline bool drv_tx_frames_pending(struct ieee80211_local *local)
610{
611 bool ret = false;
612
613 might_sleep();
614
615 trace_drv_tx_frames_pending(local);
616 if (local->ops->tx_frames_pending)
617 ret = local->ops->tx_frames_pending(&local->hw);
618 trace_drv_return_bool(local, ret);
619
620 return ret;
621}
622
623static inline int drv_set_bitrate_mask(struct ieee80211_local *local,
624 struct ieee80211_sub_if_data *sdata,
625 const struct cfg80211_bitrate_mask *mask)
626{
627 int ret = -EOPNOTSUPP;
628
629 might_sleep();
630
631 trace_drv_set_bitrate_mask(local, sdata, mask);
632 if (local->ops->set_bitrate_mask)
633 ret = local->ops->set_bitrate_mask(&local->hw,
634 &sdata->vif, mask);
635 trace_drv_return_int(local, ret);
636
637 return ret;
638}
639
498#endif /* __MAC80211_DRIVER_OPS */ 640#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 49c84218b2f4..ed9edcbd9aa5 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -9,6 +9,11 @@
9#undef TRACE_EVENT 9#undef TRACE_EVENT
10#define TRACE_EVENT(name, proto, ...) \ 10#define TRACE_EVENT(name, proto, ...) \
11static inline void trace_ ## name(proto) {} 11static inline void trace_ ## name(proto) {}
12#undef DECLARE_EVENT_CLASS
13#define DECLARE_EVENT_CLASS(...)
14#undef DEFINE_EVENT
15#define DEFINE_EVENT(evt_class, name, proto, ...) \
16static inline void trace_ ## name(proto) {}
12#endif 17#endif
13 18
14#undef TRACE_SYSTEM 19#undef TRACE_SYSTEM
@@ -38,7 +43,7 @@ static inline void trace_ ## name(proto) {}
38 * Tracing for driver callbacks. 43 * Tracing for driver callbacks.
39 */ 44 */
40 45
41TRACE_EVENT(drv_return_void, 46DECLARE_EVENT_CLASS(local_only_evt,
42 TP_PROTO(struct ieee80211_local *local), 47 TP_PROTO(struct ieee80211_local *local),
43 TP_ARGS(local), 48 TP_ARGS(local),
44 TP_STRUCT__entry( 49 TP_STRUCT__entry(
@@ -50,92 +55,142 @@ TRACE_EVENT(drv_return_void,
50 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) 55 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
51); 56);
52 57
53TRACE_EVENT(drv_return_int, 58DECLARE_EVENT_CLASS(local_sdata_addr_evt,
54 TP_PROTO(struct ieee80211_local *local, int ret), 59 TP_PROTO(struct ieee80211_local *local,
55 TP_ARGS(local, ret), 60 struct ieee80211_sub_if_data *sdata),
61 TP_ARGS(local, sdata),
62
56 TP_STRUCT__entry( 63 TP_STRUCT__entry(
57 LOCAL_ENTRY 64 LOCAL_ENTRY
58 __field(int, ret) 65 VIF_ENTRY
66 __array(char, addr, 6)
59 ), 67 ),
68
60 TP_fast_assign( 69 TP_fast_assign(
61 LOCAL_ASSIGN; 70 LOCAL_ASSIGN;
62 __entry->ret = ret; 71 VIF_ASSIGN;
72 memcpy(__entry->addr, sdata->vif.addr, 6);
63 ), 73 ),
64 TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret) 74
75 TP_printk(
76 LOCAL_PR_FMT VIF_PR_FMT " addr:%pM",
77 LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr
78 )
65); 79);
66 80
67TRACE_EVENT(drv_return_u64, 81DECLARE_EVENT_CLASS(local_u32_evt,
68 TP_PROTO(struct ieee80211_local *local, u64 ret), 82 TP_PROTO(struct ieee80211_local *local, u32 value),
69 TP_ARGS(local, ret), 83 TP_ARGS(local, value),
84
70 TP_STRUCT__entry( 85 TP_STRUCT__entry(
71 LOCAL_ENTRY 86 LOCAL_ENTRY
72 __field(u64, ret) 87 __field(u32, value)
73 ), 88 ),
89
74 TP_fast_assign( 90 TP_fast_assign(
75 LOCAL_ASSIGN; 91 LOCAL_ASSIGN;
76 __entry->ret = ret; 92 __entry->value = value;
77 ), 93 ),
78 TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
79);
80 94
81TRACE_EVENT(drv_start, 95 TP_printk(
82 TP_PROTO(struct ieee80211_local *local), 96 LOCAL_PR_FMT " value:%d",
97 LOCAL_PR_ARG, __entry->value
98 )
99);
83 100
84 TP_ARGS(local), 101DECLARE_EVENT_CLASS(local_sdata_evt,
102 TP_PROTO(struct ieee80211_local *local,
103 struct ieee80211_sub_if_data *sdata),
104 TP_ARGS(local, sdata),
85 105
86 TP_STRUCT__entry( 106 TP_STRUCT__entry(
87 LOCAL_ENTRY 107 LOCAL_ENTRY
108 VIF_ENTRY
88 ), 109 ),
89 110
90 TP_fast_assign( 111 TP_fast_assign(
91 LOCAL_ASSIGN; 112 LOCAL_ASSIGN;
113 VIF_ASSIGN;
92 ), 114 ),
93 115
94 TP_printk( 116 TP_printk(
95 LOCAL_PR_FMT, LOCAL_PR_ARG 117 LOCAL_PR_FMT VIF_PR_FMT,
118 LOCAL_PR_ARG, VIF_PR_ARG
96 ) 119 )
97); 120);
98 121
99TRACE_EVENT(drv_stop, 122DEFINE_EVENT(local_only_evt, drv_return_void,
100 TP_PROTO(struct ieee80211_local *local), 123 TP_PROTO(struct ieee80211_local *local),
124 TP_ARGS(local)
125);
101 126
102 TP_ARGS(local), 127TRACE_EVENT(drv_return_int,
103 128 TP_PROTO(struct ieee80211_local *local, int ret),
129 TP_ARGS(local, ret),
104 TP_STRUCT__entry( 130 TP_STRUCT__entry(
105 LOCAL_ENTRY 131 LOCAL_ENTRY
132 __field(int, ret)
106 ), 133 ),
107
108 TP_fast_assign( 134 TP_fast_assign(
109 LOCAL_ASSIGN; 135 LOCAL_ASSIGN;
136 __entry->ret = ret;
110 ), 137 ),
111 138 TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret)
112 TP_printk(
113 LOCAL_PR_FMT, LOCAL_PR_ARG
114 )
115); 139);
116 140
117TRACE_EVENT(drv_add_interface, 141TRACE_EVENT(drv_return_bool,
118 TP_PROTO(struct ieee80211_local *local, 142 TP_PROTO(struct ieee80211_local *local, bool ret),
119 struct ieee80211_sub_if_data *sdata), 143 TP_ARGS(local, ret),
120
121 TP_ARGS(local, sdata),
122
123 TP_STRUCT__entry( 144 TP_STRUCT__entry(
124 LOCAL_ENTRY 145 LOCAL_ENTRY
125 VIF_ENTRY 146 __field(bool, ret)
126 __array(char, addr, 6) 147 ),
148 TP_fast_assign(
149 LOCAL_ASSIGN;
150 __entry->ret = ret;
127 ), 151 ),
152 TP_printk(LOCAL_PR_FMT " - %s", LOCAL_PR_ARG, (__entry->ret) ?
153 "true" : "false")
154);
128 155
156TRACE_EVENT(drv_return_u64,
157 TP_PROTO(struct ieee80211_local *local, u64 ret),
158 TP_ARGS(local, ret),
159 TP_STRUCT__entry(
160 LOCAL_ENTRY
161 __field(u64, ret)
162 ),
129 TP_fast_assign( 163 TP_fast_assign(
130 LOCAL_ASSIGN; 164 LOCAL_ASSIGN;
131 VIF_ASSIGN; 165 __entry->ret = ret;
132 memcpy(__entry->addr, sdata->vif.addr, 6);
133 ), 166 ),
167 TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
168);
134 169
135 TP_printk( 170DEFINE_EVENT(local_only_evt, drv_start,
136 LOCAL_PR_FMT VIF_PR_FMT " addr:%pM", 171 TP_PROTO(struct ieee80211_local *local),
137 LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr 172 TP_ARGS(local)
138 ) 173);
174
175DEFINE_EVENT(local_only_evt, drv_suspend,
176 TP_PROTO(struct ieee80211_local *local),
177 TP_ARGS(local)
178);
179
180DEFINE_EVENT(local_only_evt, drv_resume,
181 TP_PROTO(struct ieee80211_local *local),
182 TP_ARGS(local)
183);
184
185DEFINE_EVENT(local_only_evt, drv_stop,
186 TP_PROTO(struct ieee80211_local *local),
187 TP_ARGS(local)
188);
189
190DEFINE_EVENT(local_sdata_addr_evt, drv_add_interface,
191 TP_PROTO(struct ieee80211_local *local,
192 struct ieee80211_sub_if_data *sdata),
193 TP_ARGS(local, sdata)
139); 194);
140 195
141TRACE_EVENT(drv_change_interface, 196TRACE_EVENT(drv_change_interface,
@@ -166,27 +221,10 @@ TRACE_EVENT(drv_change_interface,
166 ) 221 )
167); 222);
168 223
169TRACE_EVENT(drv_remove_interface, 224DEFINE_EVENT(local_sdata_addr_evt, drv_remove_interface,
170 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), 225 TP_PROTO(struct ieee80211_local *local,
171 226 struct ieee80211_sub_if_data *sdata),
172 TP_ARGS(local, sdata), 227 TP_ARGS(local, sdata)
173
174 TP_STRUCT__entry(
175 LOCAL_ENTRY
176 VIF_ENTRY
177 __array(char, addr, 6)
178 ),
179
180 TP_fast_assign(
181 LOCAL_ASSIGN;
182 VIF_ASSIGN;
183 memcpy(__entry->addr, sdata->vif.addr, 6);
184 ),
185
186 TP_printk(
187 LOCAL_PR_FMT VIF_PR_FMT " addr:%pM",
188 LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr
189 )
190); 228);
191 229
192TRACE_EVENT(drv_config, 230TRACE_EVENT(drv_config,
@@ -416,63 +454,32 @@ TRACE_EVENT(drv_update_tkip_key,
416 ) 454 )
417); 455);
418 456
419TRACE_EVENT(drv_hw_scan, 457DEFINE_EVENT(local_sdata_evt, drv_hw_scan,
420 TP_PROTO(struct ieee80211_local *local, 458 TP_PROTO(struct ieee80211_local *local,
421 struct ieee80211_sub_if_data *sdata, 459 struct ieee80211_sub_if_data *sdata),
422 struct cfg80211_scan_request *req), 460 TP_ARGS(local, sdata)
423 461);
424 TP_ARGS(local, sdata, req),
425
426 TP_STRUCT__entry(
427 LOCAL_ENTRY
428 VIF_ENTRY
429 ),
430 462
431 TP_fast_assign( 463DEFINE_EVENT(local_sdata_evt, drv_sched_scan_start,
432 LOCAL_ASSIGN; 464 TP_PROTO(struct ieee80211_local *local,
433 VIF_ASSIGN; 465 struct ieee80211_sub_if_data *sdata),
434 ), 466 TP_ARGS(local, sdata)
467);
435 468
436 TP_printk( 469DEFINE_EVENT(local_sdata_evt, drv_sched_scan_stop,
437 LOCAL_PR_FMT VIF_PR_FMT, 470 TP_PROTO(struct ieee80211_local *local,
438 LOCAL_PR_ARG,VIF_PR_ARG 471 struct ieee80211_sub_if_data *sdata),
439 ) 472 TP_ARGS(local, sdata)
440); 473);
441 474
442TRACE_EVENT(drv_sw_scan_start, 475DEFINE_EVENT(local_only_evt, drv_sw_scan_start,
443 TP_PROTO(struct ieee80211_local *local), 476 TP_PROTO(struct ieee80211_local *local),
444 477 TP_ARGS(local)
445 TP_ARGS(local),
446
447 TP_STRUCT__entry(
448 LOCAL_ENTRY
449 ),
450
451 TP_fast_assign(
452 LOCAL_ASSIGN;
453 ),
454
455 TP_printk(
456 LOCAL_PR_FMT, LOCAL_PR_ARG
457 )
458); 478);
459 479
460TRACE_EVENT(drv_sw_scan_complete, 480DEFINE_EVENT(local_only_evt, drv_sw_scan_complete,
461 TP_PROTO(struct ieee80211_local *local), 481 TP_PROTO(struct ieee80211_local *local),
462 482 TP_ARGS(local)
463 TP_ARGS(local),
464
465 TP_STRUCT__entry(
466 LOCAL_ENTRY
467 ),
468
469 TP_fast_assign(
470 LOCAL_ASSIGN;
471 ),
472
473 TP_printk(
474 LOCAL_PR_FMT, LOCAL_PR_ARG
475 )
476); 483);
477 484
478TRACE_EVENT(drv_get_stats, 485TRACE_EVENT(drv_get_stats,
@@ -531,46 +538,14 @@ TRACE_EVENT(drv_get_tkip_seq,
531 ) 538 )
532); 539);
533 540
534TRACE_EVENT(drv_set_frag_threshold, 541DEFINE_EVENT(local_u32_evt, drv_set_frag_threshold,
535 TP_PROTO(struct ieee80211_local *local, u32 value), 542 TP_PROTO(struct ieee80211_local *local, u32 value),
536 543 TP_ARGS(local, value)
537 TP_ARGS(local, value),
538
539 TP_STRUCT__entry(
540 LOCAL_ENTRY
541 __field(u32, value)
542 ),
543
544 TP_fast_assign(
545 LOCAL_ASSIGN;
546 __entry->value = value;
547 ),
548
549 TP_printk(
550 LOCAL_PR_FMT " value:%d",
551 LOCAL_PR_ARG, __entry->value
552 )
553); 544);
554 545
555TRACE_EVENT(drv_set_rts_threshold, 546DEFINE_EVENT(local_u32_evt, drv_set_rts_threshold,
556 TP_PROTO(struct ieee80211_local *local, u32 value), 547 TP_PROTO(struct ieee80211_local *local, u32 value),
557 548 TP_ARGS(local, value)
558 TP_ARGS(local, value),
559
560 TP_STRUCT__entry(
561 LOCAL_ENTRY
562 __field(u32, value)
563 ),
564
565 TP_fast_assign(
566 LOCAL_ASSIGN;
567 __entry->value = value;
568 ),
569
570 TP_printk(
571 LOCAL_PR_FMT " value:%d",
572 LOCAL_PR_ARG, __entry->value
573 )
574); 549);
575 550
576TRACE_EVENT(drv_set_coverage_class, 551TRACE_EVENT(drv_set_coverage_class,
@@ -702,23 +677,9 @@ TRACE_EVENT(drv_conf_tx,
702 ) 677 )
703); 678);
704 679
705TRACE_EVENT(drv_get_tsf, 680DEFINE_EVENT(local_only_evt, drv_get_tsf,
706 TP_PROTO(struct ieee80211_local *local), 681 TP_PROTO(struct ieee80211_local *local),
707 682 TP_ARGS(local)
708 TP_ARGS(local),
709
710 TP_STRUCT__entry(
711 LOCAL_ENTRY
712 ),
713
714 TP_fast_assign(
715 LOCAL_ASSIGN;
716 ),
717
718 TP_printk(
719 LOCAL_PR_FMT,
720 LOCAL_PR_ARG
721 )
722); 683);
723 684
724TRACE_EVENT(drv_set_tsf, 685TRACE_EVENT(drv_set_tsf,
@@ -742,41 +703,14 @@ TRACE_EVENT(drv_set_tsf,
742 ) 703 )
743); 704);
744 705
745TRACE_EVENT(drv_reset_tsf, 706DEFINE_EVENT(local_only_evt, drv_reset_tsf,
746 TP_PROTO(struct ieee80211_local *local), 707 TP_PROTO(struct ieee80211_local *local),
747 708 TP_ARGS(local)
748 TP_ARGS(local),
749
750 TP_STRUCT__entry(
751 LOCAL_ENTRY
752 ),
753
754 TP_fast_assign(
755 LOCAL_ASSIGN;
756 ),
757
758 TP_printk(
759 LOCAL_PR_FMT, LOCAL_PR_ARG
760 )
761); 709);
762 710
763TRACE_EVENT(drv_tx_last_beacon, 711DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
764 TP_PROTO(struct ieee80211_local *local), 712 TP_PROTO(struct ieee80211_local *local),
765 713 TP_ARGS(local)
766 TP_ARGS(local),
767
768 TP_STRUCT__entry(
769 LOCAL_ENTRY
770 ),
771
772 TP_fast_assign(
773 LOCAL_ASSIGN;
774 ),
775
776 TP_printk(
777 LOCAL_PR_FMT,
778 LOCAL_PR_ARG
779 )
780); 714);
781 715
782TRACE_EVENT(drv_ampdu_action, 716TRACE_EVENT(drv_ampdu_action,
@@ -784,9 +718,9 @@ TRACE_EVENT(drv_ampdu_action,
784 struct ieee80211_sub_if_data *sdata, 718 struct ieee80211_sub_if_data *sdata,
785 enum ieee80211_ampdu_mlme_action action, 719 enum ieee80211_ampdu_mlme_action action,
786 struct ieee80211_sta *sta, u16 tid, 720 struct ieee80211_sta *sta, u16 tid,
787 u16 *ssn), 721 u16 *ssn, u8 buf_size),
788 722
789 TP_ARGS(local, sdata, action, sta, tid, ssn), 723 TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size),
790 724
791 TP_STRUCT__entry( 725 TP_STRUCT__entry(
792 LOCAL_ENTRY 726 LOCAL_ENTRY
@@ -794,6 +728,7 @@ TRACE_EVENT(drv_ampdu_action,
794 __field(u32, action) 728 __field(u32, action)
795 __field(u16, tid) 729 __field(u16, tid)
796 __field(u16, ssn) 730 __field(u16, ssn)
731 __field(u8, buf_size)
797 VIF_ENTRY 732 VIF_ENTRY
798 ), 733 ),
799 734
@@ -804,11 +739,13 @@ TRACE_EVENT(drv_ampdu_action,
804 __entry->action = action; 739 __entry->action = action;
805 __entry->tid = tid; 740 __entry->tid = tid;
806 __entry->ssn = ssn ? *ssn : 0; 741 __entry->ssn = ssn ? *ssn : 0;
742 __entry->buf_size = buf_size;
807 ), 743 ),
808 744
809 TP_printk( 745 TP_printk(
810 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d", 746 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d",
811 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid 747 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
748 __entry->tid, __entry->buf_size
812 ) 749 )
813); 750);
814 751
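The extra buf_size argument traced here mirrors the new buf_size parameter passed through to the drivers' ampdu_action callback: the peer's advertised reorder-buffer size, which bounds how many frames may be outstanding per BlockAck session. A hypothetical driver (all foo_* names invented for illustration) would consume it roughly like this:

static int foo_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
			    enum ieee80211_ampdu_mlme_action action,
			    struct ieee80211_sta *sta, u16 tid,
			    u16 *ssn, u8 buf_size)
{
	struct foo_priv *priv = hw->priv;	/* hypothetical driver state */

	switch (action) {
	case IEEE80211_AMPDU_TX_OPERATIONAL:
		/* never aggregate more subframes than the peer can reorder */
		foo_set_tx_agg_limit(priv, tid, min_t(u8, buf_size, FOO_HW_MAX_AGG));
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}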
@@ -959,21 +896,125 @@ TRACE_EVENT(drv_remain_on_channel,
959 ) 896 )
960); 897);
961 898
962TRACE_EVENT(drv_cancel_remain_on_channel, 899DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
963 TP_PROTO(struct ieee80211_local *local), 900 TP_PROTO(struct ieee80211_local *local),
901 TP_ARGS(local)
902);
964 903
965 TP_ARGS(local), 904TRACE_EVENT(drv_offchannel_tx,
905 TP_PROTO(struct ieee80211_local *local, struct sk_buff *skb,
906 struct ieee80211_channel *chan,
907 enum nl80211_channel_type channel_type,
908 unsigned int wait),
909
910 TP_ARGS(local, skb, chan, channel_type, wait),
966 911
967 TP_STRUCT__entry( 912 TP_STRUCT__entry(
968 LOCAL_ENTRY 913 LOCAL_ENTRY
914 __field(int, center_freq)
915 __field(int, channel_type)
916 __field(unsigned int, wait)
969 ), 917 ),
970 918
971 TP_fast_assign( 919 TP_fast_assign(
972 LOCAL_ASSIGN; 920 LOCAL_ASSIGN;
921 __entry->center_freq = chan->center_freq;
922 __entry->channel_type = channel_type;
923 __entry->wait = wait;
973 ), 924 ),
974 925
975 TP_printk( 926 TP_printk(
976 LOCAL_PR_FMT, LOCAL_PR_ARG 927 LOCAL_PR_FMT " freq:%dMHz, wait:%dms",
928 LOCAL_PR_ARG, __entry->center_freq, __entry->wait
929 )
930);
931
932TRACE_EVENT(drv_set_ringparam,
933 TP_PROTO(struct ieee80211_local *local, u32 tx, u32 rx),
934
935 TP_ARGS(local, tx, rx),
936
937 TP_STRUCT__entry(
938 LOCAL_ENTRY
939 __field(u32, tx)
940 __field(u32, rx)
941 ),
942
943 TP_fast_assign(
944 LOCAL_ASSIGN;
945 __entry->tx = tx;
946 __entry->rx = rx;
947 ),
948
949 TP_printk(
950 LOCAL_PR_FMT " tx:%d rx %d",
951 LOCAL_PR_ARG, __entry->tx, __entry->rx
952 )
953);
954
955TRACE_EVENT(drv_get_ringparam,
956 TP_PROTO(struct ieee80211_local *local, u32 *tx, u32 *tx_max,
957 u32 *rx, u32 *rx_max),
958
959 TP_ARGS(local, tx, tx_max, rx, rx_max),
960
961 TP_STRUCT__entry(
962 LOCAL_ENTRY
963 __field(u32, tx)
964 __field(u32, tx_max)
965 __field(u32, rx)
966 __field(u32, rx_max)
967 ),
968
969 TP_fast_assign(
970 LOCAL_ASSIGN;
971 __entry->tx = *tx;
972 __entry->tx_max = *tx_max;
973 __entry->rx = *rx;
974 __entry->rx_max = *rx_max;
975 ),
976
977 TP_printk(
978 LOCAL_PR_FMT " tx:%d tx_max %d rx %d rx_max %d",
979 LOCAL_PR_ARG,
980 __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max
981 )
982);
983
984DEFINE_EVENT(local_only_evt, drv_tx_frames_pending,
985 TP_PROTO(struct ieee80211_local *local),
986 TP_ARGS(local)
987);
988
989DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait,
990 TP_PROTO(struct ieee80211_local *local),
991 TP_ARGS(local)
992);
993
994TRACE_EVENT(drv_set_bitrate_mask,
995 TP_PROTO(struct ieee80211_local *local,
996 struct ieee80211_sub_if_data *sdata,
997 const struct cfg80211_bitrate_mask *mask),
998
999 TP_ARGS(local, sdata, mask),
1000
1001 TP_STRUCT__entry(
1002 LOCAL_ENTRY
1003 VIF_ENTRY
1004 __field(u32, legacy_2g)
1005 __field(u32, legacy_5g)
1006 ),
1007
1008 TP_fast_assign(
1009 LOCAL_ASSIGN;
1010 VIF_ASSIGN;
1011 __entry->legacy_2g = mask->control[IEEE80211_BAND_2GHZ].legacy;
1012 __entry->legacy_5g = mask->control[IEEE80211_BAND_5GHZ].legacy;
1013 ),
1014
1015 TP_printk(
1016 LOCAL_PR_FMT VIF_PR_FMT " 2G Mask:0x%x 5G Mask:0x%x",
1017 LOCAL_PR_ARG, VIF_PR_ARG, __entry->legacy_2g, __entry->legacy_5g
977 ) 1018 )
978); 1019);
979 1020
@@ -1069,23 +1110,9 @@ TRACE_EVENT(api_stop_tx_ba_cb,
1069 ) 1110 )
1070); 1111);
1071 1112
1072TRACE_EVENT(api_restart_hw, 1113DEFINE_EVENT(local_only_evt, api_restart_hw,
1073 TP_PROTO(struct ieee80211_local *local), 1114 TP_PROTO(struct ieee80211_local *local),
1074 1115 TP_ARGS(local)
1075 TP_ARGS(local),
1076
1077 TP_STRUCT__entry(
1078 LOCAL_ENTRY
1079 ),
1080
1081 TP_fast_assign(
1082 LOCAL_ASSIGN;
1083 ),
1084
1085 TP_printk(
1086 LOCAL_PR_FMT,
1087 LOCAL_PR_ARG
1088 )
1089); 1116);
1090 1117
1091TRACE_EVENT(api_beacon_loss, 1118TRACE_EVENT(api_beacon_loss,
@@ -1169,87 +1196,97 @@ TRACE_EVENT(api_scan_completed,
1169 ) 1196 )
1170); 1197);
1171 1198
1172TRACE_EVENT(api_sta_block_awake, 1199TRACE_EVENT(api_sched_scan_results,
1173 TP_PROTO(struct ieee80211_local *local, 1200 TP_PROTO(struct ieee80211_local *local),
1174 struct ieee80211_sta *sta, bool block),
1175 1201
1176 TP_ARGS(local, sta, block), 1202 TP_ARGS(local),
1177 1203
1178 TP_STRUCT__entry( 1204 TP_STRUCT__entry(
1179 LOCAL_ENTRY 1205 LOCAL_ENTRY
1180 STA_ENTRY
1181 __field(bool, block)
1182 ), 1206 ),
1183 1207
1184 TP_fast_assign( 1208 TP_fast_assign(
1185 LOCAL_ASSIGN; 1209 LOCAL_ASSIGN;
1186 STA_ASSIGN;
1187 __entry->block = block;
1188 ), 1210 ),
1189 1211
1190 TP_printk( 1212 TP_printk(
1191 LOCAL_PR_FMT STA_PR_FMT " block:%d", 1213 LOCAL_PR_FMT, LOCAL_PR_ARG
1192 LOCAL_PR_ARG, STA_PR_FMT, __entry->block
1193 ) 1214 )
1194); 1215);
1195 1216
1196TRACE_EVENT(api_chswitch_done, 1217TRACE_EVENT(api_sched_scan_stopped,
1197 TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success), 1218 TP_PROTO(struct ieee80211_local *local),
1198 1219
1199 TP_ARGS(sdata, success), 1220 TP_ARGS(local),
1200 1221
1201 TP_STRUCT__entry( 1222 TP_STRUCT__entry(
1202 VIF_ENTRY 1223 LOCAL_ENTRY
1203 __field(bool, success)
1204 ), 1224 ),
1205 1225
1206 TP_fast_assign( 1226 TP_fast_assign(
1207 VIF_ASSIGN; 1227 LOCAL_ASSIGN;
1208 __entry->success = success;
1209 ), 1228 ),
1210 1229
1211 TP_printk( 1230 TP_printk(
1212 VIF_PR_FMT " success=%d", 1231 LOCAL_PR_FMT, LOCAL_PR_ARG
1213 VIF_PR_ARG, __entry->success
1214 ) 1232 )
1215); 1233);
1216 1234
1217TRACE_EVENT(api_ready_on_channel, 1235TRACE_EVENT(api_sta_block_awake,
1218 TP_PROTO(struct ieee80211_local *local), 1236 TP_PROTO(struct ieee80211_local *local,
1237 struct ieee80211_sta *sta, bool block),
1219 1238
1220 TP_ARGS(local), 1239 TP_ARGS(local, sta, block),
1221 1240
1222 TP_STRUCT__entry( 1241 TP_STRUCT__entry(
1223 LOCAL_ENTRY 1242 LOCAL_ENTRY
1243 STA_ENTRY
1244 __field(bool, block)
1224 ), 1245 ),
1225 1246
1226 TP_fast_assign( 1247 TP_fast_assign(
1227 LOCAL_ASSIGN; 1248 LOCAL_ASSIGN;
1249 STA_ASSIGN;
1250 __entry->block = block;
1228 ), 1251 ),
1229 1252
1230 TP_printk( 1253 TP_printk(
1231 LOCAL_PR_FMT, LOCAL_PR_ARG 1254 LOCAL_PR_FMT STA_PR_FMT " block:%d",
1255 LOCAL_PR_ARG, STA_PR_FMT, __entry->block
1232 ) 1256 )
1233); 1257);
1234 1258
1235TRACE_EVENT(api_remain_on_channel_expired, 1259TRACE_EVENT(api_chswitch_done,
1236 TP_PROTO(struct ieee80211_local *local), 1260 TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
1237 1261
1238 TP_ARGS(local), 1262 TP_ARGS(sdata, success),
1239 1263
1240 TP_STRUCT__entry( 1264 TP_STRUCT__entry(
1241 LOCAL_ENTRY 1265 VIF_ENTRY
1266 __field(bool, success)
1242 ), 1267 ),
1243 1268
1244 TP_fast_assign( 1269 TP_fast_assign(
1245 LOCAL_ASSIGN; 1270 VIF_ASSIGN;
1271 __entry->success = success;
1246 ), 1272 ),
1247 1273
1248 TP_printk( 1274 TP_printk(
1249 LOCAL_PR_FMT, LOCAL_PR_ARG 1275 VIF_PR_FMT " success=%d",
1276 VIF_PR_ARG, __entry->success
1250 ) 1277 )
1251); 1278);
1252 1279
1280DEFINE_EVENT(local_only_evt, api_ready_on_channel,
1281 TP_PROTO(struct ieee80211_local *local),
1282 TP_ARGS(local)
1283);
1284
1285DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
1286 TP_PROTO(struct ieee80211_local *local),
1287 TP_ARGS(local)
1288);
1289
1253/* 1290/*
1254 * Tracing for internal functions 1291 * Tracing for internal functions
1255 * (which may also be called in response to driver calls) 1292 * (which may also be called in response to driver calls)
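The new api_sched_scan_results / api_sched_scan_stopped events fire from the mac80211 entry points a driver calls when a hardware-offloaded scheduled scan reports results or stops; ieee80211_sched_scan_results() and ieee80211_sched_scan_stopped() are assumed to be the exported helpers added alongside this series. A rough driver-side sketch (hypothetical driver and firmware-event names):

static void foo_handle_fw_event(struct foo_priv *priv, u32 event)
{
	switch (event) {
	case FOO_EVT_SCHED_SCAN_RESULTS:
		/* new results available; mac80211 traces api_sched_scan_results
		 * and forwards the notification towards cfg80211/userspace */
		ieee80211_sched_scan_results(priv->hw);
		break;
	case FOO_EVT_SCHED_SCAN_STOPPED:
		/* firmware stopped the scheduled scan on its own */
		ieee80211_sched_scan_stopped(priv->hw);
		break;
	}
}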
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 75d679d75e63..591add22bcc0 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -66,6 +66,9 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
66 /* own MCS TX capabilities */ 66 /* own MCS TX capabilities */
67 tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; 67 tx_mcs_set_cap = sband->ht_cap.mcs.tx_params;
68 68
69 /* Copy peer MCS TX capabilities, the driver might need them. */
70 ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params;
71
69 /* can we TX with MCS rates? */ 72 /* can we TX with MCS rates? */
70 if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) 73 if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED))
71 return; 74 return;
@@ -79,7 +82,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
79 max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS; 82 max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS;
80 83
81 /* 84 /*
82 * 802.11n D5.0 20.3.5 / 20.6 says: 85 * 802.11n-2009 20.3.5 / 20.6 says:
83 * - indices 0 to 7 and 32 are single spatial stream 86 * - indices 0 to 7 and 32 are single spatial stream
84 * - 8 to 31 are multiple spatial streams using equal modulation 87 * - 8 to 31 are multiple spatial streams using equal modulation
85 * [8..15 for two streams, 16..23 for three and 24..31 for four] 88 * [8..15 for two streams, 16..23 for three and 24..31 for four]
@@ -137,14 +140,29 @@ void ieee80211_ba_session_work(struct work_struct *work)
137 sta, tid, WLAN_BACK_RECIPIENT, 140 sta, tid, WLAN_BACK_RECIPIENT,
138 WLAN_REASON_QSTA_TIMEOUT, true); 141 WLAN_REASON_QSTA_TIMEOUT, true);
139 142
140 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 143 tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
141 if (!tid_tx) 144 if (tid_tx) {
142 continue; 145 /*
146 * Assign it over to the normal tid_tx array
147 * where it "goes live".
148 */
149 spin_lock_bh(&sta->lock);
150
151 sta->ampdu_mlme.tid_start_tx[tid] = NULL;
152 /* could there be a race? */
153 if (sta->ampdu_mlme.tid_tx[tid])
154 kfree(tid_tx);
155 else
156 ieee80211_assign_tid_tx(sta, tid, tid_tx);
157 spin_unlock_bh(&sta->lock);
143 158
144 if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state))
145 ieee80211_tx_ba_session_handle_start(sta, tid); 159 ieee80211_tx_ba_session_handle_start(sta, tid);
146 else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, 160 continue;
147 &tid_tx->state)) 161 }
162
163 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
164 if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
165 &tid_tx->state))
148 ___ieee80211_stop_tx_ba_session(sta, tid, 166 ___ieee80211_stop_tx_ba_session(sta, tid,
149 WLAN_BACK_INITIATOR, 167 WLAN_BACK_INITIATOR,
150 true); 168 true);
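In the ht.c hunk, a pending session prepared in tid_start_tx[] is handed over to tid_tx[] under sta->lock before it is started. rcu_dereference_protected_tid_tx() is assumed here to be a helper introduced by the same series, wrapping rcu_dereference_protected() with the locks that make the plain access legal, roughly:

#define rcu_dereference_protected_tid_tx(sta, tid)			\
	rcu_dereference_protected((sta)->ampdu_mlme.tid_tx[tid],	\
				  lockdep_is_held(&(sta)->lock) ||	\
				  lockdep_is_held(&(sta)->ampdu_mlme.mtx))

That is, the tid_tx[] slot may be read without rcu_read_lock() as long as one of the locks serializing updates is held.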
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 53c7077ffd4f..421eaa6b0c2b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -31,7 +31,6 @@
31#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) 31#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
32 32
33#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) 33#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
34#define IEEE80211_IBSS_MERGE_DELAY 0x400000
35#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) 34#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
36 35
37#define IEEE80211_IBSS_MAX_STA_ENTRIES 128 36#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
@@ -41,7 +40,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
41 struct ieee80211_mgmt *mgmt, 40 struct ieee80211_mgmt *mgmt,
42 size_t len) 41 size_t len)
43{ 42{
44 u16 auth_alg, auth_transaction, status_code; 43 u16 auth_alg, auth_transaction;
45 44
46 lockdep_assert_held(&sdata->u.ibss.mtx); 45 lockdep_assert_held(&sdata->u.ibss.mtx);
47 46
@@ -50,7 +49,6 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
50 49
51 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); 50 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
52 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); 51 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
53 status_code = le16_to_cpu(mgmt->u.auth.status_code);
54 52
55 /* 53 /*
56 * IEEE 802.11 standard does not require authentication in IBSS 54 * IEEE 802.11 standard does not require authentication in IBSS
@@ -270,7 +268,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
270 enum ieee80211_band band = rx_status->band; 268 enum ieee80211_band band = rx_status->band;
271 269
272 if (elems->ds_params && elems->ds_params_len == 1) 270 if (elems->ds_params && elems->ds_params_len == 1)
273 freq = ieee80211_channel_to_frequency(elems->ds_params[0]); 271 freq = ieee80211_channel_to_frequency(elems->ds_params[0],
272 band);
274 else 273 else
275 freq = rx_status->freq; 274 freq = rx_status->freq;
276 275
@@ -354,7 +353,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
354 if (memcmp(cbss->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) 353 if (memcmp(cbss->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0)
355 goto put_bss; 354 goto put_bss;
356 355
357 if (rx_status->flag & RX_FLAG_TSFT) { 356 if (rx_status->flag & RX_FLAG_MACTIME_MPDU) {
358 /* 357 /*
359 * For correct IBSS merging we need mactime; since mactime is 358 * For correct IBSS merging we need mactime; since mactime is
360 * defined as the time the first data symbol of the frame hits 359 * defined as the time the first data symbol of the frame hits
@@ -396,10 +395,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
396 jiffies); 395 jiffies);
397#endif 396#endif
398 397
399 /* give slow hardware some time to do the TSF sync */
400 if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY)
401 goto put_bss;
402
403 if (beacon_timestamp > rx_timestamp) { 398 if (beacon_timestamp > rx_timestamp) {
404#ifdef CONFIG_MAC80211_IBSS_DEBUG 399#ifdef CONFIG_MAC80211_IBSS_DEBUG
405 printk(KERN_DEBUG "%s: beacon TSF higher than " 400 printk(KERN_DEBUG "%s: beacon TSF higher than "
@@ -531,8 +526,6 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
531static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) 526static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
532{ 527{
533 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 528 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
534 struct ieee80211_local *local = sdata->local;
535 struct ieee80211_supported_band *sband;
536 u8 bssid[ETH_ALEN]; 529 u8 bssid[ETH_ALEN];
537 u16 capability; 530 u16 capability;
538 int i; 531 int i;
@@ -555,8 +548,6 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
555 printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", 548 printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n",
556 sdata->name, bssid); 549 sdata->name, bssid);
557 550
558 sband = local->hw.wiphy->bands[ifibss->channel->band];
559
560 capability = WLAN_CAPABILITY_IBSS; 551 capability = WLAN_CAPABILITY_IBSS;
561 552
562 if (ifibss->privacy) 553 if (ifibss->privacy)
@@ -663,20 +654,24 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
663} 654}
664 655
665static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, 656static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
666 struct ieee80211_mgmt *mgmt, 657 struct sk_buff *req)
667 size_t len)
668{ 658{
659 struct ieee80211_mgmt *mgmt = (void *)req->data;
669 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 660 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
670 struct ieee80211_local *local = sdata->local; 661 struct ieee80211_local *local = sdata->local;
671 int tx_last_beacon; 662 int tx_last_beacon, len = req->len;
672 struct sk_buff *skb; 663 struct sk_buff *skb;
673 struct ieee80211_mgmt *resp; 664 struct ieee80211_mgmt *resp;
665 struct sk_buff *presp;
674 u8 *pos, *end; 666 u8 *pos, *end;
675 667
676 lockdep_assert_held(&ifibss->mtx); 668 lockdep_assert_held(&ifibss->mtx);
677 669
670 presp = rcu_dereference_protected(ifibss->presp,
671 lockdep_is_held(&ifibss->mtx));
672
678 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || 673 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED ||
679 len < 24 + 2 || !ifibss->presp) 674 len < 24 + 2 || !presp)
680 return; 675 return;
681 676
682 tx_last_beacon = drv_tx_last_beacon(local); 677 tx_last_beacon = drv_tx_last_beacon(local);
@@ -688,7 +683,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
688 mgmt->bssid, tx_last_beacon); 683 mgmt->bssid, tx_last_beacon);
689#endif /* CONFIG_MAC80211_IBSS_DEBUG */ 684#endif /* CONFIG_MAC80211_IBSS_DEBUG */
690 685
691 if (!tx_last_beacon) 686 if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da))
692 return; 687 return;
693 688
694 if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 && 689 if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 &&
@@ -714,7 +709,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
714 } 709 }
715 710
716 /* Reply with ProbeResp */ 711 /* Reply with ProbeResp */
717 skb = skb_copy(ifibss->presp, GFP_KERNEL); 712 skb = skb_copy(presp, GFP_KERNEL);
718 if (!skb) 713 if (!skb)
719 return; 714 return;
720 715
@@ -785,7 +780,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
785 780
786 switch (fc & IEEE80211_FCTL_STYPE) { 781 switch (fc & IEEE80211_FCTL_STYPE) {
787 case IEEE80211_STYPE_PROBE_REQ: 782 case IEEE80211_STYPE_PROBE_REQ:
788 ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); 783 ieee80211_rx_mgmt_probe_req(sdata, skb);
789 break; 784 break;
790 case IEEE80211_STYPE_PROBE_RESP: 785 case IEEE80211_STYPE_PROBE_RESP:
791 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, 786 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len,
@@ -994,7 +989,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
994 989
995 /* remove beacon */ 990 /* remove beacon */
996 kfree(sdata->u.ibss.ie); 991 kfree(sdata->u.ibss.ie);
997 skb = sdata->u.ibss.presp; 992 skb = rcu_dereference_protected(sdata->u.ibss.presp,
993 lockdep_is_held(&sdata->u.ibss.mtx));
998 rcu_assign_pointer(sdata->u.ibss.presp, NULL); 994 rcu_assign_pointer(sdata->u.ibss.presp, NULL);
999 sdata->vif.bss_conf.ibss_joined = false; 995 sdata->vif.bss_conf.ibss_joined = false;
1000 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | 996 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
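With u.ibss.presp now an __rcu pointer, update paths under ifibss->mtx use rcu_dereference_protected() as shown above, while any fast-path reader would take the RCU read lock instead. A generic sketch of the reader side (illustrative, not taken from this patch):

rcu_read_lock();
presp = rcu_dereference(ifibss->presp);
if (presp)
	skb = skb_copy(presp, GFP_ATOMIC);	/* atomic allocation inside RCU read side */
rcu_read_unlock();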
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 533fd32f49ff..2025af52b195 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -97,7 +97,7 @@ struct ieee80211_bss {
97 size_t supp_rates_len; 97 size_t supp_rates_len;
98 98
99 /* 99 /*
100 * During assocation, we save an ERP value from a probe response so 100 * During association, we save an ERP value from a probe response so
101 * that we can feed ERP info to the driver when handling the 101 * that we can feed ERP info to the driver when handling the
102 * association completes. these fields probably won't be up-to-date 102 * association completes. these fields probably won't be up-to-date
103 * otherwise, you probably don't want to use them. 103 * otherwise, you probably don't want to use them.
@@ -214,7 +214,7 @@ struct beacon_data {
214}; 214};
215 215
216struct ieee80211_if_ap { 216struct ieee80211_if_ap {
217 struct beacon_data *beacon; 217 struct beacon_data __rcu *beacon;
218 218
219 struct list_head vlans; 219 struct list_head vlans;
220 220
@@ -225,6 +225,7 @@ struct ieee80211_if_ap {
225 struct sk_buff_head ps_bc_buf; 225 struct sk_buff_head ps_bc_buf;
226 atomic_t num_sta_ps; /* number of stations in PS mode */ 226 atomic_t num_sta_ps; /* number of stations in PS mode */
227 int dtim_count; 227 int dtim_count;
228 bool dtim_bc_mc;
228}; 229};
229 230
230struct ieee80211_if_wds { 231struct ieee80211_if_wds {
@@ -236,7 +237,7 @@ struct ieee80211_if_vlan {
236 struct list_head list; 237 struct list_head list;
237 238
238 /* used for all tx if the VLAN is configured to 4-addr mode */ 239 /* used for all tx if the VLAN is configured to 4-addr mode */
239 struct sta_info *sta; 240 struct sta_info __rcu *sta;
240}; 241};
241 242
242struct mesh_stats { 243struct mesh_stats {
@@ -441,7 +442,8 @@ struct ieee80211_if_ibss {
441 442
442 unsigned long ibss_join_req; 443 unsigned long ibss_join_req;
443 /* probe response/beacon for IBSS */ 444 /* probe response/beacon for IBSS */
444 struct sk_buff *presp, *skb; 445 struct sk_buff __rcu *presp;
446 struct sk_buff *skb;
445 447
446 enum { 448 enum {
447 IEEE80211_IBSS_MLME_SEARCH, 449 IEEE80211_IBSS_MLME_SEARCH,
@@ -487,8 +489,13 @@ struct ieee80211_if_mesh {
487 struct mesh_config mshcfg; 489 struct mesh_config mshcfg;
488 u32 mesh_seqnum; 490 u32 mesh_seqnum;
489 bool accepting_plinks; 491 bool accepting_plinks;
490 const u8 *vendor_ie; 492 const u8 *ie;
491 u8 vendor_ie_len; 493 u8 ie_len;
494 enum {
495 IEEE80211_MESH_SEC_NONE = 0x0,
496 IEEE80211_MESH_SEC_AUTHED = 0x1,
497 IEEE80211_MESH_SEC_SECURED = 0x2,
498 } security;
492}; 499};
493 500
494#ifdef CONFIG_MAC80211_MESH 501#ifdef CONFIG_MAC80211_MESH
@@ -561,9 +568,10 @@ struct ieee80211_sub_if_data {
561 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; 568 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
562 unsigned int fragment_next; 569 unsigned int fragment_next;
563 570
564 struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; 571 struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
565 struct ieee80211_key *default_unicast_key, *default_multicast_key; 572 struct ieee80211_key __rcu *default_unicast_key;
566 struct ieee80211_key *default_mgmt_key; 573 struct ieee80211_key __rcu *default_multicast_key;
574 struct ieee80211_key __rcu *default_mgmt_key;
567 575
568 u16 sequence_number; 576 u16 sequence_number;
569 __be16 control_port_protocol; 577 __be16 control_port_protocol;
@@ -654,8 +662,6 @@ struct tpt_led_trigger {
654 * well be on the operating channel 662 * well be on the operating channel
655 * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to 663 * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to
656 * determine if we are on the operating channel or not 664 * determine if we are on the operating channel or not
657 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
658 * gets only set in conjunction with SCAN_SW_SCANNING
659 * @SCAN_COMPLETED: Set for our scan work function when the driver reported 665 * @SCAN_COMPLETED: Set for our scan work function when the driver reported
660 * that the scan completed. 666 * that the scan completed.
661 * @SCAN_ABORTED: Set for our scan work function when the driver reported 667 * @SCAN_ABORTED: Set for our scan work function when the driver reported
@@ -664,7 +670,6 @@ struct tpt_led_trigger {
664enum { 670enum {
665 SCAN_SW_SCANNING, 671 SCAN_SW_SCANNING,
666 SCAN_HW_SCANNING, 672 SCAN_HW_SCANNING,
667 SCAN_OFF_CHANNEL,
668 SCAN_COMPLETED, 673 SCAN_COMPLETED,
669 SCAN_ABORTED, 674 SCAN_ABORTED,
670}; 675};
@@ -765,8 +770,14 @@ struct ieee80211_local {
765 /* device is started */ 770 /* device is started */
766 bool started; 771 bool started;
767 772
773 /* wowlan is enabled -- don't reconfig on resume */
774 bool wowlan;
775
768 int tx_headroom; /* required headroom for hardware/radiotap */ 776 int tx_headroom; /* required headroom for hardware/radiotap */
769 777
778 /* count for keys needing tailroom space allocation */
779 int crypto_tx_tailroom_needed_cnt;
780
770 /* Tasklet and skb queue to process calls from IRQ mode. All frames 781 /* Tasklet and skb queue to process calls from IRQ mode. All frames
771 * added to skb_queue will be processed, but frames in 782 * added to skb_queue will be processed, but frames in
772 * skb_queue_unreliable may be dropped if the total length of these 783 * skb_queue_unreliable may be dropped if the total length of these
@@ -796,7 +807,7 @@ struct ieee80211_local {
796 spinlock_t sta_lock; 807 spinlock_t sta_lock;
797 unsigned long num_sta; 808 unsigned long num_sta;
798 struct list_head sta_list, sta_pending_list; 809 struct list_head sta_list, sta_pending_list;
799 struct sta_info *sta_hash[STA_HASH_SIZE]; 810 struct sta_info __rcu *sta_hash[STA_HASH_SIZE];
800 struct timer_list sta_cleanup; 811 struct timer_list sta_cleanup;
801 struct work_struct sta_finish_work; 812 struct work_struct sta_finish_work;
802 int sta_generation; 813 int sta_generation;
@@ -811,8 +822,8 @@ struct ieee80211_local {
811 822
812 struct rate_control_ref *rate_ctrl; 823 struct rate_control_ref *rate_ctrl;
813 824
814 struct crypto_blkcipher *wep_tx_tfm; 825 struct crypto_cipher *wep_tx_tfm;
815 struct crypto_blkcipher *wep_rx_tfm; 826 struct crypto_cipher *wep_rx_tfm;
816 u32 wep_iv; 827 u32 wep_iv;
817 828
818 /* see iface.c */ 829 /* see iface.c */
@@ -838,6 +849,10 @@ struct ieee80211_local {
838 int scan_channel_idx; 849 int scan_channel_idx;
839 int scan_ies_len; 850 int scan_ies_len;
840 851
852 bool sched_scanning;
853 struct ieee80211_sched_scan_ies sched_scan_ies;
854 struct work_struct sched_scan_stopped_work;
855
841 unsigned long leave_oper_channel_time; 856 unsigned long leave_oper_channel_time;
842 enum mac80211_scan_state next_scan_state; 857 enum mac80211_scan_state next_scan_state;
843 struct delayed_work scan_work; 858 struct delayed_work scan_work;
@@ -959,6 +974,7 @@ struct ieee80211_local {
959 unsigned int hw_roc_duration; 974 unsigned int hw_roc_duration;
960 u32 hw_roc_cookie; 975 u32 hw_roc_cookie;
961 bool hw_roc_for_tx; 976 bool hw_roc_for_tx;
977 unsigned long hw_offchan_tx_cookie;
962 978
963 /* dummy netdev for use w/ NAPI */ 979 /* dummy netdev for use w/ NAPI */
964 struct net_device napi_dev; 980 struct net_device napi_dev;
@@ -1068,8 +1084,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
1068void ieee80211_configure_filter(struct ieee80211_local *local); 1084void ieee80211_configure_filter(struct ieee80211_local *local);
1069u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); 1085u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
1070 1086
1071extern bool ieee80211_disable_40mhz_24ghz;
1072
1073/* STA code */ 1087/* STA code */
1074void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); 1088void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
1075int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, 1089int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
@@ -1146,11 +1160,21 @@ ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
1146void ieee80211_rx_bss_put(struct ieee80211_local *local, 1160void ieee80211_rx_bss_put(struct ieee80211_local *local,
1147 struct ieee80211_bss *bss); 1161 struct ieee80211_bss *bss);
1148 1162
1163/* scheduled scan handling */
1164int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
1165 struct cfg80211_sched_scan_request *req);
1166int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
1167void ieee80211_sched_scan_stopped_work(struct work_struct *work);
1168
1149/* off-channel helpers */ 1169/* off-channel helpers */
1150void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); 1170bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local);
1151void ieee80211_offchannel_stop_station(struct ieee80211_local *local); 1171void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local,
1172 bool tell_ap);
1173void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
1174 bool offchannel_ps_enable);
1152void ieee80211_offchannel_return(struct ieee80211_local *local, 1175void ieee80211_offchannel_return(struct ieee80211_local *local,
1153 bool enable_beaconing); 1176 bool enable_beaconing,
1177 bool offchannel_ps_disable);
1154void ieee80211_hw_roc_setup(struct ieee80211_local *local); 1178void ieee80211_hw_roc_setup(struct ieee80211_local *local);
1155 1179
1156/* interface handling */ 1180/* interface handling */
@@ -1245,7 +1269,8 @@ int ieee80211_reconfig(struct ieee80211_local *local);
1245void ieee80211_stop_device(struct ieee80211_local *local); 1269void ieee80211_stop_device(struct ieee80211_local *local);
1246 1270
1247#ifdef CONFIG_PM 1271#ifdef CONFIG_PM
1248int __ieee80211_suspend(struct ieee80211_hw *hw); 1272int __ieee80211_suspend(struct ieee80211_hw *hw,
1273 struct cfg80211_wowlan *wowlan);
1249 1274
1250static inline int __ieee80211_resume(struct ieee80211_hw *hw) 1275static inline int __ieee80211_resume(struct ieee80211_hw *hw)
1251{ 1276{
@@ -1258,7 +1283,8 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
1258 return ieee80211_reconfig(hw_to_local(hw)); 1283 return ieee80211_reconfig(hw_to_local(hw));
1259} 1284}
1260#else 1285#else
1261static inline int __ieee80211_suspend(struct ieee80211_hw *hw) 1286static inline int __ieee80211_suspend(struct ieee80211_hw *hw,
1287 struct cfg80211_wowlan *wowlan)
1262{ 1288{
1263 return 0; 1289 return 0;
1264} 1290}
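The __rcu annotations added to these structures are enforced by sparse: directly dereferencing an annotated pointer now warns, so every access must go through rcu_dereference() (readers), rcu_assign_pointer() (publishers), or a *_dereference_protected() helper naming the lock that is held. A small sketch of the resulting teardown pattern, assuming the RTNL is the write-side lock as in the AP beacon case above:

struct beacon_data *old;

old = rtnl_dereference(sdata->u.ap.beacon);	/* RTNL held: no RCU read lock needed */
rcu_assign_pointer(sdata->u.ap.beacon, NULL);	/* unpublish before freeing */
synchronize_rcu();				/* wait out readers still using "old" */
kfree(old);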
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7a10a8d1b2d0..7dfbe71dc637 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -382,6 +382,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
382 struct sk_buff *skb, *tmp; 382 struct sk_buff *skb, *tmp;
383 u32 hw_reconf_flags = 0; 383 u32 hw_reconf_flags = 0;
384 int i; 384 int i;
385 enum nl80211_channel_type orig_ct;
385 386
386 if (local->scan_sdata == sdata) 387 if (local->scan_sdata == sdata)
387 ieee80211_scan_cancel(local); 388 ieee80211_scan_cancel(local);
@@ -448,7 +449,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
448 /* APs need special treatment */ 449 /* APs need special treatment */
449 if (sdata->vif.type == NL80211_IFTYPE_AP) { 450 if (sdata->vif.type == NL80211_IFTYPE_AP) {
450 struct ieee80211_sub_if_data *vlan, *tmpsdata; 451 struct ieee80211_sub_if_data *vlan, *tmpsdata;
451 struct beacon_data *old_beacon = sdata->u.ap.beacon; 452 struct beacon_data *old_beacon =
453 rtnl_dereference(sdata->u.ap.beacon);
452 454
453 /* sdata_running will return false, so this will disable */ 455 /* sdata_running will return false, so this will disable */
454 ieee80211_bss_info_change_notify(sdata, 456 ieee80211_bss_info_change_notify(sdata,
@@ -542,8 +544,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
542 hw_reconf_flags = 0; 544 hw_reconf_flags = 0;
543 } 545 }
544 546
547 /* Re-calculate channel-type, in case there are multiple vifs
548 * on different channel types.
549 */
550 orig_ct = local->_oper_channel_type;
551 ieee80211_set_channel_type(local, NULL, NL80211_CHAN_NO_HT);
552
545 /* do after stop to avoid reconfiguring when we stop anyway */ 553 /* do after stop to avoid reconfiguring when we stop anyway */
546 if (hw_reconf_flags) 554 if (hw_reconf_flags || (orig_ct != local->_oper_channel_type))
547 ieee80211_hw_config(local, hw_reconf_flags); 555 ieee80211_hw_config(local, hw_reconf_flags);
548 556
549 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 557 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
@@ -1137,10 +1145,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
1137 + IEEE80211_ENCRYPT_HEADROOM; 1145 + IEEE80211_ENCRYPT_HEADROOM;
1138 ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; 1146 ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM;
1139 1147
1140 ret = dev_alloc_name(ndev, ndev->name);
1141 if (ret < 0)
1142 goto fail;
1143
1144 ieee80211_assign_perm_addr(local, ndev, type); 1148 ieee80211_assign_perm_addr(local, ndev, type);
1145 memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); 1149 memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
1146 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); 1150 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8c02469b7176..31afd712930d 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -101,6 +101,11 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
101 101
102 if (!ret) { 102 if (!ret) {
103 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; 103 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
104
105 if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
106 (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)))
107 key->local->crypto_tx_tailroom_needed_cnt--;
108
104 return 0; 109 return 0;
105 } 110 }
106 111
@@ -156,6 +161,10 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
156 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); 161 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
157 162
158 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 163 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
164
165 if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
166 (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)))
167 key->local->crypto_tx_tailroom_needed_cnt++;
159} 168}
160 169
161void ieee80211_key_removed(struct ieee80211_key_conf *key_conf) 170void ieee80211_key_removed(struct ieee80211_key_conf *key_conf)
@@ -186,7 +195,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
186 assert_key_lock(sdata->local); 195 assert_key_lock(sdata->local);
187 196
188 if (idx >= 0 && idx < NUM_DEFAULT_KEYS) 197 if (idx >= 0 && idx < NUM_DEFAULT_KEYS)
189 key = sdata->keys[idx]; 198 key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
190 199
191 if (uni) 200 if (uni)
192 rcu_assign_pointer(sdata->default_unicast_key, key); 201 rcu_assign_pointer(sdata->default_unicast_key, key);
@@ -213,7 +222,7 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx)
213 222
214 if (idx >= NUM_DEFAULT_KEYS && 223 if (idx >= NUM_DEFAULT_KEYS &&
215 idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) 224 idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
216 key = sdata->keys[idx]; 225 key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
217 226
218 rcu_assign_pointer(sdata->default_mgmt_key, key); 227 rcu_assign_pointer(sdata->default_mgmt_key, key);
219 228
@@ -257,9 +266,15 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
257 else 266 else
258 idx = new->conf.keyidx; 267 idx = new->conf.keyidx;
259 268
260 defunikey = old && sdata->default_unicast_key == old; 269 defunikey = old &&
261 defmultikey = old && sdata->default_multicast_key == old; 270 old == key_mtx_dereference(sdata->local,
262 defmgmtkey = old && sdata->default_mgmt_key == old; 271 sdata->default_unicast_key);
272 defmultikey = old &&
273 old == key_mtx_dereference(sdata->local,
274 sdata->default_multicast_key);
275 defmgmtkey = old &&
276 old == key_mtx_dereference(sdata->local,
277 sdata->default_mgmt_key);
263 278
264 if (defunikey && !new) 279 if (defunikey && !new)
265 __ieee80211_set_default_key(sdata, -1, true, false); 280 __ieee80211_set_default_key(sdata, -1, true, false);
@@ -342,7 +357,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
342 if (IS_ERR(key->u.ccmp.tfm)) { 357 if (IS_ERR(key->u.ccmp.tfm)) {
343 err = PTR_ERR(key->u.ccmp.tfm); 358 err = PTR_ERR(key->u.ccmp.tfm);
344 kfree(key); 359 kfree(key);
345 key = ERR_PTR(err); 360 return ERR_PTR(err);
346 } 361 }
347 break; 362 break;
348 case WLAN_CIPHER_SUITE_AES_CMAC: 363 case WLAN_CIPHER_SUITE_AES_CMAC:
@@ -360,7 +375,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
360 if (IS_ERR(key->u.aes_cmac.tfm)) { 375 if (IS_ERR(key->u.aes_cmac.tfm)) {
361 err = PTR_ERR(key->u.aes_cmac.tfm); 376 err = PTR_ERR(key->u.aes_cmac.tfm);
362 kfree(key); 377 kfree(key);
363 key = ERR_PTR(err); 378 return ERR_PTR(err);
364 } 379 }
365 break; 380 break;
366 } 381 }
@@ -388,8 +403,10 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
388 ieee80211_aes_key_free(key->u.ccmp.tfm); 403 ieee80211_aes_key_free(key->u.ccmp.tfm);
389 if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) 404 if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
390 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); 405 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
391 if (key->local) 406 if (key->local) {
392 ieee80211_debugfs_key_remove(key); 407 ieee80211_debugfs_key_remove(key);
408 key->local->crypto_tx_tailroom_needed_cnt--;
409 }
393 410
394 kfree(key); 411 kfree(key);
395} 412}
@@ -400,11 +417,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
400{ 417{
401 struct ieee80211_key *old_key; 418 struct ieee80211_key *old_key;
402 int idx, ret; 419 int idx, ret;
403 bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; 420 bool pairwise;
404 421
405 BUG_ON(!sdata); 422 BUG_ON(!sdata);
406 BUG_ON(!key); 423 BUG_ON(!key);
407 424
425 pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
408 idx = key->conf.keyidx; 426 idx = key->conf.keyidx;
409 key->local = sdata->local; 427 key->local = sdata->local;
410 key->sdata = sdata; 428 key->sdata = sdata;
@@ -439,17 +457,19 @@ int ieee80211_key_link(struct ieee80211_key *key,
439 mutex_lock(&sdata->local->key_mtx); 457 mutex_lock(&sdata->local->key_mtx);
440 458
441 if (sta && pairwise) 459 if (sta && pairwise)
442 old_key = sta->ptk; 460 old_key = key_mtx_dereference(sdata->local, sta->ptk);
443 else if (sta) 461 else if (sta)
444 old_key = sta->gtk[idx]; 462 old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]);
445 else 463 else
446 old_key = sdata->keys[idx]; 464 old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
447 465
448 __ieee80211_key_replace(sdata, sta, pairwise, old_key, key); 466 __ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
449 __ieee80211_key_destroy(old_key); 467 __ieee80211_key_destroy(old_key);
450 468
451 ieee80211_debugfs_key_add(key); 469 ieee80211_debugfs_key_add(key);
452 470
471 key->local->crypto_tx_tailroom_needed_cnt++;
472
453 ret = ieee80211_key_enable_hw_accel(key); 473 ret = ieee80211_key_enable_hw_accel(key);
454 474
455 mutex_unlock(&sdata->local->key_mtx); 475 mutex_unlock(&sdata->local->key_mtx);
@@ -457,8 +477,11 @@ int ieee80211_key_link(struct ieee80211_key *key,
457 return ret; 477 return ret;
458} 478}
459 479
460static void __ieee80211_key_free(struct ieee80211_key *key) 480void __ieee80211_key_free(struct ieee80211_key *key)
461{ 481{
482 if (!key)
483 return;
484
462 /* 485 /*
463 * Replace key with nothingness if it was ever used. 486 * Replace key with nothingness if it was ever used.
464 */ 487 */
@@ -472,9 +495,6 @@ static void __ieee80211_key_free(struct ieee80211_key *key)
472void ieee80211_key_free(struct ieee80211_local *local, 495void ieee80211_key_free(struct ieee80211_local *local,
473 struct ieee80211_key *key) 496 struct ieee80211_key *key)
474{ 497{
475 if (!key)
476 return;
477
478 mutex_lock(&local->key_mtx); 498 mutex_lock(&local->key_mtx);
479 __ieee80211_key_free(key); 499 __ieee80211_key_free(key);
480 mutex_unlock(&local->key_mtx); 500 mutex_unlock(&local->key_mtx);
@@ -491,8 +511,12 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
491 511
492 mutex_lock(&sdata->local->key_mtx); 512 mutex_lock(&sdata->local->key_mtx);
493 513
494 list_for_each_entry(key, &sdata->key_list, list) 514 sdata->local->crypto_tx_tailroom_needed_cnt = 0;
515
516 list_for_each_entry(key, &sdata->key_list, list) {
517 sdata->local->crypto_tx_tailroom_needed_cnt++;
495 ieee80211_key_enable_hw_accel(key); 518 ieee80211_key_enable_hw_accel(key);
519 }
496 520
497 mutex_unlock(&sdata->local->key_mtx); 521 mutex_unlock(&sdata->local->key_mtx);
498} 522}
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 8106aa1b7466..d801d5351336 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -21,7 +21,6 @@
21 21
22#define WEP_IV_LEN 4 22#define WEP_IV_LEN 4
23#define WEP_ICV_LEN 4 23#define WEP_ICV_LEN 4
24#define ALG_TKIP_KEY_LEN 32
25#define ALG_CCMP_KEY_LEN 16 24#define ALG_CCMP_KEY_LEN 16
26#define CCMP_HDR_LEN 8 25#define CCMP_HDR_LEN 8
27#define CCMP_MIC_LEN 8 26#define CCMP_MIC_LEN 8
@@ -136,6 +135,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
136int __must_check ieee80211_key_link(struct ieee80211_key *key, 135int __must_check ieee80211_key_link(struct ieee80211_key *key,
137 struct ieee80211_sub_if_data *sdata, 136 struct ieee80211_sub_if_data *sdata,
138 struct sta_info *sta); 137 struct sta_info *sta);
138void __ieee80211_key_free(struct ieee80211_key *key);
139void ieee80211_key_free(struct ieee80211_local *local, 139void ieee80211_key_free(struct ieee80211_local *local,
140 struct ieee80211_key *key); 140 struct ieee80211_key *key);
141void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, 141void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
@@ -146,4 +146,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
146void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); 146void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
147void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); 147void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
148 148
149#define key_mtx_dereference(local, ref) \
150 rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
151
149#endif /* IEEE80211_KEY_H */ 152#endif /* IEEE80211_KEY_H */
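key_mtx_dereference(), added above, simply names key_mtx as the lockdep condition, so a caller that holds the mutex can read a key slot without an RCU read lock while still getting lockdep and sparse checking. Per the definition it expands to roughly:

key = rcu_dereference_protected(sdata->keys[idx],
				lockdep_is_held(&sdata->local->key_mtx));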
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a46ff06d7cb8..0d7b08db8e56 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -33,12 +33,6 @@
33#include "cfg.h" 33#include "cfg.h"
34#include "debugfs.h" 34#include "debugfs.h"
35 35
36
37bool ieee80211_disable_40mhz_24ghz;
38module_param(ieee80211_disable_40mhz_24ghz, bool, 0644);
39MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz,
40 "Disable 40MHz support in the 2.4GHz band");
41
42static struct lock_class_key ieee80211_rx_skb_queue_class; 36static struct lock_class_key ieee80211_rx_skb_queue_class;
43 37
44void ieee80211_configure_filter(struct ieee80211_local *local) 38void ieee80211_configure_filter(struct ieee80211_local *local)
@@ -98,6 +92,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work)
98 ieee80211_configure_filter(local); 92 ieee80211_configure_filter(local);
99} 93}
100 94
95/*
96 * Returns true if we are logically configured to be on
97 * the operating channel AND the hardware-conf is currently
98 * configured on the operating channel. Compares channel-type
99 * as well.
100 */
101bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local)
102{
103 struct ieee80211_channel *chan, *scan_chan;
104 enum nl80211_channel_type channel_type;
105
106 /* This logic needs to match logic in ieee80211_hw_config */
107 if (local->scan_channel) {
108 chan = local->scan_channel;
109 /* If scanning on oper channel, use whatever channel-type
110 * is currently in use.
111 */
112 if (chan == local->oper_channel)
113 channel_type = local->_oper_channel_type;
114 else
115 channel_type = NL80211_CHAN_NO_HT;
116 } else if (local->tmp_channel) {
117 chan = scan_chan = local->tmp_channel;
118 channel_type = local->tmp_channel_type;
119 } else {
120 chan = local->oper_channel;
121 channel_type = local->_oper_channel_type;
122 }
123
124 if (chan != local->oper_channel ||
125 channel_type != local->_oper_channel_type)
126 return false;
127
128 /* Check current hardware-config against oper_channel. */
129 if ((local->oper_channel != local->hw.conf.channel) ||
130 (local->_oper_channel_type != local->hw.conf.channel_type))
131 return false;
132
133 return true;
134}
135
101int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) 136int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
102{ 137{
103 struct ieee80211_channel *chan, *scan_chan; 138 struct ieee80211_channel *chan, *scan_chan;
@@ -110,21 +145,33 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
110 145
111 scan_chan = local->scan_channel; 146 scan_chan = local->scan_channel;
112 147
148 /* If this off-channel logic ever changes, ieee80211_on_oper_channel
149 * may need to change as well.
150 */
113 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; 151 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
114 if (scan_chan) { 152 if (scan_chan) {
115 chan = scan_chan; 153 chan = scan_chan;
116 channel_type = NL80211_CHAN_NO_HT; 154 /* If scanning on oper channel, use whatever channel-type
117 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; 155 * is currently in use.
118 } else if (local->tmp_channel && 156 */
119 local->oper_channel != local->tmp_channel) { 157 if (chan == local->oper_channel)
158 channel_type = local->_oper_channel_type;
159 else
160 channel_type = NL80211_CHAN_NO_HT;
161 } else if (local->tmp_channel) {
120 chan = scan_chan = local->tmp_channel; 162 chan = scan_chan = local->tmp_channel;
121 channel_type = local->tmp_channel_type; 163 channel_type = local->tmp_channel_type;
122 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
123 } else { 164 } else {
124 chan = local->oper_channel; 165 chan = local->oper_channel;
125 channel_type = local->_oper_channel_type; 166 channel_type = local->_oper_channel_type;
126 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
127 } 167 }
168
169 if (chan != local->oper_channel ||
170 channel_type != local->_oper_channel_type)
171 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
172 else
173 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
174
128 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; 175 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
129 176
130 if (offchannel_flag || chan != local->hw.conf.channel || 177 if (offchannel_flag || chan != local->hw.conf.channel ||
@@ -146,7 +193,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
146 changed |= IEEE80211_CONF_CHANGE_SMPS; 193 changed |= IEEE80211_CONF_CHANGE_SMPS;
147 } 194 }
148 195
149 if (scan_chan) 196 if ((local->scanning & SCAN_SW_SCANNING) ||
197 (local->scanning & SCAN_HW_SCANNING))
150 power = chan->max_power; 198 power = chan->max_power;
151 else 199 else
152 power = local->power_constr_level ? 200 power = local->power_constr_level ?
@@ -231,7 +279,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
231 279
232 if (changed & BSS_CHANGED_BEACON_ENABLED) { 280 if (changed & BSS_CHANGED_BEACON_ENABLED) {
233 if (local->quiescing || !ieee80211_sdata_running(sdata) || 281 if (local->quiescing || !ieee80211_sdata_running(sdata) ||
234 test_bit(SCAN_SW_SCANNING, &local->scanning)) { 282 test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) {
235 sdata->vif.bss_conf.enable_beacon = false; 283 sdata->vif.bss_conf.enable_beacon = false;
236 } else { 284 } else {
237 /* 285 /*
@@ -310,7 +358,8 @@ static void ieee80211_restart_work(struct work_struct *work)
310 flush_workqueue(local->workqueue); 358 flush_workqueue(local->workqueue);
311 359
312 mutex_lock(&local->mtx); 360 mutex_lock(&local->mtx);
313 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), 361 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
362 local->sched_scanning,
314 "%s called with hardware scan in progress\n", __func__); 363 "%s called with hardware scan in progress\n", __func__);
315 mutex_unlock(&local->mtx); 364 mutex_unlock(&local->mtx);
316 365
@@ -326,6 +375,9 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
326 375
327 trace_api_restart_hw(local); 376 trace_api_restart_hw(local);
328 377
378 wiphy_info(hw->wiphy,
379 "Hardware restart was requested\n");
380
329 /* use this reason, ieee80211_reconfig will unblock it */ 381 /* use this reason, ieee80211_reconfig will unblock it */
330 ieee80211_stop_queues_by_reason(hw, 382 ieee80211_stop_queues_by_reason(hw,
331 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 383 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
@@ -488,7 +540,9 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
488 }, 540 },
489 [NL80211_IFTYPE_MESH_POINT] = { 541 [NL80211_IFTYPE_MESH_POINT] = {
490 .tx = 0xffff, 542 .tx = 0xffff,
491 .rx = BIT(IEEE80211_STYPE_ACTION >> 4), 543 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
544 BIT(IEEE80211_STYPE_AUTH >> 4) |
545 BIT(IEEE80211_STYPE_DEAUTH >> 4),
492 }, 546 },
493}; 547};
494 548
@@ -527,8 +581,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
527 581
528 wiphy->flags |= WIPHY_FLAG_NETNS_OK | 582 wiphy->flags |= WIPHY_FLAG_NETNS_OK |
529 WIPHY_FLAG_4ADDR_AP | 583 WIPHY_FLAG_4ADDR_AP |
530 WIPHY_FLAG_4ADDR_STATION | 584 WIPHY_FLAG_4ADDR_STATION;
531 WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS;
532 585
533 if (!ops->set_key) 586 if (!ops->set_key)
534 wiphy->flags |= WIPHY_FLAG_IBSS_RSN; 587 wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
@@ -554,6 +607,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
554 local->hw.queues = 1; 607 local->hw.queues = 1;
555 local->hw.max_rates = 1; 608 local->hw.max_rates = 1;
556 local->hw.max_report_rates = 0; 609 local->hw.max_report_rates = 0;
610 local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
557 local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; 611 local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
558 local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; 612 local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
559 local->user_power_level = -1; 613 local->user_power_level = -1;
@@ -598,6 +652,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
598 setup_timer(&local->dynamic_ps_timer, 652 setup_timer(&local->dynamic_ps_timer,
599 ieee80211_dynamic_ps_timer, (unsigned long) local); 653 ieee80211_dynamic_ps_timer, (unsigned long) local);
600 654
655 INIT_WORK(&local->sched_scan_stopped_work,
656 ieee80211_sched_scan_stopped_work);
657
601 sta_info_init(local); 658 sta_info_init(local);
602 659
603 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { 660 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
@@ -628,7 +685,7 @@ EXPORT_SYMBOL(ieee80211_alloc_hw);
628int ieee80211_register_hw(struct ieee80211_hw *hw) 685int ieee80211_register_hw(struct ieee80211_hw *hw)
629{ 686{
630 struct ieee80211_local *local = hw_to_local(hw); 687 struct ieee80211_local *local = hw_to_local(hw);
631 int result; 688 int result, i;
632 enum ieee80211_band band; 689 enum ieee80211_band band;
633 int channels, max_bitrates; 690 int channels, max_bitrates;
634 bool supp_ht; 691 bool supp_ht;
@@ -643,6 +700,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
643 WLAN_CIPHER_SUITE_AES_CMAC 700 WLAN_CIPHER_SUITE_AES_CMAC
644 }; 701 };
645 702
703 if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns)
704#ifdef CONFIG_PM
705 && (!local->ops->suspend || !local->ops->resume)
706#endif
707 )
708 return -EINVAL;
709
646 if (hw->max_report_rates == 0) 710 if (hw->max_report_rates == 0)
647 hw->max_report_rates = hw->max_rates; 711 hw->max_report_rates = hw->max_rates;
648 712
@@ -679,17 +743,30 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
679 return -ENOMEM; 743 return -ENOMEM;
680 744
681 /* if low-level driver supports AP, we also support VLAN */ 745 /* if low-level driver supports AP, we also support VLAN */
682 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) 746 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) {
683 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); 747 hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN);
748 hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN);
749 }
684 750
685 /* mac80211 always supports monitor */ 751 /* mac80211 always supports monitor */
686 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); 752 hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
753 hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR);
754
755 /* mac80211 doesn't support more than 1 channel */
756 for (i = 0; i < hw->wiphy->n_iface_combinations; i++)
757 if (hw->wiphy->iface_combinations[i].num_different_channels > 1)
758 return -EINVAL;
687 759
688#ifndef CONFIG_MAC80211_MESH 760#ifndef CONFIG_MAC80211_MESH
689 /* mesh depends on Kconfig, but drivers should set it if they want */ 761 /* mesh depends on Kconfig, but drivers should set it if they want */
690 local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT); 762 local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT);
691#endif 763#endif
692 764
765 /* if the underlying driver supports mesh, mac80211 will (at least)
766 * provide routing of mesh authentication frames to userspace */
767 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_MESH_POINT))
768 local->hw.wiphy->flags |= WIPHY_FLAG_MESH_AUTH;
769
693 /* mac80211 supports control port protocol changing */ 770 /* mac80211 supports control port protocol changing */
694 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL; 771 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
695 772
@@ -768,6 +845,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
768 if (!local->ops->remain_on_channel) 845 if (!local->ops->remain_on_channel)
769 local->hw.wiphy->max_remain_on_channel_duration = 5000; 846 local->hw.wiphy->max_remain_on_channel_duration = 5000;
770 847
848 if (local->ops->sched_scan_start)
849 local->hw.wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
850
771 result = wiphy_register(local->hw.wiphy); 851 result = wiphy_register(local->hw.wiphy);
772 if (result < 0) 852 if (result < 0)
773 goto fail_wiphy_register; 853 goto fail_wiphy_register;
@@ -791,8 +871,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
791 * and we need some headroom for passing the frame to monitor 871 * and we need some headroom for passing the frame to monitor
792 * interfaces, but never both at the same time. 872 * interfaces, but never both at the same time.
793 */ 873 */
874#ifndef __CHECKER__
794 BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM != 875 BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM !=
795 sizeof(struct ieee80211_tx_status_rtap_hdr)); 876 sizeof(struct ieee80211_tx_status_rtap_hdr));
877#endif
796 local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, 878 local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom,
797 sizeof(struct ieee80211_tx_status_rtap_hdr)); 879 sizeof(struct ieee80211_tx_status_rtap_hdr));
798 880
@@ -809,10 +891,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
809 891
810 local->dynamic_ps_forced_timeout = -1; 892 local->dynamic_ps_forced_timeout = -1;
811 893
812 result = sta_info_start(local);
813 if (result < 0)
814 goto fail_sta_info;
815
816 result = ieee80211_wep_init(local); 894 result = ieee80211_wep_init(local);
817 if (result < 0) 895 if (result < 0)
818 wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", 896 wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
@@ -875,7 +953,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
875 rtnl_unlock(); 953 rtnl_unlock();
876 ieee80211_wep_free(local); 954 ieee80211_wep_free(local);
877 sta_info_stop(local); 955 sta_info_stop(local);
878 fail_sta_info:
879 destroy_workqueue(local->workqueue); 956 destroy_workqueue(local->workqueue);
880 fail_workqueue: 957 fail_workqueue:
881 wiphy_unregister(local->hw.wiphy); 958 wiphy_unregister(local->hw.wiphy);
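As the registration hunk shows, WIPHY_FLAG_SUPPORTS_SCHED_SCAN is only advertised when the driver provides a sched_scan_start op; in ieee80211_ops terms that means wiring up something like the following (hypothetical driver, op names inferred from the new drv_sched_scan_* trace events):

static const struct ieee80211_ops foo_ops = {
	.tx			= foo_tx,
	.start			= foo_start,
	.stop			= foo_stop,
	/* providing sched_scan_start is what makes ieee80211_register_hw()
	 * set WIPHY_FLAG_SUPPORTS_SCHED_SCAN, per the hunk above */
	.sched_scan_start	= foo_sched_scan_start,
	.sched_scan_stop	= foo_sched_scan_stop,
};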
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index ca3af4685b0a..29e9980c8e60 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -279,57 +279,14 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
279 MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; 279 MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
280 *pos++ = 0x00; 280 *pos++ = 0x00;
281 281
282 if (sdata->u.mesh.vendor_ie) { 282 if (sdata->u.mesh.ie) {
283 int len = sdata->u.mesh.vendor_ie_len; 283 int len = sdata->u.mesh.ie_len;
284 const u8 *data = sdata->u.mesh.vendor_ie; 284 const u8 *data = sdata->u.mesh.ie;
285 if (skb_tailroom(skb) > len) 285 if (skb_tailroom(skb) > len)
286 memcpy(skb_put(skb, len), data, len); 286 memcpy(skb_put(skb, len), data, len);
287 } 287 }
288} 288}
289 289
290u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl)
291{
292 /* Use last four bytes of hw addr and interface index as hash index */
293 return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd)
294 & tbl->hash_mask;
295}
296
297struct mesh_table *mesh_table_alloc(int size_order)
298{
299 int i;
300 struct mesh_table *newtbl;
301
302 newtbl = kmalloc(sizeof(struct mesh_table), GFP_KERNEL);
303 if (!newtbl)
304 return NULL;
305
306 newtbl->hash_buckets = kzalloc(sizeof(struct hlist_head) *
307 (1 << size_order), GFP_KERNEL);
308
309 if (!newtbl->hash_buckets) {
310 kfree(newtbl);
311 return NULL;
312 }
313
314 newtbl->hashwlock = kmalloc(sizeof(spinlock_t) *
315 (1 << size_order), GFP_KERNEL);
316 if (!newtbl->hashwlock) {
317 kfree(newtbl->hash_buckets);
318 kfree(newtbl);
319 return NULL;
320 }
321
322 newtbl->size_order = size_order;
323 newtbl->hash_mask = (1 << size_order) - 1;
324 atomic_set(&newtbl->entries, 0);
325 get_random_bytes(&newtbl->hash_rnd,
326 sizeof(newtbl->hash_rnd));
327 for (i = 0; i <= newtbl->hash_mask; i++)
328 spin_lock_init(&newtbl->hashwlock[i]);
329
330 return newtbl;
331}
332
333 290
334static void ieee80211_mesh_path_timer(unsigned long data) 291static void ieee80211_mesh_path_timer(unsigned long data)
335{ 292{
@@ -573,8 +530,12 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
573 ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, 530 ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
574 &elems); 531 &elems);
575 532
533 /* ignore beacons from secure mesh peers if our security is off */
534 if (elems.rsn_len && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE)
535 return;
536
576 if (elems.ds_params && elems.ds_params_len == 1) 537 if (elems.ds_params && elems.ds_params_len == 1)
577 freq = ieee80211_channel_to_frequency(elems.ds_params[0]); 538 freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
578 else 539 else
579 freq = rx_status->freq; 540 freq = rx_status->freq;
580 541
@@ -586,9 +547,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
586 if (elems.mesh_id && elems.mesh_config && 547 if (elems.mesh_id && elems.mesh_config &&
587 mesh_matches_local(&elems, sdata)) { 548 mesh_matches_local(&elems, sdata)) {
588 supp_rates = ieee80211_sta_get_rates(local, &elems, band); 549 supp_rates = ieee80211_sta_get_rates(local, &elems, band);
589 550 mesh_neighbour_update(mgmt->sa, supp_rates, sdata, &elems);
590 mesh_neighbour_update(mgmt->sa, supp_rates, sdata,
591 mesh_peer_accepts_plinks(&elems));
592 } 551 }
593} 552}
594 553
@@ -598,7 +557,7 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
598 struct ieee80211_rx_status *rx_status) 557 struct ieee80211_rx_status *rx_status)
599{ 558{
600 switch (mgmt->u.action.category) { 559 switch (mgmt->u.action.category) {
601 case WLAN_CATEGORY_MESH_PLINK: 560 case WLAN_CATEGORY_MESH_ACTION:
602 mesh_rx_plink_frame(sdata, mgmt, len, rx_status); 561 mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
603 break; 562 break;
604 case WLAN_CATEGORY_MESH_PATH_SEL: 563 case WLAN_CATEGORY_MESH_PATH_SEL:
@@ -611,12 +570,9 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
611 struct sk_buff *skb) 570 struct sk_buff *skb)
612{ 571{
613 struct ieee80211_rx_status *rx_status; 572 struct ieee80211_rx_status *rx_status;
614 struct ieee80211_if_mesh *ifmsh;
615 struct ieee80211_mgmt *mgmt; 573 struct ieee80211_mgmt *mgmt;
616 u16 stype; 574 u16 stype;
617 575
618 ifmsh = &sdata->u.mesh;
619
620 rx_status = IEEE80211_SKB_RXCB(skb); 576 rx_status = IEEE80211_SKB_RXCB(skb);
621 mgmt = (struct ieee80211_mgmt *) skb->data; 577 mgmt = (struct ieee80211_mgmt *) skb->data;
622 stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; 578 stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
@@ -645,7 +601,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
645 if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) 601 if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags))
646 mesh_mpath_table_grow(); 602 mesh_mpath_table_grow();
647 603
648 if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) 604 if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags))
649 mesh_mpp_table_grow(); 605 mesh_mpp_table_grow();
650 606
651 if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) 607 if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags))
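The hunk above also fixes a copy-and-paste bug: the MPATH grow bit was tested twice, so MESH_WORK_GROW_MPP_TABLE was never serviced. A small runnable userspace analogue of the per-work-item flag pattern (flag names and the test_and_clear() helper are illustrative, not kernel API):

#include <stdio.h>
#include <stdatomic.h>

enum { WORK_GROW_MPATH, WORK_GROW_MPP, WORK_HOUSEKEEPING };

static _Atomic unsigned long wrkq_flags;

/* Clears the bit and reports whether it was set, like test_and_clear_bit(). */
static int test_and_clear(int bit)
{
	unsigned long mask = 1UL << bit;

	return (atomic_fetch_and(&wrkq_flags, ~mask) & mask) != 0;
}

int main(void)
{
	atomic_fetch_or(&wrkq_flags, 1UL << WORK_GROW_MPP);

	if (test_and_clear(WORK_GROW_MPATH))
		puts("grow mesh path table");
	if (test_and_clear(WORK_GROW_MPP))	/* distinct bit, as in the fix */
		puts("grow mpp table");
	return 0;
}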
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index b99e230fe31c..e7c5fddb4804 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -92,7 +92,7 @@ struct mesh_path {
92 u8 dst[ETH_ALEN]; 92 u8 dst[ETH_ALEN];
93 u8 mpp[ETH_ALEN]; /* used for MPP or MAP */ 93 u8 mpp[ETH_ALEN]; /* used for MPP or MAP */
94 struct ieee80211_sub_if_data *sdata; 94 struct ieee80211_sub_if_data *sdata;
95 struct sta_info *next_hop; 95 struct sta_info __rcu *next_hop;
96 struct timer_list timer; 96 struct timer_list timer;
97 struct sk_buff_head frame_queue; 97 struct sk_buff_head frame_queue;
98 struct rcu_head rcu; 98 struct rcu_head rcu;
@@ -226,7 +226,8 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
226int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); 226int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata);
227/* Mesh plinks */ 227/* Mesh plinks */
228void mesh_neighbour_update(u8 *hw_addr, u32 rates, 228void mesh_neighbour_update(u8 *hw_addr, u32 rates,
229 struct ieee80211_sub_if_data *sdata, bool add); 229 struct ieee80211_sub_if_data *sdata,
230 struct ieee802_11_elems *ie);
230bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); 231bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
231void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); 232void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
232void mesh_plink_broken(struct sta_info *sta); 233void mesh_plink_broken(struct sta_info *sta);
@@ -239,12 +240,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
239 240
240/* Private interfaces */ 241/* Private interfaces */
241/* Mesh tables */ 242/* Mesh tables */
242struct mesh_table *mesh_table_alloc(int size_order);
243void mesh_table_free(struct mesh_table *tbl, bool free_leafs);
244void mesh_mpath_table_grow(void); 243void mesh_mpath_table_grow(void);
245void mesh_mpp_table_grow(void); 244void mesh_mpp_table_grow(void);
246u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata,
247 struct mesh_table *tbl);
248/* Mesh paths */ 245/* Mesh paths */
249int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, 246int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode,
250 const u8 *ra, struct ieee80211_sub_if_data *sdata); 247 const u8 *ra, struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 5bf64d7112b3..2b18053070c1 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -391,7 +391,6 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
391 (mpath->flags & MESH_PATH_SN_VALID)) { 391 (mpath->flags & MESH_PATH_SN_VALID)) {
392 if (SN_GT(mpath->sn, orig_sn) || 392 if (SN_GT(mpath->sn, orig_sn) ||
393 (mpath->sn == orig_sn && 393 (mpath->sn == orig_sn &&
394 action == MPATH_PREQ &&
395 new_metric >= mpath->metric)) { 394 new_metric >= mpath->metric)) {
396 process = false; 395 process = false;
397 fresh_info = false; 396 fresh_info = false;
@@ -561,6 +560,14 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
561} 560}
562 561
563 562
563static inline struct sta_info *
564next_hop_deref_protected(struct mesh_path *mpath)
565{
566 return rcu_dereference_protected(mpath->next_hop,
567 lockdep_is_held(&mpath->state_lock));
568}
569
570
564static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, 571static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
565 struct ieee80211_mgmt *mgmt, 572 struct ieee80211_mgmt *mgmt,
566 u8 *prep_elem, u32 metric) 573 u8 *prep_elem, u32 metric)
@@ -600,7 +607,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
600 spin_unlock_bh(&mpath->state_lock); 607 spin_unlock_bh(&mpath->state_lock);
601 goto fail; 608 goto fail;
602 } 609 }
603 memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); 610 memcpy(next_hop, next_hop_deref_protected(mpath)->sta.addr, ETH_ALEN);
604 spin_unlock_bh(&mpath->state_lock); 611 spin_unlock_bh(&mpath->state_lock);
605 --ttl; 612 --ttl;
606 flags = PREP_IE_FLAGS(prep_elem); 613 flags = PREP_IE_FLAGS(prep_elem);
@@ -633,7 +640,6 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
633 struct mesh_path *mpath; 640 struct mesh_path *mpath;
634 u8 ttl; 641 u8 ttl;
635 u8 *ta, *target_addr; 642 u8 *ta, *target_addr;
636 u8 target_flags;
637 u32 target_sn; 643 u32 target_sn;
638 u16 target_rcode; 644 u16 target_rcode;
639 645
@@ -644,7 +650,6 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
644 return; 650 return;
645 } 651 }
646 ttl--; 652 ttl--;
647 target_flags = PERR_IE_TARGET_FLAGS(perr_elem);
648 target_addr = PERR_IE_TARGET_ADDR(perr_elem); 653 target_addr = PERR_IE_TARGET_ADDR(perr_elem);
649 target_sn = PERR_IE_TARGET_SN(perr_elem); 654 target_sn = PERR_IE_TARGET_SN(perr_elem);
650 target_rcode = PERR_IE_TARGET_RCODE(perr_elem); 655 target_rcode = PERR_IE_TARGET_RCODE(perr_elem);
@@ -654,7 +659,8 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
654 if (mpath) { 659 if (mpath) {
655 spin_lock_bh(&mpath->state_lock); 660 spin_lock_bh(&mpath->state_lock);
656 if (mpath->flags & MESH_PATH_ACTIVE && 661 if (mpath->flags & MESH_PATH_ACTIVE &&
657 memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 && 662 memcmp(ta, next_hop_deref_protected(mpath)->sta.addr,
663 ETH_ALEN) == 0 &&
658 (!(mpath->flags & MESH_PATH_SN_VALID) || 664 (!(mpath->flags & MESH_PATH_SN_VALID) ||
659 SN_GT(target_sn, mpath->sn))) { 665 SN_GT(target_sn, mpath->sn))) {
660 mpath->flags &= ~MESH_PATH_ACTIVE; 666 mpath->flags &= ~MESH_PATH_ACTIVE;
@@ -675,12 +681,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
675{ 681{
676 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 682 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
677 struct mesh_path *mpath; 683 struct mesh_path *mpath;
678 u8 *ta;
679 u8 ttl, flags, hopcount; 684 u8 ttl, flags, hopcount;
680 u8 *orig_addr; 685 u8 *orig_addr;
681 u32 orig_sn, metric; 686 u32 orig_sn, metric;
682 687
683 ta = mgmt->sa;
684 ttl = rann->rann_ttl; 688 ttl = rann->rann_ttl;
685 if (ttl <= 1) { 689 if (ttl <= 1) {
686 ifmsh->mshstats.dropped_frames_ttl++; 690 ifmsh->mshstats.dropped_frames_ttl++;
@@ -918,6 +922,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
918{ 922{
919 struct sk_buff *skb_to_free = NULL; 923 struct sk_buff *skb_to_free = NULL;
920 struct mesh_path *mpath; 924 struct mesh_path *mpath;
925 struct sta_info *next_hop;
921 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 926 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
922 u8 *target_addr = hdr->addr3; 927 u8 *target_addr = hdr->addr3;
923 int err = 0; 928 int err = 0;
@@ -945,7 +950,11 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
945 mesh_queue_preq(mpath, 950 mesh_queue_preq(mpath,
946 PREQ_Q_F_START | PREQ_Q_F_REFRESH); 951 PREQ_Q_F_START | PREQ_Q_F_REFRESH);
947 } 952 }
948 memcpy(hdr->addr1, mpath->next_hop->sta.addr, ETH_ALEN); 953 next_hop = rcu_dereference(mpath->next_hop);
954 if (next_hop)
955 memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN);
956 else
957 err = -ENOENT;
949 } else { 958 } else {
950 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 959 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
951 if (!(mpath->flags & MESH_PATH_RESOLVING)) { 960 if (!(mpath->flags & MESH_PATH_RESOLVING)) {
@@ -971,20 +980,11 @@ endlookup:
971 980
972void mesh_path_timer(unsigned long data) 981void mesh_path_timer(unsigned long data)
973{ 982{
974 struct ieee80211_sub_if_data *sdata; 983 struct mesh_path *mpath = (void *) data;
975 struct mesh_path *mpath; 984 struct ieee80211_sub_if_data *sdata = mpath->sdata;
976
977 rcu_read_lock();
978 mpath = (struct mesh_path *) data;
979 mpath = rcu_dereference(mpath);
980 if (!mpath)
981 goto endmpathtimer;
982 sdata = mpath->sdata;
983 985
984 if (sdata->local->quiescing) { 986 if (sdata->local->quiescing)
985 rcu_read_unlock();
986 return; 987 return;
987 }
988 988
989 spin_lock_bh(&mpath->state_lock); 989 spin_lock_bh(&mpath->state_lock);
990 if (mpath->flags & MESH_PATH_RESOLVED || 990 if (mpath->flags & MESH_PATH_RESOLVED ||
@@ -1001,8 +1001,6 @@ void mesh_path_timer(unsigned long data)
1001 } 1001 }
1002 1002
1003 spin_unlock_bh(&mpath->state_lock); 1003 spin_unlock_bh(&mpath->state_lock);
1004endmpathtimer:
1005 rcu_read_unlock();
1006} 1004}
1007 1005
1008void 1006void
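The mesh_hwmp.c changes split next_hop access into two paths: a lockdep-checked accessor for code holding mpath->state_lock, and a plain RCU read that must tolerate NULL in mesh_nexthop_lookup(). A simplified, runnable userspace analogue is sketched below; pthread and C11 atomics stand in for the kernel primitives, the state_lock_held field plays lockdep's role, and none of these names are mac80211 API.

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct sta { char addr[18]; };

struct mpath {
	pthread_mutex_t state_lock;
	int state_lock_held;			/* debug aid, plays lockdep's role */
	_Atomic(struct sta *) next_hop;
};

/* Writer-side accessor: only legal with state_lock held. */
static struct sta *next_hop_protected(struct mpath *mp)
{
	assert(mp->state_lock_held);
	return atomic_load_explicit(&mp->next_hop, memory_order_relaxed);
}

int main(void)
{
	static struct sta peer = { "02:00:00:00:00:01" };
	struct mpath mp = { .state_lock = PTHREAD_MUTEX_INITIALIZER };
	struct sta *nh;

	atomic_store(&mp.next_hop, &peer);

	/* Reader path: load and tolerate NULL, as mesh_nexthop_lookup() now does. */
	nh = atomic_load(&mp.next_hop);
	if (nh)
		printf("forward to %s\n", nh->addr);

	/* Writer path: take the lock, then use the checked accessor. */
	pthread_mutex_lock(&mp.state_lock);
	mp.state_lock_held = 1;
	printf("protected next hop %s\n", next_hop_protected(&mp)->addr);
	mp.state_lock_held = 0;
	pthread_mutex_unlock(&mp.state_lock);
	return 0;
}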
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 8d65b47d9837..83ce48e31913 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -40,6 +40,50 @@ static struct mesh_table *mesh_paths;
40static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ 40static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */
41 41
42int mesh_paths_generation; 42int mesh_paths_generation;
43
44/* This lock will have the grow table function as writer and add / delete nodes
45 * as readers. When reading the table (i.e. doing lookups) we are well protected
46 * by RCU
47 */
48static DEFINE_RWLOCK(pathtbl_resize_lock);
49
50
51static struct mesh_table *mesh_table_alloc(int size_order)
52{
53 int i;
54 struct mesh_table *newtbl;
55
56 newtbl = kmalloc(sizeof(struct mesh_table), GFP_KERNEL);
57 if (!newtbl)
58 return NULL;
59
60 newtbl->hash_buckets = kzalloc(sizeof(struct hlist_head) *
61 (1 << size_order), GFP_KERNEL);
62
63 if (!newtbl->hash_buckets) {
64 kfree(newtbl);
65 return NULL;
66 }
67
68 newtbl->hashwlock = kmalloc(sizeof(spinlock_t) *
69 (1 << size_order), GFP_KERNEL);
70 if (!newtbl->hashwlock) {
71 kfree(newtbl->hash_buckets);
72 kfree(newtbl);
73 return NULL;
74 }
75
76 newtbl->size_order = size_order;
77 newtbl->hash_mask = (1 << size_order) - 1;
78 atomic_set(&newtbl->entries, 0);
79 get_random_bytes(&newtbl->hash_rnd,
80 sizeof(newtbl->hash_rnd));
81 for (i = 0; i <= newtbl->hash_mask; i++)
82 spin_lock_init(&newtbl->hashwlock[i]);
83
84 return newtbl;
85}
86
43static void __mesh_table_free(struct mesh_table *tbl) 87static void __mesh_table_free(struct mesh_table *tbl)
44{ 88{
45 kfree(tbl->hash_buckets); 89 kfree(tbl->hash_buckets);
@@ -47,7 +91,7 @@ static void __mesh_table_free(struct mesh_table *tbl)
47 kfree(tbl); 91 kfree(tbl);
48} 92}
49 93
50void mesh_table_free(struct mesh_table *tbl, bool free_leafs) 94static void mesh_table_free(struct mesh_table *tbl, bool free_leafs)
51{ 95{
52 struct hlist_head *mesh_hash; 96 struct hlist_head *mesh_hash;
53 struct hlist_node *p, *q; 97 struct hlist_node *p, *q;
@@ -55,60 +99,56 @@ void mesh_table_free(struct mesh_table *tbl, bool free_leafs)
55 99
56 mesh_hash = tbl->hash_buckets; 100 mesh_hash = tbl->hash_buckets;
57 for (i = 0; i <= tbl->hash_mask; i++) { 101 for (i = 0; i <= tbl->hash_mask; i++) {
58 spin_lock(&tbl->hashwlock[i]); 102 spin_lock_bh(&tbl->hashwlock[i]);
59 hlist_for_each_safe(p, q, &mesh_hash[i]) { 103 hlist_for_each_safe(p, q, &mesh_hash[i]) {
60 tbl->free_node(p, free_leafs); 104 tbl->free_node(p, free_leafs);
61 atomic_dec(&tbl->entries); 105 atomic_dec(&tbl->entries);
62 } 106 }
63 spin_unlock(&tbl->hashwlock[i]); 107 spin_unlock_bh(&tbl->hashwlock[i]);
64 } 108 }
65 __mesh_table_free(tbl); 109 __mesh_table_free(tbl);
66} 110}
67 111
68static struct mesh_table *mesh_table_grow(struct mesh_table *tbl) 112static int mesh_table_grow(struct mesh_table *oldtbl,
113 struct mesh_table *newtbl)
69{ 114{
70 struct mesh_table *newtbl;
71 struct hlist_head *oldhash; 115 struct hlist_head *oldhash;
72 struct hlist_node *p, *q; 116 struct hlist_node *p, *q;
73 int i; 117 int i;
74 118
75 if (atomic_read(&tbl->entries) 119 if (atomic_read(&oldtbl->entries)
76 < tbl->mean_chain_len * (tbl->hash_mask + 1)) 120 < oldtbl->mean_chain_len * (oldtbl->hash_mask + 1))
77 goto endgrow; 121 return -EAGAIN;
78
79 newtbl = mesh_table_alloc(tbl->size_order + 1);
80 if (!newtbl)
81 goto endgrow;
82 122
83 newtbl->free_node = tbl->free_node; 123 newtbl->free_node = oldtbl->free_node;
84 newtbl->mean_chain_len = tbl->mean_chain_len; 124 newtbl->mean_chain_len = oldtbl->mean_chain_len;
85 newtbl->copy_node = tbl->copy_node; 125 newtbl->copy_node = oldtbl->copy_node;
86 atomic_set(&newtbl->entries, atomic_read(&tbl->entries)); 126 atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries));
87 127
88 oldhash = tbl->hash_buckets; 128 oldhash = oldtbl->hash_buckets;
89 for (i = 0; i <= tbl->hash_mask; i++) 129 for (i = 0; i <= oldtbl->hash_mask; i++)
90 hlist_for_each(p, &oldhash[i]) 130 hlist_for_each(p, &oldhash[i])
91 if (tbl->copy_node(p, newtbl) < 0) 131 if (oldtbl->copy_node(p, newtbl) < 0)
92 goto errcopy; 132 goto errcopy;
93 133
94 return newtbl; 134 return 0;
95 135
96errcopy: 136errcopy:
97 for (i = 0; i <= newtbl->hash_mask; i++) { 137 for (i = 0; i <= newtbl->hash_mask; i++) {
98 hlist_for_each_safe(p, q, &newtbl->hash_buckets[i]) 138 hlist_for_each_safe(p, q, &newtbl->hash_buckets[i])
99 tbl->free_node(p, 0); 139 oldtbl->free_node(p, 0);
100 } 140 }
101 __mesh_table_free(newtbl); 141 return -ENOMEM;
102endgrow:
103 return NULL;
104} 142}
105 143
144static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata,
145 struct mesh_table *tbl)
146{
147 /* Use last four bytes of hw addr and interface index as hash index */
148 return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd)
149 & tbl->hash_mask;
150}
106 151
107/* This lock will have the grow table function as writer and add / delete nodes
108 * as readers. When reading the table (i.e. doing lookups) we are well protected
109 * by RCU
110 */
111static DEFINE_RWLOCK(pathtbl_resize_lock);
112 152
113/** 153/**
114 * 154 *
@@ -280,7 +320,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
280 if (!new_node) 320 if (!new_node)
281 goto err_node_alloc; 321 goto err_node_alloc;
282 322
283 read_lock(&pathtbl_resize_lock); 323 read_lock_bh(&pathtbl_resize_lock);
284 memcpy(new_mpath->dst, dst, ETH_ALEN); 324 memcpy(new_mpath->dst, dst, ETH_ALEN);
285 new_mpath->sdata = sdata; 325 new_mpath->sdata = sdata;
286 new_mpath->flags = 0; 326 new_mpath->flags = 0;
@@ -295,7 +335,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
295 hash_idx = mesh_table_hash(dst, sdata, mesh_paths); 335 hash_idx = mesh_table_hash(dst, sdata, mesh_paths);
296 bucket = &mesh_paths->hash_buckets[hash_idx]; 336 bucket = &mesh_paths->hash_buckets[hash_idx];
297 337
298 spin_lock(&mesh_paths->hashwlock[hash_idx]); 338 spin_lock_bh(&mesh_paths->hashwlock[hash_idx]);
299 339
300 err = -EEXIST; 340 err = -EEXIST;
301 hlist_for_each_entry(node, n, bucket, list) { 341 hlist_for_each_entry(node, n, bucket, list) {
@@ -311,8 +351,8 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
311 351
312 mesh_paths_generation++; 352 mesh_paths_generation++;
313 353
314 spin_unlock(&mesh_paths->hashwlock[hash_idx]); 354 spin_unlock_bh(&mesh_paths->hashwlock[hash_idx]);
315 read_unlock(&pathtbl_resize_lock); 355 read_unlock_bh(&pathtbl_resize_lock);
316 if (grow) { 356 if (grow) {
317 set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); 357 set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags);
318 ieee80211_queue_work(&local->hw, &sdata->work); 358 ieee80211_queue_work(&local->hw, &sdata->work);
@@ -320,8 +360,8 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
320 return 0; 360 return 0;
321 361
322err_exists: 362err_exists:
323 spin_unlock(&mesh_paths->hashwlock[hash_idx]); 363 spin_unlock_bh(&mesh_paths->hashwlock[hash_idx]);
324 read_unlock(&pathtbl_resize_lock); 364 read_unlock_bh(&pathtbl_resize_lock);
325 kfree(new_node); 365 kfree(new_node);
326err_node_alloc: 366err_node_alloc:
327 kfree(new_mpath); 367 kfree(new_mpath);
@@ -334,15 +374,21 @@ void mesh_mpath_table_grow(void)
334{ 374{
335 struct mesh_table *oldtbl, *newtbl; 375 struct mesh_table *oldtbl, *newtbl;
336 376
337 write_lock(&pathtbl_resize_lock); 377 rcu_read_lock();
378 newtbl = mesh_table_alloc(rcu_dereference(mesh_paths)->size_order + 1);
379 if (!newtbl)
380 return;
381 write_lock_bh(&pathtbl_resize_lock);
338 oldtbl = mesh_paths; 382 oldtbl = mesh_paths;
339 newtbl = mesh_table_grow(mesh_paths); 383 if (mesh_table_grow(mesh_paths, newtbl) < 0) {
340 if (!newtbl) { 384 rcu_read_unlock();
341 write_unlock(&pathtbl_resize_lock); 385 __mesh_table_free(newtbl);
386 write_unlock_bh(&pathtbl_resize_lock);
342 return; 387 return;
343 } 388 }
389 rcu_read_unlock();
344 rcu_assign_pointer(mesh_paths, newtbl); 390 rcu_assign_pointer(mesh_paths, newtbl);
345 write_unlock(&pathtbl_resize_lock); 391 write_unlock_bh(&pathtbl_resize_lock);
346 392
347 synchronize_rcu(); 393 synchronize_rcu();
348 mesh_table_free(oldtbl, false); 394 mesh_table_free(oldtbl, false);
@@ -352,15 +398,21 @@ void mesh_mpp_table_grow(void)
352{ 398{
353 struct mesh_table *oldtbl, *newtbl; 399 struct mesh_table *oldtbl, *newtbl;
354 400
355 write_lock(&pathtbl_resize_lock); 401 rcu_read_lock();
402 newtbl = mesh_table_alloc(rcu_dereference(mpp_paths)->size_order + 1);
403 if (!newtbl)
404 return;
405 write_lock_bh(&pathtbl_resize_lock);
356 oldtbl = mpp_paths; 406 oldtbl = mpp_paths;
357 newtbl = mesh_table_grow(mpp_paths); 407 if (mesh_table_grow(mpp_paths, newtbl) < 0) {
358 if (!newtbl) { 408 rcu_read_unlock();
359 write_unlock(&pathtbl_resize_lock); 409 __mesh_table_free(newtbl);
410 write_unlock_bh(&pathtbl_resize_lock);
360 return; 411 return;
361 } 412 }
413 rcu_read_unlock();
362 rcu_assign_pointer(mpp_paths, newtbl); 414 rcu_assign_pointer(mpp_paths, newtbl);
363 write_unlock(&pathtbl_resize_lock); 415 write_unlock_bh(&pathtbl_resize_lock);
364 416
365 synchronize_rcu(); 417 synchronize_rcu();
366 mesh_table_free(oldtbl, false); 418 mesh_table_free(oldtbl, false);
@@ -394,7 +446,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata)
394 if (!new_node) 446 if (!new_node)
395 goto err_node_alloc; 447 goto err_node_alloc;
396 448
397 read_lock(&pathtbl_resize_lock); 449 read_lock_bh(&pathtbl_resize_lock);
398 memcpy(new_mpath->dst, dst, ETH_ALEN); 450 memcpy(new_mpath->dst, dst, ETH_ALEN);
399 memcpy(new_mpath->mpp, mpp, ETH_ALEN); 451 memcpy(new_mpath->mpp, mpp, ETH_ALEN);
400 new_mpath->sdata = sdata; 452 new_mpath->sdata = sdata;
@@ -407,7 +459,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata)
407 hash_idx = mesh_table_hash(dst, sdata, mpp_paths); 459 hash_idx = mesh_table_hash(dst, sdata, mpp_paths);
408 bucket = &mpp_paths->hash_buckets[hash_idx]; 460 bucket = &mpp_paths->hash_buckets[hash_idx];
409 461
410 spin_lock(&mpp_paths->hashwlock[hash_idx]); 462 spin_lock_bh(&mpp_paths->hashwlock[hash_idx]);
411 463
412 err = -EEXIST; 464 err = -EEXIST;
413 hlist_for_each_entry(node, n, bucket, list) { 465 hlist_for_each_entry(node, n, bucket, list) {
@@ -421,8 +473,8 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata)
421 mpp_paths->mean_chain_len * (mpp_paths->hash_mask + 1)) 473 mpp_paths->mean_chain_len * (mpp_paths->hash_mask + 1))
422 grow = 1; 474 grow = 1;
423 475
424 spin_unlock(&mpp_paths->hashwlock[hash_idx]); 476 spin_unlock_bh(&mpp_paths->hashwlock[hash_idx]);
425 read_unlock(&pathtbl_resize_lock); 477 read_unlock_bh(&pathtbl_resize_lock);
426 if (grow) { 478 if (grow) {
427 set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags); 479 set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags);
428 ieee80211_queue_work(&local->hw, &sdata->work); 480 ieee80211_queue_work(&local->hw, &sdata->work);
@@ -430,8 +482,8 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata)
430 return 0; 482 return 0;
431 483
432err_exists: 484err_exists:
433 spin_unlock(&mpp_paths->hashwlock[hash_idx]); 485 spin_unlock_bh(&mpp_paths->hashwlock[hash_idx]);
434 read_unlock(&pathtbl_resize_lock); 486 read_unlock_bh(&pathtbl_resize_lock);
435 kfree(new_node); 487 kfree(new_node);
436err_node_alloc: 488err_node_alloc:
437 kfree(new_mpath); 489 kfree(new_mpath);
@@ -544,11 +596,11 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata)
544 int hash_idx; 596 int hash_idx;
545 int err = 0; 597 int err = 0;
546 598
547 read_lock(&pathtbl_resize_lock); 599 read_lock_bh(&pathtbl_resize_lock);
548 hash_idx = mesh_table_hash(addr, sdata, mesh_paths); 600 hash_idx = mesh_table_hash(addr, sdata, mesh_paths);
549 bucket = &mesh_paths->hash_buckets[hash_idx]; 601 bucket = &mesh_paths->hash_buckets[hash_idx];
550 602
551 spin_lock(&mesh_paths->hashwlock[hash_idx]); 603 spin_lock_bh(&mesh_paths->hashwlock[hash_idx]);
552 hlist_for_each_entry(node, n, bucket, list) { 604 hlist_for_each_entry(node, n, bucket, list) {
553 mpath = node->mpath; 605 mpath = node->mpath;
554 if (mpath->sdata == sdata && 606 if (mpath->sdata == sdata &&
@@ -566,8 +618,8 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata)
566 err = -ENXIO; 618 err = -ENXIO;
567enddel: 619enddel:
568 mesh_paths_generation++; 620 mesh_paths_generation++;
569 spin_unlock(&mesh_paths->hashwlock[hash_idx]); 621 spin_unlock_bh(&mesh_paths->hashwlock[hash_idx]);
570 read_unlock(&pathtbl_resize_lock); 622 read_unlock_bh(&pathtbl_resize_lock);
571 return err; 623 return err;
572} 624}
573 625
@@ -628,7 +680,7 @@ void mesh_path_discard_frame(struct sk_buff *skb,
628 * 680 *
629 * @mpath: mesh path whose queue has to be freed 681 * @mpath: mesh path whose queue has to be freed
630 * 682 *
631 * Locking: the function must me called withing a rcu_read_lock region 683 * Locking: the function must me called within a rcu_read_lock region
632 */ 684 */
633void mesh_path_flush_pending(struct mesh_path *mpath) 685void mesh_path_flush_pending(struct mesh_path *mpath)
634{ 686{
@@ -719,7 +771,7 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
719 struct hlist_node *p; 771 struct hlist_node *p;
720 int i; 772 int i;
721 773
722 read_lock(&pathtbl_resize_lock); 774 read_lock_bh(&pathtbl_resize_lock);
723 for_each_mesh_entry(mesh_paths, p, node, i) { 775 for_each_mesh_entry(mesh_paths, p, node, i) {
724 if (node->mpath->sdata != sdata) 776 if (node->mpath->sdata != sdata)
725 continue; 777 continue;
@@ -734,7 +786,7 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
734 } else 786 } else
735 spin_unlock_bh(&mpath->state_lock); 787 spin_unlock_bh(&mpath->state_lock);
736 } 788 }
737 read_unlock(&pathtbl_resize_lock); 789 read_unlock_bh(&pathtbl_resize_lock);
738} 790}
739 791
740void mesh_pathtbl_unregister(void) 792void mesh_pathtbl_unregister(void)
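The mesh_pathtbl.c rework above moves the new-table allocation out from under pathtbl_resize_lock, copies under the write lock, publishes the bigger table, and frees the old one only after the grace period. A simplified, runnable userspace analogue of that ordering (no RCU here; one rwlock stands in for both the resize lock and the grace period, and the table layout is deliberately trivial):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct table {
	int order;
	int *buckets;				/* 1 << order slots */
};

static struct table *tbl;
static pthread_rwlock_t resize_lock = PTHREAD_RWLOCK_INITIALIZER;

static struct table *table_alloc(int order)
{
	struct table *t = malloc(sizeof(*t));

	if (!t)
		return NULL;
	t->order = order;
	t->buckets = calloc(1u << order, sizeof(int));
	if (!t->buckets) {
		free(t);
		return NULL;
	}
	return t;
}

static void table_grow(void)
{
	/* allocate outside the lock, as mesh_mpath_table_grow() now does */
	struct table *newtbl = table_alloc(tbl->order + 1);
	struct table *oldtbl;
	int i;

	if (!newtbl)
		return;

	pthread_rwlock_wrlock(&resize_lock);
	oldtbl = tbl;
	for (i = 0; i < (1 << oldtbl->order); i++)	/* copy entries over */
		newtbl->buckets[i] = oldtbl->buckets[i];
	tbl = newtbl;					/* publish */
	pthread_rwlock_unlock(&resize_lock);

	free(oldtbl->buckets);				/* then retire the old table */
	free(oldtbl);
}

int main(void)
{
	tbl = table_alloc(2);
	tbl->buckets[1] = 42;
	table_grow();
	printf("order %d, bucket[1] = %d\n", tbl->order, tbl->buckets[1]);
	free(tbl->buckets);
	free(tbl);
	return 0;
}

Keeping the allocation outside the write lock means a failed kmalloc/calloc never blocks readers or writers, which is the point of returning -EAGAIN/-ENOMEM from mesh_table_grow() instead of allocating inside it.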
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 44b53931ba5e..f4adc0917888 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -43,7 +43,7 @@
43#define dot11MeshMaxPeerLinks(s) (s->u.mesh.mshcfg.dot11MeshMaxPeerLinks) 43#define dot11MeshMaxPeerLinks(s) (s->u.mesh.mshcfg.dot11MeshMaxPeerLinks)
44 44
45enum plink_frame_type { 45enum plink_frame_type {
46 PLINK_OPEN = 0, 46 PLINK_OPEN = 1,
47 PLINK_CONFIRM, 47 PLINK_CONFIRM,
48 PLINK_CLOSE 48 PLINK_CLOSE
49}; 49};
@@ -83,7 +83,7 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
83 */ 83 */
84static inline void mesh_plink_fsm_restart(struct sta_info *sta) 84static inline void mesh_plink_fsm_restart(struct sta_info *sta)
85{ 85{
86 sta->plink_state = PLINK_LISTEN; 86 sta->plink_state = NL80211_PLINK_LISTEN;
87 sta->llid = sta->plid = sta->reason = 0; 87 sta->llid = sta->plid = sta->reason = 0;
88 sta->plink_retries = 0; 88 sta->plink_retries = 0;
89} 89}
@@ -105,7 +105,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
105 if (!sta) 105 if (!sta)
106 return NULL; 106 return NULL;
107 107
108 sta->flags = WLAN_STA_AUTHORIZED; 108 sta->flags = WLAN_STA_AUTHORIZED | WLAN_STA_AUTH;
109 sta->sta.supp_rates[local->hw.conf.channel->band] = rates; 109 sta->sta.supp_rates[local->hw.conf.channel->band] = rates;
110 rate_control_rate_init(sta); 110 rate_control_rate_init(sta);
111 111
@@ -126,11 +126,11 @@ static bool __mesh_plink_deactivate(struct sta_info *sta)
126 struct ieee80211_sub_if_data *sdata = sta->sdata; 126 struct ieee80211_sub_if_data *sdata = sta->sdata;
127 bool deactivated = false; 127 bool deactivated = false;
128 128
129 if (sta->plink_state == PLINK_ESTAB) { 129 if (sta->plink_state == NL80211_PLINK_ESTAB) {
130 mesh_plink_dec_estab_count(sdata); 130 mesh_plink_dec_estab_count(sdata);
131 deactivated = true; 131 deactivated = true;
132 } 132 }
133 sta->plink_state = PLINK_BLOCKED; 133 sta->plink_state = NL80211_PLINK_BLOCKED;
134 mesh_path_flush_by_nexthop(sta); 134 mesh_path_flush_by_nexthop(sta);
135 135
136 return deactivated; 136 return deactivated;
@@ -161,7 +161,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
161 __le16 reason) { 161 __le16 reason) {
162 struct ieee80211_local *local = sdata->local; 162 struct ieee80211_local *local = sdata->local;
163 struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + 163 struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 +
164 sdata->u.mesh.vendor_ie_len); 164 sdata->u.mesh.ie_len);
165 struct ieee80211_mgmt *mgmt; 165 struct ieee80211_mgmt *mgmt;
166 bool include_plid = false; 166 bool include_plid = false;
167 static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; 167 static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A };
@@ -181,8 +181,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
181 IEEE80211_STYPE_ACTION); 181 IEEE80211_STYPE_ACTION);
182 memcpy(mgmt->da, da, ETH_ALEN); 182 memcpy(mgmt->da, da, ETH_ALEN);
183 memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); 183 memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
184 /* BSSID is left zeroed, wildcard value */ 184 memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
185 mgmt->u.action.category = WLAN_CATEGORY_MESH_PLINK; 185 mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION;
186 mgmt->u.action.u.plink_action.action_code = action; 186 mgmt->u.action.u.plink_action.action_code = action;
187 187
188 if (action == PLINK_CLOSE) 188 if (action == PLINK_CLOSE)
@@ -237,8 +237,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
237 return 0; 237 return 0;
238} 238}
239 239
240void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data *sdata, 240void mesh_neighbour_update(u8 *hw_addr, u32 rates,
241 bool peer_accepting_plinks) 241 struct ieee80211_sub_if_data *sdata,
242 struct ieee802_11_elems *elems)
242{ 243{
243 struct ieee80211_local *local = sdata->local; 244 struct ieee80211_local *local = sdata->local;
244 struct sta_info *sta; 245 struct sta_info *sta;
@@ -248,8 +249,14 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data
248 sta = sta_info_get(sdata, hw_addr); 249 sta = sta_info_get(sdata, hw_addr);
249 if (!sta) { 250 if (!sta) {
250 rcu_read_unlock(); 251 rcu_read_unlock();
251 252 /* Userspace handles peer allocation when security is enabled
252 sta = mesh_plink_alloc(sdata, hw_addr, rates); 253 * */
254 if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)
255 cfg80211_notify_new_peer_candidate(sdata->dev, hw_addr,
256 elems->ie_start, elems->total_len,
257 GFP_KERNEL);
258 else
259 sta = mesh_plink_alloc(sdata, hw_addr, rates);
253 if (!sta) 260 if (!sta)
254 return; 261 return;
255 if (sta_info_insert_rcu(sta)) { 262 if (sta_info_insert_rcu(sta)) {
@@ -260,7 +267,8 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data
260 267
261 sta->last_rx = jiffies; 268 sta->last_rx = jiffies;
262 sta->sta.supp_rates[local->hw.conf.channel->band] = rates; 269 sta->sta.supp_rates[local->hw.conf.channel->band] = rates;
263 if (peer_accepting_plinks && sta->plink_state == PLINK_LISTEN && 270 if (mesh_peer_accepts_plinks(elems) &&
271 sta->plink_state == NL80211_PLINK_LISTEN &&
264 sdata->u.mesh.accepting_plinks && 272 sdata->u.mesh.accepting_plinks &&
265 sdata->u.mesh.mshcfg.auto_open_plinks) 273 sdata->u.mesh.mshcfg.auto_open_plinks)
266 mesh_plink_open(sta); 274 mesh_plink_open(sta);
@@ -300,8 +308,8 @@ static void mesh_plink_timer(unsigned long data)
300 sdata = sta->sdata; 308 sdata = sta->sdata;
301 309
302 switch (sta->plink_state) { 310 switch (sta->plink_state) {
303 case PLINK_OPN_RCVD: 311 case NL80211_PLINK_OPN_RCVD:
304 case PLINK_OPN_SNT: 312 case NL80211_PLINK_OPN_SNT:
305 /* retry timer */ 313 /* retry timer */
306 if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { 314 if (sta->plink_retries < dot11MeshMaxRetries(sdata)) {
307 u32 rand; 315 u32 rand;
@@ -320,17 +328,17 @@ static void mesh_plink_timer(unsigned long data)
320 } 328 }
321 reason = cpu_to_le16(MESH_MAX_RETRIES); 329 reason = cpu_to_le16(MESH_MAX_RETRIES);
322 /* fall through on else */ 330 /* fall through on else */
323 case PLINK_CNF_RCVD: 331 case NL80211_PLINK_CNF_RCVD:
324 /* confirm timer */ 332 /* confirm timer */
325 if (!reason) 333 if (!reason)
326 reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT); 334 reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT);
327 sta->plink_state = PLINK_HOLDING; 335 sta->plink_state = NL80211_PLINK_HOLDING;
328 mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); 336 mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
329 spin_unlock_bh(&sta->lock); 337 spin_unlock_bh(&sta->lock);
330 mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, 338 mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid,
331 reason); 339 reason);
332 break; 340 break;
333 case PLINK_HOLDING: 341 case NL80211_PLINK_HOLDING:
334 /* holding timer */ 342 /* holding timer */
335 del_timer(&sta->plink_timer); 343 del_timer(&sta->plink_timer);
336 mesh_plink_fsm_restart(sta); 344 mesh_plink_fsm_restart(sta);
@@ -372,14 +380,17 @@ int mesh_plink_open(struct sta_info *sta)
372 __le16 llid; 380 __le16 llid;
373 struct ieee80211_sub_if_data *sdata = sta->sdata; 381 struct ieee80211_sub_if_data *sdata = sta->sdata;
374 382
383 if (!test_sta_flags(sta, WLAN_STA_AUTH))
384 return -EPERM;
385
375 spin_lock_bh(&sta->lock); 386 spin_lock_bh(&sta->lock);
376 get_random_bytes(&llid, 2); 387 get_random_bytes(&llid, 2);
377 sta->llid = llid; 388 sta->llid = llid;
378 if (sta->plink_state != PLINK_LISTEN) { 389 if (sta->plink_state != NL80211_PLINK_LISTEN) {
379 spin_unlock_bh(&sta->lock); 390 spin_unlock_bh(&sta->lock);
380 return -EBUSY; 391 return -EBUSY;
381 } 392 }
382 sta->plink_state = PLINK_OPN_SNT; 393 sta->plink_state = NL80211_PLINK_OPN_SNT;
383 mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); 394 mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata));
384 spin_unlock_bh(&sta->lock); 395 spin_unlock_bh(&sta->lock);
385 mpl_dbg("Mesh plink: starting establishment with %pM\n", 396 mpl_dbg("Mesh plink: starting establishment with %pM\n",
@@ -396,7 +407,7 @@ void mesh_plink_block(struct sta_info *sta)
396 407
397 spin_lock_bh(&sta->lock); 408 spin_lock_bh(&sta->lock);
398 deactivated = __mesh_plink_deactivate(sta); 409 deactivated = __mesh_plink_deactivate(sta);
399 sta->plink_state = PLINK_BLOCKED; 410 sta->plink_state = NL80211_PLINK_BLOCKED;
400 spin_unlock_bh(&sta->lock); 411 spin_unlock_bh(&sta->lock);
401 412
402 if (deactivated) 413 if (deactivated)
@@ -419,13 +430,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
419 __le16 plid, llid, reason; 430 __le16 plid, llid, reason;
420#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG 431#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG
421 static const char *mplstates[] = { 432 static const char *mplstates[] = {
422 [PLINK_LISTEN] = "LISTEN", 433 [NL80211_PLINK_LISTEN] = "LISTEN",
423 [PLINK_OPN_SNT] = "OPN-SNT", 434 [NL80211_PLINK_OPN_SNT] = "OPN-SNT",
424 [PLINK_OPN_RCVD] = "OPN-RCVD", 435 [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD",
425 [PLINK_CNF_RCVD] = "CNF_RCVD", 436 [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD",
426 [PLINK_ESTAB] = "ESTAB", 437 [NL80211_PLINK_ESTAB] = "ESTAB",
427 [PLINK_HOLDING] = "HOLDING", 438 [NL80211_PLINK_HOLDING] = "HOLDING",
428 [PLINK_BLOCKED] = "BLOCKED" 439 [NL80211_PLINK_BLOCKED] = "BLOCKED"
429 }; 440 };
430#endif 441#endif
431 442
@@ -449,6 +460,11 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
449 mpl_dbg("Mesh plink: missing necessary peer link ie\n"); 460 mpl_dbg("Mesh plink: missing necessary peer link ie\n");
450 return; 461 return;
451 } 462 }
463 if (elems.rsn_len &&
464 sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) {
465 mpl_dbg("Mesh plink: can't establish link with secure peer\n");
466 return;
467 }
452 468
453 ftype = mgmt->u.action.u.plink_action.action_code; 469 ftype = mgmt->u.action.u.plink_action.action_code;
454 ie_len = elems.peer_link_len; 470 ie_len = elems.peer_link_len;
@@ -480,7 +496,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
480 return; 496 return;
481 } 497 }
482 498
483 if (sta && sta->plink_state == PLINK_BLOCKED) { 499 if (sta && !test_sta_flags(sta, WLAN_STA_AUTH)) {
500 mpl_dbg("Mesh plink: Action frame from non-authed peer\n");
501 rcu_read_unlock();
502 return;
503 }
504
505 if (sta && sta->plink_state == NL80211_PLINK_BLOCKED) {
484 rcu_read_unlock(); 506 rcu_read_unlock();
485 return; 507 return;
486 } 508 }
@@ -550,7 +572,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
550 event = CNF_ACPT; 572 event = CNF_ACPT;
551 break; 573 break;
552 case PLINK_CLOSE: 574 case PLINK_CLOSE:
553 if (sta->plink_state == PLINK_ESTAB) 575 if (sta->plink_state == NL80211_PLINK_ESTAB)
554 /* Do not check for llid or plid. This does not 576 /* Do not check for llid or plid. This does not
555 * follow the standard but since multiple plinks 577 * follow the standard but since multiple plinks
556 * per sta are not supported, it is necessary in 578 * per sta are not supported, it is necessary in
@@ -585,14 +607,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
585 reason = 0; 607 reason = 0;
586 switch (sta->plink_state) { 608 switch (sta->plink_state) {
587 /* spin_unlock as soon as state is updated at each case */ 609 /* spin_unlock as soon as state is updated at each case */
588 case PLINK_LISTEN: 610 case NL80211_PLINK_LISTEN:
589 switch (event) { 611 switch (event) {
590 case CLS_ACPT: 612 case CLS_ACPT:
591 mesh_plink_fsm_restart(sta); 613 mesh_plink_fsm_restart(sta);
592 spin_unlock_bh(&sta->lock); 614 spin_unlock_bh(&sta->lock);
593 break; 615 break;
594 case OPN_ACPT: 616 case OPN_ACPT:
595 sta->plink_state = PLINK_OPN_RCVD; 617 sta->plink_state = NL80211_PLINK_OPN_RCVD;
596 sta->plid = plid; 618 sta->plid = plid;
597 get_random_bytes(&llid, 2); 619 get_random_bytes(&llid, 2);
598 sta->llid = llid; 620 sta->llid = llid;
@@ -609,7 +631,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
609 } 631 }
610 break; 632 break;
611 633
612 case PLINK_OPN_SNT: 634 case NL80211_PLINK_OPN_SNT:
613 switch (event) { 635 switch (event) {
614 case OPN_RJCT: 636 case OPN_RJCT:
615 case CNF_RJCT: 637 case CNF_RJCT:
@@ -618,7 +640,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
618 if (!reason) 640 if (!reason)
619 reason = cpu_to_le16(MESH_CLOSE_RCVD); 641 reason = cpu_to_le16(MESH_CLOSE_RCVD);
620 sta->reason = reason; 642 sta->reason = reason;
621 sta->plink_state = PLINK_HOLDING; 643 sta->plink_state = NL80211_PLINK_HOLDING;
622 if (!mod_plink_timer(sta, 644 if (!mod_plink_timer(sta,
623 dot11MeshHoldingTimeout(sdata))) 645 dot11MeshHoldingTimeout(sdata)))
624 sta->ignore_plink_timer = true; 646 sta->ignore_plink_timer = true;
@@ -630,7 +652,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
630 break; 652 break;
631 case OPN_ACPT: 653 case OPN_ACPT:
632 /* retry timer is left untouched */ 654 /* retry timer is left untouched */
633 sta->plink_state = PLINK_OPN_RCVD; 655 sta->plink_state = NL80211_PLINK_OPN_RCVD;
634 sta->plid = plid; 656 sta->plid = plid;
635 llid = sta->llid; 657 llid = sta->llid;
636 spin_unlock_bh(&sta->lock); 658 spin_unlock_bh(&sta->lock);
@@ -638,7 +660,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
638 plid, 0); 660 plid, 0);
639 break; 661 break;
640 case CNF_ACPT: 662 case CNF_ACPT:
641 sta->plink_state = PLINK_CNF_RCVD; 663 sta->plink_state = NL80211_PLINK_CNF_RCVD;
642 if (!mod_plink_timer(sta, 664 if (!mod_plink_timer(sta,
643 dot11MeshConfirmTimeout(sdata))) 665 dot11MeshConfirmTimeout(sdata)))
644 sta->ignore_plink_timer = true; 666 sta->ignore_plink_timer = true;
@@ -651,7 +673,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
651 } 673 }
652 break; 674 break;
653 675
654 case PLINK_OPN_RCVD: 676 case NL80211_PLINK_OPN_RCVD:
655 switch (event) { 677 switch (event) {
656 case OPN_RJCT: 678 case OPN_RJCT:
657 case CNF_RJCT: 679 case CNF_RJCT:
@@ -660,7 +682,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
660 if (!reason) 682 if (!reason)
661 reason = cpu_to_le16(MESH_CLOSE_RCVD); 683 reason = cpu_to_le16(MESH_CLOSE_RCVD);
662 sta->reason = reason; 684 sta->reason = reason;
663 sta->plink_state = PLINK_HOLDING; 685 sta->plink_state = NL80211_PLINK_HOLDING;
664 if (!mod_plink_timer(sta, 686 if (!mod_plink_timer(sta,
665 dot11MeshHoldingTimeout(sdata))) 687 dot11MeshHoldingTimeout(sdata)))
666 sta->ignore_plink_timer = true; 688 sta->ignore_plink_timer = true;
@@ -678,7 +700,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
678 break; 700 break;
679 case CNF_ACPT: 701 case CNF_ACPT:
680 del_timer(&sta->plink_timer); 702 del_timer(&sta->plink_timer);
681 sta->plink_state = PLINK_ESTAB; 703 sta->plink_state = NL80211_PLINK_ESTAB;
682 spin_unlock_bh(&sta->lock); 704 spin_unlock_bh(&sta->lock);
683 mesh_plink_inc_estab_count(sdata); 705 mesh_plink_inc_estab_count(sdata);
684 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); 706 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
@@ -691,7 +713,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
691 } 713 }
692 break; 714 break;
693 715
694 case PLINK_CNF_RCVD: 716 case NL80211_PLINK_CNF_RCVD:
695 switch (event) { 717 switch (event) {
696 case OPN_RJCT: 718 case OPN_RJCT:
697 case CNF_RJCT: 719 case CNF_RJCT:
@@ -700,7 +722,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
700 if (!reason) 722 if (!reason)
701 reason = cpu_to_le16(MESH_CLOSE_RCVD); 723 reason = cpu_to_le16(MESH_CLOSE_RCVD);
702 sta->reason = reason; 724 sta->reason = reason;
703 sta->plink_state = PLINK_HOLDING; 725 sta->plink_state = NL80211_PLINK_HOLDING;
704 if (!mod_plink_timer(sta, 726 if (!mod_plink_timer(sta,
705 dot11MeshHoldingTimeout(sdata))) 727 dot11MeshHoldingTimeout(sdata)))
706 sta->ignore_plink_timer = true; 728 sta->ignore_plink_timer = true;
@@ -712,7 +734,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
712 break; 734 break;
713 case OPN_ACPT: 735 case OPN_ACPT:
714 del_timer(&sta->plink_timer); 736 del_timer(&sta->plink_timer);
715 sta->plink_state = PLINK_ESTAB; 737 sta->plink_state = NL80211_PLINK_ESTAB;
716 spin_unlock_bh(&sta->lock); 738 spin_unlock_bh(&sta->lock);
717 mesh_plink_inc_estab_count(sdata); 739 mesh_plink_inc_estab_count(sdata);
718 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); 740 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
@@ -727,13 +749,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
727 } 749 }
728 break; 750 break;
729 751
730 case PLINK_ESTAB: 752 case NL80211_PLINK_ESTAB:
731 switch (event) { 753 switch (event) {
732 case CLS_ACPT: 754 case CLS_ACPT:
733 reason = cpu_to_le16(MESH_CLOSE_RCVD); 755 reason = cpu_to_le16(MESH_CLOSE_RCVD);
734 sta->reason = reason; 756 sta->reason = reason;
735 deactivated = __mesh_plink_deactivate(sta); 757 deactivated = __mesh_plink_deactivate(sta);
736 sta->plink_state = PLINK_HOLDING; 758 sta->plink_state = NL80211_PLINK_HOLDING;
737 llid = sta->llid; 759 llid = sta->llid;
738 mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); 760 mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
739 spin_unlock_bh(&sta->lock); 761 spin_unlock_bh(&sta->lock);
@@ -753,7 +775,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
753 break; 775 break;
754 } 776 }
755 break; 777 break;
756 case PLINK_HOLDING: 778 case NL80211_PLINK_HOLDING:
757 switch (event) { 779 switch (event) {
758 case CLS_ACPT: 780 case CLS_ACPT:
759 if (del_timer(&sta->plink_timer)) 781 if (del_timer(&sta->plink_timer))
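The plink changes above switch the driver-private PLINK_* states to the shared NL80211_PLINK_* enum and refuse to open a peer link to a station that has not passed authentication. A toy, runnable sketch of just that gate in the state machine follows; the enum values and struct fields are local stand-ins that mirror, but are not, the mac80211 definitions.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum plink_state { PLINK_LISTEN, PLINK_OPN_SNT, PLINK_ESTAB, PLINK_BLOCKED };

struct peer {
	bool authed;
	enum plink_state state;
};

static int plink_open(struct peer *p)
{
	if (!p->authed)
		return -EPERM;		/* new check: refuse unauthenticated peers */
	if (p->state != PLINK_LISTEN)
		return -EBUSY;
	p->state = PLINK_OPN_SNT;
	return 0;
}

int main(void)
{
	struct peer p = { .authed = false, .state = PLINK_LISTEN };

	printf("open unauthenticated peer: %d\n", plink_open(&p));	/* -EPERM */
	p.authed = true;
	printf("open authenticated peer:   %d\n", plink_open(&p));	/* 0 */
	return 0;
}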
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c9ceb4d57ab0..4f6b2675e41d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -28,8 +28,15 @@
28#include "rate.h" 28#include "rate.h"
29#include "led.h" 29#include "led.h"
30 30
31#define IEEE80211_MAX_NULLFUNC_TRIES 2 31static int max_nullfunc_tries = 2;
32#define IEEE80211_MAX_PROBE_TRIES 5 32module_param(max_nullfunc_tries, int, 0644);
33MODULE_PARM_DESC(max_nullfunc_tries,
34 "Maximum nullfunc tx tries before disconnecting (reason 4).");
35
36static int max_probe_tries = 5;
37module_param(max_probe_tries, int, 0644);
38MODULE_PARM_DESC(max_probe_tries,
39 "Maximum probe tries before disconnecting (reason 4).");
33 40
34/* 41/*
35 * Beacon loss timeout is calculated as N frames times the 42 * Beacon loss timeout is calculated as N frames times the
@@ -51,7 +58,11 @@
51 * a probe request because of beacon loss or for 58 * a probe request because of beacon loss or for
52 * checking the connection still works. 59 * checking the connection still works.
53 */ 60 */
54#define IEEE80211_PROBE_WAIT (HZ / 2) 61static int probe_wait_ms = 500;
62module_param(probe_wait_ms, int, 0644);
63MODULE_PARM_DESC(probe_wait_ms,
64 "Maximum time(ms) to wait for probe response"
65 " before disconnecting (reason 4).");
55 66
56/* 67/*
57 * Weight given to the latest Beacon frame when calculating average signal 68 * Weight given to the latest Beacon frame when calculating average signal
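The two hunks above convert fixed #defines into module parameters so the probe/nullfunc retry behaviour can be tuned at load time or via sysfs. A minimal out-of-tree module sketch of that pattern is shown below; it is not part of mac80211 and needs a kernel build environment, but the module_param()/MODULE_PARM_DESC() usage matches the hunks.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

static int max_probe_tries = 5;
module_param(max_probe_tries, int, 0644);
MODULE_PARM_DESC(max_probe_tries, "Maximum probe tries before disconnecting.");

static int __init demo_init(void)
{
	pr_info("demo: max_probe_tries=%d\n", max_probe_tries);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

With perm 0644 the value also shows up writable under /sys/module/<name>/parameters/, which is what makes these knobs adjustable after load.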
@@ -79,20 +90,11 @@ enum rx_mgmt_action {
79 /* no action required */ 90 /* no action required */
80 RX_MGMT_NONE, 91 RX_MGMT_NONE,
81 92
82 /* caller must call cfg80211_send_rx_auth() */
83 RX_MGMT_CFG80211_AUTH,
84
85 /* caller must call cfg80211_send_rx_assoc() */
86 RX_MGMT_CFG80211_ASSOC,
87
88 /* caller must call cfg80211_send_deauth() */ 93 /* caller must call cfg80211_send_deauth() */
89 RX_MGMT_CFG80211_DEAUTH, 94 RX_MGMT_CFG80211_DEAUTH,
90 95
91 /* caller must call cfg80211_send_disassoc() */ 96 /* caller must call cfg80211_send_disassoc() */
92 RX_MGMT_CFG80211_DISASSOC, 97 RX_MGMT_CFG80211_DISASSOC,
93
94 /* caller must tell cfg80211 about internal error */
95 RX_MGMT_CFG80211_ASSOC_ERROR,
96}; 98};
97 99
98/* utils */ 100/* utils */
@@ -134,6 +136,9 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
134{ 136{
135 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 137 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
136 138
139 if (unlikely(!sdata->u.mgd.associated))
140 return;
141
137 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) 142 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
138 return; 143 return;
139 144
@@ -161,6 +166,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
161 struct ieee80211_supported_band *sband; 166 struct ieee80211_supported_band *sband;
162 struct sta_info *sta; 167 struct sta_info *sta;
163 u32 changed = 0; 168 u32 changed = 0;
169 int hti_cfreq;
164 u16 ht_opmode; 170 u16 ht_opmode;
165 bool enable_ht = true; 171 bool enable_ht = true;
166 enum nl80211_channel_type prev_chantype; 172 enum nl80211_channel_type prev_chantype;
@@ -174,10 +180,27 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
174 if (!sband->ht_cap.ht_supported) 180 if (!sband->ht_cap.ht_supported)
175 enable_ht = false; 181 enable_ht = false;
176 182
177 /* check that channel matches the right operating channel */ 183 if (enable_ht) {
178 if (local->hw.conf.channel->center_freq != 184 hti_cfreq = ieee80211_channel_to_frequency(hti->control_chan,
179 ieee80211_channel_to_frequency(hti->control_chan)) 185 sband->band);
180 enable_ht = false; 186 /* check that channel matches the right operating channel */
187 if (local->hw.conf.channel->center_freq != hti_cfreq) {
188 /* Some APs mess this up, evidently.
189 * Netgear WNDR3700 sometimes reports 4 higher than
190 * the actual channel, for instance.
191 */
192 printk(KERN_DEBUG
193 "%s: Wrong control channel in association"
194 " response: configured center-freq: %d"
195 " hti-cfreq: %d hti->control_chan: %d"
196 " band: %d. Disabling HT.\n",
197 sdata->name,
198 local->hw.conf.channel->center_freq,
199 hti_cfreq, hti->control_chan,
200 sband->band);
201 enable_ht = false;
202 }
203 }
181 204
182 if (enable_ht) { 205 if (enable_ht) {
183 channel_type = NL80211_CHAN_HT20; 206 channel_type = NL80211_CHAN_HT20;
@@ -429,7 +452,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
429 container_of((void *)bss, struct cfg80211_bss, priv); 452 container_of((void *)bss, struct cfg80211_bss, priv);
430 struct ieee80211_channel *new_ch; 453 struct ieee80211_channel *new_ch;
431 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 454 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
432 int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); 455 int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num,
456 cbss->channel->band);
433 457
434 ASSERT_MGD_MTX(ifmgd); 458 ASSERT_MGD_MTX(ifmgd);
435 459
@@ -580,6 +604,37 @@ static void ieee80211_change_ps(struct ieee80211_local *local)
580 } 604 }
581} 605}
582 606
607static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata)
608{
609 struct ieee80211_if_managed *mgd = &sdata->u.mgd;
610 struct sta_info *sta = NULL;
611 u32 sta_flags = 0;
612
613 if (!mgd->powersave)
614 return false;
615
616 if (!mgd->associated)
617 return false;
618
619 if (!mgd->associated->beacon_ies)
620 return false;
621
622 if (mgd->flags & (IEEE80211_STA_BEACON_POLL |
623 IEEE80211_STA_CONNECTION_POLL))
624 return false;
625
626 rcu_read_lock();
627 sta = sta_info_get(sdata, mgd->bssid);
628 if (sta)
629 sta_flags = get_sta_flags(sta);
630 rcu_read_unlock();
631
632 if (!(sta_flags & WLAN_STA_AUTHORIZED))
633 return false;
634
635 return true;
636}
637
583/* need to hold RTNL or interface lock */ 638/* need to hold RTNL or interface lock */
584void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) 639void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
585{ 640{
@@ -600,17 +655,21 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
600 list_for_each_entry(sdata, &local->interfaces, list) { 655 list_for_each_entry(sdata, &local->interfaces, list) {
601 if (!ieee80211_sdata_running(sdata)) 656 if (!ieee80211_sdata_running(sdata))
602 continue; 657 continue;
658 if (sdata->vif.type == NL80211_IFTYPE_AP) {
659 /* If an AP vif is found, then disable PS
660 * by setting the count to zero thereby setting
661 * ps_sdata to NULL.
662 */
663 count = 0;
664 break;
665 }
603 if (sdata->vif.type != NL80211_IFTYPE_STATION) 666 if (sdata->vif.type != NL80211_IFTYPE_STATION)
604 continue; 667 continue;
605 found = sdata; 668 found = sdata;
606 count++; 669 count++;
607 } 670 }
608 671
609 if (count == 1 && found->u.mgd.powersave && 672 if (count == 1 && ieee80211_powersave_allowed(found)) {
610 found->u.mgd.associated &&
611 found->u.mgd.associated->beacon_ies &&
612 !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL |
613 IEEE80211_STA_CONNECTION_POLL))) {
614 struct ieee80211_conf *conf = &local->hw.conf; 673 struct ieee80211_conf *conf = &local->hw.conf;
615 s32 beaconint_us; 674 s32 beaconint_us;
616 675
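The ieee80211_powersave_allowed() helper added above collapses the multi-clause condition that used to live inline in ieee80211_recalc_ps(), and also adds the WLAN_STA_AUTHORIZED check. A compact, runnable sketch of the same refactor shape; the struct and field names are illustrative stand-ins for the ifmgd/sta state the kernel version inspects.

#include <stdbool.h>
#include <stdio.h>

struct mgd {
	bool powersave;
	bool associated;
	bool have_beacon_ies;
	bool polling;			/* beacon/connection poll in progress */
	bool peer_authorized;
};

static bool powersave_allowed(const struct mgd *m)
{
	if (!m->powersave || !m->associated || !m->have_beacon_ies)
		return false;
	if (m->polling)
		return false;
	return m->peer_authorized;
}

int main(void)
{
	struct mgd m = { true, true, true, false, true };

	printf("powersave allowed: %d\n", powersave_allowed(&m));
	return 0;
}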
@@ -691,6 +750,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
691 dynamic_ps_enable_work); 750 dynamic_ps_enable_work);
692 struct ieee80211_sub_if_data *sdata = local->ps_sdata; 751 struct ieee80211_sub_if_data *sdata = local->ps_sdata;
693 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 752 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
753 unsigned long flags;
754 int q;
694 755
695 /* can only happen when PS was just disabled anyway */ 756 /* can only happen when PS was just disabled anyway */
696 if (!sdata) 757 if (!sdata)
@@ -699,9 +760,38 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
699 if (local->hw.conf.flags & IEEE80211_CONF_PS) 760 if (local->hw.conf.flags & IEEE80211_CONF_PS)
700 return; 761 return;
701 762
763 /*
764 * transmission can be stopped by others which leads to
 765	 * dynamic_ps_timer expiry. Postpone the ps timer if it
766 * is not the actual idle state.
767 */
768 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
769 for (q = 0; q < local->hw.queues; q++) {
770 if (local->queue_stop_reasons[q]) {
771 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
772 flags);
773 mod_timer(&local->dynamic_ps_timer, jiffies +
774 msecs_to_jiffies(
775 local->hw.conf.dynamic_ps_timeout));
776 return;
777 }
778 }
779 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
780
702 if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && 781 if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
703 (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) 782 (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) {
704 ieee80211_send_nullfunc(local, sdata, 1); 783 netif_tx_stop_all_queues(sdata->dev);
784
785 if (drv_tx_frames_pending(local))
786 mod_timer(&local->dynamic_ps_timer, jiffies +
787 msecs_to_jiffies(
788 local->hw.conf.dynamic_ps_timeout));
789 else {
790 ieee80211_send_nullfunc(local, sdata, 1);
791 /* Flush to get the tx status of nullfunc frame */
792 drv_flush(local, false);
793 }
794 }
705 795
706 if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && 796 if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) &&
707 (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || 797 (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) ||
@@ -710,6 +800,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
710 local->hw.conf.flags |= IEEE80211_CONF_PS; 800 local->hw.conf.flags |= IEEE80211_CONF_PS;
711 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 801 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
712 } 802 }
803
804 netif_tx_wake_all_queues(sdata->dev);
713} 805}
714 806
715void ieee80211_dynamic_ps_timer(unsigned long data) 807void ieee80211_dynamic_ps_timer(unsigned long data)
@@ -1033,12 +1125,6 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1033 if (is_multicast_ether_addr(hdr->addr1)) 1125 if (is_multicast_ether_addr(hdr->addr1))
1034 return; 1126 return;
1035 1127
1036 /*
1037 * In case we receive frames after disassociation.
1038 */
1039 if (!sdata->u.mgd.associated)
1040 return;
1041
1042 ieee80211_sta_reset_conn_monitor(sdata); 1128 ieee80211_sta_reset_conn_monitor(sdata);
1043} 1129}
1044 1130
@@ -1095,7 +1181,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1095 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1181 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1096 const u8 *ssid; 1182 const u8 *ssid;
1097 u8 *dst = ifmgd->associated->bssid; 1183 u8 *dst = ifmgd->associated->bssid;
1098 u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3); 1184 u8 unicast_limit = max(1, max_probe_tries - 3);
1099 1185
1100 /* 1186 /*
1101 * Try sending broadcast probe requests for the last three 1187 * Try sending broadcast probe requests for the last three
@@ -1121,7 +1207,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1121 } 1207 }
1122 1208
1123 ifmgd->probe_send_count++; 1209 ifmgd->probe_send_count++;
1124 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; 1210 ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
1125 run_again(ifmgd, ifmgd->probe_timeout); 1211 run_again(ifmgd, ifmgd->probe_timeout);
1126} 1212}
1127 1213
@@ -1222,7 +1308,8 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
1222 1308
1223 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); 1309 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
1224 1310
1225 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); 1311 printk(KERN_DEBUG "%s: Connection to AP %pM lost.\n",
1312 sdata->name, bssid);
1226 1313
1227 ieee80211_set_disassoc(sdata, true, true); 1314 ieee80211_set_disassoc(sdata, true, true);
1228 mutex_unlock(&ifmgd->mtx); 1315 mutex_unlock(&ifmgd->mtx);
@@ -1525,7 +1612,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1525 } 1612 }
1526 1613
1527 if (elems->ds_params && elems->ds_params_len == 1) 1614 if (elems->ds_params && elems->ds_params_len == 1)
1528 freq = ieee80211_channel_to_frequency(elems->ds_params[0]); 1615 freq = ieee80211_channel_to_frequency(elems->ds_params[0],
1616 rx_status->band);
1529 else 1617 else
1530 freq = rx_status->freq; 1618 freq = rx_status->freq;
1531 1619
@@ -1966,9 +2054,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1966 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); 2054 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
1967 2055
1968 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 2056 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
1969 max_tries = IEEE80211_MAX_NULLFUNC_TRIES; 2057 max_tries = max_nullfunc_tries;
1970 else 2058 else
1971 max_tries = IEEE80211_MAX_PROBE_TRIES; 2059 max_tries = max_probe_tries;
1972 2060
1973 /* ACK received for nullfunc probing frame */ 2061 /* ACK received for nullfunc probing frame */
1974 if (!ifmgd->probe_send_count) 2062 if (!ifmgd->probe_send_count)
@@ -1978,9 +2066,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1978#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 2066#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1979 wiphy_debug(local->hw.wiphy, 2067 wiphy_debug(local->hw.wiphy,
1980 "%s: No ack for nullfunc frame to" 2068 "%s: No ack for nullfunc frame to"
1981 " AP %pM, try %d\n", 2069 " AP %pM, try %d/%i\n",
1982 sdata->name, bssid, 2070 sdata->name, bssid,
1983 ifmgd->probe_send_count); 2071 ifmgd->probe_send_count, max_tries);
1984#endif 2072#endif
1985 ieee80211_mgd_probe_ap_send(sdata); 2073 ieee80211_mgd_probe_ap_send(sdata);
1986 } else { 2074 } else {
@@ -2000,17 +2088,17 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
2000 "%s: Failed to send nullfunc to AP %pM" 2088 "%s: Failed to send nullfunc to AP %pM"
2001 " after %dms, disconnecting.\n", 2089 " after %dms, disconnecting.\n",
2002 sdata->name, 2090 sdata->name,
2003 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 2091 bssid, probe_wait_ms);
2004#endif 2092#endif
2005 ieee80211_sta_connection_lost(sdata, bssid); 2093 ieee80211_sta_connection_lost(sdata, bssid);
2006 } else if (ifmgd->probe_send_count < max_tries) { 2094 } else if (ifmgd->probe_send_count < max_tries) {
2007#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 2095#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
2008 wiphy_debug(local->hw.wiphy, 2096 wiphy_debug(local->hw.wiphy,
2009 "%s: No probe response from AP %pM" 2097 "%s: No probe response from AP %pM"
2010 " after %dms, try %d\n", 2098 " after %dms, try %d/%i\n",
2011 sdata->name, 2099 sdata->name,
2012 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ, 2100 bssid, probe_wait_ms,
2013 ifmgd->probe_send_count); 2101 ifmgd->probe_send_count, max_tries);
2014#endif 2102#endif
2015 ieee80211_mgd_probe_ap_send(sdata); 2103 ieee80211_mgd_probe_ap_send(sdata);
2016 } else { 2104 } else {
@@ -2022,7 +2110,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
2022 "%s: No probe response from AP %pM" 2110 "%s: No probe response from AP %pM"
2023 " after %dms, disconnecting.\n", 2111 " after %dms, disconnecting.\n",
2024 sdata->name, 2112 sdata->name,
2025 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 2113 bssid, probe_wait_ms);
2026 2114
2027 ieee80211_sta_connection_lost(sdata, bssid); 2115 ieee80211_sta_connection_lost(sdata, bssid);
2028 } 2116 }
@@ -2260,6 +2348,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
2260 else 2348 else
2261 wk->type = IEEE80211_WORK_DIRECT_PROBE; 2349 wk->type = IEEE80211_WORK_DIRECT_PROBE;
2262 wk->chan = req->bss->channel; 2350 wk->chan = req->bss->channel;
2351 wk->chan_type = NL80211_CHAN_NO_HT;
2263 wk->sdata = sdata; 2352 wk->sdata = sdata;
2264 wk->done = ieee80211_probe_auth_done; 2353 wk->done = ieee80211_probe_auth_done;
2265 2354
@@ -2409,6 +2498,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2409 memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); 2498 memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN);
2410 2499
2411 wk->chan = req->bss->channel; 2500 wk->chan = req->bss->channel;
2501 wk->chan_type = NL80211_CHAN_NO_HT;
2412 wk->sdata = sdata; 2502 wk->sdata = sdata;
2413 wk->done = ieee80211_assoc_done; 2503 wk->done = ieee80211_assoc_done;
2414 if (!bss->dtim_period && 2504 if (!bss->dtim_period &&
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index b4e52676f3fb..13427b194ced 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -17,10 +17,14 @@
17#include "driver-trace.h" 17#include "driver-trace.h"
18 18
19/* 19/*
20 * inform AP that we will go to sleep so that it will buffer the frames 20 * Tell our hardware to disable PS.
21 * while we scan 21 * Optionally inform AP that we will go to sleep so that it will buffer
22 * the frames while we are doing off-channel work. This is optional
23 * because we *may* be doing work on-operating channel, and want our
24 * hardware unconditionally awake, but still let the AP send us normal frames.
22 */ 25 */
23static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) 26static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata,
27 bool tell_ap)
24{ 28{
25 struct ieee80211_local *local = sdata->local; 29 struct ieee80211_local *local = sdata->local;
26 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 30 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -41,8 +45,8 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
41 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 45 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
42 } 46 }
43 47
44 if (!(local->offchannel_ps_enabled) || 48 if (tell_ap && (!local->offchannel_ps_enabled ||
45 !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) 49 !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)))
46 /* 50 /*
47 * If power save was enabled, no need to send a nullfunc 51 * If power save was enabled, no need to send a nullfunc
48 * frame because AP knows that we are sleeping. But if the 52 * frame because AP knows that we are sleeping. But if the
@@ -77,6 +81,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
77 * we are sleeping, let's just enable power save mode in 81 * we are sleeping, let's just enable power save mode in
78 * hardware. 82 * hardware.
79 */ 83 */
84 /* TODO: Only set hardware if CONF_PS changed?
85 * TODO: Should we set offchannel_ps_enabled to false?
86 */
80 local->hw.conf.flags |= IEEE80211_CONF_PS; 87 local->hw.conf.flags |= IEEE80211_CONF_PS;
81 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 88 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
82 } else if (local->hw.conf.dynamic_ps_timeout > 0) { 89 } else if (local->hw.conf.dynamic_ps_timeout > 0) {
@@ -95,63 +102,61 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
95 ieee80211_sta_reset_conn_monitor(sdata); 102 ieee80211_sta_reset_conn_monitor(sdata);
96} 103}
97 104
98void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) 105void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
106 bool offchannel_ps_enable)
99{ 107{
100 struct ieee80211_sub_if_data *sdata; 108 struct ieee80211_sub_if_data *sdata;
101 109
110 /*
111 * notify the AP about us leaving the channel and stop all
112 * STA interfaces.
113 */
102 mutex_lock(&local->iflist_mtx); 114 mutex_lock(&local->iflist_mtx);
103 list_for_each_entry(sdata, &local->interfaces, list) { 115 list_for_each_entry(sdata, &local->interfaces, list) {
104 if (!ieee80211_sdata_running(sdata)) 116 if (!ieee80211_sdata_running(sdata))
105 continue; 117 continue;
106 118
107 /* disable beaconing */ 119 if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
120 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
121
122 /* Check to see if we should disable beaconing. */
108 if (sdata->vif.type == NL80211_IFTYPE_AP || 123 if (sdata->vif.type == NL80211_IFTYPE_AP ||
109 sdata->vif.type == NL80211_IFTYPE_ADHOC || 124 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
110 sdata->vif.type == NL80211_IFTYPE_MESH_POINT) 125 sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
111 ieee80211_bss_info_change_notify( 126 ieee80211_bss_info_change_notify(
112 sdata, BSS_CHANGED_BEACON_ENABLED); 127 sdata, BSS_CHANGED_BEACON_ENABLED);
113 128
114 /* 129 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
115 * only handle non-STA interfaces here, STA interfaces
116 * are handled in ieee80211_offchannel_stop_station(),
117 * e.g., from the background scan state machine.
118 *
119 * In addition, do not stop monitor interface to allow it to be
120 * used from user space controlled off-channel operations.
121 */
122 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
123 sdata->vif.type != NL80211_IFTYPE_MONITOR) {
124 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
125 netif_tx_stop_all_queues(sdata->dev); 130 netif_tx_stop_all_queues(sdata->dev);
131 if (offchannel_ps_enable &&
132 (sdata->vif.type == NL80211_IFTYPE_STATION) &&
133 sdata->u.mgd.associated)
134 ieee80211_offchannel_ps_enable(sdata, true);
126 } 135 }
127 } 136 }
128 mutex_unlock(&local->iflist_mtx); 137 mutex_unlock(&local->iflist_mtx);
129} 138}
130 139
131void ieee80211_offchannel_stop_station(struct ieee80211_local *local) 140void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local,
141 bool tell_ap)
132{ 142{
133 struct ieee80211_sub_if_data *sdata; 143 struct ieee80211_sub_if_data *sdata;
134 144
135 /*
136 * notify the AP about us leaving the channel and stop all STA interfaces
137 */
138 mutex_lock(&local->iflist_mtx); 145 mutex_lock(&local->iflist_mtx);
139 list_for_each_entry(sdata, &local->interfaces, list) { 146 list_for_each_entry(sdata, &local->interfaces, list) {
140 if (!ieee80211_sdata_running(sdata)) 147 if (!ieee80211_sdata_running(sdata))
141 continue; 148 continue;
142 149
143 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 150 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
144 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); 151 sdata->u.mgd.associated)
145 netif_tx_stop_all_queues(sdata->dev); 152 ieee80211_offchannel_ps_enable(sdata, tell_ap);
146 if (sdata->u.mgd.associated)
147 ieee80211_offchannel_ps_enable(sdata);
148 }
149 } 153 }
150 mutex_unlock(&local->iflist_mtx); 154 mutex_unlock(&local->iflist_mtx);
151} 155}
152 156
153void ieee80211_offchannel_return(struct ieee80211_local *local, 157void ieee80211_offchannel_return(struct ieee80211_local *local,
154 bool enable_beaconing) 158 bool enable_beaconing,
159 bool offchannel_ps_disable)
155{ 160{
156 struct ieee80211_sub_if_data *sdata; 161 struct ieee80211_sub_if_data *sdata;
157 162
@@ -161,7 +166,8 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
161 continue; 166 continue;
162 167
163 /* Tell AP we're back */ 168 /* Tell AP we're back */
164 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 169 if (offchannel_ps_disable &&
170 sdata->vif.type == NL80211_IFTYPE_STATION) {
165 if (sdata->u.mgd.associated) 171 if (sdata->u.mgd.associated)
166 ieee80211_offchannel_ps_disable(sdata); 172 ieee80211_offchannel_ps_disable(sdata);
167 } 173 }
@@ -181,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
181 netif_tx_wake_all_queues(sdata->dev); 187 netif_tx_wake_all_queues(sdata->dev);
182 } 188 }
183 189
184 /* re-enable beaconing */ 190 /* Check to see if we should re-enable beaconing */
185 if (enable_beaconing && 191 if (enable_beaconing &&
186 (sdata->vif.type == NL80211_IFTYPE_AP || 192 (sdata->vif.type == NL80211_IFTYPE_AP ||
187 sdata->vif.type == NL80211_IFTYPE_ADHOC || 193 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index e37355193ed1..730778a2c90c 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -6,7 +6,7 @@
6#include "driver-ops.h" 6#include "driver-ops.h"
7#include "led.h" 7#include "led.h"
8 8
9int __ieee80211_suspend(struct ieee80211_hw *hw) 9int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
10{ 10{
11 struct ieee80211_local *local = hw_to_local(hw); 11 struct ieee80211_local *local = hw_to_local(hw);
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
@@ -14,12 +14,23 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
14 14
15 ieee80211_scan_cancel(local); 15 ieee80211_scan_cancel(local);
16 16
17 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
18 mutex_lock(&local->sta_mtx);
19 list_for_each_entry(sta, &local->sta_list, list) {
20 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
21 ieee80211_sta_tear_down_BA_sessions(sta, true);
22 }
23 mutex_unlock(&local->sta_mtx);
24 }
25
17 ieee80211_stop_queues_by_reason(hw, 26 ieee80211_stop_queues_by_reason(hw,
18 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 27 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
19 28
20 /* flush out all packets */ 29 /* flush out all packets */
21 synchronize_net(); 30 synchronize_net();
22 31
32 drv_flush(local, false);
33
23 local->quiescing = true; 34 local->quiescing = true;
24 /* make quiescing visible to timers everywhere */ 35 /* make quiescing visible to timers everywhere */
25 mb(); 36 mb();
@@ -36,6 +47,16 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
36 cancel_work_sync(&local->dynamic_ps_enable_work); 47 cancel_work_sync(&local->dynamic_ps_enable_work);
37 del_timer_sync(&local->dynamic_ps_timer); 48 del_timer_sync(&local->dynamic_ps_timer);
38 49
50 local->wowlan = wowlan && local->open_count;
51 if (local->wowlan) {
52 int err = drv_suspend(local, wowlan);
53 if (err) {
54 local->quiescing = false;
55 return err;
56 }
57 goto suspend;
58 }
59
39 /* disable keys */ 60 /* disable keys */
40 list_for_each_entry(sdata, &local->interfaces, list) 61 list_for_each_entry(sdata, &local->interfaces, list)
41 ieee80211_disable_keys(sdata); 62 ieee80211_disable_keys(sdata);
@@ -43,11 +64,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
43 /* tear down aggregation sessions and remove STAs */ 64 /* tear down aggregation sessions and remove STAs */
44 mutex_lock(&local->sta_mtx); 65 mutex_lock(&local->sta_mtx);
45 list_for_each_entry(sta, &local->sta_list, list) { 66 list_for_each_entry(sta, &local->sta_list, list) {
46 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
47 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
48 ieee80211_sta_tear_down_BA_sessions(sta, true);
49 }
50
51 if (sta->uploaded) { 67 if (sta->uploaded) {
52 sdata = sta->sdata; 68 sdata = sta->sdata;
53 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 69 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -98,6 +114,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
98 if (local->open_count) 114 if (local->open_count)
99 ieee80211_stop_device(local); 115 ieee80211_stop_device(local);
100 116
117 suspend:
101 local->suspended = true; 118 local->suspended = true;
102 /* need suspended to be visible before quiescing is false */ 119 /* need suspended to be visible before quiescing is false */
103 barrier(); 120 barrier();
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 778c604d7939..8adac67395f7 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -417,8 +417,8 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
417 tx_time_single = mr->ack_time + mr->perfect_tx_time; 417 tx_time_single = mr->ack_time + mr->perfect_tx_time;
418 418
419 /* contention window */ 419 /* contention window */
420 tx_time_single += t_slot + min(cw, mp->cw_max); 420 tx_time_single += (t_slot * cw) >> 1;
421 cw = (cw << 1) | 1; 421 cw = min((cw << 1) | 1, mp->cw_max);
422 422
423 tx_time += tx_time_single; 423 tx_time += tx_time_single;
424 tx_time_cts += tx_time_single + mi->sp_ack_dur; 424 tx_time_cts += tx_time_single + mi->sp_ack_dur;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 165a4518bb48..333b5118be6d 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -259,7 +259,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
259 } 259 }
260 } 260 }
261 261
262 /* try to sample up to half of the availble rates during each interval */ 262 /* try to sample up to half of the available rates during each interval */
263 mi->sample_count *= 4; 263 mi->sample_count *= 4;
264 264
265 cur_prob = 0; 265 cur_prob = 0;
@@ -415,10 +415,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
415 mi->sample_count--; 415 mi->sample_count--;
416 } 416 }
417 417
418 if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { 418 if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
419 mi->sample_packets += info->status.ampdu_len; 419 mi->sample_packets += info->status.ampdu_len;
420 minstrel_next_sample_idx(mi);
421 }
422 420
423 for (i = 0; !last; i++) { 421 for (i = 0; !last; i++) {
424 last = (i == IEEE80211_TX_MAX_RATES - 1) || 422 last = (i == IEEE80211_TX_MAX_RATES - 1) ||
@@ -466,6 +464,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
466 const struct mcs_group *group; 464 const struct mcs_group *group;
467 unsigned int tx_time, tx_time_rtscts, tx_time_data; 465 unsigned int tx_time, tx_time_rtscts, tx_time_data;
468 unsigned int cw = mp->cw_min; 466 unsigned int cw = mp->cw_min;
467 unsigned int ctime = 0;
469 unsigned int t_slot = 9; /* FIXME */ 468 unsigned int t_slot = 9; /* FIXME */
470 unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); 469 unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len);
471 470
@@ -482,13 +481,27 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
482 481
483 group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; 482 group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
484 tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; 483 tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len;
485 tx_time = 2 * (t_slot + mi->overhead + tx_time_data); 484
486 tx_time_rtscts = 2 * (t_slot + mi->overhead_rtscts + tx_time_data); 485 /* Contention time for first 2 tries */
486 ctime = (t_slot * cw) >> 1;
487 cw = min((cw << 1) | 1, mp->cw_max);
488 ctime += (t_slot * cw) >> 1;
489 cw = min((cw << 1) | 1, mp->cw_max);
490
491 /* Total TX time for data and Contention after first 2 tries */
492 tx_time = ctime + 2 * (mi->overhead + tx_time_data);
493 tx_time_rtscts = ctime + 2 * (mi->overhead_rtscts + tx_time_data);
494
495 /* See how many more tries we can fit inside segment size */
487 do { 496 do {
488 cw = (cw << 1) | 1; 497 /* Contention time for this try */
489 cw = min(cw, mp->cw_max); 498 ctime = (t_slot * cw) >> 1;
490 tx_time += cw + t_slot + mi->overhead; 499 cw = min((cw << 1) | 1, mp->cw_max);
491 tx_time_rtscts += cw + t_slot + mi->overhead_rtscts; 500
501 /* Total TX time after this try */
502 tx_time += ctime + mi->overhead + tx_time_data;
503 tx_time_rtscts += ctime + mi->overhead_rtscts + tx_time_data;
504
492 if (tx_time_rtscts < mp->segment_size) 505 if (tx_time_rtscts < mp->segment_size)
493 mr->retry_count_rtscts++; 506 mr->retry_count_rtscts++;
494 } while ((tx_time < mp->segment_size) && 507 } while ((tx_time < mp->segment_size) &&
@@ -519,9 +532,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
519 rate->count = mr->retry_count; 532 rate->count = mr->retry_count;
520 533
521 rate->flags = IEEE80211_TX_RC_MCS | group->flags; 534 rate->flags = IEEE80211_TX_RC_MCS | group->flags;
522 if (txrc->short_preamble) 535 if (rtscts)
523 rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
524 if (txrc->rts || rtscts)
525 rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; 536 rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS;
526 rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; 537 rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES;
527} 538}
@@ -553,13 +564,14 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
553 sample_idx = sample_table[mg->column][mg->index]; 564 sample_idx = sample_table[mg->column][mg->index];
554 mr = &mg->rates[sample_idx]; 565 mr = &mg->rates[sample_idx];
555 sample_idx += mi->sample_group * MCS_GROUP_RATES; 566 sample_idx += mi->sample_group * MCS_GROUP_RATES;
567 minstrel_next_sample_idx(mi);
556 568
557 /* 569 /*
558 * When not using MRR, do not sample if the probability is already 570 * When not using MRR, do not sample if the probability is already
559 * higher than 95% to avoid wasting airtime 571 * higher than 95% to avoid wasting airtime
560 */ 572 */
561 if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) 573 if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100)))
562 goto next; 574 return -1;
563 575
564 /* 576 /*
565 * Make sure that lower rates get sampled only occasionally, 577 * Make sure that lower rates get sampled only occasionally,
@@ -568,17 +580,13 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
568 if (minstrel_get_duration(sample_idx) > 580 if (minstrel_get_duration(sample_idx) >
569 minstrel_get_duration(mi->max_tp_rate)) { 581 minstrel_get_duration(mi->max_tp_rate)) {
570 if (mr->sample_skipped < 20) 582 if (mr->sample_skipped < 20)
571 goto next; 583 return -1;
572 584
573 if (mi->sample_slow++ > 2) 585 if (mi->sample_slow++ > 2)
574 goto next; 586 return -1;
575 } 587 }
576 588
577 return sample_idx; 589 return sample_idx;
578
579next:
580 minstrel_next_sample_idx(mi);
581 return -1;
582} 590}
583 591
584static void 592static void
@@ -605,19 +613,46 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
605 sample = true; 613 sample = true;
606 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, 614 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx,
607 txrc, true, false); 615 txrc, true, false);
608 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
609 txrc, false, false);
610 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; 616 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
611 } else { 617 } else {
612 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, 618 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate,
613 txrc, false, false); 619 txrc, false, false);
614 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
615 txrc, false, true);
616 } 620 }
617 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample);
618 621
619 ar[3].count = 0; 622 if (mp->hw->max_rates >= 3) {
620 ar[3].idx = -1; 623 /*
624 * At least 3 tx rates supported, use
625 * sample_rate -> max_tp_rate -> max_prob_rate for sampling and
626 * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default.
627 */
628 if (sample_idx >= 0)
629 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
630 txrc, false, false);
631 else
632 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
633 txrc, false, true);
634
635 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate,
636 txrc, false, !sample);
637
638 ar[3].count = 0;
639 ar[3].idx = -1;
640 } else if (mp->hw->max_rates == 2) {
641 /*
642 * Only 2 tx rates supported, use
643 * sample_rate -> max_prob_rate for sampling and
644 * max_tp_rate -> max_prob_rate by default.
645 */
646 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate,
647 txrc, false, !sample);
648
649 ar[2].count = 0;
650 ar[2].idx = -1;
651 } else {
652 /* Not using MRR, only use the first rate */
653 ar[1].count = 0;
654 ar[1].idx = -1;
655 }
621 656
622 mi->total_packets++; 657 mi->total_packets++;
623 658
@@ -639,18 +674,14 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
639 struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; 674 struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
640 struct ieee80211_local *local = hw_to_local(mp->hw); 675 struct ieee80211_local *local = hw_to_local(mp->hw);
641 u16 sta_cap = sta->ht_cap.cap; 676 u16 sta_cap = sta->ht_cap.cap;
677 int n_supported = 0;
642 int ack_dur; 678 int ack_dur;
643 int stbc; 679 int stbc;
644 int i; 680 int i;
645 681
646 /* fall back to the old minstrel for legacy stations */ 682 /* fall back to the old minstrel for legacy stations */
647 if (!sta->ht_cap.ht_supported) { 683 if (!sta->ht_cap.ht_supported)
648 msp->is_ht = false; 684 goto use_legacy;
649 memset(&msp->legacy, 0, sizeof(msp->legacy));
650 msp->legacy.r = msp->ratelist;
651 msp->legacy.sample_table = msp->sample_table;
652 return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
653 }
654 685
655 BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != 686 BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) !=
656 MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); 687 MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS);
@@ -705,7 +736,22 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
705 736
706 mi->groups[i].supported = 737 mi->groups[i].supported =
707 mcs->rx_mask[minstrel_mcs_groups[i].streams - 1]; 738 mcs->rx_mask[minstrel_mcs_groups[i].streams - 1];
739
740 if (mi->groups[i].supported)
741 n_supported++;
708 } 742 }
743
744 if (!n_supported)
745 goto use_legacy;
746
747 return;
748
749use_legacy:
750 msp->is_ht = false;
751 memset(&msp->legacy, 0, sizeof(msp->legacy));
752 msp->legacy.r = msp->ratelist;
753 msp->legacy.sample_table = msp->sample_table;
754 return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
709} 755}
710 756
711static void 757static void
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 1a873f00691a..19111c7bf454 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -24,9 +24,6 @@
24/* Fixed point arithmetic shifting amount. */ 24/* Fixed point arithmetic shifting amount. */
25#define RC_PID_ARITH_SHIFT 8 25#define RC_PID_ARITH_SHIFT 8
26 26
27/* Fixed point arithmetic factor. */
28#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
29
30/* Proportional PID component coefficient. */ 27/* Proportional PID component coefficient. */
31#define RC_PID_COEFF_P 15 28#define RC_PID_COEFF_P 15
32/* Integral PID component coefficient. */ 29/* Integral PID component coefficient. */
@@ -80,7 +77,7 @@ union rc_pid_event_data {
80}; 77};
81 78
82struct rc_pid_event { 79struct rc_pid_event {
83 /* The time when the event occured */ 80 /* The time when the event occurred */
84 unsigned long timestamp; 81 unsigned long timestamp;
85 82
86 /* Event ID number */ 83 /* Event ID number */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a6701ed87f0d..7fa8c6be7bf0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -77,7 +77,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
77 /* always present fields */ 77 /* always present fields */
78 len = sizeof(struct ieee80211_radiotap_header) + 9; 78 len = sizeof(struct ieee80211_radiotap_header) + 9;
79 79
80 if (status->flag & RX_FLAG_TSFT) 80 if (status->flag & RX_FLAG_MACTIME_MPDU)
81 len += 8; 81 len += 8;
82 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 82 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
83 len += 1; 83 len += 1;
@@ -85,6 +85,9 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
85 if (len & 1) /* padding for RX_FLAGS if necessary */ 85 if (len & 1) /* padding for RX_FLAGS if necessary */
86 len++; 86 len++;
87 87
88 if (status->flag & RX_FLAG_HT) /* HT info */
89 len += 3;
90
88 return len; 91 return len;
89} 92}
90 93
@@ -120,7 +123,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
120 /* the order of the following fields is important */ 123 /* the order of the following fields is important */
121 124
122 /* IEEE80211_RADIOTAP_TSFT */ 125 /* IEEE80211_RADIOTAP_TSFT */
123 if (status->flag & RX_FLAG_TSFT) { 126 if (status->flag & RX_FLAG_MACTIME_MPDU) {
124 put_unaligned_le64(status->mactime, pos); 127 put_unaligned_le64(status->mactime, pos);
125 rthdr->it_present |= 128 rthdr->it_present |=
126 cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); 129 cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
@@ -139,11 +142,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
139 /* IEEE80211_RADIOTAP_RATE */ 142 /* IEEE80211_RADIOTAP_RATE */
140 if (status->flag & RX_FLAG_HT) { 143 if (status->flag & RX_FLAG_HT) {
141 /* 144 /*
142 * TODO: add following information into radiotap header once 145 * MCS information is a separate field in radiotap,
143 * suitable fields are defined for it: 146 * added below. The byte here is needed as padding
144 * - MCS index (status->rate_idx) 147 * for the channel though, so initialise it to 0.
145 * - HT40 (status->flag & RX_FLAG_40MHZ)
146 * - short-GI (status->flag & RX_FLAG_SHORT_GI)
147 */ 148 */
148 *pos = 0; 149 *pos = 0;
149 } else { 150 } else {
@@ -193,6 +194,20 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
193 rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; 194 rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP;
194 put_unaligned_le16(rx_flags, pos); 195 put_unaligned_le16(rx_flags, pos);
195 pos += 2; 196 pos += 2;
197
198 if (status->flag & RX_FLAG_HT) {
199 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
200 *pos++ = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
201 IEEE80211_RADIOTAP_MCS_HAVE_GI |
202 IEEE80211_RADIOTAP_MCS_HAVE_BW;
203 *pos = 0;
204 if (status->flag & RX_FLAG_SHORT_GI)
205 *pos |= IEEE80211_RADIOTAP_MCS_SGI;
206 if (status->flag & RX_FLAG_40MHZ)
207 *pos |= IEEE80211_RADIOTAP_MCS_BW_40;
208 pos++;
209 *pos++ = status->rate_idx;
210 }
196} 211}
197 212
198/* 213/*
@@ -367,7 +382,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
367 * specs were sane enough this time around to require padding each A-MSDU 382 * specs were sane enough this time around to require padding each A-MSDU
368 * subframe to a length that is a multiple of four. 383 * subframe to a length that is a multiple of four.
369 * 384 *
370 * Padding like Atheros hardware adds which is inbetween the 802.11 header and 385 * Padding like Atheros hardware adds which is between the 802.11 header and
371 * the payload is not supported, the driver is required to move the 802.11 386 * the payload is not supported, the driver is required to move the 802.11
372 * header to be directly in front of the payload in that case. 387 * header to be directly in front of the payload in that case.
373 */ 388 */
@@ -389,19 +404,15 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
389 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); 404 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
390 struct sk_buff *skb = rx->skb; 405 struct sk_buff *skb = rx->skb;
391 406
392 if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN))) 407 if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
408 !local->sched_scanning))
393 return RX_CONTINUE; 409 return RX_CONTINUE;
394 410
395 if (test_bit(SCAN_HW_SCANNING, &local->scanning)) 411 if (test_bit(SCAN_HW_SCANNING, &local->scanning) ||
412 test_bit(SCAN_SW_SCANNING, &local->scanning) ||
413 local->sched_scanning)
396 return ieee80211_scan_rx(rx->sdata, skb); 414 return ieee80211_scan_rx(rx->sdata, skb);
397 415
398 if (test_bit(SCAN_SW_SCANNING, &local->scanning)) {
399 /* drop all the other packets during a software scan anyway */
400 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED)
401 dev_kfree_skb(skb);
402 return RX_QUEUED;
403 }
404
405 /* scanning finished during invoking of handlers */ 416 /* scanning finished during invoking of handlers */
406 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); 417 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan);
407 return RX_DROP_UNUSABLE; 418 return RX_DROP_UNUSABLE;
@@ -479,22 +490,26 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
479 * establisment frame, beacon or probe, drop the frame. 490 * establisment frame, beacon or probe, drop the frame.
480 */ 491 */
481 492
482 if (!rx->sta || sta_plink_state(rx->sta) != PLINK_ESTAB) { 493 if (!rx->sta || sta_plink_state(rx->sta) != NL80211_PLINK_ESTAB) {
483 struct ieee80211_mgmt *mgmt; 494 struct ieee80211_mgmt *mgmt;
484 495
485 if (!ieee80211_is_mgmt(hdr->frame_control)) 496 if (!ieee80211_is_mgmt(hdr->frame_control))
486 return RX_DROP_MONITOR; 497 return RX_DROP_MONITOR;
487 498
488 if (ieee80211_is_action(hdr->frame_control)) { 499 if (ieee80211_is_action(hdr->frame_control)) {
500 u8 category;
489 mgmt = (struct ieee80211_mgmt *)hdr; 501 mgmt = (struct ieee80211_mgmt *)hdr;
490 if (mgmt->u.action.category != WLAN_CATEGORY_MESH_PLINK) 502 category = mgmt->u.action.category;
503 if (category != WLAN_CATEGORY_MESH_ACTION &&
504 category != WLAN_CATEGORY_SELF_PROTECTED)
491 return RX_DROP_MONITOR; 505 return RX_DROP_MONITOR;
492 return RX_CONTINUE; 506 return RX_CONTINUE;
493 } 507 }
494 508
495 if (ieee80211_is_probe_req(hdr->frame_control) || 509 if (ieee80211_is_probe_req(hdr->frame_control) ||
496 ieee80211_is_probe_resp(hdr->frame_control) || 510 ieee80211_is_probe_resp(hdr->frame_control) ||
497 ieee80211_is_beacon(hdr->frame_control)) 511 ieee80211_is_beacon(hdr->frame_control) ||
512 ieee80211_is_auth(hdr->frame_control))
498 return RX_CONTINUE; 513 return RX_CONTINUE;
499 514
500 return RX_DROP_MONITOR; 515 return RX_DROP_MONITOR;
@@ -604,7 +619,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
604 skipped++; 619 skipped++;
605 continue; 620 continue;
606 } 621 }
607 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] + 622 if (skipped &&
623 !time_after(jiffies, tid_agg_rx->reorder_time[j] +
608 HT_RX_REORDER_BUF_TIMEOUT)) 624 HT_RX_REORDER_BUF_TIMEOUT))
609 goto set_release_timer; 625 goto set_release_timer;
610 626
@@ -641,7 +657,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
641 set_release_timer: 657 set_release_timer:
642 658
643 mod_timer(&tid_agg_rx->reorder_timer, 659 mod_timer(&tid_agg_rx->reorder_timer,
644 tid_agg_rx->reorder_time[j] + 660 tid_agg_rx->reorder_time[j] + 1 +
645 HT_RX_REORDER_BUF_TIMEOUT); 661 HT_RX_REORDER_BUF_TIMEOUT);
646 } else { 662 } else {
647 del_timer(&tid_agg_rx->reorder_timer); 663 del_timer(&tid_agg_rx->reorder_timer);
@@ -698,6 +714,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
698 /* 714 /*
699 * If the current MPDU is in the right order and nothing else 715 * If the current MPDU is in the right order and nothing else
700 * is stored we can process it directly, no need to buffer it. 716 * is stored we can process it directly, no need to buffer it.
717 * If it is first but there's something stored, we may be able
718 * to release frames after this one.
701 */ 719 */
702 if (mpdu_seq_num == tid_agg_rx->head_seq_num && 720 if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
703 tid_agg_rx->stored_mpdu_num == 0) { 721 tid_agg_rx->stored_mpdu_num == 0) {
@@ -798,7 +816,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
798 rx->local->dot11FrameDuplicateCount++; 816 rx->local->dot11FrameDuplicateCount++;
799 rx->sta->num_duplicates++; 817 rx->sta->num_duplicates++;
800 } 818 }
801 return RX_DROP_MONITOR; 819 return RX_DROP_UNUSABLE;
802 } else 820 } else
803 rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl; 821 rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl;
804 } 822 }
@@ -824,18 +842,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
824 ieee80211_is_pspoll(hdr->frame_control)) && 842 ieee80211_is_pspoll(hdr->frame_control)) &&
825 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && 843 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC &&
826 rx->sdata->vif.type != NL80211_IFTYPE_WDS && 844 rx->sdata->vif.type != NL80211_IFTYPE_WDS &&
827 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { 845 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC))))
828 if ((!ieee80211_has_fromds(hdr->frame_control) &&
829 !ieee80211_has_tods(hdr->frame_control) &&
830 ieee80211_is_data(hdr->frame_control)) ||
831 !(status->rx_flags & IEEE80211_RX_RA_MATCH)) {
832 /* Drop IBSS frames and frames for other hosts
833 * silently. */
834 return RX_DROP_MONITOR;
835 }
836
837 return RX_DROP_MONITOR; 846 return RX_DROP_MONITOR;
838 }
839 847
840 return RX_CONTINUE; 848 return RX_CONTINUE;
841} 849}
@@ -1088,7 +1096,8 @@ static void ap_sta_ps_start(struct sta_info *sta)
1088 1096
1089 atomic_inc(&sdata->bss->num_sta_ps); 1097 atomic_inc(&sdata->bss->num_sta_ps);
1090 set_sta_flags(sta, WLAN_STA_PS_STA); 1098 set_sta_flags(sta, WLAN_STA_PS_STA);
1091 drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); 1099 if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
1100 drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
1092#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 1101#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
1093 printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", 1102 printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n",
1094 sdata->name, sta->sta.addr, sta->sta.aid); 1103 sdata->name, sta->sta.addr, sta->sta.aid);
@@ -1117,6 +1126,27 @@ static void ap_sta_ps_end(struct sta_info *sta)
1117 ieee80211_sta_ps_deliver_wakeup(sta); 1126 ieee80211_sta_ps_deliver_wakeup(sta);
1118} 1127}
1119 1128
1129int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start)
1130{
1131 struct sta_info *sta_inf = container_of(sta, struct sta_info, sta);
1132 bool in_ps;
1133
1134 WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS));
1135
1136 /* Don't let the same PS state be set twice */
1137 in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA);
1138 if ((start && in_ps) || (!start && !in_ps))
1139 return -EINVAL;
1140
1141 if (start)
1142 ap_sta_ps_start(sta_inf);
1143 else
1144 ap_sta_ps_end(sta_inf);
1145
1146 return 0;
1147}
1148EXPORT_SYMBOL(ieee80211_sta_ps_transition);
1149
1120static ieee80211_rx_result debug_noinline 1150static ieee80211_rx_result debug_noinline
1121ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) 1151ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1122{ 1152{
@@ -1136,14 +1166,23 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1136 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { 1166 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) {
1137 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, 1167 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
1138 NL80211_IFTYPE_ADHOC); 1168 NL80211_IFTYPE_ADHOC);
1139 if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) 1169 if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) {
1140 sta->last_rx = jiffies; 1170 sta->last_rx = jiffies;
1171 if (ieee80211_is_data(hdr->frame_control)) {
1172 sta->last_rx_rate_idx = status->rate_idx;
1173 sta->last_rx_rate_flag = status->flag;
1174 }
1175 }
1141 } else if (!is_multicast_ether_addr(hdr->addr1)) { 1176 } else if (!is_multicast_ether_addr(hdr->addr1)) {
1142 /* 1177 /*
1143 * Mesh beacons will update last_rx when if they are found to 1178 * Mesh beacons will update last_rx when if they are found to
1144 * match the current local configuration when processed. 1179 * match the current local configuration when processed.
1145 */ 1180 */
1146 sta->last_rx = jiffies; 1181 sta->last_rx = jiffies;
1182 if (ieee80211_is_data(hdr->frame_control)) {
1183 sta->last_rx_rate_idx = status->rate_idx;
1184 sta->last_rx_rate_flag = status->flag;
1185 }
1147 } 1186 }
1148 1187
1149 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) 1188 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
@@ -1161,7 +1200,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1161 * Change STA power saving mode only at the end of a frame 1200 * Change STA power saving mode only at the end of a frame
1162 * exchange sequence. 1201 * exchange sequence.
1163 */ 1202 */
1164 if (!ieee80211_has_morefrags(hdr->frame_control) && 1203 if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) &&
1204 !ieee80211_has_morefrags(hdr->frame_control) &&
1165 !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && 1205 !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
1166 (rx->sdata->vif.type == NL80211_IFTYPE_AP || 1206 (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
1167 rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { 1207 rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
@@ -1552,21 +1592,43 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
1552} 1592}
1553 1593
1554static int 1594static int
1555__ieee80211_data_to_8023(struct ieee80211_rx_data *rx) 1595__ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control)
1556{ 1596{
1557 struct ieee80211_sub_if_data *sdata = rx->sdata; 1597 struct ieee80211_sub_if_data *sdata = rx->sdata;
1558 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 1598 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
1599 bool check_port_control = false;
1600 struct ethhdr *ehdr;
1601 int ret;
1559 1602
1603 *port_control = false;
1560 if (ieee80211_has_a4(hdr->frame_control) && 1604 if (ieee80211_has_a4(hdr->frame_control) &&
1561 sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) 1605 sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta)
1562 return -1; 1606 return -1;
1563 1607
1608 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1609 !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) {
1610
1611 if (!sdata->u.mgd.use_4addr)
1612 return -1;
1613 else
1614 check_port_control = true;
1615 }
1616
1564 if (is_multicast_ether_addr(hdr->addr1) && 1617 if (is_multicast_ether_addr(hdr->addr1) &&
1565 ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) || 1618 sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta)
1566 (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr)))
1567 return -1; 1619 return -1;
1568 1620
1569 return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); 1621 ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type);
1622 if (ret < 0)
1623 return ret;
1624
1625 ehdr = (struct ethhdr *) rx->skb->data;
1626 if (ehdr->h_proto == rx->sdata->control_port_protocol)
1627 *port_control = true;
1628 else if (check_port_control)
1629 return -1;
1630
1631 return 0;
1570} 1632}
1571 1633
1572/* 1634/*
@@ -1721,7 +1783,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1721 1783
1722 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, 1784 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
1723 rx->sdata->vif.type, 1785 rx->sdata->vif.type,
1724 rx->local->hw.extra_tx_headroom); 1786 rx->local->hw.extra_tx_headroom, true);
1725 1787
1726 while (!skb_queue_empty(&frame_list)) { 1788 while (!skb_queue_empty(&frame_list)) {
1727 rx->skb = __skb_dequeue(&frame_list); 1789 rx->skb = __skb_dequeue(&frame_list);
@@ -1864,6 +1926,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
1864 struct net_device *dev = sdata->dev; 1926 struct net_device *dev = sdata->dev;
1865 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 1927 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
1866 __le16 fc = hdr->frame_control; 1928 __le16 fc = hdr->frame_control;
1929 bool port_control;
1867 int err; 1930 int err;
1868 1931
1869 if (unlikely(!ieee80211_is_data(hdr->frame_control))) 1932 if (unlikely(!ieee80211_is_data(hdr->frame_control)))
@@ -1880,20 +1943,31 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
1880 sdata->vif.type == NL80211_IFTYPE_AP) 1943 sdata->vif.type == NL80211_IFTYPE_AP)
1881 return RX_DROP_MONITOR; 1944 return RX_DROP_MONITOR;
1882 1945
1883 err = __ieee80211_data_to_8023(rx); 1946 err = __ieee80211_data_to_8023(rx, &port_control);
1884 if (unlikely(err)) 1947 if (unlikely(err))
1885 return RX_DROP_UNUSABLE; 1948 return RX_DROP_UNUSABLE;
1886 1949
1887 if (!ieee80211_frame_allowed(rx, fc)) 1950 if (!ieee80211_frame_allowed(rx, fc))
1888 return RX_DROP_MONITOR; 1951 return RX_DROP_MONITOR;
1889 1952
1953 if (rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
1954 unlikely(port_control) && sdata->bss) {
1955 sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
1956 u.ap);
1957 dev = sdata->dev;
1958 rx->sdata = sdata;
1959 }
1960
1890 rx->skb->dev = dev; 1961 rx->skb->dev = dev;
1891 1962
1892 dev->stats.rx_packets++; 1963 dev->stats.rx_packets++;
1893 dev->stats.rx_bytes += rx->skb->len; 1964 dev->stats.rx_bytes += rx->skb->len;
1894 1965
1895 if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && 1966 if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 &&
1896 !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) { 1967 !is_multicast_ether_addr(
1968 ((struct ethhdr *)rx->skb->data)->h_dest) &&
1969 (!local->scanning &&
1970 !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) {
1897 mod_timer(&local->dynamic_ps_timer, jiffies + 1971 mod_timer(&local->dynamic_ps_timer, jiffies +
1898 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 1972 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
1899 } 1973 }
@@ -2136,7 +2210,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2136 goto handled; 2210 goto handled;
2137 } 2211 }
2138 break; 2212 break;
2139 case WLAN_CATEGORY_MESH_PLINK: 2213 case WLAN_CATEGORY_MESH_ACTION:
2140 if (!ieee80211_vif_is_mesh(&sdata->vif)) 2214 if (!ieee80211_vif_is_mesh(&sdata->vif))
2141 break; 2215 break;
2142 goto queue; 2216 goto queue;
@@ -2299,47 +2373,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2299 return RX_QUEUED; 2373 return RX_QUEUED;
2300} 2374}
2301 2375
2302static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr,
2303 struct ieee80211_rx_data *rx)
2304{
2305 int keyidx;
2306 unsigned int hdrlen;
2307
2308 hdrlen = ieee80211_hdrlen(hdr->frame_control);
2309 if (rx->skb->len >= hdrlen + 4)
2310 keyidx = rx->skb->data[hdrlen + 3] >> 6;
2311 else
2312 keyidx = -1;
2313
2314 if (!rx->sta) {
2315 /*
2316 * Some hardware seem to generate incorrect Michael MIC
2317 * reports; ignore them to avoid triggering countermeasures.
2318 */
2319 return;
2320 }
2321
2322 if (!ieee80211_has_protected(hdr->frame_control))
2323 return;
2324
2325 if (rx->sdata->vif.type == NL80211_IFTYPE_AP && keyidx) {
2326 /*
2327 * APs with pairwise keys should never receive Michael MIC
2328 * errors for non-zero keyidx because these are reserved for
2329 * group keys and only the AP is sending real multicast
2330 * frames in the BSS.
2331 */
2332 return;
2333 }
2334
2335 if (!ieee80211_is_data(hdr->frame_control) &&
2336 !ieee80211_is_auth(hdr->frame_control))
2337 return;
2338
2339 mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr, NULL,
2340 GFP_ATOMIC);
2341}
2342
2343/* TODO: use IEEE80211_RX_FRAGMENTED */ 2376/* TODO: use IEEE80211_RX_FRAGMENTED */
2344static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, 2377static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2345 struct ieee80211_rate *rate) 2378 struct ieee80211_rate *rate)
@@ -2488,7 +2521,6 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx)
2488 * same TID from the same station 2521 * same TID from the same station
2489 */ 2522 */
2490 rx->skb = skb; 2523 rx->skb = skb;
2491 rx->flags = 0;
2492 2524
2493 CALL_RXH(ieee80211_rx_h_decrypt) 2525 CALL_RXH(ieee80211_rx_h_decrypt)
2494 CALL_RXH(ieee80211_rx_h_check_more_data) 2526 CALL_RXH(ieee80211_rx_h_check_more_data)
@@ -2559,6 +2591,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
2559 .sdata = sta->sdata, 2591 .sdata = sta->sdata,
2560 .local = sta->local, 2592 .local = sta->local,
2561 .queue = tid, 2593 .queue = tid,
2594 .flags = 0,
2562 }; 2595 };
2563 struct tid_ampdu_rx *tid_agg_rx; 2596 struct tid_ampdu_rx *tid_agg_rx;
2564 2597
@@ -2590,7 +2623,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
2590 return 0; 2623 return 0;
2591 if (!multicast && 2624 if (!multicast &&
2592 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { 2625 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) {
2593 if (!(sdata->dev->flags & IFF_PROMISC)) 2626 if (!(sdata->dev->flags & IFF_PROMISC) ||
2627 sdata->u.mgd.use_4addr)
2594 return 0; 2628 return 0;
2595 status->rx_flags &= ~IEEE80211_RX_RA_MATCH; 2629 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2596 } 2630 }
@@ -2639,7 +2673,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
2639 return 0; 2673 return 0;
2640 } else if (!ieee80211_bssid_match(bssid, 2674 } else if (!ieee80211_bssid_match(bssid,
2641 sdata->vif.addr)) { 2675 sdata->vif.addr)) {
2642 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN)) 2676 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
2677 !ieee80211_is_beacon(hdr->frame_control))
2643 return 0; 2678 return 0;
2644 status->rx_flags &= ~IEEE80211_RX_RA_MATCH; 2679 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2645 } 2680 }
@@ -2681,18 +2716,12 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
2681 if (!prepares) 2716 if (!prepares)
2682 return false; 2717 return false;
2683 2718
2684 if (status->flag & RX_FLAG_MMIC_ERROR) {
2685 if (status->rx_flags & IEEE80211_RX_RA_MATCH)
2686 ieee80211_rx_michael_mic_report(hdr, rx);
2687 return false;
2688 }
2689
2690 if (!consume) { 2719 if (!consume) {
2691 skb = skb_copy(skb, GFP_ATOMIC); 2720 skb = skb_copy(skb, GFP_ATOMIC);
2692 if (!skb) { 2721 if (!skb) {
2693 if (net_ratelimit()) 2722 if (net_ratelimit())
2694 wiphy_debug(local->hw.wiphy, 2723 wiphy_debug(local->hw.wiphy,
2695 "failed to copy multicast frame for %s\n", 2724 "failed to copy skb for %s\n",
2696 sdata->name); 2725 sdata->name);
2697 return true; 2726 return true;
2698 } 2727 }
@@ -2730,7 +2759,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2730 local->dot11ReceivedFragmentCount++; 2759 local->dot11ReceivedFragmentCount++;
2731 2760
2732 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || 2761 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
2733 test_bit(SCAN_OFF_CHANNEL, &local->scanning))) 2762 test_bit(SCAN_SW_SCANNING, &local->scanning)))
2734 status->rx_flags |= IEEE80211_RX_IN_SCAN; 2763 status->rx_flags |= IEEE80211_RX_IN_SCAN;
2735 2764
2736 if (ieee80211_is_mgmt(fc)) 2765 if (ieee80211_is_mgmt(fc))
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index fb274db77e3c..d20046b5d8f4 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -15,6 +15,7 @@
15#include <linux/if_arp.h> 15#include <linux/if_arp.h>
16#include <linux/rtnetlink.h> 16#include <linux/rtnetlink.h>
17#include <linux/pm_qos_params.h> 17#include <linux/pm_qos_params.h>
18#include <linux/slab.h>
18#include <net/sch_generic.h> 19#include <net/sch_generic.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
20#include <net/mac80211.h> 21#include <net/mac80211.h>
@@ -170,7 +171,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
170 return RX_CONTINUE; 171 return RX_CONTINUE;
171 172
172 if (skb->len < 24) 173 if (skb->len < 24)
173 return RX_DROP_MONITOR; 174 return RX_CONTINUE;
174 175
175 presp = ieee80211_is_probe_resp(fc); 176 presp = ieee80211_is_probe_resp(fc);
176 if (presp) { 177 if (presp) {
@@ -196,7 +197,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
196 ieee802_11_parse_elems(elements, skb->len - baselen, &elems); 197 ieee802_11_parse_elems(elements, skb->len - baselen, &elems);
197 198
198 if (elems.ds_params && elems.ds_params_len == 1) 199 if (elems.ds_params && elems.ds_params_len == 1)
199 freq = ieee80211_channel_to_frequency(elems.ds_params[0]); 200 freq = ieee80211_channel_to_frequency(elems.ds_params[0],
201 rx_status->band);
200 else 202 else
201 freq = rx_status->freq; 203 freq = rx_status->freq;
202 204
@@ -211,6 +213,14 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
211 if (bss) 213 if (bss)
212 ieee80211_rx_bss_put(sdata->local, bss); 214 ieee80211_rx_bss_put(sdata->local, bss);
213 215
216 /* If we are on-operating-channel, and this packet is for the
217 * current channel, pass the pkt on up the stack so that
218 * the rest of the stack can make use of it.
219 */
220 if (ieee80211_cfg_on_oper_channel(sdata->local)
221 && (channel == sdata->local->oper_channel))
222 return RX_CONTINUE;
223
214 dev_kfree_skb(skb); 224 dev_kfree_skb(skb);
215 return RX_QUEUED; 225 return RX_QUEUED;
216} 226}
@@ -249,10 +259,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
249 return true; 259 return true;
250} 260}
251 261
252static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, 262static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
253 bool was_hw_scan) 263 bool was_hw_scan)
254{ 264{
255 struct ieee80211_local *local = hw_to_local(hw); 265 struct ieee80211_local *local = hw_to_local(hw);
266 bool on_oper_chan;
267 bool enable_beacons = false;
256 268
257 lockdep_assert_held(&local->mtx); 269 lockdep_assert_held(&local->mtx);
258 270
@@ -266,12 +278,12 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
266 aborted = true; 278 aborted = true;
267 279
268 if (WARN_ON(!local->scan_req)) 280 if (WARN_ON(!local->scan_req))
269 return false; 281 return;
270 282
271 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { 283 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
272 int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req); 284 int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req);
273 if (rc == 0) 285 if (rc == 0)
274 return false; 286 return;
275 } 287 }
276 288
277 kfree(local->hw_scan_req); 289 kfree(local->hw_scan_req);
@@ -285,24 +297,28 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
285 local->scanning = 0; 297 local->scanning = 0;
286 local->scan_channel = NULL; 298 local->scan_channel = NULL;
287 299
288 return true; 300 on_oper_chan = ieee80211_cfg_on_oper_channel(local);
289}
290 301
291static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, 302 if (was_hw_scan || !on_oper_chan)
292 bool was_hw_scan) 303 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
293{ 304 else
294 struct ieee80211_local *local = hw_to_local(hw); 305 /* Set power back to normal operating levels. */
306 ieee80211_hw_config(local, 0);
295 307
296 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
297 if (!was_hw_scan) { 308 if (!was_hw_scan) {
309 bool on_oper_chan2;
298 ieee80211_configure_filter(local); 310 ieee80211_configure_filter(local);
299 drv_sw_scan_complete(local); 311 drv_sw_scan_complete(local);
300 ieee80211_offchannel_return(local, true); 312 on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
313 /* We should always be on-channel at this point. */
314 WARN_ON(!on_oper_chan2);
315 if (on_oper_chan2 && (on_oper_chan != on_oper_chan2))
316 enable_beacons = true;
317
318 ieee80211_offchannel_return(local, enable_beacons, true);
301 } 319 }
302 320
303 mutex_lock(&local->mtx);
304 ieee80211_recalc_idle(local); 321 ieee80211_recalc_idle(local);
305 mutex_unlock(&local->mtx);
306 322
307 ieee80211_mlme_notify_scan_completed(local); 323 ieee80211_mlme_notify_scan_completed(local);
308 ieee80211_ibss_notify_scan_completed(local); 324 ieee80211_ibss_notify_scan_completed(local);
@@ -340,16 +356,21 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
340 */ 356 */
341 drv_sw_scan_start(local); 357 drv_sw_scan_start(local);
342 358
343 ieee80211_offchannel_stop_beaconing(local);
344
345 local->leave_oper_channel_time = 0; 359 local->leave_oper_channel_time = 0;
346 local->next_scan_state = SCAN_DECISION; 360 local->next_scan_state = SCAN_DECISION;
347 local->scan_channel_idx = 0; 361 local->scan_channel_idx = 0;
348 362
349 drv_flush(local, false); 363 /* We always want to use off-channel PS, even if we
364 * are not really leaving oper-channel. Don't
365 * tell the AP though, as long as we are on-channel.
366 */
367 ieee80211_offchannel_enable_all_ps(local, false);
350 368
351 ieee80211_configure_filter(local); 369 ieee80211_configure_filter(local);
352 370
371 /* We need to set power level at maximum rate for scanning. */
372 ieee80211_hw_config(local, 0);
373
353 ieee80211_queue_delayed_work(&local->hw, 374 ieee80211_queue_delayed_work(&local->hw,
354 &local->scan_work, 375 &local->scan_work,
355 IEEE80211_CHANNEL_TIME); 376 IEEE80211_CHANNEL_TIME);
@@ -486,7 +507,20 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
486 } 507 }
487 mutex_unlock(&local->iflist_mtx); 508 mutex_unlock(&local->iflist_mtx);
488 509
489 if (local->scan_channel) { 510 next_chan = local->scan_req->channels[local->scan_channel_idx];
511
512 if (ieee80211_cfg_on_oper_channel(local)) {
513 /* We're currently on operating channel. */
514 if (next_chan == local->oper_channel)
515 /* We don't need to move off of operating channel. */
516 local->next_scan_state = SCAN_SET_CHANNEL;
517 else
518 /*
519 * We do need to leave operating channel, as next
520 * scan is somewhere else.
521 */
522 local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL;
523 } else {
490 /* 524 /*
491 * we're currently scanning a different channel, let's 525 * we're currently scanning a different channel, let's
492 * see if we can scan another channel without interfering 526 * see if we can scan another channel without interfering
@@ -502,7 +536,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
502 * 536 *
503 * Otherwise switch back to the operating channel. 537 * Otherwise switch back to the operating channel.
504 */ 538 */
505 next_chan = local->scan_req->channels[local->scan_channel_idx];
506 539
507 bad_latency = time_after(jiffies + 540 bad_latency = time_after(jiffies +
508 ieee80211_scan_get_channel_time(next_chan), 541 ieee80211_scan_get_channel_time(next_chan),
@@ -520,12 +553,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
520 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; 553 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL;
521 else 554 else
522 local->next_scan_state = SCAN_SET_CHANNEL; 555 local->next_scan_state = SCAN_SET_CHANNEL;
523 } else {
524 /*
525 * we're on the operating channel currently, let's
526 * leave that channel now to scan another one
527 */
528 local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL;
529 } 556 }
530 557
531 *next_delay = 0; 558 *next_delay = 0;
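The hunk above rewrites the scan decision step so the state machine first asks whether the device is already sitting on the operating channel, and only then falls back to the latency/listen-interval heuristics. The following is a minimal standalone C sketch of that selection logic, not mac80211 code: the channel comparison, association check and latency tests are reduced to plain booleans, and the enum only carries the three states this step can choose between.

#include <stdbool.h>
#include <stdio.h>

enum scan_state {
        SCAN_SET_CHANNEL,
        SCAN_LEAVE_OPER_CHANNEL,
        SCAN_ENTER_OPER_CHANNEL,
};

static enum scan_state scan_decide(bool on_oper_chan,
                                   bool next_is_oper_chan,
                                   bool associated,
                                   bool bad_latency_or_listen)
{
        if (on_oper_chan) {
                /* Already home: only leave if the next scan channel
                 * is somewhere else. */
                return next_is_oper_chan ? SCAN_SET_CHANNEL
                                         : SCAN_LEAVE_OPER_CHANNEL;
        }

        /* Off-channel: go back to the operating channel if we are
         * associated and traffic latency would suffer; otherwise
         * keep scanning the next channel. */
        if (associated && bad_latency_or_listen)
                return SCAN_ENTER_OPER_CHANNEL;
        return SCAN_SET_CHANNEL;
}

int main(void)
{
        /* On the operating channel, next channel elsewhere: leave. */
        printf("%d\n", scan_decide(true, false, true, false));
        return 0;
}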
@@ -534,9 +561,10 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
534static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, 561static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local,
535 unsigned long *next_delay) 562 unsigned long *next_delay)
536{ 563{
537 ieee80211_offchannel_stop_station(local); 564 /* PS will already be in off-channel mode,
538 565 * we do that once at the beginning of scanning.
539 __set_bit(SCAN_OFF_CHANNEL, &local->scanning); 566 */
567 ieee80211_offchannel_stop_vifs(local, false);
540 568
541 /* 569 /*
542 * What if the nullfunc frames didn't arrive? 570 * What if the nullfunc frames didn't arrive?
@@ -559,15 +587,15 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca
559{ 587{
560 /* switch back to the operating channel */ 588 /* switch back to the operating channel */
561 local->scan_channel = NULL; 589 local->scan_channel = NULL;
562 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 590 if (!ieee80211_cfg_on_oper_channel(local))
591 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
563 592
564 /* 593 /*
565 * Only re-enable station mode interface now; beaconing will be 594 * Re-enable vifs and beaconing. Leave PS
 566 * re-enabled once the full scan has been completed. 595 * in off-channel state; we will put that back
596 * on-channel at the end of scanning.
567 */ 597 */
568 ieee80211_offchannel_return(local, false); 598 ieee80211_offchannel_return(local, true, false);
569
570 __clear_bit(SCAN_OFF_CHANNEL, &local->scanning);
571 599
572 *next_delay = HZ / 5; 600 *next_delay = HZ / 5;
573 local->next_scan_state = SCAN_DECISION; 601 local->next_scan_state = SCAN_DECISION;
@@ -583,8 +611,11 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
583 chan = local->scan_req->channels[local->scan_channel_idx]; 611 chan = local->scan_req->channels[local->scan_channel_idx];
584 612
585 local->scan_channel = chan; 613 local->scan_channel = chan;
586 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) 614
587 skip = 1; 615 /* Only call hw-config if we really need to change channels. */
616 if (chan != local->hw.conf.channel)
617 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
618 skip = 1;
588 619
589 /* advance state machine to next channel/band */ 620 /* advance state machine to next channel/band */
590 local->scan_channel_idx++; 621 local->scan_channel_idx++;
@@ -642,12 +673,14 @@ void ieee80211_scan_work(struct work_struct *work)
642{ 673{
643 struct ieee80211_local *local = 674 struct ieee80211_local *local =
644 container_of(work, struct ieee80211_local, scan_work.work); 675 container_of(work, struct ieee80211_local, scan_work.work);
645 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 676 struct ieee80211_sub_if_data *sdata;
646 unsigned long next_delay = 0; 677 unsigned long next_delay = 0;
647 bool aborted, hw_scan, finish; 678 bool aborted, hw_scan;
648 679
649 mutex_lock(&local->mtx); 680 mutex_lock(&local->mtx);
650 681
682 sdata = local->scan_sdata;
683
651 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { 684 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) {
652 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); 685 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning);
653 goto out_complete; 686 goto out_complete;
@@ -711,17 +744,11 @@ void ieee80211_scan_work(struct work_struct *work)
711 } while (next_delay == 0); 744 } while (next_delay == 0);
712 745
713 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); 746 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay);
714 mutex_unlock(&local->mtx); 747 goto out;
715 return;
716 748
717out_complete: 749out_complete:
718 hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); 750 hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
719 finish = __ieee80211_scan_completed(&local->hw, aborted, hw_scan); 751 __ieee80211_scan_completed(&local->hw, aborted, hw_scan);
720 mutex_unlock(&local->mtx);
721 if (finish)
722 __ieee80211_scan_completed_finish(&local->hw, hw_scan);
723 return;
724
725out: 752out:
726 mutex_unlock(&local->mtx); 753 mutex_unlock(&local->mtx);
727} 754}
@@ -791,7 +818,6 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
791void ieee80211_scan_cancel(struct ieee80211_local *local) 818void ieee80211_scan_cancel(struct ieee80211_local *local)
792{ 819{
793 bool abortscan; 820 bool abortscan;
794 bool finish = false;
795 821
796 /* 822 /*
797 * We are only canceling software scan, or deferred scan that was not 823 * We are only canceling software scan, or deferred scan that was not
@@ -811,14 +837,136 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
811 837
812 mutex_lock(&local->mtx); 838 mutex_lock(&local->mtx);
813 abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning); 839 abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning);
814 if (abortscan) 840 if (abortscan) {
815 finish = __ieee80211_scan_completed(&local->hw, true, false); 841 /*
842 * The scan is canceled, but stop work from being pending.
843 *
844 * If the work is currently running, it must be blocked on
845 * the mutex, but we'll set scan_sdata = NULL and it'll
846 * simply exit once it acquires the mutex.
847 */
848 cancel_delayed_work(&local->scan_work);
849 /* and clean up */
850 __ieee80211_scan_completed(&local->hw, true, false);
851 }
816 mutex_unlock(&local->mtx); 852 mutex_unlock(&local->mtx);
853}
817 854
818 if (abortscan) { 855int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
819 /* The scan is canceled, but stop work from being pending */ 856 struct cfg80211_sched_scan_request *req)
820 cancel_delayed_work_sync(&local->scan_work); 857{
858 struct ieee80211_local *local = sdata->local;
859 int ret, i;
860
861 mutex_lock(&sdata->local->mtx);
862
863 if (local->sched_scanning) {
864 ret = -EBUSY;
865 goto out;
866 }
867
868 if (!local->ops->sched_scan_start) {
869 ret = -ENOTSUPP;
870 goto out;
821 } 871 }
822 if (finish) 872
823 __ieee80211_scan_completed_finish(&local->hw, false); 873 for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
874 local->sched_scan_ies.ie[i] = kzalloc(2 +
875 IEEE80211_MAX_SSID_LEN +
876 local->scan_ies_len,
877 GFP_KERNEL);
878 if (!local->sched_scan_ies.ie[i]) {
879 ret = -ENOMEM;
880 goto out_free;
881 }
882
883 local->sched_scan_ies.len[i] =
884 ieee80211_build_preq_ies(local,
885 local->sched_scan_ies.ie[i],
886 req->ie, req->ie_len, i,
887 (u32) -1, 0);
888 }
889
890 ret = drv_sched_scan_start(local, sdata, req,
891 &local->sched_scan_ies);
892 if (ret == 0) {
893 local->sched_scanning = true;
894 goto out;
895 }
896
897out_free:
898 while (i > 0)
899 kfree(local->sched_scan_ies.ie[--i]);
900out:
901 mutex_unlock(&sdata->local->mtx);
902 return ret;
903}
904
905int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
906{
907 struct ieee80211_local *local = sdata->local;
908 int ret = 0, i;
909
910 mutex_lock(&sdata->local->mtx);
911
912 if (!local->ops->sched_scan_stop) {
913 ret = -ENOTSUPP;
914 goto out;
915 }
916
917 if (local->sched_scanning) {
918 for (i = 0; i < IEEE80211_NUM_BANDS; i++)
919 kfree(local->sched_scan_ies.ie[i]);
920
921 drv_sched_scan_stop(local, sdata);
922 local->sched_scanning = false;
923 }
924out:
925 mutex_unlock(&sdata->local->mtx);
926
927 return ret;
928}
929
930void ieee80211_sched_scan_results(struct ieee80211_hw *hw)
931{
932 struct ieee80211_local *local = hw_to_local(hw);
933
934 trace_api_sched_scan_results(local);
935
936 cfg80211_sched_scan_results(hw->wiphy);
937}
938EXPORT_SYMBOL(ieee80211_sched_scan_results);
939
940void ieee80211_sched_scan_stopped_work(struct work_struct *work)
941{
942 struct ieee80211_local *local =
943 container_of(work, struct ieee80211_local,
944 sched_scan_stopped_work);
945 int i;
946
947 mutex_lock(&local->mtx);
948
949 if (!local->sched_scanning) {
950 mutex_unlock(&local->mtx);
951 return;
952 }
953
954 for (i = 0; i < IEEE80211_NUM_BANDS; i++)
955 kfree(local->sched_scan_ies.ie[i]);
956
957 local->sched_scanning = false;
958
959 mutex_unlock(&local->mtx);
960
961 cfg80211_sched_scan_stopped(local->hw.wiphy);
962}
963
964void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
965{
966 struct ieee80211_local *local = hw_to_local(hw);
967
968 trace_api_sched_scan_stopped(local);
969
970 ieee80211_queue_work(&local->hw, &local->sched_scan_stopped_work);
824} 971}
972EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
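ieee80211_request_sched_scan_start() above allocates one probe-request IE buffer per band and unwinds whatever it already allocated if a later allocation fails, before handing the set to the driver. The same allocate-then-unwind pattern in isolation, as a hedged userspace sketch: calloc/free stand in for kzalloc/kfree, and the band count and buffer size are invented constants.

#include <stdlib.h>

#define NUM_BANDS 2
#define IE_BUF_LEN 128

static int alloc_band_ies(unsigned char *ie[NUM_BANDS])
{
        int i;

        for (i = 0; i < NUM_BANDS; i++) {
                ie[i] = calloc(1, IE_BUF_LEN);
                if (!ie[i])
                        goto out_free;
        }
        return 0;

out_free:
        /* Unwind only the buffers that were successfully allocated. */
        while (i > 0)
                free(ie[--i]);
        return -1;
}

int main(void)
{
        unsigned char *ie[NUM_BANDS] = { NULL };
        int i;

        if (alloc_band_ies(ie) == 0)
                for (i = 0; i < NUM_BANDS; i++)
                        free(ie[i]);
        return 0;
}

The decrementing unwind loop mirrors the patch's "while (i > 0) kfree(...ie[--i])": it frees indices i-1 down to 0 and never touches the slot whose allocation failed.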
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index c426504ed1cf..b83870bf60fa 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -47,9 +47,9 @@
47 * Station entries are added by mac80211 when you establish a link with a 47 * Station entries are added by mac80211 when you establish a link with a
48 * peer. This means different things for the different type of interfaces 48 * peer. This means different things for the different type of interfaces
49 * we support. For a regular station this mean we add the AP sta when we 49 * we support. For a regular station this mean we add the AP sta when we
50 * receive an assocation response from the AP. For IBSS this occurs when 50 * receive an association response from the AP. For IBSS this occurs when
51 * get to know about a peer on the same IBSS. For WDS we add the sta for 51 * get to know about a peer on the same IBSS. For WDS we add the sta for
52 * the peer imediately upon device open. When using AP mode we add stations 52 * the peer immediately upon device open. When using AP mode we add stations
53 * for each respective station upon request from userspace through nl80211. 53 * for each respective station upon request from userspace through nl80211.
54 * 54 *
55 * In order to remove a STA info structure, various sta_info_destroy_*() 55 * In order to remove a STA info structure, various sta_info_destroy_*()
@@ -67,7 +67,8 @@ static int sta_info_hash_del(struct ieee80211_local *local,
67{ 67{
68 struct sta_info *s; 68 struct sta_info *s;
69 69
70 s = local->sta_hash[STA_HASH(sta->sta.addr)]; 70 s = rcu_dereference_protected(local->sta_hash[STA_HASH(sta->sta.addr)],
71 lockdep_is_held(&local->sta_lock));
71 if (!s) 72 if (!s)
72 return -ENOENT; 73 return -ENOENT;
73 if (s == sta) { 74 if (s == sta) {
@@ -76,9 +77,11 @@ static int sta_info_hash_del(struct ieee80211_local *local,
76 return 0; 77 return 0;
77 } 78 }
78 79
79 while (s->hnext && s->hnext != sta) 80 while (rcu_access_pointer(s->hnext) &&
80 s = s->hnext; 81 rcu_access_pointer(s->hnext) != sta)
81 if (s->hnext) { 82 s = rcu_dereference_protected(s->hnext,
83 lockdep_is_held(&local->sta_lock));
84 if (rcu_access_pointer(s->hnext)) {
82 rcu_assign_pointer(s->hnext, sta->hnext); 85 rcu_assign_pointer(s->hnext, sta->hnext);
83 return 0; 86 return 0;
84 } 87 }
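The sta_info_hash_del() hunk only swaps the raw pointer walks for rcu_dereference_protected()/rcu_access_pointer() annotations; the removal logic itself is unchanged. For reference, the underlying singly-linked bucket removal looks roughly like this plain (non-RCU) standalone sketch:

#include <stddef.h>

struct sta {
        struct sta *hnext;
};

/* Remove 'sta' from the singly linked chain starting at *head.
 * Returns 0 on success, -1 if the entry was not found. */
static int hash_del(struct sta **head, struct sta *sta)
{
        struct sta *s = *head;

        if (!s)
                return -1;
        if (s == sta) {
                *head = s->hnext;       /* unlink the chain head */
                return 0;
        }

        while (s->hnext && s->hnext != sta)
                s = s->hnext;
        if (s->hnext) {
                s->hnext = sta->hnext;  /* unlink from the middle/tail */
                return 0;
        }
        return -1;
}

int main(void)
{
        struct sta a = { NULL }, b = { NULL };
        struct sta *head = &a;

        a.hnext = &b;
        return hash_del(&head, &b);     /* returns 0, head keeps 'a' */
}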
@@ -228,6 +231,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
228{ 231{
229 struct ieee80211_local *local = sdata->local; 232 struct ieee80211_local *local = sdata->local;
230 struct sta_info *sta; 233 struct sta_info *sta;
234 struct timespec uptime;
231 int i; 235 int i;
232 236
233 sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); 237 sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp);
@@ -243,7 +247,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
243 memcpy(sta->sta.addr, addr, ETH_ALEN); 247 memcpy(sta->sta.addr, addr, ETH_ALEN);
244 sta->local = local; 248 sta->local = local;
245 sta->sdata = sdata; 249 sta->sdata = sdata;
250 sta->last_rx = jiffies;
246 251
252 do_posix_clock_monotonic_gettime(&uptime);
253 sta->last_connected = uptime.tv_sec;
247 ewma_init(&sta->avg_signal, 1024, 8); 254 ewma_init(&sta->avg_signal, 1024, 8);
248 255
249 if (sta_prepare_rate_control(local, sta, gfp)) { 256 if (sta_prepare_rate_control(local, sta, gfp)) {
@@ -270,7 +277,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
270#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 277#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
271 278
272#ifdef CONFIG_MAC80211_MESH 279#ifdef CONFIG_MAC80211_MESH
273 sta->plink_state = PLINK_LISTEN; 280 sta->plink_state = NL80211_PLINK_LISTEN;
274 init_timer(&sta->plink_timer); 281 init_timer(&sta->plink_timer);
275#endif 282#endif
276 283
@@ -583,7 +590,6 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
583{ 590{
584 unsigned long flags; 591 unsigned long flags;
585 struct sk_buff *skb; 592 struct sk_buff *skb;
586 struct ieee80211_sub_if_data *sdata;
587 593
588 if (skb_queue_empty(&sta->ps_tx_buf)) 594 if (skb_queue_empty(&sta->ps_tx_buf))
589 return false; 595 return false;
@@ -600,7 +606,6 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
600 if (!skb) 606 if (!skb)
601 break; 607 break;
602 608
603 sdata = sta->sdata;
604 local->total_ps_buffered--; 609 local->total_ps_buffered--;
605#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 610#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
606 printk(KERN_DEBUG "Buffered frame expired (STA %pM)\n", 611 printk(KERN_DEBUG "Buffered frame expired (STA %pM)\n",
@@ -608,7 +613,8 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
608#endif 613#endif
609 dev_kfree_skb(skb); 614 dev_kfree_skb(skb);
610 615
611 if (skb_queue_empty(&sta->ps_tx_buf)) 616 if (skb_queue_empty(&sta->ps_tx_buf) &&
617 !test_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF))
612 sta_info_clear_tim_bit(sta); 618 sta_info_clear_tim_bit(sta);
613 } 619 }
614 620
@@ -649,10 +655,12 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
649 if (ret) 655 if (ret)
650 return ret; 656 return ret;
651 657
658 mutex_lock(&local->key_mtx);
652 for (i = 0; i < NUM_DEFAULT_KEYS; i++) 659 for (i = 0; i < NUM_DEFAULT_KEYS; i++)
653 ieee80211_key_free(local, sta->gtk[i]); 660 __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i]));
654 if (sta->ptk) 661 if (sta->ptk)
655 ieee80211_key_free(local, sta->ptk); 662 __ieee80211_key_free(key_mtx_dereference(local, sta->ptk));
663 mutex_unlock(&local->key_mtx);
656 664
657 sta->dead = true; 665 sta->dead = true;
658 666
@@ -697,6 +705,8 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
697#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 705#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
698 cancel_work_sync(&sta->drv_unblock_wk); 706 cancel_work_sync(&sta->drv_unblock_wk);
699 707
708 cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL);
709
700 rate_control_remove_sta_debugfs(sta); 710 rate_control_remove_sta_debugfs(sta);
701 ieee80211_sta_debugfs_remove(sta); 711 ieee80211_sta_debugfs_remove(sta);
702 712
@@ -765,9 +775,8 @@ static void sta_info_cleanup(unsigned long data)
765 if (!timer_needed) 775 if (!timer_needed)
766 return; 776 return;
767 777
768 local->sta_cleanup.expires = 778 mod_timer(&local->sta_cleanup,
769 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); 779 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL));
770 add_timer(&local->sta_cleanup);
771} 780}
772 781
773void sta_info_init(struct ieee80211_local *local) 782void sta_info_init(struct ieee80211_local *local)
@@ -780,14 +789,6 @@ void sta_info_init(struct ieee80211_local *local)
780 789
781 setup_timer(&local->sta_cleanup, sta_info_cleanup, 790 setup_timer(&local->sta_cleanup, sta_info_cleanup,
782 (unsigned long)local); 791 (unsigned long)local);
783 local->sta_cleanup.expires =
784 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL);
785}
786
787int sta_info_start(struct ieee80211_local *local)
788{
789 add_timer(&local->sta_cleanup);
790 return 0;
791} 792}
792 793
793void sta_info_stop(struct ieee80211_local *local) 794void sta_info_stop(struct ieee80211_local *local)
@@ -899,7 +900,9 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
899 struct ieee80211_local *local = sdata->local; 900 struct ieee80211_local *local = sdata->local;
900 int sent, buffered; 901 int sent, buffered;
901 902
902 drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); 903 clear_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF);
904 if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
905 drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
903 906
904 if (!skb_queue_empty(&sta->ps_tx_buf)) 907 if (!skb_queue_empty(&sta->ps_tx_buf))
905 sta_info_clear_tim_bit(sta); 908 sta_info_clear_tim_bit(sta);
@@ -990,3 +993,12 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
990 ieee80211_queue_work(hw, &sta->drv_unblock_wk); 993 ieee80211_queue_work(hw, &sta->drv_unblock_wk);
991} 994}
992EXPORT_SYMBOL(ieee80211_sta_block_awake); 995EXPORT_SYMBOL(ieee80211_sta_block_awake);
996
997void ieee80211_sta_set_tim(struct ieee80211_sta *pubsta)
998{
999 struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
1000
1001 set_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF);
1002 sta_info_set_tim_bit(sta);
1003}
1004EXPORT_SYMBOL(ieee80211_sta_set_tim);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index bbdd2a86a94b..c6ae8718bd57 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -43,6 +43,8 @@
43 * be in the queues 43 * be in the queues
44 * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping 44 * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping
45 * station in power-save mode, reply when the driver unblocks. 45 * station in power-save mode, reply when the driver unblocks.
46 * @WLAN_STA_PS_DRIVER_BUF: Station has frames pending in driver internal
47 * buffers. Automatically cleared on station wake-up.
46 */ 48 */
47enum ieee80211_sta_info_flags { 49enum ieee80211_sta_info_flags {
48 WLAN_STA_AUTH = 1<<0, 50 WLAN_STA_AUTH = 1<<0,
@@ -58,6 +60,7 @@ enum ieee80211_sta_info_flags {
58 WLAN_STA_BLOCK_BA = 1<<11, 60 WLAN_STA_BLOCK_BA = 1<<11,
59 WLAN_STA_PS_DRIVER = 1<<12, 61 WLAN_STA_PS_DRIVER = 1<<12,
60 WLAN_STA_PSPOLL = 1<<13, 62 WLAN_STA_PSPOLL = 1<<13,
63 WLAN_STA_PS_DRIVER_BUF = 1<<14,
61}; 64};
62 65
63#define STA_TID_NUM 16 66#define STA_TID_NUM 16
@@ -82,6 +85,7 @@ enum ieee80211_sta_info_flags {
82 * @state: session state (see above) 85 * @state: session state (see above)
83 * @stop_initiator: initiator of a session stop 86 * @stop_initiator: initiator of a session stop
84 * @tx_stop: TX DelBA frame when stopping 87 * @tx_stop: TX DelBA frame when stopping
88 * @buf_size: reorder buffer size at receiver
85 * 89 *
86 * This structure's lifetime is managed by RCU, assignments to 90 * This structure's lifetime is managed by RCU, assignments to
87 * the array holding it must hold the aggregation mutex. 91 * the array holding it must hold the aggregation mutex.
@@ -101,6 +105,7 @@ struct tid_ampdu_tx {
101 u8 dialog_token; 105 u8 dialog_token;
102 u8 stop_initiator; 106 u8 stop_initiator;
103 bool tx_stop; 107 bool tx_stop;
108 u8 buf_size;
104}; 109};
105 110
106/** 111/**
@@ -147,6 +152,7 @@ struct tid_ampdu_rx {
147 * 152 *
148 * @tid_rx: aggregation info for Rx per TID -- RCU protected 153 * @tid_rx: aggregation info for Rx per TID -- RCU protected
149 * @tid_tx: aggregation info for Tx per TID 154 * @tid_tx: aggregation info for Tx per TID
155 * @tid_start_tx: sessions where start was requested
150 * @addba_req_num: number of times addBA request has been sent. 156 * @addba_req_num: number of times addBA request has been sent.
151 * @dialog_token_allocator: dialog token enumerator for each new session; 157 * @dialog_token_allocator: dialog token enumerator for each new session;
152 * @work: work struct for starting/stopping aggregation 158 * @work: work struct for starting/stopping aggregation
@@ -158,40 +164,18 @@ struct tid_ampdu_rx {
158struct sta_ampdu_mlme { 164struct sta_ampdu_mlme {
159 struct mutex mtx; 165 struct mutex mtx;
160 /* rx */ 166 /* rx */
161 struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; 167 struct tid_ampdu_rx __rcu *tid_rx[STA_TID_NUM];
162 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)]; 168 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)];
163 /* tx */ 169 /* tx */
164 struct work_struct work; 170 struct work_struct work;
165 struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; 171 struct tid_ampdu_tx __rcu *tid_tx[STA_TID_NUM];
172 struct tid_ampdu_tx *tid_start_tx[STA_TID_NUM];
166 u8 addba_req_num[STA_TID_NUM]; 173 u8 addba_req_num[STA_TID_NUM];
167 u8 dialog_token_allocator; 174 u8 dialog_token_allocator;
168}; 175};
169 176
170 177
171/** 178/**
172 * enum plink_state - state of a mesh peer link finite state machine
173 *
174 * @PLINK_LISTEN: initial state, considered the implicit state of non existant
175 * mesh peer links
176 * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh peer
177 * @PLINK_OPN_RCVD: mesh plink open frame has been received from this mesh peer
178 * @PLINK_CNF_RCVD: mesh plink confirm frame has been received from this mesh
179 * peer
180 * @PLINK_ESTAB: mesh peer link is established
181 * @PLINK_HOLDING: mesh peer link is being closed or cancelled
182 * @PLINK_BLOCKED: all frames transmitted from this mesh plink are discarded
183 */
184enum plink_state {
185 PLINK_LISTEN,
186 PLINK_OPN_SNT,
187 PLINK_OPN_RCVD,
188 PLINK_CNF_RCVD,
189 PLINK_ESTAB,
190 PLINK_HOLDING,
191 PLINK_BLOCKED
192};
193
194/**
195 * struct sta_info - STA information 179 * struct sta_info - STA information
196 * 180 *
197 * This structure collects information about a station that 181 * This structure collects information about a station that
@@ -207,6 +191,8 @@ enum plink_state {
207 * @rate_ctrl_priv: rate control private per-STA pointer 191 * @rate_ctrl_priv: rate control private per-STA pointer
208 * @last_tx_rate: rate used for last transmit, to report to userspace as 192 * @last_tx_rate: rate used for last transmit, to report to userspace as
209 * "the" transmit rate 193 * "the" transmit rate
194 * @last_rx_rate_idx: rx status rate index of the last data packet
195 * @last_rx_rate_flag: rx status flag of the last data packet
210 * @lock: used for locking all fields that require locking, see comments 196 * @lock: used for locking all fields that require locking, see comments
211 * in the header file. 197 * in the header file.
212 * @flaglock: spinlock for flags accesses 198 * @flaglock: spinlock for flags accesses
@@ -222,6 +208,7 @@ enum plink_state {
222 * @rx_bytes: Number of bytes received from this STA 208 * @rx_bytes: Number of bytes received from this STA
223 * @wep_weak_iv_count: number of weak WEP IVs received from this station 209 * @wep_weak_iv_count: number of weak WEP IVs received from this station
224 * @last_rx: time (in jiffies) when last frame was received from this STA 210 * @last_rx: time (in jiffies) when last frame was received from this STA
211 * @last_connected: time (in seconds) when a station got connected
225 * @num_duplicates: number of duplicate frames received from this STA 212 * @num_duplicates: number of duplicate frames received from this STA
226 * @rx_fragments: number of received MPDUs 213 * @rx_fragments: number of received MPDUs
227 * @rx_dropped: number of dropped MPDUs from this STA 214 * @rx_dropped: number of dropped MPDUs from this STA
@@ -256,11 +243,11 @@ enum plink_state {
256struct sta_info { 243struct sta_info {
257 /* General information, mostly static */ 244 /* General information, mostly static */
258 struct list_head list; 245 struct list_head list;
259 struct sta_info *hnext; 246 struct sta_info __rcu *hnext;
260 struct ieee80211_local *local; 247 struct ieee80211_local *local;
261 struct ieee80211_sub_if_data *sdata; 248 struct ieee80211_sub_if_data *sdata;
262 struct ieee80211_key *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; 249 struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
263 struct ieee80211_key *ptk; 250 struct ieee80211_key __rcu *ptk;
264 struct rate_control_ref *rate_ctrl; 251 struct rate_control_ref *rate_ctrl;
265 void *rate_ctrl_priv; 252 void *rate_ctrl_priv;
266 spinlock_t lock; 253 spinlock_t lock;
@@ -291,6 +278,7 @@ struct sta_info {
291 unsigned long rx_packets, rx_bytes; 278 unsigned long rx_packets, rx_bytes;
292 unsigned long wep_weak_iv_count; 279 unsigned long wep_weak_iv_count;
293 unsigned long last_rx; 280 unsigned long last_rx;
281 long last_connected;
294 unsigned long num_duplicates; 282 unsigned long num_duplicates;
295 unsigned long rx_fragments; 283 unsigned long rx_fragments;
296 unsigned long rx_dropped; 284 unsigned long rx_dropped;
@@ -309,6 +297,8 @@ struct sta_info {
309 unsigned long tx_bytes; 297 unsigned long tx_bytes;
310 unsigned long tx_fragments; 298 unsigned long tx_fragments;
311 struct ieee80211_tx_rate last_tx_rate; 299 struct ieee80211_tx_rate last_tx_rate;
300 int last_rx_rate_idx;
301 int last_rx_rate_flag;
312 u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; 302 u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
313 303
314 /* 304 /*
@@ -328,7 +318,7 @@ struct sta_info {
328 u8 plink_retries; 318 u8 plink_retries;
329 bool ignore_plink_timer; 319 bool ignore_plink_timer;
330 bool plink_timer_was_running; 320 bool plink_timer_was_running;
331 enum plink_state plink_state; 321 enum nl80211_plink_state plink_state;
332 u32 plink_timeout; 322 u32 plink_timeout;
333 struct timer_list plink_timer; 323 struct timer_list plink_timer;
334#endif 324#endif
@@ -346,12 +336,12 @@ struct sta_info {
346 struct ieee80211_sta sta; 336 struct ieee80211_sta sta;
347}; 337};
348 338
349static inline enum plink_state sta_plink_state(struct sta_info *sta) 339static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta)
350{ 340{
351#ifdef CONFIG_MAC80211_MESH 341#ifdef CONFIG_MAC80211_MESH
352 return sta->plink_state; 342 return sta->plink_state;
353#endif 343#endif
354 return PLINK_LISTEN; 344 return NL80211_PLINK_LISTEN;
355} 345}
356 346
357static inline void set_sta_flags(struct sta_info *sta, const u32 flags) 347static inline void set_sta_flags(struct sta_info *sta, const u32 flags)
@@ -410,7 +400,16 @@ static inline u32 get_sta_flags(struct sta_info *sta)
410 return ret; 400 return ret;
411} 401}
412 402
403void ieee80211_assign_tid_tx(struct sta_info *sta, int tid,
404 struct tid_ampdu_tx *tid_tx);
413 405
406static inline struct tid_ampdu_tx *
407rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid)
408{
409 return rcu_dereference_protected(sta->ampdu_mlme.tid_tx[tid],
410 lockdep_is_held(&sta->lock) ||
411 lockdep_is_held(&sta->ampdu_mlme.mtx));
412}
414 413
415#define STA_HASH_SIZE 256 414#define STA_HASH_SIZE 256
416#define STA_HASH(sta) (sta[5]) 415#define STA_HASH(sta) (sta[5])
@@ -491,7 +490,6 @@ void sta_info_set_tim_bit(struct sta_info *sta);
491void sta_info_clear_tim_bit(struct sta_info *sta); 490void sta_info_clear_tim_bit(struct sta_info *sta);
492 491
493void sta_info_init(struct ieee80211_local *local); 492void sta_info_init(struct ieee80211_local *local);
494int sta_info_start(struct ieee80211_local *local);
495void sta_info_stop(struct ieee80211_local *local); 493void sta_info_stop(struct ieee80211_local *local);
496int sta_info_flush(struct ieee80211_local *local, 494int sta_info_flush(struct ieee80211_local *local,
497 struct ieee80211_sub_if_data *sdata); 495 struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 071ac95c4aa0..1658efaa2e8e 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -98,6 +98,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
98 * (b) always process RX events before TX status events if ordering 98 * (b) always process RX events before TX status events if ordering
99 * can be unknown, for example with different interrupt status 99 * can be unknown, for example with different interrupt status
100 * bits. 100 * bits.
101 * (c) if PS mode transitions are manual (i.e. the flag
102 * %IEEE80211_HW_AP_LINK_PS is set), always process PS state
103 * changes before calling TX status events if ordering can be
104 * unknown.
101 */ 105 */
102 if (test_sta_flags(sta, WLAN_STA_PS_STA) && 106 if (test_sta_flags(sta, WLAN_STA_PS_STA) &&
103 skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { 107 skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) {
@@ -185,16 +189,19 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
185 bool acked; 189 bool acked;
186 190
187 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 191 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
188 /* the HW cannot have attempted that rate */ 192 if (info->status.rates[i].idx < 0) {
189 if (i >= hw->max_report_rates) { 193 break;
194 } else if (i >= hw->max_report_rates) {
195 /* the HW cannot have attempted that rate */
190 info->status.rates[i].idx = -1; 196 info->status.rates[i].idx = -1;
191 info->status.rates[i].count = 0; 197 info->status.rates[i].count = 0;
192 } else if (info->status.rates[i].idx >= 0) { 198 break;
193 rates_idx = i;
194 } 199 }
195 200
196 retry_count += info->status.rates[i].count; 201 retry_count += info->status.rates[i].count;
197 } 202 }
203 rates_idx = i - 1;
204
198 if (retry_count < 0) 205 if (retry_count < 0)
199 retry_count = 0; 206 retry_count = 0;
200 207
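The reworked loop in ieee80211_tx_status() now stops at the first unused rate slot (idx < 0), clamps entries the hardware could not have attempted, and derives the last attempted rate as i - 1 after the loop instead of tracking it inside it. A self-contained model of that walk, with a reduced rate-entry structure standing in for the real ieee80211_tx_info rates array:

#include <stdio.h>

#define MAX_RATES 4

struct rate_entry {
        int idx;        /* negative means "slot unused" */
        int count;      /* transmission attempts at this rate */
};

static int count_retries(struct rate_entry rates[MAX_RATES],
                         int max_report_rates, int *last_rate)
{
        int i, retry_count = 0;

        for (i = 0; i < MAX_RATES; i++) {
                if (rates[i].idx < 0)
                        break;
                if (i >= max_report_rates) {
                        /* The hardware cannot have attempted this rate. */
                        rates[i].idx = -1;
                        rates[i].count = 0;
                        break;
                }
                retry_count += rates[i].count;
        }

        *last_rate = i - 1;     /* index of the last valid entry */
        if (retry_count < 0)
                retry_count = 0;
        return retry_count;
}

int main(void)
{
        struct rate_entry r[MAX_RATES] = { {5, 2}, {3, 1}, {-1, 0}, {-1, 0} };
        int last;

        printf("retries=%d last=%d\n", count_retries(r, 4, &last), last);
        return 0;
}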
@@ -314,8 +321,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
314 if (info->flags & IEEE80211_TX_STAT_ACK) { 321 if (info->flags & IEEE80211_TX_STAT_ACK) {
315 local->ps_sdata->u.mgd.flags |= 322 local->ps_sdata->u.mgd.flags |=
316 IEEE80211_STA_NULLFUNC_ACKED; 323 IEEE80211_STA_NULLFUNC_ACKED;
317 ieee80211_queue_work(&local->hw,
318 &local->dynamic_ps_enable_work);
319 } else 324 } else
320 mod_timer(&local->dynamic_ps_timer, jiffies + 325 mod_timer(&local->dynamic_ps_timer, jiffies +
321 msecs_to_jiffies(10)); 326 msecs_to_jiffies(10));
@@ -339,6 +344,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
339 cookie = local->hw_roc_cookie ^ 2; 344 cookie = local->hw_roc_cookie ^ 2;
340 local->hw_roc_skb_for_status = NULL; 345 local->hw_roc_skb_for_status = NULL;
341 } 346 }
347
348 if (cookie == local->hw_offchan_tx_cookie)
349 local->hw_offchan_tx_cookie = 0;
350
342 cfg80211_mgmt_tx_status( 351 cfg80211_mgmt_tx_status(
343 skb->dev, cookie, skb->data, skb->len, 352 skb->dev, cookie, skb->data, skb->len,
344 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); 353 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
@@ -437,3 +446,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
437 dev_kfree_skb(skb); 446 dev_kfree_skb(skb);
438} 447}
439EXPORT_SYMBOL(ieee80211_tx_status); 448EXPORT_SYMBOL(ieee80211_tx_status);
449
450void ieee80211_report_low_ack(struct ieee80211_sta *pubsta, u32 num_packets)
451{
452 struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
453 cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
454 num_packets, GFP_ATOMIC);
455}
456EXPORT_SYMBOL(ieee80211_report_low_ack);
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index e840c9cd46db..757e4eb2baf7 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -202,7 +202,7 @@ EXPORT_SYMBOL(ieee80211_get_tkip_key);
202 * @payload_len is the length of payload (_not_ including IV/ICV length). 202 * @payload_len is the length of payload (_not_ including IV/ICV length).
203 * @ta is the transmitter addresses. 203 * @ta is the transmitter addresses.
204 */ 204 */
205int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, 205int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
206 struct ieee80211_key *key, 206 struct ieee80211_key *key,
207 u8 *pos, size_t payload_len, u8 *ta) 207 u8 *pos, size_t payload_len, u8 *ta)
208{ 208{
@@ -223,7 +223,7 @@ int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
223 * beginning of the buffer containing IEEE 802.11 header payload, i.e., 223 * beginning of the buffer containing IEEE 802.11 header payload, i.e.,
224 * including IV, Ext. IV, real data, Michael MIC, ICV. @payload_len is the 224 * including IV, Ext. IV, real data, Michael MIC, ICV. @payload_len is the
225 * length of payload, including IV, Ext. IV, MIC, ICV. */ 225 * length of payload, including IV, Ext. IV, MIC, ICV. */
226int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, 226int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
227 struct ieee80211_key *key, 227 struct ieee80211_key *key,
228 u8 *payload, size_t payload_len, u8 *ta, 228 u8 *payload, size_t payload_len, u8 *ta,
229 u8 *ra, int only_iv, int queue, 229 u8 *ra, int only_iv, int queue,
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index 7e83dee976fa..1cab9c86978f 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -15,7 +15,7 @@
15 15
16u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16); 16u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16);
17 17
18int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, 18int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
19 struct ieee80211_key *key, 19 struct ieee80211_key *key,
20 u8 *pos, size_t payload_len, u8 *ta); 20 u8 *pos, size_t payload_len, u8 *ta);
21enum { 21enum {
@@ -24,7 +24,7 @@ enum {
24 TKIP_DECRYPT_INVALID_KEYIDX = -2, 24 TKIP_DECRYPT_INVALID_KEYIDX = -2,
25 TKIP_DECRYPT_REPLAY = -3, 25 TKIP_DECRYPT_REPLAY = -3,
26}; 26};
27int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, 27int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
28 struct ieee80211_key *key, 28 struct ieee80211_key *key,
29 u8 *payload, size_t payload_len, u8 *ta, 29 u8 *payload, size_t payload_len, u8 *ta,
30 u8 *ra, int only_iv, int queue, 30 u8 *ra, int only_iv, int queue,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b0beaa58246b..64e0f7587e6d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -33,10 +33,6 @@
33#include "wme.h" 33#include "wme.h"
34#include "rate.h" 34#include "rate.h"
35 35
36#define IEEE80211_TX_OK 0
37#define IEEE80211_TX_AGAIN 1
38#define IEEE80211_TX_PENDING 2
39
40/* misc utils */ 36/* misc utils */
41 37
42static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, 38static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
@@ -173,7 +169,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
173 return cpu_to_le16(dur); 169 return cpu_to_le16(dur);
174} 170}
175 171
176static int inline is_ieee80211_device(struct ieee80211_local *local, 172static inline int is_ieee80211_device(struct ieee80211_local *local,
177 struct net_device *dev) 173 struct net_device *dev)
178{ 174{
179 return local == wdev_priv(dev->ieee80211_ptr); 175 return local == wdev_priv(dev->ieee80211_ptr);
@@ -236,10 +232,15 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
236 if (local->hw.conf.flags & IEEE80211_CONF_PS) { 232 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
237 ieee80211_stop_queues_by_reason(&local->hw, 233 ieee80211_stop_queues_by_reason(&local->hw,
238 IEEE80211_QUEUE_STOP_REASON_PS); 234 IEEE80211_QUEUE_STOP_REASON_PS);
235 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
239 ieee80211_queue_work(&local->hw, 236 ieee80211_queue_work(&local->hw,
240 &local->dynamic_ps_disable_work); 237 &local->dynamic_ps_disable_work);
241 } 238 }
242 239
240 /* Don't restart the timer if we're not disassociated */
241 if (!ifmgd->associated)
242 return TX_CONTINUE;
243
243 mod_timer(&local->dynamic_ps_timer, jiffies + 244 mod_timer(&local->dynamic_ps_timer, jiffies +
244 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 245 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
245 246
@@ -257,7 +258,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
257 if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) 258 if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED))
258 return TX_CONTINUE; 259 return TX_CONTINUE;
259 260
260 if (unlikely(test_bit(SCAN_OFF_CHANNEL, &tx->local->scanning)) && 261 if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) &&
262 test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) &&
261 !ieee80211_is_probe_req(hdr->frame_control) && 263 !ieee80211_is_probe_req(hdr->frame_control) &&
262 !ieee80211_is_nullfunc(hdr->frame_control)) 264 !ieee80211_is_nullfunc(hdr->frame_control))
263 /* 265 /*
@@ -1038,14 +1040,11 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
1038 struct ieee80211_radiotap_iterator iterator; 1040 struct ieee80211_radiotap_iterator iterator;
1039 struct ieee80211_radiotap_header *rthdr = 1041 struct ieee80211_radiotap_header *rthdr =
1040 (struct ieee80211_radiotap_header *) skb->data; 1042 (struct ieee80211_radiotap_header *) skb->data;
1041 struct ieee80211_supported_band *sband;
1042 bool hw_frag; 1043 bool hw_frag;
1043 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1044 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1044 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, 1045 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
1045 NULL); 1046 NULL);
1046 1047
1047 sband = tx->local->hw.wiphy->bands[tx->channel->band];
1048
1049 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; 1048 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1050 tx->flags &= ~IEEE80211_TX_FRAGMENTED; 1049 tx->flags &= ~IEEE80211_TX_FRAGMENTED;
1051 1050
@@ -1152,7 +1151,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
1152 * packet pass through because splicing the frames 1151 * packet pass through because splicing the frames
1153 * back is already done. 1152 * back is already done.
1154 */ 1153 */
1155 tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; 1154 tid_tx = rcu_dereference_protected_tid_tx(tx->sta, tid);
1156 1155
1157 if (!tid_tx) { 1156 if (!tid_tx) {
1158 /* do nothing, let packet pass through */ 1157 /* do nothing, let packet pass through */
@@ -1283,16 +1282,17 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
1283 return TX_CONTINUE; 1282 return TX_CONTINUE;
1284} 1283}
1285 1284
1286static int __ieee80211_tx(struct ieee80211_local *local, 1285/*
1287 struct sk_buff **skbp, 1286 * Returns false if the frame couldn't be transmitted but was queued instead.
1288 struct sta_info *sta, 1287 */
1289 bool txpending) 1288static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp,
1289 struct sta_info *sta, bool txpending)
1290{ 1290{
1291 struct sk_buff *skb = *skbp, *next; 1291 struct sk_buff *skb = *skbp, *next;
1292 struct ieee80211_tx_info *info; 1292 struct ieee80211_tx_info *info;
1293 struct ieee80211_sub_if_data *sdata; 1293 struct ieee80211_sub_if_data *sdata;
1294 unsigned long flags; 1294 unsigned long flags;
1295 int ret, len; 1295 int len;
1296 bool fragm = false; 1296 bool fragm = false;
1297 1297
1298 while (skb) { 1298 while (skb) {
@@ -1300,13 +1300,37 @@ static int __ieee80211_tx(struct ieee80211_local *local,
1300 __le16 fc; 1300 __le16 fc;
1301 1301
1302 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 1302 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1303 ret = IEEE80211_TX_OK;
1304 if (local->queue_stop_reasons[q] || 1303 if (local->queue_stop_reasons[q] ||
1305 (!txpending && !skb_queue_empty(&local->pending[q]))) 1304 (!txpending && !skb_queue_empty(&local->pending[q]))) {
1306 ret = IEEE80211_TX_PENDING; 1305 /*
1306 * Since queue is stopped, queue up frames for later
1307 * transmission from the tx-pending tasklet when the
1308 * queue is woken again.
1309 */
1310
1311 do {
1312 next = skb->next;
1313 skb->next = NULL;
1314 /*
1315 * NB: If txpending is true, next must already
1316 * be NULL since we must've gone through this
1317 * loop before already; therefore we can just
1318 * queue the frame to the head without worrying
1319 * about reordering of fragments.
1320 */
1321 if (unlikely(txpending))
1322 __skb_queue_head(&local->pending[q],
1323 skb);
1324 else
1325 __skb_queue_tail(&local->pending[q],
1326 skb);
1327 } while ((skb = next));
1328
1329 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1330 flags);
1331 return false;
1332 }
1307 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 1333 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
1308 if (ret != IEEE80211_TX_OK)
1309 return ret;
1310 1334
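With the IEEE80211_TX_AGAIN/IEEE80211_TX_PENDING return codes gone, __ieee80211_tx() now parks the whole fragment chain on the per-queue pending list whenever the queue is stopped, putting frames re-submitted from the pending tasklet back at the head so ordering is preserved. A simplified standalone sketch of that parking step follows; a hand-rolled list replaces struct sk_buff_head, 'next' chaining stands in for skb->next, and locking is left out.

#include <stdbool.h>
#include <stddef.h>

struct frame {
        struct frame *next;
};

struct pending_queue {
        struct frame *head;
        struct frame *tail;
};

static void queue_head(struct pending_queue *q, struct frame *f)
{
        f->next = q->head;
        q->head = f;
        if (!q->tail)
                q->tail = f;
}

static void queue_tail(struct pending_queue *q, struct frame *f)
{
        f->next = NULL;
        if (q->tail)
                q->tail->next = f;
        else
                q->head = f;
        q->tail = f;
}

/* Park a chain of frames on the pending queue because the hardware
 * queue is stopped. Frames re-submitted from the pending tasklet
 * (txpending == true) go back to the head to preserve ordering. */
static void park_frames(struct pending_queue *q, struct frame *chain,
                        bool txpending)
{
        struct frame *next;

        while (chain) {
                next = chain->next;
                chain->next = NULL;
                if (txpending)
                        queue_head(q, chain);
                else
                        queue_tail(q, chain);
                chain = next;
        }
}

int main(void)
{
        struct pending_queue q = { NULL, NULL };
        struct frame f1 = { NULL }, f2 = { NULL };

        f1.next = &f2;                  /* two-frame chain */
        park_frames(&q, &f1, false);    /* parked at the tail */
        return (q.head == &f1 && q.tail == &f2) ? 0 : 1;
}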
1311 info = IEEE80211_SKB_CB(skb); 1335 info = IEEE80211_SKB_CB(skb);
1312 1336
@@ -1341,15 +1365,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
1341 info->control.sta = NULL; 1365 info->control.sta = NULL;
1342 1366
1343 fc = ((struct ieee80211_hdr *)skb->data)->frame_control; 1367 fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
1344 ret = drv_tx(local, skb); 1368 drv_tx(local, skb);
1345 if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) {
1346 dev_kfree_skb(skb);
1347 ret = NETDEV_TX_OK;
1348 }
1349 if (ret != NETDEV_TX_OK) {
1350 info->control.vif = &sdata->vif;
1351 return IEEE80211_TX_AGAIN;
1352 }
1353 1369
1354 ieee80211_tpt_led_trig_tx(local, fc, len); 1370 ieee80211_tpt_led_trig_tx(local, fc, len);
1355 *skbp = skb = next; 1371 *skbp = skb = next;
@@ -1357,7 +1373,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
1357 fragm = true; 1373 fragm = true;
1358 } 1374 }
1359 1375
1360 return IEEE80211_TX_OK; 1376 return true;
1361} 1377}
1362 1378
1363/* 1379/*
@@ -1394,7 +1410,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1394 /* handlers after fragment must be aware of tx info fragmentation! */ 1410 /* handlers after fragment must be aware of tx info fragmentation! */
1395 CALL_TXH(ieee80211_tx_h_stats); 1411 CALL_TXH(ieee80211_tx_h_stats);
1396 CALL_TXH(ieee80211_tx_h_encrypt); 1412 CALL_TXH(ieee80211_tx_h_encrypt);
1397 CALL_TXH(ieee80211_tx_h_calculate_duration); 1413 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
1414 CALL_TXH(ieee80211_tx_h_calculate_duration);
1398#undef CALL_TXH 1415#undef CALL_TXH
1399 1416
1400 txh_done: 1417 txh_done:
@@ -1416,23 +1433,21 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1416 return 0; 1433 return 0;
1417} 1434}
1418 1435
1419static void ieee80211_tx(struct ieee80211_sub_if_data *sdata, 1436/*
1437 * Returns false if the frame couldn't be transmitted but was queued instead.
1438 */
1439static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
1420 struct sk_buff *skb, bool txpending) 1440 struct sk_buff *skb, bool txpending)
1421{ 1441{
1422 struct ieee80211_local *local = sdata->local; 1442 struct ieee80211_local *local = sdata->local;
1423 struct ieee80211_tx_data tx; 1443 struct ieee80211_tx_data tx;
1424 ieee80211_tx_result res_prepare; 1444 ieee80211_tx_result res_prepare;
1425 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1445 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1426 struct sk_buff *next; 1446 bool result = true;
1427 unsigned long flags;
1428 int ret, retries;
1429 u16 queue;
1430
1431 queue = skb_get_queue_mapping(skb);
1432 1447
1433 if (unlikely(skb->len < 10)) { 1448 if (unlikely(skb->len < 10)) {
1434 dev_kfree_skb(skb); 1449 dev_kfree_skb(skb);
1435 return; 1450 return true;
1436 } 1451 }
1437 1452
1438 rcu_read_lock(); 1453 rcu_read_lock();
@@ -1442,85 +1457,19 @@ static void ieee80211_tx(struct ieee80211_sub_if_data *sdata,
1442 1457
1443 if (unlikely(res_prepare == TX_DROP)) { 1458 if (unlikely(res_prepare == TX_DROP)) {
1444 dev_kfree_skb(skb); 1459 dev_kfree_skb(skb);
1445 rcu_read_unlock(); 1460 goto out;
1446 return;
1447 } else if (unlikely(res_prepare == TX_QUEUED)) { 1461 } else if (unlikely(res_prepare == TX_QUEUED)) {
1448 rcu_read_unlock(); 1462 goto out;
1449 return;
1450 } 1463 }
1451 1464
1452 tx.channel = local->hw.conf.channel; 1465 tx.channel = local->hw.conf.channel;
1453 info->band = tx.channel->band; 1466 info->band = tx.channel->band;
1454 1467
1455 if (invoke_tx_handlers(&tx)) 1468 if (!invoke_tx_handlers(&tx))
1456 goto out; 1469 result = __ieee80211_tx(local, &tx.skb, tx.sta, txpending);
1457
1458 retries = 0;
1459 retry:
1460 ret = __ieee80211_tx(local, &tx.skb, tx.sta, txpending);
1461 switch (ret) {
1462 case IEEE80211_TX_OK:
1463 break;
1464 case IEEE80211_TX_AGAIN:
1465 /*
1466 * Since there are no fragmented frames on A-MPDU
1467 * queues, there's no reason for a driver to reject
1468 * a frame there, warn and drop it.
1469 */
1470 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
1471 goto drop;
1472 /* fall through */
1473 case IEEE80211_TX_PENDING:
1474 skb = tx.skb;
1475
1476 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1477
1478 if (local->queue_stop_reasons[queue] ||
1479 !skb_queue_empty(&local->pending[queue])) {
1480 /*
1481 * if queue is stopped, queue up frames for later
1482 * transmission from the tasklet
1483 */
1484 do {
1485 next = skb->next;
1486 skb->next = NULL;
1487 if (unlikely(txpending))
1488 __skb_queue_head(&local->pending[queue],
1489 skb);
1490 else
1491 __skb_queue_tail(&local->pending[queue],
1492 skb);
1493 } while ((skb = next));
1494
1495 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1496 flags);
1497 } else {
1498 /*
1499 * otherwise retry, but this is a race condition or
1500 * a driver bug (which we warn about if it persists)
1501 */
1502 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1503 flags);
1504
1505 retries++;
1506 if (WARN(retries > 10, "tx refused but queue active\n"))
1507 goto drop;
1508 goto retry;
1509 }
1510 }
1511 out: 1470 out:
1512 rcu_read_unlock(); 1471 rcu_read_unlock();
1513 return; 1472 return result;
1514
1515 drop:
1516 rcu_read_unlock();
1517
1518 skb = tx.skb;
1519 while (skb) {
1520 next = skb->next;
1521 dev_kfree_skb(skb);
1522 skb = next;
1523 }
1524} 1473}
1525 1474
1526/* device xmit handlers */ 1475/* device xmit handlers */
@@ -1531,12 +1480,7 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
1531{ 1480{
1532 int tail_need = 0; 1481 int tail_need = 0;
1533 1482
1534 /* 1483 if (may_encrypt && local->crypto_tx_tailroom_needed_cnt) {
1535 * This could be optimised, devices that do full hardware
1536 * crypto (including TKIP MMIC) need no tailroom... But we
1537 * have no drivers for such devices currently.
1538 */
1539 if (may_encrypt) {
1540 tail_need = IEEE80211_ENCRYPT_TAILROOM; 1484 tail_need = IEEE80211_ENCRYPT_TAILROOM;
1541 tail_need -= skb_tailroom(skb); 1485 tail_need -= skb_tailroom(skb);
1542 tail_need = max_t(int, tail_need, 0); 1486 tail_need = max_t(int, tail_need, 0);
@@ -1750,7 +1694,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1750 __le16 fc; 1694 __le16 fc;
1751 struct ieee80211_hdr hdr; 1695 struct ieee80211_hdr hdr;
1752 struct ieee80211s_hdr mesh_hdr __maybe_unused; 1696 struct ieee80211s_hdr mesh_hdr __maybe_unused;
1753 struct mesh_path *mppath = NULL; 1697 struct mesh_path __maybe_unused *mppath = NULL;
1754 const u8 *encaps_data; 1698 const u8 *encaps_data;
1755 int encaps_len, skip_header_bytes; 1699 int encaps_len, skip_header_bytes;
1756 int nh_pos, h_pos; 1700 int nh_pos, h_pos;
@@ -1811,27 +1755,28 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1811 ret = NETDEV_TX_OK; 1755 ret = NETDEV_TX_OK;
1812 goto fail; 1756 goto fail;
1813 } 1757 }
1758 rcu_read_lock();
1814 if (!is_multicast_ether_addr(skb->data)) 1759 if (!is_multicast_ether_addr(skb->data))
1815 mppath = mpp_path_lookup(skb->data, sdata); 1760 mppath = mpp_path_lookup(skb->data, sdata);
1816 1761
1817 /* 1762 /*
1818 * Do not use address extension, if it is a packet from 1763 * Use address extension if it is a packet from
1819 * the same interface and the destination is not being 1764 * another interface or if we know the destination
1820 * proxied by any other mest point. 1765 * is being proxied by a portal (i.e. portal address
1766 * differs from proxied address)
1821 */ 1767 */
1822 if (compare_ether_addr(sdata->vif.addr, 1768 if (compare_ether_addr(sdata->vif.addr,
1823 skb->data + ETH_ALEN) == 0 && 1769 skb->data + ETH_ALEN) == 0 &&
1824 (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) { 1770 !(mppath && compare_ether_addr(mppath->mpp, skb->data))) {
1825 hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, 1771 hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
1826 skb->data, skb->data + ETH_ALEN); 1772 skb->data, skb->data + ETH_ALEN);
1773 rcu_read_unlock();
1827 meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, 1774 meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
1828 sdata, NULL, NULL); 1775 sdata, NULL, NULL);
1829 } else { 1776 } else {
1830 /* packet from other interface */
1831 int is_mesh_mcast = 1; 1777 int is_mesh_mcast = 1;
1832 const u8 *mesh_da; 1778 const u8 *mesh_da;
1833 1779
1834 rcu_read_lock();
1835 if (is_multicast_ether_addr(skb->data)) 1780 if (is_multicast_ether_addr(skb->data))
1836 /* DA TA mSA AE:SA */ 1781 /* DA TA mSA AE:SA */
1837 mesh_da = skb->data; 1782 mesh_da = skb->data;
@@ -2067,6 +2012,11 @@ void ieee80211_clear_tx_pending(struct ieee80211_local *local)
2067 skb_queue_purge(&local->pending[i]); 2012 skb_queue_purge(&local->pending[i]);
2068} 2013}
2069 2014
2015/*
2016 * Returns false if the frame couldn't be transmitted but was queued instead,
2017 * which in this case means re-queued -- take as an indication to stop sending
2018 * more pending frames.
2019 */
2070static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, 2020static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
2071 struct sk_buff *skb) 2021 struct sk_buff *skb)
2072{ 2022{
@@ -2074,20 +2024,17 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
2074 struct ieee80211_sub_if_data *sdata; 2024 struct ieee80211_sub_if_data *sdata;
2075 struct sta_info *sta; 2025 struct sta_info *sta;
2076 struct ieee80211_hdr *hdr; 2026 struct ieee80211_hdr *hdr;
2077 int ret; 2027 bool result;
2078 bool result = true;
2079 2028
2080 sdata = vif_to_sdata(info->control.vif); 2029 sdata = vif_to_sdata(info->control.vif);
2081 2030
2082 if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) { 2031 if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) {
2083 ieee80211_tx(sdata, skb, true); 2032 result = ieee80211_tx(sdata, skb, true);
2084 } else { 2033 } else {
2085 hdr = (struct ieee80211_hdr *)skb->data; 2034 hdr = (struct ieee80211_hdr *)skb->data;
2086 sta = sta_info_get(sdata, hdr->addr1); 2035 sta = sta_info_get(sdata, hdr->addr1);
2087 2036
2088 ret = __ieee80211_tx(local, &skb, sta, true); 2037 result = __ieee80211_tx(local, &skb, sta, true);
2089 if (ret != IEEE80211_TX_OK)
2090 result = false;
2091 } 2038 }
2092 2039
2093 return result; 2040 return result;
@@ -2129,8 +2076,6 @@ void ieee80211_tx_pending(unsigned long data)
2129 flags); 2076 flags);
2130 2077
2131 txok = ieee80211_tx_pending_skb(local, skb); 2078 txok = ieee80211_tx_pending_skb(local, skb);
2132 if (!txok)
2133 __skb_queue_head(&local->pending[i], skb);
2134 spin_lock_irqsave(&local->queue_stop_reason_lock, 2079 spin_lock_irqsave(&local->queue_stop_reason_lock,
2135 flags); 2080 flags);
2136 if (!txok) 2081 if (!txok)
@@ -2178,6 +2123,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss,
2178 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) 2123 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf))
2179 aid0 = 1; 2124 aid0 = 1;
2180 2125
2126 bss->dtim_bc_mc = aid0 == 1;
2127
2181 if (have_bits) { 2128 if (have_bits) {
2182 /* Find largest even number N1 so that bits numbered 1 through 2129 /* Find largest even number N1 so that bits numbered 1 through
2183 * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits 2130 * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits
@@ -2241,7 +2188,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2241 if (sdata->vif.type == NL80211_IFTYPE_AP) { 2188 if (sdata->vif.type == NL80211_IFTYPE_AP) {
2242 ap = &sdata->u.ap; 2189 ap = &sdata->u.ap;
2243 beacon = rcu_dereference(ap->beacon); 2190 beacon = rcu_dereference(ap->beacon);
2244 if (ap && beacon) { 2191 if (beacon) {
2245 /* 2192 /*
2246 * headroom, head length, 2193 * headroom, head length,
2247 * tail length and maximum TIM length 2194 * tail length and maximum TIM length
@@ -2302,9 +2249,14 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2302 struct ieee80211_mgmt *mgmt; 2249 struct ieee80211_mgmt *mgmt;
2303 u8 *pos; 2250 u8 *pos;
2304 2251
2252#ifdef CONFIG_MAC80211_MESH
2253 if (!sdata->u.mesh.mesh_id_len)
2254 goto out;
2255#endif
2256
2305 /* headroom, head length, tail length and maximum TIM length */ 2257 /* headroom, head length, tail length and maximum TIM length */
2306 skb = dev_alloc_skb(local->tx_headroom + 400 + 2258 skb = dev_alloc_skb(local->tx_headroom + 400 +
2307 sdata->u.mesh.vendor_ie_len); 2259 sdata->u.mesh.ie_len);
2308 if (!skb) 2260 if (!skb)
2309 goto out; 2261 goto out;
2310 2262
@@ -2527,7 +2479,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
2527{ 2479{
2528 struct ieee80211_local *local = hw_to_local(hw); 2480 struct ieee80211_local *local = hw_to_local(hw);
2529 struct sk_buff *skb = NULL; 2481 struct sk_buff *skb = NULL;
2530 struct sta_info *sta;
2531 struct ieee80211_tx_data tx; 2482 struct ieee80211_tx_data tx;
2532 struct ieee80211_sub_if_data *sdata; 2483 struct ieee80211_sub_if_data *sdata;
2533 struct ieee80211_if_ap *bss = NULL; 2484 struct ieee80211_if_ap *bss = NULL;
@@ -2543,7 +2494,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
2543 if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head) 2494 if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head)
2544 goto out; 2495 goto out;
2545 2496
2546 if (bss->dtim_count != 0) 2497 if (bss->dtim_count != 0 || !bss->dtim_bc_mc)
2547 goto out; /* send buffered bc/mc only after DTIM beacon */ 2498 goto out; /* send buffered bc/mc only after DTIM beacon */
2548 2499
2549 while (1) { 2500 while (1) {
@@ -2569,7 +2520,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
2569 2520
2570 info = IEEE80211_SKB_CB(skb); 2521 info = IEEE80211_SKB_CB(skb);
2571 2522
2572 sta = tx.sta;
2573 tx.flags |= IEEE80211_TX_PS_BUFFERED; 2523 tx.flags |= IEEE80211_TX_PS_BUFFERED;
2574 tx.channel = local->hw.conf.channel; 2524 tx.channel = local->hw.conf.channel;
2575 info->band = tx.channel->band; 2525 info->band = tx.channel->band;
@@ -2589,8 +2539,9 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
2589 skb_set_network_header(skb, 0); 2539 skb_set_network_header(skb, 0);
2590 skb_set_transport_header(skb, 0); 2540 skb_set_transport_header(skb, 0);
2591 2541
2592 /* send all internal mgmt frames on VO */ 2542 /* Send all internal mgmt frames on VO. Accordingly set TID to 7. */
2593 skb_set_queue_mapping(skb, 0); 2543 skb_set_queue_mapping(skb, IEEE80211_AC_VO);
2544 skb->priority = 7;
2594 2545
2595 /* 2546 /*
2596 * The other path calling ieee80211_xmit is from the tasklet, 2547 * The other path calling ieee80211_xmit is from the tasklet,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d036597aabbe..d3fe2d237485 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -986,12 +986,6 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
986 u16 cap = sband->ht_cap.cap; 986 u16 cap = sband->ht_cap.cap;
987 __le16 tmp; 987 __le16 tmp;
988 988
989 if (ieee80211_disable_40mhz_24ghz &&
990 sband->band == IEEE80211_BAND_2GHZ) {
991 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
992 cap &= ~IEEE80211_HT_CAP_SGI_40;
993 }
994
995 *pos++ = WLAN_EID_HT_CAPABILITY; 989 *pos++ = WLAN_EID_HT_CAPABILITY;
996 *pos++ = sizeof(struct ieee80211_ht_cap); 990 *pos++ = sizeof(struct ieee80211_ht_cap);
997 memset(pos, 0, sizeof(struct ieee80211_ht_cap)); 991 memset(pos, 0, sizeof(struct ieee80211_ht_cap));
@@ -1131,9 +1125,27 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1131 struct sta_info *sta; 1125 struct sta_info *sta;
1132 int res; 1126 int res;
1133 1127
1128#ifdef CONFIG_PM
1134 if (local->suspended) 1129 if (local->suspended)
1135 local->resuming = true; 1130 local->resuming = true;
1136 1131
1132 if (local->wowlan) {
1133 local->wowlan = false;
1134 res = drv_resume(local);
1135 if (res < 0) {
1136 local->resuming = false;
1137 return res;
1138 }
1139 if (res == 0)
1140 goto wake_up;
1141 WARN_ON(res > 1);
1142 /*
1143 * res is 1, which means the driver requested
1144 * to go through a regular reset on wakeup.
1145 */
1146 }
1147#endif
1148
1137 /* restart hardware */ 1149 /* restart hardware */
1138 if (local->open_count) { 1150 if (local->open_count) {
1139 /* 1151 /*
@@ -1264,6 +1276,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1264 if (ieee80211_sdata_running(sdata)) 1276 if (ieee80211_sdata_running(sdata))
1265 ieee80211_enable_keys(sdata); 1277 ieee80211_enable_keys(sdata);
1266 1278
1279 wake_up:
1267 ieee80211_wake_queues_by_reason(hw, 1280 ieee80211_wake_queues_by_reason(hw,
1268 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 1281 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
1269 1282
@@ -1296,7 +1309,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1296 } 1309 }
1297 } 1310 }
1298 1311
1299 add_timer(&local->sta_cleanup); 1312 mod_timer(&local->sta_cleanup, jiffies + 1);
1300 1313
1301 mutex_lock(&local->sta_mtx); 1314 mutex_lock(&local->sta_mtx);
1302 list_for_each_entry(sta, &local->sta_list, list) 1315 list_for_each_entry(sta, &local->sta_list, list)
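Note on the CONFIG_PM hunk in ieee80211_reconfig() above: it gives drivers a three-way contract when waking from WoWLAN. The resume callback may return a negative error (resume is aborted), 0 to signal that the device kept its state across suspend (mac80211 then jumps straight to the new wake_up label and skips reprogramming), or 1 to request the regular reset path. A minimal sketch of a driver-side hook following that contract is below; everything prefixed mydrv_ is hypothetical and not part of this patch.

#include <net/mac80211.h>

/* Hypothetical private driver state, kept only for this sketch. */
struct mydrv_priv {
	bool fw_state_retained;		/* set by the driver's suspend path */
};

/* ieee80211_ops.resume hook, as assumed by the hunk above:
 *   < 0  fatal error, resume is aborted
 *   0    device state survived suspend, skip full reprogramming
 *   1    ask mac80211 to run the regular reconfiguration path
 */
static int mydrv_resume(struct ieee80211_hw *hw)
{
	struct mydrv_priv *priv = hw->priv;

	if (priv->fw_state_retained)
		return 0;

	return 1;
}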
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 2ff6d1e3ed21..a1c6bfd55f0f 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -30,17 +30,15 @@ int ieee80211_wep_init(struct ieee80211_local *local)
30 /* start WEP IV from a random value */ 30 /* start WEP IV from a random value */
31 get_random_bytes(&local->wep_iv, WEP_IV_LEN); 31 get_random_bytes(&local->wep_iv, WEP_IV_LEN);
32 32
33 local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, 33 local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
34 CRYPTO_ALG_ASYNC);
35 if (IS_ERR(local->wep_tx_tfm)) { 34 if (IS_ERR(local->wep_tx_tfm)) {
36 local->wep_rx_tfm = ERR_PTR(-EINVAL); 35 local->wep_rx_tfm = ERR_PTR(-EINVAL);
37 return PTR_ERR(local->wep_tx_tfm); 36 return PTR_ERR(local->wep_tx_tfm);
38 } 37 }
39 38
40 local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, 39 local->wep_rx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
41 CRYPTO_ALG_ASYNC);
42 if (IS_ERR(local->wep_rx_tfm)) { 40 if (IS_ERR(local->wep_rx_tfm)) {
43 crypto_free_blkcipher(local->wep_tx_tfm); 41 crypto_free_cipher(local->wep_tx_tfm);
44 local->wep_tx_tfm = ERR_PTR(-EINVAL); 42 local->wep_tx_tfm = ERR_PTR(-EINVAL);
45 return PTR_ERR(local->wep_rx_tfm); 43 return PTR_ERR(local->wep_rx_tfm);
46 } 44 }
@@ -51,9 +49,9 @@ int ieee80211_wep_init(struct ieee80211_local *local)
51void ieee80211_wep_free(struct ieee80211_local *local) 49void ieee80211_wep_free(struct ieee80211_local *local)
52{ 50{
53 if (!IS_ERR(local->wep_tx_tfm)) 51 if (!IS_ERR(local->wep_tx_tfm))
54 crypto_free_blkcipher(local->wep_tx_tfm); 52 crypto_free_cipher(local->wep_tx_tfm);
55 if (!IS_ERR(local->wep_rx_tfm)) 53 if (!IS_ERR(local->wep_rx_tfm))
56 crypto_free_blkcipher(local->wep_rx_tfm); 54 crypto_free_cipher(local->wep_rx_tfm);
57} 55}
58 56
59static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen) 57static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen)
@@ -127,12 +125,11 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local,
127/* Perform WEP encryption using given key. data buffer must have tailroom 125/* Perform WEP encryption using given key. data buffer must have tailroom
128 * for 4-byte ICV. data_len must not include this ICV. Note: this function 126 * for 4-byte ICV. data_len must not include this ICV. Note: this function
129 * does _not_ add IV. data = RC4(data | CRC32(data)) */ 127 * does _not_ add IV. data = RC4(data | CRC32(data)) */
130int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, 128int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
131 size_t klen, u8 *data, size_t data_len) 129 size_t klen, u8 *data, size_t data_len)
132{ 130{
133 struct blkcipher_desc desc = { .tfm = tfm };
134 struct scatterlist sg;
135 __le32 icv; 131 __le32 icv;
132 int i;
136 133
137 if (IS_ERR(tfm)) 134 if (IS_ERR(tfm))
138 return -1; 135 return -1;
@@ -140,9 +137,9 @@ int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
140 icv = cpu_to_le32(~crc32_le(~0, data, data_len)); 137 icv = cpu_to_le32(~crc32_le(~0, data, data_len));
141 put_unaligned(icv, (__le32 *)(data + data_len)); 138 put_unaligned(icv, (__le32 *)(data + data_len));
142 139
143 crypto_blkcipher_setkey(tfm, rc4key, klen); 140 crypto_cipher_setkey(tfm, rc4key, klen);
144 sg_init_one(&sg, data, data_len + WEP_ICV_LEN); 141 for (i = 0; i < data_len + WEP_ICV_LEN; i++)
145 crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length); 142 crypto_cipher_encrypt_one(tfm, data + i, data + i);
146 143
147 return 0; 144 return 0;
148} 145}
@@ -186,19 +183,18 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
186/* Perform WEP decryption using given key. data buffer includes encrypted 183/* Perform WEP decryption using given key. data buffer includes encrypted
187 * payload, including 4-byte ICV, but _not_ IV. data_len must not include ICV. 184 * payload, including 4-byte ICV, but _not_ IV. data_len must not include ICV.
188 * Return 0 on success and -1 on ICV mismatch. */ 185 * Return 0 on success and -1 on ICV mismatch. */
189int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, 186int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
190 size_t klen, u8 *data, size_t data_len) 187 size_t klen, u8 *data, size_t data_len)
191{ 188{
192 struct blkcipher_desc desc = { .tfm = tfm };
193 struct scatterlist sg;
194 __le32 crc; 189 __le32 crc;
190 int i;
195 191
196 if (IS_ERR(tfm)) 192 if (IS_ERR(tfm))
197 return -1; 193 return -1;
198 194
199 crypto_blkcipher_setkey(tfm, rc4key, klen); 195 crypto_cipher_setkey(tfm, rc4key, klen);
200 sg_init_one(&sg, data, data_len + WEP_ICV_LEN); 196 for (i = 0; i < data_len + WEP_ICV_LEN; i++)
201 crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length); 197 crypto_cipher_decrypt_one(tfm, data + i, data + i);
202 198
203 crc = cpu_to_le32(~crc32_le(~0, data, data_len)); 199 crc = cpu_to_le32(~crc32_le(~0, data, data_len));
204 if (memcmp(&crc, data + data_len, WEP_ICV_LEN) != 0) 200 if (memcmp(&crc, data + data_len, WEP_ICV_LEN) != 0)
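Note on the wep.c hunks above: they move from the scatterlist-based "ecb(arc4)" blkcipher to the plain "arc4" crypto_cipher, whose block size is a single byte, so the RC4 keystream is applied with one crypto_cipher_encrypt_one() call per byte and no blkcipher_desc or scatterlist setup is needed. A minimal, self-contained sketch of that pattern follows; the helper name is made up, the real mac80211 helpers are the ieee80211_wep_*_data() functions shown in the diff.

#include <linux/crypto.h>
#include <linux/err.h>

/* Hypothetical helper: RC4-process 'len' bytes of 'data' in place. */
static int arc4_crypt_inplace(const u8 *key, unsigned int klen,
			      u8 *data, size_t len)
{
	struct crypto_cipher *tfm;
	size_t i;
	int err;

	tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, klen);
	if (!err) {
		/* arc4 has a one-byte block size: encrypt byte by byte. */
		for (i = 0; i < len; i++)
			crypto_cipher_encrypt_one(tfm, data + i, data + i);
	}

	crypto_free_cipher(tfm);
	return err;
}

Since RC4 is a stream cipher, encryption and decryption are the same keystream operation; the encrypt_one/decrypt_one split in the patch mirrors the old blkcipher calls rather than expressing a real difference.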
diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h
index 58654ee33518..01e54840a628 100644
--- a/net/mac80211/wep.h
+++ b/net/mac80211/wep.h
@@ -18,12 +18,12 @@
18 18
19int ieee80211_wep_init(struct ieee80211_local *local); 19int ieee80211_wep_init(struct ieee80211_local *local);
20void ieee80211_wep_free(struct ieee80211_local *local); 20void ieee80211_wep_free(struct ieee80211_local *local);
21int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, 21int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
22 size_t klen, u8 *data, size_t data_len); 22 size_t klen, u8 *data, size_t data_len);
23int ieee80211_wep_encrypt(struct ieee80211_local *local, 23int ieee80211_wep_encrypt(struct ieee80211_local *local,
24 struct sk_buff *skb, 24 struct sk_buff *skb,
25 const u8 *key, int keylen, int keyidx); 25 const u8 *key, int keylen, int keyidx);
26int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, 26int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
27 size_t klen, u8 *data, size_t data_len); 27 size_t klen, u8 *data, size_t data_len);
28bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); 28bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key);
29 29
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 36305e0d06ef..d2e7f0e86677 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -30,7 +30,6 @@
30#define IEEE80211_AUTH_MAX_TRIES 3 30#define IEEE80211_AUTH_MAX_TRIES 3
31#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) 31#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
32#define IEEE80211_ASSOC_MAX_TRIES 3 32#define IEEE80211_ASSOC_MAX_TRIES 3
33#define IEEE80211_MAX_PROBE_TRIES 5
34 33
35enum work_action { 34enum work_action {
36 WORK_ACT_MISMATCH, 35 WORK_ACT_MISMATCH,
@@ -66,17 +65,9 @@ static void run_again(struct ieee80211_local *local,
66 mod_timer(&local->work_timer, timeout); 65 mod_timer(&local->work_timer, timeout);
67} 66}
68 67
69static void work_free_rcu(struct rcu_head *head)
70{
71 struct ieee80211_work *wk =
72 container_of(head, struct ieee80211_work, rcu_head);
73
74 kfree(wk);
75}
76
77void free_work(struct ieee80211_work *wk) 68void free_work(struct ieee80211_work *wk)
78{ 69{
79 call_rcu(&wk->rcu_head, work_free_rcu); 70 kfree_rcu(wk, rcu_head);
80} 71}
81 72
82static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len, 73static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
@@ -126,12 +117,6 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie,
126 117
127 /* determine capability flags */ 118 /* determine capability flags */
128 119
129 if (ieee80211_disable_40mhz_24ghz &&
130 sband->band == IEEE80211_BAND_2GHZ) {
131 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
132 cap &= ~IEEE80211_HT_CAP_SGI_40;
133 }
134
135 switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { 120 switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
136 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: 121 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
137 if (flags & IEEE80211_CHAN_NO_HT40PLUS) { 122 if (flags & IEEE80211_CHAN_NO_HT40PLUS) {
@@ -205,9 +190,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
205 struct sk_buff *skb; 190 struct sk_buff *skb;
206 struct ieee80211_mgmt *mgmt; 191 struct ieee80211_mgmt *mgmt;
207 u8 *pos, qos_info; 192 u8 *pos, qos_info;
208 const u8 *ies;
209 size_t offset = 0, noffset; 193 size_t offset = 0, noffset;
210 int i, len, count, rates_len, supp_rates_len; 194 int i, count, rates_len, supp_rates_len;
211 u16 capab; 195 u16 capab;
212 struct ieee80211_supported_band *sband; 196 struct ieee80211_supported_band *sband;
213 u32 rates = 0; 197 u32 rates = 0;
@@ -292,7 +276,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
292 } 276 }
293 277
294 /* SSID */ 278 /* SSID */
295 ies = pos = skb_put(skb, 2 + wk->assoc.ssid_len); 279 pos = skb_put(skb, 2 + wk->assoc.ssid_len);
296 *pos++ = WLAN_EID_SSID; 280 *pos++ = WLAN_EID_SSID;
297 *pos++ = wk->assoc.ssid_len; 281 *pos++ = wk->assoc.ssid_len;
298 memcpy(pos, wk->assoc.ssid, wk->assoc.ssid_len); 282 memcpy(pos, wk->assoc.ssid, wk->assoc.ssid_len);
@@ -302,7 +286,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
302 if (supp_rates_len > 8) 286 if (supp_rates_len > 8)
303 supp_rates_len = 8; 287 supp_rates_len = 8;
304 288
305 len = sband->n_bitrates;
306 pos = skb_put(skb, supp_rates_len + 2); 289 pos = skb_put(skb, supp_rates_len + 2);
307 *pos++ = WLAN_EID_SUPP_RATES; 290 *pos++ = WLAN_EID_SUPP_RATES;
308 *pos++ = supp_rates_len; 291 *pos++ = supp_rates_len;
@@ -874,6 +857,44 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
874 kfree_skb(skb); 857 kfree_skb(skb);
875} 858}
876 859
860static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct,
861 enum nl80211_channel_type oper_ct)
862{
863 switch (wk_ct) {
864 case NL80211_CHAN_NO_HT:
865 return true;
866 case NL80211_CHAN_HT20:
867 if (oper_ct != NL80211_CHAN_NO_HT)
868 return true;
869 return false;
870 case NL80211_CHAN_HT40MINUS:
871 case NL80211_CHAN_HT40PLUS:
872 return (wk_ct == oper_ct);
873 }
874 WARN_ON(1); /* shouldn't get here */
875 return false;
876}
877
878static enum nl80211_channel_type
879ieee80211_calc_ct(enum nl80211_channel_type wk_ct,
880 enum nl80211_channel_type oper_ct)
881{
882 switch (wk_ct) {
883 case NL80211_CHAN_NO_HT:
884 return oper_ct;
885 case NL80211_CHAN_HT20:
886 if (oper_ct != NL80211_CHAN_NO_HT)
887 return oper_ct;
888 return wk_ct;
889 case NL80211_CHAN_HT40MINUS:
890 case NL80211_CHAN_HT40PLUS:
891 return wk_ct;
892 }
893 WARN_ON(1); /* shouldn't get here */
894 return wk_ct;
895}
896
897
877static void ieee80211_work_timer(unsigned long data) 898static void ieee80211_work_timer(unsigned long data)
878{ 899{
879 struct ieee80211_local *local = (void *) data; 900 struct ieee80211_local *local = (void *) data;
@@ -924,18 +945,52 @@ static void ieee80211_work_work(struct work_struct *work)
924 } 945 }
925 946
926 if (!started && !local->tmp_channel) { 947 if (!started && !local->tmp_channel) {
948 bool on_oper_chan;
949 bool tmp_chan_changed = false;
950 bool on_oper_chan2;
951 enum nl80211_channel_type wk_ct;
952 on_oper_chan = ieee80211_cfg_on_oper_channel(local);
953
954 /* Work with existing channel type if possible. */
955 wk_ct = wk->chan_type;
956 if (wk->chan == local->hw.conf.channel)
957 wk_ct = ieee80211_calc_ct(wk->chan_type,
958 local->hw.conf.channel_type);
959
960 if (local->tmp_channel)
961 if ((local->tmp_channel != wk->chan) ||
962 (local->tmp_channel_type != wk_ct))
963 tmp_chan_changed = true;
964
965 local->tmp_channel = wk->chan;
966 local->tmp_channel_type = wk_ct;
927 /* 967 /*
928 * TODO: could optimize this by leaving the 968 * Leave the station vifs in awake mode if they
929 * station vifs in awake mode if they 969 * happen to be on the same channel as
930 * happen to be on the same channel as 970 * the requested channel.
931 * the requested channel
932 */ 971 */
933 ieee80211_offchannel_stop_beaconing(local); 972 on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
934 ieee80211_offchannel_stop_station(local); 973 if (on_oper_chan != on_oper_chan2) {
974 if (on_oper_chan2) {
975 /* going off oper channel, PS too */
976 ieee80211_offchannel_stop_vifs(local,
977 true);
978 ieee80211_hw_config(local, 0);
979 } else {
980 /* going on channel, but leave PS
981 * off-channel. */
982 ieee80211_hw_config(local, 0);
983 ieee80211_offchannel_return(local,
984 true,
985 false);
986 }
987 } else if (tmp_chan_changed)
988 /* Still off-channel, but on some other
989 * channel, so update hardware.
990 * PS should already be off-channel.
991 */
992 ieee80211_hw_config(local, 0);
935 993
936 local->tmp_channel = wk->chan;
937 local->tmp_channel_type = wk->chan_type;
938 ieee80211_hw_config(local, 0);
939 started = true; 994 started = true;
940 wk->timeout = jiffies; 995 wk->timeout = jiffies;
941 } 996 }
@@ -1005,15 +1060,34 @@ static void ieee80211_work_work(struct work_struct *work)
1005 continue; 1060 continue;
1006 if (wk->chan != local->tmp_channel) 1061 if (wk->chan != local->tmp_channel)
1007 continue; 1062 continue;
1008 if (wk->chan_type != local->tmp_channel_type) 1063 if (ieee80211_work_ct_coexists(wk->chan_type,
1064 local->tmp_channel_type))
1009 continue; 1065 continue;
1010 remain_off_channel = true; 1066 remain_off_channel = true;
1011 } 1067 }
1012 1068
1013 if (!remain_off_channel && local->tmp_channel) { 1069 if (!remain_off_channel && local->tmp_channel) {
1070 bool on_oper_chan = ieee80211_cfg_on_oper_channel(local);
1014 local->tmp_channel = NULL; 1071 local->tmp_channel = NULL;
1015 ieee80211_hw_config(local, 0); 1072 /* If tmp_channel wasn't operating channel, then
1016 ieee80211_offchannel_return(local, true); 1073 * we need to go back on-channel.
 1074 * NOTE: If we can ever be here while scanning,
1075 * or if the hw_config() channel config logic changes,
1076 * then we may need to do a more thorough check to see if
1077 * we still need to do a hardware config. Currently,
1078 * we cannot be here while scanning, however.
1079 */
1080 if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan)
1081 ieee80211_hw_config(local, 0);
1082
1083 /* At the least, we need to disable offchannel_ps,
1084 * so just go ahead and run the entire offchannel
1085 * return logic here. We *could* skip enabling
1086 * beaconing if we were already on-oper-channel
1087 * as a future optimization.
1088 */
1089 ieee80211_offchannel_return(local, true, true);
1090
1017 /* give connection some time to breathe */ 1091 /* give connection some time to breathe */
1018 run_again(local, jiffies + HZ/2); 1092 run_again(local, jiffies + HZ/2);
1019 } 1093 }
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index bee230d8fd11..9dc3b5f26e80 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -26,13 +26,12 @@
26ieee80211_tx_result 26ieee80211_tx_result
27ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) 27ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
28{ 28{
29 u8 *data, *key, *mic, key_offset; 29 u8 *data, *key, *mic;
30 size_t data_len; 30 size_t data_len;
31 unsigned int hdrlen; 31 unsigned int hdrlen;
32 struct ieee80211_hdr *hdr; 32 struct ieee80211_hdr *hdr;
33 struct sk_buff *skb = tx->skb; 33 struct sk_buff *skb = tx->skb;
34 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 34 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
35 int authenticator;
36 int tail; 35 int tail;
37 36
38 hdr = (struct ieee80211_hdr *)skb->data; 37 hdr = (struct ieee80211_hdr *)skb->data;
@@ -47,6 +46,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
47 data = skb->data + hdrlen; 46 data = skb->data + hdrlen;
48 data_len = skb->len - hdrlen; 47 data_len = skb->len - hdrlen;
49 48
49 if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) {
50 /* Need to use software crypto for the test */
51 info->control.hw_key = NULL;
52 }
53
50 if (info->control.hw_key && 54 if (info->control.hw_key &&
51 !(tx->flags & IEEE80211_TX_FRAGMENTED) && 55 !(tx->flags & IEEE80211_TX_FRAGMENTED) &&
52 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { 56 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
@@ -62,17 +66,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
62 skb_headroom(skb) < TKIP_IV_LEN)) 66 skb_headroom(skb) < TKIP_IV_LEN))
63 return TX_DROP; 67 return TX_DROP;
64 68
65#if 0 69 key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
66 authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */
67#else
68 authenticator = 1;
69#endif
70 key_offset = authenticator ?
71 NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY :
72 NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY;
73 key = &tx->key->conf.key[key_offset];
74 mic = skb_put(skb, MICHAEL_MIC_LEN); 70 mic = skb_put(skb, MICHAEL_MIC_LEN);
75 michael_mic(key, hdr, data, data_len, mic); 71 michael_mic(key, hdr, data, data_len, mic);
72 if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE))
73 mic[0]++;
76 74
77 return TX_CONTINUE; 75 return TX_CONTINUE;
78} 76}
@@ -81,59 +79,84 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
81ieee80211_rx_result 79ieee80211_rx_result
82ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) 80ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
83{ 81{
84 u8 *data, *key = NULL, key_offset; 82 u8 *data, *key = NULL;
85 size_t data_len; 83 size_t data_len;
86 unsigned int hdrlen; 84 unsigned int hdrlen;
87 u8 mic[MICHAEL_MIC_LEN]; 85 u8 mic[MICHAEL_MIC_LEN];
88 struct sk_buff *skb = rx->skb; 86 struct sk_buff *skb = rx->skb;
89 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 87 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
90 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 88 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
91 int authenticator = 1, wpa_test = 0;
92 89
93 /* No way to verify the MIC if the hardware stripped it */ 90 /*
94 if (status->flag & RX_FLAG_MMIC_STRIPPED) 91 * it makes no sense to check for MIC errors on anything other
92 * than data frames.
93 */
94 if (!ieee80211_is_data_present(hdr->frame_control))
95 return RX_CONTINUE; 95 return RX_CONTINUE;
96 96
97 /*
98 * No way to verify the MIC if the hardware stripped it or
 99 * the IV with the key index. In this case we have to rely solely
100 * on the driver to set RX_FLAG_MMIC_ERROR in the event of a
101 * MIC failure report.
102 */
103 if (status->flag & (RX_FLAG_MMIC_STRIPPED | RX_FLAG_IV_STRIPPED)) {
104 if (status->flag & RX_FLAG_MMIC_ERROR)
105 goto mic_fail;
106
107 if (!(status->flag & RX_FLAG_IV_STRIPPED))
108 goto update_iv;
109
110 return RX_CONTINUE;
111 }
112
113 /*
 114 * Some hardware seems to generate Michael MIC failure reports even
 115 * though the frame was not encrypted with TKIP and therefore has no
 116 * MIC. Ignore the flag in that case to avoid triggering countermeasures.
117 */
97 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || 118 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
98 !ieee80211_has_protected(hdr->frame_control) || 119 !(status->flag & RX_FLAG_DECRYPTED))
99 !ieee80211_is_data_present(hdr->frame_control))
100 return RX_CONTINUE; 120 return RX_CONTINUE;
101 121
122 if (rx->sdata->vif.type == NL80211_IFTYPE_AP && rx->key->conf.keyidx) {
123 /*
124 * APs with pairwise keys should never receive Michael MIC
125 * errors for non-zero keyidx because these are reserved for
126 * group keys and only the AP is sending real multicast
 127 * frames in the BSS.
128 */
129 return RX_DROP_UNUSABLE;
130 }
131
132 if (status->flag & RX_FLAG_MMIC_ERROR)
133 goto mic_fail;
134
102 hdrlen = ieee80211_hdrlen(hdr->frame_control); 135 hdrlen = ieee80211_hdrlen(hdr->frame_control);
103 if (skb->len < hdrlen + MICHAEL_MIC_LEN) 136 if (skb->len < hdrlen + MICHAEL_MIC_LEN)
104 return RX_DROP_UNUSABLE; 137 return RX_DROP_UNUSABLE;
105 138
106 data = skb->data + hdrlen; 139 data = skb->data + hdrlen;
107 data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; 140 data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
108 141 key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
109#if 0
110 authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */
111#else
112 authenticator = 1;
113#endif
114 key_offset = authenticator ?
115 NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY :
116 NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY;
117 key = &rx->key->conf.key[key_offset];
118 michael_mic(key, hdr, data, data_len, mic); 142 michael_mic(key, hdr, data, data_len, mic);
119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { 143 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0)
120 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) 144 goto mic_fail;
121 return RX_DROP_UNUSABLE;
122
123 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
124 (void *) skb->data, NULL,
125 GFP_ATOMIC);
126 return RX_DROP_UNUSABLE;
127 }
128 145
129 /* remove Michael MIC from payload */ 146 /* remove Michael MIC from payload */
130 skb_trim(skb, skb->len - MICHAEL_MIC_LEN); 147 skb_trim(skb, skb->len - MICHAEL_MIC_LEN);
131 148
149update_iv:
132 /* update IV in key information to be able to detect replays */ 150 /* update IV in key information to be able to detect replays */
133 rx->key->u.tkip.rx[rx->queue].iv32 = rx->tkip_iv32; 151 rx->key->u.tkip.rx[rx->queue].iv32 = rx->tkip_iv32;
134 rx->key->u.tkip.rx[rx->queue].iv16 = rx->tkip_iv16; 152 rx->key->u.tkip.rx[rx->queue].iv16 = rx->tkip_iv16;
135 153
136 return RX_CONTINUE; 154 return RX_CONTINUE;
155
156mic_fail:
157 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
158 (void *) skb->data, NULL, GFP_ATOMIC);
159 return RX_DROP_UNUSABLE;
137} 160}
138 161
139 162
@@ -208,7 +231,7 @@ ieee80211_rx_result
208ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) 231ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
209{ 232{
210 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; 233 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
211 int hdrlen, res, hwaccel = 0, wpa_test = 0; 234 int hdrlen, res, hwaccel = 0;
212 struct ieee80211_key *key = rx->key; 235 struct ieee80211_key *key = rx->key;
213 struct sk_buff *skb = rx->skb; 236 struct sk_buff *skb = rx->skb;
214 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 237 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
@@ -235,7 +258,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
235 hdr->addr1, hwaccel, rx->queue, 258 hdr->addr1, hwaccel, rx->queue,
236 &rx->tkip_iv32, 259 &rx->tkip_iv32,
237 &rx->tkip_iv16); 260 &rx->tkip_iv16);
238 if (res != TKIP_DECRYPT_OK || wpa_test) 261 if (res != TKIP_DECRYPT_OK)
239 return RX_DROP_UNUSABLE; 262 return RX_DROP_UNUSABLE;
240 263
241 /* Trim ICV */ 264 /* Trim ICV */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1534f2b44caf..32bff6d86cb2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
85 85
86 If unsure, say `N'. 86 If unsure, say `N'.
87 87
88config NF_CONNTRACK_TIMESTAMP
89 bool 'Connection tracking timestamping'
90 depends on NETFILTER_ADVANCED
91 help
92 This option enables support for connection tracking timestamping.
 93 This allows you to store the flow start time and to obtain
 94 the flow stop time (once the flow has been destroyed) via
 95 connection tracking events.
96
97 If unsure, say `N'.
98
88config NF_CT_PROTO_DCCP 99config NF_CT_PROTO_DCCP
89 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' 100 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
90 depends on EXPERIMENTAL 101 depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
185 196
186 To compile it as a module, choose M here. If unsure, say N. 197 To compile it as a module, choose M here. If unsure, say N.
187 198
199config NF_CONNTRACK_BROADCAST
200 tristate
201
188config NF_CONNTRACK_NETBIOS_NS 202config NF_CONNTRACK_NETBIOS_NS
189 tristate "NetBIOS name service protocol support" 203 tristate "NetBIOS name service protocol support"
190 depends on NETFILTER_ADVANCED 204 depends on NETFILTER_ADVANCED
205 select NF_CONNTRACK_BROADCAST
191 help 206 help
192 NetBIOS name service requests are sent as broadcast messages from an 207 NetBIOS name service requests are sent as broadcast messages from an
193 unprivileged port and responded to with unicast messages to the 208 unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
204 219
205 To compile it as a module, choose M here. If unsure, say N. 220 To compile it as a module, choose M here. If unsure, say N.
206 221
222config NF_CONNTRACK_SNMP
223 tristate "SNMP service protocol support"
224 depends on NETFILTER_ADVANCED
225 select NF_CONNTRACK_BROADCAST
226 help
227 SNMP service requests are sent as broadcast messages from an
228 unprivileged port and responded to with unicast messages to the
 229 same port. This makes them hard to firewall properly because connection
230 tracking doesn't deal with broadcasts. This helper tracks locally
231 originating SNMP service requests and the corresponding
232 responses. It relies on correct IP address configuration, specifically
233 netmask and broadcast address.
234
235 To compile it as a module, choose M here. If unsure, say N.
236
207config NF_CONNTRACK_PPTP 237config NF_CONNTRACK_PPTP
208 tristate "PPtP protocol support" 238 tristate "PPtP protocol support"
209 depends on NETFILTER_ADVANCED 239 depends on NETFILTER_ADVANCED
@@ -322,10 +352,32 @@ config NETFILTER_XT_CONNMARK
322 ctmark), similarly to the packet mark (nfmark). Using this 352 ctmark), similarly to the packet mark (nfmark). Using this
323 target and match, you can set and match on this mark. 353 target and match, you can set and match on this mark.
324 354
355config NETFILTER_XT_SET
356 tristate 'set target and match support'
357 depends on IP_SET
358 depends on NETFILTER_ADVANCED
359 help
360 This option adds the "SET" target and "set" match.
361
362 Using this target and match, you can add/delete and match
363 elements in the sets created by ipset(8).
364
365 To compile it as a module, choose M here. If unsure, say N.
366
325# alphabetically ordered list of targets 367# alphabetically ordered list of targets
326 368
327comment "Xtables targets" 369comment "Xtables targets"
328 370
371config NETFILTER_XT_TARGET_AUDIT
372 tristate "AUDIT target support"
373 depends on AUDIT
374 depends on NETFILTER_ADVANCED
375 ---help---
 376 This option adds an 'AUDIT' target, which can be used to create
377 audit records for packets dropped/accepted.
378
 379 To compile it as a module, choose M here. If unsure, say N.
380
329config NETFILTER_XT_TARGET_CHECKSUM 381config NETFILTER_XT_TARGET_CHECKSUM
330 tristate "CHECKSUM target support" 382 tristate "CHECKSUM target support"
331 depends on IP_NF_MANGLE || IP6_NF_MANGLE 383 depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +529,7 @@ config NETFILTER_XT_TARGET_NFLOG
477config NETFILTER_XT_TARGET_NFQUEUE 529config NETFILTER_XT_TARGET_NFQUEUE
478 tristate '"NFQUEUE" target Support' 530 tristate '"NFQUEUE" target Support'
479 depends on NETFILTER_ADVANCED 531 depends on NETFILTER_ADVANCED
532 select NETFILTER_NETLINK_QUEUE
480 help 533 help
481 This target replaced the old obsolete QUEUE target. 534 This target replaced the old obsolete QUEUE target.
482 535
@@ -596,6 +649,16 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
596 649
597comment "Xtables matches" 650comment "Xtables matches"
598 651
652config NETFILTER_XT_MATCH_ADDRTYPE
653 tristate '"addrtype" address type match support'
654 depends on NETFILTER_ADVANCED
655 ---help---
656 This option allows you to match what routing thinks of an address,
 657 e.g. UNICAST, LOCAL, BROADCAST, ...
658
659 If you want to compile it as a module, say M here and read
660 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
661
599config NETFILTER_XT_MATCH_CLUSTER 662config NETFILTER_XT_MATCH_CLUSTER
600 tristate '"cluster" match support' 663 tristate '"cluster" match support'
601 depends on NF_CONNTRACK 664 depends on NF_CONNTRACK
@@ -685,6 +748,15 @@ config NETFILTER_XT_MATCH_DCCP
685 If you want to compile it as a module, say M here and read 748 If you want to compile it as a module, say M here and read
686 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 749 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
687 750
751config NETFILTER_XT_MATCH_DEVGROUP
752 tristate '"devgroup" match support'
753 depends on NETFILTER_ADVANCED
754 help
 755 This option adds a `devgroup' match, which allows matching on the
756 device group a network device is assigned to.
757
758 To compile it as a module, choose M here. If unsure, say N.
759
688config NETFILTER_XT_MATCH_DSCP 760config NETFILTER_XT_MATCH_DSCP
689 tristate '"dscp" and "tos" match support' 761 tristate '"dscp" and "tos" match support'
690 depends on NETFILTER_ADVANCED 762 depends on NETFILTER_ADVANCED
@@ -886,7 +958,7 @@ config NETFILTER_XT_MATCH_RATEEST
886config NETFILTER_XT_MATCH_REALM 958config NETFILTER_XT_MATCH_REALM
887 tristate '"realm" match support' 959 tristate '"realm" match support'
888 depends on NETFILTER_ADVANCED 960 depends on NETFILTER_ADVANCED
889 select NET_CLS_ROUTE 961 select IP_ROUTE_CLASSID
890 help 962 help
891 This option adds a `realm' match, which allows you to use the realm 963 This option adds a `realm' match, which allows you to use the realm
892 key from the routing subsystem inside iptables. 964 key from the routing subsystem inside iptables.
@@ -1011,4 +1083,6 @@ endif # NETFILTER_XTABLES
1011 1083
1012endmenu 1084endmenu
1013 1085
1086source "net/netfilter/ipset/Kconfig"
1087
1014source "net/netfilter/ipvs/Kconfig" 1088source "net/netfilter/ipvs/Kconfig"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 441050f31111..1a02853df863 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
2 2
3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o 3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 5nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
5 6
6obj-$(CONFIG_NETFILTER) = netfilter.o 7obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
28obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o 29obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
29obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o 30obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
30obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o 31obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
32obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
31obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o 33obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
34obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
32obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o 35obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
33obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o 36obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
34obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o 37obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -43,8 +46,10 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
43# combos 46# combos
44obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o 47obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
45obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o 48obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
49obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
46 50
47# targets 51# targets
52obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
48obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o 53obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
49obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 54obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
50obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o 55obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
@@ -65,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
65obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o 70obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
66 71
67# matches 72# matches
73obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
68obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o 74obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
69obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o 75obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
70obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o 76obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
@@ -72,6 +78,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
72obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o 78obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
73obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o 79obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
74obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o 80obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
81obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
75obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o 82obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
76obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o 83obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
77obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o 84obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
@@ -101,5 +108,8 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
101obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o 108obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
102obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o 109obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
103 110
111# ipset
112obj-$(CONFIG_IP_SET) += ipset/
113
104# IPVS 114# IPVS
105obj-$(CONFIG_IP_VS) += ipvs/ 115obj-$(CONFIG_IP_VS) += ipvs/
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4aa614b8a96a..899b71c0ff5d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -176,13 +176,21 @@ next_hook:
176 ret = 1; 176 ret = 1;
177 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { 177 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
178 kfree_skb(skb); 178 kfree_skb(skb);
179 ret = -(verdict >> NF_VERDICT_BITS); 179 ret = NF_DROP_GETERR(verdict);
180 if (ret == 0) 180 if (ret == 0)
181 ret = -EPERM; 181 ret = -EPERM;
182 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { 182 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
183 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 183 ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
184 verdict >> NF_VERDICT_BITS)) 184 verdict >> NF_VERDICT_QBITS);
185 goto next_hook; 185 if (ret < 0) {
186 if (ret == -ECANCELED)
187 goto next_hook;
188 if (ret == -ESRCH &&
189 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
190 goto next_hook;
191 kfree_skb(skb);
192 }
193 ret = 0;
186 } 194 }
187 rcu_read_unlock(); 195 rcu_read_unlock();
188 return ret; 196 return ret;
@@ -215,7 +223,7 @@ EXPORT_SYMBOL(skb_make_writable);
215/* This does not belong here, but locally generated errors need it if connection 223/* This does not belong here, but locally generated errors need it if connection
216 tracking in use: without this, connection may not be in hash table, and hence 224 tracking in use: without this, connection may not be in hash table, and hence
217 manufactured ICMP or RST packets will not be associated with it. */ 225 manufactured ICMP or RST packets will not be associated with it. */
218void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); 226void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
219EXPORT_SYMBOL(ip_ct_attach); 227EXPORT_SYMBOL(ip_ct_attach);
220 228
221void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) 229void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -232,7 +240,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
232} 240}
233EXPORT_SYMBOL(nf_ct_attach); 241EXPORT_SYMBOL(nf_ct_attach);
234 242
235void (*nf_ct_destroy)(struct nf_conntrack *); 243void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
236EXPORT_SYMBOL(nf_ct_destroy); 244EXPORT_SYMBOL(nf_ct_destroy);
237 245
238void nf_conntrack_destroy(struct nf_conntrack *nfct) 246void nf_conntrack_destroy(struct nf_conntrack *nfct)
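Note on the nf_hook_slow() change above: NF_QUEUE verdicts now fail open when the verdict carries NF_VERDICT_FLAG_QUEUE_BYPASS and nf_queue() reports -ESRCH (no userspace listener bound to the queue); instead of dropping the packet, processing continues with the next hook. A hedged sketch of a hook function issuing such a verdict is shown below; the hook itself is made up, while NF_QUEUE_NR() and the bypass flag are the existing netfilter verdict macros.

#include <linux/netfilter.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical netfilter hook: send packets to NFQUEUE number 3, but
 * let them continue traversal if nobody is listening on that queue. */
static unsigned int queue_with_bypass(unsigned int hooknum,
				      struct sk_buff *skb,
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
{
	return NF_QUEUE_NR(3) | NF_VERDICT_FLAG_QUEUE_BYPASS;
}

The iptables NFQUEUE target exposes the same behaviour to rulesets as its queue-bypass option.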
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
new file mode 100644
index 000000000000..2c5b348eb3a8
--- /dev/null
+++ b/net/netfilter/ipset/Kconfig
@@ -0,0 +1,122 @@
1menuconfig IP_SET
2 tristate "IP set support"
3 depends on INET && NETFILTER
4 depends on NETFILTER_NETLINK
5 help
6 This option adds IP set support to the kernel.
7 In order to define and use the sets, you need the userspace utility
8 ipset(8). You can use the sets in netfilter via the "set" match
9 and "SET" target.
10
11 To compile it as a module, choose M here. If unsure, say N.
12
13if IP_SET
14
15config IP_SET_MAX
16 int "Maximum number of IP sets"
17 default 256
18 range 2 65534
19 depends on IP_SET
20 help
 21 You can define here the default value of the maximum number
22 of IP sets for the kernel.
23
 24 The value can be overridden by the 'max_sets' module
25 parameter of the 'ip_set' module.
26
27config IP_SET_BITMAP_IP
28 tristate "bitmap:ip set support"
29 depends on IP_SET
30 help
31 This option adds the bitmap:ip set type support, by which one
 32 can store IPv4 addresses (or network addresses) from a range.
33
34 To compile it as a module, choose M here. If unsure, say N.
35
36config IP_SET_BITMAP_IPMAC
37 tristate "bitmap:ip,mac set support"
38 depends on IP_SET
39 help
40 This option adds the bitmap:ip,mac set type support, by which one
41 can store IPv4 address and (source) MAC address pairs from a range.
42
43 To compile it as a module, choose M here. If unsure, say N.
44
45config IP_SET_BITMAP_PORT
46 tristate "bitmap:port set support"
47 depends on IP_SET
48 help
49 This option adds the bitmap:port set type support, by which one
50 can store TCP/UDP port numbers from a range.
51
52 To compile it as a module, choose M here. If unsure, say N.
53
54config IP_SET_HASH_IP
55 tristate "hash:ip set support"
56 depends on IP_SET
57 help
58 This option adds the hash:ip set type support, by which one
59 can store arbitrary IPv4 or IPv6 addresses (or network addresses)
60 in a set.
61
62 To compile it as a module, choose M here. If unsure, say N.
63
64config IP_SET_HASH_IPPORT
65 tristate "hash:ip,port set support"
66 depends on IP_SET
67 help
68 This option adds the hash:ip,port set type support, by which one
69 can store IPv4/IPv6 address and protocol/port pairs.
70
71 To compile it as a module, choose M here. If unsure, say N.
72
73config IP_SET_HASH_IPPORTIP
74 tristate "hash:ip,port,ip set support"
75 depends on IP_SET
76 help
77 This option adds the hash:ip,port,ip set type support, by which
78 one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
79 address triples in a set.
80
81 To compile it as a module, choose M here. If unsure, say N.
82
83config IP_SET_HASH_IPPORTNET
84 tristate "hash:ip,port,net set support"
85 depends on IP_SET
86 help
87 This option adds the hash:ip,port,net set type support, by which
88 one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
89 network address/prefix triples in a set.
90
91 To compile it as a module, choose M here. If unsure, say N.
92
93config IP_SET_HASH_NET
94 tristate "hash:net set support"
95 depends on IP_SET
96 help
97 This option adds the hash:net set type support, by which
98 one can store IPv4/IPv6 network address/prefix elements in a set.
99
100 To compile it as a module, choose M here. If unsure, say N.
101
102config IP_SET_HASH_NETPORT
103 tristate "hash:net,port set support"
104 depends on IP_SET
105 help
106 This option adds the hash:net,port set type support, by which
107 one can store IPv4/IPv6 network address/prefix and
108 protocol/port pairs as elements in a set.
109
110 To compile it as a module, choose M here. If unsure, say N.
111
112config IP_SET_LIST_SET
113 tristate "list:set set support"
114 depends on IP_SET
115 help
116 This option adds the list:set set type support. In this
 117 kind of set one can store the names of other sets and it forms
118 an ordered union of the member sets.
119
120 To compile it as a module, choose M here. If unsure, say N.
121
122endif # IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
new file mode 100644
index 000000000000..5adbdab67bd2
--- /dev/null
+++ b/net/netfilter/ipset/Makefile
@@ -0,0 +1,24 @@
1#
2# Makefile for the ipset modules
3#
4
5ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o
6
7# ipset core
8obj-$(CONFIG_IP_SET) += ip_set.o
9
10# bitmap types
11obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o
12obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o
13obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
14
15# hash types
16obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
17obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
18obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
19obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
20obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
21obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
22
23# list types
24obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
new file mode 100644
index 000000000000..a113ff066928
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -0,0 +1,586 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module implementing an IP set type: the bitmap:ip type */
11
12#include <linux/module.h>
13#include <linux/ip.h>
14#include <linux/skbuff.h>
15#include <linux/errno.h>
16#include <linux/bitops.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/jiffies.h>
20#include <linux/timer.h>
21#include <net/netlink.h>
22#include <net/tcp.h>
23
24#include <linux/netfilter/ipset/pfxlen.h>
25#include <linux/netfilter/ipset/ip_set.h>
26#include <linux/netfilter/ipset/ip_set_bitmap.h>
27#define IP_SET_BITMAP_TIMEOUT
28#include <linux/netfilter/ipset/ip_set_timeout.h>
29
30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
32MODULE_DESCRIPTION("bitmap:ip type of IP sets");
33MODULE_ALIAS("ip_set_bitmap:ip");
34
35/* Type structure */
36struct bitmap_ip {
37 void *members; /* the set members */
38 u32 first_ip; /* host byte order, included in range */
39 u32 last_ip; /* host byte order, included in range */
40 u32 elements; /* number of max elements in the set */
41 u32 hosts; /* number of hosts in a subnet */
42 size_t memsize; /* members size */
43 u8 netmask; /* subnet netmask */
44 u32 timeout; /* timeout parameter */
45 struct timer_list gc; /* garbage collection */
46};
47
48/* Base variant */
49
50static inline u32
51ip_to_id(const struct bitmap_ip *m, u32 ip)
52{
53 return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
54}
55
56static int
57bitmap_ip_test(struct ip_set *set, void *value, u32 timeout)
58{
59 const struct bitmap_ip *map = set->data;
60 u16 id = *(u16 *)value;
61
62 return !!test_bit(id, map->members);
63}
64
65static int
66bitmap_ip_add(struct ip_set *set, void *value, u32 timeout)
67{
68 struct bitmap_ip *map = set->data;
69 u16 id = *(u16 *)value;
70
71 if (test_and_set_bit(id, map->members))
72 return -IPSET_ERR_EXIST;
73
74 return 0;
75}
76
77static int
78bitmap_ip_del(struct ip_set *set, void *value, u32 timeout)
79{
80 struct bitmap_ip *map = set->data;
81 u16 id = *(u16 *)value;
82
83 if (!test_and_clear_bit(id, map->members))
84 return -IPSET_ERR_EXIST;
85
86 return 0;
87}
88
89static int
90bitmap_ip_list(const struct ip_set *set,
91 struct sk_buff *skb, struct netlink_callback *cb)
92{
93 const struct bitmap_ip *map = set->data;
94 struct nlattr *atd, *nested;
95 u32 id, first = cb->args[2];
96
97 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
98 if (!atd)
99 return -EMSGSIZE;
100 for (; cb->args[2] < map->elements; cb->args[2]++) {
101 id = cb->args[2];
102 if (!test_bit(id, map->members))
103 continue;
104 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
105 if (!nested) {
106 if (id == first) {
107 nla_nest_cancel(skb, atd);
108 return -EMSGSIZE;
109 } else
110 goto nla_put_failure;
111 }
112 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
113 htonl(map->first_ip + id * map->hosts));
114 ipset_nest_end(skb, nested);
115 }
116 ipset_nest_end(skb, atd);
117 /* Set listing finished */
118 cb->args[2] = 0;
119 return 0;
120
121nla_put_failure:
122 nla_nest_cancel(skb, nested);
123 ipset_nest_end(skb, atd);
124 if (unlikely(id == first)) {
125 cb->args[2] = 0;
126 return -EMSGSIZE;
127 }
128 return 0;
129}
130
131/* Timeout variant */
132
133static int
134bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout)
135{
136 const struct bitmap_ip *map = set->data;
137 const unsigned long *members = map->members;
138 u16 id = *(u16 *)value;
139
140 return ip_set_timeout_test(members[id]);
141}
142
143static int
144bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout)
145{
146 struct bitmap_ip *map = set->data;
147 unsigned long *members = map->members;
148 u16 id = *(u16 *)value;
149
150 if (ip_set_timeout_test(members[id]))
151 return -IPSET_ERR_EXIST;
152
153 members[id] = ip_set_timeout_set(timeout);
154
155 return 0;
156}
157
158static int
159bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout)
160{
161 struct bitmap_ip *map = set->data;
162 unsigned long *members = map->members;
163 u16 id = *(u16 *)value;
164 int ret = -IPSET_ERR_EXIST;
165
166 if (ip_set_timeout_test(members[id]))
167 ret = 0;
168
169 members[id] = IPSET_ELEM_UNSET;
170 return ret;
171}
172
173static int
174bitmap_ip_tlist(const struct ip_set *set,
175 struct sk_buff *skb, struct netlink_callback *cb)
176{
177 const struct bitmap_ip *map = set->data;
178 struct nlattr *adt, *nested;
179 u32 id, first = cb->args[2];
180 const unsigned long *members = map->members;
181
182 adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
183 if (!adt)
184 return -EMSGSIZE;
185 for (; cb->args[2] < map->elements; cb->args[2]++) {
186 id = cb->args[2];
187 if (!ip_set_timeout_test(members[id]))
188 continue;
189 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
190 if (!nested) {
191 if (id == first) {
192 nla_nest_cancel(skb, adt);
193 return -EMSGSIZE;
194 } else
195 goto nla_put_failure;
196 }
197 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
198 htonl(map->first_ip + id * map->hosts));
199 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
200 htonl(ip_set_timeout_get(members[id])));
201 ipset_nest_end(skb, nested);
202 }
203 ipset_nest_end(skb, adt);
204
205 /* Set listing finished */
206 cb->args[2] = 0;
207
208 return 0;
209
210nla_put_failure:
211 nla_nest_cancel(skb, nested);
212 ipset_nest_end(skb, adt);
213 if (unlikely(id == first)) {
214 cb->args[2] = 0;
215 return -EMSGSIZE;
216 }
217 return 0;
218}
219
220static int
221bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
222 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
223{
224 struct bitmap_ip *map = set->data;
225 ipset_adtfn adtfn = set->variant->adt[adt];
226 u32 ip;
227
228 ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
229 if (ip < map->first_ip || ip > map->last_ip)
230 return -IPSET_ERR_BITMAP_RANGE;
231
232 ip = ip_to_id(map, ip);
233
234 return adtfn(set, &ip, map->timeout);
235}
236
237static int
238bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
239 enum ipset_adt adt, u32 *lineno, u32 flags)
240{
241 struct bitmap_ip *map = set->data;
242 ipset_adtfn adtfn = set->variant->adt[adt];
243 u32 timeout = map->timeout;
244 u32 ip, ip_to, id;
245 int ret = 0;
246
247 if (unlikely(!tb[IPSET_ATTR_IP] ||
248 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
249 return -IPSET_ERR_PROTOCOL;
250
251 if (tb[IPSET_ATTR_LINENO])
252 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
253
254 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
255 if (ret)
256 return ret;
257
258 if (ip < map->first_ip || ip > map->last_ip)
259 return -IPSET_ERR_BITMAP_RANGE;
260
261 if (tb[IPSET_ATTR_TIMEOUT]) {
262 if (!with_timeout(map->timeout))
263 return -IPSET_ERR_TIMEOUT;
264 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
265 }
266
267 if (adt == IPSET_TEST) {
268 id = ip_to_id(map, ip);
269 return adtfn(set, &id, timeout);
270 }
271
272 if (tb[IPSET_ATTR_IP_TO]) {
273 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
274 if (ret)
275 return ret;
276 if (ip > ip_to) {
277 swap(ip, ip_to);
278 if (ip < map->first_ip)
279 return -IPSET_ERR_BITMAP_RANGE;
280 }
281 } else if (tb[IPSET_ATTR_CIDR]) {
282 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
283
284 if (cidr > 32)
285 return -IPSET_ERR_INVALID_CIDR;
286 ip &= ip_set_hostmask(cidr);
287 ip_to = ip | ~ip_set_hostmask(cidr);
288 } else
289 ip_to = ip;
290
291 if (ip_to > map->last_ip)
292 return -IPSET_ERR_BITMAP_RANGE;
293
294 for (; !before(ip_to, ip); ip += map->hosts) {
295 id = ip_to_id(map, ip);
 296 ret = adtfn(set, &id, timeout);
297
298 if (ret && !ip_set_eexist(ret, flags))
299 return ret;
300 else
301 ret = 0;
302 }
303 return ret;
304}
305
306static void
307bitmap_ip_destroy(struct ip_set *set)
308{
309 struct bitmap_ip *map = set->data;
310
311 if (with_timeout(map->timeout))
312 del_timer_sync(&map->gc);
313
314 ip_set_free(map->members);
315 kfree(map);
316
317 set->data = NULL;
318}
319
320static void
321bitmap_ip_flush(struct ip_set *set)
322{
323 struct bitmap_ip *map = set->data;
324
325 memset(map->members, 0, map->memsize);
326}
327
328static int
329bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
330{
331 const struct bitmap_ip *map = set->data;
332 struct nlattr *nested;
333
334 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
335 if (!nested)
336 goto nla_put_failure;
337 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
338 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
339 if (map->netmask != 32)
340 NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
341 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
342 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
343 htonl(sizeof(*map) + map->memsize));
344 if (with_timeout(map->timeout))
345 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
346 ipset_nest_end(skb, nested);
347
348 return 0;
349nla_put_failure:
350 return -EMSGSIZE;
351}
352
353static bool
354bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
355{
356 const struct bitmap_ip *x = a->data;
357 const struct bitmap_ip *y = b->data;
358
359 return x->first_ip == y->first_ip &&
360 x->last_ip == y->last_ip &&
361 x->netmask == y->netmask &&
362 x->timeout == y->timeout;
363}
364
365static const struct ip_set_type_variant bitmap_ip = {
366 .kadt = bitmap_ip_kadt,
367 .uadt = bitmap_ip_uadt,
368 .adt = {
369 [IPSET_ADD] = bitmap_ip_add,
370 [IPSET_DEL] = bitmap_ip_del,
371 [IPSET_TEST] = bitmap_ip_test,
372 },
373 .destroy = bitmap_ip_destroy,
374 .flush = bitmap_ip_flush,
375 .head = bitmap_ip_head,
376 .list = bitmap_ip_list,
377 .same_set = bitmap_ip_same_set,
378};
379
380static const struct ip_set_type_variant bitmap_tip = {
381 .kadt = bitmap_ip_kadt,
382 .uadt = bitmap_ip_uadt,
383 .adt = {
384 [IPSET_ADD] = bitmap_ip_tadd,
385 [IPSET_DEL] = bitmap_ip_tdel,
386 [IPSET_TEST] = bitmap_ip_ttest,
387 },
388 .destroy = bitmap_ip_destroy,
389 .flush = bitmap_ip_flush,
390 .head = bitmap_ip_head,
391 .list = bitmap_ip_tlist,
392 .same_set = bitmap_ip_same_set,
393};
394
395static void
396bitmap_ip_gc(unsigned long ul_set)
397{
398 struct ip_set *set = (struct ip_set *) ul_set;
399 struct bitmap_ip *map = set->data;
400 unsigned long *table = map->members;
401 u32 id;
402
 403 /* We run in parallel with other readers (test element)
404 * but adding/deleting new entries is locked out */
405 read_lock_bh(&set->lock);
406 for (id = 0; id < map->elements; id++)
407 if (ip_set_timeout_expired(table[id]))
408 table[id] = IPSET_ELEM_UNSET;
409 read_unlock_bh(&set->lock);
410
411 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
412 add_timer(&map->gc);
413}
414
415static void
416bitmap_ip_gc_init(struct ip_set *set)
417{
418 struct bitmap_ip *map = set->data;
419
420 init_timer(&map->gc);
421 map->gc.data = (unsigned long) set;
422 map->gc.function = bitmap_ip_gc;
423 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
424 add_timer(&map->gc);
425}
426
427/* Create bitmap:ip type of sets */
428
429static bool
430init_map_ip(struct ip_set *set, struct bitmap_ip *map,
431 u32 first_ip, u32 last_ip,
432 u32 elements, u32 hosts, u8 netmask)
433{
434 map->members = ip_set_alloc(map->memsize);
435 if (!map->members)
436 return false;
437 map->first_ip = first_ip;
438 map->last_ip = last_ip;
439 map->elements = elements;
440 map->hosts = hosts;
441 map->netmask = netmask;
442 map->timeout = IPSET_NO_TIMEOUT;
443
444 set->data = map;
445 set->family = AF_INET;
446
447 return true;
448}
449
450static int
451bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
452{
453 struct bitmap_ip *map;
454 u32 first_ip, last_ip, hosts, elements;
455 u8 netmask = 32;
456 int ret;
457
458 if (unlikely(!tb[IPSET_ATTR_IP] ||
459 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
460 return -IPSET_ERR_PROTOCOL;
461
462 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
463 if (ret)
464 return ret;
465
466 if (tb[IPSET_ATTR_IP_TO]) {
467 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
468 if (ret)
469 return ret;
470 if (first_ip > last_ip) {
471 u32 tmp = first_ip;
472
473 first_ip = last_ip;
474 last_ip = tmp;
475 }
476 } else if (tb[IPSET_ATTR_CIDR]) {
477 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
478
479 if (cidr >= 32)
480 return -IPSET_ERR_INVALID_CIDR;
481 last_ip = first_ip | ~ip_set_hostmask(cidr);
482 } else
483 return -IPSET_ERR_PROTOCOL;
484
485 if (tb[IPSET_ATTR_NETMASK]) {
486 netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
487
488 if (netmask > 32)
489 return -IPSET_ERR_INVALID_NETMASK;
490
491 first_ip &= ip_set_hostmask(netmask);
492 last_ip |= ~ip_set_hostmask(netmask);
493 }
494
495 if (netmask == 32) {
496 hosts = 1;
497 elements = last_ip - first_ip + 1;
498 } else {
499 u8 mask_bits;
500 u32 mask;
501
502 mask = range_to_mask(first_ip, last_ip, &mask_bits);
503
504 if ((!mask && (first_ip || last_ip != 0xFFFFFFFF)) ||
505 netmask <= mask_bits)
506 return -IPSET_ERR_BITMAP_RANGE;
507
508 pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
509 hosts = 2 << (32 - netmask - 1);
510 elements = 2 << (netmask - mask_bits - 1);
511 }
512 if (elements > IPSET_BITMAP_MAX_RANGE + 1)
513 return -IPSET_ERR_BITMAP_RANGE_SIZE;
514
515 pr_debug("hosts %u, elements %u\n", hosts, elements);
516
517 map = kzalloc(sizeof(*map), GFP_KERNEL);
518 if (!map)
519 return -ENOMEM;
520
521 if (tb[IPSET_ATTR_TIMEOUT]) {
522 map->memsize = elements * sizeof(unsigned long);
523
524 if (!init_map_ip(set, map, first_ip, last_ip,
525 elements, hosts, netmask)) {
526 kfree(map);
527 return -ENOMEM;
528 }
529
530 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
531 set->variant = &bitmap_tip;
532
533 bitmap_ip_gc_init(set);
534 } else {
535 map->memsize = bitmap_bytes(0, elements - 1);
536
537 if (!init_map_ip(set, map, first_ip, last_ip,
538 elements, hosts, netmask)) {
539 kfree(map);
540 return -ENOMEM;
541 }
542
543 set->variant = &bitmap_ip;
544 }
545 return 0;
546}
547
548static struct ip_set_type bitmap_ip_type __read_mostly = {
549 .name = "bitmap:ip",
550 .protocol = IPSET_PROTOCOL,
551 .features = IPSET_TYPE_IP,
552 .dimension = IPSET_DIM_ONE,
553 .family = AF_INET,
554 .revision = 0,
555 .create = bitmap_ip_create,
556 .create_policy = {
557 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
558 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
559 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
560 [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
561 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
562 },
563 .adt_policy = {
564 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
565 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
566 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
567 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
568 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
569 },
570 .me = THIS_MODULE,
571};
572
573static int __init
574bitmap_ip_init(void)
575{
576 return ip_set_type_register(&bitmap_ip_type);
577}
578
579static void __exit
580bitmap_ip_fini(void)
581{
582 ip_set_type_unregister(&bitmap_ip_type);
583}
584
585module_init(bitmap_ip_init);
586module_exit(bitmap_ip_fini);
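The only non-obvious part of bitmap_ip_create() above is the netmask branch: with netmask < 32 the set tracks /netmask blocks rather than single addresses, so each stored element covers hosts = 2^(32 - netmask) addresses and the range holds elements = 2^(netmask - mask_bits) such blocks, mask_bits being the prefix length range_to_mask() derives from first_ip/last_ip. The standalone C sketch below reproduces that arithmetic for illustration only; the hostmask() helper and the example values are assumptions, and mask_bits is taken straight from the CIDR because the example range is an exact /16.

/* bitmap_ip_math.c - illustrative sketch of the hosts/elements arithmetic
 * in bitmap_ip_create(); helper names here are stand-ins, not kernel APIs. */
#include <stdint.h>
#include <stdio.h>

static uint32_t hostmask(uint8_t cidr)	/* ~0 << (32 - cidr), 0 for /0 */
{
	return cidr ? 0xFFFFFFFFu << (32 - cidr) : 0;
}

int main(void)
{
	uint32_t first_ip = 0xC0A80000u;	/* 192.168.0.0 */
	uint8_t cidr = 16, netmask = 24, mask_bits = cidr;
	uint32_t last_ip = first_ip | ~hostmask(cidr);

	/* Same formulas as the netmask < 32 branch above */
	uint32_t hosts = 2u << (32 - netmask - 1);		/* 2^(32-netmask)        */
	uint32_t elements = 2u << (netmask - mask_bits - 1);	/* 2^(netmask-mask_bits) */

	printf("range %#x-%#x: %u blocks of %u addresses each\n",
	       (unsigned)first_ip, (unsigned)last_ip,
	       (unsigned)elements, (unsigned)hosts);
	return 0;
}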
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
new file mode 100644
index 000000000000..a274300b6a56
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -0,0 +1,655 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Martin Josefsson <gandalf@wlug.westbo.se>
4 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/* Kernel module implementing an IP set type: the bitmap:ip,mac type */
12
13#include <linux/module.h>
14#include <linux/ip.h>
15#include <linux/etherdevice.h>
16#include <linux/skbuff.h>
17#include <linux/errno.h>
18#include <linux/if_ether.h>
19#include <linux/netlink.h>
20#include <linux/jiffies.h>
21#include <linux/timer.h>
22#include <net/netlink.h>
23
24#include <linux/netfilter/ipset/pfxlen.h>
25#include <linux/netfilter/ipset/ip_set.h>
26#include <linux/netfilter/ipset/ip_set_timeout.h>
27#include <linux/netfilter/ipset/ip_set_bitmap.h>
28
29MODULE_LICENSE("GPL");
30MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
31MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets");
32MODULE_ALIAS("ip_set_bitmap:ip,mac");
33
34enum {
35 MAC_EMPTY, /* element is not set */
36 MAC_FILLED, /* element is set with MAC */
37 MAC_UNSET, /* element is set, without MAC */
38};
39
40/* Type structure */
41struct bitmap_ipmac {
42 void *members; /* the set members */
43 u32 first_ip; /* host byte order, included in range */
44 u32 last_ip; /* host byte order, included in range */
45 u32 timeout; /* timeout value */
46 struct timer_list gc; /* garbage collector */
47 size_t dsize; /* size of element */
48};
49
50/* ADT structure for generic function args */
51struct ipmac {
52 u32 id; /* id in array */
53 unsigned char *ether; /* ethernet address */
54};
55
56/* Member element without and with timeout */
57
58struct ipmac_elem {
59 unsigned char ether[ETH_ALEN];
60 unsigned char match;
61} __attribute__ ((aligned));
62
63struct ipmac_telem {
64 unsigned char ether[ETH_ALEN];
65 unsigned char match;
66 unsigned long timeout;
67} __attribute__ ((aligned));
68
69static inline void *
70bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id)
71{
72 return (void *)((char *)map->members + id * map->dsize);
73}
74
75static inline bool
76bitmap_timeout(const struct bitmap_ipmac *map, u32 id)
77{
78 const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
79
80 return ip_set_timeout_test(elem->timeout);
81}
82
83static inline bool
84bitmap_expired(const struct bitmap_ipmac *map, u32 id)
85{
86 const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
87
88 return ip_set_timeout_expired(elem->timeout);
89}
90
91static inline int
92bitmap_ipmac_exist(const struct ipmac_telem *elem)
93{
94 return elem->match == MAC_UNSET ||
95 (elem->match == MAC_FILLED &&
96 !ip_set_timeout_expired(elem->timeout));
97}
98
99/* Base variant */
100
101static int
102bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout)
103{
104 const struct bitmap_ipmac *map = set->data;
105 const struct ipmac *data = value;
106 const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
107
108 switch (elem->match) {
109 case MAC_UNSET:
110 /* Trigger kernel to fill out the ethernet address */
111 return -EAGAIN;
112 case MAC_FILLED:
113 return data->ether == NULL ||
114 compare_ether_addr(data->ether, elem->ether) == 0;
115 }
116 return 0;
117}
118
119static int
120bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout)
121{
122 struct bitmap_ipmac *map = set->data;
123 const struct ipmac *data = value;
124 struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
125
126 switch (elem->match) {
127 case MAC_UNSET:
128 if (!data->ether)
129 /* Already added without ethernet address */
130 return -IPSET_ERR_EXIST;
131 /* Fill the MAC address */
132 memcpy(elem->ether, data->ether, ETH_ALEN);
133 elem->match = MAC_FILLED;
134 break;
135 case MAC_FILLED:
136 return -IPSET_ERR_EXIST;
137 case MAC_EMPTY:
138 if (data->ether) {
139 memcpy(elem->ether, data->ether, ETH_ALEN);
140 elem->match = MAC_FILLED;
141 } else
142 elem->match = MAC_UNSET;
143 }
144
145 return 0;
146}
147
148static int
149bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout)
150{
151 struct bitmap_ipmac *map = set->data;
152 const struct ipmac *data = value;
153 struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
154
155 if (elem->match == MAC_EMPTY)
156 return -IPSET_ERR_EXIST;
157
158 elem->match = MAC_EMPTY;
159
160 return 0;
161}
162
163static int
164bitmap_ipmac_list(const struct ip_set *set,
165 struct sk_buff *skb, struct netlink_callback *cb)
166{
167 const struct bitmap_ipmac *map = set->data;
168 const struct ipmac_elem *elem;
169 struct nlattr *atd, *nested;
170 u32 id, first = cb->args[2];
171 u32 last = map->last_ip - map->first_ip;
172
173 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
174 if (!atd)
175 return -EMSGSIZE;
176 for (; cb->args[2] <= last; cb->args[2]++) {
177 id = cb->args[2];
178 elem = bitmap_ipmac_elem(map, id);
179 if (elem->match == MAC_EMPTY)
180 continue;
181 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
182 if (!nested) {
183 if (id == first) {
184 nla_nest_cancel(skb, atd);
185 return -EMSGSIZE;
186 } else
187 goto nla_put_failure;
188 }
189 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
190 htonl(map->first_ip + id));
191 if (elem->match == MAC_FILLED)
192 NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
193 elem->ether);
194 ipset_nest_end(skb, nested);
195 }
196 ipset_nest_end(skb, atd);
197 /* Set listing finished */
198 cb->args[2] = 0;
199
200 return 0;
201
202nla_put_failure:
203 nla_nest_cancel(skb, nested);
204 ipset_nest_end(skb, atd);
205 if (unlikely(id == first)) {
206 cb->args[2] = 0;
207 return -EMSGSIZE;
208 }
209 return 0;
210}
211
212/* Timeout variant */
213
214static int
215bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout)
216{
217 const struct bitmap_ipmac *map = set->data;
218 const struct ipmac *data = value;
219 const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
220
221 switch (elem->match) {
222 case MAC_UNSET:
223 /* Trigger kernel to fill out the ethernet address */
224 return -EAGAIN;
225 case MAC_FILLED:
226 return (data->ether == NULL ||
227 compare_ether_addr(data->ether, elem->ether) == 0) &&
228 !bitmap_expired(map, data->id);
229 }
230 return 0;
231}
232
233static int
234bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout)
235{
236 struct bitmap_ipmac *map = set->data;
237 const struct ipmac *data = value;
238 struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
239
240 switch (elem->match) {
241 case MAC_UNSET:
242 if (!data->ether)
243 /* Already added without ethernet address */
244 return -IPSET_ERR_EXIST;
245 /* Fill the MAC address and activate the timer */
246 memcpy(elem->ether, data->ether, ETH_ALEN);
247 elem->match = MAC_FILLED;
248 if (timeout == map->timeout)
249 /* Timeout was not specified, get stored one */
250 timeout = elem->timeout;
251 elem->timeout = ip_set_timeout_set(timeout);
252 break;
253 case MAC_FILLED:
254 if (!bitmap_expired(map, data->id))
255 return -IPSET_ERR_EXIST;
256 /* Fall through */
257 case MAC_EMPTY:
258 if (data->ether) {
259 memcpy(elem->ether, data->ether, ETH_ALEN);
260 elem->match = MAC_FILLED;
261 } else
262 elem->match = MAC_UNSET;
263 /* If MAC is unset yet, we store plain timeout value
264 * because the timer is not activated yet
265 * and we can reuse it later when MAC is filled out,
266 * possibly by the kernel */
267 elem->timeout = data->ether ? ip_set_timeout_set(timeout)
268 : timeout;
269 break;
270 }
271
272 return 0;
273}
274
275static int
276bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout)
277{
278 struct bitmap_ipmac *map = set->data;
279 const struct ipmac *data = value;
280 struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
281
282 if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id))
283 return -IPSET_ERR_EXIST;
284
285 elem->match = MAC_EMPTY;
286
287 return 0;
288}
289
290static int
291bitmap_ipmac_tlist(const struct ip_set *set,
292 struct sk_buff *skb, struct netlink_callback *cb)
293{
294 const struct bitmap_ipmac *map = set->data;
295 const struct ipmac_telem *elem;
296 struct nlattr *atd, *nested;
297 u32 id, first = cb->args[2];
298 u32 timeout, last = map->last_ip - map->first_ip;
299
300 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
301 if (!atd)
302 return -EMSGSIZE;
303 for (; cb->args[2] <= last; cb->args[2]++) {
304 id = cb->args[2];
305 elem = bitmap_ipmac_elem(map, id);
306 if (!bitmap_ipmac_exist(elem))
307 continue;
308 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
309 if (!nested) {
310 if (id == first) {
311 nla_nest_cancel(skb, atd);
312 return -EMSGSIZE;
313 } else
314 goto nla_put_failure;
315 }
316 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
317 htonl(map->first_ip + id));
318 if (elem->match == MAC_FILLED)
319 NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
320 elem->ether);
321 timeout = elem->match == MAC_UNSET ? elem->timeout
322 : ip_set_timeout_get(elem->timeout);
323 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout));
324 ipset_nest_end(skb, nested);
325 }
326 ipset_nest_end(skb, atd);
327 /* Set listing finished */
328 cb->args[2] = 0;
329
330 return 0;
331
332nla_put_failure:
333 nla_nest_cancel(skb, nested);
334 ipset_nest_end(skb, atd);
335 return -EMSGSIZE;
336}
337
338static int
339bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
340 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
341{
342 struct bitmap_ipmac *map = set->data;
343 ipset_adtfn adtfn = set->variant->adt[adt];
344 struct ipmac data;
345
346 /* MAC can be src only */
347 if (!(flags & IPSET_DIM_TWO_SRC))
348 return 0;
349
350 data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
351 if (data.id < map->first_ip || data.id > map->last_ip)
352 return -IPSET_ERR_BITMAP_RANGE;
353
354 /* Backward compatibility: we don't check the second flag */
355 if (skb_mac_header(skb) < skb->head ||
356 (skb_mac_header(skb) + ETH_HLEN) > skb->data)
357 return -EINVAL;
358
359 data.id -= map->first_ip;
360 data.ether = eth_hdr(skb)->h_source;
361
362 return adtfn(set, &data, map->timeout);
363}
364
365static int
366bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
367 enum ipset_adt adt, u32 *lineno, u32 flags)
368{
369 const struct bitmap_ipmac *map = set->data;
370 ipset_adtfn adtfn = set->variant->adt[adt];
371 struct ipmac data;
372 u32 timeout = map->timeout;
373 int ret = 0;
374
375 if (unlikely(!tb[IPSET_ATTR_IP] ||
376 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
377 return -IPSET_ERR_PROTOCOL;
378
379 if (tb[IPSET_ATTR_LINENO])
380 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
381
382 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id);
383 if (ret)
384 return ret;
385
386 if (data.id < map->first_ip || data.id > map->last_ip)
387 return -IPSET_ERR_BITMAP_RANGE;
388
389 if (tb[IPSET_ATTR_ETHER])
390 data.ether = nla_data(tb[IPSET_ATTR_ETHER]);
391 else
392 data.ether = NULL;
393
394 if (tb[IPSET_ATTR_TIMEOUT]) {
395 if (!with_timeout(map->timeout))
396 return -IPSET_ERR_TIMEOUT;
397 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
398 }
399
400 data.id -= map->first_ip;
401
402 ret = adtfn(set, &data, timeout);
403
404 return ip_set_eexist(ret, flags) ? 0 : ret;
405}
406
407static void
408bitmap_ipmac_destroy(struct ip_set *set)
409{
410 struct bitmap_ipmac *map = set->data;
411
412 if (with_timeout(map->timeout))
413 del_timer_sync(&map->gc);
414
415 ip_set_free(map->members);
416 kfree(map);
417
418 set->data = NULL;
419}
420
421static void
422bitmap_ipmac_flush(struct ip_set *set)
423{
424 struct bitmap_ipmac *map = set->data;
425
426 memset(map->members, 0,
427 (map->last_ip - map->first_ip + 1) * map->dsize);
428}
429
430static int
431bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
432{
433 const struct bitmap_ipmac *map = set->data;
434 struct nlattr *nested;
435
436 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
437 if (!nested)
438 goto nla_put_failure;
439 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
440 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
441 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
442 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
443 htonl(sizeof(*map)
444 + (map->last_ip - map->first_ip + 1) * map->dsize));
445 if (with_timeout(map->timeout))
446 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
447 ipset_nest_end(skb, nested);
448
449 return 0;
450nla_put_failure:
451 return -EMSGSIZE;
452}
453
454static bool
455bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
456{
457 const struct bitmap_ipmac *x = a->data;
458 const struct bitmap_ipmac *y = b->data;
459
460 return x->first_ip == y->first_ip &&
461 x->last_ip == y->last_ip &&
462 x->timeout == y->timeout;
463}
464
465static const struct ip_set_type_variant bitmap_ipmac = {
466 .kadt = bitmap_ipmac_kadt,
467 .uadt = bitmap_ipmac_uadt,
468 .adt = {
469 [IPSET_ADD] = bitmap_ipmac_add,
470 [IPSET_DEL] = bitmap_ipmac_del,
471 [IPSET_TEST] = bitmap_ipmac_test,
472 },
473 .destroy = bitmap_ipmac_destroy,
474 .flush = bitmap_ipmac_flush,
475 .head = bitmap_ipmac_head,
476 .list = bitmap_ipmac_list,
477 .same_set = bitmap_ipmac_same_set,
478};
479
480static const struct ip_set_type_variant bitmap_tipmac = {
481 .kadt = bitmap_ipmac_kadt,
482 .uadt = bitmap_ipmac_uadt,
483 .adt = {
484 [IPSET_ADD] = bitmap_ipmac_tadd,
485 [IPSET_DEL] = bitmap_ipmac_tdel,
486 [IPSET_TEST] = bitmap_ipmac_ttest,
487 },
488 .destroy = bitmap_ipmac_destroy,
489 .flush = bitmap_ipmac_flush,
490 .head = bitmap_ipmac_head,
491 .list = bitmap_ipmac_tlist,
492 .same_set = bitmap_ipmac_same_set,
493};
494
495static void
496bitmap_ipmac_gc(unsigned long ul_set)
497{
498 struct ip_set *set = (struct ip_set *) ul_set;
499 struct bitmap_ipmac *map = set->data;
500 struct ipmac_telem *elem;
501 u32 id, last = map->last_ip - map->first_ip;
502
503 /* We run parallel with other readers (test element)
504 * but adding/deleting new entries is locked out */
505 read_lock_bh(&set->lock);
506 for (id = 0; id <= last; id++) {
507 elem = bitmap_ipmac_elem(map, id);
508 if (elem->match == MAC_FILLED &&
509 ip_set_timeout_expired(elem->timeout))
510 elem->match = MAC_EMPTY;
511 }
512 read_unlock_bh(&set->lock);
513
514 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
515 add_timer(&map->gc);
516}
517
518static void
519bitmap_ipmac_gc_init(struct ip_set *set)
520{
521 struct bitmap_ipmac *map = set->data;
522
523 init_timer(&map->gc);
524 map->gc.data = (unsigned long) set;
525 map->gc.function = bitmap_ipmac_gc;
526 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
527 add_timer(&map->gc);
528}
529
530/* Create bitmap:ip,mac type of sets */
531
532static bool
533init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
534 u32 first_ip, u32 last_ip)
535{
536 map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
537 if (!map->members)
538 return false;
539 map->first_ip = first_ip;
540 map->last_ip = last_ip;
541 map->timeout = IPSET_NO_TIMEOUT;
542
543 set->data = map;
544 set->family = AF_INET;
545
546 return true;
547}
548
549static int
550bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
551 u32 flags)
552{
553 u32 first_ip, last_ip, elements;
554 struct bitmap_ipmac *map;
555 int ret;
556
557 if (unlikely(!tb[IPSET_ATTR_IP] ||
558 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
559 return -IPSET_ERR_PROTOCOL;
560
561 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
562 if (ret)
563 return ret;
564
565 if (tb[IPSET_ATTR_IP_TO]) {
566 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
567 if (ret)
568 return ret;
569 if (first_ip > last_ip) {
570 u32 tmp = first_ip;
571
572 first_ip = last_ip;
573 last_ip = tmp;
574 }
575 } else if (tb[IPSET_ATTR_CIDR]) {
576 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
577
578 if (cidr >= 32)
579 return -IPSET_ERR_INVALID_CIDR;
580 last_ip = first_ip | ~ip_set_hostmask(cidr);
581 } else
582 return -IPSET_ERR_PROTOCOL;
583
584 elements = last_ip - first_ip + 1;
585
586 if (elements > IPSET_BITMAP_MAX_RANGE + 1)
587 return -IPSET_ERR_BITMAP_RANGE_SIZE;
588
589 map = kzalloc(sizeof(*map), GFP_KERNEL);
590 if (!map)
591 return -ENOMEM;
592
593 if (tb[IPSET_ATTR_TIMEOUT]) {
594 map->dsize = sizeof(struct ipmac_telem);
595
596 if (!init_map_ipmac(set, map, first_ip, last_ip)) {
597 kfree(map);
598 return -ENOMEM;
599 }
600
601 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
602
603 set->variant = &bitmap_tipmac;
604
605 bitmap_ipmac_gc_init(set);
606 } else {
607 map->dsize = sizeof(struct ipmac_elem);
608
609 if (!init_map_ipmac(set, map, first_ip, last_ip)) {
610 kfree(map);
611 return -ENOMEM;
612 }
613 set->variant = &bitmap_ipmac;
614
615 }
616 return 0;
617}
618
619static struct ip_set_type bitmap_ipmac_type = {
620 .name = "bitmap:ip,mac",
621 .protocol = IPSET_PROTOCOL,
622 .features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
623 .dimension = IPSET_DIM_TWO,
624 .family = AF_INET,
625 .revision = 0,
626 .create = bitmap_ipmac_create,
627 .create_policy = {
628 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
629 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
630 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
631 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
632 },
633 .adt_policy = {
634 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
635 [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN },
636 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
637 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
638 },
639 .me = THIS_MODULE,
640};
641
642static int __init
643bitmap_ipmac_init(void)
644{
645 return ip_set_type_register(&bitmap_ipmac_type);
646}
647
648static void __exit
649bitmap_ipmac_fini(void)
650{
651 ip_set_type_unregister(&bitmap_ipmac_type);
652}
653
654module_init(bitmap_ipmac_init);
655module_exit(bitmap_ipmac_fini);
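Two details of the bitmap:ip,mac implementation above are easy to miss. First, members is not a bit field: every address in [first_ip, last_ip] owns a fixed-size record (struct ipmac_elem or ipmac_telem, selected through map->dsize), and bitmap_ipmac_elem() locates it at members + (ip - first_ip) * dsize. Second, in the timeout variant an entry added without a MAC stores the raw timeout value instead of an expiry, so the clock effectively starts only once the kernel fills in the source MAC. The sketch below shows just the record indexing; it is a userspace approximation with calloc() standing in for ip_set_alloc(), and the addresses are made up.

/* ipmac_layout.c - illustrative sketch of the per-address record layout
 * used by bitmap:ip,mac; not kernel code. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { MAC_EMPTY, MAC_FILLED, MAC_UNSET };	/* same three states as above */

struct elem {					/* mirrors struct ipmac_elem */
	unsigned char ether[6];
	unsigned char match;
};

int main(void)
{
	uint32_t first_ip = 0x0A000001u;	/* 10.0.0.1   */
	uint32_t last_ip  = 0x0A0000FEu;	/* 10.0.0.254 */
	size_t dsize = sizeof(struct elem);
	size_t n = last_ip - first_ip + 1;
	unsigned char *members = calloc(n, dsize);	/* all MAC_EMPTY */

	if (!members)
		return 1;

	/* Record for 10.0.0.42: id = ip - first_ip, at members + id * dsize */
	uint32_t id = 0x0A00002Au - first_ip;
	struct elem *e = (struct elem *)(members + id * dsize);

	memcpy(e->ether, "\x02\x00\x00\x00\x00\x2a", 6);
	e->match = MAC_FILLED;

	printf("id %u stored, %zu bytes per record, %zu bytes total\n",
	       (unsigned)id, dsize, n * dsize);
	free(members);
	return 0;
}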
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
new file mode 100644
index 000000000000..6b38eb8f6ed8
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -0,0 +1,514 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the bitmap:port type */
9
10#include <linux/module.h>
11#include <linux/ip.h>
12#include <linux/skbuff.h>
13#include <linux/errno.h>
14#include <linux/netlink.h>
15#include <linux/jiffies.h>
16#include <linux/timer.h>
17#include <net/netlink.h>
18
19#include <linux/netfilter/ipset/ip_set.h>
20#include <linux/netfilter/ipset/ip_set_bitmap.h>
21#include <linux/netfilter/ipset/ip_set_getport.h>
22#define IP_SET_BITMAP_TIMEOUT
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24
25MODULE_LICENSE("GPL");
26MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
27MODULE_DESCRIPTION("bitmap:port type of IP sets");
28MODULE_ALIAS("ip_set_bitmap:port");
29
30/* Type structure */
31struct bitmap_port {
32 void *members; /* the set members */
33 u16 first_port; /* host byte order, included in range */
34 u16 last_port; /* host byte order, included in range */
35 size_t memsize; /* members size */
36 u32 timeout; /* timeout parameter */
37 struct timer_list gc; /* garbage collection */
38};
39
40/* Base variant */
41
42static int
43bitmap_port_test(struct ip_set *set, void *value, u32 timeout)
44{
45 const struct bitmap_port *map = set->data;
46 u16 id = *(u16 *)value;
47
48 return !!test_bit(id, map->members);
49}
50
51static int
52bitmap_port_add(struct ip_set *set, void *value, u32 timeout)
53{
54 struct bitmap_port *map = set->data;
55 u16 id = *(u16 *)value;
56
57 if (test_and_set_bit(id, map->members))
58 return -IPSET_ERR_EXIST;
59
60 return 0;
61}
62
63static int
64bitmap_port_del(struct ip_set *set, void *value, u32 timeout)
65{
66 struct bitmap_port *map = set->data;
67 u16 id = *(u16 *)value;
68
69 if (!test_and_clear_bit(id, map->members))
70 return -IPSET_ERR_EXIST;
71
72 return 0;
73}
74
75static int
76bitmap_port_list(const struct ip_set *set,
77 struct sk_buff *skb, struct netlink_callback *cb)
78{
79 const struct bitmap_port *map = set->data;
80 struct nlattr *atd, *nested;
81 u16 id, first = cb->args[2];
82 u16 last = map->last_port - map->first_port;
83
84 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
85 if (!atd)
86 return -EMSGSIZE;
87 for (; cb->args[2] <= last; cb->args[2]++) {
88 id = cb->args[2];
89 if (!test_bit(id, map->members))
90 continue;
91 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
92 if (!nested) {
93 if (id == first) {
94 nla_nest_cancel(skb, atd);
95 return -EMSGSIZE;
96 } else
97 goto nla_put_failure;
98 }
99 NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
100 htons(map->first_port + id));
101 ipset_nest_end(skb, nested);
102 }
103 ipset_nest_end(skb, atd);
104 /* Set listing finished */
105 cb->args[2] = 0;
106
107 return 0;
108
109nla_put_failure:
110 nla_nest_cancel(skb, nested);
111 ipset_nest_end(skb, atd);
112 if (unlikely(id == first)) {
113 cb->args[2] = 0;
114 return -EMSGSIZE;
115 }
116 return 0;
117}
118
119/* Timeout variant */
120
121static int
122bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout)
123{
124 const struct bitmap_port *map = set->data;
125 const unsigned long *members = map->members;
126 u16 id = *(u16 *)value;
127
128 return ip_set_timeout_test(members[id]);
129}
130
131static int
132bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout)
133{
134 struct bitmap_port *map = set->data;
135 unsigned long *members = map->members;
136 u16 id = *(u16 *)value;
137
138 if (ip_set_timeout_test(members[id]))
139 return -IPSET_ERR_EXIST;
140
141 members[id] = ip_set_timeout_set(timeout);
142
143 return 0;
144}
145
146static int
147bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout)
148{
149 struct bitmap_port *map = set->data;
150 unsigned long *members = map->members;
151 u16 id = *(u16 *)value;
152 int ret = -IPSET_ERR_EXIST;
153
154 if (ip_set_timeout_test(members[id]))
155 ret = 0;
156
157 members[id] = IPSET_ELEM_UNSET;
158 return ret;
159}
160
161static int
162bitmap_port_tlist(const struct ip_set *set,
163 struct sk_buff *skb, struct netlink_callback *cb)
164{
165 const struct bitmap_port *map = set->data;
166 struct nlattr *adt, *nested;
167 u16 id, first = cb->args[2];
168 u16 last = map->last_port - map->first_port;
169 const unsigned long *members = map->members;
170
171 adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
172 if (!adt)
173 return -EMSGSIZE;
174 for (; cb->args[2] <= last; cb->args[2]++) {
175 id = cb->args[2];
176 if (!ip_set_timeout_test(members[id]))
177 continue;
178 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
179 if (!nested) {
180 if (id == first) {
181 nla_nest_cancel(skb, adt);
182 return -EMSGSIZE;
183 } else
184 goto nla_put_failure;
185 }
186 NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
187 htons(map->first_port + id));
188 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
189 htonl(ip_set_timeout_get(members[id])));
190 ipset_nest_end(skb, nested);
191 }
192 ipset_nest_end(skb, adt);
193
194 /* Set listing finished */
195 cb->args[2] = 0;
196
197 return 0;
198
199nla_put_failure:
200 nla_nest_cancel(skb, nested);
201 ipset_nest_end(skb, adt);
202 if (unlikely(id == first)) {
203 cb->args[2] = 0;
204 return -EMSGSIZE;
205 }
206 return 0;
207}
208
209static int
210bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
211 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
212{
213 struct bitmap_port *map = set->data;
214 ipset_adtfn adtfn = set->variant->adt[adt];
215 __be16 __port;
216 u16 port = 0;
217
218 if (!ip_set_get_ip_port(skb, pf, flags & IPSET_DIM_ONE_SRC, &__port))
219 return -EINVAL;
220
221 port = ntohs(__port);
222
223 if (port < map->first_port || port > map->last_port)
224 return -IPSET_ERR_BITMAP_RANGE;
225
226 port -= map->first_port;
227
228 return adtfn(set, &port, map->timeout);
229}
230
231static int
232bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
233 enum ipset_adt adt, u32 *lineno, u32 flags)
234{
235 struct bitmap_port *map = set->data;
236 ipset_adtfn adtfn = set->variant->adt[adt];
237 u32 timeout = map->timeout;
238	u32 port;	/* u32, so the port range loop below cannot wrap at 65535 */
239 u16 id, port_to;
240 int ret = 0;
241
242 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
243 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
244 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
245 return -IPSET_ERR_PROTOCOL;
246
247 if (tb[IPSET_ATTR_LINENO])
248 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
249
250 port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
251 if (port < map->first_port || port > map->last_port)
252 return -IPSET_ERR_BITMAP_RANGE;
253
254 if (tb[IPSET_ATTR_TIMEOUT]) {
255 if (!with_timeout(map->timeout))
256 return -IPSET_ERR_TIMEOUT;
257 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
258 }
259
260 if (adt == IPSET_TEST) {
261 id = port - map->first_port;
262 return adtfn(set, &id, timeout);
263 }
264
265 if (tb[IPSET_ATTR_PORT_TO]) {
266 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
267 if (port > port_to) {
268 swap(port, port_to);
269 if (port < map->first_port)
270 return -IPSET_ERR_BITMAP_RANGE;
271 }
272 } else
273 port_to = port;
274
275 if (port_to > map->last_port)
276 return -IPSET_ERR_BITMAP_RANGE;
277
278 for (; port <= port_to; port++) {
279 id = port - map->first_port;
280 ret = adtfn(set, &id, timeout);
281
282 if (ret && !ip_set_eexist(ret, flags))
283 return ret;
284 else
285 ret = 0;
286 }
287 return ret;
288}
289
290static void
291bitmap_port_destroy(struct ip_set *set)
292{
293 struct bitmap_port *map = set->data;
294
295 if (with_timeout(map->timeout))
296 del_timer_sync(&map->gc);
297
298 ip_set_free(map->members);
299 kfree(map);
300
301 set->data = NULL;
302}
303
304static void
305bitmap_port_flush(struct ip_set *set)
306{
307 struct bitmap_port *map = set->data;
308
309 memset(map->members, 0, map->memsize);
310}
311
312static int
313bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
314{
315 const struct bitmap_port *map = set->data;
316 struct nlattr *nested;
317
318 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
319 if (!nested)
320 goto nla_put_failure;
321 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port));
322 NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
323 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
324 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
325 htonl(sizeof(*map) + map->memsize));
326 if (with_timeout(map->timeout))
327 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
328 ipset_nest_end(skb, nested);
329
330 return 0;
331nla_put_failure:
332 return -EMSGSIZE;
333}
334
335static bool
336bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
337{
338 const struct bitmap_port *x = a->data;
339 const struct bitmap_port *y = b->data;
340
341 return x->first_port == y->first_port &&
342 x->last_port == y->last_port &&
343 x->timeout == y->timeout;
344}
345
346static const struct ip_set_type_variant bitmap_port = {
347 .kadt = bitmap_port_kadt,
348 .uadt = bitmap_port_uadt,
349 .adt = {
350 [IPSET_ADD] = bitmap_port_add,
351 [IPSET_DEL] = bitmap_port_del,
352 [IPSET_TEST] = bitmap_port_test,
353 },
354 .destroy = bitmap_port_destroy,
355 .flush = bitmap_port_flush,
356 .head = bitmap_port_head,
357 .list = bitmap_port_list,
358 .same_set = bitmap_port_same_set,
359};
360
361static const struct ip_set_type_variant bitmap_tport = {
362 .kadt = bitmap_port_kadt,
363 .uadt = bitmap_port_uadt,
364 .adt = {
365 [IPSET_ADD] = bitmap_port_tadd,
366 [IPSET_DEL] = bitmap_port_tdel,
367 [IPSET_TEST] = bitmap_port_ttest,
368 },
369 .destroy = bitmap_port_destroy,
370 .flush = bitmap_port_flush,
371 .head = bitmap_port_head,
372 .list = bitmap_port_tlist,
373 .same_set = bitmap_port_same_set,
374};
375
376static void
377bitmap_port_gc(unsigned long ul_set)
378{
379 struct ip_set *set = (struct ip_set *) ul_set;
380 struct bitmap_port *map = set->data;
381 unsigned long *table = map->members;
382	u32 id;		/* u32, so the loop cannot wrap when last == 65535 */
383 u16 last = map->last_port - map->first_port;
384
385 /* We run parallel with other readers (test element)
386 * but adding/deleting new entries is locked out */
387 read_lock_bh(&set->lock);
388 for (id = 0; id <= last; id++)
389 if (ip_set_timeout_expired(table[id]))
390 table[id] = IPSET_ELEM_UNSET;
391 read_unlock_bh(&set->lock);
392
393 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
394 add_timer(&map->gc);
395}
396
397static void
398bitmap_port_gc_init(struct ip_set *set)
399{
400 struct bitmap_port *map = set->data;
401
402 init_timer(&map->gc);
403 map->gc.data = (unsigned long) set;
404 map->gc.function = bitmap_port_gc;
405 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
406 add_timer(&map->gc);
407}
408
409/* Create bitmap:port type of sets */
410
411static bool
412init_map_port(struct ip_set *set, struct bitmap_port *map,
413 u16 first_port, u16 last_port)
414{
415 map->members = ip_set_alloc(map->memsize);
416 if (!map->members)
417 return false;
418 map->first_port = first_port;
419 map->last_port = last_port;
420 map->timeout = IPSET_NO_TIMEOUT;
421
422 set->data = map;
423 set->family = AF_UNSPEC;
424
425 return true;
426}
427
428static int
429bitmap_port_create(struct ip_set *set, struct nlattr *tb[],
430 u32 flags)
431{
432 struct bitmap_port *map;
433 u16 first_port, last_port;
434
435 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
436 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
437 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
438 return -IPSET_ERR_PROTOCOL;
439
440 first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
441 last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
442 if (first_port > last_port) {
443 u16 tmp = first_port;
444
445 first_port = last_port;
446 last_port = tmp;
447 }
448
449 map = kzalloc(sizeof(*map), GFP_KERNEL);
450 if (!map)
451 return -ENOMEM;
452
453 if (tb[IPSET_ATTR_TIMEOUT]) {
454 map->memsize = (last_port - first_port + 1)
455 * sizeof(unsigned long);
456
457 if (!init_map_port(set, map, first_port, last_port)) {
458 kfree(map);
459 return -ENOMEM;
460 }
461
462 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
463 set->variant = &bitmap_tport;
464
465 bitmap_port_gc_init(set);
466 } else {
467 map->memsize = bitmap_bytes(0, last_port - first_port);
468 pr_debug("memsize: %zu\n", map->memsize);
469 if (!init_map_port(set, map, first_port, last_port)) {
470 kfree(map);
471 return -ENOMEM;
472 }
473
474 set->variant = &bitmap_port;
475 }
476 return 0;
477}
478
479static struct ip_set_type bitmap_port_type = {
480 .name = "bitmap:port",
481 .protocol = IPSET_PROTOCOL,
482 .features = IPSET_TYPE_PORT,
483 .dimension = IPSET_DIM_ONE,
484 .family = AF_UNSPEC,
485 .revision = 0,
486 .create = bitmap_port_create,
487 .create_policy = {
488 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
489 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
490 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
491 },
492 .adt_policy = {
493 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
494 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
495 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
496 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
497 },
498 .me = THIS_MODULE,
499};
500
501static int __init
502bitmap_port_init(void)
503{
504 return ip_set_type_register(&bitmap_port_type);
505}
506
507static void __exit
508bitmap_port_fini(void)
509{
510 ip_set_type_unregister(&bitmap_port_type);
511}
512
513module_init(bitmap_port_init);
514module_exit(bitmap_port_fini);
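The split between the two bitmap:port variants above is purely a storage question: without IPSET_ATTR_TIMEOUT the members area is an ordinary bitmap (bitmap_bytes() bytes, driven by test_bit()/test_and_set_bit()), while with timeouts every port in the range gets a full unsigned long holding the expiry that ip_set_timeout_set()/ip_set_timeout_test() operate on, and the garbage collector resets expired slots to IPSET_ELEM_UNSET. A rough userspace sketch of that second layout, using wall-clock seconds in place of jiffies and made-up port numbers:

/* port_timeout_slots.c - illustrative sketch of the timeout-variant storage
 * of bitmap:port; the helpers below are approximations, not kernel APIs. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define ELEM_UNSET 0UL			/* stands in for IPSET_ELEM_UNSET */

static unsigned long members[1024];	/* one expiry slot per port in range */
static const uint16_t first_port = 1024;

static void port_tadd(uint16_t port, unsigned int timeout_s)
{
	members[port - first_port] = (unsigned long)time(NULL) + timeout_s;
}

static int port_ttest(uint16_t port)
{
	unsigned long expiry = members[port - first_port];

	return expiry != ELEM_UNSET && (unsigned long)time(NULL) < expiry;
}

int main(void)
{
	port_tadd(1080, 30);
	printf("1080 in set: %d, 2047 in set: %d\n",
	       port_ttest(1080), port_ttest(2047));
	return 0;
}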
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
new file mode 100644
index 000000000000..72d1ac611fdc
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -0,0 +1,1708 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h>
20#include <linux/version.h>
21#include <net/netlink.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/nfnetlink.h>
25#include <linux/netfilter/ipset/ip_set.h>
26
27static LIST_HEAD(ip_set_type_list); /* all registered set types */
28static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
29static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
30
31static struct ip_set **ip_set_list; /* all individual sets */
32static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
33
34#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35
36static unsigned int max_sets;
37
38module_param(max_sets, int, 0600);
39MODULE_PARM_DESC(max_sets, "maximal number of sets");
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42MODULE_DESCRIPTION("core IP set support");
43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44
45/*
46 * The set types are implemented in modules and registered set types
47 * can be found in ip_set_type_list. Adding/deleting types is
48 * serialized by ip_set_type_mutex.
49 */
50
51static inline void
52ip_set_type_lock(void)
53{
54 mutex_lock(&ip_set_type_mutex);
55}
56
57static inline void
58ip_set_type_unlock(void)
59{
60 mutex_unlock(&ip_set_type_mutex);
61}
62
63/* Register and deregister settype */
64
65static struct ip_set_type *
66find_set_type(const char *name, u8 family, u8 revision)
67{
68 struct ip_set_type *type;
69
70 list_for_each_entry_rcu(type, &ip_set_type_list, list)
71 if (STREQ(type->name, name) &&
72 (type->family == family || type->family == AF_UNSPEC) &&
73 type->revision == revision)
74 return type;
75 return NULL;
76}
77
78/* Unlock, try to load a set type module and lock again */
79static int
80try_to_load_type(const char *name)
81{
82 nfnl_unlock();
83 pr_debug("try to load ip_set_%s\n", name);
84 if (request_module("ip_set_%s", name) < 0) {
85 pr_warning("Can't find ip_set type %s\n", name);
86 nfnl_lock();
87 return -IPSET_ERR_FIND_TYPE;
88 }
89 nfnl_lock();
90 return -EAGAIN;
91}
92
93/* Find a set type and reference it */
94static int
95find_set_type_get(const char *name, u8 family, u8 revision,
96 struct ip_set_type **found)
97{
98 struct ip_set_type *type;
99 int err;
100
101 rcu_read_lock();
102 *found = find_set_type(name, family, revision);
103 if (*found) {
104 err = !try_module_get((*found)->me) ? -EFAULT : 0;
105 goto unlock;
106 }
107 /* Make sure the type is loaded but we don't support the revision */
108 list_for_each_entry_rcu(type, &ip_set_type_list, list)
109 if (STREQ(type->name, name)) {
110 err = -IPSET_ERR_FIND_TYPE;
111 goto unlock;
112 }
113 rcu_read_unlock();
114
115 return try_to_load_type(name);
116
117unlock:
118 rcu_read_unlock();
119 return err;
120}
121
122/* Find a given set type by name and family.
123 * If we succeeded, the supported minimal and maximum revisions are
124 * filled out.
125 */
126static int
127find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
128{
129 struct ip_set_type *type;
130 bool found = false;
131
132 *min = 255; *max = 0;
133 rcu_read_lock();
134 list_for_each_entry_rcu(type, &ip_set_type_list, list)
135 if (STREQ(type->name, name) &&
136 (type->family == family || type->family == AF_UNSPEC)) {
137 found = true;
138 if (type->revision < *min)
139 *min = type->revision;
140 if (type->revision > *max)
141 *max = type->revision;
142 }
143 rcu_read_unlock();
144 if (found)
145 return 0;
146
147 return try_to_load_type(name);
148}
149
150#define family_name(f) ((f) == AF_INET ? "inet" : \
151 (f) == AF_INET6 ? "inet6" : "any")
152
153/* Register a set type structure. The type is identified by
154 * the unique triple of name, family and revision.
155 */
156int
157ip_set_type_register(struct ip_set_type *type)
158{
159 int ret = 0;
160
161 if (type->protocol != IPSET_PROTOCOL) {
162 pr_warning("ip_set type %s, family %s, revision %u uses "
163 "wrong protocol version %u (want %u)\n",
164 type->name, family_name(type->family),
165 type->revision, type->protocol, IPSET_PROTOCOL);
166 return -EINVAL;
167 }
168
169 ip_set_type_lock();
170 if (find_set_type(type->name, type->family, type->revision)) {
171 /* Duplicate! */
172 pr_warning("ip_set type %s, family %s, revision %u "
173 "already registered!\n", type->name,
174 family_name(type->family), type->revision);
175 ret = -EINVAL;
176 goto unlock;
177 }
178 list_add_rcu(&type->list, &ip_set_type_list);
179 pr_debug("type %s, family %s, revision %u registered.\n",
180 type->name, family_name(type->family), type->revision);
181unlock:
182 ip_set_type_unlock();
183 return ret;
184}
185EXPORT_SYMBOL_GPL(ip_set_type_register);
186
187/* Unregister a set type. There's a small race with ip_set_create */
188void
189ip_set_type_unregister(struct ip_set_type *type)
190{
191 ip_set_type_lock();
192 if (!find_set_type(type->name, type->family, type->revision)) {
193 pr_warning("ip_set type %s, family %s, revision %u "
194 "not registered\n", type->name,
195 family_name(type->family), type->revision);
196 goto unlock;
197 }
198 list_del_rcu(&type->list);
199 pr_debug("type %s, family %s, revision %u unregistered.\n",
200 type->name, family_name(type->family), type->revision);
201unlock:
202 ip_set_type_unlock();
203
204 synchronize_rcu();
205}
206EXPORT_SYMBOL_GPL(ip_set_type_unregister);
207
208/* Utility functions */
209void *
210ip_set_alloc(size_t size)
211{
212 void *members = NULL;
213
214 if (size < KMALLOC_MAX_SIZE)
215 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
216
217 if (members) {
218 pr_debug("%p: allocated with kmalloc\n", members);
219 return members;
220 }
221
222 members = vzalloc(size);
223 if (!members)
224 return NULL;
225 pr_debug("%p: allocated with vmalloc\n", members);
226
227 return members;
228}
229EXPORT_SYMBOL_GPL(ip_set_alloc);
230
231void
232ip_set_free(void *members)
233{
234 pr_debug("%p: free with %s\n", members,
235 is_vmalloc_addr(members) ? "vfree" : "kfree");
236 if (is_vmalloc_addr(members))
237 vfree(members);
238 else
239 kfree(members);
240}
241EXPORT_SYMBOL_GPL(ip_set_free);
242
243static inline bool
244flag_nested(const struct nlattr *nla)
245{
246 return nla->nla_type & NLA_F_NESTED;
247}
248
249static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
250 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
251 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
252 .len = sizeof(struct in6_addr) },
253};
254
255int
256ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
257{
258 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
259
260 if (unlikely(!flag_nested(nla)))
261 return -IPSET_ERR_PROTOCOL;
262 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
263 return -IPSET_ERR_PROTOCOL;
264 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
265 return -IPSET_ERR_PROTOCOL;
266
267 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
268 return 0;
269}
270EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
271
272int
273ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
274{
275 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
276
277 if (unlikely(!flag_nested(nla)))
278 return -IPSET_ERR_PROTOCOL;
279
280 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
281 return -IPSET_ERR_PROTOCOL;
282 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
283 return -IPSET_ERR_PROTOCOL;
284
285 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
286 sizeof(struct in6_addr));
287 return 0;
288}
289EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
290
291/*
292 * Creating/destroying/renaming/swapping affect the existence and
293 * the properties of a set. All of these can be executed from userspace
294 * only and serialized by the nfnl mutex indirectly from nfnetlink.
295 *
296 * Sets are identified by their index in ip_set_list and the index
297 * is used by the external references (set/SET netfilter modules).
298 *
299 * The set behind an index may change by swapping only, from userspace.
300 */
301
302static inline void
303__ip_set_get(ip_set_id_t index)
304{
305 write_lock_bh(&ip_set_ref_lock);
306 ip_set_list[index]->ref++;
307 write_unlock_bh(&ip_set_ref_lock);
308}
309
310static inline void
311__ip_set_put(ip_set_id_t index)
312{
313 write_lock_bh(&ip_set_ref_lock);
314 BUG_ON(ip_set_list[index]->ref == 0);
315 ip_set_list[index]->ref--;
316 write_unlock_bh(&ip_set_ref_lock);
317}
318
319/*
320 * Add, del and test set entries from kernel.
321 *
322 * The set behind the index must exist and must be referenced
323 * so it can't be destroyed (or changed) under our foot.
324 */
325
326int
327ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
328 u8 family, u8 dim, u8 flags)
329{
330 struct ip_set *set = ip_set_list[index];
331 int ret = 0;
332
333 BUG_ON(set == NULL);
334 pr_debug("set %s, index %u\n", set->name, index);
335
336 if (dim < set->type->dimension ||
337 !(family == set->family || set->family == AF_UNSPEC))
338 return 0;
339
340 read_lock_bh(&set->lock);
341 ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags);
342 read_unlock_bh(&set->lock);
343
344 if (ret == -EAGAIN) {
345 /* Type requests element to be completed */
346		pr_debug("element must be completed, ADD is triggered\n");
347 write_lock_bh(&set->lock);
348 set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
349 write_unlock_bh(&set->lock);
350 ret = 1;
351 }
352
353 /* Convert error codes to nomatch */
354 return (ret < 0 ? 0 : ret);
355}
356EXPORT_SYMBOL_GPL(ip_set_test);
357
358int
359ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
360 u8 family, u8 dim, u8 flags)
361{
362 struct ip_set *set = ip_set_list[index];
363 int ret;
364
365 BUG_ON(set == NULL);
366 pr_debug("set %s, index %u\n", set->name, index);
367
368 if (dim < set->type->dimension ||
369 !(family == set->family || set->family == AF_UNSPEC))
370 return 0;
371
372 write_lock_bh(&set->lock);
373 ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
374 write_unlock_bh(&set->lock);
375
376 return ret;
377}
378EXPORT_SYMBOL_GPL(ip_set_add);
379
380int
381ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
382 u8 family, u8 dim, u8 flags)
383{
384 struct ip_set *set = ip_set_list[index];
385 int ret = 0;
386
387 BUG_ON(set == NULL);
388 pr_debug("set %s, index %u\n", set->name, index);
389
390 if (dim < set->type->dimension ||
391 !(family == set->family || set->family == AF_UNSPEC))
392 return 0;
393
394 write_lock_bh(&set->lock);
395 ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags);
396 write_unlock_bh(&set->lock);
397
398 return ret;
399}
400EXPORT_SYMBOL_GPL(ip_set_del);
401
402/*
403 * Find set by name, reference it once. The reference makes sure the
404 * thing pointed to, does not go away under our feet.
405 *
406 */
407ip_set_id_t
408ip_set_get_byname(const char *name, struct ip_set **set)
409{
410 ip_set_id_t i, index = IPSET_INVALID_ID;
411 struct ip_set *s;
412
413 for (i = 0; i < ip_set_max; i++) {
414 s = ip_set_list[i];
415 if (s != NULL && STREQ(s->name, name)) {
416 __ip_set_get(i);
417 index = i;
418 *set = s;
419 }
420 }
421
422 return index;
423}
424EXPORT_SYMBOL_GPL(ip_set_get_byname);
425
426/*
427 * If the given set pointer points to a valid set, decrement
428 * reference count by 1. The caller shall not assume the index
429 * to be valid, after calling this function.
430 *
431 */
432void
433ip_set_put_byindex(ip_set_id_t index)
434{
435 if (ip_set_list[index] != NULL)
436 __ip_set_put(index);
437}
438EXPORT_SYMBOL_GPL(ip_set_put_byindex);
439
440/*
441 * Get the name of a set behind a set index.
442 * We assume the set is referenced, so it does exist and
443 * can't be destroyed. The set cannot be renamed due to
444 * the referencing either.
445 *
446 */
447const char *
448ip_set_name_byindex(ip_set_id_t index)
449{
450 const struct ip_set *set = ip_set_list[index];
451
452 BUG_ON(set == NULL);
453 BUG_ON(set->ref == 0);
454
455 /* Referenced, so it's safe */
456 return set->name;
457}
458EXPORT_SYMBOL_GPL(ip_set_name_byindex);
459
460/*
461 * Routines to call by external subsystems, which do not
462 * call nfnl_lock for us.
463 */
464
465/*
466 * Find set by name, reference it once. The reference makes sure the
467 * thing pointed to, does not go away under our feet.
468 *
469 * The nfnl mutex is used in the function.
470 */
471ip_set_id_t
472ip_set_nfnl_get(const char *name)
473{
474 struct ip_set *s;
475 ip_set_id_t index;
476
477 nfnl_lock();
478 index = ip_set_get_byname(name, &s);
479 nfnl_unlock();
480
481 return index;
482}
483EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
484
485/*
486 * Find set by index, reference it once. The reference makes sure the
487 * thing pointed to, does not go away under our feet.
488 *
489 * The nfnl mutex is used in the function.
490 */
491ip_set_id_t
492ip_set_nfnl_get_byindex(ip_set_id_t index)
493{
494 if (index > ip_set_max)
495 return IPSET_INVALID_ID;
496
497 nfnl_lock();
498 if (ip_set_list[index])
499 __ip_set_get(index);
500 else
501 index = IPSET_INVALID_ID;
502 nfnl_unlock();
503
504 return index;
505}
506EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
507
508/*
509 * If the given set pointer points to a valid set, decrement
510 * reference count by 1. The caller shall not assume the index
511 * to be valid, after calling this function.
512 *
513 * The nfnl mutex is used in the function.
514 */
515void
516ip_set_nfnl_put(ip_set_id_t index)
517{
518 nfnl_lock();
519 ip_set_put_byindex(index);
520 nfnl_unlock();
521}
522EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
523
524/*
525 * Communication protocol with userspace over netlink.
526 *
527 * The commands are serialized by the nfnl mutex.
528 */
529
530static inline bool
531protocol_failed(const struct nlattr * const tb[])
532{
533 return !tb[IPSET_ATTR_PROTOCOL] ||
534 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
535}
536
537static inline u32
538flag_exist(const struct nlmsghdr *nlh)
539{
540 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
541}
542
543static struct nlmsghdr *
544start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
545 enum ipset_cmd cmd)
546{
547 struct nlmsghdr *nlh;
548 struct nfgenmsg *nfmsg;
549
550 nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
551 sizeof(*nfmsg), flags);
552 if (nlh == NULL)
553 return NULL;
554
555 nfmsg = nlmsg_data(nlh);
556 nfmsg->nfgen_family = AF_INET;
557 nfmsg->version = NFNETLINK_V0;
558 nfmsg->res_id = 0;
559
560 return nlh;
561}
562
563/* Create a set */
564
565static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
566 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
567 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
568 .len = IPSET_MAXNAMELEN - 1 },
569 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
570 .len = IPSET_MAXNAMELEN - 1},
571 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
572 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
573 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
574};
575
576static ip_set_id_t
577find_set_id(const char *name)
578{
579 ip_set_id_t i, index = IPSET_INVALID_ID;
580 const struct ip_set *set;
581
582 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
583 set = ip_set_list[i];
584 if (set != NULL && STREQ(set->name, name))
585 index = i;
586 }
587 return index;
588}
589
590static inline struct ip_set *
591find_set(const char *name)
592{
593 ip_set_id_t index = find_set_id(name);
594
595 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
596}
597
598static int
599find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
600{
601 ip_set_id_t i;
602
603 *index = IPSET_INVALID_ID;
604 for (i = 0; i < ip_set_max; i++) {
605 if (ip_set_list[i] == NULL) {
606 if (*index == IPSET_INVALID_ID)
607 *index = i;
608 } else if (STREQ(name, ip_set_list[i]->name)) {
609 /* Name clash */
610 *set = ip_set_list[i];
611 return -EEXIST;
612 }
613 }
614 if (*index == IPSET_INVALID_ID)
615 /* No free slot remained */
616 return -IPSET_ERR_MAX_SETS;
617 return 0;
618}
619
620static int
621ip_set_create(struct sock *ctnl, struct sk_buff *skb,
622 const struct nlmsghdr *nlh,
623 const struct nlattr * const attr[])
624{
625 struct ip_set *set, *clash = NULL;
626 ip_set_id_t index = IPSET_INVALID_ID;
627 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
628 const char *name, *typename;
629 u8 family, revision;
630 u32 flags = flag_exist(nlh);
631 int ret = 0;
632
633 if (unlikely(protocol_failed(attr) ||
634 attr[IPSET_ATTR_SETNAME] == NULL ||
635 attr[IPSET_ATTR_TYPENAME] == NULL ||
636 attr[IPSET_ATTR_REVISION] == NULL ||
637 attr[IPSET_ATTR_FAMILY] == NULL ||
638 (attr[IPSET_ATTR_DATA] != NULL &&
639 !flag_nested(attr[IPSET_ATTR_DATA]))))
640 return -IPSET_ERR_PROTOCOL;
641
642 name = nla_data(attr[IPSET_ATTR_SETNAME]);
643 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
644 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
645 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
646 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
647 name, typename, family_name(family), revision);
648
649 /*
650 * First, and without any locks, allocate and initialize
651 * a normal base set structure.
652 */
653 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
654 if (!set)
655 return -ENOMEM;
656 rwlock_init(&set->lock);
657 strlcpy(set->name, name, IPSET_MAXNAMELEN);
658 set->family = family;
659
660 /*
661 * Next, check that we know the type, and take
662 * a reference on the type, to make sure it stays available
663 * while constructing our new set.
664 *
665 * After referencing the type, we try to create the type
666 * specific part of the set without holding any locks.
667 */
668 ret = find_set_type_get(typename, family, revision, &(set->type));
669 if (ret)
670 goto out;
671
672 /*
673 * Without holding any locks, create private part.
674 */
675 if (attr[IPSET_ATTR_DATA] &&
676 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
677 set->type->create_policy)) {
678 ret = -IPSET_ERR_PROTOCOL;
679 goto put_out;
680 }
681
682 ret = set->type->create(set, tb, flags);
683 if (ret != 0)
684 goto put_out;
685
686 /* BTW, ret==0 here. */
687
688 /*
689 * Here, we have a valid, constructed set and we are protected
690 * by the nfnl mutex. Find the first free index in ip_set_list
691 * and check clashing.
692 */
693 if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
694 /* If this is the same set and requested, ignore error */
695 if (ret == -EEXIST &&
696 (flags & IPSET_FLAG_EXIST) &&
697 STREQ(set->type->name, clash->type->name) &&
698 set->type->family == clash->type->family &&
699 set->type->revision == clash->type->revision &&
700 set->variant->same_set(set, clash))
701 ret = 0;
702 goto cleanup;
703 }
704
705 /*
706 * Finally! Add our shiny new set to the list, and be done.
707 */
708 pr_debug("create: '%s' created with index %u!\n", set->name, index);
709 ip_set_list[index] = set;
710
711 return ret;
712
713cleanup:
714 set->variant->destroy(set);
715put_out:
716 module_put(set->type->me);
717out:
718 kfree(set);
719 return ret;
720}
721
722/* Destroy sets */
723
724static const struct nla_policy
725ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
726 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
727 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
728 .len = IPSET_MAXNAMELEN - 1 },
729};
730
731static void
732ip_set_destroy_set(ip_set_id_t index)
733{
734 struct ip_set *set = ip_set_list[index];
735
736 pr_debug("set: %s\n", set->name);
737 ip_set_list[index] = NULL;
738
739 /* Must call it without holding any lock */
740 set->variant->destroy(set);
741 module_put(set->type->me);
742 kfree(set);
743}
744
745static int
746ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
747 const struct nlmsghdr *nlh,
748 const struct nlattr * const attr[])
749{
750 ip_set_id_t i;
751 int ret = 0;
752
753 if (unlikely(protocol_failed(attr)))
754 return -IPSET_ERR_PROTOCOL;
755
756 /* Commands are serialized and references are
757 * protected by the ip_set_ref_lock.
758 * External systems (i.e. xt_set) must call
759 * ip_set_put|get_nfnl_* functions, that way we
760 * can safely check references here.
761 *
762 * list:set timer can only decrement the reference
763 * counter, so if it's already zero, we can proceed
764 * without holding the lock.
765 */
766 read_lock_bh(&ip_set_ref_lock);
767 if (!attr[IPSET_ATTR_SETNAME]) {
768 for (i = 0; i < ip_set_max; i++) {
769 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
770				ret = -IPSET_ERR_BUSY;
771 goto out;
772 }
773 }
774 read_unlock_bh(&ip_set_ref_lock);
775 for (i = 0; i < ip_set_max; i++) {
776 if (ip_set_list[i] != NULL)
777 ip_set_destroy_set(i);
778 }
779 } else {
780 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
781 if (i == IPSET_INVALID_ID) {
782 ret = -ENOENT;
783 goto out;
784 } else if (ip_set_list[i]->ref) {
785 ret = -IPSET_ERR_BUSY;
786 goto out;
787 }
788 read_unlock_bh(&ip_set_ref_lock);
789
790 ip_set_destroy_set(i);
791 }
792 return 0;
793out:
794 read_unlock_bh(&ip_set_ref_lock);
795 return ret;
796}
797
798/* Flush sets */
799
800static void
801ip_set_flush_set(struct ip_set *set)
802{
803 pr_debug("set: %s\n", set->name);
804
805 write_lock_bh(&set->lock);
806 set->variant->flush(set);
807 write_unlock_bh(&set->lock);
808}
809
810static int
811ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
812 const struct nlmsghdr *nlh,
813 const struct nlattr * const attr[])
814{
815 ip_set_id_t i;
816
817 if (unlikely(protocol_failed(attr)))
818		return -IPSET_ERR_PROTOCOL;
819
820 if (!attr[IPSET_ATTR_SETNAME]) {
821 for (i = 0; i < ip_set_max; i++)
822 if (ip_set_list[i] != NULL)
823 ip_set_flush_set(ip_set_list[i]);
824 } else {
825 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
826 if (i == IPSET_INVALID_ID)
827 return -ENOENT;
828
829 ip_set_flush_set(ip_set_list[i]);
830 }
831
832 return 0;
833}
834
835/* Rename a set */
836
837static const struct nla_policy
838ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
839 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
840 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
841 .len = IPSET_MAXNAMELEN - 1 },
842 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
843 .len = IPSET_MAXNAMELEN - 1 },
844};
845
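/* Rename a set: only possible while nothing references the set, and the
 * new name must not clash with the name of any existing set.
 */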
846static int
847ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
848 const struct nlmsghdr *nlh,
849 const struct nlattr * const attr[])
850{
851 struct ip_set *set;
852 const char *name2;
853 ip_set_id_t i;
854 int ret = 0;
855
856 if (unlikely(protocol_failed(attr) ||
857 attr[IPSET_ATTR_SETNAME] == NULL ||
858 attr[IPSET_ATTR_SETNAME2] == NULL))
859 return -IPSET_ERR_PROTOCOL;
860
861 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
862 if (set == NULL)
863 return -ENOENT;
864
865 read_lock_bh(&ip_set_ref_lock);
866 if (set->ref != 0) {
867 ret = -IPSET_ERR_REFERENCED;
868 goto out;
869 }
870
871 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
872 for (i = 0; i < ip_set_max; i++) {
873 if (ip_set_list[i] != NULL &&
874 STREQ(ip_set_list[i]->name, name2)) {
875 ret = -IPSET_ERR_EXIST_SETNAME2;
876 goto out;
877 }
878 }
879 strncpy(set->name, name2, IPSET_MAXNAMELEN);
880
881out:
882 read_unlock_bh(&ip_set_ref_lock);
883 return ret;
884}
885
886/* Swap two sets so that each name/index pair points to the other set.
887 * References and set names are swapped as well.
888 *
889 * The commands are serialized by the nfnl mutex and the references are
890 * protected by the ip_set_ref_lock. The kernel interfaces
891 * do not hold the mutex, but the pointer assignments are atomic,
892 * so ip_set_list always contains valid pointers to the sets.
893 */
894
895static int
896ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
897 const struct nlmsghdr *nlh,
898 const struct nlattr * const attr[])
899{
900 struct ip_set *from, *to;
901 ip_set_id_t from_id, to_id;
902 char from_name[IPSET_MAXNAMELEN];
903
904 if (unlikely(protocol_failed(attr) ||
905 attr[IPSET_ATTR_SETNAME] == NULL ||
906 attr[IPSET_ATTR_SETNAME2] == NULL))
907 return -IPSET_ERR_PROTOCOL;
908
909 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
910 if (from_id == IPSET_INVALID_ID)
911 return -ENOENT;
912
913 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
914 if (to_id == IPSET_INVALID_ID)
915 return -IPSET_ERR_EXIST_SETNAME2;
916
917 from = ip_set_list[from_id];
918 to = ip_set_list[to_id];
919
920 /* Features must not change.
921 * Not an artificial restriction anymore, as we must prevent
922 * possible loops created by swapping in setlist type of sets. */
923 if (!(from->type->features == to->type->features &&
924 from->type->family == to->type->family))
925 return -IPSET_ERR_TYPE_MISMATCH;
926
927 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
928 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
929 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
930
931 write_lock_bh(&ip_set_ref_lock);
932 swap(from->ref, to->ref);
933 ip_set_list[from_id] = to;
934 ip_set_list[to_id] = from;
935 write_unlock_bh(&ip_set_ref_lock);
936
937 return 0;
938}
939
940/* List/save set data */
941
942#define DUMP_INIT 0L
943#define DUMP_ALL 1L
944#define DUMP_ONE 2L
945#define DUMP_LAST 3L
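/* Dump state kept in the netlink callback:
 *   cb->args[0] : DUMP_INIT/DUMP_ALL/DUMP_ONE/DUMP_LAST state
 *   cb->args[1] : index of the set currently being listed
 *   cb->args[2] : type specific; nonzero while a set is only partially
 *                 dumped (and therefore still referenced)
 */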
946
947static int
948ip_set_dump_done(struct netlink_callback *cb)
949{
950 if (cb->args[2]) {
951 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
952 ip_set_put_byindex((ip_set_id_t) cb->args[1]);
953 }
954 return 0;
955}
956
957static inline void
958dump_attrs(struct nlmsghdr *nlh)
959{
960 const struct nlattr *attr;
961 int rem;
962
963 pr_debug("dump nlmsg\n");
964 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
965 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
966 }
967}
968
969static int
970dump_init(struct netlink_callback *cb)
971{
972 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
973 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
974 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
975 struct nlattr *attr = (void *)nlh + min_len;
976 ip_set_id_t index;
977
978 /* Second pass, so parser can't fail */
979 nla_parse(cda, IPSET_ATTR_CMD_MAX,
980 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
981
982 /* cb->args[0] : dump single set/all sets
983 * [1] : set index
984 * [..]: type specific
985 */
986
987 if (!cda[IPSET_ATTR_SETNAME]) {
988 cb->args[0] = DUMP_ALL;
989 return 0;
990 }
991
992 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
993 if (index == IPSET_INVALID_ID)
994 return -ENOENT;
995
996 cb->args[0] = DUMP_ONE;
997 cb->args[1] = index;
998 return 0;
999}
1000
1001static int
1002ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1003{
1004 ip_set_id_t index = IPSET_INVALID_ID, max;
1005 struct ip_set *set = NULL;
1006 struct nlmsghdr *nlh = NULL;
1007 unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
1008 int ret = 0;
1009
1010 if (cb->args[0] == DUMP_INIT) {
1011 ret = dump_init(cb);
1012 if (ret < 0) {
1013 nlh = nlmsg_hdr(cb->skb);
1014 /* We have to create and send the error message
1015 * manually :-( */
1016 if (nlh->nlmsg_flags & NLM_F_ACK)
1017 netlink_ack(cb->skb, nlh, ret);
1018 return ret;
1019 }
1020 }
1021
1022 if (cb->args[1] >= ip_set_max)
1023 goto out;
1024
1025 max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
1026dump_last:
1027 pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
1028 for (; cb->args[1] < max; cb->args[1]++) {
1029 index = (ip_set_id_t) cb->args[1];
1030 set = ip_set_list[index];
1031 if (set == NULL) {
1032 if (cb->args[0] == DUMP_ONE) {
1033 ret = -ENOENT;
1034 goto out;
1035 }
1036 continue;
1037 }
1038 /* When dumping all sets, we must dump "sorted"
1039 * so that lists (unions of sets) are dumped last.
1040 */
1041 if (cb->args[0] != DUMP_ONE &&
1042 ((cb->args[0] == DUMP_ALL) ==
1043 !!(set->type->features & IPSET_DUMP_LAST)))
1044 continue;
1045 pr_debug("List set: %s\n", set->name);
1046 if (!cb->args[2]) {
1047 /* Start listing: make sure set won't be destroyed */
1048 pr_debug("reference set\n");
1049 __ip_set_get(index);
1050 }
1051 nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
1052 cb->nlh->nlmsg_seq, flags,
1053 IPSET_CMD_LIST);
1054 if (!nlh) {
1055 ret = -EMSGSIZE;
1056 goto release_refcount;
1057 }
1058 NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1059 NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
1060 switch (cb->args[2]) {
1061 case 0:
1062 /* Core header data */
1063 NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME,
1064 set->type->name);
1065 NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
1066 set->family);
1067 NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
1068 set->type->revision);
1069 ret = set->variant->head(set, skb);
1070 if (ret < 0)
1071 goto release_refcount;
1072 /* Fall through and add elements */
1073 default:
1074 read_lock_bh(&set->lock);
1075 ret = set->variant->list(set, skb, cb);
1076 read_unlock_bh(&set->lock);
1077 if (!cb->args[2]) {
1078 /* Set is done, proceed with next one */
1079 if (cb->args[0] == DUMP_ONE)
1080 cb->args[1] = IPSET_INVALID_ID;
1081 else
1082 cb->args[1]++;
1083 }
1084 goto release_refcount;
1085 }
1086 }
1087 /* If we dump all sets, continue with dumping last ones */
1088 if (cb->args[0] == DUMP_ALL) {
1089 cb->args[0] = DUMP_LAST;
1090 cb->args[1] = 0;
1091 goto dump_last;
1092 }
1093 goto out;
1094
1095nla_put_failure:
1096 ret = -EFAULT;
1097release_refcount:
1098 /* If there was an error or the set is done, release the set */
1099 if (ret || !cb->args[2]) {
1100 pr_debug("release set %s\n", ip_set_list[index]->name);
1101 ip_set_put_byindex(index);
1102 }
1103out:
1104 if (nlh) {
1105 nlmsg_end(skb, nlh);
1106 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1107 dump_attrs(nlh);
1108 }
1109
1110 return ret < 0 ? ret : skb->len;
1111}
1112
1113static int
1114ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1115 const struct nlmsghdr *nlh,
1116 const struct nlattr * const attr[])
1117{
1118 if (unlikely(protocol_failed(attr)))
1119 return -IPSET_ERR_PROTOCOL;
1120
1121 return netlink_dump_start(ctnl, skb, nlh,
1122 ip_set_dump_start,
1123 ip_set_dump_done);
1124}
1125
1126/* Add, del and test */
1127
1128static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1129 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1130 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1131 .len = IPSET_MAXNAMELEN - 1 },
1132 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
1133 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
1134 [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
1135};
1136
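/* Perform an add/del request: uadt() may return -EAGAIN when the underlying
 * hash is full, in which case the variant's resize() is called and the
 * operation is retried. In restore/batch mode (IPSET_ATTR_LINENO present) a
 * failure is reported back to userspace together with the offending line.
 */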
1137static int
1138call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1139 struct nlattr *tb[], enum ipset_adt adt,
1140 u32 flags, bool use_lineno)
1141{
1142 int ret, retried = 0;
1143 u32 lineno = 0;
1144 bool eexist = flags & IPSET_FLAG_EXIST;
1145
1146 do {
1147 write_lock_bh(&set->lock);
1148 ret = set->variant->uadt(set, tb, adt, &lineno, flags);
1149 write_unlock_bh(&set->lock);
1150 } while (ret == -EAGAIN &&
1151 set->variant->resize &&
1152 (ret = set->variant->resize(set, retried++)) == 0);
1153
1154 if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1155 return 0;
1156 if (lineno && use_lineno) {
1157 /* Error in restore/batch mode: send back lineno */
1158 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1159 struct sk_buff *skb2;
1160 struct nlmsgerr *errmsg;
1161 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1162 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1163 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1164 struct nlattr *cmdattr;
1165 u32 *errline;
1166
1167 skb2 = nlmsg_new(payload, GFP_KERNEL);
1168 if (skb2 == NULL)
1169 return -ENOMEM;
1170 rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
1171 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1172 errmsg = nlmsg_data(rep);
1173 errmsg->error = ret;
1174 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1175 cmdattr = (void *)&errmsg->msg + min_len;
1176
1177 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1178 cmdattr, nlh->nlmsg_len - min_len,
1179 ip_set_adt_policy);
1180
1181 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1182
1183 *errline = lineno;
1184
1185 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1186 /* Signal netlink not to send its ACK/errmsg. */
1187 return -EINTR;
1188 }
1189
1190 return ret;
1191}
1192
1193static int
1194ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1195 const struct nlmsghdr *nlh,
1196 const struct nlattr * const attr[])
1197{
1198 struct ip_set *set;
1199 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1200 const struct nlattr *nla;
1201 u32 flags = flag_exist(nlh);
1202 bool use_lineno;
1203 int ret = 0;
1204
1205 if (unlikely(protocol_failed(attr) ||
1206 attr[IPSET_ATTR_SETNAME] == NULL ||
1207 !((attr[IPSET_ATTR_DATA] != NULL) ^
1208 (attr[IPSET_ATTR_ADT] != NULL)) ||
1209 (attr[IPSET_ATTR_DATA] != NULL &&
1210 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1211 (attr[IPSET_ATTR_ADT] != NULL &&
1212 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1213 attr[IPSET_ATTR_LINENO] == NULL))))
1214 return -IPSET_ERR_PROTOCOL;
1215
1216 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1217 if (set == NULL)
1218 return -ENOENT;
1219
1220 use_lineno = !!attr[IPSET_ATTR_LINENO];
1221 if (attr[IPSET_ATTR_DATA]) {
1222 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1223 attr[IPSET_ATTR_DATA],
1224 set->type->adt_policy))
1225 return -IPSET_ERR_PROTOCOL;
1226 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1227 use_lineno);
1228 } else {
1229 int nla_rem;
1230
1231 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1232 memset(tb, 0, sizeof(tb));
1233 if (nla_type(nla) != IPSET_ATTR_DATA ||
1234 !flag_nested(nla) ||
1235 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1236 set->type->adt_policy))
1237 return -IPSET_ERR_PROTOCOL;
1238 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
1239 flags, use_lineno);
1240 if (ret < 0)
1241 return ret;
1242 }
1243 }
1244 return ret;
1245}
1246
1247static int
1248ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1249 const struct nlmsghdr *nlh,
1250 const struct nlattr * const attr[])
1251{
1252 struct ip_set *set;
1253 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1254 const struct nlattr *nla;
1255 u32 flags = flag_exist(nlh);
1256 bool use_lineno;
1257 int ret = 0;
1258
1259 if (unlikely(protocol_failed(attr) ||
1260 attr[IPSET_ATTR_SETNAME] == NULL ||
1261 !((attr[IPSET_ATTR_DATA] != NULL) ^
1262 (attr[IPSET_ATTR_ADT] != NULL)) ||
1263 (attr[IPSET_ATTR_DATA] != NULL &&
1264 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1265 (attr[IPSET_ATTR_ADT] != NULL &&
1266 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1267 attr[IPSET_ATTR_LINENO] == NULL))))
1268 return -IPSET_ERR_PROTOCOL;
1269
1270 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1271 if (set == NULL)
1272 return -ENOENT;
1273
1274 use_lineno = !!attr[IPSET_ATTR_LINENO];
1275 if (attr[IPSET_ATTR_DATA]) {
1276 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1277 attr[IPSET_ATTR_DATA],
1278 set->type->adt_policy))
1279 return -IPSET_ERR_PROTOCOL;
1280 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1281 use_lineno);
1282 } else {
1283 int nla_rem;
1284
1285 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1286 memset(tb, 0, sizeof(tb));
1287 if (nla_type(nla) != IPSET_ATTR_DATA ||
1288 !flag_nested(nla) ||
1289 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1290 set->type->adt_policy))
1291 return -IPSET_ERR_PROTOCOL;
1292 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
1293 flags, use_lineno);
1294 if (ret < 0)
1295 return ret;
1296 }
1297 }
1298 return ret;
1299}
1300
1301static int
1302ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1303 const struct nlmsghdr *nlh,
1304 const struct nlattr * const attr[])
1305{
1306 struct ip_set *set;
1307 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1308 int ret = 0;
1309
1310 if (unlikely(protocol_failed(attr) ||
1311 attr[IPSET_ATTR_SETNAME] == NULL ||
1312 attr[IPSET_ATTR_DATA] == NULL ||
1313 !flag_nested(attr[IPSET_ATTR_DATA])))
1314 return -IPSET_ERR_PROTOCOL;
1315
1316 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1317 if (set == NULL)
1318 return -ENOENT;
1319
1320 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1321 set->type->adt_policy))
1322 return -IPSET_ERR_PROTOCOL;
1323
1324 read_lock_bh(&set->lock);
1325 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
1326 read_unlock_bh(&set->lock);
1327 /* Userspace can't trigger an element to be re-added */
1328 if (ret == -EAGAIN)
1329 ret = 1;
1330
1331 return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1332}
1333
1334/* Get the header data of a set */
1335
1336static int
1337ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1338 const struct nlmsghdr *nlh,
1339 const struct nlattr * const attr[])
1340{
1341 const struct ip_set *set;
1342 struct sk_buff *skb2;
1343 struct nlmsghdr *nlh2;
1344 ip_set_id_t index;
1345 int ret = 0;
1346
1347 if (unlikely(protocol_failed(attr) ||
1348 attr[IPSET_ATTR_SETNAME] == NULL))
1349 return -IPSET_ERR_PROTOCOL;
1350
1351 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1352 if (index == IPSET_INVALID_ID)
1353 return -ENOENT;
1354 set = ip_set_list[index];
1355
1356 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1357 if (skb2 == NULL)
1358 return -ENOMEM;
1359
1360 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1361 IPSET_CMD_HEADER);
1362 if (!nlh2)
1363 goto nlmsg_failure;
1364 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1365 NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
1366 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
1367 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family);
1368 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
1369 nlmsg_end(skb2, nlh2);
1370
1371 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1372 if (ret < 0)
1373 return ret;
1374
1375 return 0;
1376
1377nla_put_failure:
1378 nlmsg_cancel(skb2, nlh2);
1379nlmsg_failure:
1380 kfree_skb(skb2);
1381 return -EMSGSIZE;
1382}
1383
1384/* Get type data */
1385
1386static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1387 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1388 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
1389 .len = IPSET_MAXNAMELEN - 1 },
1390 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
1391};
1392
1393static int
1394ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1395 const struct nlmsghdr *nlh,
1396 const struct nlattr * const attr[])
1397{
1398 struct sk_buff *skb2;
1399 struct nlmsghdr *nlh2;
1400 u8 family, min, max;
1401 const char *typename;
1402 int ret = 0;
1403
1404 if (unlikely(protocol_failed(attr) ||
1405 attr[IPSET_ATTR_TYPENAME] == NULL ||
1406 attr[IPSET_ATTR_FAMILY] == NULL))
1407 return -IPSET_ERR_PROTOCOL;
1408
1409 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1410 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1411 ret = find_set_type_minmax(typename, family, &min, &max);
1412 if (ret)
1413 return ret;
1414
1415 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1416 if (skb2 == NULL)
1417 return -ENOMEM;
1418
1419 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1420 IPSET_CMD_TYPE);
1421 if (!nlh2)
1422 goto nlmsg_failure;
1423 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1424 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename);
1425 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family);
1426 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max);
1427 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min);
1428 nlmsg_end(skb2, nlh2);
1429
1430 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1431 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1432 if (ret < 0)
1433 return ret;
1434
1435 return 0;
1436
1437nla_put_failure:
1438 nlmsg_cancel(skb2, nlh2);
1439nlmsg_failure:
1440 kfree_skb(skb2);
1441 return -EMSGSIZE;
1442}
1443
1444/* Get protocol version */
1445
1446static const struct nla_policy
1447ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1448 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1449};
1450
1451static int
1452ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1453 const struct nlmsghdr *nlh,
1454 const struct nlattr * const attr[])
1455{
1456 struct sk_buff *skb2;
1457 struct nlmsghdr *nlh2;
1458 int ret = 0;
1459
1460 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1461 return -IPSET_ERR_PROTOCOL;
1462
1463 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1464 if (skb2 == NULL)
1465 return -ENOMEM;
1466
1467 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1468 IPSET_CMD_PROTOCOL);
1469 if (!nlh2)
1470 goto nlmsg_failure;
1471 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1472 nlmsg_end(skb2, nlh2);
1473
1474 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1475 if (ret < 0)
1476 return ret;
1477
1478 return 0;
1479
1480nla_put_failure:
1481 nlmsg_cancel(skb2, nlh2);
1482nlmsg_failure:
1483 kfree_skb(skb2);
1484 return -EMSGSIZE;
1485}
1486
1487static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1488 [IPSET_CMD_CREATE] = {
1489 .call = ip_set_create,
1490 .attr_count = IPSET_ATTR_CMD_MAX,
1491 .policy = ip_set_create_policy,
1492 },
1493 [IPSET_CMD_DESTROY] = {
1494 .call = ip_set_destroy,
1495 .attr_count = IPSET_ATTR_CMD_MAX,
1496 .policy = ip_set_setname_policy,
1497 },
1498 [IPSET_CMD_FLUSH] = {
1499 .call = ip_set_flush,
1500 .attr_count = IPSET_ATTR_CMD_MAX,
1501 .policy = ip_set_setname_policy,
1502 },
1503 [IPSET_CMD_RENAME] = {
1504 .call = ip_set_rename,
1505 .attr_count = IPSET_ATTR_CMD_MAX,
1506 .policy = ip_set_setname2_policy,
1507 },
1508 [IPSET_CMD_SWAP] = {
1509 .call = ip_set_swap,
1510 .attr_count = IPSET_ATTR_CMD_MAX,
1511 .policy = ip_set_setname2_policy,
1512 },
1513 [IPSET_CMD_LIST] = {
1514 .call = ip_set_dump,
1515 .attr_count = IPSET_ATTR_CMD_MAX,
1516 .policy = ip_set_setname_policy,
1517 },
1518 [IPSET_CMD_SAVE] = {
1519 .call = ip_set_dump,
1520 .attr_count = IPSET_ATTR_CMD_MAX,
1521 .policy = ip_set_setname_policy,
1522 },
1523 [IPSET_CMD_ADD] = {
1524 .call = ip_set_uadd,
1525 .attr_count = IPSET_ATTR_CMD_MAX,
1526 .policy = ip_set_adt_policy,
1527 },
1528 [IPSET_CMD_DEL] = {
1529 .call = ip_set_udel,
1530 .attr_count = IPSET_ATTR_CMD_MAX,
1531 .policy = ip_set_adt_policy,
1532 },
1533 [IPSET_CMD_TEST] = {
1534 .call = ip_set_utest,
1535 .attr_count = IPSET_ATTR_CMD_MAX,
1536 .policy = ip_set_adt_policy,
1537 },
1538 [IPSET_CMD_HEADER] = {
1539 .call = ip_set_header,
1540 .attr_count = IPSET_ATTR_CMD_MAX,
1541 .policy = ip_set_setname_policy,
1542 },
1543 [IPSET_CMD_TYPE] = {
1544 .call = ip_set_type,
1545 .attr_count = IPSET_ATTR_CMD_MAX,
1546 .policy = ip_set_type_policy,
1547 },
1548 [IPSET_CMD_PROTOCOL] = {
1549 .call = ip_set_protocol,
1550 .attr_count = IPSET_ATTR_CMD_MAX,
1551 .policy = ip_set_protocol_policy,
1552 },
1553};
1554
1555static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1556 .name = "ip_set",
1557 .subsys_id = NFNL_SUBSYS_IPSET,
1558 .cb_count = IPSET_MSG_MAX,
1559 .cb = ip_set_netlink_subsys_cb,
1560};
1561
1562/* Interface to iptables/ip6tables */
1563
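/* The getsockopt() based interface lets the iptables set match/SET target
 * translate set names to indices and back. Roughly, a name lookup from
 * userspace could look like the sketch below (field names follow the
 * ip_set_req_get_set definition in the ipset headers; treat this as an
 * illustration, not a reference):
 *
 *	struct ip_set_req_get_set req = {
 *		.op      = IP_SET_OP_GET_BYNAME,
 *		.version = IPSET_PROTOCOL,
 *	};
 *	socklen_t len = sizeof(req);
 *
 *	strncpy(req.set.name, "myset", IPSET_MAXNAMELEN - 1);
 *	if (getsockopt(sockfd, SOL_IP, SO_IP_SET, &req, &len) == 0)
 *		printf("index: %u\n", req.set.index);
 *	// IPSET_INVALID_ID in req.set.index means: no such set
 */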
1564static int
1565ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1566{
1567 unsigned *op;
1568 void *data;
1569 int copylen = *len, ret = 0;
1570
1571 if (!capable(CAP_NET_ADMIN))
1572 return -EPERM;
1573 if (optval != SO_IP_SET)
1574 return -EBADF;
1575 if (*len < sizeof(unsigned))
1576 return -EINVAL;
1577
1578 data = vmalloc(*len);
1579 if (!data)
1580 return -ENOMEM;
1581 if (copy_from_user(data, user, *len) != 0) {
1582 ret = -EFAULT;
1583 goto done;
1584 }
1585 op = (unsigned *) data;
1586
1587 if (*op < IP_SET_OP_VERSION) {
1588 /* Check the version at the beginning of operations */
1589 struct ip_set_req_version *req_version = data;
1590 if (req_version->version != IPSET_PROTOCOL) {
1591 ret = -EPROTO;
1592 goto done;
1593 }
1594 }
1595
1596 switch (*op) {
1597 case IP_SET_OP_VERSION: {
1598 struct ip_set_req_version *req_version = data;
1599
1600 if (*len != sizeof(struct ip_set_req_version)) {
1601 ret = -EINVAL;
1602 goto done;
1603 }
1604
1605 req_version->version = IPSET_PROTOCOL;
1606 ret = copy_to_user(user, req_version,
1607 sizeof(struct ip_set_req_version));
1608 goto done;
1609 }
1610 case IP_SET_OP_GET_BYNAME: {
1611 struct ip_set_req_get_set *req_get = data;
1612
1613 if (*len != sizeof(struct ip_set_req_get_set)) {
1614 ret = -EINVAL;
1615 goto done;
1616 }
1617 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1618 nfnl_lock();
1619 req_get->set.index = find_set_id(req_get->set.name);
1620 nfnl_unlock();
1621 goto copy;
1622 }
1623 case IP_SET_OP_GET_BYINDEX: {
1624 struct ip_set_req_get_set *req_get = data;
1625
1626 if (*len != sizeof(struct ip_set_req_get_set) ||
1627 req_get->set.index >= ip_set_max) {
1628 ret = -EINVAL;
1629 goto done;
1630 }
1631 nfnl_lock();
1632 strncpy(req_get->set.name,
1633 ip_set_list[req_get->set.index]
1634 ? ip_set_list[req_get->set.index]->name : "",
1635 IPSET_MAXNAMELEN);
1636 nfnl_unlock();
1637 goto copy;
1638 }
1639 default:
1640 ret = -EBADMSG;
1641 goto done;
1642 } /* end of switch(op) */
1643
1644copy:
1645 ret = copy_to_user(user, data, copylen);
1646
1647done:
1648 vfree(data);
1649 if (ret > 0)
1650 ret = 0;
1651 return ret;
1652}
1653
1654static struct nf_sockopt_ops so_set __read_mostly = {
1655 .pf = PF_INET,
1656 .get_optmin = SO_IP_SET,
1657 .get_optmax = SO_IP_SET + 1,
1658 .get = &ip_set_sockfn_get,
1659 .owner = THIS_MODULE,
1660};
1661
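/* Module init: the maximal number of sets defaults to ip_set_max but can be
 * overridden with the max_sets module parameter; it is capped just below
 * IPSET_INVALID_ID, which is reserved as the "no such set" marker.
 */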
1662static int __init
1663ip_set_init(void)
1664{
1665 int ret;
1666
1667 if (max_sets)
1668 ip_set_max = max_sets;
1669 if (ip_set_max >= IPSET_INVALID_ID)
1670 ip_set_max = IPSET_INVALID_ID - 1;
1671
1672 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1673 GFP_KERNEL);
1674 if (!ip_set_list) {
1675 pr_err("ip_set: Unable to create ip_set_list\n");
1676 return -ENOMEM;
1677 }
1678
1679 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1680 if (ret != 0) {
1681 pr_err("ip_set: cannot register with nfnetlink.\n");
1682 kfree(ip_set_list);
1683 return ret;
1684 }
1685 ret = nf_register_sockopt(&so_set);
1686 if (ret != 0) {
1687 pr_err("SO_SET registry failed: %d\n", ret);
1688 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1689 kfree(ip_set_list);
1690 return ret;
1691 }
1692
1693 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1694 return 0;
1695}
1696
1697static void __exit
1698ip_set_fini(void)
1699{
1700 /* There can't be any existing set */
1701 nf_unregister_sockopt(&so_set);
1702 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1703 kfree(ip_set_list);
1704 pr_debug("these are the famous last words\n");
1705}
1706
1707module_init(ip_set_init);
1708module_exit(ip_set_fini);
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
new file mode 100644
index 000000000000..757143b2240a
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -0,0 +1,155 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Get Layer-4 data from the packets */
9
10#include <linux/ip.h>
11#include <linux/skbuff.h>
12#include <linux/icmp.h>
13#include <linux/icmpv6.h>
14#include <linux/sctp.h>
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18
19#include <linux/netfilter/ipset/ip_set_getport.h>
20
21/* We must handle non-linear skbs */
22static bool
23get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
24 bool src, __be16 *port, u8 *proto)
25{
26 switch (protocol) {
27 case IPPROTO_TCP: {
28 struct tcphdr _tcph;
29 const struct tcphdr *th;
30
31 th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
32 if (th == NULL)
33 /* No choice either */
34 return false;
35
36 *port = src ? th->source : th->dest;
37 break;
38 }
39 case IPPROTO_SCTP: {
40 sctp_sctphdr_t _sh;
41 const sctp_sctphdr_t *sh;
42
43 sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
44 if (sh == NULL)
45 /* No choice either */
46 return false;
47
48 *port = src ? sh->source : sh->dest;
49 break;
50 }
51 case IPPROTO_UDP:
52 case IPPROTO_UDPLITE: {
53 struct udphdr _udph;
54 const struct udphdr *uh;
55
56 uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
57 if (uh == NULL)
58 /* No choice either */
59 return false;
60
61 *port = src ? uh->source : uh->dest;
62 break;
63 }
64 case IPPROTO_ICMP: {
65 struct icmphdr _ich;
66 const struct icmphdr *ic;
67
68 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
69 if (ic == NULL)
70 return false;
71
72 *port = (__force __be16)htons((ic->type << 8) | ic->code);
73 break;
74 }
75 case IPPROTO_ICMPV6: {
76 struct icmp6hdr _ich;
77 const struct icmp6hdr *ic;
78
79 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
80 if (ic == NULL)
81 return false;
82
83 *port = (__force __be16)
84 htons((ic->icmp6_type << 8) | ic->icmp6_code);
85 break;
86 }
87 default:
88 break;
89 }
90 *proto = protocol;
91
92 return true;
93}
94
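/* Extract the port - or, for ICMP, the type/code pair encoded into the port
 * slot as (type << 8) | code - from an IPv4 packet. Non-initial fragments
 * carry no L4 header and are rejected, as in the tcp/udp matches of
 * ip_tables.
 */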
95bool
96ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
97 __be16 *port, u8 *proto)
98{
99 const struct iphdr *iph = ip_hdr(skb);
100 unsigned int protooff = ip_hdrlen(skb);
101 int protocol = iph->protocol;
102
103 /* See comments at tcp_match in ip_tables.c */
104 if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET))
105 return false;
106
107 return get_port(skb, protocol, protooff, src, port, proto);
108}
109EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
110
111#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
112bool
113ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
114 __be16 *port, u8 *proto)
115{
116 int protoff;
117 u8 nexthdr;
118
119 nexthdr = ipv6_hdr(skb)->nexthdr;
120 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
121 if (protoff < 0)
122 return false;
123
124 return get_port(skb, nexthdr, protoff, src, port, proto);
125}
126EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
127#endif
128
129bool
130ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
131{
132 bool ret;
133 u8 proto;
134
135 switch (pf) {
136 case AF_INET:
137 ret = ip_set_get_ip4_port(skb, src, port, &proto);
138 break;
139 case AF_INET6:
140 ret = ip_set_get_ip6_port(skb, src, port, &proto);
141 break;
142 default:
143 return false;
144 }
145 if (!ret)
146 return ret;
147 switch (proto) {
148 case IPPROTO_TCP:
149 case IPPROTO_UDP:
150 return true;
151 default:
152 return false;
153 }
154}
155EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
new file mode 100644
index 000000000000..43bcce200129
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -0,0 +1,464 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_hash.h>
26
27MODULE_LICENSE("GPL");
28MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
29MODULE_DESCRIPTION("hash:ip type of IP sets");
30MODULE_ALIAS("ip_set_hash:ip");
31
32/* Type specific function prefix */
33#define TYPE hash_ip
34
35static bool
36hash_ip_same_set(const struct ip_set *a, const struct ip_set *b);
37
38#define hash_ip4_same_set hash_ip_same_set
39#define hash_ip6_same_set hash_ip_same_set
40
41/* The type variant functions: IPv4 */
42
43/* Member elements without timeout */
44struct hash_ip4_elem {
45 __be32 ip;
46};
47
48/* Member elements with timeout support */
49struct hash_ip4_telem {
50 __be32 ip;
51 unsigned long timeout;
52};
53
54static inline bool
55hash_ip4_data_equal(const struct hash_ip4_elem *ip1,
56 const struct hash_ip4_elem *ip2)
57{
58 return ip1->ip == ip2->ip;
59}
60
61static inline bool
62hash_ip4_data_isnull(const struct hash_ip4_elem *elem)
63{
64 return elem->ip == 0;
65}
66
67static inline void
68hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src)
69{
70 dst->ip = src->ip;
71}
72
73/* Zero valued IP addresses cannot be stored */
74static inline void
75hash_ip4_data_zero_out(struct hash_ip4_elem *elem)
76{
77 elem->ip = 0;
78}
79
80static inline bool
81hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data)
82{
83 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
84 return 0;
85
86nla_put_failure:
87 return 1;
88}
89
90static bool
91hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data)
92{
93 const struct hash_ip4_telem *tdata =
94 (const struct hash_ip4_telem *)data;
95
96 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
97 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
98 htonl(ip_set_timeout_get(tdata->timeout)));
99
100 return 0;
101
102nla_put_failure:
103 return 1;
104}
105
106#define IP_SET_HASH_WITH_NETMASK
107#define PF 4
108#define HOST_MASK 32
109#include <linux/netfilter/ipset/ip_set_ahash.h>
110
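/* Kernel side add/del/test from the packet path: the source or destination
 * address (selected by IPSET_DIM_ONE_SRC) is masked with the configured
 * netmask; an all-zero result is rejected because zero marks an empty slot
 * in the hash.
 */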
111static int
112hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
113 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
114{
115 const struct ip_set_hash *h = set->data;
116 ipset_adtfn adtfn = set->variant->adt[adt];
117 __be32 ip;
118
119 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip);
120 ip &= ip_set_netmask(h->netmask);
121 if (ip == 0)
122 return -EINVAL;
123
124 return adtfn(set, &ip, h->timeout);
125}
126
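/* Userspace add/del/test. Besides a single address, a range given as
 * IP-IP_TO or IP/CIDR is expanded element by element: e.g. adding
 * 192.168.0.0/30 to a plain hash:ip set (netmask 32) inserts the four
 * covered addresses one by one; the all-zero address can never be stored.
 */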
127static int
128hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
129 enum ipset_adt adt, u32 *lineno, u32 flags)
130{
131 const struct ip_set_hash *h = set->data;
132 ipset_adtfn adtfn = set->variant->adt[adt];
133 u32 ip, ip_to, hosts, timeout = h->timeout;
134 __be32 nip;
135 int ret = 0;
136
137 if (unlikely(!tb[IPSET_ATTR_IP] ||
138 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
139 return -IPSET_ERR_PROTOCOL;
140
141 if (tb[IPSET_ATTR_LINENO])
142 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
143
144 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
145 if (ret)
146 return ret;
147
148 ip &= ip_set_hostmask(h->netmask);
149
150 if (tb[IPSET_ATTR_TIMEOUT]) {
151 if (!with_timeout(h->timeout))
152 return -IPSET_ERR_TIMEOUT;
153 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
154 }
155
156 if (adt == IPSET_TEST) {
157 nip = htonl(ip);
158 if (nip == 0)
159 return -IPSET_ERR_HASH_ELEM;
160 return adtfn(set, &nip, timeout);
161 }
162
163 if (tb[IPSET_ATTR_IP_TO]) {
164 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
165 if (ret)
166 return ret;
167 if (ip > ip_to)
168 swap(ip, ip_to);
169 } else if (tb[IPSET_ATTR_CIDR]) {
170 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
171
172 if (cidr > 32)
173 return -IPSET_ERR_INVALID_CIDR;
174 ip &= ip_set_hostmask(cidr);
175 ip_to = ip | ~ip_set_hostmask(cidr);
176 } else
177 ip_to = ip;
178
179 hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
180
181 for (; !before(ip_to, ip); ip += hosts) {
182 nip = htonl(ip);
183 if (nip == 0)
184 return -IPSET_ERR_HASH_ELEM;
185 ret = adtfn(set, &nip, timeout);
186
187 if (ret && !ip_set_eexist(ret, flags))
188 return ret;
189 else
190 ret = 0;
191 }
192 return ret;
193}
194
195static bool
196hash_ip_same_set(const struct ip_set *a, const struct ip_set *b)
197{
198 const struct ip_set_hash *x = a->data;
199 const struct ip_set_hash *y = b->data;
200
201 /* Resizing changes htable_bits, so we ignore it */
202 return x->maxelem == y->maxelem &&
203 x->timeout == y->timeout &&
204 x->netmask == y->netmask;
205}
206
207/* The type variant functions: IPv6 */
208
209struct hash_ip6_elem {
210 union nf_inet_addr ip;
211};
212
213struct hash_ip6_telem {
214 union nf_inet_addr ip;
215 unsigned long timeout;
216};
217
218static inline bool
219hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
220 const struct hash_ip6_elem *ip2)
221{
222 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0;
223}
224
225static inline bool
226hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
227{
228 return ipv6_addr_any(&elem->ip.in6);
229}
230
231static inline void
232hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
233{
234 ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
235}
236
237static inline void
238hash_ip6_data_zero_out(struct hash_ip6_elem *elem)
239{
240 ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0);
241}
242
243static inline void
244ip6_netmask(union nf_inet_addr *ip, u8 prefix)
245{
246 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
247 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
248 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
249 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
250}
251
252static bool
253hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data)
254{
255 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
256 return 0;
257
258nla_put_failure:
259 return 1;
260}
261
262static bool
263hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data)
264{
265 const struct hash_ip6_telem *e =
266 (const struct hash_ip6_telem *)data;
267
268 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
269 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
270 htonl(ip_set_timeout_get(e->timeout)));
271 return 0;
272
273nla_put_failure:
274 return 1;
275}
276
277#undef PF
278#undef HOST_MASK
279
280#define PF 6
281#define HOST_MASK 128
282#include <linux/netfilter/ipset/ip_set_ahash.h>
283
284static int
285hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
286 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
287{
288 const struct ip_set_hash *h = set->data;
289 ipset_adtfn adtfn = set->variant->adt[adt];
290 union nf_inet_addr ip;
291
292 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6);
293 ip6_netmask(&ip, h->netmask);
294 if (ipv6_addr_any(&ip.in6))
295 return -EINVAL;
296
297 return adtfn(set, &ip, h->timeout);
298}
299
300static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
301 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
302 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
303 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
304};
305
306static int
307hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
308 enum ipset_adt adt, u32 *lineno, u32 flags)
309{
310 const struct ip_set_hash *h = set->data;
311 ipset_adtfn adtfn = set->variant->adt[adt];
312 union nf_inet_addr ip;
313 u32 timeout = h->timeout;
314 int ret;
315
316 if (unlikely(!tb[IPSET_ATTR_IP] ||
317 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
318 tb[IPSET_ATTR_IP_TO] ||
319 tb[IPSET_ATTR_CIDR]))
320 return -IPSET_ERR_PROTOCOL;
321
322 if (tb[IPSET_ATTR_LINENO])
323 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
324
325 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip);
326 if (ret)
327 return ret;
328
329 ip6_netmask(&ip, h->netmask);
330 if (ipv6_addr_any(&ip.in6))
331 return -IPSET_ERR_HASH_ELEM;
332
333 if (tb[IPSET_ATTR_TIMEOUT]) {
334 if (!with_timeout(h->timeout))
335 return -IPSET_ERR_TIMEOUT;
336 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
337 }
338
339 ret = adtfn(set, &ip, timeout);
340
341 return ip_set_eexist(ret, flags) ? 0 : ret;
342}
343
344/* Create hash:ip type of sets */
345
346static int
347hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
348{
349 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
350 u8 netmask, hbits;
351 struct ip_set_hash *h;
352
353 if (!(set->family == AF_INET || set->family == AF_INET6))
354 return -IPSET_ERR_INVALID_FAMILY;
355 netmask = set->family == AF_INET ? 32 : 128;
356 pr_debug("Create set %s with family %s\n",
357 set->name, set->family == AF_INET ? "inet" : "inet6");
358
359 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
360 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
361 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
362 return -IPSET_ERR_PROTOCOL;
363
364 if (tb[IPSET_ATTR_HASHSIZE]) {
365 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
366 if (hashsize < IPSET_MIMINAL_HASHSIZE)
367 hashsize = IPSET_MIMINAL_HASHSIZE;
368 }
369
370 if (tb[IPSET_ATTR_MAXELEM])
371 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
372
373 if (tb[IPSET_ATTR_NETMASK]) {
374 netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
375
376 if ((set->family == AF_INET && netmask > 32) ||
377 (set->family == AF_INET6 && netmask > 128) ||
378 netmask == 0)
379 return -IPSET_ERR_INVALID_NETMASK;
380 }
381
382 h = kzalloc(sizeof(*h), GFP_KERNEL);
383 if (!h)
384 return -ENOMEM;
385
386 h->maxelem = maxelem;
387 h->netmask = netmask;
388 get_random_bytes(&h->initval, sizeof(h->initval));
389 h->timeout = IPSET_NO_TIMEOUT;
390
391 hbits = htable_bits(hashsize);
392 h->table = ip_set_alloc(
393 sizeof(struct htable)
394 + jhash_size(hbits) * sizeof(struct hbucket));
395 if (!h->table) {
396 kfree(h);
397 return -ENOMEM;
398 }
399 h->table->htable_bits = hbits;
400
401 set->data = h;
402
403 if (tb[IPSET_ATTR_TIMEOUT]) {
404 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
405
406 set->variant = set->family == AF_INET
407 ? &hash_ip4_tvariant : &hash_ip6_tvariant;
408
409 if (set->family == AF_INET)
410 hash_ip4_gc_init(set);
411 else
412 hash_ip6_gc_init(set);
413 } else {
414 set->variant = set->family == AF_INET
415 ? &hash_ip4_variant : &hash_ip6_variant;
416 }
417
418 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
419 set->name, jhash_size(h->table->htable_bits),
420 h->table->htable_bits, h->maxelem, set->data, h->table);
421
422 return 0;
423}
424
425static struct ip_set_type hash_ip_type __read_mostly = {
426 .name = "hash:ip",
427 .protocol = IPSET_PROTOCOL,
428 .features = IPSET_TYPE_IP,
429 .dimension = IPSET_DIM_ONE,
430 .family = AF_UNSPEC,
431 .revision = 0,
432 .create = hash_ip_create,
433 .create_policy = {
434 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
435 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
436 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
437 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
438 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
439 [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
440 },
441 .adt_policy = {
442 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
443 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
444 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
445 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
446 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
447 },
448 .me = THIS_MODULE,
449};
450
451static int __init
452hash_ip_init(void)
453{
454 return ip_set_type_register(&hash_ip_type);
455}
456
457static void __exit
458hash_ip_fini(void)
459{
460 ip_set_type_unregister(&hash_ip_type);
461}
462
463module_init(hash_ip_init);
464module_exit(hash_ip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
new file mode 100644
index 000000000000..14281b6b8074
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -0,0 +1,530 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip,port type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_getport.h>
26#include <linux/netfilter/ipset/ip_set_hash.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
30MODULE_DESCRIPTION("hash:ip,port type of IP sets");
31MODULE_ALIAS("ip_set_hash:ip,port");
32
33/* Type specific function prefix */
34#define TYPE hash_ipport
35
36static bool
37hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b);
38
39#define hash_ipport4_same_set hash_ipport_same_set
40#define hash_ipport6_same_set hash_ipport_same_set
41
42/* The type variant functions: IPv4 */
43
44/* Member elements without timeout */
45struct hash_ipport4_elem {
46 __be32 ip;
47 __be16 port;
48 u8 proto;
49 u8 padding;
50};
51
52/* Member elements with timeout support */
53struct hash_ipport4_telem {
54 __be32 ip;
55 __be16 port;
56 u8 proto;
57 u8 padding;
58 unsigned long timeout;
59};
60
61static inline bool
62hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
63 const struct hash_ipport4_elem *ip2)
64{
65 return ip1->ip == ip2->ip &&
66 ip1->port == ip2->port &&
67 ip1->proto == ip2->proto;
68}
69
70static inline bool
71hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem)
72{
73 return elem->proto == 0;
74}
75
76static inline void
77hash_ipport4_data_copy(struct hash_ipport4_elem *dst,
78 const struct hash_ipport4_elem *src)
79{
80 dst->ip = src->ip;
81 dst->port = src->port;
82 dst->proto = src->proto;
83}
84
85static inline void
86hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem)
87{
88 elem->proto = 0;
89}
90
91static bool
92hash_ipport4_data_list(struct sk_buff *skb,
93 const struct hash_ipport4_elem *data)
94{
95 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
96 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
97 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
98 return 0;
99
100nla_put_failure:
101 return 1;
102}
103
104static bool
105hash_ipport4_data_tlist(struct sk_buff *skb,
106 const struct hash_ipport4_elem *data)
107{
108 const struct hash_ipport4_telem *tdata =
109 (const struct hash_ipport4_telem *)data;
110
111 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
112 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
113 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
114 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
115 htonl(ip_set_timeout_get(tdata->timeout)));
116
117 return 0;
118
119nla_put_failure:
120 return 1;
121}
122
123#define PF 4
124#define HOST_MASK 32
125#include <linux/netfilter/ipset/ip_set_ahash.h>
126
127static int
128hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
129 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
130{
131 const struct ip_set_hash *h = set->data;
132 ipset_adtfn adtfn = set->variant->adt[adt];
133 struct hash_ipport4_elem data = { };
134
135 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
136 &data.port, &data.proto))
137 return -EINVAL;
138
139 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
140
141 return adtfn(set, &data, h->timeout);
142}
143
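/* Userspace add/del/test for hash:ip,port. A protocol is mandatory; the port
 * is only kept for port-carrying protocols (for ICMP it holds the type/code
 * pair), otherwise it is zeroed. IP and port ranges are expanded into
 * individual elements.
 */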
144static int
145hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
146 enum ipset_adt adt, u32 *lineno, u32 flags)
147{
148 const struct ip_set_hash *h = set->data;
149 ipset_adtfn adtfn = set->variant->adt[adt];
150 struct hash_ipport4_elem data = { };
151 u32 ip, ip_to, p, port, port_to;
152 u32 timeout = h->timeout;
153 bool with_ports = false;
154 int ret;
155
156 if (unlikely(!tb[IPSET_ATTR_IP] ||
157 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
158 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
159 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
160 return -IPSET_ERR_PROTOCOL;
161
162 if (tb[IPSET_ATTR_LINENO])
163 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
164
165 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
166 if (ret)
167 return ret;
168
169 if (tb[IPSET_ATTR_PORT])
170 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
171 else
172 return -IPSET_ERR_PROTOCOL;
173
174 if (tb[IPSET_ATTR_PROTO]) {
175 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
176 with_ports = ip_set_proto_with_ports(data.proto);
177
178 if (data.proto == 0)
179 return -IPSET_ERR_INVALID_PROTO;
180 } else
181 return -IPSET_ERR_MISSING_PROTO;
182
183 if (!(with_ports || data.proto == IPPROTO_ICMP))
184 data.port = 0;
185
186 if (tb[IPSET_ATTR_TIMEOUT]) {
187 if (!with_timeout(h->timeout))
188 return -IPSET_ERR_TIMEOUT;
189 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
190 }
191
192 if (adt == IPSET_TEST ||
193 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
194 tb[IPSET_ATTR_PORT_TO])) {
195 ret = adtfn(set, &data, timeout);
196 return ip_set_eexist(ret, flags) ? 0 : ret;
197 }
198
199 ip = ntohl(data.ip);
200 if (tb[IPSET_ATTR_IP_TO]) {
201 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
202 if (ret)
203 return ret;
204 if (ip > ip_to)
205 swap(ip, ip_to);
206 } else if (tb[IPSET_ATTR_CIDR]) {
207 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
208
209 if (cidr > 32)
210 return -IPSET_ERR_INVALID_CIDR;
211 ip &= ip_set_hostmask(cidr);
212 ip_to = ip | ~ip_set_hostmask(cidr);
213 } else
214 ip_to = ip;
215
216 port_to = port = ntohs(data.port);
217 if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
218 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
219 if (port > port_to)
220 swap(port, port_to);
221 }
222
223 for (; !before(ip_to, ip); ip++)
224 for (p = port; p <= port_to; p++) {
225 data.ip = htonl(ip);
226 data.port = htons(p);
227 ret = adtfn(set, &data, timeout);
228
229 if (ret && !ip_set_eexist(ret, flags))
230 return ret;
231 else
232 ret = 0;
233 }
234 return ret;
235}
236
237static bool
238hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b)
239{
240 const struct ip_set_hash *x = a->data;
241 const struct ip_set_hash *y = b->data;
242
243 /* Resizing changes htable_bits, so we ignore it */
244 return x->maxelem == y->maxelem &&
245 x->timeout == y->timeout;
246}
247
248/* The type variant functions: IPv6 */
249
250struct hash_ipport6_elem {
251 union nf_inet_addr ip;
252 __be16 port;
253 u8 proto;
254 u8 padding;
255};
256
257struct hash_ipport6_telem {
258 union nf_inet_addr ip;
259 __be16 port;
260 u8 proto;
261 u8 padding;
262 unsigned long timeout;
263};
264
265static inline bool
266hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
267 const struct hash_ipport6_elem *ip2)
268{
269 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
270 ip1->port == ip2->port &&
271 ip1->proto == ip2->proto;
272}
273
274static inline bool
275hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem)
276{
277 return elem->proto == 0;
278}
279
280static inline void
281hash_ipport6_data_copy(struct hash_ipport6_elem *dst,
282 const struct hash_ipport6_elem *src)
283{
284 memcpy(dst, src, sizeof(*dst));
285}
286
287static inline void
288hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem)
289{
290 elem->proto = 0;
291}
292
293static bool
294hash_ipport6_data_list(struct sk_buff *skb,
295 const struct hash_ipport6_elem *data)
296{
297 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
298 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
299 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
300 return 0;
301
302nla_put_failure:
303 return 1;
304}
305
306static bool
307hash_ipport6_data_tlist(struct sk_buff *skb,
308 const struct hash_ipport6_elem *data)
309{
310 const struct hash_ipport6_telem *e =
311 (const struct hash_ipport6_telem *)data;
312
313 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
314 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
315 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
316 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
317 htonl(ip_set_timeout_get(e->timeout)));
318 return 0;
319
320nla_put_failure:
321 return 1;
322}
323
324#undef PF
325#undef HOST_MASK
326
327#define PF 6
328#define HOST_MASK 128
329#include <linux/netfilter/ipset/ip_set_ahash.h>
330
331static int
332hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
333 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
334{
335 const struct ip_set_hash *h = set->data;
336 ipset_adtfn adtfn = set->variant->adt[adt];
337 struct hash_ipport6_elem data = { };
338
339 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
340 &data.port, &data.proto))
341 return -EINVAL;
342
343 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
344
345 return adtfn(set, &data, h->timeout);
346}
347
348static int
349hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
350 enum ipset_adt adt, u32 *lineno, u32 flags)
351{
352 const struct ip_set_hash *h = set->data;
353 ipset_adtfn adtfn = set->variant->adt[adt];
354 struct hash_ipport6_elem data = { };
355 u32 port, port_to;
356 u32 timeout = h->timeout;
357 bool with_ports = false;
358 int ret;
359
360 if (unlikely(!tb[IPSET_ATTR_IP] ||
361 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
362 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
363 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
364 tb[IPSET_ATTR_IP_TO] ||
365 tb[IPSET_ATTR_CIDR]))
366 return -IPSET_ERR_PROTOCOL;
367
368 if (tb[IPSET_ATTR_LINENO])
369 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
370
371 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
372 if (ret)
373 return ret;
374
375 if (tb[IPSET_ATTR_PORT])
376 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
377 else
378 return -IPSET_ERR_PROTOCOL;
379
380 if (tb[IPSET_ATTR_PROTO]) {
381 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
382 with_ports = ip_set_proto_with_ports(data.proto);
383
384 if (data.proto == 0)
385 return -IPSET_ERR_INVALID_PROTO;
386 } else
387 return -IPSET_ERR_MISSING_PROTO;
388
389 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
390 data.port = 0;
391
392 if (tb[IPSET_ATTR_TIMEOUT]) {
393 if (!with_timeout(h->timeout))
394 return -IPSET_ERR_TIMEOUT;
395 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
396 }
397
398 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
399 ret = adtfn(set, &data, timeout);
400 return ip_set_eexist(ret, flags) ? 0 : ret;
401 }
402
403 port = ntohs(data.port);
404 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
405 if (port > port_to)
406 swap(port, port_to);
407
408 for (; port <= port_to; port++) {
409 data.port = htons(port);
410 ret = adtfn(set, &data, timeout);
411
412 if (ret && !ip_set_eexist(ret, flags))
413 return ret;
414 else
415 ret = 0;
416 }
417 return ret;
418}
419
420/* Create hash:ip,port type of sets */
421
422static int
423hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
424{
425 struct ip_set_hash *h;
426 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
427 u8 hbits;
428
429 if (!(set->family == AF_INET || set->family == AF_INET6))
430 return -IPSET_ERR_INVALID_FAMILY;
431
432 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
433 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
434 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
435 return -IPSET_ERR_PROTOCOL;
436
437 if (tb[IPSET_ATTR_HASHSIZE]) {
438 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
439 if (hashsize < IPSET_MIMINAL_HASHSIZE)
440 hashsize = IPSET_MIMINAL_HASHSIZE;
441 }
442
443 if (tb[IPSET_ATTR_MAXELEM])
444 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
445
446 h = kzalloc(sizeof(*h), GFP_KERNEL);
447 if (!h)
448 return -ENOMEM;
449
450 h->maxelem = maxelem;
451 get_random_bytes(&h->initval, sizeof(h->initval));
452 h->timeout = IPSET_NO_TIMEOUT;
453
454 hbits = htable_bits(hashsize);
455 h->table = ip_set_alloc(
456 sizeof(struct htable)
457 + jhash_size(hbits) * sizeof(struct hbucket));
458 if (!h->table) {
459 kfree(h);
460 return -ENOMEM;
461 }
462 h->table->htable_bits = hbits;
463
464 set->data = h;
465
466 if (tb[IPSET_ATTR_TIMEOUT]) {
467 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
468
469 set->variant = set->family == AF_INET
470 ? &hash_ipport4_tvariant : &hash_ipport6_tvariant;
471
472 if (set->family == AF_INET)
473 hash_ipport4_gc_init(set);
474 else
475 hash_ipport6_gc_init(set);
476 } else {
477 set->variant = set->family == AF_INET
478 ? &hash_ipport4_variant : &hash_ipport6_variant;
479 }
480
481 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
482 set->name, jhash_size(h->table->htable_bits),
483 h->table->htable_bits, h->maxelem, set->data, h->table);
484
485 return 0;
486}
487
488static struct ip_set_type hash_ipport_type __read_mostly = {
489 .name = "hash:ip,port",
490 .protocol = IPSET_PROTOCOL,
491 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
492 .dimension = IPSET_DIM_TWO,
493 .family = AF_UNSPEC,
494 .revision = 1,
495 .create = hash_ipport_create,
496 .create_policy = {
497 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
498 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
499 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
500 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
501 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
502 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
503 },
504 .adt_policy = {
505 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
506 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
507 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
508 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
509 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
510 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
511 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
512 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
513 },
514 .me = THIS_MODULE,
515};
516
517static int __init
518hash_ipport_init(void)
519{
520 return ip_set_type_register(&hash_ipport_type);
521}
522
523static void __exit
524hash_ipport_fini(void)
525{
526 ip_set_type_unregister(&hash_ipport_type);
527}
528
529module_init(hash_ipport_init);
530module_exit(hash_ipport_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
new file mode 100644
index 000000000000..401c8a2531db
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -0,0 +1,548 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip,port,ip type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_getport.h>
26#include <linux/netfilter/ipset/ip_set_hash.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
30MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets");
31MODULE_ALIAS("ip_set_hash:ip,port,ip");
32
33/* Type specific function prefix */
34#define TYPE hash_ipportip
35
36static bool
37hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b);
38
39#define hash_ipportip4_same_set hash_ipportip_same_set
40#define hash_ipportip6_same_set hash_ipportip_same_set
41
42/* The type variant functions: IPv4 */
43
44/* Member elements without timeout */
45struct hash_ipportip4_elem {
46 __be32 ip;
47 __be32 ip2;
48 __be16 port;
49 u8 proto;
50 u8 padding;
51};
52
53/* Member elements with timeout support */
54struct hash_ipportip4_telem {
55 __be32 ip;
56 __be32 ip2;
57 __be16 port;
58 u8 proto;
59 u8 padding;
60 unsigned long timeout;
61};
62
63static inline bool
64hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
65 const struct hash_ipportip4_elem *ip2)
66{
67 return ip1->ip == ip2->ip &&
68 ip1->ip2 == ip2->ip2 &&
69 ip1->port == ip2->port &&
70 ip1->proto == ip2->proto;
71}
72
73static inline bool
74hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem)
75{
76 return elem->proto == 0;
77}
78
79static inline void
80hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst,
81 const struct hash_ipportip4_elem *src)
82{
83 memcpy(dst, src, sizeof(*dst));
84}
85
86static inline void
87hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem)
88{
89 elem->proto = 0;
90}
91
92static bool
93hash_ipportip4_data_list(struct sk_buff *skb,
94 const struct hash_ipportip4_elem *data)
95{
96 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
97 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2);
98 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
99 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
100 return 0;
101
102nla_put_failure:
103 return 1;
104}
105
106static bool
107hash_ipportip4_data_tlist(struct sk_buff *skb,
108 const struct hash_ipportip4_elem *data)
109{
110 const struct hash_ipportip4_telem *tdata =
111 (const struct hash_ipportip4_telem *)data;
112
113 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
114 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2);
115 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
116 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
117 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
118 htonl(ip_set_timeout_get(tdata->timeout)));
119
120 return 0;
121
122nla_put_failure:
123 return 1;
124}
125
126#define PF 4
127#define HOST_MASK 32
128#include <linux/netfilter/ipset/ip_set_ahash.h>
129
130static int
131hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
132 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
133{
134 const struct ip_set_hash *h = set->data;
135 ipset_adtfn adtfn = set->variant->adt[adt];
136 struct hash_ipportip4_elem data = { };
137
138 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
139 &data.port, &data.proto))
140 return -EINVAL;
141
142 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
143 ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
144
145 return adtfn(set, &data, h->timeout);
146}
147
148static int
149hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
150 enum ipset_adt adt, u32 *lineno, u32 flags)
151{
152 const struct ip_set_hash *h = set->data;
153 ipset_adtfn adtfn = set->variant->adt[adt];
154 struct hash_ipportip4_elem data = { };
155 u32 ip, ip_to, p, port, port_to;
156 u32 timeout = h->timeout;
157 bool with_ports = false;
158 int ret;
159
160 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
161 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
162 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
163 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
164 return -IPSET_ERR_PROTOCOL;
165
166 if (tb[IPSET_ATTR_LINENO])
167 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
168
169 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
170 if (ret)
171 return ret;
172
173 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
174 if (ret)
175 return ret;
176
177 if (tb[IPSET_ATTR_PORT])
178 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
179 else
180 return -IPSET_ERR_PROTOCOL;
181
182 if (tb[IPSET_ATTR_PROTO]) {
183 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
184 with_ports = ip_set_proto_with_ports(data.proto);
185
186 if (data.proto == 0)
187 return -IPSET_ERR_INVALID_PROTO;
188 } else
189 return -IPSET_ERR_MISSING_PROTO;
190
191 if (!(with_ports || data.proto == IPPROTO_ICMP))
192 data.port = 0;
193
194 if (tb[IPSET_ATTR_TIMEOUT]) {
195 if (!with_timeout(h->timeout))
196 return -IPSET_ERR_TIMEOUT;
197 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
198 }
199
200 if (adt == IPSET_TEST ||
201 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
202 tb[IPSET_ATTR_PORT_TO])) {
203 ret = adtfn(set, &data, timeout);
204 return ip_set_eexist(ret, flags) ? 0 : ret;
205 }
206
207 ip = ntohl(data.ip);
208 if (tb[IPSET_ATTR_IP_TO]) {
209 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
210 if (ret)
211 return ret;
212 if (ip > ip_to)
213 swap(ip, ip_to);
214 } else if (tb[IPSET_ATTR_CIDR]) {
215 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
216
217 if (cidr > 32)
218 return -IPSET_ERR_INVALID_CIDR;
219 ip &= ip_set_hostmask(cidr);
220 ip_to = ip | ~ip_set_hostmask(cidr);
221 } else
222 ip_to = ip;
223
224 port_to = port = ntohs(data.port);
225 if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
226 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
227 if (port > port_to)
228 swap(port, port_to);
229 }
230
231 for (; !before(ip_to, ip); ip++)
232 for (p = port; p <= port_to; p++) {
233 data.ip = htonl(ip);
234 data.port = htons(p);
235 ret = adtfn(set, &data, timeout);
236
237 if (ret && !ip_set_eexist(ret, flags))
238 return ret;
239 else
240 ret = 0;
241 }
242 return ret;
243}
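
The nested loop at the end of hash_ipportip4_uadt() expands a request carrying IPSET_ATTR_IP_TO (or IPSET_ATTR_CIDR) together with IPSET_ATTR_PORT_TO into one element per (address, port) pair. A rough user-space illustration of that expansion, with made-up addresses and ports:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ip = 0xc0a80001;	/* 192.168.0.1, host byte order */
	uint32_t ip_to = 0xc0a80003;	/* 192.168.0.3 */
	unsigned int port = 80, port_to = 81;
	unsigned int n = 0;

	/* Every address in [ip, ip_to] combined with every port in range */
	for (uint32_t a = ip; a <= ip_to; a++)
		for (unsigned int p = port; p <= port_to; p++, n++)
			printf("add %u.%u.%u.%u,%u\n",
			       (unsigned int)(a >> 24) & 0xffu,
			       (unsigned int)(a >> 16) & 0xffu,
			       (unsigned int)(a >> 8) & 0xffu,
			       (unsigned int)a & 0xffu, p);

	printf("%u elements\n", n);	/* 3 addresses x 2 ports = 6 */
	return 0;
}
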
244
245static bool
246hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b)
247{
248 const struct ip_set_hash *x = a->data;
249 const struct ip_set_hash *y = b->data;
250
251 /* Resizing changes htable_bits, so we ignore it */
252 return x->maxelem == y->maxelem &&
253 x->timeout == y->timeout;
254}
255
256/* The type variant functions: IPv6 */
257
258struct hash_ipportip6_elem {
259 union nf_inet_addr ip;
260 union nf_inet_addr ip2;
261 __be16 port;
262 u8 proto;
263 u8 padding;
264};
265
266struct hash_ipportip6_telem {
267 union nf_inet_addr ip;
268 union nf_inet_addr ip2;
269 __be16 port;
270 u8 proto;
271 u8 padding;
272 unsigned long timeout;
273};
274
275static inline bool
276hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
277 const struct hash_ipportip6_elem *ip2)
278{
279 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
280 ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
281 ip1->port == ip2->port &&
282 ip1->proto == ip2->proto;
283}
284
285static inline bool
286hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem)
287{
288 return elem->proto == 0;
289}
290
291static inline void
292hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst,
293 const struct hash_ipportip6_elem *src)
294{
295 memcpy(dst, src, sizeof(*dst));
296}
297
298static inline void
299hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem)
300{
301 elem->proto = 0;
302}
303
304static bool
305hash_ipportip6_data_list(struct sk_buff *skb,
306 const struct hash_ipportip6_elem *data)
307{
308 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
309 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
310 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
311 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
312 return 0;
313
314nla_put_failure:
315 return 1;
316}
317
318static bool
319hash_ipportip6_data_tlist(struct sk_buff *skb,
320 const struct hash_ipportip6_elem *data)
321{
322 const struct hash_ipportip6_telem *e =
323 (const struct hash_ipportip6_telem *)data;
324
325 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
326 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
327 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
328 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
329 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
330 htonl(ip_set_timeout_get(e->timeout)));
331 return 0;
332
333nla_put_failure:
334 return 1;
335}
336
337#undef PF
338#undef HOST_MASK
339
340#define PF 6
341#define HOST_MASK 128
342#include <linux/netfilter/ipset/ip_set_ahash.h>
343
344static int
345hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
346 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
347{
348 const struct ip_set_hash *h = set->data;
349 ipset_adtfn adtfn = set->variant->adt[adt];
350 struct hash_ipportip6_elem data = { };
351
352 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
353 &data.port, &data.proto))
354 return -EINVAL;
355
356 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
357 ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
358
359 return adtfn(set, &data, h->timeout);
360}
361
362static int
363hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
364 enum ipset_adt adt, u32 *lineno, u32 flags)
365{
366 const struct ip_set_hash *h = set->data;
367 ipset_adtfn adtfn = set->variant->adt[adt];
368 struct hash_ipportip6_elem data = { };
369 u32 port, port_to;
370 u32 timeout = h->timeout;
371 bool with_ports = false;
372 int ret;
373
374 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
375 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
376 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
377 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
378 tb[IPSET_ATTR_IP_TO] ||
379 tb[IPSET_ATTR_CIDR]))
380 return -IPSET_ERR_PROTOCOL;
381
382 if (tb[IPSET_ATTR_LINENO])
383 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
384
385 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
386 if (ret)
387 return ret;
388
389 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
390 if (ret)
391 return ret;
392
393 if (tb[IPSET_ATTR_PORT])
394 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
395 else
396 return -IPSET_ERR_PROTOCOL;
397
398 if (tb[IPSET_ATTR_PROTO]) {
399 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
400 with_ports = ip_set_proto_with_ports(data.proto);
401
402 if (data.proto == 0)
403 return -IPSET_ERR_INVALID_PROTO;
404 } else
405 return -IPSET_ERR_MISSING_PROTO;
406
407 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
408 data.port = 0;
409
410 if (tb[IPSET_ATTR_TIMEOUT]) {
411 if (!with_timeout(h->timeout))
412 return -IPSET_ERR_TIMEOUT;
413 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
414 }
415
416 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
417 ret = adtfn(set, &data, timeout);
418 return ip_set_eexist(ret, flags) ? 0 : ret;
419 }
420
421 port = ntohs(data.port);
422 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
423 if (port > port_to)
424 swap(port, port_to);
425
426 for (; port <= port_to; port++) {
427 data.port = htons(port);
428 ret = adtfn(set, &data, timeout);
429
430 if (ret && !ip_set_eexist(ret, flags))
431 return ret;
432 else
433 ret = 0;
434 }
435 return ret;
436}
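
Both uadt handlers only keep a port when the protocol can carry one; otherwise the port field is zeroed (ICMP/ICMPv6 keep it because the field doubles as type/code). A minimal sketch of that decision, assuming ip_set_proto_with_ports() accepts TCP and UDP; the exact protocol list lives in ip_set_getport.h and is not shown here:

#include <stdio.h>
#include <stdbool.h>
#include <netinet/in.h>		/* IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMPV6 */

/* Assumed behaviour of ip_set_proto_with_ports(); see ip_set_getport.h. */
static bool proto_with_ports_sketch(unsigned int proto)
{
	return proto == IPPROTO_TCP || proto == IPPROTO_UDP;
}

int main(void)
{
	unsigned int proto = IPPROTO_ICMPV6;	/* hypothetical add request */
	unsigned int port = 8080;

	/* Mirrors the zeroing done in the uadt handlers above */
	if (!(proto_with_ports_sketch(proto) || proto == IPPROTO_ICMPV6))
		port = 0;

	printf("proto %u keeps port %u\n", proto, port);
	return 0;
}
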
437
 438/* Create hash:ip,port,ip type of sets */
439
440static int
441hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
442{
443 struct ip_set_hash *h;
444 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
445 u8 hbits;
446
447 if (!(set->family == AF_INET || set->family == AF_INET6))
448 return -IPSET_ERR_INVALID_FAMILY;
449
450 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
451 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
452 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
453 return -IPSET_ERR_PROTOCOL;
454
455 if (tb[IPSET_ATTR_HASHSIZE]) {
456 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
457 if (hashsize < IPSET_MIMINAL_HASHSIZE)
458 hashsize = IPSET_MIMINAL_HASHSIZE;
459 }
460
461 if (tb[IPSET_ATTR_MAXELEM])
462 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
463
464 h = kzalloc(sizeof(*h), GFP_KERNEL);
465 if (!h)
466 return -ENOMEM;
467
468 h->maxelem = maxelem;
469 get_random_bytes(&h->initval, sizeof(h->initval));
470 h->timeout = IPSET_NO_TIMEOUT;
471
472 hbits = htable_bits(hashsize);
473 h->table = ip_set_alloc(
474 sizeof(struct htable)
475 + jhash_size(hbits) * sizeof(struct hbucket));
476 if (!h->table) {
477 kfree(h);
478 return -ENOMEM;
479 }
480 h->table->htable_bits = hbits;
481
482 set->data = h;
483
484 if (tb[IPSET_ATTR_TIMEOUT]) {
485 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
486
487 set->variant = set->family == AF_INET
488 ? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant;
489
490 if (set->family == AF_INET)
491 hash_ipportip4_gc_init(set);
492 else
493 hash_ipportip6_gc_init(set);
494 } else {
495 set->variant = set->family == AF_INET
496 ? &hash_ipportip4_variant : &hash_ipportip6_variant;
497 }
498
499 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
500 set->name, jhash_size(h->table->htable_bits),
501 h->table->htable_bits, h->maxelem, set->data, h->table);
502
503 return 0;
504}
505
506static struct ip_set_type hash_ipportip_type __read_mostly = {
507 .name = "hash:ip,port,ip",
508 .protocol = IPSET_PROTOCOL,
509 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
510 .dimension = IPSET_DIM_THREE,
511 .family = AF_UNSPEC,
512 .revision = 1,
513 .create = hash_ipportip_create,
514 .create_policy = {
515 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
516 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
517 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
518 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
519 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
520 },
521 .adt_policy = {
522 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
523 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
524 [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
525 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
526 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
527 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
528 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
529 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
530 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
531 },
532 .me = THIS_MODULE,
533};
534
535static int __init
536hash_ipportip_init(void)
537{
538 return ip_set_type_register(&hash_ipportip_type);
539}
540
541static void __exit
542hash_ipportip_fini(void)
543{
544 ip_set_type_unregister(&hash_ipportip_type);
545}
546
547module_init(hash_ipportip_init);
548module_exit(hash_ipportip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
new file mode 100644
index 000000000000..4743e5402522
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -0,0 +1,614 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip,port,net type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_getport.h>
26#include <linux/netfilter/ipset/ip_set_hash.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
30MODULE_DESCRIPTION("hash:ip,port,net type of IP sets");
31MODULE_ALIAS("ip_set_hash:ip,port,net");
32
33/* Type specific function prefix */
34#define TYPE hash_ipportnet
35
36static bool
37hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b);
38
39#define hash_ipportnet4_same_set hash_ipportnet_same_set
40#define hash_ipportnet6_same_set hash_ipportnet_same_set
41
42/* The type variant functions: IPv4 */
43
44/* Member elements without timeout */
45struct hash_ipportnet4_elem {
46 __be32 ip;
47 __be32 ip2;
48 __be16 port;
49 u8 cidr;
50 u8 proto;
51};
52
53/* Member elements with timeout support */
54struct hash_ipportnet4_telem {
55 __be32 ip;
56 __be32 ip2;
57 __be16 port;
58 u8 cidr;
59 u8 proto;
60 unsigned long timeout;
61};
62
63static inline bool
64hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
65 const struct hash_ipportnet4_elem *ip2)
66{
67 return ip1->ip == ip2->ip &&
68 ip1->ip2 == ip2->ip2 &&
69 ip1->cidr == ip2->cidr &&
70 ip1->port == ip2->port &&
71 ip1->proto == ip2->proto;
72}
73
74static inline bool
75hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem)
76{
77 return elem->proto == 0;
78}
79
80static inline void
81hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst,
82 const struct hash_ipportnet4_elem *src)
83{
84 memcpy(dst, src, sizeof(*dst));
85}
86
87static inline void
88hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
89{
90 elem->ip2 &= ip_set_netmask(cidr);
91 elem->cidr = cidr;
92}
93
94static inline void
95hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem)
96{
97 elem->proto = 0;
98}
99
100static bool
101hash_ipportnet4_data_list(struct sk_buff *skb,
102 const struct hash_ipportnet4_elem *data)
103{
104 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
105 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2);
106 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
107 NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
108 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
109 return 0;
110
111nla_put_failure:
112 return 1;
113}
114
115static bool
116hash_ipportnet4_data_tlist(struct sk_buff *skb,
117 const struct hash_ipportnet4_elem *data)
118{
119 const struct hash_ipportnet4_telem *tdata =
120 (const struct hash_ipportnet4_telem *)data;
121
122 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
123 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2);
124 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
125 NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
126 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
127 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
128 htonl(ip_set_timeout_get(tdata->timeout)));
129
130 return 0;
131
132nla_put_failure:
133 return 1;
134}
135
136#define IP_SET_HASH_WITH_PROTO
137#define IP_SET_HASH_WITH_NETS
138
139#define PF 4
140#define HOST_MASK 32
141#include <linux/netfilter/ipset/ip_set_ahash.h>
142
143static int
144hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
145 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
146{
147 const struct ip_set_hash *h = set->data;
148 ipset_adtfn adtfn = set->variant->adt[adt];
149 struct hash_ipportnet4_elem data =
 150		{ .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
151
152 if (data.cidr == 0)
153 return -EINVAL;
154 if (adt == IPSET_TEST)
155 data.cidr = HOST_MASK;
156
157 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
158 &data.port, &data.proto))
159 return -EINVAL;
160
161 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
162 ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
163 data.ip2 &= ip_set_netmask(data.cidr);
164
165 return adtfn(set, &data, h->timeout);
166}
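
Above, data.ip2 is reduced to its network part with ip_set_netmask(); the kernel helper is a lookup table in pfxlen.h. The sketch below computes an equivalent mask arithmetically for illustration only, working on host-order values with a hypothetical address:

#include <stdio.h>

/* Illustrative equivalent of ip_set_netmask(): cidr leading one bits,
 * the rest zero.  cidr == 0 is left to the caller, matching the
 * -IPSET_ERR_INVALID_CIDR check in the uadt handlers. */
static unsigned int netmask_sketch(unsigned int cidr)
{
	return cidr >= 32 ? 0xffffffffU : ~(0xffffffffU >> cidr);
}

int main(void)
{
	unsigned int ip = 0xc0a80a0f;	/* 192.168.10.15, hypothetical */
	unsigned int cidr = 24;

	printf("0x%08x/%u -> network 0x%08x\n",
	       ip, cidr, ip & netmask_sketch(cidr));
	return 0;
}
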
167
168static int
169hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
170 enum ipset_adt adt, u32 *lineno, u32 flags)
171{
172 const struct ip_set_hash *h = set->data;
173 ipset_adtfn adtfn = set->variant->adt[adt];
174 struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
175 u32 ip, ip_to, p, port, port_to;
176 u32 timeout = h->timeout;
177 bool with_ports = false;
178 int ret;
179
180 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
181 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
182 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
183 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
184 return -IPSET_ERR_PROTOCOL;
185
186 if (tb[IPSET_ATTR_LINENO])
187 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
188
189 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
190 if (ret)
191 return ret;
192
193 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
194 if (ret)
195 return ret;
196
197 if (tb[IPSET_ATTR_CIDR2])
198 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
199
200 if (!data.cidr)
201 return -IPSET_ERR_INVALID_CIDR;
202
203 data.ip2 &= ip_set_netmask(data.cidr);
204
205 if (tb[IPSET_ATTR_PORT])
206 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
207 else
208 return -IPSET_ERR_PROTOCOL;
209
210 if (tb[IPSET_ATTR_PROTO]) {
211 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
212 with_ports = ip_set_proto_with_ports(data.proto);
213
214 if (data.proto == 0)
215 return -IPSET_ERR_INVALID_PROTO;
216 } else
217 return -IPSET_ERR_MISSING_PROTO;
218
219 if (!(with_ports || data.proto == IPPROTO_ICMP))
220 data.port = 0;
221
222 if (tb[IPSET_ATTR_TIMEOUT]) {
223 if (!with_timeout(h->timeout))
224 return -IPSET_ERR_TIMEOUT;
225 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
226 }
227
228 if (adt == IPSET_TEST ||
229 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
230 tb[IPSET_ATTR_PORT_TO])) {
231 ret = adtfn(set, &data, timeout);
232 return ip_set_eexist(ret, flags) ? 0 : ret;
233 }
234
235 ip = ntohl(data.ip);
236 if (tb[IPSET_ATTR_IP_TO]) {
237 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
238 if (ret)
239 return ret;
240 if (ip > ip_to)
241 swap(ip, ip_to);
242 } else if (tb[IPSET_ATTR_CIDR]) {
243 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
244
245 if (cidr > 32)
246 return -IPSET_ERR_INVALID_CIDR;
247 ip &= ip_set_hostmask(cidr);
248 ip_to = ip | ~ip_set_hostmask(cidr);
249 } else
250 ip_to = ip;
251
252 port_to = port = ntohs(data.port);
253 if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
254 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
255 if (port > port_to)
256 swap(port, port_to);
257 }
258
259 for (; !before(ip_to, ip); ip++)
260 for (p = port; p <= port_to; p++) {
261 data.ip = htonl(ip);
262 data.port = htons(p);
263 ret = adtfn(set, &data, timeout);
264
265 if (ret && !ip_set_eexist(ret, flags))
266 return ret;
267 else
268 ret = 0;
269 }
270 return ret;
271}
272
273static bool
274hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b)
275{
276 const struct ip_set_hash *x = a->data;
277 const struct ip_set_hash *y = b->data;
278
279 /* Resizing changes htable_bits, so we ignore it */
280 return x->maxelem == y->maxelem &&
281 x->timeout == y->timeout;
282}
283
284/* The type variant functions: IPv6 */
285
286struct hash_ipportnet6_elem {
287 union nf_inet_addr ip;
288 union nf_inet_addr ip2;
289 __be16 port;
290 u8 cidr;
291 u8 proto;
292};
293
294struct hash_ipportnet6_telem {
295 union nf_inet_addr ip;
296 union nf_inet_addr ip2;
297 __be16 port;
298 u8 cidr;
299 u8 proto;
300 unsigned long timeout;
301};
302
303static inline bool
304hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
305 const struct hash_ipportnet6_elem *ip2)
306{
307 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
308 ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
309 ip1->cidr == ip2->cidr &&
310 ip1->port == ip2->port &&
311 ip1->proto == ip2->proto;
312}
313
314static inline bool
315hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem)
316{
317 return elem->proto == 0;
318}
319
320static inline void
321hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst,
322 const struct hash_ipportnet6_elem *src)
323{
324 memcpy(dst, src, sizeof(*dst));
325}
326
327static inline void
328hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem)
329{
330 elem->proto = 0;
331}
332
333static inline void
334ip6_netmask(union nf_inet_addr *ip, u8 prefix)
335{
336 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
337 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
338 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
339 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
340}
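
ip6_netmask() above clears the host bits of a 128-bit address held as four 32-bit words, using the ip_set_netmask6() prefix table. A standalone sketch of the same per-word masking, computing the mask words directly instead of using the kernel table (host byte order and a made-up address, for illustration only):

#include <stdio.h>

/* Mask value for the i-th 32-bit word of a prefix of length 'prefix'. */
static unsigned int mask_word(unsigned int prefix, int i)
{
	int bits = (int)prefix - i * 32;

	if (bits >= 32)
		return 0xffffffffU;
	if (bits <= 0)
		return 0;
	return ~(0xffffffffU >> bits);
}

int main(void)
{
	/* 2001:db8::dead:beef (hypothetical), as four host-order words */
	unsigned int ip6[4] = { 0x20010db8, 0x00000000, 0x00000000, 0xdeadbeef };
	unsigned int prefix = 64;
	int i;

	for (i = 0; i < 4; i++)
		ip6[i] &= mask_word(prefix, i);

	printf("masked to /%u: %08x:%08x:%08x:%08x\n",
	       prefix, ip6[0], ip6[1], ip6[2], ip6[3]);
	return 0;
}
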
341
342static inline void
343hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
344{
345 ip6_netmask(&elem->ip2, cidr);
346 elem->cidr = cidr;
347}
348
349static bool
350hash_ipportnet6_data_list(struct sk_buff *skb,
351 const struct hash_ipportnet6_elem *data)
352{
353 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
354 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
355 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
356 NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
357 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
358 return 0;
359
360nla_put_failure:
361 return 1;
362}
363
364static bool
365hash_ipportnet6_data_tlist(struct sk_buff *skb,
366 const struct hash_ipportnet6_elem *data)
367{
368 const struct hash_ipportnet6_telem *e =
369 (const struct hash_ipportnet6_telem *)data;
370
371 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
372 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
373 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
374 NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
375 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
376 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
377 htonl(ip_set_timeout_get(e->timeout)));
378 return 0;
379
380nla_put_failure:
381 return 1;
382}
383
384#undef PF
385#undef HOST_MASK
386
387#define PF 6
388#define HOST_MASK 128
389#include <linux/netfilter/ipset/ip_set_ahash.h>
390
391static int
392hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
393 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
394{
395 const struct ip_set_hash *h = set->data;
396 ipset_adtfn adtfn = set->variant->adt[adt];
397 struct hash_ipportnet6_elem data =
 398		{ .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
399
400 if (data.cidr == 0)
401 return -EINVAL;
402 if (adt == IPSET_TEST)
403 data.cidr = HOST_MASK;
404
405 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
406 &data.port, &data.proto))
407 return -EINVAL;
408
409 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
410 ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
411 ip6_netmask(&data.ip2, data.cidr);
412
413 return adtfn(set, &data, h->timeout);
414}
415
416static int
417hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
418 enum ipset_adt adt, u32 *lineno, u32 flags)
419{
420 const struct ip_set_hash *h = set->data;
421 ipset_adtfn adtfn = set->variant->adt[adt];
422 struct hash_ipportnet6_elem data = { .cidr = HOST_MASK };
423 u32 port, port_to;
424 u32 timeout = h->timeout;
425 bool with_ports = false;
426 int ret;
427
428 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
429 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
430 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
431 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
432 tb[IPSET_ATTR_IP_TO] ||
433 tb[IPSET_ATTR_CIDR]))
434 return -IPSET_ERR_PROTOCOL;
435
436 if (tb[IPSET_ATTR_LINENO])
437 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
438
439 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
440 if (ret)
441 return ret;
442
443 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
444 if (ret)
445 return ret;
446
447 if (tb[IPSET_ATTR_CIDR2])
448 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
449
450 if (!data.cidr)
451 return -IPSET_ERR_INVALID_CIDR;
452
453 ip6_netmask(&data.ip2, data.cidr);
454
455 if (tb[IPSET_ATTR_PORT])
456 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
457 else
458 return -IPSET_ERR_PROTOCOL;
459
460 if (tb[IPSET_ATTR_PROTO]) {
461 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
462 with_ports = ip_set_proto_with_ports(data.proto);
463
464 if (data.proto == 0)
465 return -IPSET_ERR_INVALID_PROTO;
466 } else
467 return -IPSET_ERR_MISSING_PROTO;
468
469 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
470 data.port = 0;
471
472 if (tb[IPSET_ATTR_TIMEOUT]) {
473 if (!with_timeout(h->timeout))
474 return -IPSET_ERR_TIMEOUT;
475 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
476 }
477
478 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
479 ret = adtfn(set, &data, timeout);
480 return ip_set_eexist(ret, flags) ? 0 : ret;
481 }
482
483 port = ntohs(data.port);
484 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
485 if (port > port_to)
486 swap(port, port_to);
487
488 for (; port <= port_to; port++) {
489 data.port = htons(port);
490 ret = adtfn(set, &data, timeout);
491
492 if (ret && !ip_set_eexist(ret, flags))
493 return ret;
494 else
495 ret = 0;
496 }
497 return ret;
498}
499
 500/* Create hash:ip,port,net type of sets */
501
502static int
503hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
504{
505 struct ip_set_hash *h;
506 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
507 u8 hbits;
508
509 if (!(set->family == AF_INET || set->family == AF_INET6))
510 return -IPSET_ERR_INVALID_FAMILY;
511
512 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
513 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
514 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
515 return -IPSET_ERR_PROTOCOL;
516
517 if (tb[IPSET_ATTR_HASHSIZE]) {
518 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
519 if (hashsize < IPSET_MIMINAL_HASHSIZE)
520 hashsize = IPSET_MIMINAL_HASHSIZE;
521 }
522
523 if (tb[IPSET_ATTR_MAXELEM])
524 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
525
526 h = kzalloc(sizeof(*h)
527 + sizeof(struct ip_set_hash_nets)
528 * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
529 if (!h)
530 return -ENOMEM;
531
532 h->maxelem = maxelem;
533 get_random_bytes(&h->initval, sizeof(h->initval));
534 h->timeout = IPSET_NO_TIMEOUT;
535
536 hbits = htable_bits(hashsize);
537 h->table = ip_set_alloc(
538 sizeof(struct htable)
539 + jhash_size(hbits) * sizeof(struct hbucket));
540 if (!h->table) {
541 kfree(h);
542 return -ENOMEM;
543 }
544 h->table->htable_bits = hbits;
545
546 set->data = h;
547
548 if (tb[IPSET_ATTR_TIMEOUT]) {
549 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
550
551 set->variant = set->family == AF_INET
552 ? &hash_ipportnet4_tvariant
553 : &hash_ipportnet6_tvariant;
554
555 if (set->family == AF_INET)
556 hash_ipportnet4_gc_init(set);
557 else
558 hash_ipportnet6_gc_init(set);
559 } else {
560 set->variant = set->family == AF_INET
561 ? &hash_ipportnet4_variant : &hash_ipportnet6_variant;
562 }
563
564 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
565 set->name, jhash_size(h->table->htable_bits),
566 h->table->htable_bits, h->maxelem, set->data, h->table);
567
568 return 0;
569}
570
571static struct ip_set_type hash_ipportnet_type __read_mostly = {
572 .name = "hash:ip,port,net",
573 .protocol = IPSET_PROTOCOL,
574 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
575 .dimension = IPSET_DIM_THREE,
576 .family = AF_UNSPEC,
577 .revision = 1,
578 .create = hash_ipportnet_create,
579 .create_policy = {
580 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
581 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
582 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
583 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
584 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
585 },
586 .adt_policy = {
587 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
588 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
589 [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
590 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
591 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
592 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
593 [IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
594 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
595 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
596 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
597 },
598 .me = THIS_MODULE,
599};
600
601static int __init
602hash_ipportnet_init(void)
603{
604 return ip_set_type_register(&hash_ipportnet_type);
605}
606
607static void __exit
608hash_ipportnet_fini(void)
609{
610 ip_set_type_unregister(&hash_ipportnet_type);
611}
612
613module_init(hash_ipportnet_init);
614module_exit(hash_ipportnet_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
new file mode 100644
index 000000000000..c4db202b7da4
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -0,0 +1,458 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:net type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19
20#include <linux/netfilter.h>
21#include <linux/netfilter/ipset/pfxlen.h>
22#include <linux/netfilter/ipset/ip_set.h>
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24#include <linux/netfilter/ipset/ip_set_hash.h>
25
26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
28MODULE_DESCRIPTION("hash:net type of IP sets");
29MODULE_ALIAS("ip_set_hash:net");
30
31/* Type specific function prefix */
32#define TYPE hash_net
33
34static bool
35hash_net_same_set(const struct ip_set *a, const struct ip_set *b);
36
37#define hash_net4_same_set hash_net_same_set
38#define hash_net6_same_set hash_net_same_set
39
40/* The type variant functions: IPv4 */
41
42/* Member elements without timeout */
43struct hash_net4_elem {
44 __be32 ip;
45 u16 padding0;
46 u8 padding1;
47 u8 cidr;
48};
49
50/* Member elements with timeout support */
51struct hash_net4_telem {
52 __be32 ip;
53 u16 padding0;
54 u8 padding1;
55 u8 cidr;
56 unsigned long timeout;
57};
58
59static inline bool
60hash_net4_data_equal(const struct hash_net4_elem *ip1,
61 const struct hash_net4_elem *ip2)
62{
63 return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr;
64}
65
66static inline bool
67hash_net4_data_isnull(const struct hash_net4_elem *elem)
68{
69 return elem->cidr == 0;
70}
71
72static inline void
73hash_net4_data_copy(struct hash_net4_elem *dst,
74 const struct hash_net4_elem *src)
75{
76 dst->ip = src->ip;
77 dst->cidr = src->cidr;
78}
79
80static inline void
81hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
82{
83 elem->ip &= ip_set_netmask(cidr);
84 elem->cidr = cidr;
85}
86
87/* Zero CIDR values cannot be stored */
88static inline void
89hash_net4_data_zero_out(struct hash_net4_elem *elem)
90{
91 elem->cidr = 0;
92}
93
94static bool
95hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
96{
97 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
98 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
99 return 0;
100
101nla_put_failure:
102 return 1;
103}
104
105static bool
106hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data)
107{
108 const struct hash_net4_telem *tdata =
109 (const struct hash_net4_telem *)data;
110
111 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
112 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr);
113 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
114 htonl(ip_set_timeout_get(tdata->timeout)));
115
116 return 0;
117
118nla_put_failure:
119 return 1;
120}
121
122#define IP_SET_HASH_WITH_NETS
123
124#define PF 4
125#define HOST_MASK 32
126#include <linux/netfilter/ipset/ip_set_ahash.h>
127
128static int
129hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
130 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
131{
132 const struct ip_set_hash *h = set->data;
133 ipset_adtfn adtfn = set->variant->adt[adt];
 134	struct hash_net4_elem data = { .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
135
136 if (data.cidr == 0)
137 return -EINVAL;
138 if (adt == IPSET_TEST)
139 data.cidr = HOST_MASK;
140
141 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
142 data.ip &= ip_set_netmask(data.cidr);
143
144 return adtfn(set, &data, h->timeout);
145}
146
147static int
148hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
149 enum ipset_adt adt, u32 *lineno, u32 flags)
150{
151 const struct ip_set_hash *h = set->data;
152 ipset_adtfn adtfn = set->variant->adt[adt];
153 struct hash_net4_elem data = { .cidr = HOST_MASK };
154 u32 timeout = h->timeout;
155 int ret;
156
157 if (unlikely(!tb[IPSET_ATTR_IP] ||
158 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
159 return -IPSET_ERR_PROTOCOL;
160
161 if (tb[IPSET_ATTR_LINENO])
162 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
163
164 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
165 if (ret)
166 return ret;
167
168 if (tb[IPSET_ATTR_CIDR])
169 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
170
171 if (!data.cidr)
172 return -IPSET_ERR_INVALID_CIDR;
173
174 data.ip &= ip_set_netmask(data.cidr);
175
176 if (tb[IPSET_ATTR_TIMEOUT]) {
177 if (!with_timeout(h->timeout))
178 return -IPSET_ERR_TIMEOUT;
179 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
180 }
181
182 ret = adtfn(set, &data, timeout);
183
184 return ip_set_eexist(ret, flags) ? 0 : ret;
185}
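
In hash:net the prefix length is part of the element, so the same masked address stored with different prefixes yields distinct members (hash_net4_data_equal() above compares both ip and cidr). A toy illustration, with padding fields omitted and hypothetical values:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Loosely mirrors struct hash_net4_elem (padding omitted). */
struct net_elem {
	uint32_t ip;	/* network address, already masked */
	uint8_t cidr;
};

static bool elem_equal(const struct net_elem *a, const struct net_elem *b)
{
	return a->ip == b->ip && a->cidr == b->cidr;
}

int main(void)
{
	/* 192.168.0.0/24 vs 192.168.0.0/16: same masked address, different cidr */
	struct net_elem a = { .ip = 0xc0a80000, .cidr = 24 };
	struct net_elem b = { .ip = 0xc0a80000, .cidr = 16 };

	printf("same address, /24 vs /16: %s\n",
	       elem_equal(&a, &b) ? "one element" : "two distinct elements");
	return 0;
}
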
186
187static bool
188hash_net_same_set(const struct ip_set *a, const struct ip_set *b)
189{
190 const struct ip_set_hash *x = a->data;
191 const struct ip_set_hash *y = b->data;
192
193 /* Resizing changes htable_bits, so we ignore it */
194 return x->maxelem == y->maxelem &&
195 x->timeout == y->timeout;
196}
197
198/* The type variant functions: IPv6 */
199
200struct hash_net6_elem {
201 union nf_inet_addr ip;
202 u16 padding0;
203 u8 padding1;
204 u8 cidr;
205};
206
207struct hash_net6_telem {
208 union nf_inet_addr ip;
209 u16 padding0;
210 u8 padding1;
211 u8 cidr;
212 unsigned long timeout;
213};
214
215static inline bool
216hash_net6_data_equal(const struct hash_net6_elem *ip1,
217 const struct hash_net6_elem *ip2)
218{
219 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
220 ip1->cidr == ip2->cidr;
221}
222
223static inline bool
224hash_net6_data_isnull(const struct hash_net6_elem *elem)
225{
226 return elem->cidr == 0;
227}
228
229static inline void
230hash_net6_data_copy(struct hash_net6_elem *dst,
231 const struct hash_net6_elem *src)
232{
233 ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
234 dst->cidr = src->cidr;
235}
236
237static inline void
238hash_net6_data_zero_out(struct hash_net6_elem *elem)
239{
240 elem->cidr = 0;
241}
242
243static inline void
244ip6_netmask(union nf_inet_addr *ip, u8 prefix)
245{
246 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
247 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
248 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
249 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
250}
251
252static inline void
253hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
254{
255 ip6_netmask(&elem->ip, cidr);
256 elem->cidr = cidr;
257}
258
259static bool
260hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
261{
262 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
263 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
264 return 0;
265
266nla_put_failure:
267 return 1;
268}
269
270static bool
271hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data)
272{
273 const struct hash_net6_telem *e =
274 (const struct hash_net6_telem *)data;
275
276 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
277 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr);
278 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
279 htonl(ip_set_timeout_get(e->timeout)));
280 return 0;
281
282nla_put_failure:
283 return 1;
284}
285
286#undef PF
287#undef HOST_MASK
288
289#define PF 6
290#define HOST_MASK 128
291#include <linux/netfilter/ipset/ip_set_ahash.h>
292
293static int
294hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
295 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
296{
297 const struct ip_set_hash *h = set->data;
298 ipset_adtfn adtfn = set->variant->adt[adt];
 299	struct hash_net6_elem data = { .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
300
301 if (data.cidr == 0)
302 return -EINVAL;
303 if (adt == IPSET_TEST)
304 data.cidr = HOST_MASK;
305
306 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
307 ip6_netmask(&data.ip, data.cidr);
308
309 return adtfn(set, &data, h->timeout);
310}
311
312static int
313hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
314 enum ipset_adt adt, u32 *lineno, u32 flags)
315{
316 const struct ip_set_hash *h = set->data;
317 ipset_adtfn adtfn = set->variant->adt[adt];
318 struct hash_net6_elem data = { .cidr = HOST_MASK };
319 u32 timeout = h->timeout;
320 int ret;
321
322 if (unlikely(!tb[IPSET_ATTR_IP] ||
323 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
324 return -IPSET_ERR_PROTOCOL;
325
326 if (tb[IPSET_ATTR_LINENO])
327 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
328
329 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
330 if (ret)
331 return ret;
332
333 if (tb[IPSET_ATTR_CIDR])
334 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
335
336 if (!data.cidr)
337 return -IPSET_ERR_INVALID_CIDR;
338
339 ip6_netmask(&data.ip, data.cidr);
340
341 if (tb[IPSET_ATTR_TIMEOUT]) {
342 if (!with_timeout(h->timeout))
343 return -IPSET_ERR_TIMEOUT;
344 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
345 }
346
347 ret = adtfn(set, &data, timeout);
348
349 return ip_set_eexist(ret, flags) ? 0 : ret;
350}
351
 352/* Create hash:net type of sets */
353
354static int
355hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
356{
357 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
358 struct ip_set_hash *h;
359 u8 hbits;
360
361 if (!(set->family == AF_INET || set->family == AF_INET6))
362 return -IPSET_ERR_INVALID_FAMILY;
363
364 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
365 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
366 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
367 return -IPSET_ERR_PROTOCOL;
368
369 if (tb[IPSET_ATTR_HASHSIZE]) {
370 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
371 if (hashsize < IPSET_MIMINAL_HASHSIZE)
372 hashsize = IPSET_MIMINAL_HASHSIZE;
373 }
374
375 if (tb[IPSET_ATTR_MAXELEM])
376 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
377
378 h = kzalloc(sizeof(*h)
379 + sizeof(struct ip_set_hash_nets)
380 * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
381 if (!h)
382 return -ENOMEM;
383
384 h->maxelem = maxelem;
385 get_random_bytes(&h->initval, sizeof(h->initval));
386 h->timeout = IPSET_NO_TIMEOUT;
387
388 hbits = htable_bits(hashsize);
389 h->table = ip_set_alloc(
390 sizeof(struct htable)
391 + jhash_size(hbits) * sizeof(struct hbucket));
392 if (!h->table) {
393 kfree(h);
394 return -ENOMEM;
395 }
396 h->table->htable_bits = hbits;
397
398 set->data = h;
399
400 if (tb[IPSET_ATTR_TIMEOUT]) {
401 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
402
403 set->variant = set->family == AF_INET
404 ? &hash_net4_tvariant : &hash_net6_tvariant;
405
406 if (set->family == AF_INET)
407 hash_net4_gc_init(set);
408 else
409 hash_net6_gc_init(set);
410 } else {
411 set->variant = set->family == AF_INET
412 ? &hash_net4_variant : &hash_net6_variant;
413 }
414
415 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
416 set->name, jhash_size(h->table->htable_bits),
417 h->table->htable_bits, h->maxelem, set->data, h->table);
418
419 return 0;
420}
421
422static struct ip_set_type hash_net_type __read_mostly = {
423 .name = "hash:net",
424 .protocol = IPSET_PROTOCOL,
425 .features = IPSET_TYPE_IP,
426 .dimension = IPSET_DIM_ONE,
427 .family = AF_UNSPEC,
428 .revision = 0,
429 .create = hash_net_create,
430 .create_policy = {
431 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
432 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
433 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
434 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
435 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
436 },
437 .adt_policy = {
438 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
439 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
440 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
441 },
442 .me = THIS_MODULE,
443};
444
445static int __init
446hash_net_init(void)
447{
448 return ip_set_type_register(&hash_net_type);
449}
450
451static void __exit
452hash_net_fini(void)
453{
454 ip_set_type_unregister(&hash_net_type);
455}
456
457module_init(hash_net_init);
458module_exit(hash_net_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
new file mode 100644
index 000000000000..d2a40362dd3a
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -0,0 +1,564 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:net,port type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19
20#include <linux/netfilter.h>
21#include <linux/netfilter/ipset/pfxlen.h>
22#include <linux/netfilter/ipset/ip_set.h>
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24#include <linux/netfilter/ipset/ip_set_getport.h>
25#include <linux/netfilter/ipset/ip_set_hash.h>
26
27MODULE_LICENSE("GPL");
28MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
29MODULE_DESCRIPTION("hash:net,port type of IP sets");
30MODULE_ALIAS("ip_set_hash:net,port");
31
32/* Type specific function prefix */
33#define TYPE hash_netport
34
35static bool
36hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);
37
38#define hash_netport4_same_set hash_netport_same_set
39#define hash_netport6_same_set hash_netport_same_set
40
41/* The type variant functions: IPv4 */
42
43/* Member elements without timeout */
44struct hash_netport4_elem {
45 __be32 ip;
46 __be16 port;
47 u8 proto;
48 u8 cidr;
49};
50
51/* Member elements with timeout support */
52struct hash_netport4_telem {
53 __be32 ip;
54 __be16 port;
55 u8 proto;
56 u8 cidr;
57 unsigned long timeout;
58};
59
60static inline bool
61hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
62 const struct hash_netport4_elem *ip2)
63{
64 return ip1->ip == ip2->ip &&
65 ip1->port == ip2->port &&
66 ip1->proto == ip2->proto &&
67 ip1->cidr == ip2->cidr;
68}
69
70static inline bool
71hash_netport4_data_isnull(const struct hash_netport4_elem *elem)
72{
73 return elem->proto == 0;
74}
75
76static inline void
77hash_netport4_data_copy(struct hash_netport4_elem *dst,
78 const struct hash_netport4_elem *src)
79{
80 dst->ip = src->ip;
81 dst->port = src->port;
82 dst->proto = src->proto;
83 dst->cidr = src->cidr;
84}
85
86static inline void
87hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
88{
89 elem->ip &= ip_set_netmask(cidr);
90 elem->cidr = cidr;
91}
92
93static inline void
94hash_netport4_data_zero_out(struct hash_netport4_elem *elem)
95{
96 elem->proto = 0;
97}
98
99static bool
100hash_netport4_data_list(struct sk_buff *skb,
101 const struct hash_netport4_elem *data)
102{
103 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
104 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
105 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
106 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
107 return 0;
108
109nla_put_failure:
110 return 1;
111}
112
113static bool
114hash_netport4_data_tlist(struct sk_buff *skb,
115 const struct hash_netport4_elem *data)
116{
117 const struct hash_netport4_telem *tdata =
118 (const struct hash_netport4_telem *)data;
119
120 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
121 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
122 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
123 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
124 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
125 htonl(ip_set_timeout_get(tdata->timeout)));
126
127 return 0;
128
129nla_put_failure:
130 return 1;
131}
132
133#define IP_SET_HASH_WITH_PROTO
134#define IP_SET_HASH_WITH_NETS
135
136#define PF 4
137#define HOST_MASK 32
138#include <linux/netfilter/ipset/ip_set_ahash.h>
139
140static int
141hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
142 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
143{
144 const struct ip_set_hash *h = set->data;
145 ipset_adtfn adtfn = set->variant->adt[adt];
146 struct hash_netport4_elem data = {
 147		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
148
149 if (data.cidr == 0)
150 return -EINVAL;
151 if (adt == IPSET_TEST)
152 data.cidr = HOST_MASK;
153
154 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
155 &data.port, &data.proto))
156 return -EINVAL;
157
158 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
159 data.ip &= ip_set_netmask(data.cidr);
160
161 return adtfn(set, &data, h->timeout);
162}
163
164static int
165hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
166 enum ipset_adt adt, u32 *lineno, u32 flags)
167{
168 const struct ip_set_hash *h = set->data;
169 ipset_adtfn adtfn = set->variant->adt[adt];
170 struct hash_netport4_elem data = { .cidr = HOST_MASK };
171 u32 port, port_to;
172 u32 timeout = h->timeout;
173 bool with_ports = false;
174 int ret;
175
176 if (unlikely(!tb[IPSET_ATTR_IP] ||
177 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
178 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
179 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
180 return -IPSET_ERR_PROTOCOL;
181
182 if (tb[IPSET_ATTR_LINENO])
183 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
184
185 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
186 if (ret)
187 return ret;
188
189 if (tb[IPSET_ATTR_CIDR])
190 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
191 if (!data.cidr)
192 return -IPSET_ERR_INVALID_CIDR;
193 data.ip &= ip_set_netmask(data.cidr);
194
195 if (tb[IPSET_ATTR_PORT])
196 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
197 else
198 return -IPSET_ERR_PROTOCOL;
199
200 if (tb[IPSET_ATTR_PROTO]) {
201 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
202 with_ports = ip_set_proto_with_ports(data.proto);
203
204 if (data.proto == 0)
205 return -IPSET_ERR_INVALID_PROTO;
206 } else
207 return -IPSET_ERR_MISSING_PROTO;
208
209 if (!(with_ports || data.proto == IPPROTO_ICMP))
210 data.port = 0;
211
212 if (tb[IPSET_ATTR_TIMEOUT]) {
213 if (!with_timeout(h->timeout))
214 return -IPSET_ERR_TIMEOUT;
215 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
216 }
217
218 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
219 ret = adtfn(set, &data, timeout);
220 return ip_set_eexist(ret, flags) ? 0 : ret;
221 }
222
223 port = ntohs(data.port);
224 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
225 if (port > port_to)
226 swap(port, port_to);
227
228 for (; port <= port_to; port++) {
229 data.port = htons(port);
230 ret = adtfn(set, &data, timeout);
231
232 if (ret && !ip_set_eexist(ret, flags))
233 return ret;
234 else
235 ret = 0;
236 }
237 return ret;
238}
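
Per-element timeouts are only honoured when the set itself was created with IPSET_ATTR_TIMEOUT; otherwise h->timeout stays at the IPSET_NO_TIMEOUT sentinel, with_timeout() fails, and the uadt handler returns -IPSET_ERR_TIMEOUT. A sketch of that gate, assuming with_timeout() is a plain comparison against the sentinel (the real definitions live in ip_set_timeout.h and are not shown here):

#include <stdio.h>
#include <stdbool.h>
#include <limits.h>

/* Assumed sentinel semantics, for illustration only. */
#define NO_TIMEOUT_SKETCH UINT_MAX

static bool with_timeout_sketch(unsigned int set_timeout)
{
	return set_timeout != NO_TIMEOUT_SKETCH;
}

int main(void)
{
	unsigned int set_timeout = NO_TIMEOUT_SKETCH;	/* set created without timeout */
	unsigned int requested = 600;			/* per-element request, seconds */

	if (!with_timeout_sketch(set_timeout))
		printf("reject: set has no timeout support (request %u s)\n",
		       requested);
	else
		printf("accept: element timeout %u s\n", requested);
	return 0;
}
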
239
240static bool
241hash_netport_same_set(const struct ip_set *a, const struct ip_set *b)
242{
243 const struct ip_set_hash *x = a->data;
244 const struct ip_set_hash *y = b->data;
245
246 /* Resizing changes htable_bits, so we ignore it */
247 return x->maxelem == y->maxelem &&
248 x->timeout == y->timeout;
249}
250
251/* The type variant functions: IPv6 */
252
253struct hash_netport6_elem {
254 union nf_inet_addr ip;
255 __be16 port;
256 u8 proto;
257 u8 cidr;
258};
259
260struct hash_netport6_telem {
261 union nf_inet_addr ip;
262 __be16 port;
263 u8 proto;
264 u8 cidr;
265 unsigned long timeout;
266};
267
268static inline bool
269hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
270 const struct hash_netport6_elem *ip2)
271{
272 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
273 ip1->port == ip2->port &&
274 ip1->proto == ip2->proto &&
275 ip1->cidr == ip2->cidr;
276}
277
278static inline bool
279hash_netport6_data_isnull(const struct hash_netport6_elem *elem)
280{
281 return elem->proto == 0;
282}
283
284static inline void
285hash_netport6_data_copy(struct hash_netport6_elem *dst,
286 const struct hash_netport6_elem *src)
287{
288 memcpy(dst, src, sizeof(*dst));
289}
290
291static inline void
292hash_netport6_data_zero_out(struct hash_netport6_elem *elem)
293{
294 elem->proto = 0;
295}
296
297static inline void
298ip6_netmask(union nf_inet_addr *ip, u8 prefix)
299{
300 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
301 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
302 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
303 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
304}
305
306static inline void
307hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
308{
309 ip6_netmask(&elem->ip, cidr);
310 elem->cidr = cidr;
311}
312
313static bool
314hash_netport6_data_list(struct sk_buff *skb,
315 const struct hash_netport6_elem *data)
316{
317 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
318 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
319 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
320 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
321 return 0;
322
323nla_put_failure:
324 return 1;
325}
326
327static bool
328hash_netport6_data_tlist(struct sk_buff *skb,
329 const struct hash_netport6_elem *data)
330{
331 const struct hash_netport6_telem *e =
332 (const struct hash_netport6_telem *)data;
333
334 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
335 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
336 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
337 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
338 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
339 htonl(ip_set_timeout_get(e->timeout)));
340 return 0;
341
342nla_put_failure:
343 return 1;
344}
345
346#undef PF
347#undef HOST_MASK
348
349#define PF 6
350#define HOST_MASK 128
351#include <linux/netfilter/ipset/ip_set_ahash.h>
352
353static int
354hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
355 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
356{
357 const struct ip_set_hash *h = set->data;
358 ipset_adtfn adtfn = set->variant->adt[adt];
359 struct hash_netport6_elem data = {
 360		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
361
362 if (data.cidr == 0)
363 return -EINVAL;
364 if (adt == IPSET_TEST)
365 data.cidr = HOST_MASK;
366
367 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
368 &data.port, &data.proto))
369 return -EINVAL;
370
371 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
372 ip6_netmask(&data.ip, data.cidr);
373
374 return adtfn(set, &data, h->timeout);
375}
376
377static int
378hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
379 enum ipset_adt adt, u32 *lineno, u32 flags)
380{
381 const struct ip_set_hash *h = set->data;
382 ipset_adtfn adtfn = set->variant->adt[adt];
383 struct hash_netport6_elem data = { .cidr = HOST_MASK };
384 u32 port, port_to;
385 u32 timeout = h->timeout;
386 bool with_ports = false;
387 int ret;
388
389 if (unlikely(!tb[IPSET_ATTR_IP] ||
390 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
391 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
392 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
393 return -IPSET_ERR_PROTOCOL;
394
395 if (tb[IPSET_ATTR_LINENO])
396 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
397
398 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
399 if (ret)
400 return ret;
401
402 if (tb[IPSET_ATTR_CIDR])
403 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
404 if (!data.cidr)
405 return -IPSET_ERR_INVALID_CIDR;
406 ip6_netmask(&data.ip, data.cidr);
407
408 if (tb[IPSET_ATTR_PORT])
409 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
410 else
411 return -IPSET_ERR_PROTOCOL;
412
413 if (tb[IPSET_ATTR_PROTO]) {
414 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
415 with_ports = ip_set_proto_with_ports(data.proto);
416
417 if (data.proto == 0)
418 return -IPSET_ERR_INVALID_PROTO;
419 } else
420 return -IPSET_ERR_MISSING_PROTO;
421
422 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
423 data.port = 0;
424
425 if (tb[IPSET_ATTR_TIMEOUT]) {
426 if (!with_timeout(h->timeout))
427 return -IPSET_ERR_TIMEOUT;
428 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
429 }
430
431 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
432 ret = adtfn(set, &data, timeout);
433 return ip_set_eexist(ret, flags) ? 0 : ret;
434 }
435
436 port = ntohs(data.port);
437 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
438 if (port > port_to)
439 swap(port, port_to);
440
441 for (; port <= port_to; port++) {
442 data.port = htons(port);
443 ret = adtfn(set, &data, timeout);
444
445 if (ret && !ip_set_eexist(ret, flags))
446 return ret;
447 else
448 ret = 0;
449 }
450 return ret;
451}
452
 453/* Create hash:net,port type of sets */
454
455static int
456hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
457{
458 struct ip_set_hash *h;
459 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
460 u8 hbits;
461
462 if (!(set->family == AF_INET || set->family == AF_INET6))
463 return -IPSET_ERR_INVALID_FAMILY;
464
465 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
466 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
467 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
468 return -IPSET_ERR_PROTOCOL;
469
470 if (tb[IPSET_ATTR_HASHSIZE]) {
471 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
472 if (hashsize < IPSET_MIMINAL_HASHSIZE)
473 hashsize = IPSET_MIMINAL_HASHSIZE;
474 }
475
476 if (tb[IPSET_ATTR_MAXELEM])
477 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
478
479 h = kzalloc(sizeof(*h)
480 + sizeof(struct ip_set_hash_nets)
481 * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
482 if (!h)
483 return -ENOMEM;
484
485 h->maxelem = maxelem;
486 get_random_bytes(&h->initval, sizeof(h->initval));
487 h->timeout = IPSET_NO_TIMEOUT;
488
489 hbits = htable_bits(hashsize);
490 h->table = ip_set_alloc(
491 sizeof(struct htable)
492 + jhash_size(hbits) * sizeof(struct hbucket));
493 if (!h->table) {
494 kfree(h);
495 return -ENOMEM;
496 }
497 h->table->htable_bits = hbits;
498
499 set->data = h;
500
501 if (tb[IPSET_ATTR_TIMEOUT]) {
502 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
503
504 set->variant = set->family == AF_INET
505 ? &hash_netport4_tvariant : &hash_netport6_tvariant;
506
507 if (set->family == AF_INET)
508 hash_netport4_gc_init(set);
509 else
510 hash_netport6_gc_init(set);
511 } else {
512 set->variant = set->family == AF_INET
513 ? &hash_netport4_variant : &hash_netport6_variant;
514 }
515
516 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
517 set->name, jhash_size(h->table->htable_bits),
518 h->table->htable_bits, h->maxelem, set->data, h->table);
519
520 return 0;
521}
522
523static struct ip_set_type hash_netport_type __read_mostly = {
524 .name = "hash:net,port",
525 .protocol = IPSET_PROTOCOL,
526 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
527 .dimension = IPSET_DIM_TWO,
528 .family = AF_UNSPEC,
529 .revision = 1,
530 .create = hash_netport_create,
531 .create_policy = {
532 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
533 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
534 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
535 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
536 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
537 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
538 },
539 .adt_policy = {
540 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
541 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
542 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
543 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
544 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
545 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
546 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
547 },
548 .me = THIS_MODULE,
549};
550
551static int __init
552hash_netport_init(void)
553{
554 return ip_set_type_register(&hash_netport_type);
555}
556
557static void __exit
558hash_netport_fini(void)
559{
560 ip_set_type_unregister(&hash_netport_type);
561}
562
563module_init(hash_netport_init);
564module_exit(hash_netport_fini);
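
hash_netport_create() above sizes the bucket array by rounding the requested hashsize up to a power of two (htable_bits()/jhash_size()). A minimal user-space sketch of that rounding idea, using a hypothetical bucket_bits() stand-in rather than the kernel helpers:

#include <stdio.h>

/* Smallest bit count whose bucket count (1 << bits) covers hashsize. */
static unsigned char bucket_bits(unsigned int hashsize)
{
	unsigned char bits = 0;

	while ((1U << bits) < hashsize && bits < 31)
		bits++;
	return bits;
}

int main(void)
{
	unsigned int requested = 1000;
	unsigned char bits = bucket_bits(requested);

	/* prints: requested 1000 -> 1024 buckets (10 bits) */
	printf("requested %u -> %u buckets (%u bits)\n",
	       requested, 1U << bits, bits);
	return 0;
}
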
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
new file mode 100644
index 000000000000..e9159e99fc4b
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -0,0 +1,577 @@
1/* Copyright (C) 2008-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the list:set type */
9
10#include <linux/module.h>
11#include <linux/ip.h>
12#include <linux/skbuff.h>
13#include <linux/errno.h>
14
15#include <linux/netfilter/ipset/ip_set.h>
16#include <linux/netfilter/ipset/ip_set_timeout.h>
17#include <linux/netfilter/ipset/ip_set_list.h>
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
21MODULE_DESCRIPTION("list:set type of IP sets");
22MODULE_ALIAS("ip_set_list:set");
23
24/* Member elements without and with timeout */
25struct set_elem {
26 ip_set_id_t id;
27};
28
29struct set_telem {
30 ip_set_id_t id;
31 unsigned long timeout;
32};
33
34/* Type structure */
35struct list_set {
36 size_t dsize; /* element size */
37 u32 size; /* size of set list array */
38 u32 timeout; /* timeout value */
39 struct timer_list gc; /* garbage collection */
40 struct set_elem members[0]; /* the set members */
41};
42
43static inline struct set_elem *
44list_set_elem(const struct list_set *map, u32 id)
45{
46 return (struct set_elem *)((void *)map->members + id * map->dsize);
47}
48
49static inline struct set_telem *
50list_set_telem(const struct list_set *map, u32 id)
51{
52 return (struct set_telem *)((void *)map->members + id * map->dsize);
53}
54
55static inline bool
56list_set_timeout(const struct list_set *map, u32 id)
57{
58 const struct set_telem *elem = list_set_telem(map, id);
59
60 return ip_set_timeout_test(elem->timeout);
61}
62
63static inline bool
64list_set_expired(const struct list_set *map, u32 id)
65{
66 const struct set_telem *elem = list_set_telem(map, id);
67
68 return ip_set_timeout_expired(elem->timeout);
69}
70
71/* Set list without and with timeout */
72
73static int
74list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
75 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
76{
77 struct list_set *map = set->data;
78 struct set_elem *elem;
79 u32 i;
80 int ret;
81
82 for (i = 0; i < map->size; i++) {
83 elem = list_set_elem(map, i);
84 if (elem->id == IPSET_INVALID_ID)
85 return 0;
86 if (with_timeout(map->timeout) && list_set_expired(map, i))
87 continue;
88 switch (adt) {
89 case IPSET_TEST:
90 ret = ip_set_test(elem->id, skb, pf, dim, flags);
91 if (ret > 0)
92 return ret;
93 break;
94 case IPSET_ADD:
95 ret = ip_set_add(elem->id, skb, pf, dim, flags);
96 if (ret == 0)
97 return ret;
98 break;
99 case IPSET_DEL:
100 ret = ip_set_del(elem->id, skb, pf, dim, flags);
101 if (ret == 0)
102 return ret;
103 break;
104 default:
105 break;
106 }
107 }
108 return -EINVAL;
109}
110
111static bool
112next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id)
113{
114 const struct set_elem *elem;
115
116 if (i + 1 < map->size) {
117 elem = list_set_elem(map, i + 1);
118 return !!(elem->id == id &&
119 !(with_timeout(map->timeout) &&
120 list_set_expired(map, i + 1)));
121 }
122
123 return 0;
124}
125
126static void
127list_elem_add(struct list_set *map, u32 i, ip_set_id_t id)
128{
129 struct set_elem *e;
130
131 for (; i < map->size; i++) {
132 e = list_set_elem(map, i);
133 swap(e->id, id);
134 if (e->id == IPSET_INVALID_ID)
135 break;
136 }
137}
138
139static void
140list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id,
141 unsigned long timeout)
142{
143 struct set_telem *e;
144
145 for (; i < map->size; i++) {
146 e = list_set_telem(map, i);
147 swap(e->id, id);
148 swap(e->timeout, timeout);
149 if (e->id == IPSET_INVALID_ID)
150 break;
151 }
152}
153
154static int
155list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
156 unsigned long timeout)
157{
158 const struct set_elem *e = list_set_elem(map, i);
159
160 if (i == map->size - 1 && e->id != IPSET_INVALID_ID)
161 /* Last element replaced: e.g. add new,before,last */
162 ip_set_put_byindex(e->id);
163 if (with_timeout(map->timeout))
164 list_elem_tadd(map, i, id, ip_set_timeout_set(timeout));
165 else
166 list_elem_add(map, i, id);
167
168 return 0;
169}
170
171static int
172list_set_del(struct list_set *map, u32 i)
173{
174 struct set_elem *a = list_set_elem(map, i), *b;
175
176 ip_set_put_byindex(a->id);
177
178 for (; i < map->size - 1; i++) {
179 b = list_set_elem(map, i + 1);
180 a->id = b->id;
181 if (with_timeout(map->timeout))
182 ((struct set_telem *)a)->timeout =
183 ((struct set_telem *)b)->timeout;
184 a = b;
185 if (a->id == IPSET_INVALID_ID)
186 break;
187 }
188 /* Last element */
189 a->id = IPSET_INVALID_ID;
190 return 0;
191}
192
193static int
194list_set_uadt(struct ip_set *set, struct nlattr *tb[],
195 enum ipset_adt adt, u32 *lineno, u32 flags)
196{
197 struct list_set *map = set->data;
198 bool with_timeout = with_timeout(map->timeout);
199 int before = 0;
200 u32 timeout = map->timeout;
201 ip_set_id_t id, refid = IPSET_INVALID_ID;
202 const struct set_elem *elem;
203 struct ip_set *s;
204 u32 i;
205 int ret = 0;
206
207 if (unlikely(!tb[IPSET_ATTR_NAME] ||
208 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
209 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
210 return -IPSET_ERR_PROTOCOL;
211
212 if (tb[IPSET_ATTR_LINENO])
213 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
214
215 id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
216 if (id == IPSET_INVALID_ID)
217 return -IPSET_ERR_NAME;
218 /* "Loop detection" */
219 if (s->type->features & IPSET_TYPE_NAME) {
220 ret = -IPSET_ERR_LOOP;
221 goto finish;
222 }
223
224 if (tb[IPSET_ATTR_CADT_FLAGS]) {
225 u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
226 before = f & IPSET_FLAG_BEFORE;
227 }
228
229 if (before && !tb[IPSET_ATTR_NAMEREF]) {
230 ret = -IPSET_ERR_BEFORE;
231 goto finish;
232 }
233
234 if (tb[IPSET_ATTR_NAMEREF]) {
235 refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
236 &s);
237 if (refid == IPSET_INVALID_ID) {
238 ret = -IPSET_ERR_NAMEREF;
239 goto finish;
240 }
241 if (!before)
242 before = -1;
243 }
244 if (tb[IPSET_ATTR_TIMEOUT]) {
245 if (!with_timeout) {
246 ret = -IPSET_ERR_TIMEOUT;
247 goto finish;
248 }
249 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
250 }
251
252 switch (adt) {
253 case IPSET_TEST:
254 for (i = 0; i < map->size && !ret; i++) {
255 elem = list_set_elem(map, i);
256 if (elem->id == IPSET_INVALID_ID ||
257 (before != 0 && i + 1 >= map->size))
258 break;
259 else if (with_timeout && list_set_expired(map, i))
260 continue;
261 else if (before > 0 && elem->id == id)
262 ret = next_id_eq(map, i, refid);
263 else if (before < 0 && elem->id == refid)
264 ret = next_id_eq(map, i, id);
265 else if (before == 0 && elem->id == id)
266 ret = 1;
267 }
268 break;
269 case IPSET_ADD:
270 for (i = 0; i < map->size && !ret; i++) {
271 elem = list_set_elem(map, i);
272 if (elem->id == id &&
273 !(with_timeout && list_set_expired(map, i)))
274 ret = -IPSET_ERR_EXIST;
275 }
276 if (ret == -IPSET_ERR_EXIST)
277 break;
278 ret = -IPSET_ERR_LIST_FULL;
279 for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
280 elem = list_set_elem(map, i);
281 if (elem->id == IPSET_INVALID_ID)
282 ret = before != 0 ? -IPSET_ERR_REF_EXIST
283 : list_set_add(map, i, id, timeout);
284 else if (elem->id != refid)
285 continue;
286 else if (with_timeout && list_set_expired(map, i))
287 ret = -IPSET_ERR_REF_EXIST;
288 else if (before)
289 ret = list_set_add(map, i, id, timeout);
290 else if (i + 1 < map->size)
291 ret = list_set_add(map, i + 1, id, timeout);
292 }
293 break;
294 case IPSET_DEL:
295 ret = -IPSET_ERR_EXIST;
296 for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) {
297 elem = list_set_elem(map, i);
298 if (elem->id == IPSET_INVALID_ID) {
299 ret = before != 0 ? -IPSET_ERR_REF_EXIST
300 : -IPSET_ERR_EXIST;
301 break;
302 } else if (with_timeout && list_set_expired(map, i))
303 continue;
304 else if (elem->id == id &&
305 (before == 0 ||
306 (before > 0 &&
307 next_id_eq(map, i, refid))))
308 ret = list_set_del(map, i);
309 else if (before < 0 &&
310 elem->id == refid &&
311 next_id_eq(map, i, id))
312 ret = list_set_del(map, i + 1);
313 }
314 break;
315 default:
316 break;
317 }
318
319finish:
320 if (refid != IPSET_INVALID_ID)
321 ip_set_put_byindex(refid);
322 if (adt != IPSET_ADD || ret)
323 ip_set_put_byindex(id);
324
325 return ip_set_eexist(ret, flags) ? 0 : ret;
326}
327
328static void
329list_set_flush(struct ip_set *set)
330{
331 struct list_set *map = set->data;
332 struct set_elem *elem;
333 u32 i;
334
335 for (i = 0; i < map->size; i++) {
336 elem = list_set_elem(map, i);
337 if (elem->id != IPSET_INVALID_ID) {
338 ip_set_put_byindex(elem->id);
339 elem->id = IPSET_INVALID_ID;
340 }
341 }
342}
343
344static void
345list_set_destroy(struct ip_set *set)
346{
347 struct list_set *map = set->data;
348
349 if (with_timeout(map->timeout))
350 del_timer_sync(&map->gc);
351 list_set_flush(set);
352 kfree(map);
353
354 set->data = NULL;
355}
356
357static int
358list_set_head(struct ip_set *set, struct sk_buff *skb)
359{
360 const struct list_set *map = set->data;
361 struct nlattr *nested;
362
363 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
364 if (!nested)
365 goto nla_put_failure;
366 NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size));
367 if (with_timeout(map->timeout))
368 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
369 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
370 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
371 htonl(sizeof(*map) + map->size * map->dsize));
372 ipset_nest_end(skb, nested);
373
374 return 0;
375nla_put_failure:
376 return -EMSGSIZE;
377}
378
379static int
380list_set_list(const struct ip_set *set,
381 struct sk_buff *skb, struct netlink_callback *cb)
382{
383 const struct list_set *map = set->data;
384 struct nlattr *atd, *nested;
385 u32 i, first = cb->args[2];
386 const struct set_elem *e;
387
388 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
389 if (!atd)
390 return -EMSGSIZE;
391 for (; cb->args[2] < map->size; cb->args[2]++) {
392 i = cb->args[2];
393 e = list_set_elem(map, i);
394 if (e->id == IPSET_INVALID_ID)
395 goto finish;
396 if (with_timeout(map->timeout) && list_set_expired(map, i))
397 continue;
398 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
399 if (!nested) {
400 if (i == first) {
401 nla_nest_cancel(skb, atd);
402 return -EMSGSIZE;
403 } else
404 goto nla_put_failure;
405 }
406 NLA_PUT_STRING(skb, IPSET_ATTR_NAME,
407 ip_set_name_byindex(e->id));
408 if (with_timeout(map->timeout)) {
409 const struct set_telem *te =
410 (const struct set_telem *) e;
411 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
412 htonl(ip_set_timeout_get(te->timeout)));
413 }
414 ipset_nest_end(skb, nested);
415 }
416finish:
417 ipset_nest_end(skb, atd);
418 /* Set listing finished */
419 cb->args[2] = 0;
420 return 0;
421
422nla_put_failure:
423 nla_nest_cancel(skb, nested);
424 ipset_nest_end(skb, atd);
425 if (unlikely(i == first)) {
426 cb->args[2] = 0;
427 return -EMSGSIZE;
428 }
429 return 0;
430}
431
432static bool
433list_set_same_set(const struct ip_set *a, const struct ip_set *b)
434{
435 const struct list_set *x = a->data;
436 const struct list_set *y = b->data;
437
438 return x->size == y->size &&
439 x->timeout == y->timeout;
440}
441
442static const struct ip_set_type_variant list_set = {
443 .kadt = list_set_kadt,
444 .uadt = list_set_uadt,
445 .destroy = list_set_destroy,
446 .flush = list_set_flush,
447 .head = list_set_head,
448 .list = list_set_list,
449 .same_set = list_set_same_set,
450};
451
452static void
453list_set_gc(unsigned long ul_set)
454{
455 struct ip_set *set = (struct ip_set *) ul_set;
456 struct list_set *map = set->data;
457 struct set_telem *e;
458 u32 i;
459
460 write_lock_bh(&set->lock);
461 for (i = 0; i < map->size; i++) {
462 e = list_set_telem(map, i);
463 if (e->id != IPSET_INVALID_ID && list_set_expired(map, i))
464 list_set_del(map, i);
465 }
466 write_unlock_bh(&set->lock);
467
468 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
469 add_timer(&map->gc);
470}
471
472static void
473list_set_gc_init(struct ip_set *set)
474{
475 struct list_set *map = set->data;
476
477 init_timer(&map->gc);
478 map->gc.data = (unsigned long) set;
479 map->gc.function = list_set_gc;
480 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
481 add_timer(&map->gc);
482}
483
484/* Create list:set type of sets */
485
486static bool
487init_list_set(struct ip_set *set, u32 size, size_t dsize,
488 unsigned long timeout)
489{
490 struct list_set *map;
491 struct set_elem *e;
492 u32 i;
493
494 map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
495 if (!map)
496 return false;
497
498 map->size = size;
499 map->dsize = dsize;
500 map->timeout = timeout;
501 set->data = map;
502
503 for (i = 0; i < size; i++) {
504 e = list_set_elem(map, i);
505 e->id = IPSET_INVALID_ID;
506 }
507
508 return true;
509}
510
511static int
512list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
513{
514 u32 size = IP_SET_LIST_DEFAULT_SIZE;
515
516 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
517 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
518 return -IPSET_ERR_PROTOCOL;
519
520 if (tb[IPSET_ATTR_SIZE])
521 size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]);
522 if (size < IP_SET_LIST_MIN_SIZE)
523 size = IP_SET_LIST_MIN_SIZE;
524
525 if (tb[IPSET_ATTR_TIMEOUT]) {
526 if (!init_list_set(set, size, sizeof(struct set_telem),
527 ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT])))
528 return -ENOMEM;
529
530 list_set_gc_init(set);
531 } else {
532 if (!init_list_set(set, size, sizeof(struct set_elem),
533 IPSET_NO_TIMEOUT))
534 return -ENOMEM;
535 }
536 set->variant = &list_set;
537 return 0;
538}
539
540static struct ip_set_type list_set_type __read_mostly = {
541 .name = "list:set",
542 .protocol = IPSET_PROTOCOL,
543 .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST,
544 .dimension = IPSET_DIM_ONE,
545 .family = AF_UNSPEC,
546 .revision = 0,
547 .create = list_set_create,
548 .create_policy = {
549 [IPSET_ATTR_SIZE] = { .type = NLA_U32 },
550 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
551 },
552 .adt_policy = {
553 [IPSET_ATTR_NAME] = { .type = NLA_STRING,
554 .len = IPSET_MAXNAMELEN },
555 [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING,
556 .len = IPSET_MAXNAMELEN },
557 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
558 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
559 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
560 },
561 .me = THIS_MODULE,
562};
563
564static int __init
565list_set_init(void)
566{
567 return ip_set_type_register(&list_set_type);
568}
569
570static void __exit
571list_set_fini(void)
572{
573 ip_set_type_unregister(&list_set_type);
574}
575
576module_init(list_set_init);
577module_exit(list_set_fini);
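
list_elem_add()/list_set_add() above insert an id at a given slot by shifting the following entries down one position; whatever is pushed off the end of the array is dropped (its reference released by list_set_add()). A standalone sketch of the same shift, assuming plain user-space arrays instead of the kernel structures:

#include <stdio.h>

#define SIZE	4
#define INVALID	0xffffu		/* stands in for IPSET_INVALID_ID */

static void insert_at(unsigned short list[SIZE], unsigned int i,
		      unsigned short id)
{
	for (; i < SIZE; i++) {
		unsigned short displaced = list[i];

		list[i] = id;
		if (displaced == INVALID)
			break;		/* rest of the list was empty */
		id = displaced;		/* keep shifting the old entries down */
	}
}

int main(void)
{
	unsigned short list[SIZE] = { 10, 20, 30, INVALID };
	unsigned int i;

	insert_at(list, 1, 15);		/* "add 15 before 20" */
	for (i = 0; i < SIZE; i++)
		printf("%u ", list[i]);	/* prints: 10 15 20 30 */
	printf("\n");
	return 0;
}
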
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
new file mode 100644
index 000000000000..23f8c8162214
--- /dev/null
+++ b/net/netfilter/ipset/pfxlen.c
@@ -0,0 +1,291 @@
1#include <linux/netfilter/ipset/pfxlen.h>
2
3/*
4 * Prefixlen maps for fast conversions, by Jan Engelhardt.
5 */
6
7#define E(a, b, c, d) \
8 {.ip6 = { \
9 __constant_htonl(a), __constant_htonl(b), \
10 __constant_htonl(c), __constant_htonl(d), \
11 } }
12
13/*
14 * This table works for both IPv4 and IPv6;
15 * just use ip_set_netmask_map[prefixlength].ip.
16 */
17const union nf_inet_addr ip_set_netmask_map[] = {
18 E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
19 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
20 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
21 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
22 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
23 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
24 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
25 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
26 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
27 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
28 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
29 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
30 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
31 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
32 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
33 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
34 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
35 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
36 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
37 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
38 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
39 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
40 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
41 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
42 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
43 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
44 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
45 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
46 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
47 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
48 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
49 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
50 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
51 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
52 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
53 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
54 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
55 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
56 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
57 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
58 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
59 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
60 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
61 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
62 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
63 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
64 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
65 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
66 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
67 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
68 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
69 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
70 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
71 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
72 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
73 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
74 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
75 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
76 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
77 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
78 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
79 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
80 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
81 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
82 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
83 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
84 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
85 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
86 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
87 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
88 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
89 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
90 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
91 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
92 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
93 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
94 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
95 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
96 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
97 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
98 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
99 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
100 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
101 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
102 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
103 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
104 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
105 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
106 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
107 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
108 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
109 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
110 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
111 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
112 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
113 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
114 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
115 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
116 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
117 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
118 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
119 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
120 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
121 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
122 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
123 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
124 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
125 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
126 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
127 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
128 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
129 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
130 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
131 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
132 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
133 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
134 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
135 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
136 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
137 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
138 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
139 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
140 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
141 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
142 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
143 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
144 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
145 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
146 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
147};
148EXPORT_SYMBOL_GPL(ip_set_netmask_map);
149
150#undef E
151#define E(a, b, c, d) \
152 {.ip6 = { (__force __be32) a, (__force __be32) b, \
153 (__force __be32) c, (__force __be32) d, \
154 } }
155
156/*
157 * This table works for both IPv4 and IPv6;
158 * just use ip_set_hostmask_map[prefixlength].ip.
159 */
160const union nf_inet_addr ip_set_hostmask_map[] = {
161 E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
162 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
163 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
164 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
165 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
166 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
167 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
168 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
169 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
170 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
171 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
172 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
173 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
174 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
175 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
176 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
177 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
178 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
179 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
180 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
181 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
182 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
183 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
184 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
185 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
186 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
187 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
188 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
189 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
190 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
191 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
192 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
193 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
194 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
195 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
196 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
197 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
198 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
199 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
200 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
201 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
202 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
203 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
204 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
205 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
206 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
207 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
208 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
209 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
210 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
211 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
212 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
213 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
214 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
215 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
216 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
217 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
218 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
219 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
220 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
221 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
222 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
223 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
224 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
225 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
226 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
227 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
228 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
229 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
230 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
231 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
232 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
233 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
234 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
235 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
236 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
237 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
238 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
239 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
240 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
241 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
242 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
243 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
244 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
245 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
246 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
247 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
248 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
249 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
250 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
251 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
252 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
253 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
254 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
255 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
256 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
257 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
258 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
259 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
260 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
261 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
262 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
263 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
264 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
265 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
266 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
267 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
268 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
269 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
270 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
271 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
272 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
273 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
274 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
275 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
276 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
277 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
278 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
279 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
280 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
281 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
282 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
283 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
284 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
285 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
286 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
287 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
288 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
289 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
290};
291EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
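
Both tables above precompute, for every prefix length from 0 to 128, the mask that a routine like the one below would otherwise have to build per lookup. A hypothetical host-order sketch (the kernel keeps the entries in network byte order and simply indexes the arrays):

#include <stdio.h>

/* Expand a prefix length (0..128) into four 32-bit mask words. */
static void prefix_to_mask(unsigned int prefix, unsigned int mask[4])
{
	unsigned int i;

	for (i = 0; i < 4; i++) {
		if (prefix >= 32)
			mask[i] = 0xFFFFFFFFu;
		else if (prefix == 0)
			mask[i] = 0;
		else
			mask[i] = ~0u << (32 - prefix);
		prefix -= (prefix >= 32) ? 32 : prefix;
	}
}

int main(void)
{
	unsigned int mask[4];

	prefix_to_mask(68, mask);
	/* prints: FFFFFFFF FFFFFFFF F0000000 00000000, matching entry 68 */
	printf("%08X %08X %08X %08X\n", mask[0], mask[1], mask[2], mask[3]);
	return 0;
}
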
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index a475edee0912..059af3120be7 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app);
43EXPORT_SYMBOL(unregister_ip_vs_app); 43EXPORT_SYMBOL(unregister_ip_vs_app);
44EXPORT_SYMBOL(register_ip_vs_app_inc); 44EXPORT_SYMBOL(register_ip_vs_app_inc);
45 45
46/* ipvs application list head */
47static LIST_HEAD(ip_vs_app_list);
48static DEFINE_MUTEX(__ip_vs_app_mutex); 46static DEFINE_MUTEX(__ip_vs_app_mutex);
49 47
50
51/* 48/*
52 * Get an ip_vs_app object 49 * Get an ip_vs_app object
53 */ 50 */
@@ -67,7 +64,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
67 * Allocate/initialize app incarnation and register it in proto apps. 64 * Allocate/initialize app incarnation and register it in proto apps.
68 */ 65 */
69static int 66static int
70ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) 67ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
68 __u16 port)
71{ 69{
72 struct ip_vs_protocol *pp; 70 struct ip_vs_protocol *pp;
73 struct ip_vs_app *inc; 71 struct ip_vs_app *inc;
@@ -98,7 +96,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
98 } 96 }
99 } 97 }
100 98
101 ret = pp->register_app(inc); 99 ret = pp->register_app(net, inc);
102 if (ret) 100 if (ret)
103 goto out; 101 goto out;
104 102
@@ -119,7 +117,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
119 * Release app incarnation 117 * Release app incarnation
120 */ 118 */
121static void 119static void
122ip_vs_app_inc_release(struct ip_vs_app *inc) 120ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
123{ 121{
124 struct ip_vs_protocol *pp; 122 struct ip_vs_protocol *pp;
125 123
@@ -127,7 +125,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
127 return; 125 return;
128 126
129 if (pp->unregister_app) 127 if (pp->unregister_app)
130 pp->unregister_app(inc); 128 pp->unregister_app(net, inc);
131 129
132 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 130 IP_VS_DBG(9, "%s App %s:%u unregistered\n",
133 pp->name, inc->name, ntohs(inc->port)); 131 pp->name, inc->name, ntohs(inc->port));
@@ -168,13 +166,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
168 * Register an application incarnation in protocol applications 166 * Register an application incarnation in protocol applications
169 */ 167 */
170int 168int
171register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) 169register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
170 __u16 port)
172{ 171{
173 int result; 172 int result;
174 173
175 mutex_lock(&__ip_vs_app_mutex); 174 mutex_lock(&__ip_vs_app_mutex);
176 175
177 result = ip_vs_app_inc_new(app, proto, port); 176 result = ip_vs_app_inc_new(net, app, proto, port);
178 177
179 mutex_unlock(&__ip_vs_app_mutex); 178 mutex_unlock(&__ip_vs_app_mutex);
180 179
@@ -185,14 +184,15 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
185/* 184/*
186 * ip_vs_app registration routine 185 * ip_vs_app registration routine
187 */ 186 */
188int register_ip_vs_app(struct ip_vs_app *app) 187int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
189{ 188{
189 struct netns_ipvs *ipvs = net_ipvs(net);
190 /* increase the module use count */ 190 /* increase the module use count */
191 ip_vs_use_count_inc(); 191 ip_vs_use_count_inc();
192 192
193 mutex_lock(&__ip_vs_app_mutex); 193 mutex_lock(&__ip_vs_app_mutex);
194 194
195 list_add(&app->a_list, &ip_vs_app_list); 195 list_add(&app->a_list, &ipvs->app_list);
196 196
197 mutex_unlock(&__ip_vs_app_mutex); 197 mutex_unlock(&__ip_vs_app_mutex);
198 198
@@ -204,14 +204,14 @@ int register_ip_vs_app(struct ip_vs_app *app)
204 * ip_vs_app unregistration routine 204 * ip_vs_app unregistration routine
205 * We are sure there are no app incarnations attached to services 205 * We are sure there are no app incarnations attached to services
206 */ 206 */
207void unregister_ip_vs_app(struct ip_vs_app *app) 207void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
208{ 208{
209 struct ip_vs_app *inc, *nxt; 209 struct ip_vs_app *inc, *nxt;
210 210
211 mutex_lock(&__ip_vs_app_mutex); 211 mutex_lock(&__ip_vs_app_mutex);
212 212
213 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { 213 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
214 ip_vs_app_inc_release(inc); 214 ip_vs_app_inc_release(net, inc);
215 } 215 }
216 216
217 list_del(&app->a_list); 217 list_del(&app->a_list);
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
226/* 226/*
227 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 227 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
228 */ 228 */
229int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) 229int ip_vs_bind_app(struct ip_vs_conn *cp,
230 struct ip_vs_protocol *pp)
230{ 231{
231 return pp->app_conn_bind(cp); 232 return pp->app_conn_bind(cp);
232} 233}
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
481 * /proc/net/ip_vs_app entry function 482 * /proc/net/ip_vs_app entry function
482 */ 483 */
483 484
484static struct ip_vs_app *ip_vs_app_idx(loff_t pos) 485static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
485{ 486{
486 struct ip_vs_app *app, *inc; 487 struct ip_vs_app *app, *inc;
487 488
488 list_for_each_entry(app, &ip_vs_app_list, a_list) { 489 list_for_each_entry(app, &ipvs->app_list, a_list) {
489 list_for_each_entry(inc, &app->incs_list, a_list) { 490 list_for_each_entry(inc, &app->incs_list, a_list) {
490 if (pos-- == 0) 491 if (pos-- == 0)
491 return inc; 492 return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
497 498
498static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) 499static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
499{ 500{
501 struct net *net = seq_file_net(seq);
502 struct netns_ipvs *ipvs = net_ipvs(net);
503
500 mutex_lock(&__ip_vs_app_mutex); 504 mutex_lock(&__ip_vs_app_mutex);
501 505
502 return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; 506 return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
503} 507}
504 508
505static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) 509static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
506{ 510{
507 struct ip_vs_app *inc, *app; 511 struct ip_vs_app *inc, *app;
508 struct list_head *e; 512 struct list_head *e;
513 struct net *net = seq_file_net(seq);
514 struct netns_ipvs *ipvs = net_ipvs(net);
509 515
510 ++*pos; 516 ++*pos;
511 if (v == SEQ_START_TOKEN) 517 if (v == SEQ_START_TOKEN)
512 return ip_vs_app_idx(0); 518 return ip_vs_app_idx(ipvs, 0);
513 519
514 inc = v; 520 inc = v;
515 app = inc->app; 521 app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
518 return list_entry(e, struct ip_vs_app, a_list); 524 return list_entry(e, struct ip_vs_app, a_list);
519 525
520 /* go on to next application */ 526 /* go on to next application */
521 for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { 527 for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
522 app = list_entry(e, struct ip_vs_app, a_list); 528 app = list_entry(e, struct ip_vs_app, a_list);
523 list_for_each_entry(inc, &app->incs_list, a_list) { 529 list_for_each_entry(inc, &app->incs_list, a_list) {
524 return inc; 530 return inc;
@@ -557,7 +563,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
557 563
558static int ip_vs_app_open(struct inode *inode, struct file *file) 564static int ip_vs_app_open(struct inode *inode, struct file *file)
559{ 565{
560 return seq_open(file, &ip_vs_app_seq_ops); 566 return seq_open_net(inode, file, &ip_vs_app_seq_ops,
567 sizeof(struct seq_net_private));
561} 568}
562 569
563static const struct file_operations ip_vs_app_fops = { 570static const struct file_operations ip_vs_app_fops = {
@@ -565,19 +572,30 @@ static const struct file_operations ip_vs_app_fops = {
565 .open = ip_vs_app_open, 572 .open = ip_vs_app_open,
566 .read = seq_read, 573 .read = seq_read,
567 .llseek = seq_lseek, 574 .llseek = seq_lseek,
568 .release = seq_release, 575 .release = seq_release_net,
569}; 576};
570#endif 577#endif
571 578
579int __net_init __ip_vs_app_init(struct net *net)
580{
581 struct netns_ipvs *ipvs = net_ipvs(net);
582
583 INIT_LIST_HEAD(&ipvs->app_list);
584 proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
585 return 0;
586}
587
588void __net_exit __ip_vs_app_cleanup(struct net *net)
589{
590 proc_net_remove(net, "ip_vs_app");
591}
592
572int __init ip_vs_app_init(void) 593int __init ip_vs_app_init(void)
573{ 594{
574 /* we will replace it with proc_net_ipvs_create() soon */
575 proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
576 return 0; 595 return 0;
577} 596}
578 597
579 598
580void ip_vs_app_cleanup(void) 599void ip_vs_app_cleanup(void)
581{ 600{
582 proc_net_remove(&init_net, "ip_vs_app");
583} 601}
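
The new __ip_vs_app_init()/__ip_vs_app_cleanup() hooks above are per-namespace constructors/destructors; their actual registration happens elsewhere in this patch series. A hedged sketch of the usual pernet_operations wiring such hooks rely on (the names and placement here are illustrative, not necessarily how ip_vs registers them):

#include <linux/init.h>
#include <net/net_namespace.h>

/* Assumed wiring only: hand the per-net init/exit hooks to the core so
 * they run for every network namespace that is created or destroyed. */
static struct pernet_operations ip_vs_app_net_ops = {
	.init = __ip_vs_app_init,
	.exit = __ip_vs_app_cleanup,
};

static int __init example_register(void)
{
	return register_pernet_subsys(&ip_vs_app_net_ops);
}

static void __exit example_unregister(void)
{
	unregister_pernet_subsys(&ip_vs_app_net_ops);
}
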
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e9adecdc8ca4..bf28ac2fc99b 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,35 +48,32 @@
48/* 48/*
49 * Connection hash size. Default is what was selected at compile time. 49 * Connection hash size. Default is what was selected at compile time.
50*/ 50*/
51int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; 51static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
52module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); 52module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
53MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); 53MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
54 54
55/* size and mask values */ 55/* size and mask values */
56int ip_vs_conn_tab_size; 56int ip_vs_conn_tab_size __read_mostly;
57int ip_vs_conn_tab_mask; 57static int ip_vs_conn_tab_mask __read_mostly;
58 58
59/* 59/*
60 * Connection hash table: for input and output packets lookups of IPVS 60 * Connection hash table: for input and output packets lookups of IPVS
61 */ 61 */
62static struct list_head *ip_vs_conn_tab; 62static struct hlist_head *ip_vs_conn_tab __read_mostly;
63 63
64/* SLAB cache for IPVS connections */ 64/* SLAB cache for IPVS connections */
65static struct kmem_cache *ip_vs_conn_cachep __read_mostly; 65static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
66 66
67/* counter for current IPVS connections */
68static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
69
70/* counter for no client port connections */ 67/* counter for no client port connections */
71static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); 68static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
72 69
73/* random value for IPVS connection hash */ 70/* random value for IPVS connection hash */
74static unsigned int ip_vs_conn_rnd; 71static unsigned int ip_vs_conn_rnd __read_mostly;
75 72
76/* 73/*
77 * Fine locking granularity for big connection hash table 74 * Fine locking granularity for big connection hash table
78 */ 75 */
79#define CT_LOCKARRAY_BITS 4 76#define CT_LOCKARRAY_BITS 5
80#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) 77#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
81#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) 78#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
82 79
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
133/* 130/*
134 * Returns hash value for IPVS connection entry 131 * Returns hash value for IPVS connection entry
135 */ 132 */
136static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, 133static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
137 const union nf_inet_addr *addr, 134 const union nf_inet_addr *addr,
138 __be16 port) 135 __be16 port)
139{ 136{
140#ifdef CONFIG_IP_VS_IPV6 137#ifdef CONFIG_IP_VS_IPV6
141 if (af == AF_INET6) 138 if (af == AF_INET6)
142 return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), 139 return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
143 (__force u32)port, proto, ip_vs_conn_rnd) 140 (__force u32)port, proto, ip_vs_conn_rnd) ^
144 & ip_vs_conn_tab_mask; 141 ((size_t)net>>8)) & ip_vs_conn_tab_mask;
145#endif 142#endif
146 return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, 143 return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
147 ip_vs_conn_rnd) 144 ip_vs_conn_rnd) ^
148 & ip_vs_conn_tab_mask; 145 ((size_t)net>>8)) & ip_vs_conn_tab_mask;
149} 146}
150 147
151static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, 148static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
166 port = p->vport; 163 port = p->vport;
167 } 164 }
168 165
169 return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); 166 return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
170} 167}
171 168
172static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) 169static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
173{ 170{
174 struct ip_vs_conn_param p; 171 struct ip_vs_conn_param p;
175 172
176 ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, 173 ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
177 NULL, 0, &p); 174 &cp->caddr, cp->cport, NULL, 0, &p);
178 175
179 if (cp->dest && cp->dest->svc->pe) { 176 if (cp->pe) {
180 p.pe = cp->dest->svc->pe; 177 p.pe = cp->pe;
181 p.pe_data = cp->pe_data; 178 p.pe_data = cp->pe_data;
182 p.pe_data_len = cp->pe_data_len; 179 p.pe_data_len = cp->pe_data_len;
183 } 180 }
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
186} 183}
187 184
188/* 185/*
189 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. 186 * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
190 * returns bool success. 187 * returns bool success.
191 */ 188 */
192static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) 189static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -204,7 +201,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
204 spin_lock(&cp->lock); 201 spin_lock(&cp->lock);
205 202
206 if (!(cp->flags & IP_VS_CONN_F_HASHED)) { 203 if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
207 list_add(&cp->c_list, &ip_vs_conn_tab[hash]); 204 hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
208 cp->flags |= IP_VS_CONN_F_HASHED; 205 cp->flags |= IP_VS_CONN_F_HASHED;
209 atomic_inc(&cp->refcnt); 206 atomic_inc(&cp->refcnt);
210 ret = 1; 207 ret = 1;
@@ -237,7 +234,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
237 spin_lock(&cp->lock); 234 spin_lock(&cp->lock);
238 235
239 if (cp->flags & IP_VS_CONN_F_HASHED) { 236 if (cp->flags & IP_VS_CONN_F_HASHED) {
240 list_del(&cp->c_list); 237 hlist_del(&cp->c_list);
241 cp->flags &= ~IP_VS_CONN_F_HASHED; 238 cp->flags &= ~IP_VS_CONN_F_HASHED;
242 atomic_dec(&cp->refcnt); 239 atomic_dec(&cp->refcnt);
243 ret = 1; 240 ret = 1;
@@ -262,18 +259,20 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
262{ 259{
263 unsigned hash; 260 unsigned hash;
264 struct ip_vs_conn *cp; 261 struct ip_vs_conn *cp;
262 struct hlist_node *n;
265 263
266 hash = ip_vs_conn_hashkey_param(p, false); 264 hash = ip_vs_conn_hashkey_param(p, false);
267 265
268 ct_read_lock(hash); 266 ct_read_lock(hash);
269 267
270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 268 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
271 if (cp->af == p->af && 269 if (cp->af == p->af &&
270 p->cport == cp->cport && p->vport == cp->vport &&
272 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && 271 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
273 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && 272 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
274 p->cport == cp->cport && p->vport == cp->vport &&
275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 273 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
276 p->protocol == cp->protocol) { 274 p->protocol == cp->protocol &&
275 ip_vs_conn_net_eq(cp, p->net)) {
277 /* HIT */ 276 /* HIT */
278 atomic_inc(&cp->refcnt); 277 atomic_inc(&cp->refcnt);
279 ct_read_unlock(hash); 278 ct_read_unlock(hash);
@@ -313,23 +312,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
313 struct ip_vs_conn_param *p) 312 struct ip_vs_conn_param *p)
314{ 313{
315 __be16 _ports[2], *pptr; 314 __be16 _ports[2], *pptr;
315 struct net *net = skb_net(skb);
316 316
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
318 if (pptr == NULL) 318 if (pptr == NULL)
319 return 1; 319 return 1;
320 320
321 if (likely(!inverse)) 321 if (likely(!inverse))
322 ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], 322 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
323 &iph->daddr, pptr[1], p); 323 pptr[0], &iph->daddr, pptr[1], p);
324 else 324 else
325 ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], 325 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
326 &iph->saddr, pptr[0], p); 326 pptr[1], &iph->saddr, pptr[0], p);
327 return 0; 327 return 0;
328} 328}
329 329
330struct ip_vs_conn * 330struct ip_vs_conn *
331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
332 struct ip_vs_protocol *pp,
333 const struct ip_vs_iphdr *iph, 332 const struct ip_vs_iphdr *iph,
334 unsigned int proto_off, int inverse) 333 unsigned int proto_off, int inverse)
335{ 334{
@@ -347,14 +346,17 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
347{ 346{
348 unsigned hash; 347 unsigned hash;
349 struct ip_vs_conn *cp; 348 struct ip_vs_conn *cp;
349 struct hlist_node *n;
350 350
351 hash = ip_vs_conn_hashkey_param(p, false); 351 hash = ip_vs_conn_hashkey_param(p, false);
352 352
353 ct_read_lock(hash); 353 ct_read_lock(hash);
354 354
355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 355 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
356 if (!ip_vs_conn_net_eq(cp, p->net))
357 continue;
356 if (p->pe_data && p->pe->ct_match) { 358 if (p->pe_data && p->pe->ct_match) {
357 if (p->pe->ct_match(p, cp)) 359 if (p->pe == cp->pe && p->pe->ct_match(p, cp))
358 goto out; 360 goto out;
359 continue; 361 continue;
360 } 362 }
@@ -394,6 +396,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
394{ 396{
395 unsigned hash; 397 unsigned hash;
396 struct ip_vs_conn *cp, *ret=NULL; 398 struct ip_vs_conn *cp, *ret=NULL;
399 struct hlist_node *n;
397 400
398 /* 401 /*
399 * Check for "full" addressed entries 402 * Check for "full" addressed entries
@@ -402,12 +405,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
402 405
403 ct_read_lock(hash); 406 ct_read_lock(hash);
404 407
405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 408 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
406 if (cp->af == p->af && 409 if (cp->af == p->af &&
410 p->vport == cp->cport && p->cport == cp->dport &&
407 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && 411 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
408 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && 412 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
409 p->vport == cp->cport && p->cport == cp->dport && 413 p->protocol == cp->protocol &&
410 p->protocol == cp->protocol) { 414 ip_vs_conn_net_eq(cp, p->net)) {
411 /* HIT */ 415 /* HIT */
412 atomic_inc(&cp->refcnt); 416 atomic_inc(&cp->refcnt);
413 ret = cp; 417 ret = cp;
@@ -428,7 +432,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
428 432
429struct ip_vs_conn * 433struct ip_vs_conn *
430ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 434ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
431 struct ip_vs_protocol *pp,
432 const struct ip_vs_iphdr *iph, 435 const struct ip_vs_iphdr *iph,
433 unsigned int proto_off, int inverse) 436 unsigned int proto_off, int inverse)
434{ 437{
@@ -592,7 +595,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
592 atomic_inc(&dest->inactconns); 595 atomic_inc(&dest->inactconns);
593 } else { 596 } else {
594 /* It is a persistent connection/template, so increase 597 /* It is a persistent connection/template, so increase
595 the peristent connection counter */ 598 the persistent connection counter */
596 atomic_inc(&dest->persistconns); 599 atomic_inc(&dest->persistconns);
597 } 600 }
598 601
@@ -611,9 +614,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
611 struct ip_vs_dest *dest; 614 struct ip_vs_dest *dest;
612 615
613 if ((cp) && (!cp->dest)) { 616 if ((cp) && (!cp->dest)) {
614 dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, 617 dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
615 &cp->vaddr, cp->vport, 618 cp->dport, &cp->vaddr, cp->vport,
616 cp->protocol); 619 cp->protocol, cp->fwmark);
617 ip_vs_bind_dest(cp, dest); 620 ip_vs_bind_dest(cp, dest);
618 return dest; 621 return dest;
619 } else 622 } else
@@ -654,7 +657,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
654 } 657 }
655 } else { 658 } else {
656 /* It is a persistent connection/template, so decrease 659 /* It is a persistent connection/template, so decrease
657 the peristent connection counter */ 660 the persistent connection counter */
658 atomic_dec(&dest->persistconns); 661 atomic_dec(&dest->persistconns);
659 } 662 }
660 663
@@ -677,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
677 atomic_dec(&dest->refcnt); 680 atomic_dec(&dest->refcnt);
678} 681}
679 682
683static int expire_quiescent_template(struct netns_ipvs *ipvs,
684 struct ip_vs_dest *dest)
685{
686#ifdef CONFIG_SYSCTL
687 return ipvs->sysctl_expire_quiescent_template &&
688 (atomic_read(&dest->weight) == 0);
689#else
690 return 0;
691#endif
692}
680 693
681/* 694/*
682 * Checking if the destination of a connection template is available. 695 * Checking if the destination of a connection template is available.
@@ -686,14 +699,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
686int ip_vs_check_template(struct ip_vs_conn *ct) 699int ip_vs_check_template(struct ip_vs_conn *ct)
687{ 700{
688 struct ip_vs_dest *dest = ct->dest; 701 struct ip_vs_dest *dest = ct->dest;
702 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
689 703
690 /* 704 /*
691 * Checking the dest server status. 705 * Checking the dest server status.
692 */ 706 */
693 if ((dest == NULL) || 707 if ((dest == NULL) ||
694 !(dest->flags & IP_VS_DEST_F_AVAILABLE) || 708 !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
695 (sysctl_ip_vs_expire_quiescent_template && 709 expire_quiescent_template(ipvs, dest)) {
696 (atomic_read(&dest->weight) == 0))) {
697 IP_VS_DBG_BUF(9, "check_template: dest not available for " 710 IP_VS_DBG_BUF(9, "check_template: dest not available for "
698 "protocol %s s:%s:%d v:%s:%d " 711 "protocol %s s:%s:%d v:%s:%d "
699 "-> d:%s:%d\n", 712 "-> d:%s:%d\n",
@@ -730,6 +743,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
730static void ip_vs_conn_expire(unsigned long data) 743static void ip_vs_conn_expire(unsigned long data)
731{ 744{
732 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 745 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
746 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
733 747
734 cp->timeout = 60*HZ; 748 cp->timeout = 60*HZ;
735 749
@@ -765,13 +779,14 @@ static void ip_vs_conn_expire(unsigned long data)
765 if (cp->flags & IP_VS_CONN_F_NFCT) 779 if (cp->flags & IP_VS_CONN_F_NFCT)
766 ip_vs_conn_drop_conntrack(cp); 780 ip_vs_conn_drop_conntrack(cp);
767 781
782 ip_vs_pe_put(cp->pe);
768 kfree(cp->pe_data); 783 kfree(cp->pe_data);
769 if (unlikely(cp->app != NULL)) 784 if (unlikely(cp->app != NULL))
770 ip_vs_unbind_app(cp); 785 ip_vs_unbind_app(cp);
771 ip_vs_unbind_dest(cp); 786 ip_vs_unbind_dest(cp);
772 if (cp->flags & IP_VS_CONN_F_NO_CPORT) 787 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
773 atomic_dec(&ip_vs_conn_no_cport_cnt); 788 atomic_dec(&ip_vs_conn_no_cport_cnt);
774 atomic_dec(&ip_vs_conn_count); 789 atomic_dec(&ipvs->conn_count);
775 790
776 kmem_cache_free(ip_vs_conn_cachep, cp); 791 kmem_cache_free(ip_vs_conn_cachep, cp);
777 return; 792 return;
@@ -802,10 +817,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
802struct ip_vs_conn * 817struct ip_vs_conn *
803ip_vs_conn_new(const struct ip_vs_conn_param *p, 818ip_vs_conn_new(const struct ip_vs_conn_param *p,
804 const union nf_inet_addr *daddr, __be16 dport, unsigned flags, 819 const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
805 struct ip_vs_dest *dest) 820 struct ip_vs_dest *dest, __u32 fwmark)
806{ 821{
807 struct ip_vs_conn *cp; 822 struct ip_vs_conn *cp;
808 struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); 823 struct netns_ipvs *ipvs = net_ipvs(p->net);
824 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
825 p->protocol);
809 826
810 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); 827 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
811 if (cp == NULL) { 828 if (cp == NULL) {
@@ -813,8 +830,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
813 return NULL; 830 return NULL;
814 } 831 }
815 832
816 INIT_LIST_HEAD(&cp->c_list); 833 INIT_HLIST_NODE(&cp->c_list);
817 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); 834 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
835 ip_vs_conn_net_set(cp, p->net);
818 cp->af = p->af; 836 cp->af = p->af;
819 cp->protocol = p->protocol; 837 cp->protocol = p->protocol;
820 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); 838 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +844,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
826 &cp->daddr, daddr); 844 &cp->daddr, daddr);
827 cp->dport = dport; 845 cp->dport = dport;
828 cp->flags = flags; 846 cp->flags = flags;
829 if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { 847 cp->fwmark = fwmark;
848 if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
849 ip_vs_pe_get(p->pe);
850 cp->pe = p->pe;
830 cp->pe_data = p->pe_data; 851 cp->pe_data = p->pe_data;
831 cp->pe_data_len = p->pe_data_len; 852 cp->pe_data_len = p->pe_data_len;
832 } 853 }
@@ -842,7 +863,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
842 atomic_set(&cp->n_control, 0); 863 atomic_set(&cp->n_control, 0);
843 atomic_set(&cp->in_pkts, 0); 864 atomic_set(&cp->in_pkts, 0);
844 865
845 atomic_inc(&ip_vs_conn_count); 866 atomic_inc(&ipvs->conn_count);
846 if (flags & IP_VS_CONN_F_NO_CPORT) 867 if (flags & IP_VS_CONN_F_NO_CPORT)
847 atomic_inc(&ip_vs_conn_no_cport_cnt); 868 atomic_inc(&ip_vs_conn_no_cport_cnt);
848 869
@@ -861,8 +882,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
861#endif 882#endif
862 ip_vs_bind_xmit(cp); 883 ip_vs_bind_xmit(cp);
863 884
864 if (unlikely(pp && atomic_read(&pp->appcnt))) 885 if (unlikely(pd && atomic_read(&pd->appcnt)))
865 ip_vs_bind_app(cp, pp); 886 ip_vs_bind_app(cp, pd->pp);
866 887
867 /* 888 /*
868 * Allow conntrack to be preserved. By default, conntrack 889 * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +892,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
871 * IP_VS_CONN_F_ONE_PACKET too. 892 * IP_VS_CONN_F_ONE_PACKET too.
872 */ 893 */
873 894
874 if (ip_vs_conntrack_enabled()) 895 if (ip_vs_conntrack_enabled(ipvs))
875 cp->flags |= IP_VS_CONN_F_NFCT; 896 cp->flags |= IP_VS_CONN_F_NFCT;
876 897
877 /* Hash it in the ip_vs_conn_tab finally */ 898 /* Hash it in the ip_vs_conn_tab finally */
@@ -884,18 +905,24 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
884 * /proc/net/ip_vs_conn entries 905 * /proc/net/ip_vs_conn entries
885 */ 906 */
886#ifdef CONFIG_PROC_FS 907#ifdef CONFIG_PROC_FS
908struct ip_vs_iter_state {
909 struct seq_net_private p;
910 struct hlist_head *l;
911};
887 912
888static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) 913static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
889{ 914{
890 int idx; 915 int idx;
891 struct ip_vs_conn *cp; 916 struct ip_vs_conn *cp;
917 struct ip_vs_iter_state *iter = seq->private;
918 struct hlist_node *n;
892 919
893 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 920 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
894 ct_read_lock_bh(idx); 921 ct_read_lock_bh(idx);
895 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 922 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
896 if (pos-- == 0) { 923 if (pos-- == 0) {
897 seq->private = &ip_vs_conn_tab[idx]; 924 iter->l = &ip_vs_conn_tab[idx];
898 return cp; 925 return cp;
899 } 926 }
900 } 927 }
901 ct_read_unlock_bh(idx); 928 ct_read_unlock_bh(idx);
@@ -906,14 +933,18 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
906 933
907static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) 934static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
908{ 935{
909 seq->private = NULL; 936 struct ip_vs_iter_state *iter = seq->private;
937
938 iter->l = NULL;
910 return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; 939 return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
911} 940}
912 941
913static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) 942static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
914{ 943{
915 struct ip_vs_conn *cp = v; 944 struct ip_vs_conn *cp = v;
916 struct list_head *e, *l = seq->private; 945 struct ip_vs_iter_state *iter = seq->private;
946 struct hlist_node *e;
947 struct hlist_head *l = iter->l;
917 int idx; 948 int idx;
918 949
919 ++*pos; 950 ++*pos;
@@ -921,27 +952,28 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
921 return ip_vs_conn_array(seq, 0); 952 return ip_vs_conn_array(seq, 0);
922 953
923 /* more on same hash chain? */ 954 /* more on same hash chain? */
924 if ((e = cp->c_list.next) != l) 955 if ((e = cp->c_list.next))
925 return list_entry(e, struct ip_vs_conn, c_list); 956 return hlist_entry(e, struct ip_vs_conn, c_list);
926 957
927 idx = l - ip_vs_conn_tab; 958 idx = l - ip_vs_conn_tab;
928 ct_read_unlock_bh(idx); 959 ct_read_unlock_bh(idx);
929 960
930 while (++idx < ip_vs_conn_tab_size) { 961 while (++idx < ip_vs_conn_tab_size) {
931 ct_read_lock_bh(idx); 962 ct_read_lock_bh(idx);
932 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 963 hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) {
933 seq->private = &ip_vs_conn_tab[idx]; 964 iter->l = &ip_vs_conn_tab[idx];
934 return cp; 965 return cp;
935 } 966 }
936 ct_read_unlock_bh(idx); 967 ct_read_unlock_bh(idx);
937 } 968 }
938 seq->private = NULL; 969 iter->l = NULL;
939 return NULL; 970 return NULL;
940} 971}
941 972
942static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) 973static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
943{ 974{
944 struct list_head *l = seq->private; 975 struct ip_vs_iter_state *iter = seq->private;
976 struct hlist_head *l = iter->l;
945 977
946 if (l) 978 if (l)
947 ct_read_unlock_bh(l - ip_vs_conn_tab); 979 ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +987,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
955 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); 987 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
956 else { 988 else {
957 const struct ip_vs_conn *cp = v; 989 const struct ip_vs_conn *cp = v;
990 struct net *net = seq_file_net(seq);
958 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; 991 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
959 size_t len = 0; 992 size_t len = 0;
960 993
961 if (cp->dest && cp->pe_data && 994 if (!ip_vs_conn_net_eq(cp, net))
962 cp->dest->svc->pe->show_pe_data) { 995 return 0;
996 if (cp->pe_data) {
963 pe_data[0] = ' '; 997 pe_data[0] = ' ';
964 len = strlen(cp->dest->svc->pe->name); 998 len = strlen(cp->pe->name);
965 memcpy(pe_data + 1, cp->dest->svc->pe->name, len); 999 memcpy(pe_data + 1, cp->pe->name, len);
966 pe_data[len + 1] = ' '; 1000 pe_data[len + 1] = ' ';
967 len += 2; 1001 len += 2;
968 len += cp->dest->svc->pe->show_pe_data(cp, 1002 len += cp->pe->show_pe_data(cp, pe_data + len);
969 pe_data + len);
970 } 1003 }
971 pe_data[len] = '\0'; 1004 pe_data[len] = '\0';
972 1005
@@ -1004,7 +1037,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
1004 1037
1005static int ip_vs_conn_open(struct inode *inode, struct file *file) 1038static int ip_vs_conn_open(struct inode *inode, struct file *file)
1006{ 1039{
1007 return seq_open(file, &ip_vs_conn_seq_ops); 1040 return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
1041 sizeof(struct ip_vs_iter_state));
1008} 1042}
1009 1043
1010static const struct file_operations ip_vs_conn_fops = { 1044static const struct file_operations ip_vs_conn_fops = {
@@ -1012,7 +1046,7 @@ static const struct file_operations ip_vs_conn_fops = {
1012 .open = ip_vs_conn_open, 1046 .open = ip_vs_conn_open,
1013 .read = seq_read, 1047 .read = seq_read,
1014 .llseek = seq_lseek, 1048 .llseek = seq_lseek,
1015 .release = seq_release, 1049 .release = seq_release_net,
1016}; 1050};
1017 1051
1018static const char *ip_vs_origin_name(unsigned flags) 1052static const char *ip_vs_origin_name(unsigned flags)
@@ -1031,6 +1065,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
1031 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); 1065 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
1032 else { 1066 else {
1033 const struct ip_vs_conn *cp = v; 1067 const struct ip_vs_conn *cp = v;
1068 struct net *net = seq_file_net(seq);
1069
1070 if (!ip_vs_conn_net_eq(cp, net))
1071 return 0;
1034 1072
1035#ifdef CONFIG_IP_VS_IPV6 1073#ifdef CONFIG_IP_VS_IPV6
1036 if (cp->af == AF_INET6) 1074 if (cp->af == AF_INET6)
@@ -1067,7 +1105,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
1067 1105
1068static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) 1106static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
1069{ 1107{
1070 return seq_open(file, &ip_vs_conn_sync_seq_ops); 1108 return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
1109 sizeof(struct ip_vs_iter_state));
1071} 1110}
1072 1111
1073static const struct file_operations ip_vs_conn_sync_fops = { 1112static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1075,7 +1114,7 @@ static const struct file_operations ip_vs_conn_sync_fops = {
1075 .open = ip_vs_conn_sync_open, 1114 .open = ip_vs_conn_sync_open,
1076 .read = seq_read, 1115 .read = seq_read,
1077 .llseek = seq_lseek, 1116 .llseek = seq_lseek,
1078 .release = seq_release, 1117 .release = seq_release_net,
1079}; 1118};
1080 1119
1081#endif 1120#endif
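
The /proc hunks above all apply one pattern: the seq_file keeps a struct seq_net_private (inside ip_vs_iter_state) so that seq_file_net() can recover the reader's namespace, the show functions skip connections that belong to other namespaces, and open/release switch to seq_open_net()/seq_release_net(). A minimal sketch of that pattern in isolation, assuming a hypothetical foo table (every foo_* name is illustrative; only seq_net_private, net_eq and the seq_*_net helpers are real interfaces):

/* Netns-aware seq_file sketch; all foo_* identifiers are hypothetical. */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>

struct foo_entry {
	struct hlist_node node;
	struct net *net;			/* namespace that owns the entry */
};

struct foo_iter_state {
	struct seq_net_private p;		/* must be first: filled by seq_open_net() */
	struct hlist_head *bucket;		/* walk position, as in ip_vs_iter_state */
};

/* Assumed to exist: .start/.next/.stop/.show walking a global table,
 * just like ip_vs_conn_array()/ip_vs_conn_seq_next() above. */
extern const struct seq_operations foo_seq_ops;

static int foo_seq_show(struct seq_file *seq, void *v)	/* would be foo_seq_ops.show */
{
	struct net *net = seq_file_net(seq);	/* reader's namespace */
	const struct foo_entry *e = v;

	if (!net_eq(e->net, net))		/* hide entries of other namespaces */
		return 0;
	seq_puts(seq, "one entry\n");
	return 0;
}

static int foo_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &foo_seq_ops,
			    sizeof(struct foo_iter_state));
}

static const struct file_operations foo_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = foo_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,		/* pairs with seq_open_net() */
};

Registration would then happen per namespace, e.g. proc_net_fops_create(net, "foo", 0, &foo_seq_fops) from a __net_init handler, which is exactly what __ip_vs_conn_init() does further down in this file.
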
@@ -1113,7 +1152,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
1113} 1152}
1114 1153
1115/* Called from keventd and must protect itself from softirqs */ 1154/* Called from keventd and must protect itself from softirqs */
1116void ip_vs_random_dropentry(void) 1155void ip_vs_random_dropentry(struct net *net)
1117{ 1156{
1118 int idx; 1157 int idx;
1119 struct ip_vs_conn *cp; 1158 struct ip_vs_conn *cp;
@@ -1123,17 +1162,19 @@ void ip_vs_random_dropentry(void)
1123 */ 1162 */
1124 for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { 1163 for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
1125 unsigned hash = net_random() & ip_vs_conn_tab_mask; 1164 unsigned hash = net_random() & ip_vs_conn_tab_mask;
1165 struct hlist_node *n;
1126 1166
1127 /* 1167 /*
1128 * Lock is actually needed in this loop. 1168 * Lock is actually needed in this loop.
1129 */ 1169 */
1130 ct_write_lock_bh(hash); 1170 ct_write_lock_bh(hash);
1131 1171
1132 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 1172 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
1133 if (cp->flags & IP_VS_CONN_F_TEMPLATE) 1173 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
1134 /* connection template */ 1174 /* connection template */
1135 continue; 1175 continue;
1136 1176 if (!ip_vs_conn_net_eq(cp, net))
1177 continue;
1137 if (cp->protocol == IPPROTO_TCP) { 1178 if (cp->protocol == IPPROTO_TCP) {
1138 switch(cp->state) { 1179 switch(cp->state) {
1139 case IP_VS_TCP_S_SYN_RECV: 1180 case IP_VS_TCP_S_SYN_RECV:
@@ -1168,20 +1209,24 @@ void ip_vs_random_dropentry(void)
1168/* 1209/*
1169 * Flush all the connection entries in the ip_vs_conn_tab 1210 * Flush all the connection entries in the ip_vs_conn_tab
1170 */ 1211 */
1171static void ip_vs_conn_flush(void) 1212static void ip_vs_conn_flush(struct net *net)
1172{ 1213{
1173 int idx; 1214 int idx;
1174 struct ip_vs_conn *cp; 1215 struct ip_vs_conn *cp;
1216 struct netns_ipvs *ipvs = net_ipvs(net);
1175 1217
1176 flush_again: 1218flush_again:
1177 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 1219 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
1220 struct hlist_node *n;
1221
1178 /* 1222 /*
1179 * Lock is actually needed in this loop. 1223 * Lock is actually needed in this loop.
1180 */ 1224 */
1181 ct_write_lock_bh(idx); 1225 ct_write_lock_bh(idx);
1182 1226
1183 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 1227 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
1184 1228 if (!ip_vs_conn_net_eq(cp, net))
1229 continue;
1185 IP_VS_DBG(4, "del connection\n"); 1230 IP_VS_DBG(4, "del connection\n");
1186 ip_vs_conn_expire_now(cp); 1231 ip_vs_conn_expire_now(cp);
1187 if (cp->control) { 1232 if (cp->control) {
@@ -1194,12 +1239,32 @@ static void ip_vs_conn_flush(void)
1194 1239
1195 /* the counter may be not NULL, because maybe some conn entries 1240 /* the counter may be not NULL, because maybe some conn entries
1196 are run by slow timer handler or unhashed but still referred */ 1241 are run by slow timer handler or unhashed but still referred */
1197 if (atomic_read(&ip_vs_conn_count) != 0) { 1242 if (atomic_read(&ipvs->conn_count) != 0) {
1198 schedule(); 1243 schedule();
1199 goto flush_again; 1244 goto flush_again;
1200 } 1245 }
1201} 1246}
1247/*
1248 * per netns init and exit
1249 */
1250int __net_init __ip_vs_conn_init(struct net *net)
1251{
1252 struct netns_ipvs *ipvs = net_ipvs(net);
1202 1253
1254 atomic_set(&ipvs->conn_count, 0);
1255
1256 proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
1257 proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
1258 return 0;
1259}
1260
1261void __net_exit __ip_vs_conn_cleanup(struct net *net)
1262{
1263 /* flush all the connection entries first */
1264 ip_vs_conn_flush(net);
1265 proc_net_remove(net, "ip_vs_conn");
1266 proc_net_remove(net, "ip_vs_conn_sync");
1267}
1203 1268
1204int __init ip_vs_conn_init(void) 1269int __init ip_vs_conn_init(void)
1205{ 1270{
@@ -1212,8 +1277,7 @@ int __init ip_vs_conn_init(void)
1212 /* 1277 /*
1213 * Allocate the connection hash table and initialize its list heads 1278 * Allocate the connection hash table and initialize its list heads
1214 */ 1279 */
1215 ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * 1280 ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * sizeof(*ip_vs_conn_tab));
1216 sizeof(struct list_head));
1217 if (!ip_vs_conn_tab) 1281 if (!ip_vs_conn_tab)
1218 return -ENOMEM; 1282 return -ENOMEM;
1219 1283
@@ -1233,32 +1297,22 @@ int __init ip_vs_conn_init(void)
1233 IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n", 1297 IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
1234 sizeof(struct ip_vs_conn)); 1298 sizeof(struct ip_vs_conn));
1235 1299
1236 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 1300 for (idx = 0; idx < ip_vs_conn_tab_size; idx++)
1237 INIT_LIST_HEAD(&ip_vs_conn_tab[idx]); 1301 INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
1238 }
1239 1302
1240 for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { 1303 for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
1241 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); 1304 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
1242 } 1305 }
1243 1306
1244 proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
1245 proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
1246
1247 /* calculate the random value for connection hash */ 1307 /* calculate the random value for connection hash */
1248 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); 1308 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
1249 1309
1250 return 0; 1310 return 0;
1251} 1311}
1252 1312
1253
1254void ip_vs_conn_cleanup(void) 1313void ip_vs_conn_cleanup(void)
1255{ 1314{
1256 /* flush all the connection entries first */
1257 ip_vs_conn_flush();
1258
1259 /* Release the empty cache */ 1315 /* Release the empty cache */
1260 kmem_cache_destroy(ip_vs_conn_cachep); 1316 kmem_cache_destroy(ip_vs_conn_cachep);
1261 proc_net_remove(&init_net, "ip_vs_conn");
1262 proc_net_remove(&init_net, "ip_vs_conn_sync");
1263 vfree(ip_vs_conn_tab); 1317 vfree(ip_vs_conn_tab);
1264} 1318}
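
__ip_vs_conn_init() and __ip_vs_conn_cleanup() above are only the per-namespace constructor and destructor; the hook that makes the kernel invoke them for every struct net is a struct pernet_operations registration, which this file does not contain. A minimal sketch of that wiring, assuming the registration lives elsewhere in the series (the ipvs_conn_ops and example_pernet_* names are illustrative):

/* Sketch of wiring __net_init/__net_exit handlers into the pernet machinery.
 * Names are hypothetical; the real registration is done elsewhere in the series. */
#include <linux/init.h>
#include <net/net_namespace.h>

int __ip_vs_conn_init(struct net *net);		/* defined above */
void __ip_vs_conn_cleanup(struct net *net);	/* defined above */

static struct pernet_operations ipvs_conn_ops = {
	.init = __ip_vs_conn_init,	/* run for init_net and every new namespace */
	.exit = __ip_vs_conn_cleanup,	/* run when a namespace is torn down */
};

static int __init example_pernet_register(void)
{
	/* register_pernet_subsys() also calls .init for namespaces that
	 * already exist at registration time. */
	return register_pernet_subsys(&ipvs_conn_ops);
}

static void example_pernet_unregister(void)
{
	unregister_pernet_subsys(&ipvs_conn_ops);
}
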
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9c5a04..bfa808f4da13 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h> 43#include <net/ip6_checksum.h>
44#include <net/netns/generic.h> /* net_generic() */
44 45
45#include <linux/netfilter.h> 46#include <linux/netfilter.h>
46#include <linux/netfilter_ipv4.h> 47#include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
68EXPORT_SYMBOL(ip_vs_get_debug_level); 69EXPORT_SYMBOL(ip_vs_get_debug_level);
69#endif 70#endif
70 71
72int ip_vs_net_id __read_mostly;
73#ifdef IP_VS_GENERIC_NETNS
74EXPORT_SYMBOL(ip_vs_net_id);
75#endif
76/* netns cnt used for uniqueness */
77static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
71 78
72/* ID used in ICMP lookups */ 79/* ID used in ICMP lookups */
73#define icmp_id(icmph) (((icmph)->un).echo.id) 80#define icmp_id(icmph) (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
108ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 115ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
109{ 116{
110 struct ip_vs_dest *dest = cp->dest; 117 struct ip_vs_dest *dest = cp->dest;
118 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
119
111 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 120 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
112 spin_lock(&dest->stats.lock); 121 struct ip_vs_cpu_stats *s;
113 dest->stats.ustats.inpkts++; 122
114 dest->stats.ustats.inbytes += skb->len; 123 s = this_cpu_ptr(dest->stats.cpustats);
115 spin_unlock(&dest->stats.lock); 124 s->ustats.inpkts++;
116 125 u64_stats_update_begin(&s->syncp);
117 spin_lock(&dest->svc->stats.lock); 126 s->ustats.inbytes += skb->len;
118 dest->svc->stats.ustats.inpkts++; 127 u64_stats_update_end(&s->syncp);
119 dest->svc->stats.ustats.inbytes += skb->len; 128
120 spin_unlock(&dest->svc->stats.lock); 129 s = this_cpu_ptr(dest->svc->stats.cpustats);
121 130 s->ustats.inpkts++;
122 spin_lock(&ip_vs_stats.lock); 131 u64_stats_update_begin(&s->syncp);
123 ip_vs_stats.ustats.inpkts++; 132 s->ustats.inbytes += skb->len;
124 ip_vs_stats.ustats.inbytes += skb->len; 133 u64_stats_update_end(&s->syncp);
125 spin_unlock(&ip_vs_stats.lock); 134
135 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
136 s->ustats.inpkts++;
137 u64_stats_update_begin(&s->syncp);
138 s->ustats.inbytes += skb->len;
139 u64_stats_update_end(&s->syncp);
126 } 140 }
127} 141}
128 142
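
This hunk, together with the two that follow for ip_vs_out_stats() and ip_vs_conn_stats(), replaces the three global spin-locked counters with per-CPU counters: each CPU bumps its own struct ip_vs_cpu_stats, and only the 64-bit byte counters are bracketed with u64_stats_update_begin()/end() so a 32-bit reader cannot observe a torn value. A reader then has to sum over all CPUs and retry any sample that raced with a writer. A minimal reader sketch under those assumptions (the function and the sum struct are illustrative names; the field names are taken from the hunk):

/* Sketch: folding the per-CPU counters introduced above into one total.
 * ip_vs_sum_cpu_stats() and struct ip_vs_stats_sum are illustrative names. */
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/u64_stats_sync.h>
#include <net/ip_vs.h>			/* struct ip_vs_cpu_stats */

struct ip_vs_stats_sum {
	u64 conns, inpkts, outpkts;
	u64 inbytes, outbytes;
};

static void ip_vs_sum_cpu_stats(struct ip_vs_cpu_stats __percpu *stats,
				struct ip_vs_stats_sum *sum)
{
	int cpu;

	memset(sum, 0, sizeof(*sum));
	for_each_possible_cpu(cpu) {
		struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, cpu);
		unsigned int start;
		u64 inbytes, outbytes;

		do {	/* retry if a writer updated the 64-bit counters meanwhile */
			start = u64_stats_fetch_begin(&s->syncp);
			inbytes  = s->ustats.inbytes;
			outbytes = s->ustats.outbytes;
		} while (u64_stats_fetch_retry(&s->syncp, start));

		sum->conns    += s->ustats.conns;	/* 32-bit, read as-is */
		sum->inpkts   += s->ustats.inpkts;
		sum->outpkts  += s->ustats.outpkts;
		sum->inbytes  += inbytes;
		sum->outbytes += outbytes;
	}
}

On 64-bit kernels the fetch_begin/fetch_retry pair compiles away; on 32-bit SMP it is a seqcount, which is why the writers above only wrap the u64 byte counters.
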
@@ -131,21 +145,28 @@ static inline void
131ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 145ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
132{ 146{
133 struct ip_vs_dest *dest = cp->dest; 147 struct ip_vs_dest *dest = cp->dest;
148 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
149
134 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 150 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
135 spin_lock(&dest->stats.lock); 151 struct ip_vs_cpu_stats *s;
136 dest->stats.ustats.outpkts++; 152
137 dest->stats.ustats.outbytes += skb->len; 153 s = this_cpu_ptr(dest->stats.cpustats);
138 spin_unlock(&dest->stats.lock); 154 s->ustats.outpkts++;
139 155 u64_stats_update_begin(&s->syncp);
140 spin_lock(&dest->svc->stats.lock); 156 s->ustats.outbytes += skb->len;
141 dest->svc->stats.ustats.outpkts++; 157 u64_stats_update_end(&s->syncp);
142 dest->svc->stats.ustats.outbytes += skb->len; 158
143 spin_unlock(&dest->svc->stats.lock); 159 s = this_cpu_ptr(dest->svc->stats.cpustats);
144 160 s->ustats.outpkts++;
145 spin_lock(&ip_vs_stats.lock); 161 u64_stats_update_begin(&s->syncp);
146 ip_vs_stats.ustats.outpkts++; 162 s->ustats.outbytes += skb->len;
147 ip_vs_stats.ustats.outbytes += skb->len; 163 u64_stats_update_end(&s->syncp);
148 spin_unlock(&ip_vs_stats.lock); 164
165 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
166 s->ustats.outpkts++;
167 u64_stats_update_begin(&s->syncp);
168 s->ustats.outbytes += skb->len;
169 u64_stats_update_end(&s->syncp);
149 } 170 }
150} 171}
151 172
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
153static inline void 174static inline void
154ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) 175ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
155{ 176{
156 spin_lock(&cp->dest->stats.lock); 177 struct netns_ipvs *ipvs = net_ipvs(svc->net);
157 cp->dest->stats.ustats.conns++; 178 struct ip_vs_cpu_stats *s;
158 spin_unlock(&cp->dest->stats.lock);
159 179
160 spin_lock(&svc->stats.lock); 180 s = this_cpu_ptr(cp->dest->stats.cpustats);
161 svc->stats.ustats.conns++; 181 s->ustats.conns++;
162 spin_unlock(&svc->stats.lock);
163 182
164 spin_lock(&ip_vs_stats.lock); 183 s = this_cpu_ptr(svc->stats.cpustats);
165 ip_vs_stats.ustats.conns++; 184 s->ustats.conns++;
166 spin_unlock(&ip_vs_stats.lock); 185
186 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
187 s->ustats.conns++;
167} 188}
168 189
169 190
170static inline int 191static inline int
171ip_vs_set_state(struct ip_vs_conn *cp, int direction, 192ip_vs_set_state(struct ip_vs_conn *cp, int direction,
172 const struct sk_buff *skb, 193 const struct sk_buff *skb,
173 struct ip_vs_protocol *pp) 194 struct ip_vs_proto_data *pd)
174{ 195{
175 if (unlikely(!pp->state_transition)) 196 if (unlikely(!pd->pp->state_transition))
176 return 0; 197 return 0;
177 return pp->state_transition(cp, direction, skb, pp); 198 return pd->pp->state_transition(cp, direction, skb, pd);
178} 199}
179 200
180static inline void 201static inline int
181ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, 202ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
182 struct sk_buff *skb, int protocol, 203 struct sk_buff *skb, int protocol,
183 const union nf_inet_addr *caddr, __be16 cport, 204 const union nf_inet_addr *caddr, __be16 cport,
184 const union nf_inet_addr *vaddr, __be16 vport, 205 const union nf_inet_addr *vaddr, __be16 vport,
185 struct ip_vs_conn_param *p) 206 struct ip_vs_conn_param *p)
186{ 207{
187 ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); 208 ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
209 vport, p);
188 p->pe = svc->pe; 210 p->pe = svc->pe;
189 if (p->pe && p->pe->fill_param) 211 if (p->pe && p->pe->fill_param)
190 p->pe->fill_param(p, skb); 212 return p->pe->fill_param(p, skb);
213
214 return 0;
191} 215}
192 216
193/* 217/*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
200static struct ip_vs_conn * 224static struct ip_vs_conn *
201ip_vs_sched_persist(struct ip_vs_service *svc, 225ip_vs_sched_persist(struct ip_vs_service *svc,
202 struct sk_buff *skb, 226 struct sk_buff *skb,
203 __be16 ports[2]) 227 __be16 src_port, __be16 dst_port, int *ignored)
204{ 228{
205 struct ip_vs_conn *cp = NULL; 229 struct ip_vs_conn *cp = NULL;
206 struct ip_vs_iphdr iph; 230 struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
224 248
225 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 249 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
226 "mnet %s\n", 250 "mnet %s\n",
227 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), 251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
228 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), 252 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
229 IP_VS_DBG_ADDR(svc->af, &snet)); 253 IP_VS_DBG_ADDR(svc->af, &snet));
230 254
231 /* 255 /*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
247 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; 271 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
248 __be16 vport = 0; 272 __be16 vport = 0;
249 273
250 if (ports[1] == svc->port) { 274 if (dst_port == svc->port) {
251 /* non-FTP template: 275 /* non-FTP template:
252 * <protocol, caddr, 0, vaddr, vport, daddr, dport> 276 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
253 * FTP template: 277 * FTP template:
254 * <protocol, caddr, 0, vaddr, 0, daddr, 0> 278 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
255 */ 279 */
256 if (svc->port != FTPPORT) 280 if (svc->port != FTPPORT)
257 vport = ports[1]; 281 vport = dst_port;
258 } else { 282 } else {
259 /* Note: persistent fwmark-based services and 283 /* Note: persistent fwmark-based services and
260 * persistent port zero service are handled here. 284 * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
268 vaddr = &fwmark; 292 vaddr = &fwmark;
269 } 293 }
270 } 294 }
271 ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, 295 /* return *ignored = -1 so NF_DROP can be used */
272 vaddr, vport, &param); 296 if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
297 vaddr, vport, &param) < 0) {
298 *ignored = -1;
299 return NULL;
300 }
273 } 301 }
274 302
275 /* Check if a template already exists */ 303 /* Check if a template already exists */
276 ct = ip_vs_ct_in_get(&param); 304 ct = ip_vs_ct_in_get(&param);
277 if (!ct || !ip_vs_check_template(ct)) { 305 if (!ct || !ip_vs_check_template(ct)) {
278 /* No template found or the dest of the connection 306 /*
307 * No template found or the dest of the connection
279 * template is not available. 308 * template is not available.
309 * return *ignored=0 i.e. ICMP and NF_DROP
280 */ 310 */
281 dest = svc->scheduler->schedule(svc, skb); 311 dest = svc->scheduler->schedule(svc, skb);
282 if (!dest) { 312 if (!dest) {
283 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 313 IP_VS_DBG(1, "p-schedule: no dest found.\n");
284 kfree(param.pe_data); 314 kfree(param.pe_data);
315 *ignored = 0;
285 return NULL; 316 return NULL;
286 } 317 }
287 318
288 if (ports[1] == svc->port && svc->port != FTPPORT) 319 if (dst_port == svc->port && svc->port != FTPPORT)
289 dport = dest->port; 320 dport = dest->port;
290 321
291 /* Create a template 322 /* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
293 * and thus param.pe_data will be destroyed 324 * and thus param.pe_data will be destroyed
294 * when the template expires */ 325 * when the template expires */
295 ct = ip_vs_conn_new(&param, &dest->addr, dport, 326 ct = ip_vs_conn_new(&param, &dest->addr, dport,
296 IP_VS_CONN_F_TEMPLATE, dest); 327 IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
297 if (ct == NULL) { 328 if (ct == NULL) {
298 kfree(param.pe_data); 329 kfree(param.pe_data);
330 *ignored = -1;
299 return NULL; 331 return NULL;
300 } 332 }
301 333
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
306 kfree(param.pe_data); 338 kfree(param.pe_data);
307 } 339 }
308 340
309 dport = ports[1]; 341 dport = dst_port;
310 if (dport == svc->port && dest->port) 342 if (dport == svc->port && dest->port)
311 dport = dest->port; 343 dport = dest->port;
312 344
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
317 /* 349 /*
318 * Create a new connection according to the template 350 * Create a new connection according to the template
319 */ 351 */
320 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], 352 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
321 &iph.daddr, ports[1], &param); 353 src_port, &iph.daddr, dst_port, &param);
322 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest); 354
355 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
323 if (cp == NULL) { 356 if (cp == NULL) {
324 ip_vs_conn_put(ct); 357 ip_vs_conn_put(ct);
358 *ignored = -1;
325 return NULL; 359 return NULL;
326 } 360 }
327 361
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
341 * It selects a server according to the virtual service, and 375 * It selects a server according to the virtual service, and
342 * creates a connection entry. 376 * creates a connection entry.
343 * Protocols supported: TCP, UDP 377 * Protocols supported: TCP, UDP
378 *
379 * Usage of *ignored
380 *
381 * 1 : protocol tried to schedule (eg. on SYN), found svc but the
382 * svc/scheduler decides that this packet should be accepted with
383 * NF_ACCEPT because it must not be scheduled.
384 *
385 * 0 : scheduler can not find destination, so try bypass or
386 * return ICMP and then NF_DROP (ip_vs_leave).
387 *
388 * -1 : scheduler tried to schedule but fatal error occurred, eg.
389 * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
390 * failure such as missing Call-ID, ENOMEM on skb_linearize
391 * or pe_data. In this case we should return NF_DROP without
392 * any attempts to send ICMP with ip_vs_leave.
344 */ 393 */
345struct ip_vs_conn * 394struct ip_vs_conn *
346ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 395ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
347 struct ip_vs_protocol *pp, int *ignored) 396 struct ip_vs_proto_data *pd, int *ignored)
348{ 397{
398 struct ip_vs_protocol *pp = pd->pp;
349 struct ip_vs_conn *cp = NULL; 399 struct ip_vs_conn *cp = NULL;
350 struct ip_vs_iphdr iph; 400 struct ip_vs_iphdr iph;
351 struct ip_vs_dest *dest; 401 struct ip_vs_dest *dest;
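
The comment block added above fixes the contract for *ignored: 1 means the service decided the packet must not be scheduled and should simply be accepted, 0 means no destination was found and the caller may fall back to ip_vs_leave() (ICMP and then NF_DROP), and -1 means a fatal error where the packet should be dropped without any ICMP. A hedged sketch of a conn_schedule-style caller honouring that contract (the function itself is illustrative and not a quote of any protocol handler; ip_vs_schedule() and ip_vs_leave() are used with the signatures introduced by this patch):

/* Sketch: consuming *ignored as documented above. Illustrative only. */
#include <linux/netfilter.h>
#include <net/ip_vs.h>

static int example_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
			    struct ip_vs_proto_data *pd,
			    int *verdict, struct ip_vs_conn **cpp)
{
	int ignored;

	*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
	if (*cpp)
		return 1;		/* connection created, keep processing */

	if (ignored == 0)		/* no dest: bypass or ICMP + NF_DROP */
		*verdict = ip_vs_leave(svc, skb, pd);
	else if (ignored < 0)		/* fatal (e.g. ENOMEM, bad PE data) */
		*verdict = NF_DROP;
	else				/* ignored == 1: accept, do not schedule */
		*verdict = NF_ACCEPT;

	return 0;			/* verdict already decided, stop here */
}

The return convention (0 means the verdict is set, non-zero means continue) mirrors how ip_vs_in() calls pp->conn_schedule() later in this patch.
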
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
371 } 421 }
372 422
373 /* 423 /*
374 * Do not schedule replies from local real server. It is risky 424 * Do not schedule replies from local real server.
375 * for fwmark services but mostly for persistent services.
376 */ 425 */
377 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 426 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
378 (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && 427 (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
379 (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
380 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 428 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
381 "Not scheduling reply for existing connection"); 429 "Not scheduling reply for existing connection");
382 __ip_vs_conn_put(cp); 430 __ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
386 /* 434 /*
387 * Persistent service 435 * Persistent service
388 */ 436 */
389 if (svc->flags & IP_VS_SVC_F_PERSISTENT) { 437 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
390 *ignored = 0; 438 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
391 return ip_vs_sched_persist(svc, skb, pptr); 439
392 } 440 *ignored = 0;
393 441
394 /* 442 /*
395 * Non-persistent service 443 * Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
402 return NULL; 450 return NULL;
403 } 451 }
404 452
405 *ignored = 0;
406
407 dest = svc->scheduler->schedule(svc, skb); 453 dest = svc->scheduler->schedule(svc, skb);
408 if (dest == NULL) { 454 if (dest == NULL) {
409 IP_VS_DBG(1, "Schedule: no dest found.\n"); 455 IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
419 */ 465 */
420 { 466 {
421 struct ip_vs_conn_param p; 467 struct ip_vs_conn_param p;
422 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, 468
423 pptr[0], &iph.daddr, pptr[1], &p); 469 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
470 &iph.saddr, pptr[0], &iph.daddr, pptr[1],
471 &p);
424 cp = ip_vs_conn_new(&p, &dest->addr, 472 cp = ip_vs_conn_new(&p, &dest->addr,
425 dest->port ? dest->port : pptr[1], 473 dest->port ? dest->port : pptr[1],
426 flags, dest); 474 flags, dest, skb->mark);
427 if (!cp) 475 if (!cp) {
476 *ignored = -1;
428 return NULL; 477 return NULL;
478 }
429 } 479 }
430 480
431 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " 481 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
447 * no destination is available for a new connection. 497 * no destination is available for a new connection.
448 */ 498 */
449int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 499int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
450 struct ip_vs_protocol *pp) 500 struct ip_vs_proto_data *pd)
451{ 501{
452 __be16 _ports[2], *pptr; 502 __be16 _ports[2], *pptr;
453 struct ip_vs_iphdr iph; 503 struct ip_vs_iphdr iph;
504#ifdef CONFIG_SYSCTL
505 struct net *net;
506 struct netns_ipvs *ipvs;
454 int unicast; 507 int unicast;
508#endif
509
455 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 510 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
456 511
457 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 512 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -460,17 +515,21 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
460 return NF_DROP; 515 return NF_DROP;
461 } 516 }
462 517
518#ifdef CONFIG_SYSCTL
519 net = skb_net(skb);
520
463#ifdef CONFIG_IP_VS_IPV6 521#ifdef CONFIG_IP_VS_IPV6
464 if (svc->af == AF_INET6) 522 if (svc->af == AF_INET6)
465 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 523 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
466 else 524 else
467#endif 525#endif
468 unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); 526 unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
469 527
470 /* if it is fwmark-based service, the cache_bypass sysctl is up 528 /* if it is fwmark-based service, the cache_bypass sysctl is up
471 and the destination is a non-local unicast, then create 529 and the destination is a non-local unicast, then create
472 a cache_bypass connection entry */ 530 a cache_bypass connection entry */
473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 531 ipvs = net_ipvs(net);
532 if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
474 int ret, cs; 533 int ret, cs;
475 struct ip_vs_conn *cp; 534 struct ip_vs_conn *cp;
476 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 535 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +543,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 543 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
485 { 544 {
486 struct ip_vs_conn_param p; 545 struct ip_vs_conn_param p;
487 ip_vs_conn_fill_param(svc->af, iph.protocol, 546 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
488 &iph.saddr, pptr[0], 547 &iph.saddr, pptr[0],
489 &iph.daddr, pptr[1], &p); 548 &iph.daddr, pptr[1], &p);
490 cp = ip_vs_conn_new(&p, &daddr, 0, 549 cp = ip_vs_conn_new(&p, &daddr, 0,
491 IP_VS_CONN_F_BYPASS | flags, 550 IP_VS_CONN_F_BYPASS | flags,
492 NULL); 551 NULL, skb->mark);
493 if (!cp) 552 if (!cp)
494 return NF_DROP; 553 return NF_DROP;
495 } 554 }
@@ -498,16 +557,17 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
498 ip_vs_in_stats(cp, skb); 557 ip_vs_in_stats(cp, skb);
499 558
500 /* set state */ 559 /* set state */
501 cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 560 cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
502 561
503 /* transmit the first SYN packet */ 562 /* transmit the first SYN packet */
504 ret = cp->packet_xmit(skb, cp, pp); 563 ret = cp->packet_xmit(skb, cp, pd->pp);
505 /* do not touch skb anymore */ 564 /* do not touch skb anymore */
506 565
507 atomic_inc(&cp->in_pkts); 566 atomic_inc(&cp->in_pkts);
508 ip_vs_conn_put(cp); 567 ip_vs_conn_put(cp);
509 return ret; 568 return ret;
510 } 569 }
570#endif
511 571
512 /* 572 /*
513 * When the virtual ftp service is presented, packets destined 573 * When the virtual ftp service is presented, packets destined
@@ -544,6 +604,33 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
544 return NF_DROP; 604 return NF_DROP;
545} 605}
546 606
607#ifdef CONFIG_SYSCTL
608
609static int sysctl_snat_reroute(struct sk_buff *skb)
610{
611 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
612 return ipvs->sysctl_snat_reroute;
613}
614
615static int sysctl_nat_icmp_send(struct net *net)
616{
617 struct netns_ipvs *ipvs = net_ipvs(net);
618 return ipvs->sysctl_nat_icmp_send;
619}
620
621static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
622{
623 return ipvs->sysctl_expire_nodest_conn;
624}
625
626#else
627
628static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
629static int sysctl_nat_icmp_send(struct net *net) { return 0; }
630static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
631
632#endif
633
547__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) 634__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
548{ 635{
549 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 636 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -576,6 +663,22 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
576} 663}
577#endif 664#endif
578 665
666static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
667{
668#ifdef CONFIG_IP_VS_IPV6
669 if (af == AF_INET6) {
670 if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
671 return 1;
672 } else
673#endif
674 if ((sysctl_snat_reroute(skb) ||
675 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
676 ip_route_me_harder(skb, RTN_LOCAL) != 0)
677 return 1;
678
679 return 0;
680}
681
579/* 682/*
580 * Packet has been made sufficiently writable in caller 683 * Packet has been made sufficiently writable in caller
581 * - inout: 1=in->out, 0=out->in 684 * - inout: 1=in->out, 0=out->in
@@ -674,7 +777,7 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
674#endif 777#endif
675 778
676/* Handle relevant response ICMP messages - forward to the right 779/* Handle relevant response ICMP messages - forward to the right
677 * destination host. Used for NAT and local client. 780 * destination host.
678 */ 781 */
679static int handle_response_icmp(int af, struct sk_buff *skb, 782static int handle_response_icmp(int af, struct sk_buff *skb,
680 union nf_inet_addr *snet, 783 union nf_inet_addr *snet,
@@ -710,16 +813,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
710#endif 813#endif
711 ip_vs_nat_icmp(skb, pp, cp, 1); 814 ip_vs_nat_icmp(skb, pp, cp, 1);
712 815
713#ifdef CONFIG_IP_VS_IPV6 816 if (ip_vs_route_me_harder(af, skb))
714 if (af == AF_INET6) { 817 goto out;
715 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
716 goto out;
717 } else
718#endif
719 if ((sysctl_ip_vs_snat_reroute ||
720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
721 ip_route_me_harder(skb, RTN_LOCAL) != 0)
722 goto out;
723 818
724 /* do the statistics and put it back */ 819 /* do the statistics and put it back */
725 ip_vs_out_stats(cp, skb); 820 ip_vs_out_stats(cp, skb);
@@ -808,7 +903,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
808 903
809 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 904 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
810 /* The embedded headers contain source and dest in reverse order */ 905 /* The embedded headers contain source and dest in reverse order */
811 cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); 906 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
812 if (!cp) 907 if (!cp)
813 return NF_ACCEPT; 908 return NF_ACCEPT;
814 909
@@ -885,7 +980,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
885 980
886 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 981 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
887 /* The embedded headers contain source and dest in reverse order */ 982 /* The embedded headers contain source and dest in reverse order */
888 cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); 983 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
889 if (!cp) 984 if (!cp)
890 return NF_ACCEPT; 985 return NF_ACCEPT;
891 986
@@ -921,12 +1016,13 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
921} 1016}
922 1017
923/* Handle response packets: rewrite addresses and send away... 1018/* Handle response packets: rewrite addresses and send away...
924 * Used for NAT and local client.
925 */ 1019 */
926static unsigned int 1020static unsigned int
927handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 1021handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
928 struct ip_vs_conn *cp, int ihl) 1022 struct ip_vs_conn *cp, int ihl)
929{ 1023{
1024 struct ip_vs_protocol *pp = pd->pp;
1025
930 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 1026 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
931 1027
932 if (!skb_make_writable(skb, ihl)) 1028 if (!skb_make_writable(skb, ihl))
@@ -961,21 +1057,13 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
961 * if it came from this machine itself. So re-compute 1057 * if it came from this machine itself. So re-compute
962 * the routing information. 1058 * the routing information.
963 */ 1059 */
964#ifdef CONFIG_IP_VS_IPV6 1060 if (ip_vs_route_me_harder(af, skb))
965 if (af == AF_INET6) { 1061 goto drop;
966 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
967 goto drop;
968 } else
969#endif
970 if ((sysctl_ip_vs_snat_reroute ||
971 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
972 ip_route_me_harder(skb, RTN_LOCAL) != 0)
973 goto drop;
974 1062
975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); 1063 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
976 1064
977 ip_vs_out_stats(cp, skb); 1065 ip_vs_out_stats(cp, skb);
978 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 1066 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
979 skb->ipvs_property = 1; 1067 skb->ipvs_property = 1;
980 if (!(cp->flags & IP_VS_CONN_F_NFCT)) 1068 if (!(cp->flags & IP_VS_CONN_F_NFCT))
981 ip_vs_notrack(skb); 1069 ip_vs_notrack(skb);
@@ -999,8 +1087,10 @@ drop:
999static unsigned int 1087static unsigned int
1000ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) 1088ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1001{ 1089{
1090 struct net *net = NULL;
1002 struct ip_vs_iphdr iph; 1091 struct ip_vs_iphdr iph;
1003 struct ip_vs_protocol *pp; 1092 struct ip_vs_protocol *pp;
1093 struct ip_vs_proto_data *pd;
1004 struct ip_vs_conn *cp; 1094 struct ip_vs_conn *cp;
1005 1095
1006 EnterFunction(11); 1096 EnterFunction(11);
@@ -1022,6 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1022 if (unlikely(!skb_dst(skb))) 1112 if (unlikely(!skb_dst(skb)))
1023 return NF_ACCEPT; 1113 return NF_ACCEPT;
1024 1114
1115 net = skb_net(skb);
1116 if (!net_ipvs(net)->enable)
1117 return NF_ACCEPT;
1118
1025 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1119 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1026#ifdef CONFIG_IP_VS_IPV6 1120#ifdef CONFIG_IP_VS_IPV6
1027 if (af == AF_INET6) { 1121 if (af == AF_INET6) {
@@ -1045,9 +1139,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1045 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1139 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1046 } 1140 }
1047 1141
1048 pp = ip_vs_proto_get(iph.protocol); 1142 pd = ip_vs_proto_data_get(net, iph.protocol);
1049 if (unlikely(!pp)) 1143 if (unlikely(!pd))
1050 return NF_ACCEPT; 1144 return NF_ACCEPT;
1145 pp = pd->pp;
1051 1146
1052 /* reassemble IP fragments */ 1147 /* reassemble IP fragments */
1053#ifdef CONFIG_IP_VS_IPV6 1148#ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1168,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1073 /* 1168 /*
1074 * Check if the packet belongs to an existing entry 1169 * Check if the packet belongs to an existing entry
1075 */ 1170 */
1076 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); 1171 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
1077 1172
1078 if (likely(cp)) 1173 if (likely(cp))
1079 return handle_response(af, skb, pp, cp, iph.len); 1174 return handle_response(af, skb, pd, cp, iph.len);
1080 if (sysctl_ip_vs_nat_icmp_send && 1175 if (sysctl_nat_icmp_send(net) &&
1081 (pp->protocol == IPPROTO_TCP || 1176 (pp->protocol == IPPROTO_TCP ||
1082 pp->protocol == IPPROTO_UDP || 1177 pp->protocol == IPPROTO_UDP ||
1083 pp->protocol == IPPROTO_SCTP)) { 1178 pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1182,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1087 sizeof(_ports), _ports); 1182 sizeof(_ports), _ports);
1088 if (pptr == NULL) 1183 if (pptr == NULL)
1089 return NF_ACCEPT; /* Not for me */ 1184 return NF_ACCEPT; /* Not for me */
1090 if (ip_vs_lookup_real_service(af, iph.protocol, 1185 if (ip_vs_lookup_real_service(net, af, iph.protocol,
1091 &iph.saddr, 1186 &iph.saddr,
1092 pptr[0])) { 1187 pptr[0])) {
1093 /* 1188 /*
@@ -1202,14 +1297,15 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1202static int 1297static int
1203ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) 1298ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1204{ 1299{
1300 struct net *net = NULL;
1205 struct iphdr *iph; 1301 struct iphdr *iph;
1206 struct icmphdr _icmph, *ic; 1302 struct icmphdr _icmph, *ic;
1207 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ 1303 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
1208 struct ip_vs_iphdr ciph; 1304 struct ip_vs_iphdr ciph;
1209 struct ip_vs_conn *cp; 1305 struct ip_vs_conn *cp;
1210 struct ip_vs_protocol *pp; 1306 struct ip_vs_protocol *pp;
1307 struct ip_vs_proto_data *pd;
1211 unsigned int offset, ihl, verdict; 1308 unsigned int offset, ihl, verdict;
1212 union nf_inet_addr snet;
1213 1309
1214 *related = 1; 1310 *related = 1;
1215 1311
@@ -1249,9 +1345,12 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1249 if (cih == NULL) 1345 if (cih == NULL)
1250 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1346 return NF_ACCEPT; /* The packet looks wrong, ignore */
1251 1347
1252 pp = ip_vs_proto_get(cih->protocol); 1348 net = skb_net(skb);
1253 if (!pp) 1349
1350 pd = ip_vs_proto_data_get(net, cih->protocol);
1351 if (!pd)
1254 return NF_ACCEPT; 1352 return NF_ACCEPT;
1353 pp = pd->pp;
1255 1354
1256 /* Is the embedded protocol header present? */ 1355 /* Is the embedded protocol header present? */
1257 if (unlikely(cih->frag_off & htons(IP_OFFSET) && 1356 if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,18 +1364,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1265 1364
1266 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1365 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
1267 /* The embedded headers contain source and dest in reverse order */ 1366 /* The embedded headers contain source and dest in reverse order */
1268 cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); 1367 cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
1269 if (!cp) { 1368 if (!cp)
1270 /* The packet could also belong to a local client */
1271 cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
1272 if (cp) {
1273 snet.ip = iph->saddr;
1274 return handle_response_icmp(AF_INET, skb, &snet,
1275 cih->protocol, cp, pp,
1276 offset, ihl);
1277 }
1278 return NF_ACCEPT; 1369 return NF_ACCEPT;
1279 }
1280 1370
1281 verdict = NF_DROP; 1371 verdict = NF_DROP;
1282 1372
@@ -1292,15 +1382,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1292 ip_vs_in_stats(cp, skb); 1382 ip_vs_in_stats(cp, skb);
1293 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1383 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1294 offset += 2 * sizeof(__u16); 1384 offset += 2 * sizeof(__u16);
1295 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); 1385 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum);
1296 /* LOCALNODE from FORWARD hook is not supported */
1297 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1298 skb_rtable(skb)->rt_flags & RTCF_LOCAL) {
1299 IP_VS_DBG(1, "%s(): "
1300 "local delivery to %pI4 but in FORWARD\n",
1301 __func__, &skb_rtable(skb)->rt_dst);
1302 verdict = NF_DROP;
1303 }
1304 1386
1305 out: 1387 out:
1306 __ip_vs_conn_put(cp); 1388 __ip_vs_conn_put(cp);
@@ -1312,6 +1394,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1312static int 1394static int
1313ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1395ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1314{ 1396{
1397 struct net *net = NULL;
1315 struct ipv6hdr *iph; 1398 struct ipv6hdr *iph;
1316 struct icmp6hdr _icmph, *ic; 1399 struct icmp6hdr _icmph, *ic;
1317 struct ipv6hdr _ciph, *cih; /* The ip header contained 1400 struct ipv6hdr _ciph, *cih; /* The ip header contained
@@ -1319,9 +1402,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1319 struct ip_vs_iphdr ciph; 1402 struct ip_vs_iphdr ciph;
1320 struct ip_vs_conn *cp; 1403 struct ip_vs_conn *cp;
1321 struct ip_vs_protocol *pp; 1404 struct ip_vs_protocol *pp;
1405 struct ip_vs_proto_data *pd;
1322 unsigned int offset, verdict; 1406 unsigned int offset, verdict;
1323 union nf_inet_addr snet;
1324 struct rt6_info *rt;
1325 1407
1326 *related = 1; 1408 *related = 1;
1327 1409
@@ -1361,9 +1443,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1361 if (cih == NULL) 1443 if (cih == NULL)
1362 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1444 return NF_ACCEPT; /* The packet looks wrong, ignore */
1363 1445
1364 pp = ip_vs_proto_get(cih->nexthdr); 1446 net = skb_net(skb);
1365 if (!pp) 1447 pd = ip_vs_proto_data_get(net, cih->nexthdr);
1448 if (!pd)
1366 return NF_ACCEPT; 1449 return NF_ACCEPT;
1450 pp = pd->pp;
1367 1451
1368 /* Is the embedded protocol header present? */ 1452 /* Is the embedded protocol header present? */
1369 /* TODO: we don't support fragmentation at the moment anyways */ 1453 /* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,37 +1461,16 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1377 1461
1378 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 1462 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
1379 /* The embedded headers contain source and dest in reverse order */ 1463 /* The embedded headers contain source and dest in reverse order */
1380 cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); 1464 cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
1381 if (!cp) { 1465 if (!cp)
1382 /* The packet could also belong to a local client */
1383 cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
1384 if (cp) {
1385 ipv6_addr_copy(&snet.in6, &iph->saddr);
1386 return handle_response_icmp(AF_INET6, skb, &snet,
1387 cih->nexthdr,
1388 cp, pp, offset,
1389 sizeof(struct ipv6hdr));
1390 }
1391 return NF_ACCEPT; 1466 return NF_ACCEPT;
1392 }
1393
1394 verdict = NF_DROP;
1395 1467
1396 /* do the statistics and put it back */ 1468 /* do the statistics and put it back */
1397 ip_vs_in_stats(cp, skb); 1469 ip_vs_in_stats(cp, skb);
1398 if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || 1470 if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
1399 IPPROTO_SCTP == cih->nexthdr) 1471 IPPROTO_SCTP == cih->nexthdr)
1400 offset += 2 * sizeof(__u16); 1472 offset += 2 * sizeof(__u16);
1401 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); 1473 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum);
1402 /* LOCALNODE from FORWARD hook is not supported */
1403 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1404 (rt = (struct rt6_info *) skb_dst(skb)) &&
1405 rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) {
1406 IP_VS_DBG(1, "%s(): "
1407 "local delivery to %pI6 but in FORWARD\n",
1408 __func__, &rt->rt6i_dst);
1409 verdict = NF_DROP;
1410 }
1411 1474
1412 __ip_vs_conn_put(cp); 1475 __ip_vs_conn_put(cp);
1413 1476
@@ -1423,10 +1486,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1423static unsigned int 1486static unsigned int
1424ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) 1487ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1425{ 1488{
1489 struct net *net;
1426 struct ip_vs_iphdr iph; 1490 struct ip_vs_iphdr iph;
1427 struct ip_vs_protocol *pp; 1491 struct ip_vs_protocol *pp;
1492 struct ip_vs_proto_data *pd;
1428 struct ip_vs_conn *cp; 1493 struct ip_vs_conn *cp;
1429 int ret, restart, pkts; 1494 int ret, restart, pkts;
1495 struct netns_ipvs *ipvs;
1430 1496
1431 /* Already marked as IPVS request or reply? */ 1497 /* Already marked as IPVS request or reply? */
1432 if (skb->ipvs_property) 1498 if (skb->ipvs_property)
@@ -1447,6 +1513,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1447 IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); 1513 IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
1448 return NF_ACCEPT; 1514 return NF_ACCEPT;
1449 } 1515 }
1516 /* ipvs enabled in this netns ? */
1517 net = skb_net(skb);
1518 if (!net_ipvs(net)->enable)
1519 return NF_ACCEPT;
1520
1450 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1521 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1451 1522
1452 /* Bad... Do not break raw sockets */ 1523 /* Bad... Do not break raw sockets */
@@ -1481,19 +1552,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1481 } 1552 }
1482 1553
1483 /* Protocol supported? */ 1554 /* Protocol supported? */
1484 pp = ip_vs_proto_get(iph.protocol); 1555 pd = ip_vs_proto_data_get(net, iph.protocol);
1485 if (unlikely(!pp)) 1556 if (unlikely(!pd))
1486 return NF_ACCEPT; 1557 return NF_ACCEPT;
1487 1558 pp = pd->pp;
1488 /* 1559 /*
1489 * Check if the packet belongs to an existing connection entry 1560 * Check if the packet belongs to an existing connection entry
1490 */ 1561 */
1491 cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); 1562 cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
1492 1563
1493 if (unlikely(!cp)) { 1564 if (unlikely(!cp)) {
1494 int v; 1565 int v;
1495 1566
1496 if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1567 if (!pp->conn_schedule(af, skb, pd, &v, &cp))
1497 return v; 1568 return v;
1498 } 1569 }
1499 1570
@@ -1505,12 +1576,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1505 } 1576 }
1506 1577
1507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); 1578 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
1508 1579 ipvs = net_ipvs(net);
1509 /* Check the server status */ 1580 /* Check the server status */
1510 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1581 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
1511 /* the destination server is not available */ 1582 /* the destination server is not available */
1512 1583
1513 if (sysctl_ip_vs_expire_nodest_conn) { 1584 if (sysctl_expire_nodest_conn(ipvs)) {
1514 /* try to expire the connection immediately */ 1585 /* try to expire the connection immediately */
1515 ip_vs_conn_expire_now(cp); 1586 ip_vs_conn_expire_now(cp);
1516 } 1587 }
@@ -1521,7 +1592,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1521 } 1592 }
1522 1593
1523 ip_vs_in_stats(cp, skb); 1594 ip_vs_in_stats(cp, skb);
1524 restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 1595 restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
1525 if (cp->packet_xmit) 1596 if (cp->packet_xmit)
1526 ret = cp->packet_xmit(skb, cp, pp); 1597 ret = cp->packet_xmit(skb, cp, pp);
1527 /* do not touch skb anymore */ 1598 /* do not touch skb anymore */
@@ -1535,35 +1606,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1535 * 1606 *
1536 * Sync connection if it is about to close to 1607 * Sync connection if it is about to close to
1537 * encorage the standby servers to update the connections timeout 1608 * encorage the standby servers to update the connections timeout
1609 *
1610 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
1538 */ 1611 */
1539 pkts = atomic_add_return(1, &cp->in_pkts); 1612
1540 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1613 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
1614 pkts = sysctl_sync_threshold(ipvs);
1615 else
1616 pkts = atomic_add_return(1, &cp->in_pkts);
1617
1618 if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1541 cp->protocol == IPPROTO_SCTP) { 1619 cp->protocol == IPPROTO_SCTP) {
1542 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1620 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1543 (pkts % sysctl_ip_vs_sync_threshold[1] 1621 (pkts % sysctl_sync_period(ipvs)
1544 == sysctl_ip_vs_sync_threshold[0])) || 1622 == sysctl_sync_threshold(ipvs))) ||
1545 (cp->old_state != cp->state && 1623 (cp->old_state != cp->state &&
1546 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1624 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
1547 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || 1625 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
1548 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { 1626 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
1549 ip_vs_sync_conn(cp); 1627 ip_vs_sync_conn(net, cp);
1550 goto out; 1628 goto out;
1551 } 1629 }
1552 } 1630 }
1553 1631
1554 /* Keep this block last: TCP and others with pp->num_states <= 1 */ 1632 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1555 else if (af == AF_INET && 1633 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1556 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1557 (((cp->protocol != IPPROTO_TCP || 1634 (((cp->protocol != IPPROTO_TCP ||
1558 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1635 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1559 (pkts % sysctl_ip_vs_sync_threshold[1] 1636 (pkts % sysctl_sync_period(ipvs)
1560 == sysctl_ip_vs_sync_threshold[0])) || 1637 == sysctl_sync_threshold(ipvs))) ||
1561 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && 1638 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
1562 ((cp->state == IP_VS_TCP_S_FIN_WAIT) || 1639 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
1563 (cp->state == IP_VS_TCP_S_CLOSE) || 1640 (cp->state == IP_VS_TCP_S_CLOSE) ||
1564 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || 1641 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
1565 (cp->state == IP_VS_TCP_S_TIME_WAIT))))) 1642 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
1566 ip_vs_sync_conn(cp); 1643 ip_vs_sync_conn(net, cp);
1567out: 1644out:
1568 cp->old_state = cp->state; 1645 cp->old_state = cp->state;
1569 1646
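(For reference: with the long-standing defaults of { 3, 50 }, removed further down as the global sysctl_ip_vs_sync_threshold[] array, the modulo test above syncs an established connection on its 3rd, 53rd, 103rd, ... packet. The new ONE_PACKET branch preloads pkts with the threshold value, so such connections always pass the check and ip_vs_sync_conn() does its own filtering, as the added comment notes.)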
@@ -1653,10 +1730,16 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
1653 int (*okfn)(struct sk_buff *)) 1730 int (*okfn)(struct sk_buff *))
1654{ 1731{
1655 int r; 1732 int r;
1733 struct net *net;
1656 1734
1657 if (ip_hdr(skb)->protocol != IPPROTO_ICMP) 1735 if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
1658 return NF_ACCEPT; 1736 return NF_ACCEPT;
1659 1737
1738 /* ipvs enabled in this netns ? */
1739 net = skb_net(skb);
1740 if (!net_ipvs(net)->enable)
1741 return NF_ACCEPT;
1742
1660 return ip_vs_in_icmp(skb, &r, hooknum); 1743 return ip_vs_in_icmp(skb, &r, hooknum);
1661} 1744}
1662 1745
@@ -1667,10 +1750,16 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1667 int (*okfn)(struct sk_buff *)) 1750 int (*okfn)(struct sk_buff *))
1668{ 1751{
1669 int r; 1752 int r;
1753 struct net *net;
1670 1754
1671 if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) 1755 if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
1672 return NF_ACCEPT; 1756 return NF_ACCEPT;
1673 1757
1758 /* ipvs enabled in this netns ? */
1759 net = skb_net(skb);
1760 if (!net_ipvs(net)->enable)
1761 return NF_ACCEPT;
1762
1674 return ip_vs_in_icmp_v6(skb, &r, hooknum); 1763 return ip_vs_in_icmp_v6(skb, &r, hooknum);
1675} 1764}
1676#endif 1765#endif
@@ -1782,7 +1871,94 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1782 }, 1871 },
1783#endif 1872#endif
1784}; 1873};
1874/*
1875 * Initialize IP Virtual Server netns mem.
1876 */
1877static int __net_init __ip_vs_init(struct net *net)
1878{
1879 struct netns_ipvs *ipvs;
1880
1881 ipvs = net_generic(net, ip_vs_net_id);
1882 if (ipvs == NULL) {
1883 pr_err("%s(): no memory.\n", __func__);
1884 return -ENOMEM;
1885 }
 1886 /* Hold the beast until a service is registered */
1887 ipvs->enable = 0;
1888 ipvs->net = net;
1889 /* Counters used for creating unique names */
1890 ipvs->gen = atomic_read(&ipvs_netns_cnt);
1891 atomic_inc(&ipvs_netns_cnt);
1892 net->ipvs = ipvs;
1893
1894 if (__ip_vs_estimator_init(net) < 0)
1895 goto estimator_fail;
1896
1897 if (__ip_vs_control_init(net) < 0)
1898 goto control_fail;
1785 1899
1900 if (__ip_vs_protocol_init(net) < 0)
1901 goto protocol_fail;
1902
1903 if (__ip_vs_app_init(net) < 0)
1904 goto app_fail;
1905
1906 if (__ip_vs_conn_init(net) < 0)
1907 goto conn_fail;
1908
1909 if (__ip_vs_sync_init(net) < 0)
1910 goto sync_fail;
1911
1912 printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
1913 sizeof(struct netns_ipvs), ipvs->gen);
1914 return 0;
1915/*
1916 * Error handling
1917 */
1918
1919sync_fail:
1920 __ip_vs_conn_cleanup(net);
1921conn_fail:
1922 __ip_vs_app_cleanup(net);
1923app_fail:
1924 __ip_vs_protocol_cleanup(net);
1925protocol_fail:
1926 __ip_vs_control_cleanup(net);
1927control_fail:
1928 __ip_vs_estimator_cleanup(net);
1929estimator_fail:
1930 return -ENOMEM;
1931}
1932
1933static void __net_exit __ip_vs_cleanup(struct net *net)
1934{
1935 __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */
1936 __ip_vs_conn_cleanup(net);
1937 __ip_vs_app_cleanup(net);
1938 __ip_vs_protocol_cleanup(net);
1939 __ip_vs_control_cleanup(net);
1940 __ip_vs_estimator_cleanup(net);
1941 IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
1942}
1943
1944static void __net_exit __ip_vs_dev_cleanup(struct net *net)
1945{
1946 EnterFunction(2);
1947 net_ipvs(net)->enable = 0; /* Disable packet reception */
1948 __ip_vs_sync_cleanup(net);
1949 LeaveFunction(2);
1950}
1951
1952static struct pernet_operations ipvs_core_ops = {
1953 .init = __ip_vs_init,
1954 .exit = __ip_vs_cleanup,
1955 .id = &ip_vs_net_id,
1956 .size = sizeof(struct netns_ipvs),
1957};
1958
1959static struct pernet_operations ipvs_core_dev_ops = {
1960 .exit = __ip_vs_dev_cleanup,
1961};
1786 1962
1787/* 1963/*
1788 * Initialize IP Virtual Server 1964 * Initialize IP Virtual Server
@@ -1792,7 +1968,6 @@ static int __init ip_vs_init(void)
1792 int ret; 1968 int ret;
1793 1969
1794 ip_vs_estimator_init(); 1970 ip_vs_estimator_init();
1795
1796 ret = ip_vs_control_init(); 1971 ret = ip_vs_control_init();
1797 if (ret < 0) { 1972 if (ret < 0) {
1798 pr_err("can't setup control.\n"); 1973 pr_err("can't setup control.\n");
@@ -1813,15 +1988,36 @@ static int __init ip_vs_init(void)
1813 goto cleanup_app; 1988 goto cleanup_app;
1814 } 1989 }
1815 1990
1991 ret = ip_vs_sync_init();
1992 if (ret < 0) {
1993 pr_err("can't setup sync data.\n");
1994 goto cleanup_conn;
1995 }
1996
1997 ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
1998 if (ret < 0)
1999 goto cleanup_sync;
2000
2001 ret = register_pernet_device(&ipvs_core_dev_ops);
2002 if (ret < 0)
2003 goto cleanup_sub;
2004
1816 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 2005 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1817 if (ret < 0) { 2006 if (ret < 0) {
1818 pr_err("can't register hooks.\n"); 2007 pr_err("can't register hooks.\n");
1819 goto cleanup_conn; 2008 goto cleanup_dev;
1820 } 2009 }
1821 2010
1822 pr_info("ipvs loaded.\n"); 2011 pr_info("ipvs loaded.\n");
2012
1823 return ret; 2013 return ret;
1824 2014
2015cleanup_dev:
2016 unregister_pernet_device(&ipvs_core_dev_ops);
2017cleanup_sub:
2018 unregister_pernet_subsys(&ipvs_core_ops);
2019cleanup_sync:
2020 ip_vs_sync_cleanup();
1825 cleanup_conn: 2021 cleanup_conn:
1826 ip_vs_conn_cleanup(); 2022 ip_vs_conn_cleanup();
1827 cleanup_app: 2023 cleanup_app:
@@ -1837,6 +2033,9 @@ static int __init ip_vs_init(void)
1837static void __exit ip_vs_cleanup(void) 2033static void __exit ip_vs_cleanup(void)
1838{ 2034{
1839 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 2035 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
2036 unregister_pernet_device(&ipvs_core_dev_ops);
2037 unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
2038 ip_vs_sync_cleanup();
1840 ip_vs_conn_cleanup(); 2039 ip_vs_conn_cleanup();
1841 ip_vs_app_cleanup(); 2040 ip_vs_app_cleanup();
1842 ip_vs_protocol_cleanup(); 2041 ip_vs_protocol_cleanup();
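The netns plumbing added to ip_vs_core.c above follows the standard pernet pattern: because ipvs_core_ops declares both .id and .size, register_pernet_subsys() has the core allocate one zeroed struct netns_ipvs per namespace, which __ip_vs_init() fetches via net_generic(net, ip_vs_net_id) and caches in net->ipvs. A minimal, self-contained sketch of that pattern, with purely hypothetical names rather than the IPVS ones:

/*
 * Illustrative sketch only (hypothetical names). With .id/.size set,
 * the core allocates a zeroed per-namespace instance; init/exit fetch
 * it with net_generic(). The core also frees it on namespace exit.
 */
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct my_pernet_state {
	int enable;			/* gate per-namespace packet handling */
};

static int my_net_id;

static int __net_init my_net_init(struct net *net)
{
	struct my_pernet_state *s = net_generic(net, my_net_id);

	s->enable = 0;			/* stay passive until configured */
	return 0;
}

static void __net_exit my_net_exit(struct net *net)
{
	struct my_pernet_state *s = net_generic(net, my_net_id);

	s->enable = 0;			/* stop handling packets for this netns */
}

static struct pernet_operations my_net_ops = {
	.init = my_net_init,
	.exit = my_net_exit,
	.id   = &my_net_id,
	.size = sizeof(struct my_pernet_state),
};

static int __init my_module_init(void)
{
	return register_pernet_subsys(&my_net_ops);
}

static void __exit my_module_exit(void)
{
	unregister_pernet_subsys(&my_net_ops);
}

module_init(my_module_init);
module_exit(my_module_exit);
MODULE_LICENSE("GPL");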
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5101ab..699c79a55657 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39 39
40#include <net/net_namespace.h> 40#include <net/net_namespace.h>
41#include <linux/nsproxy.h>
41#include <net/ip.h> 42#include <net/ip.h>
42#ifdef CONFIG_IP_VS_IPV6 43#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h> 44#include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
57/* lock for service table */ 58/* lock for service table */
58static DEFINE_RWLOCK(__ip_vs_svc_lock); 59static DEFINE_RWLOCK(__ip_vs_svc_lock);
59 60
60/* lock for table with the real services */
61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63/* lock for state and timeout tables */
64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69/* lock for drop packet handling */
70static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72/* 1/rate drop and drop-entry variables */
73int ip_vs_drop_rate = 0;
74int ip_vs_drop_counter = 0;
75static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77/* number of virtual services */
78static int ip_vs_num_services = 0;
79
80/* sysctl variables */ 61/* sysctl variables */
81static int sysctl_ip_vs_drop_entry = 0;
82static int sysctl_ip_vs_drop_packet = 0;
83static int sysctl_ip_vs_secure_tcp = 0;
84static int sysctl_ip_vs_amemthresh = 1024;
85static int sysctl_ip_vs_am_droprate = 10;
86int sysctl_ip_vs_cache_bypass = 0;
87int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
94int sysctl_ip_vs_snat_reroute = 1;
95
96 62
97#ifdef CONFIG_IP_VS_DEBUG 63#ifdef CONFIG_IP_VS_DEBUG
98static int sysctl_ip_vs_debug_level = 0; 64static int sysctl_ip_vs_debug_level = 0;
@@ -103,29 +69,35 @@ int ip_vs_get_debug_level(void)
103} 69}
104#endif 70#endif
105 71
72
73/* Protos */
74static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
106#ifdef CONFIG_IP_VS_IPV6 77#ifdef CONFIG_IP_VS_IPV6
107/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ 78/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
108static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) 79static int __ip_vs_addr_is_local_v6(struct net *net,
80 const struct in6_addr *addr)
109{ 81{
110 struct rt6_info *rt; 82 struct rt6_info *rt;
111 struct flowi fl = { 83 struct flowi6 fl6 = {
112 .oif = 0, 84 .daddr = *addr,
113 .fl6_dst = *addr,
114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
115 }; 85 };
116 86
117 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 87 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
118 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) 88 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
119 return 1; 89 return 1;
120 90
121 return 0; 91 return 0;
122} 92}
123#endif 93#endif
94
95#ifdef CONFIG_SYSCTL
124/* 96/*
125 * update_defense_level is called from keventd and from sysctl, 97 * update_defense_level is called from keventd and from sysctl,
126 * so it needs to protect itself from softirqs 98 * so it needs to protect itself from softirqs
127 */ 99 */
128static void update_defense_level(void) 100static void update_defense_level(struct netns_ipvs *ipvs)
129{ 101{
130 struct sysinfo i; 102 struct sysinfo i;
131 static int old_secure_tcp = 0; 103 static int old_secure_tcp = 0;
@@ -141,73 +113,73 @@ static void update_defense_level(void)
141 /* si_swapinfo(&i); */ 113 /* si_swapinfo(&i); */
142 /* availmem = availmem - (i.totalswap - i.freeswap); */ 114 /* availmem = availmem - (i.totalswap - i.freeswap); */
143 115
144 nomem = (availmem < sysctl_ip_vs_amemthresh); 116 nomem = (availmem < ipvs->sysctl_amemthresh);
145 117
146 local_bh_disable(); 118 local_bh_disable();
147 119
148 /* drop_entry */ 120 /* drop_entry */
149 spin_lock(&__ip_vs_dropentry_lock); 121 spin_lock(&ipvs->dropentry_lock);
150 switch (sysctl_ip_vs_drop_entry) { 122 switch (ipvs->sysctl_drop_entry) {
151 case 0: 123 case 0:
152 atomic_set(&ip_vs_dropentry, 0); 124 atomic_set(&ipvs->dropentry, 0);
153 break; 125 break;
154 case 1: 126 case 1:
155 if (nomem) { 127 if (nomem) {
156 atomic_set(&ip_vs_dropentry, 1); 128 atomic_set(&ipvs->dropentry, 1);
157 sysctl_ip_vs_drop_entry = 2; 129 ipvs->sysctl_drop_entry = 2;
158 } else { 130 } else {
159 atomic_set(&ip_vs_dropentry, 0); 131 atomic_set(&ipvs->dropentry, 0);
160 } 132 }
161 break; 133 break;
162 case 2: 134 case 2:
163 if (nomem) { 135 if (nomem) {
164 atomic_set(&ip_vs_dropentry, 1); 136 atomic_set(&ipvs->dropentry, 1);
165 } else { 137 } else {
166 atomic_set(&ip_vs_dropentry, 0); 138 atomic_set(&ipvs->dropentry, 0);
167 sysctl_ip_vs_drop_entry = 1; 139 ipvs->sysctl_drop_entry = 1;
168 }; 140 };
169 break; 141 break;
170 case 3: 142 case 3:
171 atomic_set(&ip_vs_dropentry, 1); 143 atomic_set(&ipvs->dropentry, 1);
172 break; 144 break;
173 } 145 }
174 spin_unlock(&__ip_vs_dropentry_lock); 146 spin_unlock(&ipvs->dropentry_lock);
175 147
176 /* drop_packet */ 148 /* drop_packet */
177 spin_lock(&__ip_vs_droppacket_lock); 149 spin_lock(&ipvs->droppacket_lock);
178 switch (sysctl_ip_vs_drop_packet) { 150 switch (ipvs->sysctl_drop_packet) {
179 case 0: 151 case 0:
180 ip_vs_drop_rate = 0; 152 ipvs->drop_rate = 0;
181 break; 153 break;
182 case 1: 154 case 1:
183 if (nomem) { 155 if (nomem) {
184 ip_vs_drop_rate = ip_vs_drop_counter 156 ipvs->drop_rate = ipvs->drop_counter
185 = sysctl_ip_vs_amemthresh / 157 = ipvs->sysctl_amemthresh /
186 (sysctl_ip_vs_amemthresh-availmem); 158 (ipvs->sysctl_amemthresh-availmem);
187 sysctl_ip_vs_drop_packet = 2; 159 ipvs->sysctl_drop_packet = 2;
188 } else { 160 } else {
189 ip_vs_drop_rate = 0; 161 ipvs->drop_rate = 0;
190 } 162 }
191 break; 163 break;
192 case 2: 164 case 2:
193 if (nomem) { 165 if (nomem) {
194 ip_vs_drop_rate = ip_vs_drop_counter 166 ipvs->drop_rate = ipvs->drop_counter
195 = sysctl_ip_vs_amemthresh / 167 = ipvs->sysctl_amemthresh /
196 (sysctl_ip_vs_amemthresh-availmem); 168 (ipvs->sysctl_amemthresh-availmem);
197 } else { 169 } else {
198 ip_vs_drop_rate = 0; 170 ipvs->drop_rate = 0;
199 sysctl_ip_vs_drop_packet = 1; 171 ipvs->sysctl_drop_packet = 1;
200 } 172 }
201 break; 173 break;
202 case 3: 174 case 3:
203 ip_vs_drop_rate = sysctl_ip_vs_am_droprate; 175 ipvs->drop_rate = ipvs->sysctl_am_droprate;
204 break; 176 break;
205 } 177 }
206 spin_unlock(&__ip_vs_droppacket_lock); 178 spin_unlock(&ipvs->droppacket_lock);
207 179
208 /* secure_tcp */ 180 /* secure_tcp */
209 spin_lock(&ip_vs_securetcp_lock); 181 spin_lock(&ipvs->securetcp_lock);
210 switch (sysctl_ip_vs_secure_tcp) { 182 switch (ipvs->sysctl_secure_tcp) {
211 case 0: 183 case 0:
212 if (old_secure_tcp >= 2) 184 if (old_secure_tcp >= 2)
213 to_change = 0; 185 to_change = 0;
@@ -216,7 +188,7 @@ static void update_defense_level(void)
216 if (nomem) { 188 if (nomem) {
217 if (old_secure_tcp < 2) 189 if (old_secure_tcp < 2)
218 to_change = 1; 190 to_change = 1;
219 sysctl_ip_vs_secure_tcp = 2; 191 ipvs->sysctl_secure_tcp = 2;
220 } else { 192 } else {
221 if (old_secure_tcp >= 2) 193 if (old_secure_tcp >= 2)
222 to_change = 0; 194 to_change = 0;
@@ -229,7 +201,7 @@ static void update_defense_level(void)
229 } else { 201 } else {
230 if (old_secure_tcp >= 2) 202 if (old_secure_tcp >= 2)
231 to_change = 0; 203 to_change = 0;
232 sysctl_ip_vs_secure_tcp = 1; 204 ipvs->sysctl_secure_tcp = 1;
233 } 205 }
234 break; 206 break;
235 case 3: 207 case 3:
@@ -237,10 +209,11 @@ static void update_defense_level(void)
237 to_change = 1; 209 to_change = 1;
238 break; 210 break;
239 } 211 }
240 old_secure_tcp = sysctl_ip_vs_secure_tcp; 212 old_secure_tcp = ipvs->sysctl_secure_tcp;
241 if (to_change >= 0) 213 if (to_change >= 0)
242 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 214 ip_vs_protocol_timeout_change(ipvs,
243 spin_unlock(&ip_vs_securetcp_lock); 215 ipvs->sysctl_secure_tcp > 1);
216 spin_unlock(&ipvs->securetcp_lock);
244 217
245 local_bh_enable(); 218 local_bh_enable();
246} 219}
@@ -250,17 +223,18 @@ static void update_defense_level(void)
250 * Timer for checking the defense 223 * Timer for checking the defense
251 */ 224 */
252#define DEFENSE_TIMER_PERIOD 1*HZ 225#define DEFENSE_TIMER_PERIOD 1*HZ
253static void defense_work_handler(struct work_struct *work);
254static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
255 226
256static void defense_work_handler(struct work_struct *work) 227static void defense_work_handler(struct work_struct *work)
257{ 228{
258 update_defense_level(); 229 struct netns_ipvs *ipvs =
259 if (atomic_read(&ip_vs_dropentry)) 230 container_of(work, struct netns_ipvs, defense_work.work);
260 ip_vs_random_dropentry();
261 231
262 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 232 update_defense_level(ipvs);
233 if (atomic_read(&ipvs->dropentry))
234 ip_vs_random_dropentry(ipvs->net);
235 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
263} 236}
237#endif
264 238
265int 239int
266ip_vs_use_count_inc(void) 240ip_vs_use_count_inc(void)
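The defense_work conversion just above swaps the global DECLARE_DELAYED_WORK for a delayed_work embedded in struct netns_ipvs, so the handler can recover its namespace context with container_of(). A hedged, stand-alone sketch of that idiom, using a hypothetical struct rather than netns_ipvs:

/*
 * Illustrative sketch only (hypothetical struct). The callback receives
 * the embedded work_struct, so container_of() must name the .work member
 * of the delayed_work to walk back to the containing per-netns state.
 */
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <net/net_namespace.h>

struct my_ns_state {
	struct net *net;
	struct delayed_work defense_work;
};

static void my_defense_handler(struct work_struct *work)
{
	struct my_ns_state *s =
		container_of(work, struct my_ns_state, defense_work.work);

	/* per-namespace defense logic would run here, using s->net */
	schedule_delayed_work(&s->defense_work, HZ);	/* re-arm, ~1s */
}

static void my_defense_start(struct my_ns_state *s)
{
	INIT_DELAYED_WORK(&s->defense_work, my_defense_handler);
	schedule_delayed_work(&s->defense_work, HZ);
}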
@@ -287,33 +261,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
287/* the service table hashed by fwmark */ 261/* the service table hashed by fwmark */
288static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 262static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
289 263
290/*
291 * Hash table: for real service lookups
292 */
293#define IP_VS_RTAB_BITS 4
294#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
295#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
296
297static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
298
299/*
300 * Trash for destinations
301 */
302static LIST_HEAD(ip_vs_dest_trash);
303
304/*
305 * FTP & NULL virtual service counters
306 */
307static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
308static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
309
310 264
311/* 265/*
312 * Returns hash value for virtual service 266 * Returns hash value for virtual service
313 */ 267 */
314static __inline__ unsigned 268static inline unsigned
315ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, 269ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
316 __be16 port) 270 const union nf_inet_addr *addr, __be16 port)
317{ 271{
318 register unsigned porth = ntohs(port); 272 register unsigned porth = ntohs(port);
319 __be32 addr_fold = addr->ip; 273 __be32 addr_fold = addr->ip;
@@ -323,6 +277,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
323 addr_fold = addr->ip6[0]^addr->ip6[1]^ 277 addr_fold = addr->ip6[0]^addr->ip6[1]^
324 addr->ip6[2]^addr->ip6[3]; 278 addr->ip6[2]^addr->ip6[3];
325#endif 279#endif
280 addr_fold ^= ((size_t)net>>8);
326 281
327 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 282 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
328 & IP_VS_SVC_TAB_MASK; 283 & IP_VS_SVC_TAB_MASK;
@@ -331,13 +286,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
331/* 286/*
332 * Returns hash value of fwmark for virtual service lookup 287 * Returns hash value of fwmark for virtual service lookup
333 */ 288 */
334static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) 289static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
335{ 290{
336 return fwmark & IP_VS_SVC_TAB_MASK; 291 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
337} 292}
338 293
339/* 294/*
340 * Hashes a service in the ip_vs_svc_table by <proto,addr,port> 295 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
341 * or in the ip_vs_svc_fwm_table by fwmark. 296 * or in the ip_vs_svc_fwm_table by fwmark.
342 * Should be called with locked tables. 297 * Should be called with locked tables.
343 */ 298 */
@@ -353,16 +308,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
353 308
354 if (svc->fwmark == 0) { 309 if (svc->fwmark == 0) {
355 /* 310 /*
356 * Hash it by <protocol,addr,port> in ip_vs_svc_table 311 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
357 */ 312 */
358 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, 313 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
359 svc->port); 314 &svc->addr, svc->port);
360 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 315 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
361 } else { 316 } else {
362 /* 317 /*
363 * Hash it by fwmark in ip_vs_svc_fwm_table 318 * Hash it by fwmark in svc_fwm_table
364 */ 319 */
365 hash = ip_vs_svc_fwm_hashkey(svc->fwmark); 320 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
366 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 321 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
367 } 322 }
368 323
@@ -374,7 +329,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
374 329
375 330
376/* 331/*
377 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. 332 * Unhashes a service from svc_table / svc_fwm_table.
378 * Should be called with locked tables. 333 * Should be called with locked tables.
379 */ 334 */
380static int ip_vs_svc_unhash(struct ip_vs_service *svc) 335static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
386 } 341 }
387 342
388 if (svc->fwmark == 0) { 343 if (svc->fwmark == 0) {
389 /* Remove it from the ip_vs_svc_table table */ 344 /* Remove it from the svc_table table */
390 list_del(&svc->s_list); 345 list_del(&svc->s_list);
391 } else { 346 } else {
392 /* Remove it from the ip_vs_svc_fwm_table table */ 347 /* Remove it from the svc_fwm_table table */
393 list_del(&svc->f_list); 348 list_del(&svc->f_list);
394 } 349 }
395 350
@@ -400,23 +355,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
400 355
401 356
402/* 357/*
403 * Get service by {proto,addr,port} in the service table. 358 * Get service by {netns, proto,addr,port} in the service table.
404 */ 359 */
405static inline struct ip_vs_service * 360static inline struct ip_vs_service *
406__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, 361__ip_vs_service_find(struct net *net, int af, __u16 protocol,
407 __be16 vport) 362 const union nf_inet_addr *vaddr, __be16 vport)
408{ 363{
409 unsigned hash; 364 unsigned hash;
410 struct ip_vs_service *svc; 365 struct ip_vs_service *svc;
411 366
412 /* Check for "full" addressed entries */ 367 /* Check for "full" addressed entries */
413 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); 368 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
414 369
415 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ 370 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
416 if ((svc->af == af) 371 if ((svc->af == af)
417 && ip_vs_addr_equal(af, &svc->addr, vaddr) 372 && ip_vs_addr_equal(af, &svc->addr, vaddr)
418 && (svc->port == vport) 373 && (svc->port == vport)
419 && (svc->protocol == protocol)) { 374 && (svc->protocol == protocol)
375 && net_eq(svc->net, net)) {
420 /* HIT */ 376 /* HIT */
421 return svc; 377 return svc;
422 } 378 }
@@ -430,16 +386,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
430 * Get service by {fwmark} in the service table. 386 * Get service by {fwmark} in the service table.
431 */ 387 */
432static inline struct ip_vs_service * 388static inline struct ip_vs_service *
433__ip_vs_svc_fwm_find(int af, __u32 fwmark) 389__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
434{ 390{
435 unsigned hash; 391 unsigned hash;
436 struct ip_vs_service *svc; 392 struct ip_vs_service *svc;
437 393
438 /* Check for fwmark addressed entries */ 394 /* Check for fwmark addressed entries */
439 hash = ip_vs_svc_fwm_hashkey(fwmark); 395 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
440 396
441 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 397 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
442 if (svc->fwmark == fwmark && svc->af == af) { 398 if (svc->fwmark == fwmark && svc->af == af
399 && net_eq(svc->net, net)) {
443 /* HIT */ 400 /* HIT */
444 return svc; 401 return svc;
445 } 402 }
@@ -449,42 +406,46 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
449} 406}
450 407
451struct ip_vs_service * 408struct ip_vs_service *
452ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, 409ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
453 const union nf_inet_addr *vaddr, __be16 vport) 410 const union nf_inet_addr *vaddr, __be16 vport)
454{ 411{
455 struct ip_vs_service *svc; 412 struct ip_vs_service *svc;
413 struct netns_ipvs *ipvs = net_ipvs(net);
456 414
457 read_lock(&__ip_vs_svc_lock); 415 read_lock(&__ip_vs_svc_lock);
458 416
459 /* 417 /*
460 * Check the table hashed by fwmark first 418 * Check the table hashed by fwmark first
461 */ 419 */
462 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) 420 if (fwmark) {
463 goto out; 421 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422 if (svc)
423 goto out;
424 }
464 425
465 /* 426 /*
466 * Check the table hashed by <protocol,addr,port> 427 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries 428 * for "full" addressed entries
468 */ 429 */
469 svc = __ip_vs_service_find(af, protocol, vaddr, vport); 430 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
470 431
471 if (svc == NULL 432 if (svc == NULL
472 && protocol == IPPROTO_TCP 433 && protocol == IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter) 434 && atomic_read(&ipvs->ftpsvc_counter)
474 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 435 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475 /* 436 /*
476 * Check if ftp service entry exists, the packet 437 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections. 438 * might belong to FTP data connections.
478 */ 439 */
479 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); 440 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
480 } 441 }
481 442
482 if (svc == NULL 443 if (svc == NULL
483 && atomic_read(&ip_vs_nullsvc_counter)) { 444 && atomic_read(&ipvs->nullsvc_counter)) {
484 /* 445 /*
485 * Check if the catch-all port (port zero) exists 446 * Check if the catch-all port (port zero) exists
486 */ 447 */
487 svc = __ip_vs_service_find(af, protocol, vaddr, 0); 448 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
488 } 449 }
489 450
490 out: 451 out:
@@ -519,6 +480,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
519 svc->fwmark, 480 svc->fwmark,
520 IP_VS_DBG_ADDR(svc->af, &svc->addr), 481 IP_VS_DBG_ADDR(svc->af, &svc->addr),
521 ntohs(svc->port), atomic_read(&svc->usecnt)); 482 ntohs(svc->port), atomic_read(&svc->usecnt));
483 free_percpu(svc->stats.cpustats);
522 kfree(svc); 484 kfree(svc);
523 } 485 }
524} 486}
@@ -545,10 +507,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
545} 507}
546 508
547/* 509/*
548 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. 510 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
549 * should be called with locked tables. 511 * should be called with locked tables.
550 */ 512 */
551static int ip_vs_rs_hash(struct ip_vs_dest *dest) 513static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
552{ 514{
553 unsigned hash; 515 unsigned hash;
554 516
@@ -562,19 +524,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
562 */ 524 */
563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 525 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564 526
565 list_add(&dest->d_list, &ip_vs_rtable[hash]); 527 list_add(&dest->d_list, &ipvs->rs_table[hash]);
566 528
567 return 1; 529 return 1;
568} 530}
569 531
570/* 532/*
571 * UNhashes ip_vs_dest from ip_vs_rtable. 533 * UNhashes ip_vs_dest from rs_table.
572 * should be called with locked tables. 534 * should be called with locked tables.
573 */ 535 */
574static int ip_vs_rs_unhash(struct ip_vs_dest *dest) 536static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575{ 537{
576 /* 538 /*
577 * Remove it from the ip_vs_rtable table. 539 * Remove it from the rs_table table.
578 */ 540 */
579 if (!list_empty(&dest->d_list)) { 541 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list); 542 list_del(&dest->d_list);
@@ -588,10 +550,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
588 * Lookup real service by <proto,addr,port> in the real service table. 550 * Lookup real service by <proto,addr,port> in the real service table.
589 */ 551 */
590struct ip_vs_dest * 552struct ip_vs_dest *
591ip_vs_lookup_real_service(int af, __u16 protocol, 553ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
592 const union nf_inet_addr *daddr, 554 const union nf_inet_addr *daddr,
593 __be16 dport) 555 __be16 dport)
594{ 556{
557 struct netns_ipvs *ipvs = net_ipvs(net);
595 unsigned hash; 558 unsigned hash;
596 struct ip_vs_dest *dest; 559 struct ip_vs_dest *dest;
597 560
@@ -601,19 +564,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
601 */ 564 */
602 hash = ip_vs_rs_hashkey(af, daddr, dport); 565 hash = ip_vs_rs_hashkey(af, daddr, dport);
603 566
604 read_lock(&__ip_vs_rs_lock); 567 read_lock(&ipvs->rs_lock);
605 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { 568 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
606 if ((dest->af == af) 569 if ((dest->af == af)
607 && ip_vs_addr_equal(af, &dest->addr, daddr) 570 && ip_vs_addr_equal(af, &dest->addr, daddr)
608 && (dest->port == dport) 571 && (dest->port == dport)
609 && ((dest->protocol == protocol) || 572 && ((dest->protocol == protocol) ||
610 dest->vfwmark)) { 573 dest->vfwmark)) {
611 /* HIT */ 574 /* HIT */
612 read_unlock(&__ip_vs_rs_lock); 575 read_unlock(&ipvs->rs_lock);
613 return dest; 576 return dest;
614 } 577 }
615 } 578 }
616 read_unlock(&__ip_vs_rs_lock); 579 read_unlock(&ipvs->rs_lock);
617 580
618 return NULL; 581 return NULL;
619} 582}
@@ -652,15 +615,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
652 * ip_vs_lookup_real_service() looked promissing, but 615 * ip_vs_lookup_real_service() looked promissing, but
653 * seems not working as expected. 616 * seems not working as expected.
654 */ 617 */
655struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, 618struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
619 const union nf_inet_addr *daddr,
656 __be16 dport, 620 __be16 dport,
657 const union nf_inet_addr *vaddr, 621 const union nf_inet_addr *vaddr,
658 __be16 vport, __u16 protocol) 622 __be16 vport, __u16 protocol, __u32 fwmark)
659{ 623{
660 struct ip_vs_dest *dest; 624 struct ip_vs_dest *dest;
661 struct ip_vs_service *svc; 625 struct ip_vs_service *svc;
662 626
663 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); 627 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
664 if (!svc) 628 if (!svc)
665 return NULL; 629 return NULL;
666 dest = ip_vs_lookup_dest(svc, daddr, dport); 630 dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +649,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
685 __be16 dport) 649 __be16 dport)
686{ 650{
687 struct ip_vs_dest *dest, *nxt; 651 struct ip_vs_dest *dest, *nxt;
652 struct netns_ipvs *ipvs = net_ipvs(svc->net);
688 653
689 /* 654 /*
690 * Find the destination in trash 655 * Find the destination in trash
691 */ 656 */
692 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 657 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
693 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 658 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
694 "dest->refcnt=%d\n", 659 "dest->refcnt=%d\n",
695 dest->vfwmark, 660 dest->vfwmark,
@@ -720,6 +685,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
720 list_del(&dest->n_list); 685 list_del(&dest->n_list);
721 ip_vs_dst_reset(dest); 686 ip_vs_dst_reset(dest);
722 __ip_vs_unbind_svc(dest); 687 __ip_vs_unbind_svc(dest);
688 free_percpu(dest->stats.cpustats);
723 kfree(dest); 689 kfree(dest);
724 } 690 }
725 } 691 }
@@ -737,25 +703,53 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
737 * are expired, and the refcnt of each destination in the trash must 703 * are expired, and the refcnt of each destination in the trash must
738 * be 1, so we simply release them here. 704 * be 1, so we simply release them here.
739 */ 705 */
740static void ip_vs_trash_cleanup(void) 706static void ip_vs_trash_cleanup(struct net *net)
741{ 707{
742 struct ip_vs_dest *dest, *nxt; 708 struct ip_vs_dest *dest, *nxt;
709 struct netns_ipvs *ipvs = net_ipvs(net);
743 710
744 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 711 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
745 list_del(&dest->n_list); 712 list_del(&dest->n_list);
746 ip_vs_dst_reset(dest); 713 ip_vs_dst_reset(dest);
747 __ip_vs_unbind_svc(dest); 714 __ip_vs_unbind_svc(dest);
715 free_percpu(dest->stats.cpustats);
748 kfree(dest); 716 kfree(dest);
749 } 717 }
750} 718}
751 719
720static void
721ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
722{
723#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
724
725 spin_lock_bh(&src->lock);
726
727 IP_VS_SHOW_STATS_COUNTER(conns);
728 IP_VS_SHOW_STATS_COUNTER(inpkts);
729 IP_VS_SHOW_STATS_COUNTER(outpkts);
730 IP_VS_SHOW_STATS_COUNTER(inbytes);
731 IP_VS_SHOW_STATS_COUNTER(outbytes);
732
733 ip_vs_read_estimator(dst, src);
734
735 spin_unlock_bh(&src->lock);
736}
752 737
753static void 738static void
754ip_vs_zero_stats(struct ip_vs_stats *stats) 739ip_vs_zero_stats(struct ip_vs_stats *stats)
755{ 740{
756 spin_lock_bh(&stats->lock); 741 spin_lock_bh(&stats->lock);
757 742
758 memset(&stats->ustats, 0, sizeof(stats->ustats)); 743 /* get current counters as zero point, rates are zeroed */
744
745#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
746
747 IP_VS_ZERO_STATS_COUNTER(conns);
748 IP_VS_ZERO_STATS_COUNTER(inpkts);
749 IP_VS_ZERO_STATS_COUNTER(outpkts);
750 IP_VS_ZERO_STATS_COUNTER(inbytes);
751 IP_VS_ZERO_STATS_COUNTER(outbytes);
752
759 ip_vs_zero_estimator(stats); 753 ip_vs_zero_estimator(stats);
760 754
761 spin_unlock_bh(&stats->lock); 755 spin_unlock_bh(&stats->lock);
@@ -768,6 +762,7 @@ static void
768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 762__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769 struct ip_vs_dest_user_kern *udest, int add) 763 struct ip_vs_dest_user_kern *udest, int add)
770{ 764{
765 struct netns_ipvs *ipvs = net_ipvs(svc->net);
771 int conn_flags; 766 int conn_flags;
772 767
773 /* set the weight and the flags */ 768 /* set the weight and the flags */
@@ -780,12 +775,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
780 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 775 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781 } else { 776 } else {
782 /* 777 /*
783 * Put the real service in ip_vs_rtable if not present. 778 * Put the real service in rs_table if not present.
784 * For now only for NAT! 779 * For now only for NAT!
785 */ 780 */
786 write_lock_bh(&__ip_vs_rs_lock); 781 write_lock_bh(&ipvs->rs_lock);
787 ip_vs_rs_hash(dest); 782 ip_vs_rs_hash(ipvs, dest);
788 write_unlock_bh(&__ip_vs_rs_lock); 783 write_unlock_bh(&ipvs->rs_lock);
789 } 784 }
790 atomic_set(&dest->conn_flags, conn_flags); 785 atomic_set(&dest->conn_flags, conn_flags);
791 786
@@ -808,12 +803,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
808 dest->u_threshold = udest->u_threshold; 803 dest->u_threshold = udest->u_threshold;
809 dest->l_threshold = udest->l_threshold; 804 dest->l_threshold = udest->l_threshold;
810 805
811 spin_lock(&dest->dst_lock); 806 spin_lock_bh(&dest->dst_lock);
812 ip_vs_dst_reset(dest); 807 ip_vs_dst_reset(dest);
813 spin_unlock(&dest->dst_lock); 808 spin_unlock_bh(&dest->dst_lock);
814 809
815 if (add) 810 if (add)
816 ip_vs_new_estimator(&dest->stats); 811 ip_vs_start_estimator(svc->net, &dest->stats);
817 812
818 write_lock_bh(&__ip_vs_svc_lock); 813 write_lock_bh(&__ip_vs_svc_lock);
819 814
@@ -850,12 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
850 atype = ipv6_addr_type(&udest->addr.in6); 845 atype = ipv6_addr_type(&udest->addr.in6);
851 if ((!(atype & IPV6_ADDR_UNICAST) || 846 if ((!(atype & IPV6_ADDR_UNICAST) ||
852 atype & IPV6_ADDR_LINKLOCAL) && 847 atype & IPV6_ADDR_LINKLOCAL) &&
853 !__ip_vs_addr_is_local_v6(&udest->addr.in6)) 848 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
854 return -EINVAL; 849 return -EINVAL;
855 } else 850 } else
856#endif 851#endif
857 { 852 {
858 atype = inet_addr_type(&init_net, udest->addr.ip); 853 atype = inet_addr_type(svc->net, udest->addr.ip);
859 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 854 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860 return -EINVAL; 855 return -EINVAL;
861 } 856 }
@@ -865,6 +860,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
865 pr_err("%s(): no memory.\n", __func__); 860 pr_err("%s(): no memory.\n", __func__);
866 return -ENOMEM; 861 return -ENOMEM;
867 } 862 }
863 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
864 if (!dest->stats.cpustats) {
865 pr_err("%s() alloc_percpu failed\n", __func__);
866 goto err_alloc;
867 }
868 868
869 dest->af = svc->af; 869 dest->af = svc->af;
870 dest->protocol = svc->protocol; 870 dest->protocol = svc->protocol;
@@ -888,6 +888,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
888 888
889 LeaveFunction(2); 889 LeaveFunction(2);
890 return 0; 890 return 0;
891
892err_alloc:
893 kfree(dest);
894 return -ENOMEM;
891} 895}
892 896
893 897
@@ -1006,16 +1010,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1006/* 1010/*
1007 * Delete a destination (must be already unlinked from the service) 1011 * Delete a destination (must be already unlinked from the service)
1008 */ 1012 */
1009static void __ip_vs_del_dest(struct ip_vs_dest *dest) 1013static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1010{ 1014{
1011 ip_vs_kill_estimator(&dest->stats); 1015 struct netns_ipvs *ipvs = net_ipvs(net);
1016
1017 ip_vs_stop_estimator(net, &dest->stats);
1012 1018
1013 /* 1019 /*
1014 * Remove it from the d-linked list with the real services. 1020 * Remove it from the d-linked list with the real services.
1015 */ 1021 */
1016 write_lock_bh(&__ip_vs_rs_lock); 1022 write_lock_bh(&ipvs->rs_lock);
1017 ip_vs_rs_unhash(dest); 1023 ip_vs_rs_unhash(dest);
1018 write_unlock_bh(&__ip_vs_rs_lock); 1024 write_unlock_bh(&ipvs->rs_lock);
1019 1025
1020 /* 1026 /*
1021 * Decrease the refcnt of the dest, and free the dest 1027 * Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1040,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1034 and only one user context can update virtual service at a 1040 and only one user context can update virtual service at a
1035 time, so the operation here is OK */ 1041 time, so the operation here is OK */
1036 atomic_dec(&dest->svc->refcnt); 1042 atomic_dec(&dest->svc->refcnt);
1043 free_percpu(dest->stats.cpustats);
1037 kfree(dest); 1044 kfree(dest);
1038 } else { 1045 } else {
1039 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " 1046 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1048,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1041 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1048 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1042 ntohs(dest->port), 1049 ntohs(dest->port),
1043 atomic_read(&dest->refcnt)); 1050 atomic_read(&dest->refcnt));
1044 list_add(&dest->n_list, &ip_vs_dest_trash); 1051 list_add(&dest->n_list, &ipvs->dest_trash);
1045 atomic_inc(&dest->refcnt); 1052 atomic_inc(&dest->refcnt);
1046 } 1053 }
1047} 1054}
@@ -1105,7 +1112,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1105 /* 1112 /*
1106 * Delete the destination 1113 * Delete the destination
1107 */ 1114 */
1108 __ip_vs_del_dest(dest); 1115 __ip_vs_del_dest(svc->net, dest);
1109 1116
1110 LeaveFunction(2); 1117 LeaveFunction(2);
1111 1118
@@ -1117,13 +1124,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1117 * Add a service into the service hash table 1124 * Add a service into the service hash table
1118 */ 1125 */
1119static int 1126static int
1120ip_vs_add_service(struct ip_vs_service_user_kern *u, 1127ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1121 struct ip_vs_service **svc_p) 1128 struct ip_vs_service **svc_p)
1122{ 1129{
1123 int ret = 0; 1130 int ret = 0;
1124 struct ip_vs_scheduler *sched = NULL; 1131 struct ip_vs_scheduler *sched = NULL;
1125 struct ip_vs_pe *pe = NULL; 1132 struct ip_vs_pe *pe = NULL;
1126 struct ip_vs_service *svc = NULL; 1133 struct ip_vs_service *svc = NULL;
1134 struct netns_ipvs *ipvs = net_ipvs(net);
1127 1135
1128 /* increase the module use count */ 1136 /* increase the module use count */
1129 ip_vs_use_count_inc(); 1137 ip_vs_use_count_inc();
@@ -1137,7 +1145,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1137 } 1145 }
1138 1146
1139 if (u->pe_name && *u->pe_name) { 1147 if (u->pe_name && *u->pe_name) {
1140 pe = ip_vs_pe_get(u->pe_name); 1148 pe = ip_vs_pe_getbyname(u->pe_name);
1141 if (pe == NULL) { 1149 if (pe == NULL) {
1142 pr_info("persistence engine module ip_vs_pe_%s " 1150 pr_info("persistence engine module ip_vs_pe_%s "
1143 "not found\n", u->pe_name); 1151 "not found\n", u->pe_name);
@@ -1159,6 +1167,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1159 ret = -ENOMEM; 1167 ret = -ENOMEM;
1160 goto out_err; 1168 goto out_err;
1161 } 1169 }
1170 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1171 if (!svc->stats.cpustats) {
1172 pr_err("%s() alloc_percpu failed\n", __func__);
1173 goto out_err;
1174 }
1162 1175
1163 /* I'm the first user of the service */ 1176 /* I'm the first user of the service */
1164 atomic_set(&svc->usecnt, 0); 1177 atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1185,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1172 svc->flags = u->flags; 1185 svc->flags = u->flags;
1173 svc->timeout = u->timeout * HZ; 1186 svc->timeout = u->timeout * HZ;
1174 svc->netmask = u->netmask; 1187 svc->netmask = u->netmask;
1188 svc->net = net;
1175 1189
1176 INIT_LIST_HEAD(&svc->destinations); 1190 INIT_LIST_HEAD(&svc->destinations);
1177 rwlock_init(&svc->sched_lock); 1191 rwlock_init(&svc->sched_lock);
@@ -1189,15 +1203,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1189 1203
1190 /* Update the virtual service counters */ 1204 /* Update the virtual service counters */
1191 if (svc->port == FTPPORT) 1205 if (svc->port == FTPPORT)
1192 atomic_inc(&ip_vs_ftpsvc_counter); 1206 atomic_inc(&ipvs->ftpsvc_counter);
1193 else if (svc->port == 0) 1207 else if (svc->port == 0)
1194 atomic_inc(&ip_vs_nullsvc_counter); 1208 atomic_inc(&ipvs->nullsvc_counter);
1195 1209
1196 ip_vs_new_estimator(&svc->stats); 1210 ip_vs_start_estimator(net, &svc->stats);
1197 1211
1198 /* Count only IPv4 services for old get/setsockopt interface */ 1212 /* Count only IPv4 services for old get/setsockopt interface */
1199 if (svc->af == AF_INET) 1213 if (svc->af == AF_INET)
1200 ip_vs_num_services++; 1214 ipvs->num_services++;
1201 1215
1202 /* Hash the service into the service table */ 1216 /* Hash the service into the service table */
1203 write_lock_bh(&__ip_vs_svc_lock); 1217 write_lock_bh(&__ip_vs_svc_lock);
@@ -1205,8 +1219,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1205 write_unlock_bh(&__ip_vs_svc_lock); 1219 write_unlock_bh(&__ip_vs_svc_lock);
1206 1220
1207 *svc_p = svc; 1221 *svc_p = svc;
1222 /* Now there is a service - full throttle */
1223 ipvs->enable = 1;
1208 return 0; 1224 return 0;
1209 1225
1226
1210 out_err: 1227 out_err:
1211 if (svc != NULL) { 1228 if (svc != NULL) {
1212 ip_vs_unbind_scheduler(svc); 1229 ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1232,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1215 ip_vs_app_inc_put(svc->inc); 1232 ip_vs_app_inc_put(svc->inc);
1216 local_bh_enable(); 1233 local_bh_enable();
1217 } 1234 }
1235 if (svc->stats.cpustats)
1236 free_percpu(svc->stats.cpustats);
1218 kfree(svc); 1237 kfree(svc);
1219 } 1238 }
1220 ip_vs_scheduler_put(sched); 1239 ip_vs_scheduler_put(sched);
@@ -1248,7 +1267,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1248 old_sched = sched; 1267 old_sched = sched;
1249 1268
1250 if (u->pe_name && *u->pe_name) { 1269 if (u->pe_name && *u->pe_name) {
1251 pe = ip_vs_pe_get(u->pe_name); 1270 pe = ip_vs_pe_getbyname(u->pe_name);
1252 if (pe == NULL) { 1271 if (pe == NULL) {
1253 pr_info("persistence engine module ip_vs_pe_%s " 1272 pr_info("persistence engine module ip_vs_pe_%s "
1254 "not found\n", u->pe_name); 1273 "not found\n", u->pe_name);
@@ -1334,14 +1353,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1334 struct ip_vs_dest *dest, *nxt; 1353 struct ip_vs_dest *dest, *nxt;
1335 struct ip_vs_scheduler *old_sched; 1354 struct ip_vs_scheduler *old_sched;
1336 struct ip_vs_pe *old_pe; 1355 struct ip_vs_pe *old_pe;
1356 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1337 1357
1338 pr_info("%s: enter\n", __func__); 1358 pr_info("%s: enter\n", __func__);
1339 1359
1340 /* Count only IPv4 services for old get/setsockopt interface */ 1360 /* Count only IPv4 services for old get/setsockopt interface */
1341 if (svc->af == AF_INET) 1361 if (svc->af == AF_INET)
1342 ip_vs_num_services--; 1362 ipvs->num_services--;
1343 1363
1344 ip_vs_kill_estimator(&svc->stats); 1364 ip_vs_stop_estimator(svc->net, &svc->stats);
1345 1365
1346 /* Unbind scheduler */ 1366 /* Unbind scheduler */
1347 old_sched = svc->scheduler; 1367 old_sched = svc->scheduler;
@@ -1364,16 +1384,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1364 */ 1384 */
1365 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1385 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1366 __ip_vs_unlink_dest(svc, dest, 0); 1386 __ip_vs_unlink_dest(svc, dest, 0);
1367 __ip_vs_del_dest(dest); 1387 __ip_vs_del_dest(svc->net, dest);
1368 } 1388 }
1369 1389
1370 /* 1390 /*
1371 * Update the virtual service counters 1391 * Update the virtual service counters
1372 */ 1392 */
1373 if (svc->port == FTPPORT) 1393 if (svc->port == FTPPORT)
1374 atomic_dec(&ip_vs_ftpsvc_counter); 1394 atomic_dec(&ipvs->ftpsvc_counter);
1375 else if (svc->port == 0) 1395 else if (svc->port == 0)
1376 atomic_dec(&ip_vs_nullsvc_counter); 1396 atomic_dec(&ipvs->nullsvc_counter);
1377 1397
1378 /* 1398 /*
1379 * Free the service if nobody refers to it 1399 * Free the service if nobody refers to it
@@ -1383,6 +1403,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1383 svc->fwmark, 1403 svc->fwmark,
1384 IP_VS_DBG_ADDR(svc->af, &svc->addr), 1404 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1385 ntohs(svc->port), atomic_read(&svc->usecnt)); 1405 ntohs(svc->port), atomic_read(&svc->usecnt));
1406 free_percpu(svc->stats.cpustats);
1386 kfree(svc); 1407 kfree(svc);
1387 } 1408 }
1388 1409
@@ -1428,17 +1449,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1428/* 1449/*
1429 * Flush all the virtual services 1450 * Flush all the virtual services
1430 */ 1451 */
1431static int ip_vs_flush(void) 1452static int ip_vs_flush(struct net *net)
1432{ 1453{
1433 int idx; 1454 int idx;
1434 struct ip_vs_service *svc, *nxt; 1455 struct ip_vs_service *svc, *nxt;
1435 1456
1436 /* 1457 /*
1437 * Flush the service table hashed by <protocol,addr,port> 1458 * Flush the service table hashed by <netns,protocol,addr,port>
1438 */ 1459 */
1439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1460 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1461 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1441 ip_vs_unlink_service(svc); 1462 s_list) {
1463 if (net_eq(svc->net, net))
1464 ip_vs_unlink_service(svc);
1442 } 1465 }
1443 } 1466 }
1444 1467
@@ -1448,13 +1471,92 @@ static int ip_vs_flush(void)
1448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1471 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1449 list_for_each_entry_safe(svc, nxt, 1472 list_for_each_entry_safe(svc, nxt,
1450 &ip_vs_svc_fwm_table[idx], f_list) { 1473 &ip_vs_svc_fwm_table[idx], f_list) {
1451 ip_vs_unlink_service(svc); 1474 if (net_eq(svc->net, net))
1475 ip_vs_unlink_service(svc);
1452 } 1476 }
1453 } 1477 }
1454 1478
1455 return 0; 1479 return 0;
1456} 1480}
1457 1481
1482/*
1483 * Delete service by {netns} in the service table.
1484 * Called by __ip_vs_cleanup()
1485 */
1486void __ip_vs_service_cleanup(struct net *net)
1487{
1488 EnterFunction(2);
1489 /* Check for "full" addressed entries */
1490 mutex_lock(&__ip_vs_mutex);
1491 ip_vs_flush(net);
1492 mutex_unlock(&__ip_vs_mutex);
1493 LeaveFunction(2);
1494}
1495/*
1496 * Release dst hold by dst_cache
1497 */
1498static inline void
1499__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1500{
1501 spin_lock_bh(&dest->dst_lock);
1502 if (dest->dst_cache && dest->dst_cache->dev == dev) {
1503 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1504 dev->name,
1505 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1506 ntohs(dest->port),
1507 atomic_read(&dest->refcnt));
1508 ip_vs_dst_reset(dest);
1509 }
1510 spin_unlock_bh(&dest->dst_lock);
1511
1512}
1513/*
1514 * Netdev event receiver
1515 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1516 * a device that is "unregister" it must be released.
1517 */
1518static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1519 void *ptr)
1520{
1521 struct net_device *dev = ptr;
1522 struct net *net = dev_net(dev);
1523 struct ip_vs_service *svc;
1524 struct ip_vs_dest *dest;
1525 unsigned int idx;
1526
1527 if (event != NETDEV_UNREGISTER)
1528 return NOTIFY_DONE;
1529 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1530 EnterFunction(2);
1531 mutex_lock(&__ip_vs_mutex);
1532 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1533 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1534 if (net_eq(svc->net, net)) {
1535 list_for_each_entry(dest, &svc->destinations,
1536 n_list) {
1537 __ip_vs_dev_reset(dest, dev);
1538 }
1539 }
1540 }
1541
1542 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1543 if (net_eq(svc->net, net)) {
1544 list_for_each_entry(dest, &svc->destinations,
1545 n_list) {
1546 __ip_vs_dev_reset(dest, dev);
1547 }
1548 }
1549
1550 }
1551 }
1552
1553 list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
1554 __ip_vs_dev_reset(dest, dev);
1555 }
1556 mutex_unlock(&__ip_vs_mutex);
1557 LeaveFunction(2);
1558 return NOTIFY_DONE;
1559}
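ip_vs_dst_event() above only fires once it is attached to the netdevice notifier chain, which this patch presumably does elsewhere; the usual wiring looks roughly like the following hypothetical sketch:

/*
 * Illustrative sketch only (hypothetical names). A NETDEV_UNREGISTER
 * handler like ip_vs_dst_event() is hooked up via a notifier_block.
 */
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int my_dst_event(struct notifier_block *this, unsigned long event,
			void *ptr)
{
	struct net_device *dev = ptr;	/* same convention as ip_vs_dst_event() */

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	pr_debug("dropping cached dst entries for %s\n", dev->name);
	/* walk the per-namespace destinations and reset their dst caches */
	return NOTIFY_DONE;
}

static struct notifier_block my_dst_notifier = {
	.notifier_call = my_dst_event,
};

/* during init:    register_netdevice_notifier(&my_dst_notifier);   */
/* during cleanup: unregister_netdevice_notifier(&my_dst_notifier); */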
1458 1560
1459/* 1561/*
1460 * Zero counters in a service or all services 1562 * Zero counters in a service or all services
@@ -1472,32 +1574,35 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
1472 return 0; 1574 return 0;
1473} 1575}
1474 1576
1475static int ip_vs_zero_all(void) 1577static int ip_vs_zero_all(struct net *net)
1476{ 1578{
1477 int idx; 1579 int idx;
1478 struct ip_vs_service *svc; 1580 struct ip_vs_service *svc;
1479 1581
1480 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1582 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1481 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1583 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1482 ip_vs_zero_service(svc); 1584 if (net_eq(svc->net, net))
1585 ip_vs_zero_service(svc);
1483 } 1586 }
1484 } 1587 }
1485 1588
1486 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1589 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1590 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1488 ip_vs_zero_service(svc); 1591 if (net_eq(svc->net, net))
1592 ip_vs_zero_service(svc);
1489 } 1593 }
1490 } 1594 }
1491 1595
1492 ip_vs_zero_stats(&ip_vs_stats); 1596 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1493 return 0; 1597 return 0;
1494} 1598}
1495 1599
1496 1600#ifdef CONFIG_SYSCTL
1497static int 1601static int
1498proc_do_defense_mode(ctl_table *table, int write, 1602proc_do_defense_mode(ctl_table *table, int write,
1499 void __user *buffer, size_t *lenp, loff_t *ppos) 1603 void __user *buffer, size_t *lenp, loff_t *ppos)
1500{ 1604{
1605 struct net *net = current->nsproxy->net_ns;
1501 int *valp = table->data; 1606 int *valp = table->data;
1502 int val = *valp; 1607 int val = *valp;
1503 int rc; 1608 int rc;
@@ -1508,13 +1613,12 @@ proc_do_defense_mode(ctl_table *table, int write,
1508 /* Restore the correct value */ 1613 /* Restore the correct value */
1509 *valp = val; 1614 *valp = val;
1510 } else { 1615 } else {
1511 update_defense_level(); 1616 update_defense_level(net_ipvs(net));
1512 } 1617 }
1513 } 1618 }
1514 return rc; 1619 return rc;
1515} 1620}
1516 1621
1517
1518static int 1622static int
1519proc_do_sync_threshold(ctl_table *table, int write, 1623proc_do_sync_threshold(ctl_table *table, int write,
1520 void __user *buffer, size_t *lenp, loff_t *ppos) 1624 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1534,45 +1638,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
1534 return rc; 1638 return rc;
1535} 1639}
1536 1640
1641static int
1642proc_do_sync_mode(ctl_table *table, int write,
1643 void __user *buffer, size_t *lenp, loff_t *ppos)
1644{
1645 int *valp = table->data;
1646 int val = *valp;
1647 int rc;
1648
1649 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1650 if (write && (*valp != val)) {
1651 if ((*valp < 0) || (*valp > 1)) {
1652 /* Restore the correct value */
1653 *valp = val;
1654 } else {
1655 struct net *net = current->nsproxy->net_ns;
1656 ip_vs_sync_switch_mode(net, val);
1657 }
1658 }
1659 return rc;
1660}
1537 1661
1538/* 1662/*
1539 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1663 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1664 * Do not change order or insert new entries without
 1665 * aligning with netns init in __ip_vs_control_init()
1540 */ 1666 */
1541 1667
1542static struct ctl_table vs_vars[] = { 1668static struct ctl_table vs_vars[] = {
1543 { 1669 {
1544 .procname = "amemthresh", 1670 .procname = "amemthresh",
1545 .data = &sysctl_ip_vs_amemthresh,
1546 .maxlen = sizeof(int), 1671 .maxlen = sizeof(int),
1547 .mode = 0644, 1672 .mode = 0644,
1548 .proc_handler = proc_dointvec, 1673 .proc_handler = proc_dointvec,
1549 }, 1674 },
1550#ifdef CONFIG_IP_VS_DEBUG
1551 {
1552 .procname = "debug_level",
1553 .data = &sysctl_ip_vs_debug_level,
1554 .maxlen = sizeof(int),
1555 .mode = 0644,
1556 .proc_handler = proc_dointvec,
1557 },
1558#endif
1559 { 1675 {
1560 .procname = "am_droprate", 1676 .procname = "am_droprate",
1561 .data = &sysctl_ip_vs_am_droprate,
1562 .maxlen = sizeof(int), 1677 .maxlen = sizeof(int),
1563 .mode = 0644, 1678 .mode = 0644,
1564 .proc_handler = proc_dointvec, 1679 .proc_handler = proc_dointvec,
1565 }, 1680 },
1566 { 1681 {
1567 .procname = "drop_entry", 1682 .procname = "drop_entry",
1568 .data = &sysctl_ip_vs_drop_entry,
1569 .maxlen = sizeof(int), 1683 .maxlen = sizeof(int),
1570 .mode = 0644, 1684 .mode = 0644,
1571 .proc_handler = proc_do_defense_mode, 1685 .proc_handler = proc_do_defense_mode,
1572 }, 1686 },
1573 { 1687 {
1574 .procname = "drop_packet", 1688 .procname = "drop_packet",
1575 .data = &sysctl_ip_vs_drop_packet,
1576 .maxlen = sizeof(int), 1689 .maxlen = sizeof(int),
1577 .mode = 0644, 1690 .mode = 0644,
1578 .proc_handler = proc_do_defense_mode, 1691 .proc_handler = proc_do_defense_mode,
@@ -1580,7 +1693,6 @@ static struct ctl_table vs_vars[] = {
1580#ifdef CONFIG_IP_VS_NFCT 1693#ifdef CONFIG_IP_VS_NFCT
1581 { 1694 {
1582 .procname = "conntrack", 1695 .procname = "conntrack",
1583 .data = &sysctl_ip_vs_conntrack,
1584 .maxlen = sizeof(int), 1696 .maxlen = sizeof(int),
1585 .mode = 0644, 1697 .mode = 0644,
1586 .proc_handler = &proc_dointvec, 1698 .proc_handler = &proc_dointvec,
@@ -1588,18 +1700,62 @@ static struct ctl_table vs_vars[] = {
1588#endif 1700#endif
1589 { 1701 {
1590 .procname = "secure_tcp", 1702 .procname = "secure_tcp",
1591 .data = &sysctl_ip_vs_secure_tcp,
1592 .maxlen = sizeof(int), 1703 .maxlen = sizeof(int),
1593 .mode = 0644, 1704 .mode = 0644,
1594 .proc_handler = proc_do_defense_mode, 1705 .proc_handler = proc_do_defense_mode,
1595 }, 1706 },
1596 { 1707 {
1597 .procname = "snat_reroute", 1708 .procname = "snat_reroute",
1598 .data = &sysctl_ip_vs_snat_reroute,
1599 .maxlen = sizeof(int), 1709 .maxlen = sizeof(int),
1600 .mode = 0644, 1710 .mode = 0644,
1601 .proc_handler = &proc_dointvec, 1711 .proc_handler = &proc_dointvec,
1602 }, 1712 },
1713 {
1714 .procname = "sync_version",
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
1717 .proc_handler = &proc_do_sync_mode,
1718 },
1719 {
1720 .procname = "cache_bypass",
1721 .maxlen = sizeof(int),
1722 .mode = 0644,
1723 .proc_handler = proc_dointvec,
1724 },
1725 {
1726 .procname = "expire_nodest_conn",
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
1729 .proc_handler = proc_dointvec,
1730 },
1731 {
1732 .procname = "expire_quiescent_template",
1733 .maxlen = sizeof(int),
1734 .mode = 0644,
1735 .proc_handler = proc_dointvec,
1736 },
1737 {
1738 .procname = "sync_threshold",
1739 .maxlen =
1740 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1741 .mode = 0644,
1742 .proc_handler = proc_do_sync_threshold,
1743 },
1744 {
1745 .procname = "nat_icmp_send",
1746 .maxlen = sizeof(int),
1747 .mode = 0644,
1748 .proc_handler = proc_dointvec,
1749 },
1750#ifdef CONFIG_IP_VS_DEBUG
1751 {
1752 .procname = "debug_level",
1753 .data = &sysctl_ip_vs_debug_level,
1754 .maxlen = sizeof(int),
1755 .mode = 0644,
1756 .proc_handler = proc_dointvec,
1757 },
1758#endif
1603#if 0 1759#if 0
1604 { 1760 {
1605 .procname = "timeout_established", 1761 .procname = "timeout_established",
@@ -1686,41 +1842,6 @@ static struct ctl_table vs_vars[] = {
1686 .proc_handler = proc_dointvec_jiffies, 1842 .proc_handler = proc_dointvec_jiffies,
1687 }, 1843 },
1688#endif 1844#endif
1689 {
1690 .procname = "cache_bypass",
1691 .data = &sysctl_ip_vs_cache_bypass,
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
1694 .proc_handler = proc_dointvec,
1695 },
1696 {
1697 .procname = "expire_nodest_conn",
1698 .data = &sysctl_ip_vs_expire_nodest_conn,
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
1701 .proc_handler = proc_dointvec,
1702 },
1703 {
1704 .procname = "expire_quiescent_template",
1705 .data = &sysctl_ip_vs_expire_quiescent_template,
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
1708 .proc_handler = proc_dointvec,
1709 },
1710 {
1711 .procname = "sync_threshold",
1712 .data = &sysctl_ip_vs_sync_threshold,
1713 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1714 .mode = 0644,
1715 .proc_handler = proc_do_sync_threshold,
1716 },
1717 {
1718 .procname = "nat_icmp_send",
1719 .data = &sysctl_ip_vs_nat_icmp_send,
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
1722 .proc_handler = proc_dointvec,
1723 },
1724 { } 1845 { }
1725}; 1846};
1726 1847
@@ -1731,12 +1852,12 @@ const struct ctl_path net_vs_ctl_path[] = {
1731 { } 1852 { }
1732}; 1853};
1733EXPORT_SYMBOL_GPL(net_vs_ctl_path); 1854EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1734 1855#endif
1735static struct ctl_table_header * sysctl_header;
1736 1856
1737#ifdef CONFIG_PROC_FS 1857#ifdef CONFIG_PROC_FS
1738 1858
1739struct ip_vs_iter { 1859struct ip_vs_iter {
1860 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1740 struct list_head *table; 1861 struct list_head *table;
1741 int bucket; 1862 int bucket;
1742}; 1863};
@@ -1763,6 +1884,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
1763/* Get the Nth entry in the two lists */ 1884/* Get the Nth entry in the two lists */
1764static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1885static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1765{ 1886{
1887 struct net *net = seq_file_net(seq);
1766 struct ip_vs_iter *iter = seq->private; 1888 struct ip_vs_iter *iter = seq->private;
1767 int idx; 1889 int idx;
1768 struct ip_vs_service *svc; 1890 struct ip_vs_service *svc;
@@ -1770,7 +1892,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1770 /* look in hash by protocol */ 1892 /* look in hash by protocol */
1771 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1893 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1772 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1894 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1773 if (pos-- == 0){ 1895 if (net_eq(svc->net, net) && pos-- == 0) {
1774 iter->table = ip_vs_svc_table; 1896 iter->table = ip_vs_svc_table;
1775 iter->bucket = idx; 1897 iter->bucket = idx;
1776 return svc; 1898 return svc;
@@ -1781,7 +1903,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1781 /* keep looking in fwmark */ 1903 /* keep looking in fwmark */
1782 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1904 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1783 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1905 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1784 if (pos-- == 0) { 1906 if (net_eq(svc->net, net) && pos-- == 0) {
1785 iter->table = ip_vs_svc_fwm_table; 1907 iter->table = ip_vs_svc_fwm_table;
1786 iter->bucket = idx; 1908 iter->bucket = idx;
1787 return svc; 1909 return svc;
@@ -1935,7 +2057,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
1935 2057
1936static int ip_vs_info_open(struct inode *inode, struct file *file) 2058static int ip_vs_info_open(struct inode *inode, struct file *file)
1937{ 2059{
1938 return seq_open_private(file, &ip_vs_info_seq_ops, 2060 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1939 sizeof(struct ip_vs_iter)); 2061 sizeof(struct ip_vs_iter));
1940} 2062}
1941 2063
@@ -1944,18 +2066,13 @@ static const struct file_operations ip_vs_info_fops = {
1944 .open = ip_vs_info_open, 2066 .open = ip_vs_info_open,
1945 .read = seq_read, 2067 .read = seq_read,
1946 .llseek = seq_lseek, 2068 .llseek = seq_lseek,
1947 .release = seq_release_private, 2069 .release = seq_release_net,
1948};
1949
1950#endif
1951
1952struct ip_vs_stats ip_vs_stats = {
1953 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1954}; 2070};
1955 2071
1956#ifdef CONFIG_PROC_FS
1957static int ip_vs_stats_show(struct seq_file *seq, void *v) 2072static int ip_vs_stats_show(struct seq_file *seq, void *v)
1958{ 2073{
2074 struct net *net = seq_file_single_net(seq);
2075 struct ip_vs_stats_user show;
1959 2076
1960/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2077/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1961 seq_puts(seq, 2078 seq_puts(seq,
@@ -1963,29 +2080,25 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
1963 seq_printf(seq, 2080 seq_printf(seq,
1964 " Conns Packets Packets Bytes Bytes\n"); 2081 " Conns Packets Packets Bytes Bytes\n");
1965 2082
1966 spin_lock_bh(&ip_vs_stats.lock); 2083 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
1967 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, 2084 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
1968 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, 2085 show.inpkts, show.outpkts,
1969 (unsigned long long) ip_vs_stats.ustats.inbytes, 2086 (unsigned long long) show.inbytes,
1970 (unsigned long long) ip_vs_stats.ustats.outbytes); 2087 (unsigned long long) show.outbytes);
1971 2088
1972/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2089/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1973 seq_puts(seq, 2090 seq_puts(seq,
1974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2091 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1975 seq_printf(seq,"%8X %8X %8X %16X %16X\n", 2092 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
1976 ip_vs_stats.ustats.cps, 2093 show.cps, show.inpps, show.outpps,
1977 ip_vs_stats.ustats.inpps, 2094 show.inbps, show.outbps);
1978 ip_vs_stats.ustats.outpps,
1979 ip_vs_stats.ustats.inbps,
1980 ip_vs_stats.ustats.outbps);
1981 spin_unlock_bh(&ip_vs_stats.lock);
1982 2095
1983 return 0; 2096 return 0;
1984} 2097}
1985 2098
1986static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 2099static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1987{ 2100{
1988 return single_open(file, ip_vs_stats_show, NULL); 2101 return single_open_net(inode, file, ip_vs_stats_show);
1989} 2102}
1990 2103
1991static const struct file_operations ip_vs_stats_fops = { 2104static const struct file_operations ip_vs_stats_fops = {
@@ -1993,16 +2106,88 @@ static const struct file_operations ip_vs_stats_fops = {
1993 .open = ip_vs_stats_seq_open, 2106 .open = ip_vs_stats_seq_open,
1994 .read = seq_read, 2107 .read = seq_read,
1995 .llseek = seq_lseek, 2108 .llseek = seq_lseek,
1996 .release = single_release, 2109 .release = single_release_net,
1997}; 2110};
1998 2111
2112static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2113{
2114 struct net *net = seq_file_single_net(seq);
2115 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2116 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2117 struct ip_vs_stats_user rates;
2118 int i;
2119
2120/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2121 seq_puts(seq,
2122 " Total Incoming Outgoing Incoming Outgoing\n");
2123 seq_printf(seq,
2124 "CPU Conns Packets Packets Bytes Bytes\n");
2125
2126 for_each_possible_cpu(i) {
2127 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2128 unsigned int start;
2129 __u64 inbytes, outbytes;
2130
2131 do {
2132 start = u64_stats_fetch_begin_bh(&u->syncp);
2133 inbytes = u->ustats.inbytes;
2134 outbytes = u->ustats.outbytes;
2135 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2136
2137 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2138 i, u->ustats.conns, u->ustats.inpkts,
2139 u->ustats.outpkts, (__u64)inbytes,
2140 (__u64)outbytes);
2141 }
2142
2143 spin_lock_bh(&tot_stats->lock);
2144
2145 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2146 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2147 tot_stats->ustats.outpkts,
2148 (unsigned long long) tot_stats->ustats.inbytes,
2149 (unsigned long long) tot_stats->ustats.outbytes);
2150
2151 ip_vs_read_estimator(&rates, tot_stats);
2152
2153 spin_unlock_bh(&tot_stats->lock);
2154
2155/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2156 seq_puts(seq,
2157 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2158 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2159 rates.cps,
2160 rates.inpps,
2161 rates.outpps,
2162 rates.inbps,
2163 rates.outbps);
2164
2165 return 0;
2166}
2167
2168static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2169{
2170 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2171}
2172
2173static const struct file_operations ip_vs_stats_percpu_fops = {
2174 .owner = THIS_MODULE,
2175 .open = ip_vs_stats_percpu_seq_open,
2176 .read = seq_read,
2177 .llseek = seq_lseek,
2178 .release = single_release_net,
2179};
1999#endif 2180#endif
2000 2181
2001/* 2182/*
2002 * Set timeout values for tcp tcpfin udp in the timeout_table. 2183 * Set timeout values for tcp tcpfin udp in the timeout_table.
2003 */ 2184 */
2004static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) 2185static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2005{ 2186{
2187#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2188 struct ip_vs_proto_data *pd;
2189#endif
2190
2006 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2191 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2007 u->tcp_timeout, 2192 u->tcp_timeout,
2008 u->tcp_fin_timeout, 2193 u->tcp_fin_timeout,
@@ -2010,19 +2195,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2010 2195
2011#ifdef CONFIG_IP_VS_PROTO_TCP 2196#ifdef CONFIG_IP_VS_PROTO_TCP
2012 if (u->tcp_timeout) { 2197 if (u->tcp_timeout) {
2013 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] 2198 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2199 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2014 = u->tcp_timeout * HZ; 2200 = u->tcp_timeout * HZ;
2015 } 2201 }
2016 2202
2017 if (u->tcp_fin_timeout) { 2203 if (u->tcp_fin_timeout) {
2018 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] 2204 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2205 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2019 = u->tcp_fin_timeout * HZ; 2206 = u->tcp_fin_timeout * HZ;
2020 } 2207 }
2021#endif 2208#endif
2022 2209
2023#ifdef CONFIG_IP_VS_PROTO_UDP 2210#ifdef CONFIG_IP_VS_PROTO_UDP
2024 if (u->udp_timeout) { 2211 if (u->udp_timeout) {
2025 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] 2212 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2213 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2026 = u->udp_timeout * HZ; 2214 = u->udp_timeout * HZ;
2027 } 2215 }
2028#endif 2216#endif
@@ -2087,6 +2275,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2087static int 2275static int
2088do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 2276do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2089{ 2277{
2278 struct net *net = sock_net(sk);
2090 int ret; 2279 int ret;
2091 unsigned char arg[MAX_ARG_LEN]; 2280 unsigned char arg[MAX_ARG_LEN];
2092 struct ip_vs_service_user *usvc_compat; 2281 struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2310,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2121 2310
2122 if (cmd == IP_VS_SO_SET_FLUSH) { 2311 if (cmd == IP_VS_SO_SET_FLUSH) {
2123 /* Flush the virtual service */ 2312 /* Flush the virtual service */
2124 ret = ip_vs_flush(); 2313 ret = ip_vs_flush(net);
2125 goto out_unlock; 2314 goto out_unlock;
2126 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2315 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2127 /* Set timeout values for (tcp tcpfin udp) */ 2316 /* Set timeout values for (tcp tcpfin udp) */
2128 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); 2317 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2129 goto out_unlock; 2318 goto out_unlock;
2130 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { 2319 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2131 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2320 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2132 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); 2321 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2322 dm->syncid);
2133 goto out_unlock; 2323 goto out_unlock;
2134 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { 2324 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2135 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2325 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2136 ret = stop_sync_thread(dm->state); 2326 ret = stop_sync_thread(net, dm->state);
2137 goto out_unlock; 2327 goto out_unlock;
2138 } 2328 }
2139 2329
@@ -2148,7 +2338,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2148 if (cmd == IP_VS_SO_SET_ZERO) { 2338 if (cmd == IP_VS_SO_SET_ZERO) {
2149 /* if no service address is set, zero counters in all */ 2339 /* if no service address is set, zero counters in all */
2150 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2340 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2151 ret = ip_vs_zero_all(); 2341 ret = ip_vs_zero_all(net);
2152 goto out_unlock; 2342 goto out_unlock;
2153 } 2343 }
2154 } 2344 }
@@ -2165,10 +2355,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2165 2355
2166 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2356 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2167 if (usvc.fwmark == 0) 2357 if (usvc.fwmark == 0)
2168 svc = __ip_vs_service_find(usvc.af, usvc.protocol, 2358 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2169 &usvc.addr, usvc.port); 2359 &usvc.addr, usvc.port);
2170 else 2360 else
2171 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); 2361 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2172 2362
2173 if (cmd != IP_VS_SO_SET_ADD 2363 if (cmd != IP_VS_SO_SET_ADD
2174 && (svc == NULL || svc->protocol != usvc.protocol)) { 2364 && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2371,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2181 if (svc != NULL) 2371 if (svc != NULL)
2182 ret = -EEXIST; 2372 ret = -EEXIST;
2183 else 2373 else
2184 ret = ip_vs_add_service(&usvc, &svc); 2374 ret = ip_vs_add_service(net, &usvc, &svc);
2185 break; 2375 break;
2186 case IP_VS_SO_SET_EDIT: 2376 case IP_VS_SO_SET_EDIT:
2187 ret = ip_vs_edit_service(svc, &usvc); 2377 ret = ip_vs_edit_service(svc, &usvc);
@@ -2218,14 +2408,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2218 2408
2219 2409
2220static void 2410static void
2221ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2222{
2223 spin_lock_bh(&src->lock);
2224 memcpy(dst, &src->ustats, sizeof(*dst));
2225 spin_unlock_bh(&src->lock);
2226}
2227
2228static void
2229ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2411ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2230{ 2412{
2231 dst->protocol = src->protocol; 2413 dst->protocol = src->protocol;
@@ -2241,7 +2423,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2241} 2423}
2242 2424
2243static inline int 2425static inline int
2244__ip_vs_get_service_entries(const struct ip_vs_get_services *get, 2426__ip_vs_get_service_entries(struct net *net,
2427 const struct ip_vs_get_services *get,
2245 struct ip_vs_get_services __user *uptr) 2428 struct ip_vs_get_services __user *uptr)
2246{ 2429{
2247 int idx, count=0; 2430 int idx, count=0;
@@ -2252,7 +2435,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2252 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2435 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2253 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2436 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2254 /* Only expose IPv4 entries to old interface */ 2437 /* Only expose IPv4 entries to old interface */
2255 if (svc->af != AF_INET) 2438 if (svc->af != AF_INET || !net_eq(svc->net, net))
2256 continue; 2439 continue;
2257 2440
2258 if (count >= get->num_services) 2441 if (count >= get->num_services)
@@ -2271,7 +2454,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2271 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2454 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2272 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2455 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2273 /* Only expose IPv4 entries to old interface */ 2456 /* Only expose IPv4 entries to old interface */
2274 if (svc->af != AF_INET) 2457 if (svc->af != AF_INET || !net_eq(svc->net, net))
2275 continue; 2458 continue;
2276 2459
2277 if (count >= get->num_services) 2460 if (count >= get->num_services)
@@ -2291,7 +2474,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2291} 2474}
2292 2475
2293static inline int 2476static inline int
2294__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, 2477__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2295 struct ip_vs_get_dests __user *uptr) 2478 struct ip_vs_get_dests __user *uptr)
2296{ 2479{
2297 struct ip_vs_service *svc; 2480 struct ip_vs_service *svc;
@@ -2299,9 +2482,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2299 int ret = 0; 2482 int ret = 0;
2300 2483
2301 if (get->fwmark) 2484 if (get->fwmark)
2302 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); 2485 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2303 else 2486 else
2304 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, 2487 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2305 get->port); 2488 get->port);
2306 2489
2307 if (svc) { 2490 if (svc) {
@@ -2336,17 +2519,21 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2336} 2519}
2337 2520
2338static inline void 2521static inline void
2339__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) 2522__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2340{ 2523{
2524#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2525 struct ip_vs_proto_data *pd;
2526#endif
2527
2341#ifdef CONFIG_IP_VS_PROTO_TCP 2528#ifdef CONFIG_IP_VS_PROTO_TCP
2342 u->tcp_timeout = 2529 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2343 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2530 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2344 u->tcp_fin_timeout = 2531 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2345 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2346#endif 2532#endif
2347#ifdef CONFIG_IP_VS_PROTO_UDP 2533#ifdef CONFIG_IP_VS_PROTO_UDP
2534 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2348 u->udp_timeout = 2535 u->udp_timeout =
2349 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2536 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2350#endif 2537#endif
2351} 2538}
2352 2539
@@ -2375,7 +2562,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2375 unsigned char arg[128]; 2562 unsigned char arg[128];
2376 int ret = 0; 2563 int ret = 0;
2377 unsigned int copylen; 2564 unsigned int copylen;
2565 struct net *net = sock_net(sk);
2566 struct netns_ipvs *ipvs = net_ipvs(net);
2378 2567
2568 BUG_ON(!net);
2379 if (!capable(CAP_NET_ADMIN)) 2569 if (!capable(CAP_NET_ADMIN))
2380 return -EPERM; 2570 return -EPERM;
2381 2571
@@ -2418,7 +2608,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2418 struct ip_vs_getinfo info; 2608 struct ip_vs_getinfo info;
2419 info.version = IP_VS_VERSION_CODE; 2609 info.version = IP_VS_VERSION_CODE;
2420 info.size = ip_vs_conn_tab_size; 2610 info.size = ip_vs_conn_tab_size;
2421 info.num_services = ip_vs_num_services; 2611 info.num_services = ipvs->num_services;
2422 if (copy_to_user(user, &info, sizeof(info)) != 0) 2612 if (copy_to_user(user, &info, sizeof(info)) != 0)
2423 ret = -EFAULT; 2613 ret = -EFAULT;
2424 } 2614 }
@@ -2437,7 +2627,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2437 ret = -EINVAL; 2627 ret = -EINVAL;
2438 goto out; 2628 goto out;
2439 } 2629 }
2440 ret = __ip_vs_get_service_entries(get, user); 2630 ret = __ip_vs_get_service_entries(net, get, user);
2441 } 2631 }
2442 break; 2632 break;
2443 2633
@@ -2450,10 +2640,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2450 entry = (struct ip_vs_service_entry *)arg; 2640 entry = (struct ip_vs_service_entry *)arg;
2451 addr.ip = entry->addr; 2641 addr.ip = entry->addr;
2452 if (entry->fwmark) 2642 if (entry->fwmark)
2453 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); 2643 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2454 else 2644 else
2455 svc = __ip_vs_service_find(AF_INET, entry->protocol, 2645 svc = __ip_vs_service_find(net, AF_INET,
2456 &addr, entry->port); 2646 entry->protocol, &addr,
2647 entry->port);
2457 if (svc) { 2648 if (svc) {
2458 ip_vs_copy_service(entry, svc); 2649 ip_vs_copy_service(entry, svc);
2459 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2650 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2667,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2476 ret = -EINVAL; 2667 ret = -EINVAL;
2477 goto out; 2668 goto out;
2478 } 2669 }
2479 ret = __ip_vs_get_dest_entries(get, user); 2670 ret = __ip_vs_get_dest_entries(net, get, user);
2480 } 2671 }
2481 break; 2672 break;
2482 2673
@@ -2484,7 +2675,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2484 { 2675 {
2485 struct ip_vs_timeout_user t; 2676 struct ip_vs_timeout_user t;
2486 2677
2487 __ip_vs_get_timeouts(&t); 2678 __ip_vs_get_timeouts(net, &t);
2488 if (copy_to_user(user, &t, sizeof(t)) != 0) 2679 if (copy_to_user(user, &t, sizeof(t)) != 0)
2489 ret = -EFAULT; 2680 ret = -EFAULT;
2490 } 2681 }
@@ -2495,15 +2686,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2495 struct ip_vs_daemon_user d[2]; 2686 struct ip_vs_daemon_user d[2];
2496 2687
2497 memset(&d, 0, sizeof(d)); 2688 memset(&d, 0, sizeof(d));
2498 if (ip_vs_sync_state & IP_VS_STATE_MASTER) { 2689 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2499 d[0].state = IP_VS_STATE_MASTER; 2690 d[0].state = IP_VS_STATE_MASTER;
2500 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); 2691 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2501 d[0].syncid = ip_vs_master_syncid; 2692 sizeof(d[0].mcast_ifn));
2693 d[0].syncid = ipvs->master_syncid;
2502 } 2694 }
2503 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { 2695 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2504 d[1].state = IP_VS_STATE_BACKUP; 2696 d[1].state = IP_VS_STATE_BACKUP;
2505 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); 2697 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2506 d[1].syncid = ip_vs_backup_syncid; 2698 sizeof(d[1].mcast_ifn));
2699 d[1].syncid = ipvs->backup_syncid;
2507 } 2700 }
2508 if (copy_to_user(user, &d, sizeof(d)) != 0) 2701 if (copy_to_user(user, &d, sizeof(d)) != 0)
2509 ret = -EFAULT; 2702 ret = -EFAULT;
@@ -2542,6 +2735,7 @@ static struct genl_family ip_vs_genl_family = {
2542 .name = IPVS_GENL_NAME, 2735 .name = IPVS_GENL_NAME,
2543 .version = IPVS_GENL_VERSION, 2736 .version = IPVS_GENL_VERSION,
2544 .maxattr = IPVS_CMD_MAX, 2737 .maxattr = IPVS_CMD_MAX,
 2738 .netnsok = true, /* Make ipvsadm work on netns */
2545}; 2739};
2546 2740
2547/* Policy used for first-level command attributes */ 2741/* Policy used for first-level command attributes */
@@ -2599,31 +2793,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2599static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 2793static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2600 struct ip_vs_stats *stats) 2794 struct ip_vs_stats *stats)
2601{ 2795{
2796 struct ip_vs_stats_user ustats;
2602 struct nlattr *nl_stats = nla_nest_start(skb, container_type); 2797 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2603 if (!nl_stats) 2798 if (!nl_stats)
2604 return -EMSGSIZE; 2799 return -EMSGSIZE;
2605 2800
2606 spin_lock_bh(&stats->lock); 2801 ip_vs_copy_stats(&ustats, stats);
2607 2802
2608 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); 2803 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2609 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); 2804 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2610 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); 2805 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2611 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); 2806 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2612 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); 2807 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2613 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); 2808 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2614 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); 2809 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2615 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); 2810 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2616 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); 2811 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2617 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); 2812 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2618
2619 spin_unlock_bh(&stats->lock);
2620 2813
2621 nla_nest_end(skb, nl_stats); 2814 nla_nest_end(skb, nl_stats);
2622 2815
2623 return 0; 2816 return 0;
2624 2817
2625nla_put_failure: 2818nla_put_failure:
2626 spin_unlock_bh(&stats->lock);
2627 nla_nest_cancel(skb, nl_stats); 2819 nla_nest_cancel(skb, nl_stats);
2628 return -EMSGSIZE; 2820 return -EMSGSIZE;
2629} 2821}
@@ -2696,11 +2888,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2696 int idx = 0, i; 2888 int idx = 0, i;
2697 int start = cb->args[0]; 2889 int start = cb->args[0];
2698 struct ip_vs_service *svc; 2890 struct ip_vs_service *svc;
2891 struct net *net = skb_sknet(skb);
2699 2892
2700 mutex_lock(&__ip_vs_mutex); 2893 mutex_lock(&__ip_vs_mutex);
2701 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2894 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2702 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 2895 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2703 if (++idx <= start) 2896 if (++idx <= start || !net_eq(svc->net, net))
2704 continue; 2897 continue;
2705 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2898 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2706 idx--; 2899 idx--;
@@ -2711,7 +2904,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2711 2904
2712 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2905 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2713 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 2906 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2714 if (++idx <= start) 2907 if (++idx <= start || !net_eq(svc->net, net))
2715 continue; 2908 continue;
2716 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2909 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2717 idx--; 2910 idx--;
@@ -2727,7 +2920,8 @@ nla_put_failure:
2727 return skb->len; 2920 return skb->len;
2728} 2921}
2729 2922
2730static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, 2923static int ip_vs_genl_parse_service(struct net *net,
2924 struct ip_vs_service_user_kern *usvc,
2731 struct nlattr *nla, int full_entry, 2925 struct nlattr *nla, int full_entry,
2732 struct ip_vs_service **ret_svc) 2926 struct ip_vs_service **ret_svc)
2733{ 2927{
@@ -2770,9 +2964,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2770 } 2964 }
2771 2965
2772 if (usvc->fwmark) 2966 if (usvc->fwmark)
2773 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); 2967 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2774 else 2968 else
2775 svc = __ip_vs_service_find(usvc->af, usvc->protocol, 2969 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2776 &usvc->addr, usvc->port); 2970 &usvc->addr, usvc->port);
2777 *ret_svc = svc; 2971 *ret_svc = svc;
2778 2972
@@ -2809,13 +3003,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2809 return 0; 3003 return 0;
2810} 3004}
2811 3005
2812static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) 3006static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3007 struct nlattr *nla)
2813{ 3008{
2814 struct ip_vs_service_user_kern usvc; 3009 struct ip_vs_service_user_kern usvc;
2815 struct ip_vs_service *svc; 3010 struct ip_vs_service *svc;
2816 int ret; 3011 int ret;
2817 3012
2818 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); 3013 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2819 return ret ? ERR_PTR(ret) : svc; 3014 return ret ? ERR_PTR(ret) : svc;
2820} 3015}
2821 3016
@@ -2883,6 +3078,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2883 struct ip_vs_service *svc; 3078 struct ip_vs_service *svc;
2884 struct ip_vs_dest *dest; 3079 struct ip_vs_dest *dest;
2885 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 3080 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3081 struct net *net = skb_sknet(skb);
2886 3082
2887 mutex_lock(&__ip_vs_mutex); 3083 mutex_lock(&__ip_vs_mutex);
2888 3084
@@ -2891,7 +3087,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2891 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) 3087 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2892 goto out_err; 3088 goto out_err;
2893 3089
2894 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); 3090
3091 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
2895 if (IS_ERR(svc) || svc == NULL) 3092 if (IS_ERR(svc) || svc == NULL)
2896 goto out_err; 3093 goto out_err;
2897 3094
@@ -3005,20 +3202,23 @@ nla_put_failure:
3005static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3202static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3006 struct netlink_callback *cb) 3203 struct netlink_callback *cb)
3007{ 3204{
3205 struct net *net = skb_sknet(skb);
3206 struct netns_ipvs *ipvs = net_ipvs(net);
3207
3008 mutex_lock(&__ip_vs_mutex); 3208 mutex_lock(&__ip_vs_mutex);
3009 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3209 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3010 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3210 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3011 ip_vs_master_mcast_ifn, 3211 ipvs->master_mcast_ifn,
3012 ip_vs_master_syncid, cb) < 0) 3212 ipvs->master_syncid, cb) < 0)
3013 goto nla_put_failure; 3213 goto nla_put_failure;
3014 3214
3015 cb->args[0] = 1; 3215 cb->args[0] = 1;
3016 } 3216 }
3017 3217
3018 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3218 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3019 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3219 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3020 ip_vs_backup_mcast_ifn, 3220 ipvs->backup_mcast_ifn,
3021 ip_vs_backup_syncid, cb) < 0) 3221 ipvs->backup_syncid, cb) < 0)
3022 goto nla_put_failure; 3222 goto nla_put_failure;
3023 3223
3024 cb->args[1] = 1; 3224 cb->args[1] = 1;
@@ -3030,31 +3230,33 @@ nla_put_failure:
3030 return skb->len; 3230 return skb->len;
3031} 3231}
3032 3232
3033static int ip_vs_genl_new_daemon(struct nlattr **attrs) 3233static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3034{ 3234{
3035 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3235 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3036 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3236 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3037 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3237 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3038 return -EINVAL; 3238 return -EINVAL;
3039 3239
3040 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), 3240 return start_sync_thread(net,
3241 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3041 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3242 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3042 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); 3243 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3043} 3244}
3044 3245
3045static int ip_vs_genl_del_daemon(struct nlattr **attrs) 3246static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3046{ 3247{
3047 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3248 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3048 return -EINVAL; 3249 return -EINVAL;
3049 3250
3050 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3251 return stop_sync_thread(net,
3252 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3051} 3253}
3052 3254
3053static int ip_vs_genl_set_config(struct nlattr **attrs) 3255static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3054{ 3256{
3055 struct ip_vs_timeout_user t; 3257 struct ip_vs_timeout_user t;
3056 3258
3057 __ip_vs_get_timeouts(&t); 3259 __ip_vs_get_timeouts(net, &t);
3058 3260
3059 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3261 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3060 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3262 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3268,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
3066 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3268 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3067 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3269 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3068 3270
3069 return ip_vs_set_timeout(&t); 3271 return ip_vs_set_timeout(net, &t);
3070} 3272}
3071 3273
3072static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3274static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3278,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3076 struct ip_vs_dest_user_kern udest; 3278 struct ip_vs_dest_user_kern udest;
3077 int ret = 0, cmd; 3279 int ret = 0, cmd;
3078 int need_full_svc = 0, need_full_dest = 0; 3280 int need_full_svc = 0, need_full_dest = 0;
3281 struct net *net;
3282 struct netns_ipvs *ipvs;
3079 3283
3284 net = skb_sknet(skb);
3285 ipvs = net_ipvs(net);
3080 cmd = info->genlhdr->cmd; 3286 cmd = info->genlhdr->cmd;
3081 3287
3082 mutex_lock(&__ip_vs_mutex); 3288 mutex_lock(&__ip_vs_mutex);
3083 3289
3084 if (cmd == IPVS_CMD_FLUSH) { 3290 if (cmd == IPVS_CMD_FLUSH) {
3085 ret = ip_vs_flush(); 3291 ret = ip_vs_flush(net);
3086 goto out; 3292 goto out;
3087 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3293 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3088 ret = ip_vs_genl_set_config(info->attrs); 3294 ret = ip_vs_genl_set_config(net, info->attrs);
3089 goto out; 3295 goto out;
3090 } else if (cmd == IPVS_CMD_NEW_DAEMON || 3296 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3091 cmd == IPVS_CMD_DEL_DAEMON) { 3297 cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3307,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3101 } 3307 }
3102 3308
3103 if (cmd == IPVS_CMD_NEW_DAEMON) 3309 if (cmd == IPVS_CMD_NEW_DAEMON)
3104 ret = ip_vs_genl_new_daemon(daemon_attrs); 3310 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3105 else 3311 else
3106 ret = ip_vs_genl_del_daemon(daemon_attrs); 3312 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3107 goto out; 3313 goto out;
3108 } else if (cmd == IPVS_CMD_ZERO && 3314 } else if (cmd == IPVS_CMD_ZERO &&
3109 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3315 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3110 ret = ip_vs_zero_all(); 3316 ret = ip_vs_zero_all(net);
3111 goto out; 3317 goto out;
3112 } 3318 }
3113 3319
@@ -3117,7 +3323,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3117 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3323 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3118 need_full_svc = 1; 3324 need_full_svc = 1;
3119 3325
3120 ret = ip_vs_genl_parse_service(&usvc, 3326 ret = ip_vs_genl_parse_service(net, &usvc,
3121 info->attrs[IPVS_CMD_ATTR_SERVICE], 3327 info->attrs[IPVS_CMD_ATTR_SERVICE],
3122 need_full_svc, &svc); 3328 need_full_svc, &svc);
3123 if (ret) 3329 if (ret)
@@ -3147,7 +3353,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3147 switch (cmd) { 3353 switch (cmd) {
3148 case IPVS_CMD_NEW_SERVICE: 3354 case IPVS_CMD_NEW_SERVICE:
3149 if (svc == NULL) 3355 if (svc == NULL)
3150 ret = ip_vs_add_service(&usvc, &svc); 3356 ret = ip_vs_add_service(net, &usvc, &svc);
3151 else 3357 else
3152 ret = -EEXIST; 3358 ret = -EEXIST;
3153 break; 3359 break;
@@ -3185,7 +3391,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3185 struct sk_buff *msg; 3391 struct sk_buff *msg;
3186 void *reply; 3392 void *reply;
3187 int ret, cmd, reply_cmd; 3393 int ret, cmd, reply_cmd;
3394 struct net *net;
3395 struct netns_ipvs *ipvs;
3188 3396
3397 net = skb_sknet(skb);
3398 ipvs = net_ipvs(net);
3189 cmd = info->genlhdr->cmd; 3399 cmd = info->genlhdr->cmd;
3190 3400
3191 if (cmd == IPVS_CMD_GET_SERVICE) 3401 if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3424,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3214 { 3424 {
3215 struct ip_vs_service *svc; 3425 struct ip_vs_service *svc;
3216 3426
3217 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); 3427 svc = ip_vs_genl_find_service(net,
3428 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3218 if (IS_ERR(svc)) { 3429 if (IS_ERR(svc)) {
3219 ret = PTR_ERR(svc); 3430 ret = PTR_ERR(svc);
3220 goto out_err; 3431 goto out_err;
@@ -3234,7 +3445,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3234 { 3445 {
3235 struct ip_vs_timeout_user t; 3446 struct ip_vs_timeout_user t;
3236 3447
3237 __ip_vs_get_timeouts(&t); 3448 __ip_vs_get_timeouts(net, &t);
3238#ifdef CONFIG_IP_VS_PROTO_TCP 3449#ifdef CONFIG_IP_VS_PROTO_TCP
3239 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); 3450 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3240 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3451 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3591,186 @@ static void ip_vs_genl_unregister(void)
3380 3591
3381/* End of Generic Netlink interface definitions */ 3592/* End of Generic Netlink interface definitions */
3382 3593
3594/*
 3595 * per netns init/exit func.
3596 */
3597#ifdef CONFIG_SYSCTL
3598int __net_init __ip_vs_control_init_sysctl(struct net *net)
3599{
3600 int idx;
3601 struct netns_ipvs *ipvs = net_ipvs(net);
3602 struct ctl_table *tbl;
3603
3604 atomic_set(&ipvs->dropentry, 0);
3605 spin_lock_init(&ipvs->dropentry_lock);
3606 spin_lock_init(&ipvs->droppacket_lock);
3607 spin_lock_init(&ipvs->securetcp_lock);
3608
3609 if (!net_eq(net, &init_net)) {
3610 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3611 if (tbl == NULL)
3612 return -ENOMEM;
3613 } else
3614 tbl = vs_vars;
3615 /* Initialize sysctl defaults */
3616 idx = 0;
3617 ipvs->sysctl_amemthresh = 1024;
3618 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3619 ipvs->sysctl_am_droprate = 10;
3620 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3621 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3622 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3623#ifdef CONFIG_IP_VS_NFCT
3624 tbl[idx++].data = &ipvs->sysctl_conntrack;
3625#endif
3626 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3627 ipvs->sysctl_snat_reroute = 1;
3628 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3629 ipvs->sysctl_sync_ver = 1;
3630 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3631 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3632 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3633 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3634 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3635 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3636 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3637 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3638 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3639
3640
3641 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3642 tbl);
3643 if (ipvs->sysctl_hdr == NULL) {
3644 if (!net_eq(net, &init_net))
3645 kfree(tbl);
3646 return -ENOMEM;
3647 }
3648 ip_vs_start_estimator(net, &ipvs->tot_stats);
3649 ipvs->sysctl_tbl = tbl;
3650 /* Schedule defense work */
3651 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3652 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3653
3654 return 0;
3655}
3656
3657void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
3658{
3659 struct netns_ipvs *ipvs = net_ipvs(net);
3660
3661 cancel_delayed_work_sync(&ipvs->defense_work);
3662 cancel_work_sync(&ipvs->defense_work.work);
3663 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3664}
3665
3666#else
3667
3668int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3669void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
3670
3671#endif
3672
3673static struct notifier_block ip_vs_dst_notifier = {
3674 .notifier_call = ip_vs_dst_event,
3675};
3676
3677int __net_init __ip_vs_control_init(struct net *net)
3678{
3679 int idx;
3680 struct netns_ipvs *ipvs = net_ipvs(net);
3681
3682 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3683
3684 /* Initialize rs_table */
3685 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3686 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3687
3688 INIT_LIST_HEAD(&ipvs->dest_trash);
3689 atomic_set(&ipvs->ftpsvc_counter, 0);
3690 atomic_set(&ipvs->nullsvc_counter, 0);
3691
3692 /* procfs stats */
3693 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3694 if (!ipvs->tot_stats.cpustats) {
3695 pr_err("%s(): alloc_percpu.\n", __func__);
3696 return -ENOMEM;
3697 }
3698 spin_lock_init(&ipvs->tot_stats.lock);
3699
3700 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3701 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3702 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3703 &ip_vs_stats_percpu_fops);
3704
3705 if (__ip_vs_control_init_sysctl(net))
3706 goto err;
3707
3708 return 0;
3709
3710err:
3711 free_percpu(ipvs->tot_stats.cpustats);
3712 return -ENOMEM;
3713}
3714
3715void __net_exit __ip_vs_control_cleanup(struct net *net)
3716{
3717 struct netns_ipvs *ipvs = net_ipvs(net);
3718
3719 ip_vs_trash_cleanup(net);
3720 ip_vs_stop_estimator(net, &ipvs->tot_stats);
3721 __ip_vs_control_cleanup_sysctl(net);
3722 proc_net_remove(net, "ip_vs_stats_percpu");
3723 proc_net_remove(net, "ip_vs_stats");
3724 proc_net_remove(net, "ip_vs");
3725 free_percpu(ipvs->tot_stats.cpustats);
3726}
3383 3727
3384int __init ip_vs_control_init(void) 3728int __init ip_vs_control_init(void)
3385{ 3729{
3386 int ret;
3387 int idx; 3730 int idx;
3731 int ret;
3388 3732
3389 EnterFunction(2); 3733 EnterFunction(2);
3390 3734
 3391 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ 3735 /* Initialize ip_vs_svc_table and ip_vs_svc_fwm_table */
3392 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3736 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3393 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 3737 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3394 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3738 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3395 } 3739 }
3396 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { 3740
 3397 INIT_LIST_HEAD(&ip_vs_rtable[idx]); 3741 smp_wmb(); /* Do we really need it now? */
3398 }
3399 smp_wmb();
3400 3742
3401 ret = nf_register_sockopt(&ip_vs_sockopts); 3743 ret = nf_register_sockopt(&ip_vs_sockopts);
3402 if (ret) { 3744 if (ret) {
3403 pr_err("cannot register sockopt.\n"); 3745 pr_err("cannot register sockopt.\n");
3404 return ret; 3746 goto err_sock;
3405 } 3747 }
3406 3748
3407 ret = ip_vs_genl_register(); 3749 ret = ip_vs_genl_register();
3408 if (ret) { 3750 if (ret) {
3409 pr_err("cannot register Generic Netlink interface.\n"); 3751 pr_err("cannot register Generic Netlink interface.\n");
3410 nf_unregister_sockopt(&ip_vs_sockopts); 3752 goto err_genl;
3411 return ret;
3412 } 3753 }
3413 3754
3414 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); 3755 ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3415 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); 3756 if (ret < 0)
3416 3757 goto err_notf;
3417 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3418
3419 ip_vs_new_estimator(&ip_vs_stats);
3420
3421 /* Hook the defense timer */
3422 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3423 3758
3424 LeaveFunction(2); 3759 LeaveFunction(2);
3425 return 0; 3760 return 0;
3761
3762err_notf:
3763 ip_vs_genl_unregister();
3764err_genl:
3765 nf_unregister_sockopt(&ip_vs_sockopts);
3766err_sock:
3767 return ret;
3426} 3768}
3427 3769
3428 3770
3429void ip_vs_control_cleanup(void) 3771void ip_vs_control_cleanup(void)
3430{ 3772{
3431 EnterFunction(2); 3773 EnterFunction(2);
3432 ip_vs_trash_cleanup();
3433 cancel_delayed_work_sync(&defense_work);
3434 cancel_work_sync(&defense_work.work);
3435 ip_vs_kill_estimator(&ip_vs_stats);
3436 unregister_sysctl_table(sysctl_header);
3437 proc_net_remove(&init_net, "ip_vs_stats");
3438 proc_net_remove(&init_net, "ip_vs");
3439 ip_vs_genl_unregister(); 3774 ip_vs_genl_unregister();
3440 nf_unregister_sockopt(&ip_vs_sockopts); 3775 nf_unregister_sockopt(&ip_vs_sockopts);
3441 LeaveFunction(2); 3776 LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801962e0..508cce98777c 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,12 @@
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 * 10 *
11 * Changes: 11 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
12 * 12 * Network name space (netns) aware.
 13 * Global data moved to netns, i.e. struct netns_ipvs.
 14 * Affected data: est_list and est_lock.
 15 * estimation_timer() runs with a timer per netns.
 16 * get_stats() does the per cpu summing.
13 */ 17 */
14 18
15#define KMSG_COMPONENT "IPVS" 19#define KMSG_COMPONENT "IPVS"
@@ -48,11 +52,42 @@
48 */ 52 */
49 53
50 54
51static void estimation_timer(unsigned long arg); 55/*
56 * Make a summary from each cpu
57 */
58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats *stats)
60{
61 int i;
62
63 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start;
66 __u64 inbytes, outbytes;
67 if (i) {
68 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts;
71 do {
72 start = u64_stats_fetch_begin(&s->syncp);
73 inbytes = s->ustats.inbytes;
74 outbytes = s->ustats.outbytes;
75 } while (u64_stats_fetch_retry(&s->syncp, start));
76 sum->inbytes += inbytes;
77 sum->outbytes += outbytes;
78 } else {
79 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts;
82 do {
83 start = u64_stats_fetch_begin(&s->syncp);
84 sum->inbytes = s->ustats.inbytes;
85 sum->outbytes = s->ustats.outbytes;
86 } while (u64_stats_fetch_retry(&s->syncp, start));
87 }
88 }
89}
52 90
53static LIST_HEAD(est_list);
54static DEFINE_SPINLOCK(est_lock);
55static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
56 91
57static void estimation_timer(unsigned long arg) 92static void estimation_timer(unsigned long arg)
58{ 93{
@@ -62,12 +97,16 @@ static void estimation_timer(unsigned long arg)
62 u32 n_inpkts, n_outpkts; 97 u32 n_inpkts, n_outpkts;
63 u64 n_inbytes, n_outbytes; 98 u64 n_inbytes, n_outbytes;
64 u32 rate; 99 u32 rate;
100 struct net *net = (struct net *)arg;
101 struct netns_ipvs *ipvs;
65 102
66 spin_lock(&est_lock); 103 ipvs = net_ipvs(net);
67 list_for_each_entry(e, &est_list, list) { 104 spin_lock(&ipvs->est_lock);
105 list_for_each_entry(e, &ipvs->est_list, list) {
68 s = container_of(e, struct ip_vs_stats, est); 106 s = container_of(e, struct ip_vs_stats, est);
69 107
70 spin_lock(&s->lock); 108 spin_lock(&s->lock);
109 ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
71 n_conns = s->ustats.conns; 110 n_conns = s->ustats.conns;
72 n_inpkts = s->ustats.inpkts; 111 n_inpkts = s->ustats.inpkts;
73 n_outpkts = s->ustats.outpkts; 112 n_outpkts = s->ustats.outpkts;
@@ -75,81 +114,64 @@ static void estimation_timer(unsigned long arg)
75 n_outbytes = s->ustats.outbytes; 114 n_outbytes = s->ustats.outbytes;
76 115
77 /* scaled by 2^10, but divided 2 seconds */ 116 /* scaled by 2^10, but divided 2 seconds */
78 rate = (n_conns - e->last_conns)<<9; 117 rate = (n_conns - e->last_conns) << 9;
79 e->last_conns = n_conns; 118 e->last_conns = n_conns;
80 e->cps += ((long)rate - (long)e->cps)>>2; 119 e->cps += ((long)rate - (long)e->cps) >> 2;
81 s->ustats.cps = (e->cps+0x1FF)>>10;
82 120
83 rate = (n_inpkts - e->last_inpkts)<<9; 121 rate = (n_inpkts - e->last_inpkts) << 9;
84 e->last_inpkts = n_inpkts; 122 e->last_inpkts = n_inpkts;
85 e->inpps += ((long)rate - (long)e->inpps)>>2; 123 e->inpps += ((long)rate - (long)e->inpps) >> 2;
86 s->ustats.inpps = (e->inpps+0x1FF)>>10;
87 124
88 rate = (n_outpkts - e->last_outpkts)<<9; 125 rate = (n_outpkts - e->last_outpkts) << 9;
89 e->last_outpkts = n_outpkts; 126 e->last_outpkts = n_outpkts;
90 e->outpps += ((long)rate - (long)e->outpps)>>2; 127 e->outpps += ((long)rate - (long)e->outpps) >> 2;
91 s->ustats.outpps = (e->outpps+0x1FF)>>10;
92 128
93 rate = (n_inbytes - e->last_inbytes)<<4; 129 rate = (n_inbytes - e->last_inbytes) << 4;
94 e->last_inbytes = n_inbytes; 130 e->last_inbytes = n_inbytes;
95 e->inbps += ((long)rate - (long)e->inbps)>>2; 131 e->inbps += ((long)rate - (long)e->inbps) >> 2;
96 s->ustats.inbps = (e->inbps+0xF)>>5;
97 132
98 rate = (n_outbytes - e->last_outbytes)<<4; 133 rate = (n_outbytes - e->last_outbytes) << 4;
99 e->last_outbytes = n_outbytes; 134 e->last_outbytes = n_outbytes;
100 e->outbps += ((long)rate - (long)e->outbps)>>2; 135 e->outbps += ((long)rate - (long)e->outbps) >> 2;
101 s->ustats.outbps = (e->outbps+0xF)>>5;
102 spin_unlock(&s->lock); 136 spin_unlock(&s->lock);
103 } 137 }
104 spin_unlock(&est_lock); 138 spin_unlock(&ipvs->est_lock);
105 mod_timer(&est_timer, jiffies + 2*HZ); 139 mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
106} 140}
107 141
108void ip_vs_new_estimator(struct ip_vs_stats *stats) 142void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
109{ 143{
144 struct netns_ipvs *ipvs = net_ipvs(net);
110 struct ip_vs_estimator *est = &stats->est; 145 struct ip_vs_estimator *est = &stats->est;
111 146
112 INIT_LIST_HEAD(&est->list); 147 INIT_LIST_HEAD(&est->list);
113 148
114 est->last_conns = stats->ustats.conns; 149 spin_lock_bh(&ipvs->est_lock);
115 est->cps = stats->ustats.cps<<10; 150 list_add(&est->list, &ipvs->est_list);
116 151 spin_unlock_bh(&ipvs->est_lock);
117 est->last_inpkts = stats->ustats.inpkts;
118 est->inpps = stats->ustats.inpps<<10;
119
120 est->last_outpkts = stats->ustats.outpkts;
121 est->outpps = stats->ustats.outpps<<10;
122
123 est->last_inbytes = stats->ustats.inbytes;
124 est->inbps = stats->ustats.inbps<<5;
125
126 est->last_outbytes = stats->ustats.outbytes;
127 est->outbps = stats->ustats.outbps<<5;
128
129 spin_lock_bh(&est_lock);
130 list_add(&est->list, &est_list);
131 spin_unlock_bh(&est_lock);
132} 152}
133 153
134void ip_vs_kill_estimator(struct ip_vs_stats *stats) 154void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
135{ 155{
156 struct netns_ipvs *ipvs = net_ipvs(net);
136 struct ip_vs_estimator *est = &stats->est; 157 struct ip_vs_estimator *est = &stats->est;
137 158
138 spin_lock_bh(&est_lock); 159 spin_lock_bh(&ipvs->est_lock);
139 list_del(&est->list); 160 list_del(&est->list);
140 spin_unlock_bh(&est_lock); 161 spin_unlock_bh(&ipvs->est_lock);
141} 162}
142 163
143void ip_vs_zero_estimator(struct ip_vs_stats *stats) 164void ip_vs_zero_estimator(struct ip_vs_stats *stats)
144{ 165{
145 struct ip_vs_estimator *est = &stats->est; 166 struct ip_vs_estimator *est = &stats->est;
146 167 struct ip_vs_stats_user *u = &stats->ustats;
147 /* set counters zero, caller must hold the stats->lock lock */ 168
 148 est->last_inbytes = 0; 169 /* reset counters, caller must hold stats->lock */
149 est->last_outbytes = 0; 170 est->last_inbytes = u->inbytes;
150 est->last_conns = 0; 171 est->last_outbytes = u->outbytes;
151 est->last_inpkts = 0; 172 est->last_conns = u->conns;
152 est->last_outpkts = 0; 173 est->last_inpkts = u->inpkts;
174 est->last_outpkts = u->outpkts;
153 est->cps = 0; 175 est->cps = 0;
154 est->inpps = 0; 176 est->inpps = 0;
155 est->outpps = 0; 177 est->outpps = 0;
@@ -157,13 +179,40 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
157 est->outbps = 0; 179 est->outbps = 0;
158} 180}
159 181
182/* Get decoded rates */
183void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
184 struct ip_vs_stats *stats)
185{
186 struct ip_vs_estimator *e = &stats->est;
187
188 dst->cps = (e->cps + 0x1FF) >> 10;
189 dst->inpps = (e->inpps + 0x1FF) >> 10;
190 dst->outpps = (e->outpps + 0x1FF) >> 10;
191 dst->inbps = (e->inbps + 0xF) >> 5;
192 dst->outbps = (e->outbps + 0xF) >> 5;
193}
194
195int __net_init __ip_vs_estimator_init(struct net *net)
196{
197 struct netns_ipvs *ipvs = net_ipvs(net);
198
199 INIT_LIST_HEAD(&ipvs->est_list);
200 spin_lock_init(&ipvs->est_lock);
201 setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
202 mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
203 return 0;
204}
205
206void __net_exit __ip_vs_estimator_cleanup(struct net *net)
207{
208 del_timer_sync(&net_ipvs(net)->est_timer);
209}
210
160int __init ip_vs_estimator_init(void) 211int __init ip_vs_estimator_init(void)
161{ 212{
162 mod_timer(&est_timer, jiffies + 2 * HZ);
163 return 0; 213 return 0;
164} 214}
165 215
166void ip_vs_estimator_cleanup(void) 216void ip_vs_estimator_cleanup(void)
167{ 217{
168 del_timer_sync(&est_timer);
169} 218}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 75455000ad1c..6b5dd6ddaae9 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
157 int ret = 0; 157 int ret = 0;
158 enum ip_conntrack_info ctinfo; 158 enum ip_conntrack_info ctinfo;
159 struct nf_conn *ct; 159 struct nf_conn *ct;
160 struct net *net;
160 161
161#ifdef CONFIG_IP_VS_IPV6 162#ifdef CONFIG_IP_VS_IPV6
162 /* This application helper doesn't work with IPv6 yet, 163 /* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
197 */ 198 */
198 { 199 {
199 struct ip_vs_conn_param p; 200 struct ip_vs_conn_param p;
200 ip_vs_conn_fill_param(AF_INET, iph->protocol, 201 ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
201 &from, port, &cp->caddr, 0, &p); 202 iph->protocol, &from, port,
203 &cp->caddr, 0, &p);
202 n_cp = ip_vs_conn_out_get(&p); 204 n_cp = ip_vs_conn_out_get(&p);
203 } 205 }
204 if (!n_cp) { 206 if (!n_cp) {
205 struct ip_vs_conn_param p; 207 struct ip_vs_conn_param p;
206 ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, 208 ip_vs_conn_fill_param(ip_vs_conn_net(cp),
209 AF_INET, IPPROTO_TCP, &cp->caddr,
207 0, &cp->vaddr, port, &p); 210 0, &cp->vaddr, port, &p);
208 n_cp = ip_vs_conn_new(&p, &from, port, 211 n_cp = ip_vs_conn_new(&p, &from, port,
209 IP_VS_CONN_F_NO_CPORT | 212 IP_VS_CONN_F_NO_CPORT |
210 IP_VS_CONN_F_NFCT, 213 IP_VS_CONN_F_NFCT,
211 cp->dest); 214 cp->dest, skb->mark);
212 if (!n_cp) 215 if (!n_cp)
213 return 0; 216 return 0;
214 217
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
257 * would be adjusted twice. 260 * would be adjusted twice.
258 */ 261 */
259 262
263 net = skb_net(skb);
260 cp->app_data = NULL; 264 cp->app_data = NULL;
261 ip_vs_tcp_conn_listen(n_cp); 265 ip_vs_tcp_conn_listen(net, n_cp);
262 ip_vs_conn_put(n_cp); 266 ip_vs_conn_put(n_cp);
263 return ret; 267 return ret;
264 } 268 }
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
287 union nf_inet_addr to; 291 union nf_inet_addr to;
288 __be16 port; 292 __be16 port;
289 struct ip_vs_conn *n_cp; 293 struct ip_vs_conn *n_cp;
294 struct net *net;
290 295
291#ifdef CONFIG_IP_VS_IPV6 296#ifdef CONFIG_IP_VS_IPV6
292 /* This application helper doesn't work with IPv6 yet, 297 /* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
358 363
359 { 364 {
360 struct ip_vs_conn_param p; 365 struct ip_vs_conn_param p;
361 ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, 366 ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
362 &cp->vaddr, htons(ntohs(cp->vport)-1), 367 iph->protocol, &to, port, &cp->vaddr,
363 &p); 368 htons(ntohs(cp->vport)-1), &p);
364 n_cp = ip_vs_conn_in_get(&p); 369 n_cp = ip_vs_conn_in_get(&p);
365 if (!n_cp) { 370 if (!n_cp) {
366 n_cp = ip_vs_conn_new(&p, &cp->daddr, 371 n_cp = ip_vs_conn_new(&p, &cp->daddr,
367 htons(ntohs(cp->dport)-1), 372 htons(ntohs(cp->dport)-1),
368 IP_VS_CONN_F_NFCT, cp->dest); 373 IP_VS_CONN_F_NFCT, cp->dest,
374 skb->mark);
369 if (!n_cp) 375 if (!n_cp)
370 return 0; 376 return 0;
371 377
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
377 /* 383 /*
378 * Move tunnel to listen state 384 * Move tunnel to listen state
379 */ 385 */
380 ip_vs_tcp_conn_listen(n_cp); 386 net = skb_net(skb);
387 ip_vs_tcp_conn_listen(net, n_cp);
381 ip_vs_conn_put(n_cp); 388 ip_vs_conn_put(n_cp);
382 389
383 return 1; 390 return 1;
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
398 .pkt_in = ip_vs_ftp_in, 405 .pkt_in = ip_vs_ftp_in,
399}; 406};
400 407
401
402/* 408/*
403 * ip_vs_ftp initialization 409 * per netns ip_vs_ftp initialization
404 */ 410 */
405static int __init ip_vs_ftp_init(void) 411static int __net_init __ip_vs_ftp_init(struct net *net)
406{ 412{
407 int i, ret; 413 int i, ret;
408 struct ip_vs_app *app = &ip_vs_ftp; 414 struct ip_vs_app *app = &ip_vs_ftp;
409 415
410 ret = register_ip_vs_app(app); 416 ret = register_ip_vs_app(net, app);
411 if (ret) 417 if (ret)
412 return ret; 418 return ret;
413 419
414 for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { 420 for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
415 if (!ports[i]) 421 if (!ports[i])
416 continue; 422 continue;
417 ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); 423 ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
418 if (ret) 424 if (ret)
419 break; 425 break;
420 pr_info("%s: loaded support on port[%d] = %d\n", 426 pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
422 } 428 }
423 429
424 if (ret) 430 if (ret)
425 unregister_ip_vs_app(app); 431 unregister_ip_vs_app(net, app);
426 432
427 return ret; 433 return ret;
428} 434}
435/*
436 * netns exit
437 */
438static void __ip_vs_ftp_exit(struct net *net)
439{
440 struct ip_vs_app *app = &ip_vs_ftp;
441
442 unregister_ip_vs_app(net, app);
443}
444
445static struct pernet_operations ip_vs_ftp_ops = {
446 .init = __ip_vs_ftp_init,
447 .exit = __ip_vs_ftp_exit,
448};
429 449
450int __init ip_vs_ftp_init(void)
451{
452 int rv;
453
454 rv = register_pernet_subsys(&ip_vs_ftp_ops);
455 return rv;
456}
430 457
431/* 458/*
432 * ip_vs_ftp finish. 459 * ip_vs_ftp finish.
433 */ 460 */
434static void __exit ip_vs_ftp_exit(void) 461static void __exit ip_vs_ftp_exit(void)
435{ 462{
436 unregister_ip_vs_app(&ip_vs_ftp); 463 unregister_pernet_subsys(&ip_vs_ftp_ops);
437} 464}
438 465
439 466
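
The ftp helper now registers per namespace rather than once globally: module init only calls register_pernet_subsys(), and the kernel then invokes the pernet_operations .init/.exit callbacks for every network namespace as it comes and goes. A stripped-down, hypothetical module skeleton of that pattern (the demo_* names are invented for illustration and are not part of the patch):

#include <linux/module.h>
#include <net/net_namespace.h>

/* Called once for each network namespace, including init_net. */
static int __net_init demo_net_init(struct net *net)
{
	pr_info("demo: init for netns %p\n", net);
	return 0;	/* per-netns state would be set up here */
}

/* Called when a namespace is torn down (or the module is removed). */
static void __net_exit demo_net_exit(struct net *net)
{
	pr_info("demo: exit for netns %p\n", net);
}

static struct pernet_operations demo_net_ops = {
	.init = demo_net_init,
	.exit = demo_net_exit,
};

static int __init demo_init(void)
{
	return register_pernet_subsys(&demo_net_ops);
}

static void __exit demo_exit(void)
{
	unregister_pernet_subsys(&demo_net_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
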
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9323f8944199..87e40ea77a95 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -63,6 +63,8 @@
63#define CHECK_EXPIRE_INTERVAL (60*HZ) 63#define CHECK_EXPIRE_INTERVAL (60*HZ)
64#define ENTRY_TIMEOUT (6*60*HZ) 64#define ENTRY_TIMEOUT (6*60*HZ)
65 65
66#define DEFAULT_EXPIRATION (24*60*60*HZ)
67
66/* 68/*
67 * It is for full expiration check. 69 * It is for full expiration check.
68 * When there is no partial expiration check (garbage collection) 70 * When there is no partial expiration check (garbage collection)
@@ -70,7 +72,6 @@
70 * entries that haven't been touched for a day. 72 * entries that haven't been touched for a day.
71 */ 73 */
72#define COUNT_FOR_FULL_EXPIRATION 30 74#define COUNT_FOR_FULL_EXPIRATION 30
73static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
74 75
75 76
76/* 77/*
@@ -113,25 +114,24 @@ struct ip_vs_lblc_table {
113/* 114/*
114 * IPVS LBLC sysctl table 115 * IPVS LBLC sysctl table
115 */ 116 */
116 117#ifdef CONFIG_SYSCTL
117static ctl_table vs_vars_table[] = { 118static ctl_table vs_vars_table[] = {
118 { 119 {
119 .procname = "lblc_expiration", 120 .procname = "lblc_expiration",
120 .data = &sysctl_ip_vs_lblc_expiration, 121 .data = NULL,
121 .maxlen = sizeof(int), 122 .maxlen = sizeof(int),
122 .mode = 0644, 123 .mode = 0644,
123 .proc_handler = proc_dointvec_jiffies, 124 .proc_handler = proc_dointvec_jiffies,
124 }, 125 },
125 { } 126 { }
126}; 127};
127 128#endif
128static struct ctl_table_header * sysctl_header;
129 129
130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
131{ 131{
132 list_del(&en->list); 132 list_del(&en->list);
133 /* 133 /*
134 * We don't kfree dest because it is refered either by its service 134 * We don't kfree dest because it is referred either by its service
135 * or the trash dest list. 135 * or the trash dest list.
136 */ 136 */
137 atomic_dec(&en->dest->refcnt); 137 atomic_dec(&en->dest->refcnt);
@@ -241,6 +241,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
241 } 241 }
242} 242}
243 243
244static int sysctl_lblc_expiration(struct ip_vs_service *svc)
245{
246#ifdef CONFIG_SYSCTL
247 struct netns_ipvs *ipvs = net_ipvs(svc->net);
248 return ipvs->sysctl_lblc_expiration;
249#else
250 return DEFAULT_EXPIRATION;
251#endif
252}
244 253
245static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) 254static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
246{ 255{
@@ -255,7 +264,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
255 write_lock(&svc->sched_lock); 264 write_lock(&svc->sched_lock);
256 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 265 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
257 if (time_before(now, 266 if (time_before(now,
258 en->lastuse + sysctl_ip_vs_lblc_expiration)) 267 en->lastuse +
268 sysctl_lblc_expiration(svc)))
259 continue; 269 continue;
260 270
261 ip_vs_lblc_free(en); 271 ip_vs_lblc_free(en);
@@ -390,12 +400,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
390 int loh, doh; 400 int loh, doh;
391 401
392 /* 402 /*
393 * We think the overhead of processing active connections is fifty 403 * We use the following formula to estimate the load:
394 * times higher than that of inactive connections in average. (This
395 * fifty times might not be accurate, we will change it later.) We
396 * use the following formula to estimate the overhead:
397 * dest->activeconns*50 + dest->inactconns
398 * and the load:
399 * (dest overhead) / dest->weight 404 * (dest overhead) / dest->weight
400 * 405 *
401 * Remember -- no floats in kernel mode!!! 406 * Remember -- no floats in kernel mode!!!
@@ -411,8 +416,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
411 continue; 416 continue;
412 if (atomic_read(&dest->weight) > 0) { 417 if (atomic_read(&dest->weight) > 0) {
413 least = dest; 418 least = dest;
414 loh = atomic_read(&least->activeconns) * 50 419 loh = ip_vs_dest_conn_overhead(least);
415 + atomic_read(&least->inactconns);
416 goto nextstage; 420 goto nextstage;
417 } 421 }
418 } 422 }
@@ -426,8 +430,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
426 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 430 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
427 continue; 431 continue;
428 432
429 doh = atomic_read(&dest->activeconns) * 50 433 doh = ip_vs_dest_conn_overhead(dest);
430 + atomic_read(&dest->inactconns);
431 if (loh * atomic_read(&dest->weight) > 434 if (loh * atomic_read(&dest->weight) >
432 doh * atomic_read(&least->weight)) { 435 doh * atomic_read(&least->weight)) {
433 least = dest; 436 least = dest;
@@ -511,7 +514,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
511 /* No cache entry or it is invalid, time to schedule */ 514 /* No cache entry or it is invalid, time to schedule */
512 dest = __ip_vs_lblc_schedule(svc); 515 dest = __ip_vs_lblc_schedule(svc);
513 if (!dest) { 516 if (!dest) {
514 IP_VS_ERR_RL("LBLC: no destination available\n"); 517 ip_vs_scheduler_err(svc, "no destination available");
515 return NULL; 518 return NULL;
516 } 519 }
517 520
@@ -543,23 +546,77 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
543 .schedule = ip_vs_lblc_schedule, 546 .schedule = ip_vs_lblc_schedule,
544}; 547};
545 548
549/*
550 * per netns init.
551 */
552#ifdef CONFIG_SYSCTL
553static int __net_init __ip_vs_lblc_init(struct net *net)
554{
555 struct netns_ipvs *ipvs = net_ipvs(net);
556
557 if (!net_eq(net, &init_net)) {
558 ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
559 sizeof(vs_vars_table),
560 GFP_KERNEL);
561 if (ipvs->lblc_ctl_table == NULL)
562 return -ENOMEM;
563 } else
564 ipvs->lblc_ctl_table = vs_vars_table;
565 ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
566 ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
567
568 ipvs->lblc_ctl_header =
569 register_net_sysctl_table(net, net_vs_ctl_path,
570 ipvs->lblc_ctl_table);
571 if (!ipvs->lblc_ctl_header) {
572 if (!net_eq(net, &init_net))
573 kfree(ipvs->lblc_ctl_table);
574 return -ENOMEM;
575 }
576
577 return 0;
578}
579
580static void __net_exit __ip_vs_lblc_exit(struct net *net)
581{
582 struct netns_ipvs *ipvs = net_ipvs(net);
583
584 unregister_net_sysctl_table(ipvs->lblc_ctl_header);
585
586 if (!net_eq(net, &init_net))
587 kfree(ipvs->lblc_ctl_table);
588}
589
590#else
591
592static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
593static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
594
595#endif
596
597static struct pernet_operations ip_vs_lblc_ops = {
598 .init = __ip_vs_lblc_init,
599 .exit = __ip_vs_lblc_exit,
600};
546 601
547static int __init ip_vs_lblc_init(void) 602static int __init ip_vs_lblc_init(void)
548{ 603{
549 int ret; 604 int ret;
550 605
551 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); 606 ret = register_pernet_subsys(&ip_vs_lblc_ops);
607 if (ret)
608 return ret;
609
552 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); 610 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
553 if (ret) 611 if (ret)
554 unregister_sysctl_table(sysctl_header); 612 unregister_pernet_subsys(&ip_vs_lblc_ops);
555 return ret; 613 return ret;
556} 614}
557 615
558
559static void __exit ip_vs_lblc_cleanup(void) 616static void __exit ip_vs_lblc_cleanup(void)
560{ 617{
561 unregister_sysctl_table(sysctl_header);
562 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 618 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
619 unregister_pernet_subsys(&ip_vs_lblc_ops);
563} 620}
564 621
565 622
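
The point of the kmemdup() dance in __ip_vs_lblc_init() above is that a ctl_table entry carries a .data pointer: if every namespace registered the same table, they would all read and write one global expiration value. Copying the template per namespace and pointing .data at that namespace's field keeps the knob isolated, with sysctl_lblc_expiration() falling back to DEFAULT_EXPIRATION when CONFIG_SYSCTL is off. A small userspace analogue of the copy-and-rebind step (struct names are trimmed stand-ins, not the kernel types):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ctl_entry {			/* stand-in for ctl_table */
	const char *procname;
	int *data;
};

struct fake_ns {			/* stand-in for struct netns_ipvs */
	int lblc_expiration;
	struct ctl_entry *table;
};

static const struct ctl_entry vs_vars_template[] = {
	{ "lblc_expiration", NULL },
	{ 0 }				/* terminator, as in ctl_table arrays */
};

static int ns_init(struct fake_ns *ns, int default_expiration)
{
	ns->table = malloc(sizeof(vs_vars_template));	/* ~ kmemdup() */
	if (!ns->table)
		return -1;
	memcpy(ns->table, vs_vars_template, sizeof(vs_vars_template));
	ns->lblc_expiration = default_expiration;
	ns->table[0].data = &ns->lblc_expiration;	/* bind to this ns only */
	return 0;
}

int main(void)
{
	struct fake_ns a, b;

	if (ns_init(&a, 24 * 60 * 60) || ns_init(&b, 24 * 60 * 60))
		return 1;
	*b.table[0].data = 60;		/* a "sysctl write" seen only by ns b */
	printf("ns a: %d  ns b: %d\n", a.lblc_expiration, b.lblc_expiration);
	free(a.table);
	free(b.table);
	return 0;
}
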
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index dbeed8ea421a..90f618ab6dda 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -63,6 +63,8 @@
63#define CHECK_EXPIRE_INTERVAL (60*HZ) 63#define CHECK_EXPIRE_INTERVAL (60*HZ)
64#define ENTRY_TIMEOUT (6*60*HZ) 64#define ENTRY_TIMEOUT (6*60*HZ)
65 65
66#define DEFAULT_EXPIRATION (24*60*60*HZ)
67
66/* 68/*
67 * It is for full expiration check. 69 * It is for full expiration check.
68 * When there is no partial expiration check (garbage collection) 70 * When there is no partial expiration check (garbage collection)
@@ -70,8 +72,6 @@
70 * entries that haven't been touched for a day. 72 * entries that haven't been touched for a day.
71 */ 73 */
72#define COUNT_FOR_FULL_EXPIRATION 30 74#define COUNT_FOR_FULL_EXPIRATION 30
73static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
74
75 75
76/* 76/*
77 * for IPVS lblcr entry hash table 77 * for IPVS lblcr entry hash table
@@ -152,7 +152,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
152 write_lock(&set->lock); 152 write_lock(&set->lock);
153 list_for_each_entry_safe(e, ep, &set->list, list) { 153 list_for_each_entry_safe(e, ep, &set->list, list) {
154 /* 154 /*
155 * We don't kfree dest because it is refered either 155 * We don't kfree dest because it is referred either
156 * by its service or by the trash dest list. 156 * by its service or by the trash dest list.
157 */ 157 */
158 atomic_dec(&e->dest->refcnt); 158 atomic_dec(&e->dest->refcnt);
@@ -180,8 +180,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
180 180
181 if ((atomic_read(&least->weight) > 0) 181 if ((atomic_read(&least->weight) > 0)
182 && (least->flags & IP_VS_DEST_F_AVAILABLE)) { 182 && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
183 loh = atomic_read(&least->activeconns) * 50 183 loh = ip_vs_dest_conn_overhead(least);
184 + atomic_read(&least->inactconns);
185 goto nextstage; 184 goto nextstage;
186 } 185 }
187 } 186 }
@@ -194,8 +193,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
194 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 193 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
195 continue; 194 continue;
196 195
197 doh = atomic_read(&dest->activeconns) * 50 196 doh = ip_vs_dest_conn_overhead(dest);
198 + atomic_read(&dest->inactconns);
199 if ((loh * atomic_read(&dest->weight) > 197 if ((loh * atomic_read(&dest->weight) >
200 doh * atomic_read(&least->weight)) 198 doh * atomic_read(&least->weight))
201 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 199 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -230,8 +228,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
230 list_for_each_entry(e, &set->list, list) { 228 list_for_each_entry(e, &set->list, list) {
231 most = e->dest; 229 most = e->dest;
232 if (atomic_read(&most->weight) > 0) { 230 if (atomic_read(&most->weight) > 0) {
233 moh = atomic_read(&most->activeconns) * 50 231 moh = ip_vs_dest_conn_overhead(most);
234 + atomic_read(&most->inactconns);
235 goto nextstage; 232 goto nextstage;
236 } 233 }
237 } 234 }
@@ -241,8 +238,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
241 nextstage: 238 nextstage:
242 list_for_each_entry(e, &set->list, list) { 239 list_for_each_entry(e, &set->list, list) {
243 dest = e->dest; 240 dest = e->dest;
244 doh = atomic_read(&dest->activeconns) * 50 241 doh = ip_vs_dest_conn_overhead(dest);
245 + atomic_read(&dest->inactconns);
246 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 242 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
247 if ((moh * atomic_read(&dest->weight) < 243 if ((moh * atomic_read(&dest->weight) <
248 doh * atomic_read(&most->weight)) 244 doh * atomic_read(&most->weight))
@@ -289,6 +285,7 @@ struct ip_vs_lblcr_table {
289}; 285};
290 286
291 287
288#ifdef CONFIG_SYSCTL
292/* 289/*
293 * IPVS LBLCR sysctl table 290 * IPVS LBLCR sysctl table
294 */ 291 */
@@ -296,15 +293,14 @@ struct ip_vs_lblcr_table {
296static ctl_table vs_vars_table[] = { 293static ctl_table vs_vars_table[] = {
297 { 294 {
298 .procname = "lblcr_expiration", 295 .procname = "lblcr_expiration",
299 .data = &sysctl_ip_vs_lblcr_expiration, 296 .data = NULL,
300 .maxlen = sizeof(int), 297 .maxlen = sizeof(int),
301 .mode = 0644, 298 .mode = 0644,
302 .proc_handler = proc_dointvec_jiffies, 299 .proc_handler = proc_dointvec_jiffies,
303 }, 300 },
304 { } 301 { }
305}; 302};
306 303#endif
307static struct ctl_table_header * sysctl_header;
308 304
309static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) 305static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
310{ 306{
@@ -418,6 +414,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
418 } 414 }
419} 415}
420 416
417static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
418{
419#ifdef CONFIG_SYSCTL
420 struct netns_ipvs *ipvs = net_ipvs(svc->net);
421 return ipvs->sysctl_lblcr_expiration;
422#else
423 return DEFAULT_EXPIRATION;
424#endif
425}
421 426
422static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) 427static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
423{ 428{
@@ -431,8 +436,8 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
431 436
432 write_lock(&svc->sched_lock); 437 write_lock(&svc->sched_lock);
433 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 438 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
434 if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, 439 if (time_after(en->lastuse +
435 now)) 440 sysctl_lblcr_expiration(svc), now))
436 continue; 441 continue;
437 442
438 ip_vs_lblcr_free(en); 443 ip_vs_lblcr_free(en);
@@ -566,12 +571,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
566 int loh, doh; 571 int loh, doh;
567 572
568 /* 573 /*
569 * We think the overhead of processing active connections is fifty 574 * We use the following formula to estimate the load:
570 * times higher than that of inactive connections in average. (This
571 * fifty times might not be accurate, we will change it later.) We
572 * use the following formula to estimate the overhead:
573 * dest->activeconns*50 + dest->inactconns
574 * and the load:
575 * (dest overhead) / dest->weight 575 * (dest overhead) / dest->weight
576 * 576 *
577 * Remember -- no floats in kernel mode!!! 577 * Remember -- no floats in kernel mode!!!
@@ -588,8 +588,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
588 588
589 if (atomic_read(&dest->weight) > 0) { 589 if (atomic_read(&dest->weight) > 0) {
590 least = dest; 590 least = dest;
591 loh = atomic_read(&least->activeconns) * 50 591 loh = ip_vs_dest_conn_overhead(least);
592 + atomic_read(&least->inactconns);
593 goto nextstage; 592 goto nextstage;
594 } 593 }
595 } 594 }
@@ -603,8 +602,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
603 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 602 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
604 continue; 603 continue;
605 604
606 doh = atomic_read(&dest->activeconns) * 50 605 doh = ip_vs_dest_conn_overhead(dest);
607 + atomic_read(&dest->inactconns);
608 if (loh * atomic_read(&dest->weight) > 606 if (loh * atomic_read(&dest->weight) >
609 doh * atomic_read(&least->weight)) { 607 doh * atomic_read(&least->weight)) {
610 least = dest; 608 least = dest;
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
675 /* More than one destination + enough time passed by, cleanup */ 673 /* More than one destination + enough time passed by, cleanup */
676 if (atomic_read(&en->set.size) > 1 && 674 if (atomic_read(&en->set.size) > 1 &&
677 time_after(jiffies, en->set.lastmod + 675 time_after(jiffies, en->set.lastmod +
678 sysctl_ip_vs_lblcr_expiration)) { 676 sysctl_lblcr_expiration(svc))) {
679 struct ip_vs_dest *m; 677 struct ip_vs_dest *m;
680 678
681 write_lock(&en->set.lock); 679 write_lock(&en->set.lock);
@@ -694,7 +692,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
694 /* The cache entry is invalid, time to schedule */ 692 /* The cache entry is invalid, time to schedule */
695 dest = __ip_vs_lblcr_schedule(svc); 693 dest = __ip_vs_lblcr_schedule(svc);
696 if (!dest) { 694 if (!dest) {
697 IP_VS_ERR_RL("LBLCR: no destination available\n"); 695 ip_vs_scheduler_err(svc, "no destination available");
698 read_unlock(&svc->sched_lock); 696 read_unlock(&svc->sched_lock);
699 return NULL; 697 return NULL;
700 } 698 }
@@ -744,23 +742,77 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
744 .schedule = ip_vs_lblcr_schedule, 742 .schedule = ip_vs_lblcr_schedule,
745}; 743};
746 744
745/*
746 * per netns init.
747 */
748#ifdef CONFIG_SYSCTL
749static int __net_init __ip_vs_lblcr_init(struct net *net)
750{
751 struct netns_ipvs *ipvs = net_ipvs(net);
752
753 if (!net_eq(net, &init_net)) {
754 ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
755 sizeof(vs_vars_table),
756 GFP_KERNEL);
757 if (ipvs->lblcr_ctl_table == NULL)
758 return -ENOMEM;
759 } else
760 ipvs->lblcr_ctl_table = vs_vars_table;
761 ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
762 ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
763
764 ipvs->lblcr_ctl_header =
765 register_net_sysctl_table(net, net_vs_ctl_path,
766 ipvs->lblcr_ctl_table);
767 if (!ipvs->lblcr_ctl_header) {
768 if (!net_eq(net, &init_net))
769 kfree(ipvs->lblcr_ctl_table);
770 return -ENOMEM;
771 }
772
773 return 0;
774}
775
776static void __net_exit __ip_vs_lblcr_exit(struct net *net)
777{
778 struct netns_ipvs *ipvs = net_ipvs(net);
779
780 unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
781
782 if (!net_eq(net, &init_net))
783 kfree(ipvs->lblcr_ctl_table);
784}
785
786#else
787
788static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
789static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
790
791#endif
792
793static struct pernet_operations ip_vs_lblcr_ops = {
794 .init = __ip_vs_lblcr_init,
795 .exit = __ip_vs_lblcr_exit,
796};
747 797
748static int __init ip_vs_lblcr_init(void) 798static int __init ip_vs_lblcr_init(void)
749{ 799{
750 int ret; 800 int ret;
751 801
752 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); 802 ret = register_pernet_subsys(&ip_vs_lblcr_ops);
803 if (ret)
804 return ret;
805
753 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 806 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
754 if (ret) 807 if (ret)
755 unregister_sysctl_table(sysctl_header); 808 unregister_pernet_subsys(&ip_vs_lblcr_ops);
756 return ret; 809 return ret;
757} 810}
758 811
759
760static void __exit ip_vs_lblcr_cleanup(void) 812static void __exit ip_vs_lblcr_cleanup(void)
761{ 813{
762 unregister_sysctl_table(sysctl_header);
763 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 814 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
815 unregister_pernet_subsys(&ip_vs_lblcr_ops);
764} 816}
765 817
766 818
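
Both the lblc and lblcr expiration checks above go through time_after()/time_before() rather than comparing jiffies directly, so the test stays correct when the jiffies counter wraps. A userspace sketch of the idea behind those macros (simplified; the kernel versions also add type checking, and the cast below leans on the same two's-complement behaviour the kernel assumes):

#include <stdio.h>
#include <stdint.h>

/* ~ time_after(a, b): true if a is later than b, even across a wrap. */
static int after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

int main(void)
{
	uint32_t now = 0xFFFFFFF0u;		/* counter just before wrapping */
	uint32_t deadline = now + 0x40;		/* wraps to a small value */

	/* A naive "now > deadline" would already claim expiry here. */
	printf("expired? %s\n", after(now, deadline) ? "yes" : "no");

	now += 0x80;				/* time passes across the wrap */
	printf("expired? %s\n", after(now, deadline) ? "yes" : "no");
	return 0;
}
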
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 4f69db1fac56..f391819c0cca 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -22,22 +22,6 @@
22 22
23#include <net/ip_vs.h> 23#include <net/ip_vs.h>
24 24
25
26static inline unsigned int
27ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
28{
29 /*
30 * We think the overhead of processing active connections is 256
31 * times higher than that of inactive connections in average. (This
32 * 256 times might not be accurate, we will change it later) We
33 * use the following formula to estimate the overhead now:
34 * dest->activeconns*256 + dest->inactconns
35 */
36 return (atomic_read(&dest->activeconns) << 8) +
37 atomic_read(&dest->inactconns);
38}
39
40
41/* 25/*
42 * Least Connection scheduling 26 * Least Connection scheduling
43 */ 27 */
@@ -62,7 +46,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
62 if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || 46 if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
63 atomic_read(&dest->weight) == 0) 47 atomic_read(&dest->weight) == 0)
64 continue; 48 continue;
65 doh = ip_vs_lc_dest_overhead(dest); 49 doh = ip_vs_dest_conn_overhead(dest);
66 if (!least || doh < loh) { 50 if (!least || doh < loh) {
67 least = dest; 51 least = dest;
68 loh = doh; 52 loh = doh;
@@ -70,7 +54,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
70 } 54 }
71 55
72 if (!least) 56 if (!least)
73 IP_VS_ERR_RL("LC: no destination available\n"); 57 ip_vs_scheduler_err(svc, "no destination available");
74 else 58 else
75 IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d " 59 IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
76 "inactconns %d\n", 60 "inactconns %d\n",
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647cd450..f454c80df0a7 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
141 struct nf_conntrack_tuple *orig, new_reply; 141 struct nf_conntrack_tuple *orig, new_reply;
142 struct ip_vs_conn *cp; 142 struct ip_vs_conn *cp;
143 struct ip_vs_conn_param p; 143 struct ip_vs_conn_param p;
144 struct net *net = nf_ct_net(ct);
144 145
145 if (exp->tuple.src.l3num != PF_INET) 146 if (exp->tuple.src.l3num != PF_INET)
146 return; 147 return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
155 156
156 /* RS->CLIENT */ 157 /* RS->CLIENT */
157 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 158 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
158 ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, 159 ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
159 &orig->src.u3, orig->src.u.tcp.port, 160 &orig->src.u3, orig->src.u.tcp.port,
160 &orig->dst.u3, orig->dst.u.tcp.port, &p); 161 &orig->dst.u3, orig->dst.u.tcp.port, &p);
161 cp = ip_vs_conn_out_get(&p); 162 cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
268 " for conn " FMT_CONN "\n", 269 " for conn " FMT_CONN "\n",
269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); 270 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
270 271
271 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); 272 h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
273 &tuple);
272 if (h) { 274 if (h) {
273 ct = nf_ct_tuplehash_to_ctrack(h); 275 ct = nf_ct_tuplehash_to_ctrack(h);
274 /* Show what happens instead of calling nf_ct_kill() */ 276 /* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index c413e1830823..984d9c137d84 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -99,7 +99,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
99 } 99 }
100 100
101 if (!least) { 101 if (!least) {
102 IP_VS_ERR_RL("NQ: no destination available\n"); 102 ip_vs_scheduler_err(svc, "no destination available");
103 return NULL; 103 return NULL;
104 } 104 }
105 105
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 3414af70ee12..5cf859ccb31b 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
29} 29}
30 30
31/* Get pe in the pe list by name */ 31/* Get pe in the pe list by name */
32static struct ip_vs_pe * 32struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
33ip_vs_pe_getbyname(const char *pe_name)
34{ 33{
35 struct ip_vs_pe *pe; 34 struct ip_vs_pe *pe;
36 35
37 IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, 36 IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
38 pe_name); 37 pe_name);
39 38
40 spin_lock_bh(&ip_vs_pe_lock); 39 spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
60} 59}
61 60
62/* Lookup pe and try to load it if it doesn't exist */ 61/* Lookup pe and try to load it if it doesn't exist */
63struct ip_vs_pe *ip_vs_pe_get(const char *name) 62struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
64{ 63{
65 struct ip_vs_pe *pe; 64 struct ip_vs_pe *pe;
66 65
67 /* Search for the pe by name */ 66 /* Search for the pe by name */
68 pe = ip_vs_pe_getbyname(name); 67 pe = __ip_vs_pe_getbyname(name);
69 68
70 /* If pe not found, load the module and search again */ 69 /* If pe not found, load the module and search again */
71 if (!pe) { 70 if (!pe) {
72 request_module("ip_vs_pe_%s", name); 71 request_module("ip_vs_pe_%s", name);
73 pe = ip_vs_pe_getbyname(name); 72 pe = __ip_vs_pe_getbyname(name);
74 } 73 }
75 74
76 return pe; 75 return pe;
77} 76}
78 77
79void ip_vs_pe_put(struct ip_vs_pe *pe)
80{
81 if (pe && pe->module)
82 module_put(pe->module);
83}
84
85/* Register a pe in the pe list */ 78/* Register a pe in the pe list */
86int register_ip_vs_pe(struct ip_vs_pe *pe) 79int register_ip_vs_pe(struct ip_vs_pe *pe)
87{ 80{
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e9620f3e..13d607ae9c52 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
71 struct ip_vs_iphdr iph; 71 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen; 72 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr; 73 const char *dptr;
74 int retc;
74 75
75 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); 76 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
76 77
@@ -83,20 +84,21 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
83 if (dataoff >= skb->len) 84 if (dataoff >= skb->len)
84 return -EINVAL; 85 return -EINVAL;
85 86
87 if ((retc=skb_linearize(skb)) < 0)
88 return retc;
86 dptr = skb->data + dataoff; 89 dptr = skb->data + dataoff;
87 datalen = skb->len - dataoff; 90 datalen = skb->len - dataoff;
88 91
89 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) 92 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
90 return -EINVAL; 93 return -EINVAL;
91 94
92 p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
93 if (!p->pe_data)
94 return -ENOMEM;
95
96 /* N.B: pe_data is only set on success, 95 /* N.B: pe_data is only set on success,
97 * this allows fallback to the default persistence logic on failure 96 * this allows fallback to the default persistence logic on failure
98 */ 97 */
99 memcpy(p->pe_data, dptr + matchoff, matchlen); 98 p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
99 if (!p->pe_data)
100 return -ENOMEM;
101
100 p->pe_data_len = matchlen; 102 p->pe_data_len = matchlen;
101 103
102 return 0; 104 return 0;
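
The SIP persistence hunk makes two small but typical fixes: the payload is linearized before skb->data is dereferenced, and the Call-ID copy switches from kmalloc()+memcpy() to kmemdup(), assigning p->pe_data only once the copy exists so a failed allocation still falls back to the default persistence logic. A userspace sketch of that allocate-copy-then-publish ordering (helper and field names are stand-ins):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* ~ kmemdup(): allocate and copy in one step. */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

struct conn_param {
	char *pe_data;
	size_t pe_data_len;
};

static int fill_param(struct conn_param *p, const char *callid, size_t len)
{
	char *copy = memdup(callid, len);

	if (!copy)
		return -1;		/* pe_data left untouched on failure */
	p->pe_data = copy;
	p->pe_data_len = len;
	return 0;
}

int main(void)
{
	struct conn_param p = { 0 };
	const char *callid = "a84b4c76e66710@host.example";

	if (fill_param(&p, callid, strlen(callid)) == 0)
		printf("pe_data (%zu bytes): %.*s\n",
		       p.pe_data_len, (int)p.pe_data_len, p.pe_data);
	free(p.pe_data);
	return 0;
}
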
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index c53998390877..eb86028536fc 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -60,6 +60,35 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
60 return 0; 60 return 0;
61} 61}
62 62
63#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \
64 defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \
65 defined(CONFIG_IP_VS_PROTO_ESP)
66/*
67 * register an ipvs protocols netns related data
68 */
69static int
70register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
71{
72 struct netns_ipvs *ipvs = net_ipvs(net);
73 unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
74 struct ip_vs_proto_data *pd =
75 kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
76
77 if (!pd) {
78 pr_err("%s(): no memory.\n", __func__);
79 return -ENOMEM;
80 }
81 pd->pp = pp; /* For speed issues */
82 pd->next = ipvs->proto_data_table[hash];
83 ipvs->proto_data_table[hash] = pd;
84 atomic_set(&pd->appcnt, 0); /* Init app counter */
85
86 if (pp->init_netns != NULL)
87 pp->init_netns(net, pd);
88
89 return 0;
90}
91#endif
63 92
64/* 93/*
65 * unregister an ipvs protocol 94 * unregister an ipvs protocol
@@ -82,6 +111,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
82 return -ESRCH; 111 return -ESRCH;
83} 112}
84 113
114/*
115 * unregister an ipvs protocols netns data
116 */
117static int
118unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
119{
120 struct netns_ipvs *ipvs = net_ipvs(net);
121 struct ip_vs_proto_data **pd_p;
122 unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
123
124 pd_p = &ipvs->proto_data_table[hash];
125 for (; *pd_p; pd_p = &(*pd_p)->next) {
126 if (*pd_p == pd) {
127 *pd_p = pd->next;
128 if (pd->pp->exit_netns != NULL)
129 pd->pp->exit_netns(net, pd);
130 kfree(pd);
131 return 0;
132 }
133 }
134
135 return -ESRCH;
136}
85 137
86/* 138/*
87 * get ip_vs_protocol object by its proto. 139 * get ip_vs_protocol object by its proto.
@@ -100,19 +152,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
100} 152}
101EXPORT_SYMBOL(ip_vs_proto_get); 153EXPORT_SYMBOL(ip_vs_proto_get);
102 154
155/*
156 * get ip_vs_protocol object data by netns and proto
157 */
158struct ip_vs_proto_data *
159__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
160{
161 struct ip_vs_proto_data *pd;
162 unsigned hash = IP_VS_PROTO_HASH(proto);
163
164 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
165 if (pd->pp->protocol == proto)
166 return pd;
167 }
168
169 return NULL;
170}
171
172struct ip_vs_proto_data *
173ip_vs_proto_data_get(struct net *net, unsigned short proto)
174{
175 struct netns_ipvs *ipvs = net_ipvs(net);
176
177 return __ipvs_proto_data_get(ipvs, proto);
178}
179EXPORT_SYMBOL(ip_vs_proto_data_get);
103 180
104/* 181/*
105 * Propagate event for state change to all protocols 182 * Propagate event for state change to all protocols
106 */ 183 */
107void ip_vs_protocol_timeout_change(int flags) 184void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
108{ 185{
109 struct ip_vs_protocol *pp; 186 struct ip_vs_proto_data *pd;
110 int i; 187 int i;
111 188
112 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 189 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
113 for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { 190 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
114 if (pp->timeout_change) 191 if (pd->pp->timeout_change)
115 pp->timeout_change(pp, flags); 192 pd->pp->timeout_change(pd, flags);
116 } 193 }
117 } 194 }
118} 195}
@@ -236,6 +313,41 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
236 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 313 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
237} 314}
238 315
316/*
317 * per network name-space init
318 */
319int __net_init __ip_vs_protocol_init(struct net *net)
320{
321#ifdef CONFIG_IP_VS_PROTO_TCP
322 register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
323#endif
324#ifdef CONFIG_IP_VS_PROTO_UDP
325 register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
326#endif
327#ifdef CONFIG_IP_VS_PROTO_SCTP
328 register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
329#endif
330#ifdef CONFIG_IP_VS_PROTO_AH
331 register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
332#endif
333#ifdef CONFIG_IP_VS_PROTO_ESP
334 register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
335#endif
336 return 0;
337}
338
339void __net_exit __ip_vs_protocol_cleanup(struct net *net)
340{
341 struct netns_ipvs *ipvs = net_ipvs(net);
342 struct ip_vs_proto_data *pd;
343 int i;
344
345 /* unregister all the ipvs proto data for this netns */
346 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
347 while ((pd = ipvs->proto_data_table[i]) != NULL)
348 unregister_ip_vs_proto_netns(net, pd);
349 }
350}
239 351
240int __init ip_vs_protocol_init(void) 352int __init ip_vs_protocol_init(void)
241{ 353{
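
The per-netns protocol data in ip_vs_proto.c lives in small singly linked hash chains: registration pushes a freshly allocated ip_vs_proto_data onto the chain head, and unregistration walks the chain with a pointer-to-pointer so the matching node can be unlinked without tracking a separate "previous" pointer, as in the unregister_ip_vs_proto_netns() hunk. A userspace sketch of that chain handling (structure and hash macro are trimmed stand-ins for the kernel ones):

#include <stdio.h>
#include <stdlib.h>

struct proto_data {
	unsigned short protocol;
	struct proto_data *next;
};

#define TAB_SIZE 32
#define PROTO_HASH(p) ((p) % TAB_SIZE)

static struct proto_data *proto_table[TAB_SIZE];

static int register_proto(unsigned short proto)
{
	struct proto_data *pd = calloc(1, sizeof(*pd));

	if (!pd)
		return -1;
	pd->protocol = proto;
	pd->next = proto_table[PROTO_HASH(proto)];	/* push onto chain head */
	proto_table[PROTO_HASH(proto)] = pd;
	return 0;
}

static int unregister_proto(unsigned short proto)
{
	struct proto_data **pd_p = &proto_table[PROTO_HASH(proto)];

	for (; *pd_p; pd_p = &(*pd_p)->next) {
		if ((*pd_p)->protocol == proto) {
			struct proto_data *pd = *pd_p;

			*pd_p = pd->next;		/* unlink in place */
			free(pd);
			return 0;
		}
	}
	return -1;					/* ~ -ESRCH */
}

int main(void)
{
	register_proto(6);	/* TCP */
	register_proto(17);	/* UDP */
	printf("remove UDP: %d, remove UDP again: %d\n",
	       unregister_proto(17), unregister_proto(17));
	return 0;
}
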
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a0461117d3f..5b8eb8b12c3e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,28 +41,30 @@ struct isakmp_hdr {
41#define PORT_ISAKMP 500 41#define PORT_ISAKMP 500
42 42
43static void 43static void
44ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, 44ah_esp_conn_fill_param_proto(struct net *net, int af,
45 int inverse, struct ip_vs_conn_param *p) 45 const struct ip_vs_iphdr *iph, int inverse,
46 struct ip_vs_conn_param *p)
46{ 47{
47 if (likely(!inverse)) 48 if (likely(!inverse))
48 ip_vs_conn_fill_param(af, IPPROTO_UDP, 49 ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
49 &iph->saddr, htons(PORT_ISAKMP), 50 &iph->saddr, htons(PORT_ISAKMP),
50 &iph->daddr, htons(PORT_ISAKMP), p); 51 &iph->daddr, htons(PORT_ISAKMP), p);
51 else 52 else
52 ip_vs_conn_fill_param(af, IPPROTO_UDP, 53 ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
53 &iph->daddr, htons(PORT_ISAKMP), 54 &iph->daddr, htons(PORT_ISAKMP),
54 &iph->saddr, htons(PORT_ISAKMP), p); 55 &iph->saddr, htons(PORT_ISAKMP), p);
55} 56}
56 57
57static struct ip_vs_conn * 58static struct ip_vs_conn *
58ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 59ah_esp_conn_in_get(int af, const struct sk_buff *skb,
59 const struct ip_vs_iphdr *iph, unsigned int proto_off, 60 const struct ip_vs_iphdr *iph, unsigned int proto_off,
60 int inverse) 61 int inverse)
61{ 62{
62 struct ip_vs_conn *cp; 63 struct ip_vs_conn *cp;
63 struct ip_vs_conn_param p; 64 struct ip_vs_conn_param p;
65 struct net *net = skb_net(skb);
64 66
65 ah_esp_conn_fill_param_proto(af, iph, inverse, &p); 67 ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
66 cp = ip_vs_conn_in_get(&p); 68 cp = ip_vs_conn_in_get(&p);
67 if (!cp) { 69 if (!cp) {
68 /* 70 /*
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
72 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " 74 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
73 "%s%s %s->%s\n", 75 "%s%s %s->%s\n",
74 inverse ? "ICMP+" : "", 76 inverse ? "ICMP+" : "",
75 pp->name, 77 ip_vs_proto_get(iph->protocol)->name,
76 IP_VS_DBG_ADDR(af, &iph->saddr), 78 IP_VS_DBG_ADDR(af, &iph->saddr),
77 IP_VS_DBG_ADDR(af, &iph->daddr)); 79 IP_VS_DBG_ADDR(af, &iph->daddr));
78 } 80 }
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
83 85
84static struct ip_vs_conn * 86static struct ip_vs_conn *
85ah_esp_conn_out_get(int af, const struct sk_buff *skb, 87ah_esp_conn_out_get(int af, const struct sk_buff *skb,
86 struct ip_vs_protocol *pp,
87 const struct ip_vs_iphdr *iph, 88 const struct ip_vs_iphdr *iph,
88 unsigned int proto_off, 89 unsigned int proto_off,
89 int inverse) 90 int inverse)
90{ 91{
91 struct ip_vs_conn *cp; 92 struct ip_vs_conn *cp;
92 struct ip_vs_conn_param p; 93 struct ip_vs_conn_param p;
94 struct net *net = skb_net(skb);
93 95
94 ah_esp_conn_fill_param_proto(af, iph, inverse, &p); 96 ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
95 cp = ip_vs_conn_out_get(&p); 97 cp = ip_vs_conn_out_get(&p);
96 if (!cp) { 98 if (!cp) {
97 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " 99 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
98 "%s%s %s->%s\n", 100 "%s%s %s->%s\n",
99 inverse ? "ICMP+" : "", 101 inverse ? "ICMP+" : "",
100 pp->name, 102 ip_vs_proto_get(iph->protocol)->name,
101 IP_VS_DBG_ADDR(af, &iph->saddr), 103 IP_VS_DBG_ADDR(af, &iph->saddr),
102 IP_VS_DBG_ADDR(af, &iph->daddr)); 104 IP_VS_DBG_ADDR(af, &iph->daddr));
103 } 105 }
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
107 109
108 110
109static int 111static int
110ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 112ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
111 int *verdict, struct ip_vs_conn **cpp) 113 int *verdict, struct ip_vs_conn **cpp)
112{ 114{
113 /* 115 /*
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
117 return 0; 119 return 0;
118} 120}
119 121
120static void ah_esp_init(struct ip_vs_protocol *pp)
121{
122 /* nothing to do now */
123}
124
125
126static void ah_esp_exit(struct ip_vs_protocol *pp)
127{
128 /* nothing to do now */
129}
130
131
132#ifdef CONFIG_IP_VS_PROTO_AH 122#ifdef CONFIG_IP_VS_PROTO_AH
133struct ip_vs_protocol ip_vs_protocol_ah = { 123struct ip_vs_protocol ip_vs_protocol_ah = {
134 .name = "AH", 124 .name = "AH",
135 .protocol = IPPROTO_AH, 125 .protocol = IPPROTO_AH,
136 .num_states = 1, 126 .num_states = 1,
137 .dont_defrag = 1, 127 .dont_defrag = 1,
138 .init = ah_esp_init, 128 .init = NULL,
139 .exit = ah_esp_exit, 129 .exit = NULL,
140 .conn_schedule = ah_esp_conn_schedule, 130 .conn_schedule = ah_esp_conn_schedule,
141 .conn_in_get = ah_esp_conn_in_get, 131 .conn_in_get = ah_esp_conn_in_get,
142 .conn_out_get = ah_esp_conn_out_get, 132 .conn_out_get = ah_esp_conn_out_get,
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
149 .app_conn_bind = NULL, 139 .app_conn_bind = NULL,
150 .debug_packet = ip_vs_tcpudp_debug_packet, 140 .debug_packet = ip_vs_tcpudp_debug_packet,
151 .timeout_change = NULL, /* ISAKMP */ 141 .timeout_change = NULL, /* ISAKMP */
152 .set_state_timeout = NULL,
153}; 142};
154#endif 143#endif
155 144
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
159 .protocol = IPPROTO_ESP, 148 .protocol = IPPROTO_ESP,
160 .num_states = 1, 149 .num_states = 1,
161 .dont_defrag = 1, 150 .dont_defrag = 1,
162 .init = ah_esp_init, 151 .init = NULL,
163 .exit = ah_esp_exit, 152 .exit = NULL,
164 .conn_schedule = ah_esp_conn_schedule, 153 .conn_schedule = ah_esp_conn_schedule,
165 .conn_in_get = ah_esp_conn_in_get, 154 .conn_in_get = ah_esp_conn_in_get,
166 .conn_out_get = ah_esp_conn_out_get, 155 .conn_out_get = ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bcd342b..d12ed53ec95f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,9 +9,10 @@
9#include <net/ip_vs.h> 9#include <net/ip_vs.h>
10 10
11static int 11static int
12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13 int *verdict, struct ip_vs_conn **cpp) 13 int *verdict, struct ip_vs_conn **cpp)
14{ 14{
15 struct net *net;
15 struct ip_vs_service *svc; 16 struct ip_vs_service *svc;
16 sctp_chunkhdr_t _schunkh, *sch; 17 sctp_chunkhdr_t _schunkh, *sch;
17 sctp_sctphdr_t *sh, _sctph; 18 sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
27 sizeof(_schunkh), &_schunkh); 28 sizeof(_schunkh), &_schunkh);
28 if (sch == NULL) 29 if (sch == NULL)
29 return 0; 30 return 0;
30 31 net = skb_net(skb);
31 if ((sch->type == SCTP_CID_INIT) && 32 if ((sch->type == SCTP_CID_INIT) &&
32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, 33 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
33 &iph.daddr, sh->dest))) { 34 &iph.daddr, sh->dest))) {
34 int ignored; 35 int ignored;
35 36
36 if (ip_vs_todrop()) { 37 if (ip_vs_todrop(net_ipvs(net))) {
37 /* 38 /*
38 * It seems that we are very loaded. 39 * It seems that we are very loaded.
39 * We have to drop this packet :( 40 * We have to drop this packet :(
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
46 * Let the virtual server select a real server for the 47 * Let the virtual server select a real server for the
47 * incoming connection, and create a connection entry. 48 * incoming connection, and create a connection entry.
48 */ 49 */
49 *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 50 *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
50 if (!*cpp && !ignored) { 51 if (!*cpp && ignored <= 0) {
51 *verdict = ip_vs_leave(svc, skb, pp); 52 if (!ignored)
53 *verdict = ip_vs_leave(svc, skb, pd);
54 else {
55 ip_vs_service_put(svc);
56 *verdict = NF_DROP;
57 }
52 return 0; 58 return 0;
53 } 59 }
54 ip_vs_service_put(svc); 60 ip_vs_service_put(svc);
55 } 61 }
56 62 /* NF_ACCEPT */
57 return 1; 63 return 1;
58} 64}
59 65
@@ -560,7 +566,7 @@ static struct ipvs_sctp_nextstate
560 * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server 566 * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server
561 */ 567 */
562 /* 568 /*
563 * We recieved the data chuck, keep the state unchanged. I assume 569 * We received the data chuck, keep the state unchanged. I assume
564 * that still data chuncks can be received by both the peers in 570 * that still data chuncks can be received by both the peers in
565 * SHUDOWN state 571 * SHUDOWN state
566 */ 572 */
@@ -627,7 +633,7 @@ static struct ipvs_sctp_nextstate
627 * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client 633 * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client
628 */ 634 */
629 /* 635 /*
630 * We recieved the data chuck, keep the state unchanged. I assume 636 * We received the data chuck, keep the state unchanged. I assume
631 * that still data chuncks can be received by both the peers in 637 * that still data chuncks can be received by both the peers in
632 * SHUDOWN state 638 * SHUDOWN state
633 */ 639 */
@@ -695,7 +701,7 @@ static struct ipvs_sctp_nextstate
695 * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server 701 * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server
696 */ 702 */
697 /* 703 /*
698 * We recieved the data chuck, keep the state unchanged. I assume 704 * We received the data chuck, keep the state unchanged. I assume
699 * that still data chuncks can be received by both the peers in 705 * that still data chuncks can be received by both the peers in
700 * SHUDOWN state 706 * SHUDOWN state
701 */ 707 */
@@ -765,7 +771,7 @@ static struct ipvs_sctp_nextstate
765 * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client 771 * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client
766 */ 772 */
767 /* 773 /*
768 * We recieved the data chuck, keep the state unchanged. I assume 774 * We received the data chuck, keep the state unchanged. I assume
769 * that still data chuncks can be received by both the peers in 775 * that still data chuncks can be received by both the peers in
770 * SHUDOWN state 776 * SHUDOWN state
771 */ 777 */
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
856/* 862/*
857 * Timeout table[state] 863 * Timeout table[state]
858 */ 864 */
859static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { 865static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
860 [IP_VS_SCTP_S_NONE] = 2 * HZ, 866 [IP_VS_SCTP_S_NONE] = 2 * HZ,
861 [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, 867 [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ,
862 [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, 868 [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ,
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
900 return "?"; 906 return "?";
901} 907}
902 908
903static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
904{
905}
906
907static int
908sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
909{
910
911return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
912 sctp_state_name_table, sname, to);
913}
914
915static inline int 909static inline int
916set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, 910set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
917 int direction, const struct sk_buff *skb) 911 int direction, const struct sk_buff *skb)
918{ 912{
919 sctp_chunkhdr_t _sctpch, *sch; 913 sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
971 965
972 IP_VS_DBG_BUF(8, "%s %s %s:%d->" 966 IP_VS_DBG_BUF(8, "%s %s %s:%d->"
973 "%s:%d state: %s->%s conn->refcnt:%d\n", 967 "%s:%d state: %s->%s conn->refcnt:%d\n",
974 pp->name, 968 pd->pp->name,
975 ((direction == IP_VS_DIR_OUTPUT) ? 969 ((direction == IP_VS_DIR_OUTPUT) ?
976 "output " : "input "), 970 "output " : "input "),
977 IP_VS_DBG_ADDR(cp->af, &cp->daddr), 971 IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
995 } 989 }
996 } 990 }
997 } 991 }
992 if (likely(pd))
993 cp->timeout = pd->timeout_table[cp->state = next_state];
994 else /* What to do ? */
995 cp->timeout = sctp_timeouts[cp->state = next_state];
998 996
999 cp->timeout = pp->timeout_table[cp->state = next_state]; 997 return 1;
1000
1001 return 1;
1002} 998}
1003 999
1004static int 1000static int
1005sctp_state_transition(struct ip_vs_conn *cp, int direction, 1001sctp_state_transition(struct ip_vs_conn *cp, int direction,
1006 const struct sk_buff *skb, struct ip_vs_protocol *pp) 1002 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
1007{ 1003{
1008 int ret = 0; 1004 int ret = 0;
1009 1005
1010 spin_lock(&cp->lock); 1006 spin_lock(&cp->lock);
1011 ret = set_sctp_state(pp, cp, direction, skb); 1007 ret = set_sctp_state(pd, cp, direction, skb);
1012 spin_unlock(&cp->lock); 1008 spin_unlock(&cp->lock);
1013 1009
1014 return ret; 1010 return ret;
1015} 1011}
1016 1012
1017/*
1018 * Hash table for SCTP application incarnations
1019 */
1020#define SCTP_APP_TAB_BITS 4
1021#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
1022#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
1023
1024static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
1025static DEFINE_SPINLOCK(sctp_app_lock);
1026
1027static inline __u16 sctp_app_hashkey(__be16 port) 1013static inline __u16 sctp_app_hashkey(__be16 port)
1028{ 1014{
1029 return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) 1015 return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
1030 & SCTP_APP_TAB_MASK; 1016 & SCTP_APP_TAB_MASK;
1031} 1017}
1032 1018
1033static int sctp_register_app(struct ip_vs_app *inc) 1019static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
1034{ 1020{
1035 struct ip_vs_app *i; 1021 struct ip_vs_app *i;
1036 __u16 hash; 1022 __u16 hash;
1037 __be16 port = inc->port; 1023 __be16 port = inc->port;
1038 int ret = 0; 1024 int ret = 0;
1025 struct netns_ipvs *ipvs = net_ipvs(net);
1026 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
1039 1027
1040 hash = sctp_app_hashkey(port); 1028 hash = sctp_app_hashkey(port);
1041 1029
1042 spin_lock_bh(&sctp_app_lock); 1030 spin_lock_bh(&ipvs->sctp_app_lock);
1043 list_for_each_entry(i, &sctp_apps[hash], p_list) { 1031 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
1044 if (i->port == port) { 1032 if (i->port == port) {
1045 ret = -EEXIST; 1033 ret = -EEXIST;
1046 goto out; 1034 goto out;
1047 } 1035 }
1048 } 1036 }
1049 list_add(&inc->p_list, &sctp_apps[hash]); 1037 list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
1050 atomic_inc(&ip_vs_protocol_sctp.appcnt); 1038 atomic_inc(&pd->appcnt);
1051out: 1039out:
1052 spin_unlock_bh(&sctp_app_lock); 1040 spin_unlock_bh(&ipvs->sctp_app_lock);
1053 1041
1054 return ret; 1042 return ret;
1055} 1043}
1056 1044
1057static void sctp_unregister_app(struct ip_vs_app *inc) 1045static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
1058{ 1046{
1059 spin_lock_bh(&sctp_app_lock); 1047 struct netns_ipvs *ipvs = net_ipvs(net);
1060 atomic_dec(&ip_vs_protocol_sctp.appcnt); 1048 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
1049
1050 spin_lock_bh(&ipvs->sctp_app_lock);
1051 atomic_dec(&pd->appcnt);
1061 list_del(&inc->p_list); 1052 list_del(&inc->p_list);
1062 spin_unlock_bh(&sctp_app_lock); 1053 spin_unlock_bh(&ipvs->sctp_app_lock);
1063} 1054}
1064 1055
1065static int sctp_app_conn_bind(struct ip_vs_conn *cp) 1056static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1066{ 1057{
1058 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
1067 int hash; 1059 int hash;
1068 struct ip_vs_app *inc; 1060 struct ip_vs_app *inc;
1069 int result = 0; 1061 int result = 0;
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1074 /* Lookup application incarnations and bind the right one */ 1066 /* Lookup application incarnations and bind the right one */
1075 hash = sctp_app_hashkey(cp->vport); 1067 hash = sctp_app_hashkey(cp->vport);
1076 1068
1077 spin_lock(&sctp_app_lock); 1069 spin_lock(&ipvs->sctp_app_lock);
1078 list_for_each_entry(inc, &sctp_apps[hash], p_list) { 1070 list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
1079 if (inc->port == cp->vport) { 1071 if (inc->port == cp->vport) {
1080 if (unlikely(!ip_vs_app_inc_get(inc))) 1072 if (unlikely(!ip_vs_app_inc_get(inc)))
1081 break; 1073 break;
1082 spin_unlock(&sctp_app_lock); 1074 spin_unlock(&ipvs->sctp_app_lock);
1083 1075
1084 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" 1076 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
1085 "%s:%u to app %s on port %u\n", 1077 "%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1095 goto out; 1087 goto out;
1096 } 1088 }
1097 } 1089 }
1098 spin_unlock(&sctp_app_lock); 1090 spin_unlock(&ipvs->sctp_app_lock);
1099out: 1091out:
1100 return result; 1092 return result;
1101} 1093}
1102 1094
1103static void ip_vs_sctp_init(struct ip_vs_protocol *pp) 1095/* ---------------------------------------------
1096 * timeouts is netns related now.
1097 * ---------------------------------------------
1098 */
1099static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
1104{ 1100{
1105 IP_VS_INIT_HASH_TABLE(sctp_apps); 1101 struct netns_ipvs *ipvs = net_ipvs(net);
1106 pp->timeout_table = sctp_timeouts;
1107}
1108 1102
1103 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
1104 spin_lock_init(&ipvs->sctp_app_lock);
1105 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
1106 sizeof(sctp_timeouts));
1107}
1109 1108
1110static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) 1109static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
1111{ 1110{
1112 1111 kfree(pd->timeout_table);
1113} 1112}
1114 1113
1115struct ip_vs_protocol ip_vs_protocol_sctp = { 1114struct ip_vs_protocol ip_vs_protocol_sctp = {
1116 .name = "SCTP", 1115 .name = "SCTP",
1117 .protocol = IPPROTO_SCTP, 1116 .protocol = IPPROTO_SCTP,
1118 .num_states = IP_VS_SCTP_S_LAST, 1117 .num_states = IP_VS_SCTP_S_LAST,
1119 .dont_defrag = 0, 1118 .dont_defrag = 0,
1120 .appcnt = ATOMIC_INIT(0), 1119 .init = NULL,
1121 .init = ip_vs_sctp_init, 1120 .exit = NULL,
1122 .exit = ip_vs_sctp_exit, 1121 .init_netns = __ip_vs_sctp_init,
1123 .register_app = sctp_register_app, 1122 .exit_netns = __ip_vs_sctp_exit,
1123 .register_app = sctp_register_app,
1124 .unregister_app = sctp_unregister_app, 1124 .unregister_app = sctp_unregister_app,
1125 .conn_schedule = sctp_conn_schedule, 1125 .conn_schedule = sctp_conn_schedule,
1126 .conn_in_get = ip_vs_conn_in_get_proto, 1126 .conn_in_get = ip_vs_conn_in_get_proto,
1127 .conn_out_get = ip_vs_conn_out_get_proto, 1127 .conn_out_get = ip_vs_conn_out_get_proto,
1128 .snat_handler = sctp_snat_handler, 1128 .snat_handler = sctp_snat_handler,
1129 .dnat_handler = sctp_dnat_handler, 1129 .dnat_handler = sctp_dnat_handler,
1130 .csum_check = sctp_csum_check, 1130 .csum_check = sctp_csum_check,
1131 .state_name = sctp_state_name, 1131 .state_name = sctp_state_name,
1132 .state_transition = sctp_state_transition, 1132 .state_transition = sctp_state_transition,
1133 .app_conn_bind = sctp_app_conn_bind, 1133 .app_conn_bind = sctp_app_conn_bind,
1134 .debug_packet = ip_vs_tcpudp_debug_packet, 1134 .debug_packet = ip_vs_tcpudp_debug_packet,
1135 .timeout_change = sctp_timeout_change, 1135 .timeout_change = NULL,
1136 .set_state_timeout = sctp_set_state_timeout,
1137}; 1136};
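
With the SCTP changes each namespace also gets its own copy of the timeout table: the defaults become const, __ip_vs_sctp_init() duplicates them via ip_vs_create_timeout_table() (understood here as essentially a kmemdup of the defaults; that helper itself is not shown in this diff), and set_sctp_state() then indexes the per-netns copy in pd->timeout_table. A userspace sketch of duplicating a const default table per instance (HZ and the state list below are placeholders):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define HZ 100
enum { S_NONE, S_ESTABLISHED, S_LAST };

static const int sctp_defaults[S_LAST + 1] = {
	[S_NONE]	= 2 * HZ,
	[S_ESTABLISHED]	= 15 * 60 * HZ,
	[S_LAST]	= 2 * HZ,
};

/* Duplicate the shared defaults so one instance can tune its timeouts
 * without affecting anyone else. */
static int *create_timeout_table(const int *table, size_t size)
{
	int *copy = malloc(size);

	if (copy)
		memcpy(copy, table, size);
	return copy;
}

int main(void)
{
	int *ns_table = create_timeout_table(sctp_defaults, sizeof(sctp_defaults));

	if (!ns_table)
		return 1;
	ns_table[S_ESTABLISHED] = 5 * 60 * HZ;	/* per-instance tuning */
	printf("default=%d jiffies, tuned copy=%d jiffies\n",
	       sctp_defaults[S_ESTABLISHED], ns_table[S_ESTABLISHED]);
	free(ns_table);
	return 0;
}
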
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200e2146..c0cc341b840d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
13 * 13 *
14 * Network name space (netns) aware.
15 * Global data moved to netns i.e struct netns_ipvs
16 * tcp_timeouts table has copy per netns in a hash table per
17 * protocol ip_vs_proto_data and is handled by netns
14 */ 18 */
15 19
16#define KMSG_COMPONENT "IPVS" 20#define KMSG_COMPONENT "IPVS"
@@ -28,9 +32,10 @@
28#include <net/ip_vs.h> 32#include <net/ip_vs.h>
29 33
30static int 34static int
31tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
32 int *verdict, struct ip_vs_conn **cpp) 36 int *verdict, struct ip_vs_conn **cpp)
33{ 37{
38 struct net *net;
34 struct ip_vs_service *svc; 39 struct ip_vs_service *svc;
35 struct tcphdr _tcph, *th; 40 struct tcphdr _tcph, *th;
36 struct ip_vs_iphdr iph; 41 struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
42 *verdict = NF_DROP; 47 *verdict = NF_DROP;
43 return 0; 48 return 0;
44 } 49 }
45 50 net = skb_net(skb);
46 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 51 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
47 if (th->syn && 52 if (th->syn &&
48 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, 53 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
49 th->dest))) { 54 &iph.daddr, th->dest))) {
50 int ignored; 55 int ignored;
51 56
52 if (ip_vs_todrop()) { 57 if (ip_vs_todrop(net_ipvs(net))) {
53 /* 58 /*
54 * It seems that we are very loaded. 59 * It seems that we are very loaded.
55 * We have to drop this packet :( 60 * We have to drop this packet :(
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
63 * Let the virtual server select a real server for the 68 * Let the virtual server select a real server for the
64 * incoming connection, and create a connection entry. 69 * incoming connection, and create a connection entry.
65 */ 70 */
66 *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 71 *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
67 if (!*cpp && !ignored) { 72 if (!*cpp && ignored <= 0) {
68 *verdict = ip_vs_leave(svc, skb, pp); 73 if (!ignored)
74 *verdict = ip_vs_leave(svc, skb, pd);
75 else {
76 ip_vs_service_put(svc);
77 *verdict = NF_DROP;
78 }
69 return 0; 79 return 0;
70 } 80 }
71 ip_vs_service_put(svc); 81 ip_vs_service_put(svc);
72 } 82 }
83 /* NF_ACCEPT */
73 return 1; 84 return 1;
74} 85}
75 86
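Editor's note: the hunk above turns "ignored" from a boolean into a tri-state. Reading the new branch structure alone, the outcomes are:

	/*
	 * *cpp set            -> a connection was created; keep processing.
	 * !*cpp, ignored > 0  -> the scheduler deliberately ignored the
	 *                        packet; the service reference is dropped
	 *                        and the verdict falls through to NF_ACCEPT.
	 * !*cpp, ignored == 0 -> no destination found; as before,
	 *                        ip_vs_leave() picks the verdict.
	 * !*cpp, ignored < 0  -> scheduling failed; the service reference is
	 *                        dropped and the packet gets NF_DROP.
	 */

The same pattern is repeated in udp_conn_schedule() further down.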
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
338/* 349/*
339 * Timeout table[state] 350 * Timeout table[state]
340 */ 351 */
341static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { 352static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
342 [IP_VS_TCP_S_NONE] = 2*HZ, 353 [IP_VS_TCP_S_NONE] = 2*HZ,
343 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, 354 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
344 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, 355 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
437/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 448/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
438}; 449};
439 450
440static struct tcp_states_t *tcp_state_table = tcp_states; 451static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
441
442
443static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
444{ 452{
445 int on = (flags & 1); /* secure_tcp */ 453 int on = (flags & 1); /* secure_tcp */
446 454
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
450 ** for most if not for all of the applications. Something 458 ** for most if not for all of the applications. Something
451 ** like "capabilities" (flags) for each object. 459 ** like "capabilities" (flags) for each object.
452 */ 460 */
453 tcp_state_table = (on? tcp_states_dos : tcp_states); 461 pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
454}
455
456static int
457tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
458{
459 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
460 tcp_state_name_table, sname, to);
461} 462}
462 463
463static inline int tcp_state_idx(struct tcphdr *th) 464static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th)
474} 475}
475 476
476static inline void 477static inline void
477set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, 478set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
478 int direction, struct tcphdr *th) 479 int direction, struct tcphdr *th)
479{ 480{
480 int state_idx; 481 int state_idx;
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
497 goto tcp_state_out; 498 goto tcp_state_out;
498 } 499 }
499 500
500 new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; 501 new_state =
502 pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
501 503
502 tcp_state_out: 504 tcp_state_out:
503 if (new_state != cp->state) { 505 if (new_state != cp->state) {
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
505 507
506 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" 508 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
507 "%s:%d state: %s->%s conn->refcnt:%d\n", 509 "%s:%d state: %s->%s conn->refcnt:%d\n",
508 pp->name, 510 pd->pp->name,
509 ((state_off == TCP_DIR_OUTPUT) ? 511 ((state_off == TCP_DIR_OUTPUT) ?
510 "output " : "input "), 512 "output " : "input "),
511 th->syn ? 'S' : '.', 513 th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
535 } 537 }
536 } 538 }
537 539
538 cp->timeout = pp->timeout_table[cp->state = new_state]; 540 if (likely(pd))
541 cp->timeout = pd->timeout_table[cp->state = new_state];
542 else /* What to do ? */
543 cp->timeout = tcp_timeouts[cp->state = new_state];
539} 544}
540 545
541
542/* 546/*
543 * Handle state transitions 547 * Handle state transitions
544 */ 548 */
545static int 549static int
546tcp_state_transition(struct ip_vs_conn *cp, int direction, 550tcp_state_transition(struct ip_vs_conn *cp, int direction,
547 const struct sk_buff *skb, 551 const struct sk_buff *skb,
548 struct ip_vs_protocol *pp) 552 struct ip_vs_proto_data *pd)
549{ 553{
550 struct tcphdr _tcph, *th; 554 struct tcphdr _tcph, *th;
551 555
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
560 return 0; 564 return 0;
561 565
562 spin_lock(&cp->lock); 566 spin_lock(&cp->lock);
563 set_tcp_state(pp, cp, direction, th); 567 set_tcp_state(pd, cp, direction, th);
564 spin_unlock(&cp->lock); 568 spin_unlock(&cp->lock);
565 569
566 return 1; 570 return 1;
567} 571}
568 572
569
570/*
571 * Hash table for TCP application incarnations
572 */
573#define TCP_APP_TAB_BITS 4
574#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
575#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
576
577static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
578static DEFINE_SPINLOCK(tcp_app_lock);
579
580static inline __u16 tcp_app_hashkey(__be16 port) 573static inline __u16 tcp_app_hashkey(__be16 port)
581{ 574{
582 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) 575 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port)
584} 577}
585 578
586 579
587static int tcp_register_app(struct ip_vs_app *inc) 580static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
588{ 581{
589 struct ip_vs_app *i; 582 struct ip_vs_app *i;
590 __u16 hash; 583 __u16 hash;
591 __be16 port = inc->port; 584 __be16 port = inc->port;
592 int ret = 0; 585 int ret = 0;
586 struct netns_ipvs *ipvs = net_ipvs(net);
587 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
593 588
594 hash = tcp_app_hashkey(port); 589 hash = tcp_app_hashkey(port);
595 590
596 spin_lock_bh(&tcp_app_lock); 591 spin_lock_bh(&ipvs->tcp_app_lock);
597 list_for_each_entry(i, &tcp_apps[hash], p_list) { 592 list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
598 if (i->port == port) { 593 if (i->port == port) {
599 ret = -EEXIST; 594 ret = -EEXIST;
600 goto out; 595 goto out;
601 } 596 }
602 } 597 }
603 list_add(&inc->p_list, &tcp_apps[hash]); 598 list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
604 atomic_inc(&ip_vs_protocol_tcp.appcnt); 599 atomic_inc(&pd->appcnt);
605 600
606 out: 601 out:
607 spin_unlock_bh(&tcp_app_lock); 602 spin_unlock_bh(&ipvs->tcp_app_lock);
608 return ret; 603 return ret;
609} 604}
610 605
611 606
612static void 607static void
613tcp_unregister_app(struct ip_vs_app *inc) 608tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
614{ 609{
615 spin_lock_bh(&tcp_app_lock); 610 struct netns_ipvs *ipvs = net_ipvs(net);
616 atomic_dec(&ip_vs_protocol_tcp.appcnt); 611 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
612
613 spin_lock_bh(&ipvs->tcp_app_lock);
614 atomic_dec(&pd->appcnt);
617 list_del(&inc->p_list); 615 list_del(&inc->p_list);
618 spin_unlock_bh(&tcp_app_lock); 616 spin_unlock_bh(&ipvs->tcp_app_lock);
619} 617}
620 618
621 619
622static int 620static int
623tcp_app_conn_bind(struct ip_vs_conn *cp) 621tcp_app_conn_bind(struct ip_vs_conn *cp)
624{ 622{
623 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
625 int hash; 624 int hash;
626 struct ip_vs_app *inc; 625 struct ip_vs_app *inc;
627 int result = 0; 626 int result = 0;
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
633 /* Lookup application incarnations and bind the right one */ 632 /* Lookup application incarnations and bind the right one */
634 hash = tcp_app_hashkey(cp->vport); 633 hash = tcp_app_hashkey(cp->vport);
635 634
636 spin_lock(&tcp_app_lock); 635 spin_lock(&ipvs->tcp_app_lock);
637 list_for_each_entry(inc, &tcp_apps[hash], p_list) { 636 list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
638 if (inc->port == cp->vport) { 637 if (inc->port == cp->vport) {
639 if (unlikely(!ip_vs_app_inc_get(inc))) 638 if (unlikely(!ip_vs_app_inc_get(inc)))
640 break; 639 break;
641 spin_unlock(&tcp_app_lock); 640 spin_unlock(&ipvs->tcp_app_lock);
642 641
643 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 642 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
644 "%s:%u to app %s on port %u\n", 643 "%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
655 goto out; 654 goto out;
656 } 655 }
657 } 656 }
658 spin_unlock(&tcp_app_lock); 657 spin_unlock(&ipvs->tcp_app_lock);
659 658
660 out: 659 out:
661 return result; 660 return result;
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
665/* 664/*
666 * Set LISTEN timeout. (ip_vs_conn_put will setup timer) 665 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
667 */ 666 */
668void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) 667void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
669{ 668{
669 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
670
670 spin_lock(&cp->lock); 671 spin_lock(&cp->lock);
671 cp->state = IP_VS_TCP_S_LISTEN; 672 cp->state = IP_VS_TCP_S_LISTEN;
672 cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; 673 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
674 : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
673 spin_unlock(&cp->lock); 675 spin_unlock(&cp->lock);
674} 676}
675 677
676 678/* ---------------------------------------------
677static void ip_vs_tcp_init(struct ip_vs_protocol *pp) 679 * timeouts are per-netns now.
680 * ---------------------------------------------
681 */
682static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
678{ 683{
679 IP_VS_INIT_HASH_TABLE(tcp_apps); 684 struct netns_ipvs *ipvs = net_ipvs(net);
680 pp->timeout_table = tcp_timeouts;
681}
682 685
686 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
687 spin_lock_init(&ipvs->tcp_app_lock);
688 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
689 sizeof(tcp_timeouts));
690 pd->tcp_state_table = tcp_states;
691}
683 692
684static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) 693static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
685{ 694{
695 kfree(pd->timeout_table);
686} 696}
687 697
688 698
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
691 .protocol = IPPROTO_TCP, 701 .protocol = IPPROTO_TCP,
692 .num_states = IP_VS_TCP_S_LAST, 702 .num_states = IP_VS_TCP_S_LAST,
693 .dont_defrag = 0, 703 .dont_defrag = 0,
694 .appcnt = ATOMIC_INIT(0), 704 .init = NULL,
695 .init = ip_vs_tcp_init, 705 .exit = NULL,
696 .exit = ip_vs_tcp_exit, 706 .init_netns = __ip_vs_tcp_init,
707 .exit_netns = __ip_vs_tcp_exit,
697 .register_app = tcp_register_app, 708 .register_app = tcp_register_app,
698 .unregister_app = tcp_unregister_app, 709 .unregister_app = tcp_unregister_app,
699 .conn_schedule = tcp_conn_schedule, 710 .conn_schedule = tcp_conn_schedule,
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
707 .app_conn_bind = tcp_app_conn_bind, 718 .app_conn_bind = tcp_app_conn_bind,
708 .debug_packet = ip_vs_tcpudp_debug_packet, 719 .debug_packet = ip_vs_tcpudp_debug_packet,
709 .timeout_change = tcp_timeout_change, 720 .timeout_change = tcp_timeout_change,
710 .set_state_timeout = tcp_set_state_timeout,
711}; 721};
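Editor's note: tcp_timeout_change() now receives the per-netns ip_vs_proto_data, so the secure_tcp defense mode can switch to tcp_states_dos in one namespace without affecting the others. A rough sketch of how the core side could fan the flag out (proto_data_table, pd->next and IP_VS_PROTO_TAB_SIZE are assumptions based on the rest of the series, not shown in this file):

	/* Walk one namespace's protocol data and let every protocol that
	 * implements timeout_change react to the new defense level. */
	static void example_timeout_change(struct netns_ipvs *ipvs, int flags)
	{
		struct ip_vs_proto_data *pd;
		int i;

		for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
			for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
				if (pd->pp->timeout_change)
					pd->pp->timeout_change(pd, flags);
			}
		}
	}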
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a06bb0a..f1282cbe6fe3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
13 * Network name space (netns) aware.
13 * 14 *
14 */ 15 */
15 16
@@ -28,9 +29,10 @@
28#include <net/ip6_checksum.h> 29#include <net/ip6_checksum.h>
29 30
30static int 31static int
31udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
32 int *verdict, struct ip_vs_conn **cpp) 33 int *verdict, struct ip_vs_conn **cpp)
33{ 34{
35 struct net *net;
34 struct ip_vs_service *svc; 36 struct ip_vs_service *svc;
35 struct udphdr _udph, *uh; 37 struct udphdr _udph, *uh;
36 struct ip_vs_iphdr iph; 38 struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
42 *verdict = NF_DROP; 44 *verdict = NF_DROP;
43 return 0; 45 return 0;
44 } 46 }
45 47 net = skb_net(skb);
46 svc = ip_vs_service_get(af, skb->mark, iph.protocol, 48 svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
47 &iph.daddr, uh->dest); 49 &iph.daddr, uh->dest);
48 if (svc) { 50 if (svc) {
49 int ignored; 51 int ignored;
50 52
51 if (ip_vs_todrop()) { 53 if (ip_vs_todrop(net_ipvs(net))) {
52 /* 54 /*
53 * It seems that we are very loaded. 55 * It seems that we are very loaded.
54 * We have to drop this packet :( 56 * We have to drop this packet :(
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
62 * Let the virtual server select a real server for the 64 * Let the virtual server select a real server for the
63 * incoming connection, and create a connection entry. 65 * incoming connection, and create a connection entry.
64 */ 66 */
65 *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
66 if (!*cpp && !ignored) { 68 if (!*cpp && ignored <= 0) {
67 *verdict = ip_vs_leave(svc, skb, pp); 69 if (!ignored)
70 *verdict = ip_vs_leave(svc, skb, pd);
71 else {
72 ip_vs_service_put(svc);
73 *verdict = NF_DROP;
74 }
68 return 0; 75 return 0;
69 } 76 }
70 ip_vs_service_put(svc); 77 ip_vs_service_put(svc);
71 } 78 }
79 /* NF_ACCEPT */
72 return 1; 80 return 1;
73} 81}
74 82
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
338 return 1; 346 return 1;
339} 347}
340 348
341
342/*
343 * Note: the caller guarantees that only one of register_app,
344 * unregister_app or app_conn_bind is called each time.
345 */
346
347#define UDP_APP_TAB_BITS 4
348#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
349#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
350
351static struct list_head udp_apps[UDP_APP_TAB_SIZE];
352static DEFINE_SPINLOCK(udp_app_lock);
353
354static inline __u16 udp_app_hashkey(__be16 port) 349static inline __u16 udp_app_hashkey(__be16 port)
355{ 350{
356 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) 351 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port)
358} 353}
359 354
360 355
361static int udp_register_app(struct ip_vs_app *inc) 356static int udp_register_app(struct net *net, struct ip_vs_app *inc)
362{ 357{
363 struct ip_vs_app *i; 358 struct ip_vs_app *i;
364 __u16 hash; 359 __u16 hash;
365 __be16 port = inc->port; 360 __be16 port = inc->port;
366 int ret = 0; 361 int ret = 0;
362 struct netns_ipvs *ipvs = net_ipvs(net);
363 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
367 364
368 hash = udp_app_hashkey(port); 365 hash = udp_app_hashkey(port);
369 366
370 367
371 spin_lock_bh(&udp_app_lock); 368 spin_lock_bh(&ipvs->udp_app_lock);
372 list_for_each_entry(i, &udp_apps[hash], p_list) { 369 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
373 if (i->port == port) { 370 if (i->port == port) {
374 ret = -EEXIST; 371 ret = -EEXIST;
375 goto out; 372 goto out;
376 } 373 }
377 } 374 }
378 list_add(&inc->p_list, &udp_apps[hash]); 375 list_add(&inc->p_list, &ipvs->udp_apps[hash]);
379 atomic_inc(&ip_vs_protocol_udp.appcnt); 376 atomic_inc(&pd->appcnt);
380 377
381 out: 378 out:
382 spin_unlock_bh(&udp_app_lock); 379 spin_unlock_bh(&ipvs->udp_app_lock);
383 return ret; 380 return ret;
384} 381}
385 382
386 383
387static void 384static void
388udp_unregister_app(struct ip_vs_app *inc) 385udp_unregister_app(struct net *net, struct ip_vs_app *inc)
389{ 386{
390 spin_lock_bh(&udp_app_lock); 387 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
391 atomic_dec(&ip_vs_protocol_udp.appcnt); 388 struct netns_ipvs *ipvs = net_ipvs(net);
389
390 spin_lock_bh(&ipvs->udp_app_lock);
391 atomic_dec(&pd->appcnt);
392 list_del(&inc->p_list); 392 list_del(&inc->p_list);
393 spin_unlock_bh(&udp_app_lock); 393 spin_unlock_bh(&ipvs->udp_app_lock);
394} 394}
395 395
396 396
397static int udp_app_conn_bind(struct ip_vs_conn *cp) 397static int udp_app_conn_bind(struct ip_vs_conn *cp)
398{ 398{
399 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
399 int hash; 400 int hash;
400 struct ip_vs_app *inc; 401 struct ip_vs_app *inc;
401 int result = 0; 402 int result = 0;
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
407 /* Lookup application incarnations and bind the right one */ 408 /* Lookup application incarnations and bind the right one */
408 hash = udp_app_hashkey(cp->vport); 409 hash = udp_app_hashkey(cp->vport);
409 410
410 spin_lock(&udp_app_lock); 411 spin_lock(&ipvs->udp_app_lock);
411 list_for_each_entry(inc, &udp_apps[hash], p_list) { 412 list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
412 if (inc->port == cp->vport) { 413 if (inc->port == cp->vport) {
413 if (unlikely(!ip_vs_app_inc_get(inc))) 414 if (unlikely(!ip_vs_app_inc_get(inc)))
414 break; 415 break;
415 spin_unlock(&udp_app_lock); 416 spin_unlock(&ipvs->udp_app_lock);
416 417
417 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 418 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
418 "%s:%u to app %s on port %u\n", 419 "%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
429 goto out; 430 goto out;
430 } 431 }
431 } 432 }
432 spin_unlock(&udp_app_lock); 433 spin_unlock(&ipvs->udp_app_lock);
433 434
434 out: 435 out:
435 return result; 436 return result;
436} 437}
437 438
438 439
439static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 440static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
440 [IP_VS_UDP_S_NORMAL] = 5*60*HZ, 441 [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
441 [IP_VS_UDP_S_LAST] = 2*HZ, 442 [IP_VS_UDP_S_LAST] = 2*HZ,
442}; 443};
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
446 [IP_VS_UDP_S_LAST] = "BUG!", 447 [IP_VS_UDP_S_LAST] = "BUG!",
447}; 448};
448 449
449
450static int
451udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
452{
453 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
454 udp_state_name_table, sname, to);
455}
456
457static const char * udp_state_name(int state) 450static const char * udp_state_name(int state)
458{ 451{
459 if (state >= IP_VS_UDP_S_LAST) 452 if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state)
464static int 457static int
465udp_state_transition(struct ip_vs_conn *cp, int direction, 458udp_state_transition(struct ip_vs_conn *cp, int direction,
466 const struct sk_buff *skb, 459 const struct sk_buff *skb,
467 struct ip_vs_protocol *pp) 460 struct ip_vs_proto_data *pd)
468{ 461{
469 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; 462 if (unlikely(!pd)) {
463 pr_err("UDP no ns data\n");
464 return 0;
465 }
466
467 cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
470 return 1; 468 return 1;
471} 469}
472 470
473static void udp_init(struct ip_vs_protocol *pp) 471static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
474{ 472{
475 IP_VS_INIT_HASH_TABLE(udp_apps); 473 struct netns_ipvs *ipvs = net_ipvs(net);
476 pp->timeout_table = udp_timeouts; 474
475 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
476 spin_lock_init(&ipvs->udp_app_lock);
477 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
478 sizeof(udp_timeouts));
477} 479}
478 480
479static void udp_exit(struct ip_vs_protocol *pp) 481static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
480{ 482{
483 kfree(pd->timeout_table);
481} 484}
482 485
483 486
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
486 .protocol = IPPROTO_UDP, 489 .protocol = IPPROTO_UDP,
487 .num_states = IP_VS_UDP_S_LAST, 490 .num_states = IP_VS_UDP_S_LAST,
488 .dont_defrag = 0, 491 .dont_defrag = 0,
489 .init = udp_init, 492 .init = NULL,
490 .exit = udp_exit, 493 .exit = NULL,
494 .init_netns = __udp_init,
495 .exit_netns = __udp_exit,
491 .conn_schedule = udp_conn_schedule, 496 .conn_schedule = udp_conn_schedule,
492 .conn_in_get = ip_vs_conn_in_get_proto, 497 .conn_in_get = ip_vs_conn_in_get_proto,
493 .conn_out_get = ip_vs_conn_out_get_proto, 498 .conn_out_get = ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
501 .app_conn_bind = udp_app_conn_bind, 506 .app_conn_bind = udp_app_conn_bind,
502 .debug_packet = ip_vs_tcpudp_debug_packet, 507 .debug_packet = ip_vs_tcpudp_debug_packet,
503 .timeout_change = NULL, 508 .timeout_change = NULL,
504 .set_state_timeout = udp_set_state_timeout,
505}; 509};
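Editor's note: TCP, UDP and SCTP now resolve their per-namespace state through ip_vs_proto_data_get(net, protocol), which is added elsewhere in the series. A sketch of the expected lookup (hash width and field names are assumptions):

	/* Hash the protocol number into the per-netns table and return the
	 * matching ip_vs_proto_data, or NULL for an unsupported protocol. */
	static struct ip_vs_proto_data *
	example_proto_data_get(struct net *net, unsigned short proto)
	{
		struct netns_ipvs *ipvs = net_ipvs(net);
		struct ip_vs_proto_data *pd;
		unsigned int hash = proto & (IP_VS_PROTO_TAB_SIZE - 1);

		for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next)
			if (pd->pp->protocol == proto)
				return pd;
		return NULL;
	}

This is also why several callers above guard against a NULL pd: early in namespace setup, or for a protocol compiled out, the lookup can legitimately fail.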
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index e210f37d8ea2..c49b388d1085 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -72,7 +72,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
72 q = q->next; 72 q = q->next;
73 } while (q != p); 73 } while (q != p);
74 write_unlock(&svc->sched_lock); 74 write_unlock(&svc->sched_lock);
75 IP_VS_ERR_RL("RR: no destination available\n"); 75 ip_vs_scheduler_err(svc, "no destination available");
76 return NULL; 76 return NULL;
77 77
78 out: 78 out:
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 076ebe00435d..08dbdd5bc18f 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -29,6 +29,7 @@
29 29
30#include <net/ip_vs.h> 30#include <net/ip_vs.h>
31 31
32EXPORT_SYMBOL(ip_vs_scheduler_err);
32/* 33/*
33 * IPVS scheduler list 34 * IPVS scheduler list
34 */ 35 */
@@ -146,6 +147,30 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
146 module_put(scheduler->module); 147 module_put(scheduler->module);
147} 148}
148 149
150/*
151 * Common error output helper for schedulers
152 */
153
154void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
155{
156 if (svc->fwmark) {
157 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
158 svc->scheduler->name, svc->fwmark,
159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n",
163 svc->scheduler->name,
164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg);
166#endif
167 } else {
168 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
169 svc->scheduler->name,
170 ip_vs_proto_name(svc->protocol),
171 &svc->addr.ip, ntohs(svc->port), msg);
172 }
173}
149 174
150/* 175/*
151 * Register a scheduler in the scheduler list 176 * Register a scheduler in the scheduler list
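Editor's note: with ip_vs_scheduler_err() the rate-limited error now identifies the scheduler and the virtual service. Using the format strings above, a failed lookup would log roughly (addresses and fwmark are made-up examples):

	IPVS: rr: TCP 192.0.2.10:80 - no destination available
	IPVS: rr: FWM 10 0x0000000A - no destination available

instead of the old flat "RR: no destination available" seen in the scheduler hunks below.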
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 1ab75a9dc400..89ead246ed3d 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -87,7 +87,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
87 goto nextstage; 87 goto nextstage;
88 } 88 }
89 } 89 }
90 IP_VS_ERR_RL("SED: no destination available\n"); 90 ip_vs_scheduler_err(svc, "no destination available");
91 return NULL; 91 return NULL;
92 92
93 /* 93 /*
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e6cc174fbc06..b5e2556c581a 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -223,7 +223,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
223 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 223 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
224 || atomic_read(&dest->weight) <= 0 224 || atomic_read(&dest->weight) <= 0
225 || is_overloaded(dest)) { 225 || is_overloaded(dest)) {
226 IP_VS_ERR_RL("SH: no destination available\n"); 226 ip_vs_scheduler_err(svc, "no destination available");
227 return NULL; 227 return NULL;
228 } 228 }
229 229
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index ab85aedea17e..e292e5bddc70 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
 8 * Version 1 is capable of handling both version 0 and version 1 messages.
 9 * Version 0 is the plain old format.
 10 * Note: Version 0 receivers will just drop Version 1 messages.
 11 * Version 1 can handle IPv6, persistence data,
 12 * time-outs, and firewall marks.
 13 * In version 1, "ip_vs_sync_conn_options" is sent in network order.
 14 * Version 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
 15 *
 16 * Definitions: a Message is a complete datagram,
 17 * a Sync_conn is a part of a Message, and
 18 * Param Data is an option to a Sync_conn.
19 *
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 20 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * 21 *
10 * ip_vs_sync: sync connection info from master load balancer to backups 22 * ip_vs_sync: sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
15 * Alexandre Cassen : Added SyncID support for incoming sync 27 * Alexandre Cassen : Added SyncID support for incoming sync
16 * messages filtering. 28 * messages filtering.
17 * Justin Ossevoort : Fix endian problem on sync message size. 29 * Justin Ossevoort : Fix endian problem on sync message size.
30 * Hans Schillstrom : Added Version 1: i.e. IPv6,
31 * Persistence support, fwmark and time-out.
18 */ 32 */
19 33
20#define KMSG_COMPONENT "IPVS" 34#define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
35#include <linux/wait.h> 49#include <linux/wait.h>
36#include <linux/kernel.h> 50#include <linux/kernel.h>
37 51
52#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
53
38#include <net/ip.h> 54#include <net/ip.h>
39#include <net/sock.h> 55#include <net/sock.h>
40 56
@@ -43,11 +59,13 @@
43#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ 59#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
44#define IP_VS_SYNC_PORT 8848 /* multicast port */ 60#define IP_VS_SYNC_PORT 8848 /* multicast port */
45 61
62#define SYNC_PROTO_VER 1 /* Protocol version in header */
46 63
47/* 64/*
48 * IPVS sync connection entry 65 * IPVS sync connection entry
66 * Version 0, i.e. original version.
49 */ 67 */
50struct ip_vs_sync_conn { 68struct ip_vs_sync_conn_v0 {
51 __u8 reserved; 69 __u8 reserved;
52 70
53 /* Protocol, addresses and port numbers */ 71 /* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options {
71 struct ip_vs_seq out_seq; /* outgoing seq. struct */ 89 struct ip_vs_seq out_seq; /* outgoing seq. struct */
72}; 90};
73 91
92/*
93 Sync Connection format (sync_conn)
94
95 0 1 2 3
96 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
97 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
98 | Type | Protocol | Ver. | Size |
99 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 | Flags |
101 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
102 | State | cport |
103 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104 | vport | dport |
105 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106 | fwmark |
107 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
108 | timeout (in sec.) |
109 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
110 | ... |
111 | IP-Addresses (v4 or v6) |
112 | ... |
113 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
114 Optional Parameters.
115 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
116 | Param. Type | Param. Length | Param. data |
117 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
118 | ... |
119 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
120 | | Param Type | Param. Length |
121 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
122 | Param data |
123 | Last Param data should be padded for 32 bit alignment |
124 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
125*/
126
127/*
128 * Type 0, IPv4 sync connection format
129 */
130struct ip_vs_sync_v4 {
131 __u8 type;
132 __u8 protocol; /* Which protocol (TCP/UDP) */
133 __be16 ver_size; /* Version msb 4 bits */
134 /* Flags and state transition */
135 __be32 flags; /* status flags */
136 __be16 state; /* state info */
137 /* Protocol, addresses and port numbers */
138 __be16 cport;
139 __be16 vport;
140 __be16 dport;
141 __be32 fwmark; /* Firewall mark from skb */
142 __be32 timeout; /* cp timeout */
143 __be32 caddr; /* client address */
144 __be32 vaddr; /* virtual address */
145 __be32 daddr; /* destination address */
146 /* The sequence options start here */
147 /* PE data padded to 32bit alignment after seq. options */
148};
149/*
150 * Type 2 messages IPv6
151 */
152struct ip_vs_sync_v6 {
153 __u8 type;
154 __u8 protocol; /* Which protocol (TCP/UDP) */
155 __be16 ver_size; /* Version msb 4 bits */
156 /* Flags and state transition */
157 __be32 flags; /* status flags */
158 __be16 state; /* state info */
159 /* Protocol, addresses and port numbers */
160 __be16 cport;
161 __be16 vport;
162 __be16 dport;
163 __be32 fwmark; /* Firewall mark from skb */
164 __be32 timeout; /* cp timeout */
165 struct in6_addr caddr; /* client address */
166 struct in6_addr vaddr; /* virtual address */
167 struct in6_addr daddr; /* destination address */
168 /* The sequence options start here */
169 /* PE data padded to 32bit alignment after seq. options */
170};
171
172union ip_vs_sync_conn {
173 struct ip_vs_sync_v4 v4;
174 struct ip_vs_sync_v6 v6;
175};
176
177/* Bits in Type field in above */
178#define STYPE_INET6 0
179#define STYPE_F_INET6 (1 << STYPE_INET6)
180
181#define SVER_SHIFT 12 /* Shift to get version */
182#define SVER_MASK 0x0fff /* Mask to strip version */
183
184#define IPVS_OPT_SEQ_DATA 1
185#define IPVS_OPT_PE_DATA 2
186#define IPVS_OPT_PE_NAME 3
187#define IPVS_OPT_PARAM 7
188
189#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
190#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
191#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
192#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
193
74struct ip_vs_sync_thread_data { 194struct ip_vs_sync_thread_data {
195 struct net *net;
75 struct socket *sock; 196 struct socket *sock;
76 char *buf; 197 char *buf;
77}; 198};
78 199
79#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) 200/* Version 0 definition of packet sizes */
201#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
80#define FULL_CONN_SIZE \ 202#define FULL_CONN_SIZE \
81(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) 203(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
82 204
83 205
84/* 206/*
85 The master multicasts messages to the backup load balancers in the 207 The master multicasts messages (Datagrams) to the backup load balancers
86 following format. 208 in the following format.
209
210 Version 1:
211 Note, first byte should be Zero, so ver 0 receivers will drop the packet.
87 212
88 0 1 2 3 213 0 1 2 3
89 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 214 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
90 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 215 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
91 | Count Conns | SyncID | Size | 216 | 0 | SyncID | Size |
217 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
218 | Count Conns | Version | Reserved, set to Zero |
92 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 219 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
93 | | 220 | |
94 | IPVS Sync Connection (1) | 221 | IPVS Sync Connection (1) |
95 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 222 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
96 | . | 223 | . |
97 | . | 224 ~ . ~
98 | . | 225 | . |
99 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 226 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 | | 227 | |
101 | IPVS Sync Connection (n) | 228 | IPVS Sync Connection (n) |
102 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 229 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
230
231 Version 0 Header
232 0 1 2 3
233 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
234 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
235 | Count Conns | SyncID | Size |
236 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
237 | IPVS Sync Connection (1) |
103*/ 238*/
104 239
105#define SYNC_MESG_HEADER_LEN 4 240#define SYNC_MESG_HEADER_LEN 4
106#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ 241#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
107 242
108struct ip_vs_sync_mesg { 243/* Version 0 header */
244struct ip_vs_sync_mesg_v0 {
109 __u8 nr_conns; 245 __u8 nr_conns;
110 __u8 syncid; 246 __u8 syncid;
111 __u16 size; 247 __u16 size;
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg {
113 /* ip_vs_sync_conn entries start here */ 249 /* ip_vs_sync_conn entries start here */
114}; 250};
115 251
116/* the maximum length of sync (sending/receiving) message */ 252/* Version 1 header */
117static int sync_send_mesg_maxlen; 253struct ip_vs_sync_mesg {
118static int sync_recv_mesg_maxlen; 254 __u8 reserved; /* must be zero */
255 __u8 syncid;
256 __u16 size;
257 __u8 nr_conns;
258 __s8 version; /* SYNC_PROTO_VER */
259 __u16 spare;
260 /* ip_vs_sync_conn entries start here */
261};
119 262
120struct ip_vs_sync_buff { 263struct ip_vs_sync_buff {
121 struct list_head list; 264 struct list_head list;
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff {
127 unsigned char *end; 270 unsigned char *end;
128}; 271};
129 272
130
131/* the sync_buff list head and the lock */
132static LIST_HEAD(ip_vs_sync_queue);
133static DEFINE_SPINLOCK(ip_vs_sync_lock);
134
135/* current sync_buff for accepting new conn entries */
136static struct ip_vs_sync_buff *curr_sb = NULL;
137static DEFINE_SPINLOCK(curr_sb_lock);
138
139/* ipvs sync daemon state */
140volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
141volatile int ip_vs_master_syncid = 0;
142volatile int ip_vs_backup_syncid = 0;
143
144/* multicast interface name */
145char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
146char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
147
148/* sync daemon tasks */
149static struct task_struct *sync_master_thread;
150static struct task_struct *sync_backup_thread;
151
152/* multicast addr */ 273/* multicast addr */
153static struct sockaddr_in mcast_addr = { 274static struct sockaddr_in mcast_addr = {
154 .sin_family = AF_INET, 275 .sin_family = AF_INET,
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = {
156 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), 277 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
157}; 278};
158 279
280/*
281 * Copy of struct ip_vs_seq
282 * From unaligned network order to aligned host order
283 */
284static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
285{
286 ho->init_seq = get_unaligned_be32(&no->init_seq);
287 ho->delta = get_unaligned_be32(&no->delta);
288 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
289}
290
291/*
292 * Copy of struct ip_vs_seq
293 * From Aligned host order to unaligned network order
294 */
295static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
296{
297 put_unaligned_be32(ho->init_seq, &no->init_seq);
298 put_unaligned_be32(ho->delta, &no->delta);
299 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
300}
159 301
160static inline struct ip_vs_sync_buff *sb_dequeue(void) 302static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
161{ 303{
162 struct ip_vs_sync_buff *sb; 304 struct ip_vs_sync_buff *sb;
163 305
164 spin_lock_bh(&ip_vs_sync_lock); 306 spin_lock_bh(&ipvs->sync_lock);
165 if (list_empty(&ip_vs_sync_queue)) { 307 if (list_empty(&ipvs->sync_queue)) {
166 sb = NULL; 308 sb = NULL;
167 } else { 309 } else {
168 sb = list_entry(ip_vs_sync_queue.next, 310 sb = list_entry(ipvs->sync_queue.next,
169 struct ip_vs_sync_buff, 311 struct ip_vs_sync_buff,
170 list); 312 list);
171 list_del(&sb->list); 313 list_del(&sb->list);
172 } 314 }
173 spin_unlock_bh(&ip_vs_sync_lock); 315 spin_unlock_bh(&ipvs->sync_lock);
174 316
175 return sb; 317 return sb;
176} 318}
177 319
178static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) 320/*
321 * Create a new sync buffer for Version 1 proto.
322 */
323static inline struct ip_vs_sync_buff *
324ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
179{ 325{
180 struct ip_vs_sync_buff *sb; 326 struct ip_vs_sync_buff *sb;
181 327
182 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 328 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
183 return NULL; 329 return NULL;
184 330
185 if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { 331 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
332 if (!sb->mesg) {
186 kfree(sb); 333 kfree(sb);
187 return NULL; 334 return NULL;
188 } 335 }
 336 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
337 sb->mesg->version = SYNC_PROTO_VER;
338 sb->mesg->syncid = ipvs->master_syncid;
339 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
189 sb->mesg->nr_conns = 0; 340 sb->mesg->nr_conns = 0;
190 sb->mesg->syncid = ip_vs_master_syncid; 341 sb->mesg->spare = 0;
191 sb->mesg->size = 4; 342 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
192 sb->head = (unsigned char *)sb->mesg + 4; 343 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
193 sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; 344
194 sb->firstuse = jiffies; 345 sb->firstuse = jiffies;
195 return sb; 346 return sb;
196} 347}
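Editor's note: ver_size in the v4/v6 sync_conn records packs two fields, as the SVER_SHIFT/SVER_MASK definitions above imply: the low 12 bits carry the record size, the top 4 bits its format version. Illustrative accessors (assumptions, not part of the patch):

	/* Record length in bytes, with the version nibble masked off. */
	static inline unsigned int sync_conn_size(__be16 ver_size)
	{
		return ntohs(ver_size) & SVER_MASK;
	}

	/* Format version kept in the most significant 4 bits. */
	static inline unsigned int sync_conn_version(__be16 ver_size)
	{
		return ntohs(ver_size) >> SVER_SHIFT;
	}

The sender below writes htons(len & SVER_MASK), i.e. record version 0, which matches the "Ver." field in the sync_conn diagram.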
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
201 kfree(sb); 352 kfree(sb);
202} 353}
203 354
204static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) 355static inline void sb_queue_tail(struct netns_ipvs *ipvs)
205{ 356{
206 spin_lock(&ip_vs_sync_lock); 357 struct ip_vs_sync_buff *sb = ipvs->sync_buff;
207 if (ip_vs_sync_state & IP_VS_STATE_MASTER) 358
208 list_add_tail(&sb->list, &ip_vs_sync_queue); 359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER)
361 list_add_tail(&sb->list, &ipvs->sync_queue);
209 else 362 else
210 ip_vs_sync_buff_release(sb); 363 ip_vs_sync_buff_release(sb);
211 spin_unlock(&ip_vs_sync_lock); 364 spin_unlock(&ipvs->sync_lock);
212} 365}
213 366
214/* 367/*
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
216 * than the specified time or the specified time is zero. 369 * than the specified time or the specified time is zero.
217 */ 370 */
218static inline struct ip_vs_sync_buff * 371static inline struct ip_vs_sync_buff *
219get_curr_sync_buff(unsigned long time) 372get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
220{ 373{
221 struct ip_vs_sync_buff *sb; 374 struct ip_vs_sync_buff *sb;
222 375
223 spin_lock_bh(&curr_sb_lock); 376 spin_lock_bh(&ipvs->sync_buff_lock);
224 if (curr_sb && (time == 0 || 377 if (ipvs->sync_buff &&
225 time_before(jiffies - curr_sb->firstuse, time))) { 378 time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
226 sb = curr_sb; 379 sb = ipvs->sync_buff;
227 curr_sb = NULL; 380 ipvs->sync_buff = NULL;
228 } else 381 } else
229 sb = NULL; 382 sb = NULL;
230 spin_unlock_bh(&curr_sb_lock); 383 spin_unlock_bh(&ipvs->sync_buff_lock);
231 return sb; 384 return sb;
232} 385}
233 386
387/*
 388 * Switch between sending version 0 and version 1 messages
 389 * - must queue or free the current sync_buff
390 */
391void ip_vs_sync_switch_mode(struct net *net, int mode)
392{
393 struct netns_ipvs *ipvs = net_ipvs(net);
394
395 if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
396 return;
397 if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
398 return;
399
400 spin_lock_bh(&ipvs->sync_buff_lock);
401 /* Buffer empty ? then let buf_create do the job */
402 if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
403 kfree(ipvs->sync_buff);
404 ipvs->sync_buff = NULL;
405 } else {
406 spin_lock_bh(&ipvs->sync_lock);
407 if (ipvs->sync_state & IP_VS_STATE_MASTER)
408 list_add_tail(&ipvs->sync_buff->list,
409 &ipvs->sync_queue);
410 else
411 ip_vs_sync_buff_release(ipvs->sync_buff);
412 spin_unlock_bh(&ipvs->sync_lock);
413 }
414 spin_unlock_bh(&ipvs->sync_buff_lock);
415}
416
417/*
418 * Create a new sync buffer for Version 0 proto.
419 */
420static inline struct ip_vs_sync_buff *
421ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
422{
423 struct ip_vs_sync_buff *sb;
424 struct ip_vs_sync_mesg_v0 *mesg;
425
426 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
427 return NULL;
428
429 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
430 if (!sb->mesg) {
431 kfree(sb);
432 return NULL;
433 }
434 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
435 mesg->nr_conns = 0;
436 mesg->syncid = ipvs->master_syncid;
437 mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
438 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
439 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
440 sb->firstuse = jiffies;
441 return sb;
442}
234 443
235/* 444/*
 445 * Version 0, can be selected via sysctl (net.ipv4.vs.sync_version=0).
236 * Add an ip_vs_conn information into the current sync_buff. 446 * Add an ip_vs_conn information into the current sync_buff.
237 * Called by ip_vs_in.
238 */ 447 */
239void ip_vs_sync_conn(struct ip_vs_conn *cp) 448void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
240{ 449{
241 struct ip_vs_sync_mesg *m; 450 struct netns_ipvs *ipvs = net_ipvs(net);
242 struct ip_vs_sync_conn *s; 451 struct ip_vs_sync_mesg_v0 *m;
452 struct ip_vs_sync_conn_v0 *s;
243 int len; 453 int len;
244 454
245 spin_lock(&curr_sb_lock); 455 if (unlikely(cp->af != AF_INET))
246 if (!curr_sb) { 456 return;
247 if (!(curr_sb=ip_vs_sync_buff_create())) { 457 /* Do not sync ONE PACKET */
248 spin_unlock(&curr_sb_lock); 458 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
459 return;
460
461 spin_lock(&ipvs->sync_buff_lock);
462 if (!ipvs->sync_buff) {
463 ipvs->sync_buff =
464 ip_vs_sync_buff_create_v0(ipvs);
465 if (!ipvs->sync_buff) {
466 spin_unlock(&ipvs->sync_buff_lock);
249 pr_err("ip_vs_sync_buff_create failed.\n"); 467 pr_err("ip_vs_sync_buff_create failed.\n");
250 return; 468 return;
251 } 469 }
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
253 471
254 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : 472 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
255 SIMPLE_CONN_SIZE; 473 SIMPLE_CONN_SIZE;
256 m = curr_sb->mesg; 474 m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
257 s = (struct ip_vs_sync_conn *)curr_sb->head; 475 s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
258 476
259 /* copy members */ 477 /* copy members */
478 s->reserved = 0;
260 s->protocol = cp->protocol; 479 s->protocol = cp->protocol;
261 s->cport = cp->cport; 480 s->cport = cp->cport;
262 s->vport = cp->vport; 481 s->vport = cp->vport;
@@ -274,83 +493,365 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
274 493
275 m->nr_conns++; 494 m->nr_conns++;
276 m->size += len; 495 m->size += len;
277 curr_sb->head += len; 496 ipvs->sync_buff->head += len;
278 497
279 /* check if there is a space for next one */ 498 /* check if there is a space for next one */
280 if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { 499 if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
281 sb_queue_tail(curr_sb); 500 sb_queue_tail(ipvs);
282 curr_sb = NULL; 501 ipvs->sync_buff = NULL;
283 } 502 }
284 spin_unlock(&curr_sb_lock); 503 spin_unlock(&ipvs->sync_buff_lock);
285 504
286 /* synchronize its controller if it has */ 505 /* synchronize its controller if it has */
287 if (cp->control) 506 if (cp->control)
288 ip_vs_sync_conn(cp->control); 507 ip_vs_sync_conn(net, cp->control);
289} 508}
290 509
510/*
511 * Add an ip_vs_conn information into the current sync_buff.
512 * Called by ip_vs_in.
513 * Sending Version 1 messages
514 */
515void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
516{
517 struct netns_ipvs *ipvs = net_ipvs(net);
518 struct ip_vs_sync_mesg *m;
519 union ip_vs_sync_conn *s;
520 __u8 *p;
521 unsigned int len, pe_name_len, pad;
522
523 /* Handle old version of the protocol */
524 if (sysctl_sync_ver(ipvs) == 0) {
525 ip_vs_sync_conn_v0(net, cp);
526 return;
527 }
528 /* Do not sync ONE PACKET */
529 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
530 goto control;
531sloop:
532 /* Sanity checks */
533 pe_name_len = 0;
534 if (cp->pe_data_len) {
535 if (!cp->pe_data || !cp->dest) {
536 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
537 return;
538 }
539 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
540 }
541
542 spin_lock(&ipvs->sync_buff_lock);
543
544#ifdef CONFIG_IP_VS_IPV6
545 if (cp->af == AF_INET6)
546 len = sizeof(struct ip_vs_sync_v6);
547 else
548#endif
549 len = sizeof(struct ip_vs_sync_v4);
550
551 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
552 len += sizeof(struct ip_vs_sync_conn_options) + 2;
553
554 if (cp->pe_data_len)
555 len += cp->pe_data_len + 2; /* + Param hdr field */
556 if (pe_name_len)
557 len += pe_name_len + 2;
558
559 /* check if there is a space for this one */
560 pad = 0;
561 if (ipvs->sync_buff) {
562 pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
563 if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
564 sb_queue_tail(ipvs);
565 ipvs->sync_buff = NULL;
566 pad = 0;
567 }
568 }
569
570 if (!ipvs->sync_buff) {
571 ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
572 if (!ipvs->sync_buff) {
573 spin_unlock(&ipvs->sync_buff_lock);
574 pr_err("ip_vs_sync_buff_create failed.\n");
575 return;
576 }
577 }
578
579 m = ipvs->sync_buff->mesg;
580 p = ipvs->sync_buff->head;
581 ipvs->sync_buff->head += pad + len;
582 m->size += pad + len;
583 /* Add ev. padding from prev. sync_conn */
584 while (pad--)
585 *(p++) = 0;
586
587 s = (union ip_vs_sync_conn *)p;
588
589 /* Set message type & copy members */
590 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
591 s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
592 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
593 s->v4.state = htons(cp->state);
594 s->v4.protocol = cp->protocol;
595 s->v4.cport = cp->cport;
596 s->v4.vport = cp->vport;
597 s->v4.dport = cp->dport;
598 s->v4.fwmark = htonl(cp->fwmark);
599 s->v4.timeout = htonl(cp->timeout / HZ);
600 m->nr_conns++;
601
602#ifdef CONFIG_IP_VS_IPV6
603 if (cp->af == AF_INET6) {
604 p += sizeof(struct ip_vs_sync_v6);
605 ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
606 ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
607 ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
608 } else
609#endif
610 {
611 p += sizeof(struct ip_vs_sync_v4); /* options ptr */
612 s->v4.caddr = cp->caddr.ip;
613 s->v4.vaddr = cp->vaddr.ip;
614 s->v4.daddr = cp->daddr.ip;
615 }
616 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
617 *(p++) = IPVS_OPT_SEQ_DATA;
618 *(p++) = sizeof(struct ip_vs_sync_conn_options);
619 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
620 p += sizeof(struct ip_vs_seq);
621 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
622 p += sizeof(struct ip_vs_seq);
623 }
624 /* Handle pe data */
625 if (cp->pe_data_len && cp->pe_data) {
626 *(p++) = IPVS_OPT_PE_DATA;
627 *(p++) = cp->pe_data_len;
628 memcpy(p, cp->pe_data, cp->pe_data_len);
629 p += cp->pe_data_len;
630 if (pe_name_len) {
631 /* Add PE_NAME */
632 *(p++) = IPVS_OPT_PE_NAME;
633 *(p++) = pe_name_len;
634 memcpy(p, cp->pe->name, pe_name_len);
635 p += pe_name_len;
636 }
637 }
638
639 spin_unlock(&ipvs->sync_buff_lock);
640
641control:
642 /* synchronize its controller if it has */
643 cp = cp->control;
644 if (!cp)
645 return;
646 /*
647 * Reduce sync rate for templates
648 * i.e only increment in_pkts for Templates.
649 */
650 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
651 int pkts = atomic_add_return(1, &cp->in_pkts);
652
653 if (pkts % sysctl_sync_period(ipvs) != 1)
654 return;
655 }
656 goto sloop;
657}
658
659/*
660 * fill_param used by version 1
661 */
291static inline int 662static inline int
292ip_vs_conn_fill_param_sync(int af, int protocol, 663ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
293 const union nf_inet_addr *caddr, __be16 cport, 664 struct ip_vs_conn_param *p,
294 const union nf_inet_addr *vaddr, __be16 vport, 665 __u8 *pe_data, unsigned int pe_data_len,
295 struct ip_vs_conn_param *p) 666 __u8 *pe_name, unsigned int pe_name_len)
296{ 667{
297 /* XXX: Need to take into account persistence engine */ 668#ifdef CONFIG_IP_VS_IPV6
298 ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); 669 if (af == AF_INET6)
670 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
671 (const union nf_inet_addr *)&sc->v6.caddr,
672 sc->v6.cport,
673 (const union nf_inet_addr *)&sc->v6.vaddr,
674 sc->v6.vport, p);
675 else
676#endif
677 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
678 (const union nf_inet_addr *)&sc->v4.caddr,
679 sc->v4.cport,
680 (const union nf_inet_addr *)&sc->v4.vaddr,
681 sc->v4.vport, p);
682 /* Handle pe data */
683 if (pe_data_len) {
684 if (pe_name_len) {
685 char buff[IP_VS_PENAME_MAXLEN+1];
686
687 memcpy(buff, pe_name, pe_name_len);
688 buff[pe_name_len]=0;
689 p->pe = __ip_vs_pe_getbyname(buff);
690 if (!p->pe) {
691 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
692 buff);
693 return 1;
694 }
695 } else {
696 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
697 return 1;
698 }
699
700 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
701 if (!p->pe_data) {
702 if (p->pe->module)
703 module_put(p->pe->module);
704 return -ENOMEM;
705 }
706 p->pe_data_len = pe_data_len;
707 }
299 return 0; 708 return 0;
300} 709}
301 710
302/* 711/*
303 * Process received multicast message and create the corresponding 712 * Connection Add / Update.
304 * ip_vs_conn entries. 713 * Common for version 0 and 1 reception of backup sync_conns.
714 * Param: ...
715 * timeout is in sec.
305 */ 716 */
306static void ip_vs_process_message(const char *buffer, const size_t buflen) 717static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
718 unsigned int flags, unsigned int state,
719 unsigned int protocol, unsigned int type,
720 const union nf_inet_addr *daddr, __be16 dport,
721 unsigned long timeout, __u32 fwmark,
722 struct ip_vs_sync_conn_options *opt)
307{ 723{
308 struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
309 struct ip_vs_sync_conn *s;
310 struct ip_vs_sync_conn_options *opt;
311 struct ip_vs_conn *cp;
312 struct ip_vs_protocol *pp;
313 struct ip_vs_dest *dest; 724 struct ip_vs_dest *dest;
314 struct ip_vs_conn_param param; 725 struct ip_vs_conn *cp;
315 char *p; 726 struct netns_ipvs *ipvs = net_ipvs(net);
316 int i;
317 727
318 if (buflen < sizeof(struct ip_vs_sync_mesg)) { 728 if (!(flags & IP_VS_CONN_F_TEMPLATE))
319 IP_VS_ERR_RL("sync message header too short\n"); 729 cp = ip_vs_conn_in_get(param);
320 return; 730 else
321 } 731 cp = ip_vs_ct_in_get(param);
322 732
323 /* Convert size back to host byte order */ 733 if (cp && param->pe_data) /* Free pe_data */
324 m->size = ntohs(m->size); 734 kfree(param->pe_data);
735 if (!cp) {
736 /*
737 * Find the appropriate destination for the connection.
738 * If it is not found the connection will remain unbound
739 * but still handled.
740 */
741 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
742 param->vport, protocol, fwmark);
325 743
326 if (buflen != m->size) { 744 /* Set the appropriate activity flag */
327 IP_VS_ERR_RL("bogus sync message size\n"); 745 if (protocol == IPPROTO_TCP) {
328 return; 746 if (state != IP_VS_TCP_S_ESTABLISHED)
747 flags |= IP_VS_CONN_F_INACTIVE;
748 else
749 flags &= ~IP_VS_CONN_F_INACTIVE;
750 } else if (protocol == IPPROTO_SCTP) {
751 if (state != IP_VS_SCTP_S_ESTABLISHED)
752 flags |= IP_VS_CONN_F_INACTIVE;
753 else
754 flags &= ~IP_VS_CONN_F_INACTIVE;
755 }
756 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
757 if (dest)
758 atomic_dec(&dest->refcnt);
759 if (!cp) {
760 if (param->pe_data)
761 kfree(param->pe_data);
762 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
763 return;
764 }
765 } else if (!cp->dest) {
766 dest = ip_vs_try_bind_dest(cp);
767 if (dest)
768 atomic_dec(&dest->refcnt);
769 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
770 (cp->state != state)) {
771 /* update active/inactive flag for the connection */
772 dest = cp->dest;
773 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
774 (state != IP_VS_TCP_S_ESTABLISHED)) {
775 atomic_dec(&dest->activeconns);
776 atomic_inc(&dest->inactconns);
777 cp->flags |= IP_VS_CONN_F_INACTIVE;
778 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
779 (state == IP_VS_TCP_S_ESTABLISHED)) {
780 atomic_inc(&dest->activeconns);
781 atomic_dec(&dest->inactconns);
782 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
783 }
784 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
785 (cp->state != state)) {
786 dest = cp->dest;
787 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
788 (state != IP_VS_SCTP_S_ESTABLISHED)) {
789 atomic_dec(&dest->activeconns);
790 atomic_inc(&dest->inactconns);
 791 cp->flags |= IP_VS_CONN_F_INACTIVE;
792 }
329 } 793 }
330 794
331 /* SyncID sanity check */ 795 if (opt)
332 if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { 796 memcpy(&cp->in_seq, opt, sizeof(*opt));
333 IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", 797 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
334 m->syncid); 798 cp->state = state;
335 return; 799 cp->old_state = cp->state;
800 /*
 801 * For Version 0 messages:
 802 * - Not possible to recover the right timeout for templates.
 803 * - Cannot find the right fwmark
 804 * virtual service. If needed, this could be done for
 805 * non-fwmark persistent services.
 806 * For Version 1 messages:
 807 * - No problem.
808 */
809 if (timeout) {
810 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
811 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
812 cp->timeout = timeout*HZ;
813 } else {
814 struct ip_vs_proto_data *pd;
815
816 pd = ip_vs_proto_data_get(net, protocol);
817 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
818 cp->timeout = pd->timeout_table[state];
819 else
820 cp->timeout = (3*60*HZ);
336 } 821 }
822 ip_vs_conn_put(cp);
823}
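
The timeout selection above falls back in three steps: an explicit timeout carried in a v1 message (capped at MAX_SCHEDULE_TIMEOUT/HZ), then the protocol's per-state timeout table, then a hard 3-minute default. A minimal userspace sketch of that order, assuming HZ = 100 and a made-up cap and timeout table (pick_timeout() and its arguments are hypothetical, not kernel API):

    #include <stdio.h>

    #define HZ 100                                  /* assumption for the sketch */
    #define MAX_TIMEOUT_SECS (0x7fffffffUL / HZ)    /* stand-in for MAX_SCHEDULE_TIMEOUT / HZ */

    static unsigned long pick_timeout(unsigned long timeout, int is_template,
                                      const unsigned long *timeout_table, int state)
    {
            if (timeout) {                          /* explicit value from a v1 message */
                    if (timeout > MAX_TIMEOUT_SECS)
                            timeout = MAX_TIMEOUT_SECS;
                    return timeout * HZ;
            }
            if (!is_template && timeout_table)      /* protocol default for this state */
                    return timeout_table[state];
            return 3 * 60 * HZ;                     /* last-resort 3 minute default */
    }

    int main(void)
    {
            unsigned long tcp_table[] = { 15 * 60 * HZ };

            printf("%lu\n", pick_timeout(0, 0, tcp_table, 0));    /* 90000: table value */
            printf("%lu\n", pick_timeout(120, 0, tcp_table, 0));  /* 12000: explicit 120 s */
            return 0;
    }
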
337 824
338 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 825/*
826 * Process received multicast message for Version 0
827 */
828static void ip_vs_process_message_v0(struct net *net, const char *buffer,
829 const size_t buflen)
830{
831 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
832 struct ip_vs_sync_conn_v0 *s;
833 struct ip_vs_sync_conn_options *opt;
834 struct ip_vs_protocol *pp;
835 struct ip_vs_conn_param param;
836 char *p;
837 int i;
838
839 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
339 for (i=0; i<m->nr_conns; i++) { 840 for (i=0; i<m->nr_conns; i++) {
340 unsigned flags, state; 841 unsigned flags, state;
341 842
342 if (p + SIMPLE_CONN_SIZE > buffer+buflen) { 843 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
343 IP_VS_ERR_RL("bogus conn in sync message\n"); 844 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
344 return; 845 return;
345 } 846 }
346 s = (struct ip_vs_sync_conn *) p; 847 s = (struct ip_vs_sync_conn_v0 *) p;
347 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; 848 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
348 flags &= ~IP_VS_CONN_F_HASHED; 849 flags &= ~IP_VS_CONN_F_HASHED;
349 if (flags & IP_VS_CONN_F_SEQ_MASK) { 850 if (flags & IP_VS_CONN_F_SEQ_MASK) {
350 opt = (struct ip_vs_sync_conn_options *)&s[1]; 851 opt = (struct ip_vs_sync_conn_options *)&s[1];
351 p += FULL_CONN_SIZE; 852 p += FULL_CONN_SIZE;
352 if (p > buffer+buflen) { 853 if (p > buffer+buflen) {
 353 			IP_VS_ERR_RL("bogus conn options in sync message\n"); 854 				IP_VS_ERR_RL("BACKUP v0, Dropping buffer, bogus conn options\n");
354 return; 855 return;
355 } 856 }
356 } else { 857 } else {
@@ -362,118 +863,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
362 if (!(flags & IP_VS_CONN_F_TEMPLATE)) { 863 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
363 pp = ip_vs_proto_get(s->protocol); 864 pp = ip_vs_proto_get(s->protocol);
364 if (!pp) { 865 if (!pp) {
365 IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", 866 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
366 s->protocol); 867 s->protocol);
367 continue; 868 continue;
368 } 869 }
369 if (state >= pp->num_states) { 870 if (state >= pp->num_states) {
370 IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", 871 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
371 pp->name, state); 872 pp->name, state);
372 continue; 873 continue;
373 } 874 }
374 } else { 875 } else {
375 /* protocol in templates is not used for state/timeout */ 876 /* protocol in templates is not used for state/timeout */
376 pp = NULL;
377 if (state > 0) { 877 if (state > 0) {
378 IP_VS_DBG(2, "Invalid template state %u in sync msg\n", 878 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
379 state); 879 state);
380 state = 0; 880 state = 0;
381 } 881 }
382 } 882 }
383 883
384 { 884 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
385 if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, 885 (const union nf_inet_addr *)&s->caddr,
386 (union nf_inet_addr *)&s->caddr, 886 s->cport,
387 s->cport, 887 (const union nf_inet_addr *)&s->vaddr,
388 (union nf_inet_addr *)&s->vaddr, 888 s->vport, &param);
389 s->vport, &param)) { 889
390 pr_err("ip_vs_conn_fill_param_sync failed"); 890 /* Send timeout as Zero */
391 return; 891 ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
892 (union nf_inet_addr *)&s->daddr, s->dport,
893 0, 0, opt);
894 }
895}
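
The v0 loop above walks fixed-size connection records and consumes an extra options block only when the record's sequence flags are set. A rough userspace sketch of that advance rule; the struct layout, OPTS_SZ and F_SEQ values are assumptions for illustration, not the kernel's definitions:

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>

    struct sync_conn_v0 {                   /* assumed layout, illustration only */
            uint8_t  reserved, protocol;
            uint16_t cport, vport, dport;
            uint32_t caddr, vaddr, daddr;
            uint16_t flags, state;
    };

    #define SIMPLE_SZ sizeof(struct sync_conn_v0)
    #define OPTS_SZ   24U                   /* assumed size of the seq options block */
    #define F_SEQ     0x0800                /* stand-in for IP_VS_CONN_F_SEQ_MASK */

    static int walk_v0(const uint8_t *p, const uint8_t *end, int nr_conns)
    {
            for (int i = 0; i < nr_conns; i++) {
                    struct sync_conn_v0 s;

                    if (p + SIMPLE_SZ > end)
                            return -1;              /* truncated record */
                    memcpy(&s, p, SIMPLE_SZ);
                    p += SIMPLE_SZ;
                    if (ntohs(s.flags) & F_SEQ) {   /* seq options follow this record */
                            if (p + OPTS_SZ > end)
                                    return -1;      /* truncated options */
                            p += OPTS_SZ;
                    }
            }
            return 0;
    }

    int main(void)
    {
            uint8_t buf[2 * SIMPLE_SZ] = { 0 };     /* two plain records, no options */

            return walk_v0(buf, buf + sizeof(buf), 2);
    }
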
896
897/*
898 * Handle options
899 */
900static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
901 __u32 *opt_flags,
902 struct ip_vs_sync_conn_options *opt)
903{
904 struct ip_vs_sync_conn_options *topt;
905
906 topt = (struct ip_vs_sync_conn_options *)p;
907
908 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
909 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
910 return -EINVAL;
911 }
912 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
913 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
914 return -EINVAL;
915 }
916 ntoh_seq(&topt->in_seq, &opt->in_seq);
917 ntoh_seq(&topt->out_seq, &opt->out_seq);
918 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
919 return 0;
920}
921
922static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
923 __u8 **data, unsigned int maxlen,
924 __u32 *opt_flags, __u32 flag)
925{
926 if (plen > maxlen) {
927 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
928 return -EINVAL;
929 }
930 if (*opt_flags & flag) {
931 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
932 return -EINVAL;
933 }
934 *data_len = plen;
935 *data = p;
936 *opt_flags |= flag;
937 return 0;
938}
939/*
940 * Process a Version 1 sync. connection
941 */
942static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
943{
944 struct ip_vs_sync_conn_options opt;
945 union ip_vs_sync_conn *s;
946 struct ip_vs_protocol *pp;
947 struct ip_vs_conn_param param;
948 __u32 flags;
949 unsigned int af, state, pe_data_len=0, pe_name_len=0;
950 __u8 *pe_data=NULL, *pe_name=NULL;
951 __u32 opt_flags=0;
952 int retc=0;
953
954 s = (union ip_vs_sync_conn *) p;
955
956 if (s->v6.type & STYPE_F_INET6) {
957#ifdef CONFIG_IP_VS_IPV6
958 af = AF_INET6;
959 p += sizeof(struct ip_vs_sync_v6);
960#else
961 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
962 retc = 10;
963 goto out;
964#endif
965 } else if (!s->v4.type) {
966 af = AF_INET;
967 p += sizeof(struct ip_vs_sync_v4);
968 } else {
969 return -10;
970 }
971 if (p > msg_end)
972 return -20;
973
 974	/* Process optional params: check Type & Len. */
975 while (p < msg_end) {
976 int ptype;
977 int plen;
978
979 if (p+2 > msg_end)
980 return -30;
981 ptype = *(p++);
982 plen = *(p++);
983
984 if (!plen || ((p + plen) > msg_end))
985 return -40;
986 /* Handle seq option p = param data */
987 switch (ptype & ~IPVS_OPT_F_PARAM) {
988 case IPVS_OPT_SEQ_DATA:
989 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
990 return -50;
991 break;
992
993 case IPVS_OPT_PE_DATA:
994 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
995 IP_VS_PEDATA_MAXLEN, &opt_flags,
996 IPVS_OPT_F_PE_DATA))
997 return -60;
998 break;
999
1000 case IPVS_OPT_PE_NAME:
1001 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1002 IP_VS_PENAME_MAXLEN, &opt_flags,
1003 IPVS_OPT_F_PE_NAME))
1004 return -70;
1005 break;
1006
1007 default:
1008 /* Param data mandatory ? */
1009 if (!(ptype & IPVS_OPT_F_PARAM)) {
1010 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1011 ptype & ~IPVS_OPT_F_PARAM);
1012 retc = 20;
1013 goto out;
392 } 1014 }
393 if (!(flags & IP_VS_CONN_F_TEMPLATE))
394 cp = ip_vs_conn_in_get(&param);
395 else
396 cp = ip_vs_ct_in_get(&param);
397 } 1015 }
398 if (!cp) { 1016 p += plen; /* Next option */
399 /* 1017 }
400 * Find the appropriate destination for the connection. 1018
401 * If it is not found the connection will remain unbound 1019 /* Get flags and Mask off unsupported */
402 * but still handled. 1020 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
403 */ 1021 flags |= IP_VS_CONN_F_SYNC;
404 dest = ip_vs_find_dest(AF_INET, 1022 state = ntohs(s->v4.state);
405 (union nf_inet_addr *)&s->daddr, 1023
406 s->dport, 1024 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
407 (union nf_inet_addr *)&s->vaddr, 1025 pp = ip_vs_proto_get(s->v4.protocol);
408 s->vport, 1026 if (!pp) {
409 s->protocol); 1027 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
410 		/* Set the appropriate activity flag */ 745 		if (protocol == IPPROTO_TCP) {
411 if (s->protocol == IPPROTO_TCP) { 1029 retc = 30;
412 if (state != IP_VS_TCP_S_ESTABLISHED) 1030 goto out;
413 flags |= IP_VS_CONN_F_INACTIVE; 1031 }
414 else 1032 if (state >= pp->num_states) {
415 flags &= ~IP_VS_CONN_F_INACTIVE; 1033 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
416 } else if (s->protocol == IPPROTO_SCTP) { 1034 pp->name, state);
417 if (state != IP_VS_SCTP_S_ESTABLISHED) 1035 retc = 40;
418 flags |= IP_VS_CONN_F_INACTIVE; 1036 goto out;
419 else 1037 }
420 flags &= ~IP_VS_CONN_F_INACTIVE; 1038 } else {
1039 /* protocol in templates is not used for state/timeout */
1040 if (state > 0) {
1041 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1042 state);
1043 state = 0;
1044 }
1045 }
1046 if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
1047 pe_data_len, pe_name, pe_name_len)) {
1048 retc = 50;
1049 goto out;
1050 }
 1051	/* If only IPv4, just silently skip IPv6 */
1052 if (af == AF_INET)
1053 ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
1054 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1055 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1056 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1057 );
1058#ifdef CONFIG_IP_VS_IPV6
1059 else
1060 ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
1061 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1062 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1063 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1064 );
1065#endif
1066 return 0;
1067 /* Error exit */
1068out:
1069 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1070 return retc;
1071
1072}
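
The option loop in ip_vs_proc_sync_conn() above is a simple type/length walk: one byte of type, one byte of length, then the payload, with the high bit of the type marking a parameter that may safely be skipped when unknown. A minimal sketch of that walk (walk_options() and the 0x80 flag value are illustrative stand-ins, not the kernel's identifiers):

    #include <stdint.h>
    #include <stdio.h>

    #define OPT_F_PARAM 0x80   /* stand-in for IPVS_OPT_F_PARAM */

    static int walk_options(const uint8_t *p, const uint8_t *end)
    {
            while (p < end) {
                    if (p + 2 > end)
                            return -1;              /* no room for type + length */
                    uint8_t type = *p++;
                    uint8_t len  = *p++;

                    if (!len || p + len > end)
                            return -1;              /* zero-length or truncated payload */

                    switch (type & ~OPT_F_PARAM) {
                    /* known options (seq data, pe data, pe name) would be decoded here */
                    default:
                            if (!(type & OPT_F_PARAM))
                                    return -1;      /* unknown mandatory option: drop */
                            break;                  /* unknown optional param: skip it */
                    }
                    p += len;                       /* advance to the next option */
            }
            return 0;
    }

    int main(void)
    {
            uint8_t msg[] = { 0x81, 4, 1, 2, 3, 4 };                /* one optional param */

            printf("%d\n", walk_options(msg, msg + sizeof(msg)));   /* prints 0 */
            return 0;
    }
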
1073/*
1074 * Process received multicast message and create the corresponding
1075 * ip_vs_conn entries.
1076 * Handles Version 0 & 1
1077 */
1078static void ip_vs_process_message(struct net *net, __u8 *buffer,
1079 const size_t buflen)
1080{
1081 struct netns_ipvs *ipvs = net_ipvs(net);
1082 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1083 __u8 *p, *msg_end;
1084 int i, nr_conns;
1085
1086 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1087 IP_VS_DBG(2, "BACKUP, message header too short\n");
1088 return;
1089 }
1090 /* Convert size back to host byte order */
1091 m2->size = ntohs(m2->size);
1092
1093 if (buflen != m2->size) {
1094 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1095 return;
1096 }
1097 /* SyncID sanity check */
1098 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1099 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1100 return;
1101 }
1102 /* Handle version 1 message */
1103 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1104 && (m2->spare == 0)) {
1105
1106 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1107 nr_conns = m2->nr_conns;
1108
1109 for (i=0; i<nr_conns; i++) {
1110 union ip_vs_sync_conn *s;
1111 unsigned size;
1112 int retc;
1113
1114 p = msg_end;
1115 if (p + sizeof(s->v4) > buffer+buflen) {
 1116				IP_VS_ERR_RL("BACKUP, Dropping buffer, too small\n");
1117 return;
421 } 1118 }
422 cp = ip_vs_conn_new(&param, 1119 s = (union ip_vs_sync_conn *)p;
423 (union nf_inet_addr *)&s->daddr, 1120 size = ntohs(s->v4.ver_size) & SVER_MASK;
424 s->dport, flags, dest); 1121 msg_end = p + size;
425 if (dest) 1122 /* Basic sanity checks */
426 atomic_dec(&dest->refcnt); 1123 if (msg_end > buffer+buflen) {
427 if (!cp) { 1124 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
428 pr_err("ip_vs_conn_new failed\n");
429 return; 1125 return;
430 } 1126 }
431 } else if (!cp->dest) { 1127 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
432 dest = ip_vs_try_bind_dest(cp); 1128 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
433 if (dest) 1129 ntohs(s->v4.ver_size) >> SVER_SHIFT);
434 atomic_dec(&dest->refcnt); 1130 return;
435 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
436 (cp->state != state)) {
437 /* update active/inactive flag for the connection */
438 dest = cp->dest;
439 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
440 (state != IP_VS_TCP_S_ESTABLISHED)) {
441 atomic_dec(&dest->activeconns);
442 atomic_inc(&dest->inactconns);
443 cp->flags |= IP_VS_CONN_F_INACTIVE;
444 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
445 (state == IP_VS_TCP_S_ESTABLISHED)) {
446 atomic_inc(&dest->activeconns);
447 atomic_dec(&dest->inactconns);
448 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
449 } 1131 }
450 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && 1132 /* Process a single sync_conn */
451 (cp->state != state)) { 1133 retc = ip_vs_proc_sync_conn(net, p, msg_end);
452 dest = cp->dest; 1134 if (retc < 0) {
453 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 1135 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
454 (state != IP_VS_SCTP_S_ESTABLISHED)) { 1136 retc);
455 atomic_dec(&dest->activeconns); 1137 return;
456 atomic_inc(&dest->inactconns);
457 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
458 } 1138 }
1139 /* Make sure we have 32 bit alignment */
1140 msg_end = p + ((size + 3) & ~3);
459 } 1141 }
460 1142 } else {
461 if (opt) 1143 /* Old type of message */
462 memcpy(&cp->in_seq, opt, sizeof(*opt)); 1144 ip_vs_process_message_v0(net, buffer, buflen);
463 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); 1145 return;
464 cp->state = state;
465 cp->old_state = cp->state;
466 /*
467 * We can not recover the right timeout for templates
468 * in all cases, we can not find the right fwmark
469 * virtual service. If needed, we can do it for
470 * non-fwmark persistent services.
471 */
472 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
473 cp->timeout = pp->timeout_table[state];
474 else
475 cp->timeout = (3*60*HZ);
476 ip_vs_conn_put(cp);
477 } 1146 }
478} 1147}
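
Each v1 record above is advanced with msg_end = p + ((size + 3) & ~3), i.e. the record size rounded up to the next 32-bit boundary. A quick check of that expression:

    #include <stdio.h>

    int main(void)
    {
            for (unsigned int size = 33; size <= 40; size++)
                    printf("size %u -> padded %u\n", size, (size + 3) & ~3u);
            /* 33..36 pad to 36, 37..40 pad to 40 */
            return 0;
    }
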
479 1148
@@ -511,8 +1180,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
511{ 1180{
512 struct net_device *dev; 1181 struct net_device *dev;
513 struct inet_sock *inet = inet_sk(sk); 1182 struct inet_sock *inet = inet_sk(sk);
1183 struct net *net = sock_net(sk);
514 1184
515 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1185 dev = __dev_get_by_name(net, ifname);
1186 if (!dev)
516 return -ENODEV; 1187 return -ENODEV;
517 1188
518 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1189 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1202,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
531 * Set the maximum length of sync message according to the 1202 * Set the maximum length of sync message according to the
532 * specified interface's MTU. 1203 * specified interface's MTU.
533 */ 1204 */
534static int set_sync_mesg_maxlen(int sync_state) 1205static int set_sync_mesg_maxlen(struct net *net, int sync_state)
535{ 1206{
1207 struct netns_ipvs *ipvs = net_ipvs(net);
536 struct net_device *dev; 1208 struct net_device *dev;
537 int num; 1209 int num;
538 1210
539 if (sync_state == IP_VS_STATE_MASTER) { 1211 if (sync_state == IP_VS_STATE_MASTER) {
540 if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) 1212 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1213 if (!dev)
541 return -ENODEV; 1214 return -ENODEV;
542 1215
543 num = (dev->mtu - sizeof(struct iphdr) - 1216 num = (dev->mtu - sizeof(struct iphdr) -
544 sizeof(struct udphdr) - 1217 sizeof(struct udphdr) -
545 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; 1218 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
546 sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + 1219 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
547 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); 1220 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
548 IP_VS_DBG(7, "setting the maximum length of sync sending " 1221 IP_VS_DBG(7, "setting the maximum length of sync sending "
549 "message %d.\n", sync_send_mesg_maxlen); 1222 "message %d.\n", ipvs->send_mesg_maxlen);
550 } else if (sync_state == IP_VS_STATE_BACKUP) { 1223 } else if (sync_state == IP_VS_STATE_BACKUP) {
551 if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) 1224 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1225 if (!dev)
552 return -ENODEV; 1226 return -ENODEV;
553 1227
554 sync_recv_mesg_maxlen = dev->mtu - 1228 ipvs->recv_mesg_maxlen = dev->mtu -
555 sizeof(struct iphdr) - sizeof(struct udphdr); 1229 sizeof(struct iphdr) - sizeof(struct udphdr);
556 IP_VS_DBG(7, "setting the maximum length of sync receiving " 1230 IP_VS_DBG(7, "setting the maximum length of sync receiving "
557 "message %d.\n", sync_recv_mesg_maxlen); 1231 "message %d.\n", ipvs->recv_mesg_maxlen);
558 } 1232 }
559 1233
560 return 0; 1234 return 0;
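
set_sync_mesg_maxlen() sizes the master's sync buffer so that a whole number of connection entries fits below the interface MTU after the IP and UDP headers and a small safety margin. A back-of-the-envelope version, assuming a 1500-byte MTU, a 4-byte sync header and 24-byte entries (those last two sizes are assumptions for this sketch, not authoritative values):

    #include <stdio.h>

    int main(void)
    {
            int mtu = 1500, iphdr = 20, udphdr = 8;
            int sync_hdr = 4, conn_sz = 24, slack = 20;   /* assumed sizes */
            int num = (mtu - iphdr - udphdr - sync_hdr - slack) / conn_sz;

            printf("%d entries, %d byte payload\n", num, sync_hdr + num * conn_sz);
            /* 60 entries, 1444 byte payload -- comfortably below the MTU */
            return 0;
    }
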
@@ -569,6 +1243,7 @@ static int set_sync_mesg_maxlen(int sync_state)
569static int 1243static int
570join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) 1244join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
571{ 1245{
1246 struct net *net = sock_net(sk);
572 struct ip_mreqn mreq; 1247 struct ip_mreqn mreq;
573 struct net_device *dev; 1248 struct net_device *dev;
574 int ret; 1249 int ret;
@@ -576,7 +1251,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
576 memset(&mreq, 0, sizeof(mreq)); 1251 memset(&mreq, 0, sizeof(mreq));
577 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); 1252 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
578 1253
579 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1254 dev = __dev_get_by_name(net, ifname);
1255 if (!dev)
580 return -ENODEV; 1256 return -ENODEV;
581 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1257 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
582 return -EINVAL; 1258 return -EINVAL;
@@ -593,11 +1269,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
593 1269
594static int bind_mcastif_addr(struct socket *sock, char *ifname) 1270static int bind_mcastif_addr(struct socket *sock, char *ifname)
595{ 1271{
1272 struct net *net = sock_net(sock->sk);
596 struct net_device *dev; 1273 struct net_device *dev;
597 __be32 addr; 1274 __be32 addr;
598 struct sockaddr_in sin; 1275 struct sockaddr_in sin;
599 1276
600 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1277 dev = __dev_get_by_name(net, ifname);
1278 if (!dev)
601 return -ENODEV; 1279 return -ENODEV;
602 1280
603 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 1281 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,19 +1297,25 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
619/* 1297/*
620 * Set up sending multicast socket over UDP 1298 * Set up sending multicast socket over UDP
621 */ 1299 */
622static struct socket * make_send_sock(void) 1300static struct socket *make_send_sock(struct net *net)
623{ 1301{
1302 struct netns_ipvs *ipvs = net_ipvs(net);
624 struct socket *sock; 1303 struct socket *sock;
625 int result; 1304 int result;
626 1305
627 	/* First create a socket */ 1306	/* First create a socket; move it to the right namespace later */
628 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1307 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
629 if (result < 0) { 1308 if (result < 0) {
630 pr_err("Error during creation of socket; terminating\n"); 1309 pr_err("Error during creation of socket; terminating\n");
631 return ERR_PTR(result); 1310 return ERR_PTR(result);
632 } 1311 }
633 1312 /*
634 	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); 1313	 * Kernel sockets that are part of a namespace should not hold
 1314	 * a reference to that namespace, so that the namespace can be stopped.
 1315	 * After sk_change_net the socket should be released with sk_release_kernel.
1316 */
1317 sk_change_net(sock->sk, net);
1318 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
635 if (result < 0) { 1319 if (result < 0) {
636 pr_err("Error setting outbound mcast interface\n"); 1320 pr_err("Error setting outbound mcast interface\n");
637 goto error; 1321 goto error;
@@ -640,7 +1324,7 @@ static struct socket * make_send_sock(void)
640 set_mcast_loop(sock->sk, 0); 1324 set_mcast_loop(sock->sk, 0);
641 set_mcast_ttl(sock->sk, 1); 1325 set_mcast_ttl(sock->sk, 1);
642 1326
643 result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); 1327 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
644 if (result < 0) { 1328 if (result < 0) {
645 pr_err("Error binding address of the mcast interface\n"); 1329 pr_err("Error binding address of the mcast interface\n");
646 goto error; 1330 goto error;
@@ -655,8 +1339,8 @@ static struct socket * make_send_sock(void)
655 1339
656 return sock; 1340 return sock;
657 1341
658 error: 1342error:
659 sock_release(sock); 1343 sk_release_kernel(sock->sk);
660 return ERR_PTR(result); 1344 return ERR_PTR(result);
661} 1345}
662 1346
@@ -664,8 +1348,9 @@ static struct socket * make_send_sock(void)
664/* 1348/*
665 * Set up receiving multicast socket over UDP 1349 * Set up receiving multicast socket over UDP
666 */ 1350 */
667static struct socket * make_receive_sock(void) 1351static struct socket *make_receive_sock(struct net *net)
668{ 1352{
1353 struct netns_ipvs *ipvs = net_ipvs(net);
669 struct socket *sock; 1354 struct socket *sock;
670 int result; 1355 int result;
671 1356
@@ -675,7 +1360,12 @@ static struct socket * make_receive_sock(void)
675 pr_err("Error during creation of socket; terminating\n"); 1360 pr_err("Error during creation of socket; terminating\n");
676 return ERR_PTR(result); 1361 return ERR_PTR(result);
677 } 1362 }
678 1363 /*
 1364	 * Kernel sockets that are part of a namespace should not hold
 1365	 * a reference to that namespace, so that the namespace can be stopped.
 1366	 * After sk_change_net the socket should be released with sk_release_kernel.
1367 */
1368 sk_change_net(sock->sk, net);
679 /* it is equivalent to the REUSEADDR option in user-space */ 1369 /* it is equivalent to the REUSEADDR option in user-space */
680 sock->sk->sk_reuse = 1; 1370 sock->sk->sk_reuse = 1;
681 1371
@@ -689,7 +1379,7 @@ static struct socket * make_receive_sock(void)
689 /* join the multicast group */ 1379 /* join the multicast group */
690 result = join_mcast_group(sock->sk, 1380 result = join_mcast_group(sock->sk,
691 (struct in_addr *) &mcast_addr.sin_addr, 1381 (struct in_addr *) &mcast_addr.sin_addr,
692 ip_vs_backup_mcast_ifn); 1382 ipvs->backup_mcast_ifn);
693 if (result < 0) { 1383 if (result < 0) {
694 pr_err("Error joining to the multicast group\n"); 1384 pr_err("Error joining to the multicast group\n");
695 goto error; 1385 goto error;
@@ -697,8 +1387,8 @@ static struct socket * make_receive_sock(void)
697 1387
698 return sock; 1388 return sock;
699 1389
700 error: 1390error:
701 sock_release(sock); 1391 sk_release_kernel(sock->sk);
702 return ERR_PTR(result); 1392 return ERR_PTR(result);
703} 1393}
704 1394
@@ -760,20 +1450,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
760static int sync_thread_master(void *data) 1450static int sync_thread_master(void *data)
761{ 1451{
762 struct ip_vs_sync_thread_data *tinfo = data; 1452 struct ip_vs_sync_thread_data *tinfo = data;
1453 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
763 struct ip_vs_sync_buff *sb; 1454 struct ip_vs_sync_buff *sb;
764 1455
765 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " 1456 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
766 "syncid = %d\n", 1457 "syncid = %d\n",
767 ip_vs_master_mcast_ifn, ip_vs_master_syncid); 1458 ipvs->master_mcast_ifn, ipvs->master_syncid);
768 1459
769 while (!kthread_should_stop()) { 1460 while (!kthread_should_stop()) {
770 while ((sb = sb_dequeue())) { 1461 while ((sb = sb_dequeue(ipvs))) {
771 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1462 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
772 ip_vs_sync_buff_release(sb); 1463 ip_vs_sync_buff_release(sb);
773 } 1464 }
774 1465
775 /* check if entries stay in curr_sb for 2 seconds */ 1466 /* check if entries stay in ipvs->sync_buff for 2 seconds */
776 sb = get_curr_sync_buff(2 * HZ); 1467 sb = get_curr_sync_buff(ipvs, 2 * HZ);
777 if (sb) { 1468 if (sb) {
778 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1469 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
779 ip_vs_sync_buff_release(sb); 1470 ip_vs_sync_buff_release(sb);
@@ -783,17 +1474,16 @@ static int sync_thread_master(void *data)
783 } 1474 }
784 1475
785 /* clean up the sync_buff queue */ 1476 /* clean up the sync_buff queue */
786 while ((sb=sb_dequeue())) { 1477 while ((sb = sb_dequeue(ipvs)))
787 ip_vs_sync_buff_release(sb); 1478 ip_vs_sync_buff_release(sb);
788 }
789 1479
790 /* clean up the current sync_buff */ 1480 /* clean up the current sync_buff */
791 if ((sb = get_curr_sync_buff(0))) { 1481 sb = get_curr_sync_buff(ipvs, 0);
1482 if (sb)
792 ip_vs_sync_buff_release(sb); 1483 ip_vs_sync_buff_release(sb);
793 }
794 1484
795 /* release the sending multicast socket */ 1485 /* release the sending multicast socket */
796 sock_release(tinfo->sock); 1486 sk_release_kernel(tinfo->sock->sk);
797 kfree(tinfo); 1487 kfree(tinfo);
798 1488
799 return 0; 1489 return 0;
@@ -803,11 +1493,12 @@ static int sync_thread_master(void *data)
803static int sync_thread_backup(void *data) 1493static int sync_thread_backup(void *data)
804{ 1494{
805 struct ip_vs_sync_thread_data *tinfo = data; 1495 struct ip_vs_sync_thread_data *tinfo = data;
1496 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
806 int len; 1497 int len;
807 1498
808 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " 1499 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
809 "syncid = %d\n", 1500 "syncid = %d\n",
810 ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); 1501 ipvs->backup_mcast_ifn, ipvs->backup_syncid);
811 1502
812 while (!kthread_should_stop()) { 1503 while (!kthread_should_stop()) {
813 wait_event_interruptible(*sk_sleep(tinfo->sock->sk), 1504 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1508,7 @@ static int sync_thread_backup(void *data)
817 /* do we have data now? */ 1508 /* do we have data now? */
818 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { 1509 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
819 len = ip_vs_receive(tinfo->sock, tinfo->buf, 1510 len = ip_vs_receive(tinfo->sock, tinfo->buf,
820 sync_recv_mesg_maxlen); 1511 ipvs->recv_mesg_maxlen);
821 if (len <= 0) { 1512 if (len <= 0) {
822 pr_err("receiving message error\n"); 1513 pr_err("receiving message error\n");
823 break; 1514 break;
@@ -826,13 +1517,13 @@ static int sync_thread_backup(void *data)
826 /* disable bottom half, because it accesses the data 1517 /* disable bottom half, because it accesses the data
827 shared by softirq while getting/creating conns */ 1518 shared by softirq while getting/creating conns */
828 local_bh_disable(); 1519 local_bh_disable();
829 ip_vs_process_message(tinfo->buf, len); 1520 ip_vs_process_message(tinfo->net, tinfo->buf, len);
830 local_bh_enable(); 1521 local_bh_enable();
831 } 1522 }
832 } 1523 }
833 1524
834 	/* release the receiving multicast socket */ 1525	sk_release_kernel(tinfo->sock->sk);
835 sock_release(tinfo->sock); 1526 sk_release_kernel(tinfo->sock->sk);
836 kfree(tinfo->buf); 1527 kfree(tinfo->buf);
837 kfree(tinfo); 1528 kfree(tinfo);
838 1529
@@ -840,41 +1531,42 @@ static int sync_thread_backup(void *data)
840} 1531}
841 1532
842 1533
843int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) 1534int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
844{ 1535{
845 struct ip_vs_sync_thread_data *tinfo; 1536 struct ip_vs_sync_thread_data *tinfo;
846 struct task_struct **realtask, *task; 1537 struct task_struct **realtask, *task;
847 struct socket *sock; 1538 struct socket *sock;
1539 struct netns_ipvs *ipvs = net_ipvs(net);
848 char *name, *buf = NULL; 1540 char *name, *buf = NULL;
849 int (*threadfn)(void *data); 1541 int (*threadfn)(void *data);
850 int result = -ENOMEM; 1542 int result = -ENOMEM;
851 1543
852 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1544 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
853 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", 1545 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
854 sizeof(struct ip_vs_sync_conn)); 1546 sizeof(struct ip_vs_sync_conn_v0));
855 1547
856 if (state == IP_VS_STATE_MASTER) { 1548 if (state == IP_VS_STATE_MASTER) {
857 if (sync_master_thread) 1549 if (ipvs->master_thread)
858 return -EEXIST; 1550 return -EEXIST;
859 1551
860 strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, 1552 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
861 sizeof(ip_vs_master_mcast_ifn)); 1553 sizeof(ipvs->master_mcast_ifn));
862 ip_vs_master_syncid = syncid; 1554 ipvs->master_syncid = syncid;
863 realtask = &sync_master_thread; 1555 realtask = &ipvs->master_thread;
864 name = "ipvs_syncmaster"; 1556 name = "ipvs_master:%d";
865 threadfn = sync_thread_master; 1557 threadfn = sync_thread_master;
866 sock = make_send_sock(); 1558 sock = make_send_sock(net);
867 } else if (state == IP_VS_STATE_BACKUP) { 1559 } else if (state == IP_VS_STATE_BACKUP) {
868 if (sync_backup_thread) 1560 if (ipvs->backup_thread)
869 return -EEXIST; 1561 return -EEXIST;
870 1562
871 strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, 1563 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
872 sizeof(ip_vs_backup_mcast_ifn)); 1564 sizeof(ipvs->backup_mcast_ifn));
873 ip_vs_backup_syncid = syncid; 1565 ipvs->backup_syncid = syncid;
874 realtask = &sync_backup_thread; 1566 realtask = &ipvs->backup_thread;
875 name = "ipvs_syncbackup"; 1567 name = "ipvs_backup:%d";
876 threadfn = sync_thread_backup; 1568 threadfn = sync_thread_backup;
877 sock = make_receive_sock(); 1569 sock = make_receive_sock(net);
878 } else { 1570 } else {
879 return -EINVAL; 1571 return -EINVAL;
880 } 1572 }
@@ -884,9 +1576,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
884 goto out; 1576 goto out;
885 } 1577 }
886 1578
887 set_sync_mesg_maxlen(state); 1579 set_sync_mesg_maxlen(net, state);
888 if (state == IP_VS_STATE_BACKUP) { 1580 if (state == IP_VS_STATE_BACKUP) {
889 buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); 1581 buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
890 if (!buf) 1582 if (!buf)
891 goto outsocket; 1583 goto outsocket;
892 } 1584 }
@@ -895,10 +1587,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
895 if (!tinfo) 1587 if (!tinfo)
896 goto outbuf; 1588 goto outbuf;
897 1589
1590 tinfo->net = net;
898 tinfo->sock = sock; 1591 tinfo->sock = sock;
899 tinfo->buf = buf; 1592 tinfo->buf = buf;
900 1593
901 task = kthread_run(threadfn, tinfo, name); 1594 task = kthread_run(threadfn, tinfo, name, ipvs->gen);
902 if (IS_ERR(task)) { 1595 if (IS_ERR(task)) {
903 result = PTR_ERR(task); 1596 result = PTR_ERR(task);
904 goto outtinfo; 1597 goto outtinfo;
@@ -906,7 +1599,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
906 1599
907 /* mark as active */ 1600 /* mark as active */
908 *realtask = task; 1601 *realtask = task;
909 ip_vs_sync_state |= state; 1602 ipvs->sync_state |= state;
910 1603
911 /* increase the module use count */ 1604 /* increase the module use count */
912 ip_vs_use_count_inc(); 1605 ip_vs_use_count_inc();
@@ -918,22 +1611,25 @@ outtinfo:
918outbuf: 1611outbuf:
919 kfree(buf); 1612 kfree(buf);
920outsocket: 1613outsocket:
921 sock_release(sock); 1614 sk_release_kernel(sock->sk);
922out: 1615out:
923 return result; 1616 return result;
924} 1617}
925 1618
926 1619
927int stop_sync_thread(int state) 1620int stop_sync_thread(struct net *net, int state)
928{ 1621{
1622 struct netns_ipvs *ipvs = net_ipvs(net);
1623 int retc = -EINVAL;
1624
929 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1625 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
930 1626
931 if (state == IP_VS_STATE_MASTER) { 1627 if (state == IP_VS_STATE_MASTER) {
932 if (!sync_master_thread) 1628 if (!ipvs->master_thread)
933 return -ESRCH; 1629 return -ESRCH;
934 1630
935 pr_info("stopping master sync thread %d ...\n", 1631 pr_info("stopping master sync thread %d ...\n",
936 task_pid_nr(sync_master_thread)); 1632 task_pid_nr(ipvs->master_thread));
937 1633
938 /* 1634 /*
939 * The lock synchronizes with sb_queue_tail(), so that we don't 1635 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,27 +1637,64 @@ int stop_sync_thread(int state)
941 * progress of stopping the master sync daemon. 1637 * progress of stopping the master sync daemon.
942 */ 1638 */
943 1639
944 spin_lock_bh(&ip_vs_sync_lock); 1640 spin_lock_bh(&ipvs->sync_lock);
945 ip_vs_sync_state &= ~IP_VS_STATE_MASTER; 1641 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
946 spin_unlock_bh(&ip_vs_sync_lock); 1642 spin_unlock_bh(&ipvs->sync_lock);
947 kthread_stop(sync_master_thread); 1643 retc = kthread_stop(ipvs->master_thread);
948 sync_master_thread = NULL; 1644 ipvs->master_thread = NULL;
949 } else if (state == IP_VS_STATE_BACKUP) { 1645 } else if (state == IP_VS_STATE_BACKUP) {
950 if (!sync_backup_thread) 1646 if (!ipvs->backup_thread)
951 return -ESRCH; 1647 return -ESRCH;
952 1648
953 pr_info("stopping backup sync thread %d ...\n", 1649 pr_info("stopping backup sync thread %d ...\n",
954 task_pid_nr(sync_backup_thread)); 1650 task_pid_nr(ipvs->backup_thread));
955 1651
956 ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; 1652 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
957 kthread_stop(sync_backup_thread); 1653 retc = kthread_stop(ipvs->backup_thread);
958 sync_backup_thread = NULL; 1654 ipvs->backup_thread = NULL;
959 } else {
960 return -EINVAL;
961 } 1655 }
962 1656
963 /* decrease the module use count */ 1657 /* decrease the module use count */
964 ip_vs_use_count_dec(); 1658 ip_vs_use_count_dec();
965 1659
1660 return retc;
1661}
1662
1663/*
1664 * Initialize data struct for each netns
1665 */
1666int __net_init __ip_vs_sync_init(struct net *net)
1667{
1668 struct netns_ipvs *ipvs = net_ipvs(net);
1669
1670 INIT_LIST_HEAD(&ipvs->sync_queue);
1671 spin_lock_init(&ipvs->sync_lock);
1672 spin_lock_init(&ipvs->sync_buff_lock);
1673
1674 ipvs->sync_mcast_addr.sin_family = AF_INET;
1675 ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
1676 ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
1677 return 0;
1678}
1679
1680void __ip_vs_sync_cleanup(struct net *net)
1681{
1682 int retc;
1683
1684 retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
1685 if (retc && retc != -ESRCH)
1686 pr_err("Failed to stop Master Daemon\n");
1687
1688 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
1689 if (retc && retc != -ESRCH)
1690 pr_err("Failed to stop Backup Daemon\n");
1691}
1692
1693int __init ip_vs_sync_init(void)
1694{
966 return 0; 1695 return 0;
967} 1696}
1697
1698void ip_vs_sync_cleanup(void)
1699{
1700}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bbddfdb10db2..bc1bfc48a17f 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -27,22 +27,6 @@
27 27
28#include <net/ip_vs.h> 28#include <net/ip_vs.h>
29 29
30
31static inline unsigned int
32ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
33{
34 /*
35 * We think the overhead of processing active connections is 256
36 * times higher than that of inactive connections in average. (This
37 * 256 times might not be accurate, we will change it later) We
38 * use the following formula to estimate the overhead now:
39 * dest->activeconns*256 + dest->inactconns
40 */
41 return (atomic_read(&dest->activeconns) << 8) +
42 atomic_read(&dest->inactconns);
43}
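
The helper removed here estimated destination overhead by weighting active connections 256 times heavier than inactive ones; the shared ip_vs_dest_conn_overhead() now takes over that role. A quick numeric check of the formula:

    #include <stdio.h>

    int main(void)
    {
            unsigned int active = 10, inactive = 50;
            unsigned int overhead = (active << 8) + inactive;

            printf("overhead = %u\n", overhead);   /* 10 * 256 + 50 = 2610 */
            return 0;
    }
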
44
45
46/* 30/*
47 * Weighted Least Connection scheduling 31 * Weighted Least Connection scheduling
48 */ 32 */
@@ -71,11 +55,11 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
71 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 55 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
72 atomic_read(&dest->weight) > 0) { 56 atomic_read(&dest->weight) > 0) {
73 least = dest; 57 least = dest;
74 loh = ip_vs_wlc_dest_overhead(least); 58 loh = ip_vs_dest_conn_overhead(least);
75 goto nextstage; 59 goto nextstage;
76 } 60 }
77 } 61 }
78 IP_VS_ERR_RL("WLC: no destination available\n"); 62 ip_vs_scheduler_err(svc, "no destination available");
79 return NULL; 63 return NULL;
80 64
81 /* 65 /*
@@ -85,7 +69,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
85 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 69 list_for_each_entry_continue(dest, &svc->destinations, n_list) {
86 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 70 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
87 continue; 71 continue;
88 doh = ip_vs_wlc_dest_overhead(dest); 72 doh = ip_vs_dest_conn_overhead(dest);
89 if (loh * atomic_read(&dest->weight) > 73 if (loh * atomic_read(&dest->weight) >
90 doh * atomic_read(&least->weight)) { 74 doh * atomic_read(&least->weight)) {
91 least = dest; 75 least = dest;
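
The scheduler compares loh * dest->weight against doh * least->weight, i.e. it cross-multiplies so that overhead per unit of weight can be compared without division or floating point. A small worked example with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            unsigned int loh = 2610, least_weight = 3;   /* current best */
            unsigned int doh = 1500, dest_weight = 1;    /* candidate */

            /* per-weight overhead: 2610/3 = 870 vs 1500/1 = 1500 */
            if (loh * dest_weight > doh * least_weight)
                    printf("switch to candidate\n");
            else
                    printf("keep current best\n");       /* printed here */
            return 0;
    }
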
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 30db633f88f1..1ef41f50723c 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -147,8 +147,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
147 147
148 if (mark->cl == mark->cl->next) { 148 if (mark->cl == mark->cl->next) {
149 /* no dest entry */ 149 /* no dest entry */
150 IP_VS_ERR_RL("WRR: no destination available: " 150 ip_vs_scheduler_err(svc,
151 "no destinations present\n"); 151 "no destination available: "
152 "no destinations present");
152 dest = NULL; 153 dest = NULL;
153 goto out; 154 goto out;
154 } 155 }
@@ -162,8 +163,8 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
162 */ 163 */
163 if (mark->cw == 0) { 164 if (mark->cw == 0) {
164 mark->cl = &svc->destinations; 165 mark->cl = &svc->destinations;
165 IP_VS_ERR_RL("WRR: no destination " 166 ip_vs_scheduler_err(svc,
166 "available\n"); 167 "no destination available");
167 dest = NULL; 168 dest = NULL;
168 goto out; 169 goto out;
169 } 170 }
@@ -185,8 +186,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
185 /* back to the start, and no dest is found. 186 /* back to the start, and no dest is found.
186 It is only possible when all dests are OVERLOADED */ 187 It is only possible when all dests are OVERLOADED */
187 dest = NULL; 188 dest = NULL;
188 IP_VS_ERR_RL("WRR: no destination available: " 189 ip_vs_scheduler_err(svc,
189 "all destinations are overloaded\n"); 190 "no destination available: "
191 "all destinations are overloaded");
190 goto out; 192 goto out;
191 } 193 }
192 } 194 }
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 5325a3fbe4ac..ee319a4338b0 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -43,6 +43,13 @@
43 43
44#include <net/ip_vs.h> 44#include <net/ip_vs.h>
45 45
46enum {
47 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */
48 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */
49 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
50 * local
51 */
52};
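
The new IP_VS_RT_MODE_* constants replace the bare 1|2|4 masks used throughout this file. A small illustration of how the flags compose and how the local/non-local test below consumes them (the RT_MODE_* names in this sketch are local stand-ins, not the kernel enum):

    #include <stdio.h>

    enum {
            RT_MODE_LOCAL     = 1,   /* allow local destinations */
            RT_MODE_NON_LOCAL = 2,   /* allow non-local destinations */
            RT_MODE_RDR       = 4,   /* allow redirect to a local address */
    };

    static int route_allowed(int local, int rt_mode)
    {
            /* same shape as the (local ? LOCAL : NON_LOCAL) & rt_mode test */
            return !!((local ? RT_MODE_LOCAL : RT_MODE_NON_LOCAL) & rt_mode);
    }

    int main(void)
    {
            int nat_mode = RT_MODE_LOCAL | RT_MODE_NON_LOCAL | RT_MODE_RDR;

            printf("%d\n", route_allowed(1, nat_mode));           /* 1: NAT may hit local */
            printf("%d\n", route_allowed(1, RT_MODE_NON_LOCAL));  /* 0: local dest rejected */
            return 0;
    }
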
46 53
47/* 54/*
48 * Destination cache to speed up outgoing route lookup 55 * Destination cache to speed up outgoing route lookup
@@ -77,14 +84,10 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
77 return dst; 84 return dst;
78} 85}
79 86
80/* 87/* Get route to destination or remote server */
81 * Get route to destination or remote server
82 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
83 * &4=Allow redirect from remote daddr to local
84 */
85static struct rtable * 88static struct rtable *
86__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, 89__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
87 __be32 daddr, u32 rtos, int rt_mode) 90 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
88{ 91{
89 struct net *net = dev_net(skb_dst(skb)->dev); 92 struct net *net = dev_net(skb_dst(skb)->dev);
90 struct rtable *rt; /* Route to the other host */ 93 struct rtable *rt; /* Route to the other host */
@@ -95,56 +98,66 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
95 spin_lock(&dest->dst_lock); 98 spin_lock(&dest->dst_lock);
96 if (!(rt = (struct rtable *) 99 if (!(rt = (struct rtable *)
97 __ip_vs_dst_check(dest, rtos))) { 100 __ip_vs_dst_check(dest, rtos))) {
98 struct flowi fl = { 101 struct flowi4 fl4;
99 .fl4_dst = dest->addr.ip,
100 .fl4_tos = rtos,
101 };
102 102
103 if (ip_route_output_key(net, &rt, &fl)) { 103 memset(&fl4, 0, sizeof(fl4));
104 fl4.daddr = dest->addr.ip;
105 fl4.flowi4_tos = rtos;
106 rt = ip_route_output_key(net, &fl4);
107 if (IS_ERR(rt)) {
104 spin_unlock(&dest->dst_lock); 108 spin_unlock(&dest->dst_lock);
105 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 109 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
106 &dest->addr.ip); 110 &dest->addr.ip);
107 return NULL; 111 return NULL;
108 } 112 }
109 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); 113 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
110 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", 114 dest->dst_saddr.ip = fl4.saddr;
111 &dest->addr.ip, 115 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
116 "rtos=%X\n",
117 &dest->addr.ip, &dest->dst_saddr.ip,
112 atomic_read(&rt->dst.__refcnt), rtos); 118 atomic_read(&rt->dst.__refcnt), rtos);
113 } 119 }
120 daddr = dest->addr.ip;
121 if (ret_saddr)
122 *ret_saddr = dest->dst_saddr.ip;
114 spin_unlock(&dest->dst_lock); 123 spin_unlock(&dest->dst_lock);
115 } else { 124 } else {
116 struct flowi fl = { 125 struct flowi4 fl4;
117 .fl4_dst = daddr,
118 .fl4_tos = rtos,
119 };
120 126
121 if (ip_route_output_key(net, &rt, &fl)) { 127 memset(&fl4, 0, sizeof(fl4));
128 fl4.daddr = daddr;
129 fl4.flowi4_tos = rtos;
130 rt = ip_route_output_key(net, &fl4);
131 if (IS_ERR(rt)) {
122 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 132 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
123 &daddr); 133 &daddr);
124 return NULL; 134 return NULL;
125 } 135 }
136 if (ret_saddr)
137 *ret_saddr = fl4.saddr;
126 } 138 }
127 139
128 local = rt->rt_flags & RTCF_LOCAL; 140 local = rt->rt_flags & RTCF_LOCAL;
129 if (!((local ? 1 : 2) & rt_mode)) { 141 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
142 rt_mode)) {
130 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", 143 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
131 (rt->rt_flags & RTCF_LOCAL) ? 144 (rt->rt_flags & RTCF_LOCAL) ?
132 "local":"non-local", &rt->rt_dst); 145 "local":"non-local", &daddr);
133 ip_rt_put(rt); 146 ip_rt_put(rt);
134 return NULL; 147 return NULL;
135 } 148 }
136 if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) && 149 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
137 ort->rt_flags & RTCF_LOCAL)) { 150 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
138 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " 151 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
139 "requires NAT method, dest: %pI4\n", 152 "requires NAT method, dest: %pI4\n",
140 &ip_hdr(skb)->daddr, &rt->rt_dst); 153 &ip_hdr(skb)->daddr, &daddr);
141 ip_rt_put(rt); 154 ip_rt_put(rt);
142 return NULL; 155 return NULL;
143 } 156 }
144 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { 157 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
145 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " 158 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
146 "to non-local address, dest: %pI4\n", 159 "to non-local address, dest: %pI4\n",
147 &ip_hdr(skb)->saddr, &rt->rt_dst); 160 &ip_hdr(skb)->saddr, &daddr);
148 ip_rt_put(rt); 161 ip_rt_put(rt);
149 return NULL; 162 return NULL;
150 } 163 }
@@ -169,15 +182,15 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
169 return 0; 182 return 0;
170 refdst_drop(orefdst); 183 refdst_drop(orefdst);
171 } else { 184 } else {
172 struct flowi fl = { 185 struct flowi4 fl4 = {
173 .fl4_dst = iph->daddr, 186 .daddr = iph->daddr,
174 .fl4_src = iph->saddr, 187 .saddr = iph->saddr,
175 .fl4_tos = RT_TOS(iph->tos), 188 .flowi4_tos = RT_TOS(iph->tos),
176 .mark = skb->mark, 189 .flowi4_mark = skb->mark,
177 }; 190 };
178 struct rtable *rt;
179 191
180 if (ip_route_output_key(net, &rt, &fl)) 192 rt = ip_route_output_key(net, &fl4);
193 if (IS_ERR(rt))
181 return 0; 194 return 0;
182 if (!(rt->rt_flags & RTCF_LOCAL)) { 195 if (!(rt->rt_flags & RTCF_LOCAL)) {
183 ip_rt_put(rt); 196 ip_rt_put(rt);
@@ -202,22 +215,27 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
202 struct in6_addr *ret_saddr, int do_xfrm) 215 struct in6_addr *ret_saddr, int do_xfrm)
203{ 216{
204 struct dst_entry *dst; 217 struct dst_entry *dst;
205 struct flowi fl = { 218 struct flowi6 fl6 = {
206 .fl6_dst = *daddr, 219 .daddr = *daddr,
207 }; 220 };
208 221
209 dst = ip6_route_output(net, NULL, &fl); 222 dst = ip6_route_output(net, NULL, &fl6);
210 if (dst->error) 223 if (dst->error)
211 goto out_err; 224 goto out_err;
212 if (!ret_saddr) 225 if (!ret_saddr)
213 return dst; 226 return dst;
214 if (ipv6_addr_any(&fl.fl6_src) && 227 if (ipv6_addr_any(&fl6.saddr) &&
215 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, 228 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
216 &fl.fl6_dst, 0, &fl.fl6_src) < 0) 229 &fl6.daddr, 0, &fl6.saddr) < 0)
217 goto out_err; 230 goto out_err;
218 if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0) 231 if (do_xfrm) {
219 goto out_err; 232 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
220 ipv6_addr_copy(ret_saddr, &fl.fl6_src); 233 if (IS_ERR(dst)) {
234 dst = NULL;
235 goto out_err;
236 }
237 }
238 ipv6_addr_copy(ret_saddr, &fl6.saddr);
221 return dst; 239 return dst;
222 240
223out_err: 241out_err:
@@ -228,8 +246,6 @@ out_err:
228 246
229/* 247/*
230 * Get route to destination or remote server 248 * Get route to destination or remote server
231 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
232 * &4=Allow redirect from remote daddr to local
233 */ 249 */
234static struct rt6_info * 250static struct rt6_info *
235__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, 251__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
@@ -249,7 +265,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
249 u32 cookie; 265 u32 cookie;
250 266
251 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 267 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
252 &dest->dst_saddr, 268 &dest->dst_saddr.in6,
253 do_xfrm); 269 do_xfrm);
254 if (!dst) { 270 if (!dst) {
255 spin_unlock(&dest->dst_lock); 271 spin_unlock(&dest->dst_lock);
@@ -259,11 +275,11 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
259 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 275 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
260 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); 276 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
261 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 277 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
262 &dest->addr.in6, &dest->dst_saddr, 278 &dest->addr.in6, &dest->dst_saddr.in6,
263 atomic_read(&rt->dst.__refcnt)); 279 atomic_read(&rt->dst.__refcnt));
264 } 280 }
265 if (ret_saddr) 281 if (ret_saddr)
266 ipv6_addr_copy(ret_saddr, &dest->dst_saddr); 282 ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6);
267 spin_unlock(&dest->dst_lock); 283 spin_unlock(&dest->dst_lock);
268 } else { 284 } else {
269 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); 285 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
@@ -273,13 +289,14 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
273 } 289 }
274 290
275 local = __ip_vs_is_local_route6(rt); 291 local = __ip_vs_is_local_route6(rt);
276 if (!((local ? 1 : 2) & rt_mode)) { 292 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
293 rt_mode)) {
277 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", 294 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
278 local ? "local":"non-local", daddr); 295 local ? "local":"non-local", daddr);
279 dst_release(&rt->dst); 296 dst_release(&rt->dst);
280 return NULL; 297 return NULL;
281 } 298 }
282 if (local && !(rt_mode & 4) && 299 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
283 !((ort = (struct rt6_info *) skb_dst(skb)) && 300 !((ort = (struct rt6_info *) skb_dst(skb)) &&
284 __ip_vs_is_local_route6(ort))) { 301 __ip_vs_is_local_route6(ort))) {
285 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " 302 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
@@ -384,13 +401,14 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
384 401
385 EnterFunction(10); 402 EnterFunction(10);
386 403
387 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, 404 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
388 RT_TOS(iph->tos), 2))) 405 IP_VS_RT_MODE_NON_LOCAL, NULL)))
389 goto tx_error_icmp; 406 goto tx_error_icmp;
390 407
391 /* MTU checking */ 408 /* MTU checking */
392 mtu = dst_mtu(&rt->dst); 409 mtu = dst_mtu(&rt->dst);
393 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 410 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
411 !skb_is_gso(skb)) {
394 ip_rt_put(rt); 412 ip_rt_put(rt);
395 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 413 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
396 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 414 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
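
These hunks relax the MTU test so that oversized packets with DF set are still accepted when the skb is GSO, since segmentation happens later on the output path. A condensed sketch of the adjusted condition (frag_needed() is a hypothetical helper, not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    static bool frag_needed(unsigned int len, unsigned int mtu, bool df, bool gso)
    {
            /* only non-GSO, DF-marked packets above the MTU trigger ICMP */
            return len > mtu && df && !gso;
    }

    int main(void)
    {
            printf("%d\n", frag_needed(1600, 1500, true, false));  /* 1: send FRAG_NEEDED */
            printf("%d\n", frag_needed(1600, 1500, true, true));   /* 0: GSO segments later */
            return 0;
    }
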
@@ -438,12 +456,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
438 456
439 EnterFunction(10); 457 EnterFunction(10);
440 458
441 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2))) 459 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
460 IP_VS_RT_MODE_NON_LOCAL)))
442 goto tx_error_icmp; 461 goto tx_error_icmp;
443 462
444 /* MTU checking */ 463 /* MTU checking */
445 mtu = dst_mtu(&rt->dst); 464 mtu = dst_mtu(&rt->dst);
446 if (skb->len > mtu) { 465 if (skb->len > mtu && !skb_is_gso(skb)) {
447 if (!skb->dev) { 466 if (!skb->dev) {
448 struct net *net = dev_net(skb_dst(skb)->dev); 467 struct net *net = dev_net(skb_dst(skb)->dev);
449 468
@@ -512,7 +531,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
512 } 531 }
513 532
514 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 533 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
515 RT_TOS(iph->tos), 1|2|4))) 534 RT_TOS(iph->tos),
535 IP_VS_RT_MODE_LOCAL |
536 IP_VS_RT_MODE_NON_LOCAL |
537 IP_VS_RT_MODE_RDR, NULL)))
516 goto tx_error_icmp; 538 goto tx_error_icmp;
517 local = rt->rt_flags & RTCF_LOCAL; 539 local = rt->rt_flags & RTCF_LOCAL;
518 /* 540 /*
@@ -534,7 +556,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
534#endif 556#endif
535 557
536 /* From world but DNAT to loopback address? */ 558 /* From world but DNAT to loopback address? */
537 if (local && ipv4_is_loopback(rt->rt_dst) && 559 if (local && ipv4_is_loopback(cp->daddr.ip) &&
538 rt_is_input_route(skb_rtable(skb))) { 560 rt_is_input_route(skb_rtable(skb))) {
539 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 561 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
540 "stopping DNAT to loopback address"); 562 "stopping DNAT to loopback address");
@@ -543,7 +565,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
543 565
544 /* MTU checking */ 566 /* MTU checking */
545 mtu = dst_mtu(&rt->dst); 567 mtu = dst_mtu(&rt->dst);
546 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 568 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
569 !skb_is_gso(skb)) {
547 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 570 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
548 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, 571 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
549 "ip_vs_nat_xmit(): frag needed for"); 572 "ip_vs_nat_xmit(): frag needed for");
@@ -626,7 +649,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
626 } 649 }
627 650
628 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 651 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
629 0, 1|2|4))) 652 0, (IP_VS_RT_MODE_LOCAL |
653 IP_VS_RT_MODE_NON_LOCAL |
654 IP_VS_RT_MODE_RDR))))
630 goto tx_error_icmp; 655 goto tx_error_icmp;
631 local = __ip_vs_is_local_route6(rt); 656 local = __ip_vs_is_local_route6(rt);
632 /* 657 /*
@@ -658,7 +683,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
658 683
659 /* MTU checking */ 684 /* MTU checking */
660 mtu = dst_mtu(&rt->dst); 685 mtu = dst_mtu(&rt->dst);
661 if (skb->len > mtu) { 686 if (skb->len > mtu && !skb_is_gso(skb)) {
662 if (!skb->dev) { 687 if (!skb->dev) {
663 struct net *net = dev_net(skb_dst(skb)->dev); 688 struct net *net = dev_net(skb_dst(skb)->dev);
664 689
@@ -742,6 +767,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
742 struct ip_vs_protocol *pp) 767 struct ip_vs_protocol *pp)
743{ 768{
744 struct rtable *rt; /* Route to the other host */ 769 struct rtable *rt; /* Route to the other host */
770 __be32 saddr; /* Source for tunnel */
745 struct net_device *tdev; /* Device to other host */ 771 struct net_device *tdev; /* Device to other host */
746 struct iphdr *old_iph = ip_hdr(skb); 772 struct iphdr *old_iph = ip_hdr(skb);
747 u8 tos = old_iph->tos; 773 u8 tos = old_iph->tos;
@@ -754,7 +780,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
754 EnterFunction(10); 780 EnterFunction(10);
755 781
756 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 782 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
757 RT_TOS(tos), 1|2))) 783 RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
784 IP_VS_RT_MODE_NON_LOCAL,
785 &saddr)))
758 goto tx_error_icmp; 786 goto tx_error_icmp;
759 if (rt->rt_flags & RTCF_LOCAL) { 787 if (rt->rt_flags & RTCF_LOCAL) {
760 ip_rt_put(rt); 788 ip_rt_put(rt);
@@ -773,8 +801,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
773 801
774 df |= (old_iph->frag_off & htons(IP_DF)); 802 df |= (old_iph->frag_off & htons(IP_DF));
775 803
776 if ((old_iph->frag_off & htons(IP_DF)) 804 if ((old_iph->frag_off & htons(IP_DF) &&
777 && mtu < ntohs(old_iph->tot_len)) { 805 mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
778 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 806 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
779 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 807 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
780 goto tx_error_put; 808 goto tx_error_put;
@@ -822,8 +850,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
822 iph->frag_off = df; 850 iph->frag_off = df;
823 iph->protocol = IPPROTO_IPIP; 851 iph->protocol = IPPROTO_IPIP;
824 iph->tos = tos; 852 iph->tos = tos;
825 iph->daddr = rt->rt_dst; 853 iph->daddr = cp->daddr.ip;
826 iph->saddr = rt->rt_src; 854 iph->saddr = saddr;
827 iph->ttl = old_iph->ttl; 855 iph->ttl = old_iph->ttl;
828 ip_select_ident(iph, &rt->dst, NULL); 856 ip_select_ident(iph, &rt->dst, NULL);
829 857
@@ -868,7 +896,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
868 EnterFunction(10); 896 EnterFunction(10);
869 897
870 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, 898 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
871 &saddr, 1, 1|2))) 899 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
900 IP_VS_RT_MODE_NON_LOCAL))))
872 goto tx_error_icmp; 901 goto tx_error_icmp;
873 if (__ip_vs_is_local_route6(rt)) { 902 if (__ip_vs_is_local_route6(rt)) {
874 dst_release(&rt->dst); 903 dst_release(&rt->dst);
@@ -886,7 +915,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
886 if (skb_dst(skb)) 915 if (skb_dst(skb))
887 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 916 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
888 917
889 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 918 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
919 !skb_is_gso(skb)) {
890 if (!skb->dev) { 920 if (!skb->dev) {
891 struct net *net = dev_net(skb_dst(skb)->dev); 921 struct net *net = dev_net(skb_dst(skb)->dev);
892 922
@@ -982,7 +1012,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
982 EnterFunction(10); 1012 EnterFunction(10);
983 1013
984 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1014 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
985 RT_TOS(iph->tos), 1|2))) 1015 RT_TOS(iph->tos),
1016 IP_VS_RT_MODE_LOCAL |
1017 IP_VS_RT_MODE_NON_LOCAL, NULL)))
986 goto tx_error_icmp; 1018 goto tx_error_icmp;
987 if (rt->rt_flags & RTCF_LOCAL) { 1019 if (rt->rt_flags & RTCF_LOCAL) {
988 ip_rt_put(rt); 1020 ip_rt_put(rt);
@@ -991,7 +1023,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
991 1023
992 /* MTU checking */ 1024 /* MTU checking */
993 mtu = dst_mtu(&rt->dst); 1025 mtu = dst_mtu(&rt->dst);
994 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { 1026 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1027 !skb_is_gso(skb)) {
995 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 1028 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
996 ip_rt_put(rt); 1029 ip_rt_put(rt);
997 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1030 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1039,7 +1072,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1039 EnterFunction(10); 1072 EnterFunction(10);
1040 1073
1041 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1074 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1042 0, 1|2))) 1075 0, (IP_VS_RT_MODE_LOCAL |
1076 IP_VS_RT_MODE_NON_LOCAL))))
1043 goto tx_error_icmp; 1077 goto tx_error_icmp;
1044 if (__ip_vs_is_local_route6(rt)) { 1078 if (__ip_vs_is_local_route6(rt)) {
1045 dst_release(&rt->dst); 1079 dst_release(&rt->dst);
@@ -1098,12 +1132,13 @@ tx_error:
1098 */ 1132 */
1099int 1133int
1100ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1134ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1101 struct ip_vs_protocol *pp, int offset) 1135 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1102{ 1136{
1103 struct rtable *rt; /* Route to the other host */ 1137 struct rtable *rt; /* Route to the other host */
1104 int mtu; 1138 int mtu;
1105 int rc; 1139 int rc;
1106 int local; 1140 int local;
1141 int rt_mode;
1107 1142
1108 EnterFunction(10); 1143 EnterFunction(10);
1109 1144
@@ -1124,8 +1159,13 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1124 * mangle and send the packet here (only for VS/NAT) 1159 * mangle and send the packet here (only for VS/NAT)
1125 */ 1160 */
1126 1161
1162 /* LOCALNODE from FORWARD hook is not supported */
1163 rt_mode = (hooknum != NF_INET_FORWARD) ?
1164 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1165 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1127 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1166 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1128 RT_TOS(ip_hdr(skb)->tos), 1|2|4))) 1167 RT_TOS(ip_hdr(skb)->tos),
1168 rt_mode, NULL)))
1129 goto tx_error_icmp; 1169 goto tx_error_icmp;
1130 local = rt->rt_flags & RTCF_LOCAL; 1170 local = rt->rt_flags & RTCF_LOCAL;
1131 1171
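ip_vs_icmp_xmit() and its IPv6 counterpart now take the hook number and derive the route mode from it: when invoked from NF_INET_FORWARD only non-local routes are accepted, since LOCALNODE handling is not supported there. A hedged sketch of that selection; the hook numbering and flag values are assumptions for illustration:

#include <stdio.h>

/* Assumed numbering/values, for illustration only. */
enum { NF_INET_LOCAL_IN = 1, NF_INET_FORWARD = 2, NF_INET_LOCAL_OUT = 3 };
#define IP_VS_RT_MODE_LOCAL     0x1
#define IP_VS_RT_MODE_NON_LOCAL 0x2
#define IP_VS_RT_MODE_RDR       0x4

/* LOCALNODE redirection makes no sense from FORWARD, so only non-local
 * routes are requested there; the other hooks allow all three modes. */
static int icmp_rt_mode(unsigned int hooknum)
{
        return (hooknum != NF_INET_FORWARD) ?
                IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR :
                IP_VS_RT_MODE_NON_LOCAL;
}

int main(void)
{
        printf("FORWARD mask:   0x%x\n", icmp_rt_mode(NF_INET_FORWARD));
        printf("LOCAL_OUT mask: 0x%x\n", icmp_rt_mode(NF_INET_LOCAL_OUT));
        return 0;
}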
@@ -1148,7 +1188,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1148#endif 1188#endif
1149 1189
1150 /* From world but DNAT to loopback address? */ 1190 /* From world but DNAT to loopback address? */
1151 if (local && ipv4_is_loopback(rt->rt_dst) && 1191 if (local && ipv4_is_loopback(cp->daddr.ip) &&
1152 rt_is_input_route(skb_rtable(skb))) { 1192 rt_is_input_route(skb_rtable(skb))) {
1153 IP_VS_DBG(1, "%s(): " 1193 IP_VS_DBG(1, "%s(): "
1154 "stopping DNAT to loopback %pI4\n", 1194 "stopping DNAT to loopback %pI4\n",
@@ -1158,7 +1198,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1158 1198
1159 /* MTU checking */ 1199 /* MTU checking */
1160 mtu = dst_mtu(&rt->dst); 1200 mtu = dst_mtu(&rt->dst);
1161 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1201 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1202 !skb_is_gso(skb)) {
1162 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1203 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1163 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1204 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1164 goto tx_error_put; 1205 goto tx_error_put;
@@ -1212,12 +1253,13 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1212#ifdef CONFIG_IP_VS_IPV6 1253#ifdef CONFIG_IP_VS_IPV6
1213int 1254int
1214ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1255ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1215 struct ip_vs_protocol *pp, int offset) 1256 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1216{ 1257{
1217 struct rt6_info *rt; /* Route to the other host */ 1258 struct rt6_info *rt; /* Route to the other host */
1218 int mtu; 1259 int mtu;
1219 int rc; 1260 int rc;
1220 int local; 1261 int local;
1262 int rt_mode;
1221 1263
1222 EnterFunction(10); 1264 EnterFunction(10);
1223 1265
@@ -1238,8 +1280,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1238 * mangle and send the packet here (only for VS/NAT) 1280 * mangle and send the packet here (only for VS/NAT)
1239 */ 1281 */
1240 1282
1283 /* LOCALNODE from FORWARD hook is not supported */
1284 rt_mode = (hooknum != NF_INET_FORWARD) ?
1285 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1286 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1241 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1287 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1242 0, 1|2|4))) 1288 0, rt_mode)))
1243 goto tx_error_icmp; 1289 goto tx_error_icmp;
1244 1290
1245 local = __ip_vs_is_local_route6(rt); 1291 local = __ip_vs_is_local_route6(rt);
@@ -1272,7 +1318,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1272 1318
1273 /* MTU checking */ 1319 /* MTU checking */
1274 mtu = dst_mtu(&rt->dst); 1320 mtu = dst_mtu(&rt->dst);
1275 if (skb->len > mtu) { 1321 if (skb->len > mtu && !skb_is_gso(skb)) {
1276 if (!skb->dev) { 1322 if (!skb->dev) {
1277 struct net *net = dev_net(skb_dst(skb)->dev); 1323 struct net *net = dev_net(skb_dst(skb)->dev);
1278 1324
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644
index 000000000000..4e99cca61612
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,82 @@
1/*
2 * broadcast connection tracking helper
3 *
4 * (c) 2005 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/ip.h>
14#include <net/route.h>
15#include <linux/inetdevice.h>
16#include <linux/skbuff.h>
17
18#include <net/netfilter/nf_conntrack.h>
19#include <net/netfilter/nf_conntrack_helper.h>
20#include <net/netfilter/nf_conntrack_expect.h>
21
22int nf_conntrack_broadcast_help(struct sk_buff *skb,
23 unsigned int protoff,
24 struct nf_conn *ct,
25 enum ip_conntrack_info ctinfo,
26 unsigned int timeout)
27{
28 struct nf_conntrack_expect *exp;
29 struct iphdr *iph = ip_hdr(skb);
30 struct rtable *rt = skb_rtable(skb);
31 struct in_device *in_dev;
32 struct nf_conn_help *help = nfct_help(ct);
33 __be32 mask = 0;
34
35 /* we're only interested in locally generated packets */
36 if (skb->sk == NULL)
37 goto out;
38 if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
39 goto out;
40 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
41 goto out;
42
43 rcu_read_lock();
44 in_dev = __in_dev_get_rcu(rt->dst.dev);
45 if (in_dev != NULL) {
46 for_primary_ifa(in_dev) {
47 if (ifa->ifa_broadcast == iph->daddr) {
48 mask = ifa->ifa_mask;
49 break;
50 }
51 } endfor_ifa(in_dev);
52 }
53 rcu_read_unlock();
54
55 if (mask == 0)
56 goto out;
57
58 exp = nf_ct_expect_alloc(ct);
59 if (exp == NULL)
60 goto out;
61
62 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
63 exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
64
65 exp->mask.src.u3.ip = mask;
66 exp->mask.src.u.udp.port = htons(0xFFFF);
67
68 exp->expectfn = NULL;
69 exp->flags = NF_CT_EXPECT_PERMANENT;
70 exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
71 exp->helper = NULL;
72
73 nf_ct_expect_related(exp);
74 nf_ct_expect_put(exp);
75
76 nf_ct_refresh(ct, skb, timeout * HZ);
77out:
78 return NF_ACCEPT;
79}
80EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
81
82MODULE_LICENSE("GPL");
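The new nf_conntrack_broadcast_help() installs a permanent expectation whose source address is masked with the interface netmask, so any host on the local broadcast subnet can answer the outgoing broadcast. A userspace illustration (not kernel code) of the masked match that expectation performs; the addresses below are made up:

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The reply tuple's source is the broadcast address; masking both sides
 * with ifa_mask means "anything in that subnet" matches. */
static bool reply_matches(uint32_t src, uint32_t expected, uint32_t mask)
{
        return (src & mask) == (expected & mask);
}

int main(void)
{
        uint32_t mask     = ntohl(inet_addr("255.255.255.0"));
        uint32_t expected = ntohl(inet_addr("192.168.1.255")); /* broadcast */
        uint32_t src      = ntohl(inet_addr("192.168.1.42"));  /* some host */

        printf("match: %d\n", reply_matches(src, expected, mask)); /* 1 */
        return 0;
}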
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 84f4fcc5884b..2e1c11f78419 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
43#include <net/netfilter/nf_conntrack_acct.h> 43#include <net/netfilter/nf_conntrack_acct.h>
44#include <net/netfilter/nf_conntrack_ecache.h> 44#include <net/netfilter/nf_conntrack_ecache.h>
45#include <net/netfilter/nf_conntrack_zones.h> 45#include <net/netfilter/nf_conntrack_zones.h>
46#include <net/netfilter/nf_conntrack_timestamp.h>
46#include <net/netfilter/nf_nat.h> 47#include <net/netfilter/nf_nat.h>
47#include <net/netfilter/nf_nat_core.h> 48#include <net/netfilter/nf_nat_core.h>
48 49
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
282static void death_by_timeout(unsigned long ul_conntrack) 283static void death_by_timeout(unsigned long ul_conntrack)
283{ 284{
284 struct nf_conn *ct = (void *)ul_conntrack; 285 struct nf_conn *ct = (void *)ul_conntrack;
286 struct nf_conn_tstamp *tstamp;
287
288 tstamp = nf_conn_tstamp_find(ct);
289 if (tstamp && tstamp->stop == 0)
290 tstamp->stop = ktime_to_ns(ktime_get_real());
285 291
286 if (!test_bit(IPS_DYING_BIT, &ct->status) && 292 if (!test_bit(IPS_DYING_BIT, &ct->status) &&
287 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { 293 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
419 struct nf_conntrack_tuple_hash *h; 425 struct nf_conntrack_tuple_hash *h;
420 struct nf_conn *ct; 426 struct nf_conn *ct;
421 struct nf_conn_help *help; 427 struct nf_conn_help *help;
428 struct nf_conn_tstamp *tstamp;
422 struct hlist_nulls_node *n; 429 struct hlist_nulls_node *n;
423 enum ip_conntrack_info ctinfo; 430 enum ip_conntrack_info ctinfo;
424 struct net *net; 431 struct net *net;
@@ -446,7 +453,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
446 REJECT will give spurious warnings here. */ 453 REJECT will give spurious warnings here. */
447 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ 454 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
448 455
449 /* No external references means noone else could have 456 /* No external references means no one else could have
450 confirmed us. */ 457 confirmed us. */
451 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 458 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
452 pr_debug("Confirming conntrack %p\n", ct); 459 pr_debug("Confirming conntrack %p\n", ct);
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
486 ct->timeout.expires += jiffies; 493 ct->timeout.expires += jiffies;
487 add_timer(&ct->timeout); 494 add_timer(&ct->timeout);
488 atomic_inc(&ct->ct_general.use); 495 atomic_inc(&ct->ct_general.use);
489 set_bit(IPS_CONFIRMED_BIT, &ct->status); 496 ct->status |= IPS_CONFIRMED;
497
498 /* set conntrack timestamp, if enabled. */
499 tstamp = nf_conn_tstamp_find(ct);
500 if (tstamp) {
501 if (skb->tstamp.tv64 == 0)
502 __net_timestamp((struct sk_buff *)skb);
490 503
504 tstamp->start = ktime_to_ns(skb->tstamp);
505 }
491 /* Since the lookup is lockless, hash insertion must be done after 506 /* Since the lookup is lockless, hash insertion must be done after
492 * starting the timer and setting the CONFIRMED bit. The RCU barriers 507 * starting the timer and setting the CONFIRMED bit. The RCU barriers
493 * guarantee that no other CPU can find the conntrack before the above 508 * guarantee that no other CPU can find the conntrack before the above
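With CONFIG_NF_CONNTRACK_TIMESTAMP, the start time is recorded when the conntrack is confirmed (taken from the skb timestamp) and the stop time when it dies, guarded so it is written only once. A loose userspace analogue of that bracketing using wall-clock nanoseconds; this only sketches the lifecycle, it is not the kernel API:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct flow_tstamp {
        uint64_t start;   /* set at confirm time */
        uint64_t stop;    /* set once, at destroy time */
};

static uint64_t now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_REALTIME, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
        struct flow_tstamp t = { .start = now_ns(), .stop = 0 };

        /* ... connection lives here ... */

        if (t.stop == 0)        /* mirrors the "if (tstamp->stop == 0)" guard */
                t.stop = now_ns();

        printf("lifetime: %llu ns\n", (unsigned long long)(t.stop - t.start));
        return 0;
}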
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
655 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. 670 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
656 */ 671 */
657 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, 672 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
658 sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); 673 offsetof(struct nf_conn, proto) -
674 offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
659 spin_lock_init(&ct->lock); 675 spin_lock_init(&ct->lock);
660 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 676 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
661 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 677 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
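The allocation-time memset in __nf_conntrack_alloc() now stops at the proto member instead of running to the end of the object, so per-protocol state is no longer zeroed here (the DCCP/SCTP/TCP hunks further down clear their own areas instead). A self-contained illustration of zeroing only the span between two members with offsetof():

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct demo_conn {
        int tuplehash[2];
        int timeout;
        int mark;
        int proto_state;        /* deliberately left out of the memset */
};

int main(void)
{
        struct demo_conn c = { {1, 2}, 3, 4, 5 };

        /* clear [timeout, proto_state) only, like the narrowed memset */
        memset((char *)&c + offsetof(struct demo_conn, timeout), 0,
               offsetof(struct demo_conn, proto_state) -
               offsetof(struct demo_conn, timeout));

        printf("timeout=%d mark=%d proto_state=%d\n",
               c.timeout, c.mark, c.proto_state);      /* 0 0 5 */
        return 0;
}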
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
745 } 761 }
746 762
747 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 763 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
764 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
748 765
749 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 766 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
750 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, 767 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -884,7 +901,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
884 ret = l3proto->get_l4proto(skb, skb_network_offset(skb), 901 ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
885 &dataoff, &protonum); 902 &dataoff, &protonum);
886 if (ret <= 0) { 903 if (ret <= 0) {
887 pr_debug("not prepared to track yet or error occured\n"); 904 pr_debug("not prepared to track yet or error occurred\n");
888 NF_CT_STAT_INC_ATOMIC(net, error); 905 NF_CT_STAT_INC_ATOMIC(net, error);
889 NF_CT_STAT_INC_ATOMIC(net, invalid); 906 NF_CT_STAT_INC_ATOMIC(net, invalid);
890 ret = -ret; 907 ret = -ret;
@@ -1192,6 +1209,11 @@ struct __nf_ct_flush_report {
1192static int kill_report(struct nf_conn *i, void *data) 1209static int kill_report(struct nf_conn *i, void *data)
1193{ 1210{
1194 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 1211 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
1212 struct nf_conn_tstamp *tstamp;
1213
1214 tstamp = nf_conn_tstamp_find(i);
1215 if (tstamp && tstamp->stop == 0)
1216 tstamp->stop = ktime_to_ns(ktime_get_real());
1195 1217
1196 /* If we fail to deliver the event, death_by_timeout() will retry */ 1218 /* If we fail to deliver the event, death_by_timeout() will retry */
1197 if (nf_conntrack_event_report(IPCT_DESTROY, i, 1219 if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1208,9 +1230,9 @@ static int kill_all(struct nf_conn *i, void *data)
1208 return 1; 1230 return 1;
1209} 1231}
1210 1232
1211void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) 1233void nf_ct_free_hashtable(void *hash, unsigned int size)
1212{ 1234{
1213 if (vmalloced) 1235 if (is_vmalloc_addr(hash))
1214 vfree(hash); 1236 vfree(hash);
1215 else 1237 else
1216 free_pages((unsigned long)hash, 1238 free_pages((unsigned long)hash,
@@ -1277,9 +1299,9 @@ static void nf_conntrack_cleanup_net(struct net *net)
1277 goto i_see_dead_people; 1299 goto i_see_dead_people;
1278 } 1300 }
1279 1301
1280 nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1302 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1281 net->ct.htable_size);
1282 nf_conntrack_ecache_fini(net); 1303 nf_conntrack_ecache_fini(net);
1304 nf_conntrack_tstamp_fini(net);
1283 nf_conntrack_acct_fini(net); 1305 nf_conntrack_acct_fini(net);
1284 nf_conntrack_expect_fini(net); 1306 nf_conntrack_expect_fini(net);
1285 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1307 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
@@ -1307,21 +1329,18 @@ void nf_conntrack_cleanup(struct net *net)
1307 } 1329 }
1308} 1330}
1309 1331
1310void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) 1332void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
1311{ 1333{
1312 struct hlist_nulls_head *hash; 1334 struct hlist_nulls_head *hash;
1313 unsigned int nr_slots, i; 1335 unsigned int nr_slots, i;
1314 size_t sz; 1336 size_t sz;
1315 1337
1316 *vmalloced = 0;
1317
1318 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 1338 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1319 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); 1339 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1320 sz = nr_slots * sizeof(struct hlist_nulls_head); 1340 sz = nr_slots * sizeof(struct hlist_nulls_head);
1321 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1341 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1322 get_order(sz)); 1342 get_order(sz));
1323 if (!hash) { 1343 if (!hash) {
1324 *vmalloced = 1;
1325 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1344 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1326 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, 1345 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1327 PAGE_KERNEL); 1346 PAGE_KERNEL);
@@ -1337,7 +1356,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
1337 1356
1338int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1357int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1339{ 1358{
1340 int i, bucket, vmalloced, old_vmalloced; 1359 int i, bucket;
1341 unsigned int hashsize, old_size; 1360 unsigned int hashsize, old_size;
1342 struct hlist_nulls_head *hash, *old_hash; 1361 struct hlist_nulls_head *hash, *old_hash;
1343 struct nf_conntrack_tuple_hash *h; 1362 struct nf_conntrack_tuple_hash *h;
@@ -1354,7 +1373,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1354 if (!hashsize) 1373 if (!hashsize)
1355 return -EINVAL; 1374 return -EINVAL;
1356 1375
1357 hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); 1376 hash = nf_ct_alloc_hashtable(&hashsize, 1);
1358 if (!hash) 1377 if (!hash)
1359 return -ENOMEM; 1378 return -ENOMEM;
1360 1379
@@ -1376,15 +1395,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1376 } 1395 }
1377 } 1396 }
1378 old_size = init_net.ct.htable_size; 1397 old_size = init_net.ct.htable_size;
1379 old_vmalloced = init_net.ct.hash_vmalloc;
1380 old_hash = init_net.ct.hash; 1398 old_hash = init_net.ct.hash;
1381 1399
1382 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; 1400 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
1383 init_net.ct.hash_vmalloc = vmalloced;
1384 init_net.ct.hash = hash; 1401 init_net.ct.hash = hash;
1385 spin_unlock_bh(&nf_conntrack_lock); 1402 spin_unlock_bh(&nf_conntrack_lock);
1386 1403
1387 nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); 1404 nf_ct_free_hashtable(old_hash, old_size);
1388 return 0; 1405 return 0;
1389} 1406}
1390EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 1407EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1497,8 +1514,7 @@ static int nf_conntrack_init_net(struct net *net)
1497 } 1514 }
1498 1515
1499 net->ct.htable_size = nf_conntrack_htable_size; 1516 net->ct.htable_size = nf_conntrack_htable_size;
1500 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1517 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
1501 &net->ct.hash_vmalloc, 1);
1502 if (!net->ct.hash) { 1518 if (!net->ct.hash) {
1503 ret = -ENOMEM; 1519 ret = -ENOMEM;
1504 printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); 1520 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1510,6 +1526,9 @@ static int nf_conntrack_init_net(struct net *net)
1510 ret = nf_conntrack_acct_init(net); 1526 ret = nf_conntrack_acct_init(net);
1511 if (ret < 0) 1527 if (ret < 0)
1512 goto err_acct; 1528 goto err_acct;
1529 ret = nf_conntrack_tstamp_init(net);
1530 if (ret < 0)
1531 goto err_tstamp;
1513 ret = nf_conntrack_ecache_init(net); 1532 ret = nf_conntrack_ecache_init(net);
1514 if (ret < 0) 1533 if (ret < 0)
1515 goto err_ecache; 1534 goto err_ecache;
@@ -1517,12 +1536,13 @@ static int nf_conntrack_init_net(struct net *net)
1517 return 0; 1536 return 0;
1518 1537
1519err_ecache: 1538err_ecache:
1539 nf_conntrack_tstamp_fini(net);
1540err_tstamp:
1520 nf_conntrack_acct_fini(net); 1541 nf_conntrack_acct_fini(net);
1521err_acct: 1542err_acct:
1522 nf_conntrack_expect_fini(net); 1543 nf_conntrack_expect_fini(net);
1523err_expect: 1544err_expect:
1524 nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1545 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1525 net->ct.htable_size);
1526err_hash: 1546err_hash:
1527 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1547 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1528err_cache: 1548err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index a20fb0bd1efe..cd1e8e0970f2 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
319 const struct nf_conntrack_expect_policy *p; 319 const struct nf_conntrack_expect_policy *p;
320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); 320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
321 321
322 atomic_inc(&exp->use); 322 /* two references : one for hash insert, one for the timer */
323 atomic_add(2, &exp->use);
323 324
324 if (master_help) { 325 if (master_help) {
325 hlist_add_head(&exp->lnode, &master_help->expectations); 326 hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
333 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 334 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
334 (unsigned long)exp); 335 (unsigned long)exp);
335 if (master_help) { 336 if (master_help) {
336 p = &master_help->helper->expect_policy[exp->class]; 337 p = &rcu_dereference_protected(
338 master_help->helper,
339 lockdep_is_held(&nf_conntrack_lock)
340 )->expect_policy[exp->class];
337 exp->timeout.expires = jiffies + p->timeout * HZ; 341 exp->timeout.expires = jiffies + p->timeout * HZ;
338 } 342 }
339 add_timer(&exp->timeout); 343 add_timer(&exp->timeout);
340 344
341 atomic_inc(&exp->use);
342 NF_CT_STAT_INC(net, expect_create); 345 NF_CT_STAT_INC(net, expect_create);
343} 346}
344 347
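nf_ct_expect_insert() now takes both references (one for the hash insertion, one for the pending timer) with a single atomic_add before either user can drop theirs, instead of two increments split around the timer setup. A small C11 sketch of the same refcounting idea:

#include <stdatomic.h>
#include <stdio.h>

struct expect {
        atomic_int use;
};

int main(void)
{
        struct expect exp;

        atomic_init(&exp.use, 1);               /* allocator's reference */
        atomic_fetch_add(&exp.use, 2);          /* hash insert + timer, at once */
        printf("after insert: %d\n", atomic_load(&exp.use));   /* 3 */

        atomic_fetch_sub(&exp.use, 1);          /* timer fires, drops its ref */
        printf("after timer:  %d\n", atomic_load(&exp.use));   /* 2 */
        return 0;
}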
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
369 if (!del_timer(&i->timeout)) 372 if (!del_timer(&i->timeout))
370 return 0; 373 return 0;
371 374
372 p = &master_help->helper->expect_policy[i->class]; 375 p = &rcu_dereference_protected(
376 master_help->helper,
377 lockdep_is_held(&nf_conntrack_lock)
378 )->expect_policy[i->class];
373 i->timeout.expires = jiffies + p->timeout * HZ; 379 i->timeout.expires = jiffies + p->timeout * HZ;
374 add_timer(&i->timeout); 380 add_timer(&i->timeout);
375 return 1; 381 return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
407 } 413 }
408 /* Will be over limit? */ 414 /* Will be over limit? */
409 if (master_help) { 415 if (master_help) {
410 p = &master_help->helper->expect_policy[expect->class]; 416 p = &rcu_dereference_protected(
417 master_help->helper,
418 lockdep_is_held(&nf_conntrack_lock)
419 )->expect_policy[expect->class];
411 if (p->max_expected && 420 if (p->max_expected &&
412 master_help->expecting[expect->class] >= p->max_expected) { 421 master_help->expecting[expect->class] >= p->max_expected) {
413 evict_oldest_expect(master, expect); 422 evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
478 struct hlist_node *n; 487 struct hlist_node *n;
479 488
480 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 489 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
481 n = rcu_dereference(net->ct.expect_hash[st->bucket].first); 490 n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
482 if (n) 491 if (n)
483 return n; 492 return n;
484 } 493 }
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
491 struct net *net = seq_file_net(seq); 500 struct net *net = seq_file_net(seq);
492 struct ct_expect_iter_state *st = seq->private; 501 struct ct_expect_iter_state *st = seq->private;
493 502
494 head = rcu_dereference(head->next); 503 head = rcu_dereference(hlist_next_rcu(head));
495 while (head == NULL) { 504 while (head == NULL) {
496 if (++st->bucket >= nf_ct_expect_hsize) 505 if (++st->bucket >= nf_ct_expect_hsize)
497 return NULL; 506 return NULL;
498 head = rcu_dereference(net->ct.expect_hash[st->bucket].first); 507 head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
499 } 508 }
500 return head; 509 return head;
501} 510}
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
630 } 639 }
631 640
632 net->ct.expect_count = 0; 641 net->ct.expect_count = 0;
633 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 642 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
634 &net->ct.expect_vmalloc, 0);
635 if (net->ct.expect_hash == NULL) 643 if (net->ct.expect_hash == NULL)
636 goto err1; 644 goto err1;
637 645
@@ -653,8 +661,7 @@ err3:
653 if (net_eq(net, &init_net)) 661 if (net_eq(net, &init_net))
654 kmem_cache_destroy(nf_ct_expect_cachep); 662 kmem_cache_destroy(nf_ct_expect_cachep);
655err2: 663err2:
656 nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, 664 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
657 nf_ct_expect_hsize);
658err1: 665err1:
659 return err; 666 return err;
660} 667}
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
666 rcu_barrier(); /* Wait for call_rcu() before destroy */ 673 rcu_barrier(); /* Wait for call_rcu() before destroy */
667 kmem_cache_destroy(nf_ct_expect_cachep); 674 kmem_cache_destroy(nf_ct_expect_cachep);
668 } 675 }
669 nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, 676 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
670 nf_ct_expect_hsize);
671} 677}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bd82450c193f..05ecdc281a53 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -68,12 +68,6 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
68 return (void *)(*ext) + off; 68 return (void *)(*ext) + off;
69} 69}
70 70
71static void __nf_ct_ext_free_rcu(struct rcu_head *head)
72{
73 struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu);
74 kfree(ext);
75}
76
77void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) 71void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
78{ 72{
79 struct nf_ct_ext *old, *new; 73 struct nf_ct_ext *old, *new;
@@ -114,7 +108,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
114 (void *)old + old->offset[i]); 108 (void *)old + old->offset[i]);
115 rcu_read_unlock(); 109 rcu_read_unlock();
116 } 110 }
117 call_rcu(&old->rcu, __nf_ct_ext_free_rcu); 111 kfree_rcu(old, rcu);
118 ct->ext = new; 112 ct->ext = new;
119 } 113 }
120 114
@@ -140,15 +134,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
140 /* This assumes that extended areas in conntrack for the types 134 /* This assumes that extended areas in conntrack for the types
141 whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ 135 whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
142 for (i = min; i <= max; i++) { 136 for (i = min; i <= max; i++) {
143 t1 = nf_ct_ext_types[i]; 137 t1 = rcu_dereference_protected(nf_ct_ext_types[i],
138 lockdep_is_held(&nf_ct_ext_type_mutex));
144 if (!t1) 139 if (!t1)
145 continue; 140 continue;
146 141
147 t1->alloc_size = sizeof(struct nf_ct_ext) 142 t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
148 + ALIGN(sizeof(struct nf_ct_ext), t1->align) 143 t1->len;
149 + t1->len;
150 for (j = 0; j < NF_CT_EXT_NUM; j++) { 144 for (j = 0; j < NF_CT_EXT_NUM; j++) {
151 t2 = nf_ct_ext_types[j]; 145 t2 = rcu_dereference_protected(nf_ct_ext_types[j],
146 lockdep_is_held(&nf_ct_ext_type_mutex));
152 if (t2 == NULL || t2 == t1 || 147 if (t2 == NULL || t2 == t1 ||
153 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) 148 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
154 continue; 149 continue;
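update_alloc_size() drops the extra sizeof(struct nf_ct_ext) term: the extension data begins right after the header aligned to the type's alignment, so adding the raw header size a second time reserved more than was needed. A tiny worked example of the arithmetic with stand-in numbers:

#include <stdio.h>

#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned int hdr = 16;          /* stand-in for sizeof(struct nf_ct_ext) */
        unsigned int align = 8, len = 24;

        unsigned int old_size = hdr + ALIGN(hdr, align) + len;  /* 56 */
        unsigned int new_size = ALIGN(hdr, align) + len;        /* 40 */

        printf("old=%u new=%u\n", old_size, new_size);
        return 0;
}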
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 867882313e49..bcd5ed6b7130 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -631,7 +631,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
631 CHECK_BOUND(bs, 2); 631 CHECK_BOUND(bs, 2);
632 count = *bs->cur++; 632 count = *bs->cur++;
633 count <<= 8; 633 count <<= 8;
634 count = *bs->cur++; 634 count += *bs->cur++;
635 break; 635 break;
636 case SEMI: 636 case SEMI:
637 BYTE_ALIGN(bs); 637 BYTE_ALIGN(bs);
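The decode_seqof() change turns the second byte assignment into an addition, so the high byte of the 16-bit element count is no longer discarded. A self-contained demonstration of the corrected decode:

#include <stdint.h>
#include <stdio.h>

/* Two-byte count, high byte first. */
static unsigned int decode_count(const uint8_t *cur)
{
        unsigned int count = *cur++;
        count <<= 8;
        count += *cur;          /* was "count = *cur", losing the high byte */
        return count;
}

int main(void)
{
        uint8_t buf[] = { 0x01, 0x2c };                 /* 0x012c == 300 */
        printf("count = %u\n", decode_count(buf));      /* 300, not 44 */
        return 0;
}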
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index b969025cf82f..18b2ce5c8ced 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -714,7 +714,6 @@ static int callforward_do_filter(const union nf_inet_addr *src,
714 u_int8_t family) 714 u_int8_t family)
715{ 715{
716 const struct nf_afinfo *afinfo; 716 const struct nf_afinfo *afinfo;
717 struct flowi fl1, fl2;
718 int ret = 0; 717 int ret = 0;
719 718
720 /* rcu_read_lock()ed by nf_hook_slow() */ 719 /* rcu_read_lock()ed by nf_hook_slow() */
@@ -722,17 +721,20 @@ static int callforward_do_filter(const union nf_inet_addr *src,
722 if (!afinfo) 721 if (!afinfo)
723 return 0; 722 return 0;
724 723
725 memset(&fl1, 0, sizeof(fl1));
726 memset(&fl2, 0, sizeof(fl2));
727
728 switch (family) { 724 switch (family) {
729 case AF_INET: { 725 case AF_INET: {
726 struct flowi4 fl1, fl2;
730 struct rtable *rt1, *rt2; 727 struct rtable *rt1, *rt2;
731 728
732 fl1.fl4_dst = src->ip; 729 memset(&fl1, 0, sizeof(fl1));
733 fl2.fl4_dst = dst->ip; 730 fl1.daddr = src->ip;
734 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { 731
735 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { 732 memset(&fl2, 0, sizeof(fl2));
733 fl2.daddr = dst->ip;
734 if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
735 flowi4_to_flowi(&fl1), false)) {
736 if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
737 flowi4_to_flowi(&fl2), false)) {
736 if (rt1->rt_gateway == rt2->rt_gateway && 738 if (rt1->rt_gateway == rt2->rt_gateway &&
737 rt1->dst.dev == rt2->dst.dev) 739 rt1->dst.dev == rt2->dst.dev)
738 ret = 1; 740 ret = 1;
@@ -745,12 +747,18 @@ static int callforward_do_filter(const union nf_inet_addr *src,
745#if defined(CONFIG_NF_CONNTRACK_IPV6) || \ 747#if defined(CONFIG_NF_CONNTRACK_IPV6) || \
746 defined(CONFIG_NF_CONNTRACK_IPV6_MODULE) 748 defined(CONFIG_NF_CONNTRACK_IPV6_MODULE)
747 case AF_INET6: { 749 case AF_INET6: {
750 struct flowi6 fl1, fl2;
748 struct rt6_info *rt1, *rt2; 751 struct rt6_info *rt1, *rt2;
749 752
750 memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); 753 memset(&fl1, 0, sizeof(fl1));
751 memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); 754 ipv6_addr_copy(&fl1.daddr, &src->in6);
752 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { 755
753 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { 756 memset(&fl2, 0, sizeof(fl2));
757 ipv6_addr_copy(&fl2.daddr, &dst->in6);
758 if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
759 flowi6_to_flowi(&fl1), false)) {
760 if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
761 flowi6_to_flowi(&fl2), false)) {
754 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, 762 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
755 sizeof(rt1->rt6i_gateway)) && 763 sizeof(rt1->rt6i_gateway)) &&
756 rt1->dst.dev == rt2->dst.dev) 764 rt1->dst.dev == rt2->dst.dev)
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 59e1a4cd4e8b..1bdfea357955 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
33static struct hlist_head *nf_ct_helper_hash __read_mostly; 33static struct hlist_head *nf_ct_helper_hash __read_mostly;
34static unsigned int nf_ct_helper_hsize __read_mostly; 34static unsigned int nf_ct_helper_hsize __read_mostly;
35static unsigned int nf_ct_helper_count __read_mostly; 35static unsigned int nf_ct_helper_count __read_mostly;
36static int nf_ct_helper_vmalloc;
37 36
38 37
39/* Stupid hash, but collision free for the default registrations of the 38/* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
158 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); 157 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
159 struct nf_conn_help *help = nfct_help(ct); 158 struct nf_conn_help *help = nfct_help(ct);
160 159
161 if (help && help->helper == me) { 160 if (help && rcu_dereference_protected(
161 help->helper,
162 lockdep_is_held(&nf_conntrack_lock)
163 ) == me) {
162 nf_conntrack_event(IPCT_HELPER, ct); 164 nf_conntrack_event(IPCT_HELPER, ct);
163 rcu_assign_pointer(help->helper, NULL); 165 rcu_assign_pointer(help->helper, NULL);
164 } 166 }
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
210 hlist_for_each_entry_safe(exp, n, next, 212 hlist_for_each_entry_safe(exp, n, next,
211 &net->ct.expect_hash[i], hnode) { 213 &net->ct.expect_hash[i], hnode) {
212 struct nf_conn_help *help = nfct_help(exp->master); 214 struct nf_conn_help *help = nfct_help(exp->master);
213 if ((help->helper == me || exp->helper == me) && 215 if ((rcu_dereference_protected(
216 help->helper,
217 lockdep_is_held(&nf_conntrack_lock)
218 ) == me || exp->helper == me) &&
214 del_timer(&exp->timeout)) { 219 del_timer(&exp->timeout)) {
215 nf_ct_unlink_expect(exp); 220 nf_ct_unlink_expect(exp);
216 nf_ct_expect_put(exp); 221 nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void)
261 int err; 266 int err;
262 267
263 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ 268 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
264 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 269 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
265 &nf_ct_helper_vmalloc, 0);
266 if (!nf_ct_helper_hash) 270 if (!nf_ct_helper_hash)
267 return -ENOMEM; 271 return -ENOMEM;
268 272
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void)
273 return 0; 277 return 0;
274 278
275err1: 279err1:
276 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 280 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
277 nf_ct_helper_hsize);
278 return err; 281 return err;
279} 282}
280 283
281void nf_conntrack_helper_fini(void) 284void nf_conntrack_helper_fini(void)
282{ 285{
283 nf_ct_extend_unregister(&helper_extend); 286 nf_ct_extend_unregister(&helper_extend);
284 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 287 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
285 nf_ct_helper_hsize);
286} 288}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index aadde018a072..4c8f30a3d6d2 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -18,14 +18,7 @@
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/inetdevice.h>
24#include <linux/if_addr.h>
25#include <linux/in.h> 21#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/netfilter.h>
28#include <net/route.h>
29 22
30#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
31#include <net/netfilter/nf_conntrack_helper.h> 24#include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
40MODULE_ALIAS_NFCT_HELPER("netbios_ns"); 33MODULE_ALIAS_NFCT_HELPER("netbios_ns");
41 34
42static unsigned int timeout __read_mostly = 3; 35static unsigned int timeout __read_mostly = 3;
43module_param(timeout, uint, 0400); 36module_param(timeout, uint, S_IRUSR);
44MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 37MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
45 38
46static int help(struct sk_buff *skb, unsigned int protoff,
47 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
48{
49 struct nf_conntrack_expect *exp;
50 struct iphdr *iph = ip_hdr(skb);
51 struct rtable *rt = skb_rtable(skb);
52 struct in_device *in_dev;
53 __be32 mask = 0;
54
55 /* we're only interested in locally generated packets */
56 if (skb->sk == NULL)
57 goto out;
58 if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
59 goto out;
60 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
61 goto out;
62
63 rcu_read_lock();
64 in_dev = __in_dev_get_rcu(rt->dst.dev);
65 if (in_dev != NULL) {
66 for_primary_ifa(in_dev) {
67 if (ifa->ifa_broadcast == iph->daddr) {
68 mask = ifa->ifa_mask;
69 break;
70 }
71 } endfor_ifa(in_dev);
72 }
73 rcu_read_unlock();
74
75 if (mask == 0)
76 goto out;
77
78 exp = nf_ct_expect_alloc(ct);
79 if (exp == NULL)
80 goto out;
81
82 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
83 exp->tuple.src.u.udp.port = htons(NMBD_PORT);
84
85 exp->mask.src.u3.ip = mask;
86 exp->mask.src.u.udp.port = htons(0xFFFF);
87
88 exp->expectfn = NULL;
89 exp->flags = NF_CT_EXPECT_PERMANENT;
90 exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
91 exp->helper = NULL;
92
93 nf_ct_expect_related(exp);
94 nf_ct_expect_put(exp);
95
96 nf_ct_refresh(ct, skb, timeout * HZ);
97out:
98 return NF_ACCEPT;
99}
100
101static struct nf_conntrack_expect_policy exp_policy = { 39static struct nf_conntrack_expect_policy exp_policy = {
102 .max_expected = 1, 40 .max_expected = 1,
103}; 41};
104 42
43static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
44 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
45{
46 return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
47}
48
105static struct nf_conntrack_helper helper __read_mostly = { 49static struct nf_conntrack_helper helper __read_mostly = {
106 .name = "netbios-ns", 50 .name = "netbios-ns",
107 .tuple.src.l3num = AF_INET, 51 .tuple.src.l3num = NFPROTO_IPV4,
108 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), 52 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
109 .tuple.dst.protonum = IPPROTO_UDP, 53 .tuple.dst.protonum = IPPROTO_UDP,
110 .me = THIS_MODULE, 54 .me = THIS_MODULE,
111 .help = help, 55 .help = netbios_ns_help,
112 .expect_policy = &exp_policy, 56 .expect_policy = &exp_policy,
113}; 57};
114 58
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index eead9db6f899..482e90c61850 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
42#include <net/netfilter/nf_conntrack_tuple.h> 42#include <net/netfilter/nf_conntrack_tuple.h>
43#include <net/netfilter/nf_conntrack_acct.h> 43#include <net/netfilter/nf_conntrack_acct.h>
44#include <net/netfilter/nf_conntrack_zones.h> 44#include <net/netfilter/nf_conntrack_zones.h>
45#include <net/netfilter/nf_conntrack_timestamp.h>
45#ifdef CONFIG_NF_NAT_NEEDED 46#ifdef CONFIG_NF_NAT_NEEDED
46#include <net/netfilter/nf_nat_core.h> 47#include <net/netfilter/nf_nat_core.h>
47#include <net/netfilter/nf_nat_protocol.h> 48#include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
230 return -1; 231 return -1;
231} 232}
232 233
234static int
235ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
236{
237 struct nlattr *nest_count;
238 const struct nf_conn_tstamp *tstamp;
239
240 tstamp = nf_conn_tstamp_find(ct);
241 if (!tstamp)
242 return 0;
243
244 nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
245 if (!nest_count)
246 goto nla_put_failure;
247
248 NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
249 if (tstamp->stop != 0) {
250 NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
251 cpu_to_be64(tstamp->stop));
252 }
253 nla_nest_end(skb, nest_count);
254
255 return 0;
256
257nla_put_failure:
258 return -1;
259}
260
233#ifdef CONFIG_NF_CONNTRACK_MARK 261#ifdef CONFIG_NF_CONNTRACK_MARK
234static inline int 262static inline int
235ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) 263ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
404 ctnetlink_dump_timeout(skb, ct) < 0 || 432 ctnetlink_dump_timeout(skb, ct) < 0 ||
405 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 433 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
406 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || 434 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
435 ctnetlink_dump_timestamp(skb, ct) < 0 ||
407 ctnetlink_dump_protoinfo(skb, ct) < 0 || 436 ctnetlink_dump_protoinfo(skb, ct) < 0 ||
408 ctnetlink_dump_helpinfo(skb, ct) < 0 || 437 ctnetlink_dump_helpinfo(skb, ct) < 0 ||
409 ctnetlink_dump_mark(skb, ct) < 0 || 438 ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
471} 500}
472 501
473static inline size_t 502static inline size_t
503ctnetlink_timestamp_size(const struct nf_conn *ct)
504{
505#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
506 if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
507 return 0;
508 return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
509#else
510 return 0;
511#endif
512}
513
514static inline size_t
474ctnetlink_nlmsg_size(const struct nf_conn *ct) 515ctnetlink_nlmsg_size(const struct nf_conn *ct)
475{ 516{
476 return NLMSG_ALIGN(sizeof(struct nfgenmsg)) 517 return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
481 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ 522 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
482 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ 523 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
483 + ctnetlink_counters_size(ct) 524 + ctnetlink_counters_size(ct)
525 + ctnetlink_timestamp_size(ct)
484 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ 526 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
485 + nla_total_size(0) /* CTA_PROTOINFO */ 527 + nla_total_size(0) /* CTA_PROTOINFO */
486 + nla_total_size(0) /* CTA_HELP */ 528 + nla_total_size(0) /* CTA_HELP */
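ctnetlink_timestamp_size() reserves room for a nested CTA_TIMESTAMP attribute carrying two 64-bit values; each nla_total_size() term accounts for a 4-byte attribute header plus the payload rounded to 4-byte alignment. A userspace sketch of that size arithmetic, with the alignment macros re-declared locally for illustration:

#include <stdint.h>
#include <stdio.h>

#define NLA_ALIGNTO     4
#define NLA_ALIGN(len)  (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN      4       /* sizeof(struct nlattr), already aligned */

static unsigned int nla_total_size(unsigned int payload)
{
        return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
        /* nested container + CTA_TIMESTAMP_START + CTA_TIMESTAMP_STOP */
        unsigned int sz = nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));

        printf("reserved bytes: %u\n", sz);     /* 4 + 2 * 12 = 28 */
        return 0;
}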
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
571 613
572 if (events & (1 << IPCT_DESTROY)) { 614 if (events & (1 << IPCT_DESTROY)) {
573 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 615 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
574 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 616 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
617 ctnetlink_dump_timestamp(skb, ct) < 0)
575 goto nla_put_failure; 618 goto nla_put_failure;
576 } else { 619 } else {
577 if (ctnetlink_dump_timeout(skb, ct) < 0) 620 if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -761,7 +804,7 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
761static int 804static int
762ctnetlink_parse_tuple(const struct nlattr * const cda[], 805ctnetlink_parse_tuple(const struct nlattr * const cda[],
763 struct nf_conntrack_tuple *tuple, 806 struct nf_conntrack_tuple *tuple,
764 enum ctattr_tuple type, u_int8_t l3num) 807 enum ctattr_type type, u_int8_t l3num)
765{ 808{
766 struct nlattr *tb[CTA_TUPLE_MAX+1]; 809 struct nlattr *tb[CTA_TUPLE_MAX+1];
767 int err; 810 int err;
@@ -1291,6 +1334,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1291 struct nf_conn *ct; 1334 struct nf_conn *ct;
1292 int err = -EINVAL; 1335 int err = -EINVAL;
1293 struct nf_conntrack_helper *helper; 1336 struct nf_conntrack_helper *helper;
1337 struct nf_conn_tstamp *tstamp;
1294 1338
1295 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); 1339 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
1296 if (IS_ERR(ct)) 1340 if (IS_ERR(ct))
@@ -1358,6 +1402,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1358 } 1402 }
1359 1403
1360 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1404 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
1405 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
1361 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); 1406 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
1362 /* we must add conntrack extensions before confirmation. */ 1407 /* we must add conntrack extensions before confirmation. */
1363 ct->status |= IPS_CONFIRMED; 1408 ct->status |= IPS_CONFIRMED;
@@ -1376,6 +1421,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1376 } 1421 }
1377#endif 1422#endif
1378 1423
1424 memset(&ct->proto, 0, sizeof(ct->proto));
1379 if (cda[CTA_PROTOINFO]) { 1425 if (cda[CTA_PROTOINFO]) {
1380 err = ctnetlink_change_protoinfo(ct, cda); 1426 err = ctnetlink_change_protoinfo(ct, cda);
1381 if (err < 0) 1427 if (err < 0)
@@ -1406,6 +1452,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1406 __set_bit(IPS_EXPECTED_BIT, &ct->status); 1452 __set_bit(IPS_EXPECTED_BIT, &ct->status);
1407 ct->master = master_ct; 1453 ct->master = master_ct;
1408 } 1454 }
1455 tstamp = nf_conn_tstamp_find(ct);
1456 if (tstamp)
1457 tstamp->start = ktime_to_ns(ktime_get_real());
1409 1458
1410 add_timer(&ct->timeout); 1459 add_timer(&ct->timeout);
1411 nf_conntrack_hash_insert(ct); 1460 nf_conntrack_hash_insert(ct);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc7bb74110df..5701c8dd783c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
166int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) 166int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
167{ 167{
168 int ret = 0; 168 int ret = 0;
169 struct nf_conntrack_l3proto *old;
169 170
170 if (proto->l3proto >= AF_MAX) 171 if (proto->l3proto >= AF_MAX)
171 return -EBUSY; 172 return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
174 return -EINVAL; 175 return -EINVAL;
175 176
176 mutex_lock(&nf_ct_proto_mutex); 177 mutex_lock(&nf_ct_proto_mutex);
177 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { 178 old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
179 lockdep_is_held(&nf_ct_proto_mutex));
180 if (old != &nf_conntrack_l3proto_generic) {
178 ret = -EBUSY; 181 ret = -EBUSY;
179 goto out_unlock; 182 goto out_unlock;
180 } 183 }
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
201 BUG_ON(proto->l3proto >= AF_MAX); 204 BUG_ON(proto->l3proto >= AF_MAX);
202 205
203 mutex_lock(&nf_ct_proto_mutex); 206 mutex_lock(&nf_ct_proto_mutex);
204 BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); 207 BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
208 lockdep_is_held(&nf_ct_proto_mutex)
209 ) != proto);
205 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], 210 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
206 &nf_conntrack_l3proto_generic); 211 &nf_conntrack_l3proto_generic);
207 nf_ct_l3proto_unregister_sysctl(proto); 212 nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
279 mutex_lock(&nf_ct_proto_mutex); 284 mutex_lock(&nf_ct_proto_mutex);
280 if (!nf_ct_protos[l4proto->l3proto]) { 285 if (!nf_ct_protos[l4proto->l3proto]) {
281 /* l3proto may be loaded latter. */ 286 /* l3proto may be loaded latter. */
282 struct nf_conntrack_l4proto **proto_array; 287 struct nf_conntrack_l4proto __rcu **proto_array;
283 int i; 288 int i;
284 289
285 proto_array = kmalloc(MAX_NF_CT_PROTO * 290 proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
291 } 296 }
292 297
293 for (i = 0; i < MAX_NF_CT_PROTO; i++) 298 for (i = 0; i < MAX_NF_CT_PROTO; i++)
294 proto_array[i] = &nf_conntrack_l4proto_generic; 299 RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
295 300
296 /* Before making proto_array visible to lockless readers, 301 /* Before making proto_array visible to lockless readers,
297 * we must make sure its content is committed to memory. 302 * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
299 smp_wmb(); 304 smp_wmb();
300 305
301 nf_ct_protos[l4proto->l3proto] = proto_array; 306 nf_ct_protos[l4proto->l3proto] = proto_array;
302 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != 307 } else if (rcu_dereference_protected(
303 &nf_conntrack_l4proto_generic) { 308 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
309 lockdep_is_held(&nf_ct_proto_mutex)
310 ) != &nf_conntrack_l4proto_generic) {
304 ret = -EBUSY; 311 ret = -EBUSY;
305 goto out_unlock; 312 goto out_unlock;
306 } 313 }
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
331 BUG_ON(l4proto->l3proto >= PF_MAX); 338 BUG_ON(l4proto->l3proto >= PF_MAX);
332 339
333 mutex_lock(&nf_ct_proto_mutex); 340 mutex_lock(&nf_ct_proto_mutex);
334 BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); 341 BUG_ON(rcu_dereference_protected(
342 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
343 lockdep_is_held(&nf_ct_proto_mutex)
344 ) != l4proto);
335 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 345 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
336 &nf_conntrack_l4proto_generic); 346 &nf_conntrack_l4proto_generic);
337 nf_ct_l4proto_unregister_sysctl(l4proto); 347 nf_ct_l4proto_unregister_sysctl(l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560d6d4a..2e664a69d7db 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -98,7 +98,7 @@ static const char * const dccp_state_names[] = {
98#define sIV CT_DCCP_INVALID 98#define sIV CT_DCCP_INVALID
99 99
100/* 100/*
101 * DCCP state transistion table 101 * DCCP state transition table
102 * 102 *
103 * The assumption is the same as for TCP tracking: 103 * The assumption is the same as for TCP tracking:
104 * 104 *
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
452 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; 452 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
453 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; 453 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
454 ct->proto.dccp.state = CT_DCCP_NONE; 454 ct->proto.dccp.state = CT_DCCP_NONE;
455 ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
456 ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
457 ct->proto.dccp.handshake_seq = 0;
455 return true; 458 return true;
456 459
457out_invalid: 460out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2d5ea8..6772b1154654 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -107,9 +107,9 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
107/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, 107/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
108/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, 108/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
109/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, 109/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
110/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ 110/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/
111/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ 111/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
112/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ 112/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
113/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} 113/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
114 }, 114 },
115 { 115 {
@@ -121,7 +121,7 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
121/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, 121/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
122/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, 122/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
123/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, 123/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
124/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ 124/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
125/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, 125/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
126/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} 126/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
127 } 127 }
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
413 test_bit(SCTP_CID_COOKIE_ACK, map)) 413 test_bit(SCTP_CID_COOKIE_ACK, map))
414 return false; 414 return false;
415 415
416 memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
416 new_state = SCTP_CONNTRACK_MAX; 417 new_state = SCTP_CONNTRACK_MAX;
417 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 418 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
418 /* Don't need lock here: this conntrack not in circulation yet */ 419 /* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73b24dc..37bf94394be0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -227,11 +227,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
227 * sCL -> sIV 227 * sCL -> sIV
228 */ 228 */
229/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 229/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
230/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, 230/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
231/* 231/*
232 * sSS -> sSR Standard open. 232 * sSS -> sSR Standard open.
233 * sS2 -> sSR Simultaneous open 233 * sS2 -> sSR Simultaneous open
234 * sSR -> sSR Retransmitted SYN/ACK. 234 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
235 * sES -> sIG Late retransmitted SYN/ACK? 235 * sES -> sIG Late retransmitted SYN/ACK?
236 * sFW -> sIG Might be SYN/ACK answering ignored SYN 236 * sFW -> sIG Might be SYN/ACK answering ignored SYN
237 * sCW -> sIG 237 * sCW -> sIG
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1066 BUG_ON(th == NULL); 1066 BUG_ON(th == NULL);
1067 1067
1068 /* Don't need lock here: this conntrack not in circulation yet */ 1068 /* Don't need lock here: this conntrack not in circulation yet */
1069 new_state 1069 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1070 = tcp_conntracks[0][get_conntrack_index(th)]
1071 [TCP_CONNTRACK_NONE];
1072 1070
1073 /* Invalid: delete conntrack */ 1071 /* Invalid: delete conntrack */
1074 if (new_state >= TCP_CONNTRACK_MAX) { 1072 if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1077 } 1075 }
1078 1076
1079 if (new_state == TCP_CONNTRACK_SYN_SENT) { 1077 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1078 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1080 /* SYN packet */ 1079 /* SYN packet */
1081 ct->proto.tcp.seen[0].td_end = 1080 ct->proto.tcp.seen[0].td_end =
1082 segment_seq_plus_len(ntohl(th->seq), skb->len, 1081 segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1088 ct->proto.tcp.seen[0].td_end; 1087 ct->proto.tcp.seen[0].td_end;
1089 1088
1090 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); 1089 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1091 ct->proto.tcp.seen[1].flags = 0;
1092 } else if (nf_ct_tcp_loose == 0) { 1090 } else if (nf_ct_tcp_loose == 0) {
1093 /* Don't try to pick up connections. */ 1091 /* Don't try to pick up connections. */
1094 return false; 1092 return false;
1095 } else { 1093 } else {
1094 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1096 /* 1095 /*
1097 * We are in the middle of a connection, 1096 * We are in the middle of a connection,
1098 * its history is lost for us. 1097 * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1107 ct->proto.tcp.seen[0].td_maxend = 1106 ct->proto.tcp.seen[0].td_maxend =
1108 ct->proto.tcp.seen[0].td_end + 1107 ct->proto.tcp.seen[0].td_end +
1109 ct->proto.tcp.seen[0].td_maxwin; 1108 ct->proto.tcp.seen[0].td_maxwin;
1110 ct->proto.tcp.seen[0].td_scale = 0;
1111 1109
1112 /* We assume SACK and liberal window checking to handle 1110 /* We assume SACK and liberal window checking to handle
1113 * window scaling */ 1111 * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1116 IP_CT_TCP_FLAG_BE_LIBERAL; 1114 IP_CT_TCP_FLAG_BE_LIBERAL;
1117 } 1115 }
1118 1116
1119 ct->proto.tcp.seen[1].td_end = 0;
1120 ct->proto.tcp.seen[1].td_maxend = 0;
1121 ct->proto.tcp.seen[1].td_maxwin = 0;
1122 ct->proto.tcp.seen[1].td_scale = 0;
1123
1124 /* tcp_packet will set them */ 1117 /* tcp_packet will set them */
1125 ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1126 ct->proto.tcp.last_index = TCP_NONE_SET; 1118 ct->proto.tcp.last_index = TCP_NONE_SET;
1127 1119
1128 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " 1120 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
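
The tcp_new() hunk above replaces the scattered zeroing of the reply-direction tracking fields with a single memset() of ct->proto.tcp in each branch that actually builds new state. A minimal standalone sketch in plain C, using a simplified stand-in struct rather than the kernel's real ip_ct_tcp layout, of why the one memset() covers everything the removed assignments did (and the padding bytes as well):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Simplified stand-in for one direction of TCP tracking state; the real
 * kernel struct has more fields, which is exactly why one memset() of
 * the containing struct is less error-prone than per-field zeroing. */
struct seen_state {
	uint32_t td_end;
	uint32_t td_maxend;
	uint32_t td_maxwin;
	uint8_t  td_scale;
	uint8_t  flags;
};

struct tcp_track {
	struct seen_state seen[2];
	uint8_t state;
	uint8_t last_index;
};

int main(void)
{
	struct tcp_track a, b;

	memset(&a, 0xff, sizeof(a));	/* pretend both held stale data */
	memset(&b, 0xff, sizeof(b));

	/* Old style: clear the reply direction field by field. */
	a.seen[1].td_end = 0;
	a.seen[1].td_maxend = 0;
	a.seen[1].td_maxwin = 0;
	a.seen[1].td_scale = 0;
	a.seen[1].flags = 0;

	/* New style: one memset() over the whole tracking area. */
	memset(&b, 0, sizeof(b));

	/* The reply-direction fields end up identical either way. */
	assert(a.seen[1].td_end == b.seen[1].td_end &&
	       a.seen[1].td_maxend == b.seen[1].td_maxend &&
	       a.seen[1].td_maxwin == b.seen[1].td_maxwin &&
	       a.seen[1].td_scale == b.seen[1].td_scale &&
	       a.seen[1].flags == b.seen[1].flags);
	return 0;
}
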
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index bcf47eb518ef..cb5a28581782 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -707,7 +707,7 @@ static const char *ct_sdp_header_search(const char *dptr, const char *limit,
707} 707}
708 708
709/* Locate a SDP header (optionally a substring within the header value), 709/* Locate a SDP header (optionally a substring within the header value),
710 * optionally stopping at the first occurence of the term header, parse 710 * optionally stopping at the first occurrence of the term header, parse
711 * it and return the offset and length of the data we're interested in. 711 * it and return the offset and length of the data we're interested in.
712 */ 712 */
713int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, 713int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
@@ -1419,6 +1419,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
1419 const char *dptr, *end; 1419 const char *dptr, *end;
1420 s16 diff, tdiff = 0; 1420 s16 diff, tdiff = 0;
1421 int ret = NF_ACCEPT; 1421 int ret = NF_ACCEPT;
1422 bool term;
1422 typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; 1423 typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
1423 1424
1424 if (ctinfo != IP_CT_ESTABLISHED && 1425 if (ctinfo != IP_CT_ESTABLISHED &&
@@ -1453,14 +1454,21 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
1453 if (dptr + matchoff == end) 1454 if (dptr + matchoff == end)
1454 break; 1455 break;
1455 1456
1456 if (end + strlen("\r\n\r\n") > dptr + datalen) 1457 term = false;
1457 break; 1458 for (; end + strlen("\r\n\r\n") <= dptr + datalen; end++) {
1458 if (end[0] != '\r' || end[1] != '\n' || 1459 if (end[0] == '\r' && end[1] == '\n' &&
1459 end[2] != '\r' || end[3] != '\n') 1460 end[2] == '\r' && end[3] == '\n') {
1461 term = true;
1462 break;
1463 }
1464 }
1465 if (!term)
1460 break; 1466 break;
1461 end += strlen("\r\n\r\n") + clen; 1467 end += strlen("\r\n\r\n") + clen;
1462 1468
1463 msglen = origlen = end - dptr; 1469 msglen = origlen = end - dptr;
1470 if (msglen > datalen)
1471 return NF_DROP;
1464 1472
1465 ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen); 1473 ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen);
1466 if (ret != NF_ACCEPT) 1474 if (ret != NF_ACCEPT)
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644
index 000000000000..6e545e26289e
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,77 @@
1/*
2 * SNMP service broadcast connection tracking helper
3 *
4 * (c) 2011 Jiri Olsa <jolsa@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/in.h>
15
16#include <net/netfilter/nf_conntrack.h>
17#include <net/netfilter/nf_conntrack_helper.h>
18#include <net/netfilter/nf_conntrack_expect.h>
19
20#define SNMP_PORT 161
21
22MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
23MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
24MODULE_LICENSE("GPL");
25MODULE_ALIAS_NFCT_HELPER("snmp");
26
27static unsigned int timeout __read_mostly = 30;
28module_param(timeout, uint, S_IRUSR);
29MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
30
31int (*nf_nat_snmp_hook)(struct sk_buff *skb,
32 unsigned int protoff,
33 struct nf_conn *ct,
34 enum ip_conntrack_info ctinfo);
35EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
36
37static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
38 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
39{
40 typeof(nf_nat_snmp_hook) nf_nat_snmp;
41
42 nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
43
44 nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
45 if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
46 return nf_nat_snmp(skb, protoff, ct, ctinfo);
47
48 return NF_ACCEPT;
49}
50
51static struct nf_conntrack_expect_policy exp_policy = {
52 .max_expected = 1,
53};
54
55static struct nf_conntrack_helper helper __read_mostly = {
56 .name = "snmp",
57 .tuple.src.l3num = NFPROTO_IPV4,
58 .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
59 .tuple.dst.protonum = IPPROTO_UDP,
60 .me = THIS_MODULE,
61 .help = snmp_conntrack_help,
62 .expect_policy = &exp_policy,
63};
64
65static int __init nf_conntrack_snmp_init(void)
66{
67 exp_policy.timeout = timeout;
68 return nf_conntrack_helper_register(&helper);
69}
70
71static void __exit nf_conntrack_snmp_fini(void)
72{
73 nf_conntrack_helper_unregister(&helper);
74}
75
76module_init(nf_conntrack_snmp_init);
77module_exit(nf_conntrack_snmp_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b4d7f0f24b27..05e9feb101c3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,8 @@
29#include <net/netfilter/nf_conntrack_helper.h> 29#include <net/netfilter/nf_conntrack_helper.h>
30#include <net/netfilter/nf_conntrack_acct.h> 30#include <net/netfilter/nf_conntrack_acct.h>
31#include <net/netfilter/nf_conntrack_zones.h> 31#include <net/netfilter/nf_conntrack_zones.h>
32#include <net/netfilter/nf_conntrack_timestamp.h>
33#include <linux/rculist_nulls.h>
32 34
33MODULE_LICENSE("GPL"); 35MODULE_LICENSE("GPL");
34 36
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
45struct ct_iter_state { 47struct ct_iter_state {
46 struct seq_net_private p; 48 struct seq_net_private p;
47 unsigned int bucket; 49 unsigned int bucket;
50 u_int64_t time_now;
48}; 51};
49 52
50static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 53static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
56 for (st->bucket = 0; 59 for (st->bucket = 0;
57 st->bucket < net->ct.htable_size; 60 st->bucket < net->ct.htable_size;
58 st->bucket++) { 61 st->bucket++) {
59 n = rcu_dereference(net->ct.hash[st->bucket].first); 62 n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
60 if (!is_a_nulls(n)) 63 if (!is_a_nulls(n))
61 return n; 64 return n;
62 } 65 }
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
69 struct net *net = seq_file_net(seq); 72 struct net *net = seq_file_net(seq);
70 struct ct_iter_state *st = seq->private; 73 struct ct_iter_state *st = seq->private;
71 74
72 head = rcu_dereference(head->next); 75 head = rcu_dereference(hlist_nulls_next_rcu(head));
73 while (is_a_nulls(head)) { 76 while (is_a_nulls(head)) {
74 if (likely(get_nulls_value(head) == st->bucket)) { 77 if (likely(get_nulls_value(head) == st->bucket)) {
75 if (++st->bucket >= net->ct.htable_size) 78 if (++st->bucket >= net->ct.htable_size)
76 return NULL; 79 return NULL;
77 } 80 }
78 head = rcu_dereference(net->ct.hash[st->bucket].first); 81 head = rcu_dereference(
82 hlist_nulls_first_rcu(
83 &net->ct.hash[st->bucket]));
79 } 84 }
80 return head; 85 return head;
81} 86}
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
93static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 98static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
94 __acquires(RCU) 99 __acquires(RCU)
95{ 100{
101 struct ct_iter_state *st = seq->private;
102
103 st->time_now = ktime_to_ns(ktime_get_real());
96 rcu_read_lock(); 104 rcu_read_lock();
97 return ct_get_idx(seq, *pos); 105 return ct_get_idx(seq, *pos);
98} 106}
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
132} 140}
133#endif 141#endif
134 142
143#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
144static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
145{
146 struct ct_iter_state *st = s->private;
147 struct nf_conn_tstamp *tstamp;
148 s64 delta_time;
149
150 tstamp = nf_conn_tstamp_find(ct);
151 if (tstamp) {
152 delta_time = st->time_now - tstamp->start;
153 if (delta_time > 0)
154 delta_time = div_s64(delta_time, NSEC_PER_SEC);
155 else
156 delta_time = 0;
157
158 return seq_printf(s, "delta-time=%llu ",
159 (unsigned long long)delta_time);
160 }
161 return 0;
162}
163#else
164static inline int
165ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
166{
167 return 0;
168}
169#endif
170
135/* return 0 on success, 1 in case of error */ 171/* return 0 on success, 1 in case of error */
136static int ct_seq_show(struct seq_file *s, void *v) 172static int ct_seq_show(struct seq_file *s, void *v)
137{ 173{
@@ -200,13 +236,16 @@ static int ct_seq_show(struct seq_file *s, void *v)
200 goto release; 236 goto release;
201#endif 237#endif
202 238
239 if (ct_show_delta_time(s, ct))
240 goto release;
241
203 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 242 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
204 goto release; 243 goto release;
205 244
206 ret = 0; 245 ret = 0;
207release: 246release:
208 nf_ct_put(ct); 247 nf_ct_put(ct);
209 return 0; 248 return ret;
210} 249}
211 250
212static const struct seq_operations ct_seq_ops = { 251static const struct seq_operations ct_seq_ops = {
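
ct_show_delta_time() above prints how long a flow has existed by subtracting the timestamp extension's start time from the wall-clock value sampled once in ct_seq_start(); negative differences are clamped to zero and the nanosecond delta is reduced to whole seconds with div_s64(). The same arithmetic as a standalone snippet:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000LL

/* Same computation as ct_show_delta_time(): seconds elapsed since the
 * flow was created, clamped to zero if the clocks ran backwards. */
static long long delta_time_secs(int64_t now_ns, int64_t start_ns)
{
	int64_t delta = now_ns - start_ns;

	return delta > 0 ? delta / NSEC_PER_SEC : 0;
}

int main(void)
{
	printf("%lld\n", delta_time_secs(5123456789LL, 1000000000LL)); /* 4 */
	printf("%lld\n", delta_time_secs(1000000000LL, 5123456789LL)); /* 0 */
	return 0;
}
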
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 000000000000..af7dd31af0a1
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
1/*
2 * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation (or any later at your option).
7 */
8
9#include <linux/netfilter.h>
10#include <linux/slab.h>
11#include <linux/kernel.h>
12#include <linux/moduleparam.h>
13
14#include <net/netfilter/nf_conntrack.h>
15#include <net/netfilter/nf_conntrack_extend.h>
16#include <net/netfilter/nf_conntrack_timestamp.h>
17
18static int nf_ct_tstamp __read_mostly;
19
20module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
21MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
22
23#ifdef CONFIG_SYSCTL
24static struct ctl_table tstamp_sysctl_table[] = {
25 {
26 .procname = "nf_conntrack_timestamp",
27 .data = &init_net.ct.sysctl_tstamp,
28 .maxlen = sizeof(unsigned int),
29 .mode = 0644,
30 .proc_handler = proc_dointvec,
31 },
32 {}
33};
34#endif /* CONFIG_SYSCTL */
35
36static struct nf_ct_ext_type tstamp_extend __read_mostly = {
37 .len = sizeof(struct nf_conn_tstamp),
38 .align = __alignof__(struct nf_conn_tstamp),
39 .id = NF_CT_EXT_TSTAMP,
40};
41
42#ifdef CONFIG_SYSCTL
43static int nf_conntrack_tstamp_init_sysctl(struct net *net)
44{
45 struct ctl_table *table;
46
47 table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
48 GFP_KERNEL);
49 if (!table)
50 goto out;
51
52 table[0].data = &net->ct.sysctl_tstamp;
53
54 net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
55 nf_net_netfilter_sysctl_path, table);
56 if (!net->ct.tstamp_sysctl_header) {
57 printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
58 goto out_register;
59 }
60 return 0;
61
62out_register:
63 kfree(table);
64out:
65 return -ENOMEM;
66}
67
68static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
69{
70 struct ctl_table *table;
71
72 table = net->ct.tstamp_sysctl_header->ctl_table_arg;
73 unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
74 kfree(table);
75}
76#else
77static int nf_conntrack_tstamp_init_sysctl(struct net *net)
78{
79 return 0;
80}
81
82static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
83{
84}
85#endif
86
87int nf_conntrack_tstamp_init(struct net *net)
88{
89 int ret;
90
91 net->ct.sysctl_tstamp = nf_ct_tstamp;
92
93 if (net_eq(net, &init_net)) {
94 ret = nf_ct_extend_register(&tstamp_extend);
95 if (ret < 0) {
96 printk(KERN_ERR "nf_ct_tstamp: Unable to register "
97 "extension\n");
98 goto out_extend_register;
99 }
100 }
101
102 ret = nf_conntrack_tstamp_init_sysctl(net);
103 if (ret < 0)
104 goto out_sysctl;
105
106 return 0;
107
108out_sysctl:
109 if (net_eq(net, &init_net))
110 nf_ct_extend_unregister(&tstamp_extend);
111out_extend_register:
112 return ret;
113}
114
115void nf_conntrack_tstamp_fini(struct net *net)
116{
117 nf_conntrack_tstamp_fini_sysctl(net);
118 if (net_eq(net, &init_net))
119 nf_ct_extend_unregister(&tstamp_extend);
120}
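
nf_conntrack_tstamp_init_sysctl() follows the usual per-namespace sysctl pattern: kmemdup() a template ctl_table, repoint its .data at the namespace's own flag, and register the copy. A rough userspace analogue of that pattern, with invented names, just to show the duplicate-and-repoint step:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-in for struct ctl_table: a name plus a pointer to
 * the variable the entry controls. */
struct knob {
	const char *name;
	int *data;
};

/* Template table; .data points at a global default, like
 * &init_net.ct.sysctl_tstamp in the patch above. */
static int default_tstamp;
static const struct knob template_table[] = {
	{ "nf_conntrack_timestamp", &default_tstamp },
	{ NULL, NULL },
};

/* Per-"namespace" registration: duplicate the template and repoint
 * .data at this instance's own flag, as the kernel code does with
 * kmemdup(). */
static struct knob *register_per_ns(int *per_ns_flag)
{
	struct knob *table = malloc(sizeof(template_table));

	if (!table)
		return NULL;
	memcpy(table, template_table, sizeof(template_table));
	table[0].data = per_ns_flag;
	return table;
}

int main(void)
{
	int ns_flag = 1;
	struct knob *t = register_per_ns(&ns_flag);

	if (t) {
		printf("%s -> %d\n", t[0].name, *t[0].data);
		free(t);
	}
	return 0;
}
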
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393eab88e..20714edf6cd2 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister);
85 85
86int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) 86int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
87{ 87{
88 if (pf >= ARRAY_SIZE(nf_loggers))
89 return -EINVAL;
88 mutex_lock(&nf_log_mutex); 90 mutex_lock(&nf_log_mutex);
89 if (__find_logger(pf, logger->name) == NULL) { 91 if (__find_logger(pf, logger->name) == NULL) {
90 mutex_unlock(&nf_log_mutex); 92 mutex_unlock(&nf_log_mutex);
@@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf);
98 100
99void nf_log_unbind_pf(u_int8_t pf) 101void nf_log_unbind_pf(u_int8_t pf)
100{ 102{
103 if (pf >= ARRAY_SIZE(nf_loggers))
104 return;
101 mutex_lock(&nf_log_mutex); 105 mutex_lock(&nf_log_mutex);
102 rcu_assign_pointer(nf_loggers[pf], NULL); 106 rcu_assign_pointer(nf_loggers[pf], NULL);
103 mutex_unlock(&nf_log_mutex); 107 mutex_unlock(&nf_log_mutex);
@@ -161,7 +165,8 @@ static int seq_show(struct seq_file *s, void *v)
161 struct nf_logger *t; 165 struct nf_logger *t;
162 int ret; 166 int ret;
163 167
164 logger = nf_loggers[*pos]; 168 logger = rcu_dereference_protected(nf_loggers[*pos],
169 lockdep_is_held(&nf_log_mutex));
165 170
166 if (!logger) 171 if (!logger)
167 ret = seq_printf(s, "%2lld NONE (", *pos); 172 ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +254,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
249 mutex_unlock(&nf_log_mutex); 254 mutex_unlock(&nf_log_mutex);
250 } else { 255 } else {
251 mutex_lock(&nf_log_mutex); 256 mutex_lock(&nf_log_mutex);
252 logger = nf_loggers[tindex]; 257 logger = rcu_dereference_protected(nf_loggers[tindex],
258 lockdep_is_held(&nf_log_mutex));
253 if (!logger) 259 if (!logger)
254 table->data = "NONE"; 260 table->data = "NONE";
255 else 261 else
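
The nf_log.c hunks add two kinds of hardening: a bounds check on the protocol family before indexing nf_loggers[], and rcu_dereference_protected() annotations for reads done under nf_log_mutex. The bounds check is easy to show in isolation; a standalone sketch, where the array size is only illustrative:

#include <errno.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char *loggers[13];	/* one slot per family; size illustrative */

/* Same guard the patch adds to nf_log_bind_pf(): reject a family index
 * that would run past the end of the array instead of writing out of
 * bounds. */
static int bind_pf(unsigned int pf, const char *logger)
{
	if (pf >= ARRAY_SIZE(loggers))
		return -EINVAL;
	loggers[pf] = logger;
	return 0;
}

int main(void)
{
	printf("%d\n", bind_pf(2, "nf_log_ipv4"));	/*  0  */
	printf("%d\n", bind_pf(200, "bogus"));		/* -EINVAL */
	return 0;
}
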
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 74aebed5bd28..5b466cd1272f 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
27int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 27int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
28{ 28{
29 int ret; 29 int ret;
30 const struct nf_queue_handler *old;
30 31
31 if (pf >= ARRAY_SIZE(queue_handler)) 32 if (pf >= ARRAY_SIZE(queue_handler))
32 return -EINVAL; 33 return -EINVAL;
33 34
34 mutex_lock(&queue_handler_mutex); 35 mutex_lock(&queue_handler_mutex);
35 if (queue_handler[pf] == qh) 36 old = rcu_dereference_protected(queue_handler[pf],
37 lockdep_is_held(&queue_handler_mutex));
38 if (old == qh)
36 ret = -EEXIST; 39 ret = -EEXIST;
37 else if (queue_handler[pf]) 40 else if (old)
38 ret = -EBUSY; 41 ret = -EBUSY;
39 else { 42 else {
40 rcu_assign_pointer(queue_handler[pf], qh); 43 rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
49/* The caller must flush their queue before this */ 52/* The caller must flush their queue before this */
50int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 53int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
51{ 54{
55 const struct nf_queue_handler *old;
56
52 if (pf >= ARRAY_SIZE(queue_handler)) 57 if (pf >= ARRAY_SIZE(queue_handler))
53 return -EINVAL; 58 return -EINVAL;
54 59
55 mutex_lock(&queue_handler_mutex); 60 mutex_lock(&queue_handler_mutex);
56 if (queue_handler[pf] && queue_handler[pf] != qh) { 61 old = rcu_dereference_protected(queue_handler[pf],
62 lockdep_is_held(&queue_handler_mutex));
63 if (old && old != qh) {
57 mutex_unlock(&queue_handler_mutex); 64 mutex_unlock(&queue_handler_mutex);
58 return -EINVAL; 65 return -EINVAL;
59 } 66 }
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
73 80
74 mutex_lock(&queue_handler_mutex); 81 mutex_lock(&queue_handler_mutex);
75 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { 82 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
76 if (queue_handler[pf] == qh) 83 if (rcu_dereference_protected(
84 queue_handler[pf],
85 lockdep_is_held(&queue_handler_mutex)
86 ) == qh)
77 rcu_assign_pointer(queue_handler[pf], NULL); 87 rcu_assign_pointer(queue_handler[pf], NULL);
78 } 88 }
79 mutex_unlock(&queue_handler_mutex); 89 mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb,
115 int (*okfn)(struct sk_buff *), 125 int (*okfn)(struct sk_buff *),
116 unsigned int queuenum) 126 unsigned int queuenum)
117{ 127{
118 int status; 128 int status = -ENOENT;
119 struct nf_queue_entry *entry = NULL; 129 struct nf_queue_entry *entry = NULL;
120#ifdef CONFIG_BRIDGE_NETFILTER 130#ifdef CONFIG_BRIDGE_NETFILTER
121 struct net_device *physindev; 131 struct net_device *physindev;
@@ -124,20 +134,24 @@ static int __nf_queue(struct sk_buff *skb,
124 const struct nf_afinfo *afinfo; 134 const struct nf_afinfo *afinfo;
125 const struct nf_queue_handler *qh; 135 const struct nf_queue_handler *qh;
126 136
127 /* QUEUE == DROP if noone is waiting, to be safe. */ 137 /* QUEUE == DROP if no one is waiting, to be safe. */
128 rcu_read_lock(); 138 rcu_read_lock();
129 139
130 qh = rcu_dereference(queue_handler[pf]); 140 qh = rcu_dereference(queue_handler[pf]);
131 if (!qh) 141 if (!qh) {
142 status = -ESRCH;
132 goto err_unlock; 143 goto err_unlock;
144 }
133 145
134 afinfo = nf_get_afinfo(pf); 146 afinfo = nf_get_afinfo(pf);
135 if (!afinfo) 147 if (!afinfo)
136 goto err_unlock; 148 goto err_unlock;
137 149
138 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); 150 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
139 if (!entry) 151 if (!entry) {
152 status = -ENOMEM;
140 goto err_unlock; 153 goto err_unlock;
154 }
141 155
142 *entry = (struct nf_queue_entry) { 156 *entry = (struct nf_queue_entry) {
143 .skb = skb, 157 .skb = skb,
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
151 165
152 /* If it's going away, ignore hook. */ 166 /* If it's going away, ignore hook. */
153 if (!try_module_get(entry->elem->owner)) { 167 if (!try_module_get(entry->elem->owner)) {
154 rcu_read_unlock(); 168 status = -ECANCELED;
155 kfree(entry); 169 goto err_unlock;
156 return 0;
157 } 170 }
158
159 /* Bump dev refs so they don't vanish while packet is out */ 171 /* Bump dev refs so they don't vanish while packet is out */
160 if (indev) 172 if (indev)
161 dev_hold(indev); 173 dev_hold(indev);
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb,
182 goto err; 194 goto err;
183 } 195 }
184 196
185 return 1; 197 return 0;
186 198
187err_unlock: 199err_unlock:
188 rcu_read_unlock(); 200 rcu_read_unlock();
189err: 201err:
190 kfree_skb(skb);
191 kfree(entry); 202 kfree(entry);
192 return 1; 203 return status;
193} 204}
194 205
195int nf_queue(struct sk_buff *skb, 206int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb,
201 unsigned int queuenum) 212 unsigned int queuenum)
202{ 213{
203 struct sk_buff *segs; 214 struct sk_buff *segs;
215 int err;
216 unsigned int queued;
204 217
205 if (!skb_is_gso(skb)) 218 if (!skb_is_gso(skb))
206 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 219 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb,
216 } 229 }
217 230
218 segs = skb_gso_segment(skb, 0); 231 segs = skb_gso_segment(skb, 0);
219 kfree_skb(skb); 232 /* Does not use PTR_ERR to limit the number of error codes that can be
233 * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
234 * 'ignore this hook'.
235 */
220 if (IS_ERR(segs)) 236 if (IS_ERR(segs))
221 return 1; 237 return -EINVAL;
222 238
239 queued = 0;
240 err = 0;
223 do { 241 do {
224 struct sk_buff *nskb = segs->next; 242 struct sk_buff *nskb = segs->next;
225 243
226 segs->next = NULL; 244 segs->next = NULL;
227 if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, 245 if (err == 0)
228 queuenum)) 246 err = __nf_queue(segs, elem, pf, hook, indev,
247 outdev, okfn, queuenum);
248 if (err == 0)
249 queued++;
250 else
229 kfree_skb(segs); 251 kfree_skb(segs);
230 segs = nskb; 252 segs = nskb;
231 } while (segs); 253 } while (segs);
232 return 1; 254
255 /* also free orig skb if only some segments were queued */
256 if (unlikely(err && queued))
257 err = 0;
258 if (err == 0)
259 kfree_skb(skb);
260 return err;
233} 261}
234 262
235void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) 263void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
237 struct sk_buff *skb = entry->skb; 265 struct sk_buff *skb = entry->skb;
238 struct list_head *elem = &entry->elem->list; 266 struct list_head *elem = &entry->elem->list;
239 const struct nf_afinfo *afinfo; 267 const struct nf_afinfo *afinfo;
268 int err;
240 269
241 rcu_read_lock(); 270 rcu_read_lock();
242 271
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
270 local_bh_enable(); 299 local_bh_enable();
271 break; 300 break;
272 case NF_QUEUE: 301 case NF_QUEUE:
273 if (!__nf_queue(skb, elem, entry->pf, entry->hook, 302 err = __nf_queue(skb, elem, entry->pf, entry->hook,
274 entry->indev, entry->outdev, entry->okfn, 303 entry->indev, entry->outdev, entry->okfn,
275 verdict >> NF_VERDICT_BITS)) 304 verdict >> NF_VERDICT_QBITS);
276 goto next_hook; 305 if (err < 0) {
306 if (err == -ECANCELED)
307 goto next_hook;
308 if (err == -ESRCH &&
309 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
310 goto next_hook;
311 kfree_skb(skb);
312 }
277 break; 313 break;
278 case NF_STOLEN: 314 case NF_STOLEN:
279 default: 315 default:
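
The nf_queue() rework above makes __nf_queue() report real error codes (-ESRCH when no handler is registered, -ECANCELED when the hook is going away, -ENOMEM on allocation failure) and teaches the GSO path to queue segments individually: queuing stops at the first failure, unqueued segments are freed, and the call still counts as a success if at least one segment made it onto the queue. A standalone sketch of that loop over a toy segment list, with a stubbed enqueue function:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-in for a GSO segment chain. */
struct seg {
	int id;
	struct seg *next;
};

/* Stubbed __nf_queue(): pretend only even ids can be queued. */
static int enqueue_one(struct seg *s)
{
	return (s->id & 1) ? -ESRCH : 0;
}

/* Mirrors the patched nf_queue() loop: stop queuing after the first
 * failure, free the segments that were not queued, and report success
 * if at least one segment was accepted. */
static int queue_segments(struct seg *segs, unsigned int *queued)
{
	int err = 0;

	*queued = 0;
	while (segs) {
		struct seg *next = segs->next;

		segs->next = NULL;
		if (err == 0)
			err = enqueue_one(segs);
		if (err == 0)
			(*queued)++;	/* now owned by the queue */
		else
			free(segs);
		segs = next;
	}
	if (err && *queued)
		err = 0;	/* partial success still counts */
	return err;
}

int main(void)
{
	struct seg *s2 = calloc(1, sizeof(*s2));
	struct seg *s1 = calloc(1, sizeof(*s1));
	unsigned int queued;
	int err;

	if (!s1 || !s2)
		return 1;
	s1->id = 0; s1->next = s2;
	s2->id = 1; s2->next = NULL;

	err = queue_segments(s1, &queued);
	printf("err=%d queued=%u\n", err, queued);	/* err=0 queued=1 */
	free(s1);	/* nothing really holds the queued segment here */
	return 0;
}
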
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6a1572b0ab41..e0ee010935e7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -376,13 +376,11 @@ __build_packet_message(struct nfulnl_instance *inst,
376 unsigned int hooknum, 376 unsigned int hooknum,
377 const struct net_device *indev, 377 const struct net_device *indev,
378 const struct net_device *outdev, 378 const struct net_device *outdev,
379 const struct nf_loginfo *li,
380 const char *prefix, unsigned int plen) 379 const char *prefix, unsigned int plen)
381{ 380{
382 struct nfulnl_msg_packet_hdr pmsg; 381 struct nfulnl_msg_packet_hdr pmsg;
383 struct nlmsghdr *nlh; 382 struct nlmsghdr *nlh;
384 struct nfgenmsg *nfmsg; 383 struct nfgenmsg *nfmsg;
385 __be32 tmp_uint;
386 sk_buff_data_t old_tail = inst->skb->tail; 384 sk_buff_data_t old_tail = inst->skb->tail;
387 385
388 nlh = NLMSG_PUT(inst->skb, 0, 0, 386 nlh = NLMSG_PUT(inst->skb, 0, 0,
@@ -429,7 +427,6 @@ __build_packet_message(struct nfulnl_instance *inst,
429 } 427 }
430 428
431 if (outdev) { 429 if (outdev) {
432 tmp_uint = htonl(outdev->ifindex);
433#ifndef CONFIG_BRIDGE_NETFILTER 430#ifndef CONFIG_BRIDGE_NETFILTER
434 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, 431 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
435 htonl(outdev->ifindex)); 432 htonl(outdev->ifindex));
@@ -652,7 +649,7 @@ nfulnl_log_packet(u_int8_t pf,
652 inst->qlen++; 649 inst->qlen++;
653 650
654 __build_packet_message(inst, skb, data_len, pf, 651 __build_packet_message(inst, skb, data_len, pf,
655 hooknum, in, out, li, prefix, plen); 652 hooknum, in, out, prefix, plen);
656 653
657 if (inst->qlen >= qthreshold) 654 if (inst->qlen >= qthreshold)
658 __nfulnl_flush(inst); 655 __nfulnl_flush(inst);
@@ -874,19 +871,19 @@ static struct hlist_node *get_first(struct iter_state *st)
874 871
875 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 872 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
876 if (!hlist_empty(&instance_table[st->bucket])) 873 if (!hlist_empty(&instance_table[st->bucket]))
877 return rcu_dereference_bh(instance_table[st->bucket].first); 874 return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
878 } 875 }
879 return NULL; 876 return NULL;
880} 877}
881 878
882static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) 879static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
883{ 880{
884 h = rcu_dereference_bh(h->next); 881 h = rcu_dereference_bh(hlist_next_rcu(h));
885 while (!h) { 882 while (!h) {
886 if (++st->bucket >= INSTANCE_BUCKETS) 883 if (++st->bucket >= INSTANCE_BUCKETS)
887 return NULL; 884 return NULL;
888 885
889 h = rcu_dereference_bh(instance_table[st->bucket].first); 886 h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
890 } 887 }
891 return h; 888 return h;
892} 889}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 68e67d19724d..b83123f12b42 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
387{ 387{
388 struct sk_buff *nskb; 388 struct sk_buff *nskb;
389 struct nfqnl_instance *queue; 389 struct nfqnl_instance *queue;
390 int err; 390 int err = -ENOBUFS;
391 391
392 /* rcu_read_lock()ed by nf_hook_slow() */ 392 /* rcu_read_lock()ed by nf_hook_slow() */
393 queue = instance_lookup(queuenum); 393 queue = instance_lookup(queuenum);
394 if (!queue) 394 if (!queue) {
395 err = -ESRCH;
395 goto err_out; 396 goto err_out;
397 }
396 398
397 if (queue->copy_mode == NFQNL_COPY_NONE) 399 if (queue->copy_mode == NFQNL_COPY_NONE) {
400 err = -EINVAL;
398 goto err_out; 401 goto err_out;
402 }
399 403
400 nskb = nfqnl_build_packet_message(queue, entry); 404 nskb = nfqnl_build_packet_message(queue, entry);
401 if (nskb == NULL) 405 if (nskb == NULL) {
406 err = -ENOMEM;
402 goto err_out; 407 goto err_out;
403 408 }
404 spin_lock_bh(&queue->lock); 409 spin_lock_bh(&queue->lock);
405 410
406 if (!queue->peer_pid) 411 if (!queue->peer_pid) {
412 err = -EINVAL;
407 goto err_out_free_nskb; 413 goto err_out_free_nskb;
408 414 }
409 if (queue->queue_total >= queue->queue_maxlen) { 415 if (queue->queue_total >= queue->queue_maxlen) {
410 queue->queue_dropped++; 416 queue->queue_dropped++;
411 if (net_ratelimit()) 417 if (net_ratelimit())
@@ -432,7 +438,7 @@ err_out_free_nskb:
432err_out_unlock: 438err_out_unlock:
433 spin_unlock_bh(&queue->lock); 439 spin_unlock_bh(&queue->lock);
434err_out: 440err_out:
435 return -1; 441 return err;
436} 442}
437 443
438static int 444static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c94237631077..b0869fe3633b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -23,6 +23,7 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/audit.h>
26#include <net/net_namespace.h> 27#include <net/net_namespace.h>
27 28
28#include <linux/netfilter/x_tables.h> 29#include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
38#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 39#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
39 40
40struct compat_delta { 41struct compat_delta {
41 struct compat_delta *next; 42 unsigned int offset; /* offset in kernel */
42 unsigned int offset; 43 int delta; /* delta in 32bit user land */
43 int delta;
44}; 44};
45 45
46struct xt_af { 46struct xt_af {
@@ -49,7 +49,9 @@ struct xt_af {
49 struct list_head target; 49 struct list_head target;
50#ifdef CONFIG_COMPAT 50#ifdef CONFIG_COMPAT
51 struct mutex compat_mutex; 51 struct mutex compat_mutex;
52 struct compat_delta *compat_offsets; 52 struct compat_delta *compat_tab;
53 unsigned int number; /* number of slots in compat_tab[] */
54 unsigned int cur; /* number of used slots in compat_tab[] */
53#endif 55#endif
54}; 56};
55 57
@@ -181,14 +183,14 @@ EXPORT_SYMBOL(xt_unregister_matches);
181/* 183/*
182 * These are weird, but module loading must not be done with mutex 184 * These are weird, but module loading must not be done with mutex
183 * held (since they will register), and we have to have a single 185 * held (since they will register), and we have to have a single
184 * function to use try_then_request_module(). 186 * function to use.
185 */ 187 */
186 188
187/* Find match, grabs ref. Returns ERR_PTR() on error. */ 189/* Find match, grabs ref. Returns ERR_PTR() on error. */
188struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) 190struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
189{ 191{
190 struct xt_match *m; 192 struct xt_match *m;
191 int err = 0; 193 int err = -ENOENT;
192 194
193 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 195 if (mutex_lock_interruptible(&xt[af].mutex) != 0)
194 return ERR_PTR(-EINTR); 196 return ERR_PTR(-EINTR);
@@ -219,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
219{ 221{
220 struct xt_match *match; 222 struct xt_match *match;
221 223
222 match = try_then_request_module(xt_find_match(nfproto, name, revision), 224 match = xt_find_match(nfproto, name, revision);
223 "%st_%s", xt_prefix[nfproto], name); 225 if (IS_ERR(match)) {
224 return (match != NULL) ? match : ERR_PTR(-ENOENT); 226 request_module("%st_%s", xt_prefix[nfproto], name);
227 match = xt_find_match(nfproto, name, revision);
228 }
229
230 return match;
225} 231}
226EXPORT_SYMBOL_GPL(xt_request_find_match); 232EXPORT_SYMBOL_GPL(xt_request_find_match);
227 233
@@ -229,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match);
229struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) 235struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
230{ 236{
231 struct xt_target *t; 237 struct xt_target *t;
232 int err = 0; 238 int err = -ENOENT;
233 239
234 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 240 if (mutex_lock_interruptible(&xt[af].mutex) != 0)
235 return ERR_PTR(-EINTR); 241 return ERR_PTR(-EINTR);
@@ -259,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
259{ 265{
260 struct xt_target *target; 266 struct xt_target *target;
261 267
262 target = try_then_request_module(xt_find_target(af, name, revision), 268 target = xt_find_target(af, name, revision);
263 "%st_%s", xt_prefix[af], name); 269 if (IS_ERR(target)) {
264 return (target != NULL) ? target : ERR_PTR(-ENOENT); 270 request_module("%st_%s", xt_prefix[af], name);
271 target = xt_find_target(af, name, revision);
272 }
273
274 return target;
265} 275}
266EXPORT_SYMBOL_GPL(xt_request_find_target); 276EXPORT_SYMBOL_GPL(xt_request_find_target);
267 277
@@ -414,54 +424,67 @@ int xt_check_match(struct xt_mtchk_param *par,
414EXPORT_SYMBOL_GPL(xt_check_match); 424EXPORT_SYMBOL_GPL(xt_check_match);
415 425
416#ifdef CONFIG_COMPAT 426#ifdef CONFIG_COMPAT
417int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) 427int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
418{ 428{
419 struct compat_delta *tmp; 429 struct xt_af *xp = &xt[af];
420 430
421 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); 431 if (!xp->compat_tab) {
422 if (!tmp) 432 if (!xp->number)
423 return -ENOMEM; 433 return -EINVAL;
434 xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
435 if (!xp->compat_tab)
436 return -ENOMEM;
437 xp->cur = 0;
438 }
424 439
425 tmp->offset = offset; 440 if (xp->cur >= xp->number)
426 tmp->delta = delta; 441 return -EINVAL;
427 442
428 if (xt[af].compat_offsets) { 443 if (xp->cur)
429 tmp->next = xt[af].compat_offsets->next; 444 delta += xp->compat_tab[xp->cur - 1].delta;
430 xt[af].compat_offsets->next = tmp; 445 xp->compat_tab[xp->cur].offset = offset;
431 } else { 446 xp->compat_tab[xp->cur].delta = delta;
432 xt[af].compat_offsets = tmp; 447 xp->cur++;
433 tmp->next = NULL;
434 }
435 return 0; 448 return 0;
436} 449}
437EXPORT_SYMBOL_GPL(xt_compat_add_offset); 450EXPORT_SYMBOL_GPL(xt_compat_add_offset);
438 451
439void xt_compat_flush_offsets(u_int8_t af) 452void xt_compat_flush_offsets(u_int8_t af)
440{ 453{
441 struct compat_delta *tmp, *next; 454 if (xt[af].compat_tab) {
442 455 vfree(xt[af].compat_tab);
443 if (xt[af].compat_offsets) { 456 xt[af].compat_tab = NULL;
444 for (tmp = xt[af].compat_offsets; tmp; tmp = next) { 457 xt[af].number = 0;
445 next = tmp->next; 458 xt[af].cur = 0;
446 kfree(tmp);
447 }
448 xt[af].compat_offsets = NULL;
449 } 459 }
450} 460}
451EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); 461EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
452 462
453int xt_compat_calc_jump(u_int8_t af, unsigned int offset) 463int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
454{ 464{
455 struct compat_delta *tmp; 465 struct compat_delta *tmp = xt[af].compat_tab;
456 int delta; 466 int mid, left = 0, right = xt[af].cur - 1;
457 467
458 for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) 468 while (left <= right) {
459 if (tmp->offset < offset) 469 mid = (left + right) >> 1;
460 delta += tmp->delta; 470 if (offset > tmp[mid].offset)
461 return delta; 471 left = mid + 1;
472 else if (offset < tmp[mid].offset)
473 right = mid - 1;
474 else
475 return mid ? tmp[mid - 1].delta : 0;
476 }
477 return left ? tmp[left - 1].delta : 0;
462} 478}
463EXPORT_SYMBOL_GPL(xt_compat_calc_jump); 479EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
464 480
481void xt_compat_init_offsets(u_int8_t af, unsigned int number)
482{
483 xt[af].number = number;
484 xt[af].cur = 0;
485}
486EXPORT_SYMBOL(xt_compat_init_offsets);
487
465int xt_compat_match_offset(const struct xt_match *match) 488int xt_compat_match_offset(const struct xt_match *match)
466{ 489{
467 u_int16_t csize = match->compatsize ? : match->matchsize; 490 u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -739,8 +762,8 @@ void xt_compat_unlock(u_int8_t af)
739EXPORT_SYMBOL_GPL(xt_compat_unlock); 762EXPORT_SYMBOL_GPL(xt_compat_unlock);
740#endif 763#endif
741 764
742DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); 765DEFINE_PER_CPU(seqcount_t, xt_recseq);
743EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); 766EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
744 767
745static int xt_jumpstack_alloc(struct xt_table_info *i) 768static int xt_jumpstack_alloc(struct xt_table_info *i)
746{ 769{
@@ -820,6 +843,21 @@ xt_replace_table(struct xt_table *table,
820 */ 843 */
821 local_bh_enable(); 844 local_bh_enable();
822 845
846#ifdef CONFIG_AUDIT
847 if (audit_enabled) {
848 struct audit_buffer *ab;
849
850 ab = audit_log_start(current->audit_context, GFP_KERNEL,
851 AUDIT_NETFILTER_CFG);
852 if (ab) {
853 audit_log_format(ab, "table=%s family=%u entries=%u",
854 table->name, table->af,
855 private->number);
856 audit_log_end(ab);
857 }
858 }
859#endif
860
823 return private; 861 return private;
824} 862}
825EXPORT_SYMBOL_GPL(xt_replace_table); 863EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1324,10 +1362,7 @@ static int __init xt_init(void)
1324 int rv; 1362 int rv;
1325 1363
1326 for_each_possible_cpu(i) { 1364 for_each_possible_cpu(i) {
1327 struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); 1365 seqcount_init(&per_cpu(xt_recseq, i));
1328
1329 seqlock_init(&lock->lock);
1330 lock->readers = 0;
1331 } 1366 }
1332 1367
1333 xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); 1368 xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
@@ -1338,7 +1373,7 @@ static int __init xt_init(void)
1338 mutex_init(&xt[i].mutex); 1373 mutex_init(&xt[i].mutex);
1339#ifdef CONFIG_COMPAT 1374#ifdef CONFIG_COMPAT
1340 mutex_init(&xt[i].compat_mutex); 1375 mutex_init(&xt[i].compat_mutex);
1341 xt[i].compat_offsets = NULL; 1376 xt[i].compat_tab = NULL;
1342#endif 1377#endif
1343 INIT_LIST_HEAD(&xt[i].target); 1378 INIT_LIST_HEAD(&xt[i].target);
1344 INIT_LIST_HEAD(&xt[i].match); 1379 INIT_LIST_HEAD(&xt[i].match);
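
The x_tables.c changes replace the compat offset linked list with a table sized up front by xt_compat_init_offsets(); each appended slot stores the running total of deltas, so xt_compat_calc_jump() can answer how much a given kernel offset shifts in the 32-bit layout with a binary search instead of walking a list. A standalone copy of that lookup over a small hand-built table:

#include <stdio.h>

struct compat_delta {
	unsigned int offset;	/* offset in kernel */
	int delta;		/* cumulative delta in 32bit user land */
};

/* Entries are appended in increasing offset order and each slot holds
 * the running total, exactly as xt_compat_add_offset() builds them. */
static const struct compat_delta tab[] = {
	{ 100,  8 },
	{ 200, 16 },
	{ 350, 24 },
};

/* Same lookup as the patched xt_compat_calc_jump(): binary-search the
 * sorted table and return the cumulative delta of every entry whose
 * offset is strictly below the one asked for. */
static int calc_jump(unsigned int offset)
{
	int mid, left = 0, right = (int)(sizeof(tab) / sizeof(tab[0])) - 1;

	while (left <= right) {
		mid = (left + right) >> 1;
		if (offset > tab[mid].offset)
			left = mid + 1;
		else if (offset < tab[mid].offset)
			right = mid - 1;
		else
			return mid ? tab[mid - 1].delta : 0;
	}
	return left ? tab[left - 1].delta : 0;
}

int main(void)
{
	printf("%d %d %d %d\n",
	       calc_jump(50),	/*  0: nothing before it       */
	       calc_jump(200),	/*  8: only the entry at 100    */
	       calc_jump(250),	/* 16: entries at 100 and 200   */
	       calc_jump(400));	/* 24: all three entries        */
	return 0;
}
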
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644
index 000000000000..363a99ec0637
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,222 @@
1/*
2 * Creates audit record for dropped/accepted packets
3 *
4 * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
5 * (C) 2010-2011 Red Hat, Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10*/
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/audit.h>
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/tcp.h>
18#include <linux/udp.h>
19#include <linux/if_arp.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_AUDIT.h>
22#include <linux/netfilter_bridge/ebtables.h>
23#include <net/ipv6.h>
24#include <net/ip.h>
25
26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
28MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
29MODULE_ALIAS("ipt_AUDIT");
30MODULE_ALIAS("ip6t_AUDIT");
31MODULE_ALIAS("ebt_AUDIT");
32MODULE_ALIAS("arpt_AUDIT");
33
34static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
35 unsigned int proto, unsigned int offset)
36{
37 switch (proto) {
38 case IPPROTO_TCP:
39 case IPPROTO_UDP:
40 case IPPROTO_UDPLITE: {
41 const __be16 *pptr;
42 __be16 _ports[2];
43
44 pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
45 if (pptr == NULL) {
46 audit_log_format(ab, " truncated=1");
47 return;
48 }
49
50 audit_log_format(ab, " sport=%hu dport=%hu",
51 ntohs(pptr[0]), ntohs(pptr[1]));
52 }
53 break;
54
55 case IPPROTO_ICMP:
56 case IPPROTO_ICMPV6: {
57 const u8 *iptr;
58 u8 _ih[2];
59
60 iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
61 if (iptr == NULL) {
62 audit_log_format(ab, " truncated=1");
63 return;
64 }
65
66 audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
67 iptr[0], iptr[1]);
68
69 }
70 break;
71 }
72}
73
74static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
75{
76 struct iphdr _iph;
77 const struct iphdr *ih;
78
79 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
80 if (!ih) {
81 audit_log_format(ab, " truncated=1");
82 return;
83 }
84
85 audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
86 &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
87
88 if (ntohs(ih->frag_off) & IP_OFFSET) {
89 audit_log_format(ab, " frag=1");
90 return;
91 }
92
93 audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
94}
95
96static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
97{
98 struct ipv6hdr _ip6h;
99 const struct ipv6hdr *ih;
100 u8 nexthdr;
101 int offset;
102
103 ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
104 if (!ih) {
105 audit_log_format(ab, " truncated=1");
106 return;
107 }
108
109 nexthdr = ih->nexthdr;
110 offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
111 &nexthdr);
112
113 audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
114 &ih->saddr, &ih->daddr, nexthdr);
115
116 if (offset)
117 audit_proto(ab, skb, nexthdr, offset);
118}
119
120static unsigned int
121audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
122{
123 const struct xt_audit_info *info = par->targinfo;
124 struct audit_buffer *ab;
125
126 ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
127 if (ab == NULL)
128 goto errout;
129
130 audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
131 info->type, par->hooknum, skb->len,
132 par->in ? par->in->name : "?",
133 par->out ? par->out->name : "?");
134
135 if (skb->mark)
136 audit_log_format(ab, " mark=%#x", skb->mark);
137
138 if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
139 audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
140 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
141 ntohs(eth_hdr(skb)->h_proto));
142
143 if (par->family == NFPROTO_BRIDGE) {
144 switch (eth_hdr(skb)->h_proto) {
145 case __constant_htons(ETH_P_IP):
146 audit_ip4(ab, skb);
147 break;
148
149 case __constant_htons(ETH_P_IPV6):
150 audit_ip6(ab, skb);
151 break;
152 }
153 }
154 }
155
156 switch (par->family) {
157 case NFPROTO_IPV4:
158 audit_ip4(ab, skb);
159 break;
160
161 case NFPROTO_IPV6:
162 audit_ip6(ab, skb);
163 break;
164 }
165
166 audit_log_end(ab);
167
168errout:
169 return XT_CONTINUE;
170}
171
172static unsigned int
173audit_tg_ebt(struct sk_buff *skb, const struct xt_action_param *par)
174{
175 audit_tg(skb, par);
176 return EBT_CONTINUE;
177}
178
179static int audit_tg_check(const struct xt_tgchk_param *par)
180{
181 const struct xt_audit_info *info = par->targinfo;
182
183 if (info->type > XT_AUDIT_TYPE_MAX) {
184 pr_info("Audit type out of range (valid range: 0..%hhu)\n",
185 XT_AUDIT_TYPE_MAX);
186 return -ERANGE;
187 }
188
189 return 0;
190}
191
192static struct xt_target audit_tg_reg[] __read_mostly = {
193 {
194 .name = "AUDIT",
195 .family = NFPROTO_UNSPEC,
196 .target = audit_tg,
197 .targetsize = sizeof(struct xt_audit_info),
198 .checkentry = audit_tg_check,
199 .me = THIS_MODULE,
200 },
201 {
202 .name = "AUDIT",
203 .family = NFPROTO_BRIDGE,
204 .target = audit_tg_ebt,
205 .targetsize = sizeof(struct xt_audit_info),
206 .checkentry = audit_tg_check,
207 .me = THIS_MODULE,
208 },
209};
210
211static int __init audit_tg_init(void)
212{
213 return xt_register_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg));
214}
215
216static void __exit audit_tg_exit(void)
217{
218 xt_unregister_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg));
219}
220
221module_init(audit_tg_init);
222module_exit(audit_tg_exit);
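
audit_proto() in the new xt_AUDIT target pulls the port or ICMP fields out of the packet with skb_header_pointer(), which hands back a pointer to the requested bytes or NULL when the packet is too short, in which case the record simply gets truncated=1. A userspace analogue of that access pattern over a flat buffer (the kernel helper only needs to copy when the data is paged; this sketch always copies into the caller's scratch area):

#include <arpa/inet.h>	/* ntohs */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Userspace analogue of skb_header_pointer(): return len bytes at
 * offset via the scratch buffer, or NULL if the packet is too short. */
static const void *header_pointer(const uint8_t *pkt, size_t pktlen,
				  size_t offset, size_t len, void *scratch)
{
	if (offset + len > pktlen)
		return NULL;
	memcpy(scratch, pkt + offset, len);
	return scratch;
}

int main(void)
{
	/* 8 bytes of fake UDP header: sport 53, dport 1024, len, csum. */
	const uint8_t pkt[] = { 0x00, 0x35, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00 };
	uint16_t scratch[2];
	const uint16_t *ports;

	ports = header_pointer(pkt, sizeof(pkt), 0, sizeof(scratch), scratch);
	if (!ports) {
		puts("truncated=1");
		return 0;
	}
	printf("sport=%u dport=%u\n", ntohs(ports[0]), ntohs(ports[1]));
	return 0;
}
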
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index c2c0e4abeb99..af9c4dadf816 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
19#include <linux/netfilter_ipv6.h> 19#include <linux/netfilter_ipv6.h>
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_CLASSIFY.h> 21#include <linux/netfilter/xt_CLASSIFY.h>
22#include <linux/netfilter_arp.h>
22 23
23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 24MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
24MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
25MODULE_DESCRIPTION("Xtables: Qdisc classification"); 26MODULE_DESCRIPTION("Xtables: Qdisc classification");
26MODULE_ALIAS("ipt_CLASSIFY"); 27MODULE_ALIAS("ipt_CLASSIFY");
27MODULE_ALIAS("ip6t_CLASSIFY"); 28MODULE_ALIAS("ip6t_CLASSIFY");
29MODULE_ALIAS("arpt_CLASSIFY");
28 30
29static unsigned int 31static unsigned int
30classify_tg(struct sk_buff *skb, const struct xt_action_param *par) 32classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
35 return XT_CONTINUE; 37 return XT_CONTINUE;
36} 38}
37 39
38static struct xt_target classify_tg_reg __read_mostly = { 40static struct xt_target classify_tg_reg[] __read_mostly = {
39 .name = "CLASSIFY", 41 {
40 .revision = 0, 42 .name = "CLASSIFY",
41 .family = NFPROTO_UNSPEC, 43 .revision = 0,
42 .table = "mangle", 44 .family = NFPROTO_UNSPEC,
43 .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | 45 .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
44 (1 << NF_INET_POST_ROUTING), 46 (1 << NF_INET_POST_ROUTING),
45 .target = classify_tg, 47 .target = classify_tg,
46 .targetsize = sizeof(struct xt_classify_target_info), 48 .targetsize = sizeof(struct xt_classify_target_info),
47 .me = THIS_MODULE, 49 .me = THIS_MODULE,
50 },
51 {
52 .name = "CLASSIFY",
53 .revision = 0,
54 .family = NFPROTO_ARP,
55 .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
56 .target = classify_tg,
57 .targetsize = sizeof(struct xt_classify_target_info),
58 .me = THIS_MODULE,
59 },
48}; 60};
49 61
50static int __init classify_tg_init(void) 62static int __init classify_tg_init(void)
51{ 63{
52 return xt_register_target(&classify_tg_reg); 64 return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
53} 65}
54 66
55static void __exit classify_tg_exit(void) 67static void __exit classify_tg_exit(void)
56{ 68{
57 xt_unregister_target(&classify_tg_reg); 69 xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
58} 70}
59 71
60module_init(classify_tg_init); 72module_init(classify_tg_init);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 0a229191e55b..ae8271652efa 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -99,7 +99,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
99 u_int8_t orig, nv; 99 u_int8_t orig, nv;
100 100
101 orig = ipv6_get_dsfield(iph); 101 orig = ipv6_get_dsfield(iph);
102 nv = (orig & info->tos_mask) ^ info->tos_value; 102 nv = (orig & ~info->tos_mask) ^ info->tos_value;
103 103
104 if (orig != nv) { 104 if (orig != nv) {
105 if (!skb_make_writable(skb, sizeof(struct iphdr))) 105 if (!skb_make_writable(skb, sizeof(struct iphdr)))
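
The one-line xt_DSCP.c fix makes the IPv6 TOS target use the same formula as the IPv4 path: the new traffic class is (orig & ~tos_mask) ^ tos_value, so bits outside the mask survive. Masking with tos_mask instead, as the old code did, silently cleared them. A small worked example:

#include <stdint.h>
#include <stdio.h>

/* Correct update: keep every bit outside the mask, apply the value to
 * the masked bits. */
static uint8_t tos_apply(uint8_t orig, uint8_t mask, uint8_t value)
{
	return (uint8_t)((orig & ~mask) ^ value);
}

/* Pre-patch behaviour: all unmasked bits are thrown away. */
static uint8_t tos_apply_buggy(uint8_t orig, uint8_t mask, uint8_t value)
{
	return (uint8_t)((orig & mask) ^ value);
}

int main(void)
{
	uint8_t orig = 0xb0, mask = 0x03, value = 0x01;

	printf("fixed: 0x%02x\n", tos_apply(orig, mask, value));	/* 0xb1 */
	printf("buggy: 0x%02x\n", tos_apply_buggy(orig, mask, value));	/* 0x01 */
	return 0;
}
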
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e13545..3bdd443aaf15 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); 313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
314MODULE_DESCRIPTION("Xtables: idle time monitor"); 314MODULE_DESCRIPTION("Xtables: idle time monitor");
315MODULE_LICENSE("GPL v2"); 315MODULE_LICENSE("GPL v2");
316MODULE_ALIAS("ipt_IDLETIMER");
317MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a4140509eea1..993de2ba89d3 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
31MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); 32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); 33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
34MODULE_ALIAS("ipt_LED");
35MODULE_ALIAS("ip6t_LED");
34 36
35static LIST_HEAD(xt_led_triggers); 37static LIST_HEAD(xt_led_triggers);
36static DEFINE_MUTEX(xt_led_mutex); 38static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 039cce1bde3d..d4f4b5d66b20 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
72 72
73 if (info->queues_total > 1) { 73 if (info->queues_total > 1) {
74 if (par->family == NFPROTO_IPV4) 74 if (par->family == NFPROTO_IPV4)
75 queue = hash_v4(skb) % info->queues_total + queue; 75 queue = (((u64) hash_v4(skb) * info->queues_total) >>
76 32) + queue;
76#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 77#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
77 else if (par->family == NFPROTO_IPV6) 78 else if (par->family == NFPROTO_IPV6)
78 queue = hash_v6(skb) % info->queues_total + queue; 79 queue = (((u64) hash_v6(skb) * info->queues_total) >>
80 32) + queue;
79#endif 81#endif
80 } 82 }
81 return NF_QUEUE_NR(queue); 83 return NF_QUEUE_NR(queue);
82} 84}
83 85
84static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) 86static unsigned int
87nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
85{ 88{
86 const struct xt_NFQ_info_v1 *info = par->targinfo; 89 const struct xt_NFQ_info_v2 *info = par->targinfo;
90 unsigned int ret = nfqueue_tg_v1(skb, par);
91
92 if (info->bypass)
93 ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
94 return ret;
95}
96
97static int nfqueue_tg_check(const struct xt_tgchk_param *par)
98{
99 const struct xt_NFQ_info_v2 *info = par->targinfo;
87 u32 maxid; 100 u32 maxid;
88 101
89 if (unlikely(!rnd_inited)) { 102 if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
100 info->queues_total, maxid); 113 info->queues_total, maxid);
101 return -ERANGE; 114 return -ERANGE;
102 } 115 }
116 if (par->target->revision == 2 && info->bypass > 1)
117 return -EINVAL;
103 return 0; 118 return 0;
104} 119}
105 120
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
115 .name = "NFQUEUE", 130 .name = "NFQUEUE",
116 .revision = 1, 131 .revision = 1,
117 .family = NFPROTO_UNSPEC, 132 .family = NFPROTO_UNSPEC,
118 .checkentry = nfqueue_tg_v1_check, 133 .checkentry = nfqueue_tg_check,
119 .target = nfqueue_tg_v1, 134 .target = nfqueue_tg_v1,
120 .targetsize = sizeof(struct xt_NFQ_info_v1), 135 .targetsize = sizeof(struct xt_NFQ_info_v1),
121 .me = THIS_MODULE, 136 .me = THIS_MODULE,
122 }, 137 },
138 {
139 .name = "NFQUEUE",
140 .revision = 2,
141 .family = NFPROTO_UNSPEC,
142 .checkentry = nfqueue_tg_check,
143 .target = nfqueue_tg_v2,
144 .targetsize = sizeof(struct xt_NFQ_info_v2),
145 .me = THIS_MODULE,
146 },
123}; 147};
124 148
125static int __init nfqueue_tg_init(void) 149static int __init nfqueue_tg_init(void)
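
For NFQUEUE load balancing the hunk swaps "hash % queues_total" for the multiply-and-shift "((u64)hash * queues_total) >> 32", which spreads a 32-bit hash uniformly across the configured queues without a division. A standalone sketch of the mapping and a crude check that the buckets come out even:

#include <stdint.h>
#include <stdio.h>

/* Map a 32-bit flow hash onto [0, queues_total): the high 32 bits of
 * hash * n are uniformly distributed over 0..n-1, and no divide is
 * needed on the fast path. */
static unsigned int pick_queue(uint32_t hash, uint32_t queues_total)
{
	return (unsigned int)(((uint64_t)hash * queues_total) >> 32);
}

int main(void)
{
	unsigned int counts[8] = { 0 };
	unsigned int i;

	/* A crude sweep of hash values lands roughly evenly in the bins. */
	for (i = 0; i < 80000; i++)
		counts[pick_queue(i * 2654435761u, 8)]++;

	for (i = 0; i < 8; i++)
		printf("queue %u: %u\n", i, counts[i]);
	return 0;
}
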
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index eb81c380da1b..9e63b43faeed 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -148,20 +148,25 @@ tcpmss_mangle_packet(struct sk_buff *skb,
148static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, 148static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
149 unsigned int family) 149 unsigned int family)
150{ 150{
151 struct flowi fl = {}; 151 struct flowi fl;
152 const struct nf_afinfo *ai; 152 const struct nf_afinfo *ai;
153 struct rtable *rt = NULL; 153 struct rtable *rt = NULL;
154 u_int32_t mtu = ~0U; 154 u_int32_t mtu = ~0U;
155 155
156 if (family == PF_INET) 156 if (family == PF_INET) {
157 fl.fl4_dst = ip_hdr(skb)->saddr; 157 struct flowi4 *fl4 = &fl.u.ip4;
158 else 158 memset(fl4, 0, sizeof(*fl4));
159 fl.fl6_dst = ipv6_hdr(skb)->saddr; 159 fl4->daddr = ip_hdr(skb)->saddr;
160 } else {
161 struct flowi6 *fl6 = &fl.u.ip6;
160 162
163 memset(fl6, 0, sizeof(*fl6));
164 ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr);
165 }
161 rcu_read_lock(); 166 rcu_read_lock();
162 ai = nf_get_afinfo(family); 167 ai = nf_get_afinfo(family);
163 if (ai != NULL) 168 if (ai != NULL)
164 ai->route((struct dst_entry **)&rt, &fl); 169 ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
165 rcu_read_unlock(); 170 rcu_read_unlock();
166 171
167 if (rt != NULL) { 172 if (rt != NULL) {
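
tcpmss_reverse_mtu() is converted here to the reworked flow key layout, where struct flowi carries per-family members in a union (u.ip4 / u.ip6) and the caller fills only the member matching the address family before asking the AF code to route it. A rough userspace analogue of that tagged-union shape, with invented field names:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins, not the kernel's definitions. */
struct flow4 { uint32_t daddr; };
struct flow6 { uint8_t daddr[16]; };

struct flow {
	int family;
	union {
		struct flow4 ip4;
		struct flow6 ip6;
	} u;
};

/* Build a flow key toward the packet's source address, filling only the
 * union member for the given family, as the patched code does. */
static void fill_reverse_flow(struct flow *fl, int family, const void *saddr)
{
	memset(fl, 0, sizeof(*fl));
	fl->family = family;
	if (family == 4)
		memcpy(&fl->u.ip4.daddr, saddr, sizeof(fl->u.ip4.daddr));
	else
		memcpy(fl->u.ip6.daddr, saddr, sizeof(fl->u.ip6.daddr));
}

int main(void)
{
	uint32_t src = 0x0100007f;	/* arbitrary example address */
	struct flow fl;

	fill_reverse_flow(&fl, 4, &src);
	printf("family=%d daddr=0x%08x\n", fl.family, fl.u.ip4.daddr);
	return 0;
}
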
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 5128a6c4cb2c..5f054a0dbbb1 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -62,18 +62,19 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
62 const struct iphdr *iph = ip_hdr(skb); 62 const struct iphdr *iph = ip_hdr(skb);
63 struct net *net = pick_net(skb); 63 struct net *net = pick_net(skb);
64 struct rtable *rt; 64 struct rtable *rt;
65 struct flowi fl; 65 struct flowi4 fl4;
66 66
67 memset(&fl, 0, sizeof(fl)); 67 memset(&fl4, 0, sizeof(fl4));
68 if (info->priv) { 68 if (info->priv) {
69 if (info->priv->oif == -1) 69 if (info->priv->oif == -1)
70 return false; 70 return false;
71 fl.oif = info->priv->oif; 71 fl4.flowi4_oif = info->priv->oif;
72 } 72 }
73 fl.fl4_dst = info->gw.ip; 73 fl4.daddr = info->gw.ip;
74 fl.fl4_tos = RT_TOS(iph->tos); 74 fl4.flowi4_tos = RT_TOS(iph->tos);
75 fl.fl4_scope = RT_SCOPE_UNIVERSE; 75 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
76 if (ip_route_output_key(net, &rt, &fl) != 0) 76 rt = ip_route_output_key(net, &fl4);
77 if (IS_ERR(rt))
77 return false; 78 return false;
78 79
79 skb_dst_drop(skb); 80 skb_dst_drop(skb);
@@ -142,18 +143,18 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
142 const struct ipv6hdr *iph = ipv6_hdr(skb); 143 const struct ipv6hdr *iph = ipv6_hdr(skb);
143 struct net *net = pick_net(skb); 144 struct net *net = pick_net(skb);
144 struct dst_entry *dst; 145 struct dst_entry *dst;
145 struct flowi fl; 146 struct flowi6 fl6;
146 147
147 memset(&fl, 0, sizeof(fl)); 148 memset(&fl6, 0, sizeof(fl6));
148 if (info->priv) { 149 if (info->priv) {
149 if (info->priv->oif == -1) 150 if (info->priv->oif == -1)
150 return false; 151 return false;
151 fl.oif = info->priv->oif; 152 fl6.flowi6_oif = info->priv->oif;
152 } 153 }
153 fl.fl6_dst = info->gw.in6; 154 fl6.daddr = info->gw.in6;
154 fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | 155 fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
155 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; 156 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
156 dst = ip6_route_output(net, NULL, &fl); 157 dst = ip6_route_output(net, NULL, &fl6);
157 if (dst == NULL) 158 if (dst == NULL)
158 return false; 159 return false;
159 160
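
The xt_TEE.c route lookup is adapted to the new ip_route_output_key() calling convention: the routing entry comes back as the return value and failures are encoded in the pointer itself, so the caller tests IS_ERR(rt) instead of a separate return code. A standalone copy of the kernel's pointer-error helpers, with a stubbed lookup, to show what that test does:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Userspace copies of the kernel's ERR_PTR/IS_ERR/PTR_ERR helpers: an
 * error is returned as a pointer in the top MAX_ERRNO values of the
 * address space. */
static void *ERR_PTR(long error)
{
	return (void *)error;
}

static long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct rtable { int dummy; };

/* Stub lookup in the new style: the route (or an error) is the return
 * value rather than an output parameter. */
static struct rtable *route_output(int reachable)
{
	static struct rtable rt;

	return reachable ? &rt : ERR_PTR(-ENETUNREACH);
}

int main(void)
{
	struct rtable *rt = route_output(0);

	if (IS_ERR(rt))
		printf("lookup failed: %ld\n", PTR_ERR(rt));
	else
		printf("got a route\n");
	return 0;
}
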
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
new file mode 100644
index 000000000000..b77d383cec78
--- /dev/null
+++ b/net/netfilter/xt_addrtype.c
@@ -0,0 +1,243 @@
1/*
2 * iptables module to match inet_addr_type() of an ip.
3 *
4 * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
5 * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/ip.h>
17#include <net/route.h>
18
19#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
20#include <net/ipv6.h>
21#include <net/ip6_route.h>
22#include <net/ip6_fib.h>
23#endif
24
25#include <linux/netfilter/xt_addrtype.h>
26#include <linux/netfilter/x_tables.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
30MODULE_DESCRIPTION("Xtables: address type match");
31MODULE_ALIAS("ipt_addrtype");
32MODULE_ALIAS("ip6t_addrtype");
33
34#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
35static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
36 const struct in6_addr *addr)
37{
38 const struct nf_afinfo *afinfo;
39 struct flowi6 flow;
40 struct rt6_info *rt;
41 u32 ret;
42 int route_err;
43
44 memset(&flow, 0, sizeof(flow));
45 ipv6_addr_copy(&flow.daddr, addr);
46 if (dev)
47 flow.flowi6_oif = dev->ifindex;
48
49 rcu_read_lock();
50
51 afinfo = nf_get_afinfo(NFPROTO_IPV6);
52 if (afinfo != NULL)
53 route_err = afinfo->route(net, (struct dst_entry **)&rt,
54 flowi6_to_flowi(&flow), !!dev);
55 else
56 route_err = 1;
57
58 rcu_read_unlock();
59
60 if (route_err)
61 return XT_ADDRTYPE_UNREACHABLE;
62
63 if (rt->rt6i_flags & RTF_REJECT)
64 ret = XT_ADDRTYPE_UNREACHABLE;
65 else
66 ret = 0;
67
68 if (rt->rt6i_flags & RTF_LOCAL)
69 ret |= XT_ADDRTYPE_LOCAL;
70 if (rt->rt6i_flags & RTF_ANYCAST)
71 ret |= XT_ADDRTYPE_ANYCAST;
72
73
74 dst_release(&rt->dst);
75 return ret;
76}
77
78static bool match_type6(struct net *net, const struct net_device *dev,
79 const struct in6_addr *addr, u16 mask)
80{
81 int addr_type = ipv6_addr_type(addr);
82
83 if ((mask & XT_ADDRTYPE_MULTICAST) &&
84 !(addr_type & IPV6_ADDR_MULTICAST))
85 return false;
86 if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST))
87 return false;
88 if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY)
89 return false;
90
91 if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
92 XT_ADDRTYPE_UNREACHABLE) & mask)
93 return !!(mask & match_lookup_rt6(net, dev, addr));
94 return true;
95}
96
97static bool
98addrtype_mt6(struct net *net, const struct net_device *dev,
99 const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info)
100{
101 const struct ipv6hdr *iph = ipv6_hdr(skb);
102 bool ret = true;
103
104 if (info->source)
105 ret &= match_type6(net, dev, &iph->saddr, info->source) ^
106 (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
107 if (ret && info->dest)
108 ret &= match_type6(net, dev, &iph->daddr, info->dest) ^
109 !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
110 return ret;
111}
112#endif
113
114static inline bool match_type(struct net *net, const struct net_device *dev,
115 __be32 addr, u_int16_t mask)
116{
117 return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
118}
119
120static bool
121addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
122{
123 struct net *net = dev_net(par->in ? par->in : par->out);
124 const struct xt_addrtype_info *info = par->matchinfo;
125 const struct iphdr *iph = ip_hdr(skb);
126 bool ret = true;
127
128 if (info->source)
129 ret &= match_type(net, NULL, iph->saddr, info->source) ^
130 info->invert_source;
131 if (info->dest)
132 ret &= match_type(net, NULL, iph->daddr, info->dest) ^
133 info->invert_dest;
134
135 return ret;
136}
137
138static bool
139addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
140{
141 struct net *net = dev_net(par->in ? par->in : par->out);
142 const struct xt_addrtype_info_v1 *info = par->matchinfo;
143 const struct iphdr *iph;
144 const struct net_device *dev = NULL;
145 bool ret = true;
146
147 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
148 dev = par->in;
149 else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
150 dev = par->out;
151
152#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
153 if (par->family == NFPROTO_IPV6)
154 return addrtype_mt6(net, dev, skb, info);
155#endif
156 iph = ip_hdr(skb);
157 if (info->source)
158 ret &= match_type(net, dev, iph->saddr, info->source) ^
159 (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
160 if (ret && info->dest)
161 ret &= match_type(net, dev, iph->daddr, info->dest) ^
162 !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
163 return ret;
164}
165
166static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
167{
168 struct xt_addrtype_info_v1 *info = par->matchinfo;
169
170 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
171 info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
172 pr_info("both incoming and outgoing "
173 "interface limitation cannot be selected\n");
174 return -EINVAL;
175 }
176
177 if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
178 (1 << NF_INET_LOCAL_IN)) &&
179 info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
180 pr_info("output interface limitation "
181 "not valid in PREROUTING and INPUT\n");
182 return -EINVAL;
183 }
184
185 if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
186 (1 << NF_INET_LOCAL_OUT)) &&
187 info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
188 pr_info("input interface limitation "
189 "not valid in POSTROUTING and OUTPUT\n");
190 return -EINVAL;
191 }
192
193#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
194 if (par->family == NFPROTO_IPV6) {
195 if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
196 pr_err("ipv6 BLACKHOLE matching not supported\n");
197 return -EINVAL;
198 }
199 if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
 200 			pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
201 return -EINVAL;
202 }
203 if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
204 pr_err("ipv6 does not support BROADCAST matching\n");
205 return -EINVAL;
206 }
207 }
208#endif
209 return 0;
210}
211
212static struct xt_match addrtype_mt_reg[] __read_mostly = {
213 {
214 .name = "addrtype",
215 .family = NFPROTO_IPV4,
216 .match = addrtype_mt_v0,
217 .matchsize = sizeof(struct xt_addrtype_info),
218 .me = THIS_MODULE
219 },
220 {
221 .name = "addrtype",
222 .family = NFPROTO_UNSPEC,
223 .revision = 1,
224 .match = addrtype_mt_v1,
225 .checkentry = addrtype_mt_checkentry_v1,
226 .matchsize = sizeof(struct xt_addrtype_info_v1),
227 .me = THIS_MODULE
228 }
229};
230
231static int __init addrtype_mt_init(void)
232{
233 return xt_register_matches(addrtype_mt_reg,
234 ARRAY_SIZE(addrtype_mt_reg));
235}
236
237static void __exit addrtype_mt_exit(void)
238{
239 xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
240}
241
242module_init(addrtype_mt_init);
243module_exit(addrtype_mt_exit);
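
In both revisions the IPv4 path boils down to match_type(): inet_dev_addr_type() classifies the address as one of the RTN_* route types, and the match succeeds when the corresponding bit is set in the user-supplied mask (the XT_ADDRTYPE_* flags line up with 1 << RTN_*). A stand-alone sketch of that bit test, with a stubbed lookup standing in for inet_dev_addr_type() and made-up RTN_* values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum rtn_type { RTN_UNICAST = 1, RTN_LOCAL = 2, RTN_BROADCAST = 3 };

/* Stand-in for inet_dev_addr_type(): pretend every address is local. */
static enum rtn_type lookup_addr_type(uint32_t addr)
{
	(void)addr;
	return RTN_LOCAL;
}

static bool match_type(uint32_t addr, uint16_t mask)
{
	return !!(mask & (1 << lookup_addr_type(addr)));
}

int main(void)
{
	printf("%d\n", match_type(0x7f000001, 1 << RTN_LOCAL));		/* 1 */
	printf("%d\n", match_type(0x7f000001, 1 << RTN_BROADCAST));	/* 0 */
	return 0;
}
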
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5c5b6b921b84..c6d5a83450c9 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -33,17 +33,17 @@
33 33
34/* we will save the tuples of all connections we care about */ 34/* we will save the tuples of all connections we care about */
35struct xt_connlimit_conn { 35struct xt_connlimit_conn {
36 struct list_head list; 36 struct hlist_node node;
37 struct nf_conntrack_tuple tuple; 37 struct nf_conntrack_tuple tuple;
38 union nf_inet_addr addr;
38}; 39};
39 40
40struct xt_connlimit_data { 41struct xt_connlimit_data {
41 struct list_head iphash[256]; 42 struct hlist_head iphash[256];
42 spinlock_t lock; 43 spinlock_t lock;
43}; 44};
44 45
45static u_int32_t connlimit_rnd __read_mostly; 46static u_int32_t connlimit_rnd __read_mostly;
46static bool connlimit_rnd_inited __read_mostly;
47 47
48static inline unsigned int connlimit_iphash(__be32 addr) 48static inline unsigned int connlimit_iphash(__be32 addr)
49{ 49{
@@ -101,9 +101,9 @@ static int count_them(struct net *net,
101{ 101{
102 const struct nf_conntrack_tuple_hash *found; 102 const struct nf_conntrack_tuple_hash *found;
103 struct xt_connlimit_conn *conn; 103 struct xt_connlimit_conn *conn;
104 struct xt_connlimit_conn *tmp; 104 struct hlist_node *pos, *n;
105 struct nf_conn *found_ct; 105 struct nf_conn *found_ct;
106 struct list_head *hash; 106 struct hlist_head *hash;
107 bool addit = true; 107 bool addit = true;
108 int matches = 0; 108 int matches = 0;
109 109
@@ -115,7 +115,7 @@ static int count_them(struct net *net,
115 rcu_read_lock(); 115 rcu_read_lock();
116 116
117 /* check the saved connections */ 117 /* check the saved connections */
118 list_for_each_entry_safe(conn, tmp, hash, list) { 118 hlist_for_each_entry_safe(conn, pos, n, hash, node) {
119 found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, 119 found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
120 &conn->tuple); 120 &conn->tuple);
121 found_ct = NULL; 121 found_ct = NULL;
@@ -135,7 +135,7 @@ static int count_them(struct net *net,
135 135
136 if (found == NULL) { 136 if (found == NULL) {
137 /* this one is gone */ 137 /* this one is gone */
138 list_del(&conn->list); 138 hlist_del(&conn->node);
139 kfree(conn); 139 kfree(conn);
140 continue; 140 continue;
141 } 141 }
@@ -146,12 +146,12 @@ static int count_them(struct net *net,
146 * closed already -> ditch it 146 * closed already -> ditch it
147 */ 147 */
148 nf_ct_put(found_ct); 148 nf_ct_put(found_ct);
149 list_del(&conn->list); 149 hlist_del(&conn->node);
150 kfree(conn); 150 kfree(conn);
151 continue; 151 continue;
152 } 152 }
153 153
154 if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) 154 if (same_source_net(addr, mask, &conn->addr, family))
155 /* same source network -> be counted! */ 155 /* same source network -> be counted! */
156 ++matches; 156 ++matches;
157 nf_ct_put(found_ct); 157 nf_ct_put(found_ct);
@@ -161,11 +161,12 @@ static int count_them(struct net *net,
161 161
162 if (addit) { 162 if (addit) {
163 /* save the new connection in our list */ 163 /* save the new connection in our list */
164 conn = kzalloc(sizeof(*conn), GFP_ATOMIC); 164 conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
165 if (conn == NULL) 165 if (conn == NULL)
166 return -ENOMEM; 166 return -ENOMEM;
167 conn->tuple = *tuple; 167 conn->tuple = *tuple;
168 list_add(&conn->list, hash); 168 conn->addr = *addr;
169 hlist_add_head(&conn->node, hash);
169 ++matches; 170 ++matches;
170 } 171 }
171 172
@@ -186,17 +187,19 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
186 187
187 ct = nf_ct_get(skb, &ctinfo); 188 ct = nf_ct_get(skb, &ctinfo);
188 if (ct != NULL) 189 if (ct != NULL)
189 tuple_ptr = &ct->tuplehash[0].tuple; 190 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
190 else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), 191 else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
191 par->family, &tuple)) 192 par->family, &tuple))
192 goto hotdrop; 193 goto hotdrop;
193 194
194 if (par->family == NFPROTO_IPV6) { 195 if (par->family == NFPROTO_IPV6) {
195 const struct ipv6hdr *iph = ipv6_hdr(skb); 196 const struct ipv6hdr *iph = ipv6_hdr(skb);
196 memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); 197 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
198 &iph->daddr : &iph->saddr, sizeof(addr.ip6));
197 } else { 199 } else {
198 const struct iphdr *iph = ip_hdr(skb); 200 const struct iphdr *iph = ip_hdr(skb);
199 addr.ip = iph->saddr; 201 addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
202 iph->daddr : iph->saddr;
200 } 203 }
201 204
202 spin_lock_bh(&info->data->lock); 205 spin_lock_bh(&info->data->lock);
@@ -204,13 +207,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
204 &info->mask, par->family); 207 &info->mask, par->family);
205 spin_unlock_bh(&info->data->lock); 208 spin_unlock_bh(&info->data->lock);
206 209
207 if (connections < 0) { 210 if (connections < 0)
208 /* kmalloc failed, drop it entirely */ 211 /* kmalloc failed, drop it entirely */
209 par->hotdrop = true; 212 goto hotdrop;
210 return false;
211 }
212 213
213 return (connections > info->limit) ^ info->inverse; 214 return (connections > info->limit) ^
215 !!(info->flags & XT_CONNLIMIT_INVERT);
214 216
215 hotdrop: 217 hotdrop:
216 par->hotdrop = true; 218 par->hotdrop = true;
@@ -223,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
223 unsigned int i; 225 unsigned int i;
224 int ret; 226 int ret;
225 227
226 if (unlikely(!connlimit_rnd_inited)) { 228 if (unlikely(!connlimit_rnd)) {
227 get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); 229 u_int32_t rand;
228 connlimit_rnd_inited = true; 230
231 do {
232 get_random_bytes(&rand, sizeof(rand));
233 } while (!rand);
234 cmpxchg(&connlimit_rnd, 0, rand);
229 } 235 }
230 ret = nf_ct_l3proto_try_module_get(par->family); 236 ret = nf_ct_l3proto_try_module_get(par->family);
231 if (ret < 0) { 237 if (ret < 0) {
@@ -243,7 +249,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
243 249
244 spin_lock_init(&info->data->lock); 250 spin_lock_init(&info->data->lock);
245 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) 251 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
246 INIT_LIST_HEAD(&info->data->iphash[i]); 252 INIT_HLIST_HEAD(&info->data->iphash[i]);
247 253
248 return 0; 254 return 0;
249} 255}
@@ -252,15 +258,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
252{ 258{
253 const struct xt_connlimit_info *info = par->matchinfo; 259 const struct xt_connlimit_info *info = par->matchinfo;
254 struct xt_connlimit_conn *conn; 260 struct xt_connlimit_conn *conn;
255 struct xt_connlimit_conn *tmp; 261 struct hlist_node *pos, *n;
256 struct list_head *hash = info->data->iphash; 262 struct hlist_head *hash = info->data->iphash;
257 unsigned int i; 263 unsigned int i;
258 264
259 nf_ct_l3proto_module_put(par->family); 265 nf_ct_l3proto_module_put(par->family);
260 266
261 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { 267 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
262 list_for_each_entry_safe(conn, tmp, &hash[i], list) { 268 hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) {
263 list_del(&conn->list); 269 hlist_del(&conn->node);
264 kfree(conn); 270 kfree(conn);
265 } 271 }
266 } 272 }
@@ -268,25 +274,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
268 kfree(info->data); 274 kfree(info->data);
269} 275}
270 276
271static struct xt_match connlimit_mt_reg __read_mostly = { 277static struct xt_match connlimit_mt_reg[] __read_mostly = {
272 .name = "connlimit", 278 {
273 .revision = 0, 279 .name = "connlimit",
274 .family = NFPROTO_UNSPEC, 280 .revision = 0,
275 .checkentry = connlimit_mt_check, 281 .family = NFPROTO_UNSPEC,
276 .match = connlimit_mt, 282 .checkentry = connlimit_mt_check,
277 .matchsize = sizeof(struct xt_connlimit_info), 283 .match = connlimit_mt,
278 .destroy = connlimit_mt_destroy, 284 .matchsize = sizeof(struct xt_connlimit_info),
279 .me = THIS_MODULE, 285 .destroy = connlimit_mt_destroy,
286 .me = THIS_MODULE,
287 },
288 {
289 .name = "connlimit",
290 .revision = 1,
291 .family = NFPROTO_UNSPEC,
292 .checkentry = connlimit_mt_check,
293 .match = connlimit_mt,
294 .matchsize = sizeof(struct xt_connlimit_info),
295 .destroy = connlimit_mt_destroy,
296 .me = THIS_MODULE,
297 },
280}; 298};
281 299
282static int __init connlimit_mt_init(void) 300static int __init connlimit_mt_init(void)
283{ 301{
284 return xt_register_match(&connlimit_mt_reg); 302 return xt_register_matches(connlimit_mt_reg,
303 ARRAY_SIZE(connlimit_mt_reg));
285} 304}
286 305
287static void __exit connlimit_mt_exit(void) 306static void __exit connlimit_mt_exit(void)
288{ 307{
289 xt_unregister_match(&connlimit_mt_reg); 308 xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
290} 309}
291 310
292module_init(connlimit_mt_init); 311module_init(connlimit_mt_init);
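
Two things change here besides the list_head to hlist conversion: the address being counted is cached in the new conn->addr field so count_them() no longer re-derives it from the conntrack tuple, and the hash seed is initialised without the old racy bool flag. The old connlimit_rnd_inited flag allowed two CPUs to initialise the seed concurrently, so one of them could end up hashing with a value that is then overwritten; the new code treats a zero seed as "uninitialised" and lets cmpxchg() install exactly one non-zero value. A stand-alone sketch of that initialise-once pattern, using GCC's __sync builtin as a stand-in for the kernel's cmpxchg():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint32_t connlimit_rnd;	/* 0 means "not yet initialised" */

static void init_rnd_once(void)
{
	if (!connlimit_rnd) {
		uint32_t rnd;

		do {
			rnd = (uint32_t)rand();	/* stand-in for get_random_bytes() */
		} while (!rnd);

		/* Only the first caller to see 0 installs its value; any
		 * concurrent losers simply keep the winner's seed. */
		__sync_val_compare_and_swap(&connlimit_rnd, 0, rnd);
	}
}

int main(void)
{
	init_rnd_once();
	init_rnd_once();	/* second call leaves the seed untouched */
	printf("seed = %u\n", connlimit_rnd);
	return 0;
}
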
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index e536710ad916..61805d7b38aa 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
112 return true; 112 return true;
113} 113}
114 114
115static inline bool
116port_match(u16 min, u16 max, u16 port, bool invert)
117{
118 return (port >= min && port <= max) ^ invert;
119}
120
121static inline bool
122ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
123 const struct nf_conn *ct)
124{
125 const struct nf_conntrack_tuple *tuple;
126
127 tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
128 if ((info->match_flags & XT_CONNTRACK_PROTO) &&
129 (nf_ct_protonum(ct) == info->l4proto) ^
130 !(info->invert_flags & XT_CONNTRACK_PROTO))
131 return false;
132
133 /* Shortcut to match all recognized protocols by using ->src.all. */
134 if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
135 !port_match(info->origsrc_port, info->origsrc_port_high,
136 ntohs(tuple->src.u.all),
137 info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
138 return false;
139
140 if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
141 !port_match(info->origdst_port, info->origdst_port_high,
142 ntohs(tuple->dst.u.all),
143 info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
144 return false;
145
146 tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
147
148 if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
149 !port_match(info->replsrc_port, info->replsrc_port_high,
150 ntohs(tuple->src.u.all),
151 info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
152 return false;
153
154 if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
155 !port_match(info->repldst_port, info->repldst_port_high,
156 ntohs(tuple->dst.u.all),
157 info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
158 return false;
159
160 return true;
161}
162
115static bool 163static bool
116conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, 164conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
117 u16 state_mask, u16 status_mask) 165 u16 state_mask, u16 status_mask)
@@ -147,7 +195,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
147 return info->match_flags & XT_CONNTRACK_STATE; 195 return info->match_flags & XT_CONNTRACK_STATE;
148 if ((info->match_flags & XT_CONNTRACK_DIRECTION) && 196 if ((info->match_flags & XT_CONNTRACK_DIRECTION) &&
149 (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ 197 (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^
150 !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) 198 !(info->invert_flags & XT_CONNTRACK_DIRECTION))
151 return false; 199 return false;
152 200
153 if (info->match_flags & XT_CONNTRACK_ORIGSRC) 201 if (info->match_flags & XT_CONNTRACK_ORIGSRC)
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
170 !(info->invert_flags & XT_CONNTRACK_REPLDST)) 218 !(info->invert_flags & XT_CONNTRACK_REPLDST))
171 return false; 219 return false;
172 220
173 if (!ct_proto_port_check(info, ct)) 221 if (par->match->revision != 3) {
174 return false; 222 if (!ct_proto_port_check(info, ct))
223 return false;
224 } else {
225 if (!ct_proto_port_check_v3(par->matchinfo, ct))
226 return false;
227 }
175 228
176 if ((info->match_flags & XT_CONNTRACK_STATUS) && 229 if ((info->match_flags & XT_CONNTRACK_STATUS) &&
177 (!!(status_mask & ct->status) ^ 230 (!!(status_mask & ct->status) ^
@@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
207 return conntrack_mt(skb, par, info->state_mask, info->status_mask); 260 return conntrack_mt(skb, par, info->state_mask, info->status_mask);
208} 261}
209 262
263static bool
264conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
265{
266 const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
267
268 return conntrack_mt(skb, par, info->state_mask, info->status_mask);
269}
270
210static int conntrack_mt_check(const struct xt_mtchk_param *par) 271static int conntrack_mt_check(const struct xt_mtchk_param *par)
211{ 272{
212 int ret; 273 int ret;
@@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
244 .destroy = conntrack_mt_destroy, 305 .destroy = conntrack_mt_destroy,
245 .me = THIS_MODULE, 306 .me = THIS_MODULE,
246 }, 307 },
308 {
309 .name = "conntrack",
310 .revision = 3,
311 .family = NFPROTO_UNSPEC,
312 .matchsize = sizeof(struct xt_conntrack_mtinfo3),
313 .match = conntrack_mt_v3,
314 .checkentry = conntrack_mt_check,
315 .destroy = conntrack_mt_destroy,
316 .me = THIS_MODULE,
317 },
247}; 318};
248 319
249static int __init conntrack_mt_init(void) 320static int __init conntrack_mt_init(void)
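
Revision 3 of the conntrack match adds port-range matching on top of the existing single-port checks; all four range tests funnel through port_match(), a closed-interval test whose verdict can be flipped by the corresponding invert flag. A stand-alone copy of that helper with a few example calls (the port values are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool port_match(uint16_t min, uint16_t max, uint16_t port, bool invert)
{
	return (port >= min && port <= max) ^ invert;
}

int main(void)
{
	printf("%d\n", port_match(1024, 65535, 8080, false));	/* 1: inside the range */
	printf("%d\n", port_match(1024, 65535, 8080, true));	/* 0: inside, but inverted */
	printf("%d\n", port_match(80, 80, 443, false));		/* 0: outside the range */
	return 0;
}
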
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a5cbae..c7a2e5466bc4 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
22MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); 23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
24MODULE_DESCRIPTION("Xtables: CPU match"); 24MODULE_DESCRIPTION("Xtables: CPU match");
25MODULE_ALIAS("ipt_cpu");
26MODULE_ALIAS("ip6t_cpu");
25 27
26static int cpu_mt_check(const struct xt_mtchk_param *par) 28static int cpu_mt_check(const struct xt_mtchk_param *par)
27{ 29{
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
new file mode 100644
index 000000000000..d9202cdd25c9
--- /dev/null
+++ b/net/netfilter/xt_devgroup.c
@@ -0,0 +1,82 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/netdevice.h>
12
13#include <linux/netfilter/xt_devgroup.h>
14#include <linux/netfilter/x_tables.h>
15
16MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
17MODULE_LICENSE("GPL");
18MODULE_DESCRIPTION("Xtables: Device group match");
19MODULE_ALIAS("ipt_devgroup");
20MODULE_ALIAS("ip6t_devgroup");
21
22static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
23{
24 const struct xt_devgroup_info *info = par->matchinfo;
25
26 if (info->flags & XT_DEVGROUP_MATCH_SRC &&
27 (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^
28 ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
29 return false;
30
31 if (info->flags & XT_DEVGROUP_MATCH_DST &&
32 (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^
33 ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
34 return false;
35
36 return true;
37}
38
39static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
40{
41 const struct xt_devgroup_info *info = par->matchinfo;
42
43 if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC |
44 XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST))
45 return -EINVAL;
46
47 if (info->flags & XT_DEVGROUP_MATCH_SRC &&
48 par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) |
49 (1 << NF_INET_LOCAL_IN) |
50 (1 << NF_INET_FORWARD)))
51 return -EINVAL;
52
53 if (info->flags & XT_DEVGROUP_MATCH_DST &&
54 par->hook_mask & ~((1 << NF_INET_FORWARD) |
55 (1 << NF_INET_LOCAL_OUT) |
56 (1 << NF_INET_POST_ROUTING)))
57 return -EINVAL;
58
59 return 0;
60}
61
62static struct xt_match devgroup_mt_reg __read_mostly = {
63 .name = "devgroup",
64 .match = devgroup_mt,
65 .checkentry = devgroup_mt_checkentry,
66 .matchsize = sizeof(struct xt_devgroup_info),
67 .family = NFPROTO_UNSPEC,
68 .me = THIS_MODULE
69};
70
71static int __init devgroup_mt_init(void)
72{
73 return xt_register_match(&devgroup_mt_reg);
74}
75
76static void __exit devgroup_mt_exit(void)
77{
78 xt_unregister_match(&devgroup_mt_reg);
79}
80
81module_init(devgroup_mt_init);
82module_exit(devgroup_mt_exit);
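
The new devgroup match compares the packet's ingress or egress device group against a user-supplied group under a mask: ((want ^ have) & mask) is non-zero exactly when the masked bits differ, and XORing that with the invert flag yields the final verdict. A stand-alone sketch of the comparison (group values and mask are made-up examples):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool group_matches(uint32_t want, uint32_t have, uint32_t mask, bool invert)
{
	/* The masked bits differ iff ((want ^ have) & mask) is non-zero. */
	bool differ = ((want ^ have) & mask) != 0;

	return !(differ ^ invert);
}

int main(void)
{
	printf("%d\n", group_matches(0x10, 0x10, 0xff, false));	/* 1: same group */
	printf("%d\n", group_matches(0x10, 0x11, 0xff, false));	/* 0: different group */
	printf("%d\n", group_matches(0x10, 0x11, 0xff, true));	/* 1: different, but inverted */
	return 0;
}
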
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 73c33a42f87f..b46626cddd93 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -31,7 +31,7 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
31 pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n", 31 pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n",
32 &iph->saddr, 32 &iph->saddr,
33 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "", 33 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
34 &info->src_max.ip, 34 &info->src_min.ip,
35 &info->src_max.ip); 35 &info->src_max.ip);
36 return false; 36 return false;
37 } 37 }
@@ -76,15 +76,27 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
76 m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6); 76 m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6);
77 m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr); 77 m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr);
78 m ^= !!(info->flags & IPRANGE_SRC_INV); 78 m ^= !!(info->flags & IPRANGE_SRC_INV);
79 if (m) 79 if (m) {
80 pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n",
81 &iph->saddr,
82 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
83 &info->src_min.in6,
84 &info->src_max.in6);
80 return false; 85 return false;
86 }
81 } 87 }
82 if (info->flags & IPRANGE_DST) { 88 if (info->flags & IPRANGE_DST) {
83 m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6); 89 m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6);
84 m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr); 90 m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr);
85 m ^= !!(info->flags & IPRANGE_DST_INV); 91 m ^= !!(info->flags & IPRANGE_DST_INV);
86 if (m) 92 if (m) {
93 pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n",
94 &iph->daddr,
95 (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
96 &info->dst_min.in6,
97 &info->dst_max.in6);
87 return false; 98 return false;
99 }
88 } 100 }
89 return true; 101 return true;
90} 102}
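
The IPv6 hunks above only add the debug output the IPv4 side already had (and the IPv4 hunk fixes a debug line that printed src_max where src_min was meant); the range test itself is unchanged: an address is out of range when it sorts below the configured minimum or above the maximum, comparing the four 32-bit words of the address in network order, and the result is XORed with the invert flag. A stand-alone sketch of that comparison (in6_addr is modelled here as four big-endian 32-bit words; the sample prefix is illustrative):

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct in6 { uint32_t w[4]; };	/* words stored in network byte order */

static bool ipv6_lt(const struct in6 *a, const struct in6 *b)
{
	for (int i = 0; i < 4; i++) {
		if (a->w[i] != b->w[i])
			return ntohl(a->w[i]) < ntohl(b->w[i]);
	}
	return false;	/* equal is not "less than" */
}

static bool in_range(const struct in6 *addr, const struct in6 *min,
		     const struct in6 *max, bool invert)
{
	bool out = ipv6_lt(addr, min) || ipv6_lt(max, addr);

	return !(out ^ invert);
}

int main(void)
{
	struct in6 min  = { { htonl(0x20010db8), 0, 0, htonl(0x01) } };
	struct in6 max  = { { htonl(0x20010db8), 0, 0, htonl(0xff) } };
	struct in6 addr = { { htonl(0x20010db8), 0, 0, htonl(0x42) } };

	printf("%d\n", in_range(&addr, &min, &max, false));	/* 1: inside 2001:db8::1 - 2001:db8::ff */
	return 0;
}
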
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d8aa35..bb10b0717f1b 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 85 /*
86 * Check if the packet belongs to an existing entry 86 * Check if the packet belongs to an existing entry
87 */ 87 */
88 cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); 88 cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
89 if (unlikely(cp == NULL)) { 89 if (unlikely(cp == NULL)) {
90 match = false; 90 match = false;
91 goto out; 91 goto out;
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4327e101c047..846f895cb656 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -62,13 +62,6 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
62 [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, 62 [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) },
63}; 63};
64 64
65static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
66{
67 struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head);
68
69 kfree(f);
70}
71
72static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, 65static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
73 const struct nlmsghdr *nlh, 66 const struct nlmsghdr *nlh,
74 const struct nlattr * const osf_attrs[]) 67 const struct nlattr * const osf_attrs[])
@@ -133,7 +126,7 @@ static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
133 * We are protected by nfnl mutex. 126 * We are protected by nfnl mutex.
134 */ 127 */
135 list_del_rcu(&sf->finger_entry); 128 list_del_rcu(&sf->finger_entry);
136 call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu); 129 kfree_rcu(sf, rcu_head);
137 130
138 err = 0; 131 err = 0;
139 break; 132 break;
@@ -414,7 +407,7 @@ static void __exit xt_osf_fini(void)
414 407
415 list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) { 408 list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) {
416 list_del_rcu(&f->finger_entry); 409 list_del_rcu(&f->finger_entry);
417 call_rcu(&f->rcu_head, xt_osf_finger_free_rcu); 410 kfree_rcu(f, rcu_head);
418 } 411 }
419 } 412 }
420 rcu_read_unlock(); 413 rcu_read_unlock();
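
Both call_rcu() users here had callbacks whose only job was to kfree() the enclosing object; kfree_rcu() expresses that directly, taking the object and the name of its embedded rcu_head member, so the dedicated callbacks can be deleted. A kernel-style sketch of the before/after pattern (illustrative only, not a stand-alone program; the struct and field names are made up):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct obj {
	struct rcu_head rcu;
	int payload;
};

/* Before: a callback that exists only to kfree() the object. */
static void obj_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct obj, rcu));
}

static void obj_release_old(struct obj *o)
{
	call_rcu(&o->rcu, obj_free_rcu);
}

/* After: kfree_rcu() performs the same deferred kfree(), given only the
 * object pointer and the name of its rcu_head member. */
static void obj_release_new(struct obj *o)
{
	kfree_rcu(o, rcu);
}
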
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
new file mode 100644
index 000000000000..b3babaed7719
--- /dev/null
+++ b/net/netfilter/xt_set.c
@@ -0,0 +1,373 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Martin Josefsson <gandalf@wlug.westbo.se>
4 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/* Kernel module which implements the set match and SET target
12 * for netfilter/iptables. */
13
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/version.h>
17
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/xt_set.h>
20
21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
23MODULE_DESCRIPTION("Xtables: IP set match and target module");
24MODULE_ALIAS("xt_SET");
25MODULE_ALIAS("ipt_set");
26MODULE_ALIAS("ip6t_set");
27MODULE_ALIAS("ipt_SET");
28MODULE_ALIAS("ip6t_SET");
29
30static inline int
31match_set(ip_set_id_t index, const struct sk_buff *skb,
32 u8 pf, u8 dim, u8 flags, int inv)
33{
34 if (ip_set_test(index, skb, pf, dim, flags))
35 inv = !inv;
36 return inv;
37}
38
39/* Revision 0 interface: backward compatible with netfilter/iptables */
40
41static bool
42set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
43{
44 const struct xt_set_info_match_v0 *info = par->matchinfo;
45
46 return match_set(info->match_set.index, skb, par->family,
47 info->match_set.u.compat.dim,
48 info->match_set.u.compat.flags,
49 info->match_set.u.compat.flags & IPSET_INV_MATCH);
50}
51
52static void
53compat_flags(struct xt_set_info_v0 *info)
54{
55 u_int8_t i;
56
57 /* Fill out compatibility data according to enum ip_set_kopt */
58 info->u.compat.dim = IPSET_DIM_ZERO;
59 if (info->u.flags[0] & IPSET_MATCH_INV)
60 info->u.compat.flags |= IPSET_INV_MATCH;
61 for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) {
62 info->u.compat.dim++;
63 if (info->u.flags[i] & IPSET_SRC)
64 info->u.compat.flags |= (1<<info->u.compat.dim);
65 }
66}
67
68static int
69set_match_v0_checkentry(const struct xt_mtchk_param *par)
70{
71 struct xt_set_info_match_v0 *info = par->matchinfo;
72 ip_set_id_t index;
73
74 index = ip_set_nfnl_get_byindex(info->match_set.index);
75
76 if (index == IPSET_INVALID_ID) {
 77 		pr_warning("Cannot find set identified by id %u to match\n",
78 info->match_set.index);
79 return -ENOENT;
80 }
81 if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
82 pr_warning("Protocol error: set match dimension "
83 "is over the limit!\n");
84 ip_set_nfnl_put(info->match_set.index);
85 return -ERANGE;
86 }
87
88 /* Fill out compatibility data */
89 compat_flags(&info->match_set);
90
91 return 0;
92}
93
94static void
95set_match_v0_destroy(const struct xt_mtdtor_param *par)
96{
97 struct xt_set_info_match_v0 *info = par->matchinfo;
98
99 ip_set_nfnl_put(info->match_set.index);
100}
101
102static unsigned int
103set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
104{
105 const struct xt_set_info_target_v0 *info = par->targinfo;
106
107 if (info->add_set.index != IPSET_INVALID_ID)
108 ip_set_add(info->add_set.index, skb, par->family,
109 info->add_set.u.compat.dim,
110 info->add_set.u.compat.flags);
111 if (info->del_set.index != IPSET_INVALID_ID)
112 ip_set_del(info->del_set.index, skb, par->family,
113 info->del_set.u.compat.dim,
114 info->del_set.u.compat.flags);
115
116 return XT_CONTINUE;
117}
118
119static int
120set_target_v0_checkentry(const struct xt_tgchk_param *par)
121{
122 struct xt_set_info_target_v0 *info = par->targinfo;
123 ip_set_id_t index;
124
125 if (info->add_set.index != IPSET_INVALID_ID) {
126 index = ip_set_nfnl_get_byindex(info->add_set.index);
127 if (index == IPSET_INVALID_ID) {
128 pr_warning("Cannot find add_set index %u as target\n",
129 info->add_set.index);
130 return -ENOENT;
131 }
132 }
133
134 if (info->del_set.index != IPSET_INVALID_ID) {
135 index = ip_set_nfnl_get_byindex(info->del_set.index);
136 if (index == IPSET_INVALID_ID) {
137 pr_warning("Cannot find del_set index %u as target\n",
138 info->del_set.index);
139 if (info->add_set.index != IPSET_INVALID_ID)
140 ip_set_nfnl_put(info->add_set.index);
141 return -ENOENT;
142 }
143 }
144 if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
145 info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
146 pr_warning("Protocol error: SET target dimension "
147 "is over the limit!\n");
148 if (info->add_set.index != IPSET_INVALID_ID)
149 ip_set_nfnl_put(info->add_set.index);
150 if (info->del_set.index != IPSET_INVALID_ID)
151 ip_set_nfnl_put(info->del_set.index);
152 return -ERANGE;
153 }
154
155 /* Fill out compatibility data */
156 compat_flags(&info->add_set);
157 compat_flags(&info->del_set);
158
159 return 0;
160}
161
162static void
163set_target_v0_destroy(const struct xt_tgdtor_param *par)
164{
165 const struct xt_set_info_target_v0 *info = par->targinfo;
166
167 if (info->add_set.index != IPSET_INVALID_ID)
168 ip_set_nfnl_put(info->add_set.index);
169 if (info->del_set.index != IPSET_INVALID_ID)
170 ip_set_nfnl_put(info->del_set.index);
171}
172
173/* Revision 1: current interface to netfilter/iptables */
174
175static bool
176set_match(const struct sk_buff *skb, struct xt_action_param *par)
177{
178 const struct xt_set_info_match *info = par->matchinfo;
179
180 return match_set(info->match_set.index, skb, par->family,
181 info->match_set.dim,
182 info->match_set.flags,
183 info->match_set.flags & IPSET_INV_MATCH);
184}
185
186static int
187set_match_checkentry(const struct xt_mtchk_param *par)
188{
189 struct xt_set_info_match *info = par->matchinfo;
190 ip_set_id_t index;
191
192 index = ip_set_nfnl_get_byindex(info->match_set.index);
193
194 if (index == IPSET_INVALID_ID) {
 195 		pr_warning("Cannot find set identified by id %u to match\n",
196 info->match_set.index);
197 return -ENOENT;
198 }
199 if (info->match_set.dim > IPSET_DIM_MAX) {
200 pr_warning("Protocol error: set match dimension "
201 "is over the limit!\n");
202 ip_set_nfnl_put(info->match_set.index);
203 return -ERANGE;
204 }
205
206 return 0;
207}
208
209static void
210set_match_destroy(const struct xt_mtdtor_param *par)
211{
212 struct xt_set_info_match *info = par->matchinfo;
213
214 ip_set_nfnl_put(info->match_set.index);
215}
216
217static unsigned int
218set_target(struct sk_buff *skb, const struct xt_action_param *par)
219{
220 const struct xt_set_info_target *info = par->targinfo;
221
222 if (info->add_set.index != IPSET_INVALID_ID)
223 ip_set_add(info->add_set.index,
224 skb, par->family,
225 info->add_set.dim,
226 info->add_set.flags);
227 if (info->del_set.index != IPSET_INVALID_ID)
228 ip_set_del(info->del_set.index,
229 skb, par->family,
230 info->del_set.dim,
231 info->del_set.flags);
232
233 return XT_CONTINUE;
234}
235
236static int
237set_target_checkentry(const struct xt_tgchk_param *par)
238{
239 const struct xt_set_info_target *info = par->targinfo;
240 ip_set_id_t index;
241
242 if (info->add_set.index != IPSET_INVALID_ID) {
243 index = ip_set_nfnl_get_byindex(info->add_set.index);
244 if (index == IPSET_INVALID_ID) {
245 pr_warning("Cannot find add_set index %u as target\n",
246 info->add_set.index);
247 return -ENOENT;
248 }
249 }
250
251 if (info->del_set.index != IPSET_INVALID_ID) {
252 index = ip_set_nfnl_get_byindex(info->del_set.index);
253 if (index == IPSET_INVALID_ID) {
254 pr_warning("Cannot find del_set index %u as target\n",
255 info->del_set.index);
256 if (info->add_set.index != IPSET_INVALID_ID)
257 ip_set_nfnl_put(info->add_set.index);
258 return -ENOENT;
259 }
260 }
261 if (info->add_set.dim > IPSET_DIM_MAX ||
262 info->del_set.dim > IPSET_DIM_MAX) {
263 pr_warning("Protocol error: SET target dimension "
264 "is over the limit!\n");
265 if (info->add_set.index != IPSET_INVALID_ID)
266 ip_set_nfnl_put(info->add_set.index);
267 if (info->del_set.index != IPSET_INVALID_ID)
268 ip_set_nfnl_put(info->del_set.index);
269 return -ERANGE;
270 }
271
272 return 0;
273}
274
275static void
276set_target_destroy(const struct xt_tgdtor_param *par)
277{
278 const struct xt_set_info_target *info = par->targinfo;
279
280 if (info->add_set.index != IPSET_INVALID_ID)
281 ip_set_nfnl_put(info->add_set.index);
282 if (info->del_set.index != IPSET_INVALID_ID)
283 ip_set_nfnl_put(info->del_set.index);
284}
285
286static struct xt_match set_matches[] __read_mostly = {
287 {
288 .name = "set",
289 .family = NFPROTO_IPV4,
290 .revision = 0,
291 .match = set_match_v0,
292 .matchsize = sizeof(struct xt_set_info_match_v0),
293 .checkentry = set_match_v0_checkentry,
294 .destroy = set_match_v0_destroy,
295 .me = THIS_MODULE
296 },
297 {
298 .name = "set",
299 .family = NFPROTO_IPV4,
300 .revision = 1,
301 .match = set_match,
302 .matchsize = sizeof(struct xt_set_info_match),
303 .checkentry = set_match_checkentry,
304 .destroy = set_match_destroy,
305 .me = THIS_MODULE
306 },
307 {
308 .name = "set",
309 .family = NFPROTO_IPV6,
310 .revision = 1,
311 .match = set_match,
312 .matchsize = sizeof(struct xt_set_info_match),
313 .checkentry = set_match_checkentry,
314 .destroy = set_match_destroy,
315 .me = THIS_MODULE
316 },
317};
318
319static struct xt_target set_targets[] __read_mostly = {
320 {
321 .name = "SET",
322 .revision = 0,
323 .family = NFPROTO_IPV4,
324 .target = set_target_v0,
325 .targetsize = sizeof(struct xt_set_info_target_v0),
326 .checkentry = set_target_v0_checkentry,
327 .destroy = set_target_v0_destroy,
328 .me = THIS_MODULE
329 },
330 {
331 .name = "SET",
332 .revision = 1,
333 .family = NFPROTO_IPV4,
334 .target = set_target,
335 .targetsize = sizeof(struct xt_set_info_target),
336 .checkentry = set_target_checkentry,
337 .destroy = set_target_destroy,
338 .me = THIS_MODULE
339 },
340 {
341 .name = "SET",
342 .revision = 1,
343 .family = NFPROTO_IPV6,
344 .target = set_target,
345 .targetsize = sizeof(struct xt_set_info_target),
346 .checkentry = set_target_checkentry,
347 .destroy = set_target_destroy,
348 .me = THIS_MODULE
349 },
350};
351
352static int __init xt_set_init(void)
353{
354 int ret = xt_register_matches(set_matches, ARRAY_SIZE(set_matches));
355
356 if (!ret) {
357 ret = xt_register_targets(set_targets,
358 ARRAY_SIZE(set_targets));
359 if (ret)
360 xt_unregister_matches(set_matches,
361 ARRAY_SIZE(set_matches));
362 }
363 return ret;
364}
365
366static void __exit xt_set_fini(void)
367{
368 xt_unregister_matches(set_matches, ARRAY_SIZE(set_matches));
369 xt_unregister_targets(set_targets, ARRAY_SIZE(set_targets));
370}
371
372module_init(xt_set_init);
373module_exit(xt_set_fini);
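
Both the revision 0 and revision 1 paths reduce to match_set(): inv starts out as the user's invert flag and is flipped when the set lookup succeeds, so the return value is effectively "found XOR invert". A stand-alone sketch of that logic, with the ipset lookup replaced by a trivial membership test over an array (the set contents are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool set_contains(const int *set, int n, int value)
{
	for (int i = 0; i < n; i++)
		if (set[i] == value)
			return true;
	return false;
}

static int match_set(const int *set, int n, int value, int inv)
{
	if (set_contains(set, n, value))
		inv = !inv;
	return inv;	/* equivalent to: found ^ inv */
}

int main(void)
{
	int ports[] = { 22, 80, 443 };

	printf("%d\n", match_set(ports, 3, 80, 0));	/* 1: found, not inverted */
	printf("%d\n", match_set(ports, 3, 80, 1));	/* 0: found, inverted */
	printf("%d\n", match_set(ports, 3, 25, 0));	/* 0: not found */
	return 0;
}
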
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 5f14c8462e30..bae5756b1626 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -422,7 +422,6 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
422 422
423{ 423{
424 int ret_val = -EINVAL; 424 int ret_val = -EINVAL;
425 const char *type_str = "(unknown)";
426 struct netlbl_audit audit_info; 425 struct netlbl_audit audit_info;
427 426
428 if (!info->attrs[NLBL_CIPSOV4_A_DOI] || 427 if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
@@ -432,15 +431,12 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
432 netlbl_netlink_auditinfo(skb, &audit_info); 431 netlbl_netlink_auditinfo(skb, &audit_info);
433 switch (nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE])) { 432 switch (nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE])) {
434 case CIPSO_V4_MAP_TRANS: 433 case CIPSO_V4_MAP_TRANS:
435 type_str = "trans";
436 ret_val = netlbl_cipsov4_add_std(info, &audit_info); 434 ret_val = netlbl_cipsov4_add_std(info, &audit_info);
437 break; 435 break;
438 case CIPSO_V4_MAP_PASS: 436 case CIPSO_V4_MAP_PASS:
439 type_str = "pass";
440 ret_val = netlbl_cipsov4_add_pass(info, &audit_info); 437 ret_val = netlbl_cipsov4_add_pass(info, &audit_info);
441 break; 438 break;
442 case CIPSO_V4_MAP_LOCAL: 439 case CIPSO_V4_MAP_LOCAL:
443 type_str = "local";
444 ret_val = netlbl_cipsov4_add_local(info, &audit_info); 440 ret_val = netlbl_cipsov4_add_local(info, &audit_info);
445 break; 441 break;
446 } 442 }
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index d37b7f80fa37..de0d8e4cbfb6 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -109,7 +109,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
109 * 109 *
110 * Description: 110 * Description:
111 * This is the hashing function for the domain hash table, it returns the 111 * This is the hashing function for the domain hash table, it returns the
112 * correct bucket number for the domain. The caller is responsibile for 112 * correct bucket number for the domain. The caller is responsible for
113 * ensuring that the hash table is protected with either a RCU read lock or the 113 * ensuring that the hash table is protected with either a RCU read lock or the
114 * hash table lock. 114 * hash table lock.
115 * 115 *
@@ -134,7 +134,7 @@ static u32 netlbl_domhsh_hash(const char *key)
134 * 134 *
135 * Description: 135 * Description:
136 * Searches the domain hash table and returns a pointer to the hash table 136 * Searches the domain hash table and returns a pointer to the hash table
137 * entry if found, otherwise NULL is returned. The caller is responsibile for 137 * entry if found, otherwise NULL is returned. The caller is responsible for
138 * ensuring that the hash table is protected with either a RCU read lock or the 138 * ensuring that the hash table is protected with either a RCU read lock or the
139 * hash table lock. 139 * hash table lock.
140 * 140 *
@@ -165,7 +165,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain)
165 * Searches the domain hash table and returns a pointer to the hash table 165 * Searches the domain hash table and returns a pointer to the hash table
166 * entry if an exact match is found, if an exact match is not present in the 166 * entry if an exact match is found, if an exact match is not present in the
167 * hash table then the default entry is returned if valid otherwise NULL is 167 * hash table then the default entry is returned if valid otherwise NULL is
168 * returned. The caller is responsibile ensuring that the hash table is 168 * returned. The caller is responsible ensuring that the hash table is
169 * protected with either a RCU read lock or the hash table lock. 169 * protected with either a RCU read lock or the hash table lock.
170 * 170 *
171 */ 171 */
@@ -193,7 +193,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
193 * 193 *
194 * Description: 194 * Description:
195 * Generate an audit record for adding a new NetLabel/LSM mapping entry with 195 * Generate an audit record for adding a new NetLabel/LSM mapping entry with
196 * the given information. Caller is responsibile for holding the necessary 196 * the given information. Caller is responsible for holding the necessary
197 * locks. 197 * locks.
198 * 198 *
199 */ 199 */
@@ -605,7 +605,7 @@ int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info)
605 * 605 *
606 * Description: 606 * Description:
607 * Look through the domain hash table searching for an entry to match @domain, 607 * Look through the domain hash table searching for an entry to match @domain,
608 * return a pointer to a copy of the entry or NULL. The caller is responsibile 608 * return a pointer to a copy of the entry or NULL. The caller is responsible
609 * for ensuring that rcu_read_[un]lock() is called. 609 * for ensuring that rcu_read_[un]lock() is called.
610 * 610 *
611 */ 611 */
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 998e85e895d0..4f251b19fbcc 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -259,7 +259,7 @@ add_failure:
259 * 259 *
260 * Description: 260 * Description:
261 * This function is a helper function used by the LISTALL and LISTDEF command 261 * This function is a helper function used by the LISTALL and LISTDEF command
262 * handlers. The caller is responsibile for ensuring that the RCU read lock 262 * handlers. The caller is responsible for ensuring that the RCU read lock
263 * is held. Returns zero on success, negative values on failure. 263 * is held. Returns zero on success, negative values on failure.
264 * 264 *
265 */ 265 */
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e2b0a680dd56..9c38658fba8b 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -154,44 +154,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1
154 */ 154 */
155 155
156/** 156/**
157 * netlbl_unlhsh_free_addr4 - Frees an IPv4 address entry from the hash table
158 * @entry: the entry's RCU field
159 *
160 * Description:
161 * This function is designed to be used as a callback to the call_rcu()
162 * function so that memory allocated to a hash table address entry can be
163 * released safely.
164 *
165 */
166static void netlbl_unlhsh_free_addr4(struct rcu_head *entry)
167{
168 struct netlbl_unlhsh_addr4 *ptr;
169
170 ptr = container_of(entry, struct netlbl_unlhsh_addr4, rcu);
171 kfree(ptr);
172}
173
174#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
175/**
176 * netlbl_unlhsh_free_addr6 - Frees an IPv6 address entry from the hash table
177 * @entry: the entry's RCU field
178 *
179 * Description:
180 * This function is designed to be used as a callback to the call_rcu()
181 * function so that memory allocated to a hash table address entry can be
182 * released safely.
183 *
184 */
185static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
186{
187 struct netlbl_unlhsh_addr6 *ptr;
188
189 ptr = container_of(entry, struct netlbl_unlhsh_addr6, rcu);
190 kfree(ptr);
191}
192#endif /* IPv6 */
193
194/**
195 * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table 157 * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table
196 * @entry: the entry's RCU field 158 * @entry: the entry's RCU field
197 * 159 *
@@ -568,7 +530,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
568 if (entry == NULL) 530 if (entry == NULL)
569 return -ENOENT; 531 return -ENOENT;
570 532
571 call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); 533 kfree_rcu(entry, rcu);
572 return 0; 534 return 0;
573} 535}
574 536
@@ -629,7 +591,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
629 if (entry == NULL) 591 if (entry == NULL)
630 return -ENOENT; 592 return -ENOENT;
631 593
632 call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); 594 kfree_rcu(entry, rcu);
633 return 0; 595 return 0;
634} 596}
635#endif /* IPv6 */ 597#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 6caef8b20611..f4fc4c9ad567 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -49,9 +49,9 @@
49static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, 49static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
50 struct netlbl_audit *audit_info) 50 struct netlbl_audit *audit_info)
51{ 51{
52 audit_info->secid = NETLINK_CB(skb).sid; 52 security_task_getsecid(current, &audit_info->secid);
53 audit_info->loginuid = NETLINK_CB(skb).loginuid; 53 audit_info->loginuid = audit_get_loginuid(current);
54 audit_info->sessionid = NETLINK_CB(skb).sessionid; 54 audit_info->sessionid = audit_get_sessionid(current);
55} 55}
56 56
57/* NetLabel NETLINK I/O functions */ 57/* NetLabel NETLINK I/O functions */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 478181d53c55..5fe4f3b04ed3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1362,17 +1362,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1362 1362
1363 NETLINK_CB(skb).pid = nlk->pid; 1363 NETLINK_CB(skb).pid = nlk->pid;
1364 NETLINK_CB(skb).dst_group = dst_group; 1364 NETLINK_CB(skb).dst_group = dst_group;
1365 NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
1366 NETLINK_CB(skb).sessionid = audit_get_sessionid(current);
1367 security_task_getsecid(current, &(NETLINK_CB(skb).sid));
1368 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1365 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1369 1366
1370 /* What can I do? Netlink is asynchronous, so that
1371 we will have to save current capabilities to
1372 check them, when this message will be delivered
1373 to corresponding kernel module. --ANK (980802)
1374 */
1375
1376 err = -EFAULT; 1367 err = -EFAULT;
1377 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 1368 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1378 kfree_skb(skb); 1369 kfree_skb(skb);
@@ -1407,7 +1398,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1407 int noblock = flags&MSG_DONTWAIT; 1398 int noblock = flags&MSG_DONTWAIT;
1408 size_t copied; 1399 size_t copied;
1409 struct sk_buff *skb, *data_skb; 1400 struct sk_buff *skb, *data_skb;
1410 int err; 1401 int err, ret;
1411 1402
1412 if (flags&MSG_OOB) 1403 if (flags&MSG_OOB)
1413 return -EOPNOTSUPP; 1404 return -EOPNOTSUPP;
@@ -1470,8 +1461,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1470 1461
1471 skb_free_datagram(sk, skb); 1462 skb_free_datagram(sk, skb);
1472 1463
1473 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) 1464 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1474 netlink_dump(sk); 1465 ret = netlink_dump(sk);
1466 if (ret) {
1467 sk->sk_err = ret;
1468 sk->sk_error_report(sk);
1469 }
1470 }
1475 1471
1476 scm_recv(sock, msg, siocb->scm, flags); 1472 scm_recv(sock, msg, siocb->scm, flags);
1477out: 1473out:
@@ -1570,12 +1566,6 @@ netlink_kernel_release(struct sock *sk)
1570} 1566}
1571EXPORT_SYMBOL(netlink_kernel_release); 1567EXPORT_SYMBOL(netlink_kernel_release);
1572 1568
1573
1574static void listeners_free_rcu(struct rcu_head *head)
1575{
1576 kfree(container_of(head, struct listeners, rcu));
1577}
1578
1579int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1569int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1580{ 1570{
1581 struct listeners *new, *old; 1571 struct listeners *new, *old;
@@ -1592,7 +1582,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1592 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 1582 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1593 rcu_assign_pointer(tbl->listeners, new); 1583 rcu_assign_pointer(tbl->listeners, new);
1594 1584
1595 call_rcu(&old->rcu, listeners_free_rcu); 1585 kfree_rcu(old, rcu);
1596 } 1586 }
1597 tbl->groups = groups; 1587 tbl->groups = groups;
1598 1588
@@ -1736,6 +1726,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1736 struct netlink_callback *cb; 1726 struct netlink_callback *cb;
1737 struct sock *sk; 1727 struct sock *sk;
1738 struct netlink_sock *nlk; 1728 struct netlink_sock *nlk;
1729 int ret;
1739 1730
1740 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 1731 cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1741 if (cb == NULL) 1732 if (cb == NULL)
@@ -1764,9 +1755,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1764 nlk->cb = cb; 1755 nlk->cb = cb;
1765 mutex_unlock(nlk->cb_mutex); 1756 mutex_unlock(nlk->cb_mutex);
1766 1757
1767 netlink_dump(sk); 1758 ret = netlink_dump(sk);
1759
1768 sock_put(sk); 1760 sock_put(sk);
1769 1761
1762 if (ret)
1763 return ret;
1764
1770 /* We successfully started a dump, by returning -EINTR we 1765 /* We successfully started a dump, by returning -EINTR we
1771 * signal not to send ACK even if it was requested. 1766 * signal not to send ACK even if it was requested.
1772 */ 1767 */
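
Previously an error from netlink_dump() was silently dropped; now both callers propagate it. In recvmsg the failure is stored in sk->sk_err and readers are woken through sk_error_report(), and netlink_dump_start() returns the error instead of the usual -EINTR "dump started" convention. A kernel-style sketch of the reporting pattern used above (illustrative only, not a stand-alone program):

#include <net/sock.h>

static void report_dump_error(struct sock *sk, int ret)
{
	if (ret) {
		sk->sk_err = ret;		/* surface the failure on the socket */
		sk->sk_error_report(sk);	/* wake readers so they notice it */
	}
}
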
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 06cb02796a0e..732152f718e0 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -591,7 +591,6 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
591 return -EINVAL; 591 return -EINVAL;
592 } 592 }
593 if ((dev = nr_dev_get(&addr->fsa_ax25.sax25_call)) == NULL) { 593 if ((dev = nr_dev_get(&addr->fsa_ax25.sax25_call)) == NULL) {
594 SOCK_DEBUG(sk, "NET/ROM: bind failed: invalid node callsign\n");
595 release_sock(sk); 594 release_sock(sk);
596 return -EADDRNOTAVAIL; 595 return -EADDRNOTAVAIL;
597 } 596 }
@@ -632,7 +631,7 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
632 sock_reset_flag(sk, SOCK_ZAPPED); 631 sock_reset_flag(sk, SOCK_ZAPPED);
633 dev_put(dev); 632 dev_put(dev);
634 release_sock(sk); 633 release_sock(sk);
635 SOCK_DEBUG(sk, "NET/ROM: socket is bound\n"); 634
636 return 0; 635 return 0;
637} 636}
638 637
@@ -1082,8 +1081,6 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1082 sax.sax25_call = nr->dest_addr; 1081 sax.sax25_call = nr->dest_addr;
1083 } 1082 }
1084 1083
1085 SOCK_DEBUG(sk, "NET/ROM: sendto: Addresses built.\n");
1086
1087 /* Build a packet - the conventional user limit is 236 bytes. We can 1084 /* Build a packet - the conventional user limit is 236 bytes. We can
1088 do ludicrously large NetROM frames but must not overflow */ 1085 do ludicrously large NetROM frames but must not overflow */
1089 if (len > 65536) { 1086 if (len > 65536) {
@@ -1091,7 +1088,6 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1091 goto out; 1088 goto out;
1092 } 1089 }
1093 1090
1094 SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n");
1095 size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; 1091 size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN;
1096 1092
1097 if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL) 1093 if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
@@ -1105,7 +1101,6 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1105 */ 1101 */
1106 1102
1107 asmptr = skb_push(skb, NR_TRANSPORT_LEN); 1103 asmptr = skb_push(skb, NR_TRANSPORT_LEN);
1108 SOCK_DEBUG(sk, "Building NET/ROM Header.\n");
1109 1104
1110 /* Build a NET/ROM Transport header */ 1105 /* Build a NET/ROM Transport header */
1111 1106
@@ -1114,15 +1109,12 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1114 *asmptr++ = 0; /* To be filled in later */ 1109 *asmptr++ = 0; /* To be filled in later */
1115 *asmptr++ = 0; /* Ditto */ 1110 *asmptr++ = 0; /* Ditto */
1116 *asmptr++ = NR_INFO; 1111 *asmptr++ = NR_INFO;
1117 SOCK_DEBUG(sk, "Built header.\n");
1118 1112
1119 /* 1113 /*
1120 * Put the data on the end 1114 * Put the data on the end
1121 */ 1115 */
1122 skb_put(skb, len); 1116 skb_put(skb, len);
1123 1117
1124 SOCK_DEBUG(sk, "NET/ROM: Appending user data\n");
1125
1126 /* User data follows immediately after the NET/ROM transport header */ 1118 /* User data follows immediately after the NET/ROM transport header */
1127 if (memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len)) { 1119 if (memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len)) {
1128 kfree_skb(skb); 1120 kfree_skb(skb);
@@ -1130,8 +1122,6 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1130 goto out; 1122 goto out;
1131 } 1123 }
1132 1124
1133 SOCK_DEBUG(sk, "NET/ROM: Transmitting buffer\n");
1134
1135 if (sk->sk_state != TCP_ESTABLISHED) { 1125 if (sk->sk_state != TCP_ESTABLISHED) {
1136 kfree_skb(skb); 1126 kfree_skb(skb);
1137 err = -ENOTCONN; 1127 err = -ENOTCONN;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 91cb1d71f018..549527bca87a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -164,7 +164,6 @@ struct packet_mreq_max {
164static int packet_set_ring(struct sock *sk, struct tpacket_req *req, 164static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
165 int closing, int tx_ring); 165 int closing, int tx_ring);
166 166
167#define PGV_FROM_VMALLOC 1
168struct pgv { 167struct pgv {
169 char *buffer; 168 char *buffer;
170}; 169};
@@ -466,7 +465,7 @@ retry:
466 */ 465 */
467 466
468 err = -EMSGSIZE; 467 err = -EMSGSIZE;
469 if (len > dev->mtu + dev->hard_header_len) 468 if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN)
470 goto out_unlock; 469 goto out_unlock;
471 470
472 if (!skb) { 471 if (!skb) {
@@ -497,6 +496,19 @@ retry:
497 goto retry; 496 goto retry;
498 } 497 }
499 498
499 if (len > (dev->mtu + dev->hard_header_len)) {
500 /* Earlier code assumed this would be a VLAN pkt,
501 * double-check this now that we have the actual
502 * packet in hand.
503 */
504 struct ethhdr *ehdr;
505 skb_reset_mac_header(skb);
506 ehdr = eth_hdr(skb);
507 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
508 err = -EMSGSIZE;
509 goto out_unlock;
510 }
511 }
500 512
501 skb->protocol = proto; 513 skb->protocol = proto;
502 skb->dev = dev; 514 skb->dev = dev;
@@ -523,11 +535,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
523{ 535{
524 struct sk_filter *filter; 536 struct sk_filter *filter;
525 537
526 rcu_read_lock_bh(); 538 rcu_read_lock();
527 filter = rcu_dereference_bh(sk->sk_filter); 539 filter = rcu_dereference(sk->sk_filter);
528 if (filter != NULL) 540 if (filter != NULL)
529 res = sk_run_filter(skb, filter->insns); 541 res = SK_RUN_FILTER(filter, skb);
530 rcu_read_unlock_bh(); 542 rcu_read_unlock();
531 543
532 return res; 544 return res;
533} 545}
@@ -954,7 +966,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
954 966
955static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) 967static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
956{ 968{
957 struct socket *sock;
958 struct sk_buff *skb; 969 struct sk_buff *skb;
959 struct net_device *dev; 970 struct net_device *dev;
960 __be16 proto; 971 __be16 proto;
@@ -966,8 +977,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
966 int len_sum = 0; 977 int len_sum = 0;
967 int status = 0; 978 int status = 0;
968 979
969 sock = po->sk.sk_socket;
970
971 mutex_lock(&po->pg_vec_lock); 980 mutex_lock(&po->pg_vec_lock);
972 981
973 err = -EBUSY; 982 err = -EBUSY;
@@ -1200,7 +1209,7 @@ static int packet_snd(struct socket *sock,
1200 } 1209 }
1201 1210
1202 err = -EMSGSIZE; 1211 err = -EMSGSIZE;
1203 if (!gso_type && (len > dev->mtu+reserve)) 1212 if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN))
1204 goto out_unlock; 1213 goto out_unlock;
1205 1214
1206 err = -ENOBUFS; 1215 err = -ENOBUFS;
@@ -1225,6 +1234,20 @@ static int packet_snd(struct socket *sock,
1225 if (err < 0) 1234 if (err < 0)
1226 goto out_free; 1235 goto out_free;
1227 1236
1237 if (!gso_type && (len > dev->mtu + reserve)) {
1238 /* Earlier code assumed this would be a VLAN pkt,
1239 * double-check this now that we have the actual
1240 * packet in hand.
1241 */
1242 struct ethhdr *ehdr;
1243 skb_reset_mac_header(skb);
1244 ehdr = eth_hdr(skb);
1245 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
1246 err = -EMSGSIZE;
1247 goto out_free;
1248 }
1249 }
1250
1228 skb->protocol = proto; 1251 skb->protocol = proto;
1229 skb->dev = dev; 1252 skb->dev = dev;
1230 skb->priority = sk->sk_priority; 1253 skb->priority = sk->sk_priority;
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 0d9b8a220a78..6ec7d55b1769 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -14,15 +14,3 @@ config PHONET
14 14
15 To compile this driver as a module, choose M here: the module 15 To compile this driver as a module, choose M here: the module
16 will be called phonet. If unsure, say N. 16 will be called phonet. If unsure, say N.
17
18config PHONET_PIPECTRLR
19 bool "Phonet Pipe Controller (EXPERIMENTAL)"
20 depends on PHONET && EXPERIMENTAL
21 default N
22 help
23 The Pipe Controller implementation in Phonet stack to support Pipe
24 data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500
25 platform.
26
27 This option is incompatible with older Nokia modems.
28 Say N here unless you really know what you are doing.
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 1072b2c19d31..c6fffd946d42 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -110,6 +110,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
110 sk->sk_protocol = protocol; 110 sk->sk_protocol = protocol;
111 pn = pn_sk(sk); 111 pn = pn_sk(sk);
112 pn->sobject = 0; 112 pn->sobject = 0;
113 pn->dobject = 0;
113 pn->resource = 0; 114 pn->resource = 0;
114 sk->sk_prot->init(sk); 115 sk->sk_prot->init(sk);
115 err = 0; 116 err = 0;
@@ -194,11 +195,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
194 if (skb->pkt_type == PACKET_LOOPBACK) { 195 if (skb->pkt_type == PACKET_LOOPBACK) {
195 skb_reset_mac_header(skb); 196 skb_reset_mac_header(skb);
196 skb_orphan(skb); 197 skb_orphan(skb);
197 if (irq) 198 err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? -ENOBUFS : 0;
198 netif_rx(skb);
199 else
200 netif_rx_ni(skb);
201 err = 0;
202 } else { 199 } else {
203 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 200 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
204 NULL, NULL, skb->len); 201 NULL, NULL, skb->len);
@@ -207,6 +204,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
207 goto drop; 204 goto drop;
208 } 205 }
209 err = dev_queue_xmit(skb); 206 err = dev_queue_xmit(skb);
207 if (unlikely(err > 0))
208 err = net_xmit_errno(err);
210 } 209 }
211 210
212 return err; 211 return err;
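In pn_send(), the loopback branch collapses into a single expression: netif_rx() when called from interrupt context, netif_rx_ni() otherwise, with a non-zero receive code mapped to -ENOBUFS; on the transmit branch, positive dev_queue_xmit() codes are folded through net_xmit_errno() so callers see a negative errno. A sketch of the two mappings (assumption: the NET_* constants shown are stand-ins, and the congestion-counts-as-success rule follows the usual kernel convention):

    /* Sketch: map receive/transmit return codes to errno-style results. */
    #include <errno.h>

    #define NET_RX_SUCCESS   0          /* stand-in constants for illustration */
    #define NET_RX_DROP      1
    #define NET_XMIT_SUCCESS 0x00
    #define NET_XMIT_DROP    0x01
    #define NET_XMIT_CN      0x02       /* congestion notification */

    static int rx_to_errno(int rx_ret)
    {
            return rx_ret == NET_RX_SUCCESS ? 0 : -ENOBUFS;
    }

    static int xmit_to_errno(int xmit_ret)
    {
            if (xmit_ret > 0)           /* DROP, CN, ... */
                    return xmit_ret == NET_XMIT_CN ? 0 : -ENOBUFS;
            return xmit_ret;            /* 0 or already a negative errno */
    }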
@@ -242,8 +241,18 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
242 struct net_device *dev; 241 struct net_device *dev;
243 struct pn_sock *pn = pn_sk(sk); 242 struct pn_sock *pn = pn_sk(sk);
244 int err; 243 int err;
245 u16 src; 244 u16 src, dst;
246 u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR; 245 u8 daddr, saddr, res;
246
247 src = pn->sobject;
248 if (target != NULL) {
249 dst = pn_sockaddr_get_object(target);
250 res = pn_sockaddr_get_resource(target);
251 } else {
252 dst = pn->dobject;
253 res = pn->resource;
254 }
255 daddr = pn_addr(dst);
247 256
248 err = -EHOSTUNREACH; 257 err = -EHOSTUNREACH;
249 if (sk->sk_bound_dev_if) 258 if (sk->sk_bound_dev_if)
@@ -251,10 +260,9 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
251 else if (phonet_address_lookup(net, daddr) == 0) { 260 else if (phonet_address_lookup(net, daddr) == 0) {
252 dev = phonet_device_get(net); 261 dev = phonet_device_get(net);
253 skb->pkt_type = PACKET_LOOPBACK; 262 skb->pkt_type = PACKET_LOOPBACK;
254 } else if (pn_sockaddr_get_object(target) == 0) { 263 } else if (dst == 0) {
255 /* Resource routing (small race until phonet_rcv()) */ 264 /* Resource routing (small race until phonet_rcv()) */
256 struct sock *sk = pn_find_sock_by_res(net, 265 struct sock *sk = pn_find_sock_by_res(net, res);
257 target->spn_resource);
258 if (sk) { 266 if (sk) {
259 sock_put(sk); 267 sock_put(sk);
260 dev = phonet_device_get(net); 268 dev = phonet_device_get(net);
@@ -271,12 +279,10 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
271 if (saddr == PN_NO_ADDR) 279 if (saddr == PN_NO_ADDR)
272 goto drop; 280 goto drop;
273 281
274 src = pn->sobject;
275 if (!pn_addr(src)) 282 if (!pn_addr(src))
276 src = pn_object(saddr, pn_obj(src)); 283 src = pn_object(saddr, pn_obj(src));
277 284
278 err = pn_send(skb, dev, pn_sockaddr_get_object(target), 285 err = pn_send(skb, dev, dst, src, res, 0);
279 src, pn_sockaddr_get_resource(target), 0);
280 dev_put(dev); 286 dev_put(dev);
281 return err; 287 return err;
282 288
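pn_skb_send() now accepts a NULL target: an explicit sockaddr selects the destination object and resource directly, otherwise the socket's connected destination (pn->dobject, pn->resource) is used. A sketch of just that selection, with simplified types in place of struct pn_sock and struct sockaddr_pn:

    /* Sketch (assumption: simplified types; shows only the target-or-connected
     * destination choice, not the routing that follows). */
    #include <stdint.h>
    #include <stddef.h>

    struct pn_sock_lite {
            uint16_t dobject;           /* connected destination object */
            uint8_t  resource;          /* connected resource */
    };

    struct pn_target {
            uint16_t object;
            uint8_t  resource;
    };

    static void pick_destination(const struct pn_sock_lite *pn,
                                 const struct pn_target *target,
                                 uint16_t *dst, uint8_t *res)
    {
            if (target) {               /* sendto()-style explicit address */
                    *dst = target->object;
                    *res = target->resource;
            } else {                    /* send() on a connected socket */
                    *dst = pn->dobject;
                    *res = pn->resource;
            }
    }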
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 3e60f2e4e6c2..f17fd841f948 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -42,7 +42,7 @@
42 * TCP_ESTABLISHED connected pipe in enabled state 42 * TCP_ESTABLISHED connected pipe in enabled state
43 * 43 *
44 * pep_sock locking: 44 * pep_sock locking:
45 * - sk_state, ackq, hlist: sock lock needed 45 * - sk_state, hlist: sock lock needed
46 * - listener: read only 46 * - listener: read only
47 * - pipe_handle: read only 47 * - pipe_handle: read only
48 */ 48 */
@@ -50,11 +50,6 @@
50#define CREDITS_MAX 10 50#define CREDITS_MAX 10
51#define CREDITS_THR 7 51#define CREDITS_THR 7
52 52
53static const struct sockaddr_pn pipe_srv = {
54 .spn_family = AF_PHONET,
55 .spn_resource = 0xD9, /* pipe service */
56};
57
58#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */ 53#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */
59 54
60/* Get the next TLV sub-block. */ 55/* Get the next TLV sub-block. */
@@ -82,236 +77,95 @@ static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen,
82 return data; 77 return data;
83} 78}
84 79
85static int pep_reply(struct sock *sk, struct sk_buff *oskb, 80static struct sk_buff *pep_alloc_skb(struct sock *sk, const void *payload,
86 u8 code, const void *data, int len, gfp_t priority) 81 int len, gfp_t priority)
87{ 82{
88 const struct pnpipehdr *oph = pnp_hdr(oskb); 83 struct sk_buff *skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
89 struct pnpipehdr *ph;
90 struct sk_buff *skb;
91
92 skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
93 if (!skb) 84 if (!skb)
94 return -ENOMEM; 85 return NULL;
95 skb_set_owner_w(skb, sk); 86 skb_set_owner_w(skb, sk);
96 87
97 skb_reserve(skb, MAX_PNPIPE_HEADER); 88 skb_reserve(skb, MAX_PNPIPE_HEADER);
98 __skb_put(skb, len); 89 __skb_put(skb, len);
99 skb_copy_to_linear_data(skb, data, len); 90 skb_copy_to_linear_data(skb, payload, len);
100 __skb_push(skb, sizeof(*ph)); 91 __skb_push(skb, sizeof(struct pnpipehdr));
101 skb_reset_transport_header(skb); 92 skb_reset_transport_header(skb);
102 ph = pnp_hdr(skb); 93 return skb;
103 ph->utid = oph->utid;
104 ph->message_id = oph->message_id + 1; /* REQ -> RESP */
105 ph->pipe_handle = oph->pipe_handle;
106 ph->error_code = code;
107
108 return pn_skb_send(sk, skb, &pipe_srv);
109}
110
111#define PAD 0x00
112
113#ifdef CONFIG_PHONET_PIPECTRLR
114static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len)
115{
116 int i, j;
117 u8 base_fc, final_fc;
118
119 for (i = 0; i < len; i++) {
120 base_fc = host_fc[i];
121 for (j = 0; j < len; j++) {
122 if (remote_fc[j] == base_fc) {
123 final_fc = base_fc;
124 goto done;
125 }
126 }
127 }
128 return -EINVAL;
129
130done:
131 return final_fc;
132
133}
134
135static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb,
136 u8 *pref_rx_fc, u8 *req_tx_fc)
137{
138 struct pnpipehdr *hdr;
139 u8 n_sb;
140
141 if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
142 return -EINVAL;
143
144 hdr = pnp_hdr(skb);
145 n_sb = hdr->data[4];
146
147 __skb_pull(skb, sizeof(*hdr) + 4);
148 while (n_sb > 0) {
149 u8 type, buf[3], len = sizeof(buf);
150 u8 *data = pep_get_sb(skb, &type, &len, buf);
151
152 if (data == NULL)
153 return -EINVAL;
154
155 switch (type) {
156 case PN_PIPE_SB_REQUIRED_FC_TX:
157 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
158 break;
159 req_tx_fc[0] = data[2];
160 req_tx_fc[1] = data[3];
161 req_tx_fc[2] = data[4];
162 break;
163
164 case PN_PIPE_SB_PREFERRED_FC_RX:
165 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
166 break;
167 pref_rx_fc[0] = data[2];
168 pref_rx_fc[1] = data[3];
169 pref_rx_fc[2] = data[4];
170 break;
171
172 }
173 n_sb--;
174 }
175 return 0;
176} 94}
177 95
178static int pipe_handler_send_req(struct sock *sk, u8 utid, 96static int pep_reply(struct sock *sk, struct sk_buff *oskb, u8 code,
179 u8 msg_id, gfp_t priority) 97 const void *data, int len, gfp_t priority)
180{ 98{
181 int len; 99 const struct pnpipehdr *oph = pnp_hdr(oskb);
182 struct pnpipehdr *ph; 100 struct pnpipehdr *ph;
183 struct sk_buff *skb; 101 struct sk_buff *skb;
184 struct pep_sock *pn = pep_sk(sk); 102 struct sockaddr_pn peer;
185
186 static const u8 data[4] = {
187 PAD, PAD, PAD, PAD,
188 };
189 103
190 switch (msg_id) { 104 skb = pep_alloc_skb(sk, data, len, priority);
191 case PNS_PEP_CONNECT_REQ:
192 len = sizeof(data);
193 break;
194
195 case PNS_PEP_DISCONNECT_REQ:
196 case PNS_PEP_ENABLE_REQ:
197 case PNS_PEP_DISABLE_REQ:
198 len = 0;
199 break;
200
201 default:
202 return -EINVAL;
203 }
204
205 skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
206 if (!skb) 105 if (!skb)
207 return -ENOMEM; 106 return -ENOMEM;
208 skb_set_owner_w(skb, sk);
209 107
210 skb_reserve(skb, MAX_PNPIPE_HEADER);
211 if (len) {
212 __skb_put(skb, len);
213 skb_copy_to_linear_data(skb, data, len);
214 }
215 __skb_push(skb, sizeof(*ph));
216 skb_reset_transport_header(skb);
217 ph = pnp_hdr(skb); 108 ph = pnp_hdr(skb);
218 ph->utid = utid; 109 ph->utid = oph->utid;
219 ph->message_id = msg_id; 110 ph->message_id = oph->message_id + 1; /* REQ -> RESP */
220 ph->pipe_handle = pn->pipe_handle; 111 ph->pipe_handle = oph->pipe_handle;
221 ph->error_code = PN_PIPE_NO_ERROR; 112 ph->error_code = code;
222 113
223 return pn_skb_send(sk, skb, &pn->remote_pep); 114 pn_skb_get_src_sockaddr(oskb, &peer);
115 return pn_skb_send(sk, skb, &peer);
224} 116}
225 117
226static int pipe_handler_send_created_ind(struct sock *sk, 118static int pep_indicate(struct sock *sk, u8 id, u8 code,
227 u8 utid, u8 msg_id) 119 const void *data, int len, gfp_t priority)
228{ 120{
229 int err_code; 121 struct pep_sock *pn = pep_sk(sk);
230 struct pnpipehdr *ph; 122 struct pnpipehdr *ph;
231 struct sk_buff *skb; 123 struct sk_buff *skb;
232 124
233 struct pep_sock *pn = pep_sk(sk); 125 skb = pep_alloc_skb(sk, data, len, priority);
234 static u8 data[4] = {
235 0x03, 0x04,
236 };
237 data[2] = pn->tx_fc;
238 data[3] = pn->rx_fc;
239
240 /*
241 * actually, below is number of sub-blocks and not error code.
242 * Pipe_created_ind message format does not have any
243 * error code field. However, the Phonet stack will always send
244 * an error code as part of pnpipehdr. So, use that err_code to
245 * specify the number of sub-blocks.
246 */
247 err_code = 0x01;
248
249 skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC);
250 if (!skb) 126 if (!skb)
251 return -ENOMEM; 127 return -ENOMEM;
252 skb_set_owner_w(skb, sk);
253 128
254 skb_reserve(skb, MAX_PNPIPE_HEADER);
255 __skb_put(skb, sizeof(data));
256 skb_copy_to_linear_data(skb, data, sizeof(data));
257 __skb_push(skb, sizeof(*ph));
258 skb_reset_transport_header(skb);
259 ph = pnp_hdr(skb); 129 ph = pnp_hdr(skb);
260 ph->utid = utid; 130 ph->utid = 0;
261 ph->message_id = msg_id; 131 ph->message_id = id;
262 ph->pipe_handle = pn->pipe_handle; 132 ph->pipe_handle = pn->pipe_handle;
263 ph->error_code = err_code; 133 ph->data[0] = code;
264 134 return pn_skb_send(sk, skb, NULL);
265 return pn_skb_send(sk, skb, &pn->remote_pep);
266} 135}
267 136
268static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) 137#define PAD 0x00
138
139static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
140 const void *data, int len)
269{ 141{
270 int err_code; 142 struct pep_sock *pn = pep_sk(sk);
271 struct pnpipehdr *ph; 143 struct pnpipehdr *ph;
272 struct sk_buff *skb; 144 struct sk_buff *skb;
273 struct pep_sock *pn = pep_sk(sk);
274
275 /*
276 * actually, below is a filler.
277 * Pipe_enabled/disabled_ind message format does not have any
278 * error code field. However, the Phonet stack will always send
279 * an error code as part of pnpipehdr. So, use that err_code to
280 * specify the filler value.
281 */
282 err_code = 0x0;
283 145
284 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); 146 skb = pep_alloc_skb(sk, data, len, GFP_KERNEL);
285 if (!skb) 147 if (!skb)
286 return -ENOMEM; 148 return -ENOMEM;
287 skb_set_owner_w(skb, sk);
288 149
289 skb_reserve(skb, MAX_PNPIPE_HEADER);
290 __skb_push(skb, sizeof(*ph));
291 skb_reset_transport_header(skb);
292 ph = pnp_hdr(skb); 150 ph = pnp_hdr(skb);
293 ph->utid = utid; 151 ph->utid = id; /* whatever */
294 ph->message_id = msg_id; 152 ph->message_id = id;
295 ph->pipe_handle = pn->pipe_handle; 153 ph->pipe_handle = pn->pipe_handle;
296 ph->error_code = err_code; 154 ph->data[0] = code;
297 155 return pn_skb_send(sk, skb, NULL);
298 return pn_skb_send(sk, skb, &pn->remote_pep);
299} 156}
300 157
301static int pipe_handler_enable_pipe(struct sock *sk, int enable) 158static int pipe_handler_send_created_ind(struct sock *sk)
302{ 159{
303 int utid, req; 160 struct pep_sock *pn = pep_sk(sk);
304 161 u8 data[4] = {
305 if (enable) { 162 PN_PIPE_SB_NEGOTIATED_FC, pep_sb_size(2),
306 utid = PNS_PIPE_ENABLE_UTID; 163 pn->tx_fc, pn->rx_fc,
307 req = PNS_PEP_ENABLE_REQ; 164 };
308 } else { 165
309 utid = PNS_PIPE_DISABLE_UTID; 166 return pep_indicate(sk, PNS_PIPE_CREATED_IND, 1 /* sub-blocks */,
310 req = PNS_PEP_DISABLE_REQ; 167 data, 4, GFP_ATOMIC);
311 }
312 return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC);
313} 168}
314#endif
315 169
316static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) 170static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
317{ 171{
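The PEP control senders (pep_reply, pep_indicate, pipe_handler_request, and later pipe_do_remove) are all rebuilt on a single pep_alloc_skb() helper: allocate, reserve the Phonet header room, copy the payload, push the pipe header, and leave only the header fields to the caller. A sketch of that reserve/put/push layering over a plain byte buffer rather than a real sk_buff:

    /* Sketch (assumption: illustrative userspace buffer, not struct sk_buff;
     * the constants stand in for MAX_PNPIPE_HEADER and the pipe header size). */
    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>

    #define HDR_ROOM     16
    #define PIPE_HDR_LEN 4

    struct buf {
            uint8_t *head, *data;
            size_t   len;
    };

    static struct buf *ctrl_alloc(const void *payload, size_t len)
    {
            struct buf *b = malloc(sizeof(*b));

            if (!b)
                    return NULL;
            b->head = malloc(HDR_ROOM + len);
            if (!b->head) {
                    free(b);
                    return NULL;
            }
            b->data = b->head + HDR_ROOM;           /* reserve header room */
            if (len)
                    memcpy(b->data, payload, len);  /* put the payload */
            b->data -= PIPE_HDR_LEN;                /* push the pipe header */
            b->len   = PIPE_HDR_LEN + len;
            return b;                               /* caller fills the header in */
    }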
@@ -334,11 +188,12 @@ static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
334 GFP_KERNEL); 188 GFP_KERNEL);
335} 189}
336 190
337static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) 191static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code,
192 gfp_t priority)
338{ 193{
339 static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; 194 static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ };
340 WARN_ON(code == PN_PIPE_NO_ERROR); 195 WARN_ON(code == PN_PIPE_NO_ERROR);
341 return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); 196 return pep_reply(sk, skb, code, data, sizeof(data), priority);
342} 197}
343 198
344/* Control requests are not sent by the pipe service and have a specific 199/* Control requests are not sent by the pipe service and have a specific
@@ -350,23 +205,21 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
350 struct sk_buff *skb; 205 struct sk_buff *skb;
351 struct pnpipehdr *ph; 206 struct pnpipehdr *ph;
352 struct sockaddr_pn dst; 207 struct sockaddr_pn dst;
208 u8 data[4] = {
209 oph->data[0], /* PEP type */
210 code, /* error code, at an unusual offset */
211 PAD, PAD,
212 };
353 213
354 skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); 214 skb = pep_alloc_skb(sk, data, 4, priority);
355 if (!skb) 215 if (!skb)
356 return -ENOMEM; 216 return -ENOMEM;
357 skb_set_owner_w(skb, sk);
358
359 skb_reserve(skb, MAX_PHONET_HEADER);
360 ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4);
361 217
218 ph = pnp_hdr(skb);
362 ph->utid = oph->utid; 219 ph->utid = oph->utid;
363 ph->message_id = PNS_PEP_CTRL_RESP; 220 ph->message_id = PNS_PEP_CTRL_RESP;
364 ph->pipe_handle = oph->pipe_handle; 221 ph->pipe_handle = oph->pipe_handle;
365 ph->data[0] = oph->data[1]; /* CTRL id */ 222 ph->data[0] = oph->data[1]; /* CTRL id */
366 ph->data[1] = oph->data[0]; /* PEP type */
367 ph->data[2] = code; /* error code, at an usual offset */
368 ph->data[3] = PAD;
369 ph->data[4] = PAD;
370 223
371 pn_skb_get_src_sockaddr(oskb, &dst); 224 pn_skb_get_src_sockaddr(oskb, &dst);
372 return pn_skb_send(sk, skb, &dst); 225 return pn_skb_send(sk, skb, &dst);
@@ -374,38 +227,15 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
374 227
375static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) 228static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
376{ 229{
377 struct pep_sock *pn = pep_sk(sk); 230 u8 data[4] = { type, PAD, PAD, status };
378 struct pnpipehdr *ph;
379 struct sk_buff *skb;
380 231
381 skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); 232 return pep_indicate(sk, PNS_PEP_STATUS_IND, PN_PEP_TYPE_COMMON,
382 if (!skb) 233 data, 4, priority);
383 return -ENOMEM;
384 skb_set_owner_w(skb, sk);
385
386 skb_reserve(skb, MAX_PNPIPE_HEADER + 4);
387 __skb_push(skb, sizeof(*ph) + 4);
388 skb_reset_transport_header(skb);
389 ph = pnp_hdr(skb);
390 ph->utid = 0;
391 ph->message_id = PNS_PEP_STATUS_IND;
392 ph->pipe_handle = pn->pipe_handle;
393 ph->pep_type = PN_PEP_TYPE_COMMON;
394 ph->data[1] = type;
395 ph->data[2] = PAD;
396 ph->data[3] = PAD;
397 ph->data[4] = status;
398
399#ifdef CONFIG_PHONET_PIPECTRLR
400 return pn_skb_send(sk, skb, &pn->remote_pep);
401#else
402 return pn_skb_send(sk, skb, &pipe_srv);
403#endif
404} 234}
405 235
406/* Send our RX flow control information to the sender. 236/* Send our RX flow control information to the sender.
407 * Socket must be locked. */ 237 * Socket must be locked. */
408static void pipe_grant_credits(struct sock *sk) 238static void pipe_grant_credits(struct sock *sk, gfp_t priority)
409{ 239{
410 struct pep_sock *pn = pep_sk(sk); 240 struct pep_sock *pn = pep_sk(sk);
411 241
@@ -415,16 +245,16 @@ static void pipe_grant_credits(struct sock *sk)
415 case PN_LEGACY_FLOW_CONTROL: /* TODO */ 245 case PN_LEGACY_FLOW_CONTROL: /* TODO */
416 break; 246 break;
417 case PN_ONE_CREDIT_FLOW_CONTROL: 247 case PN_ONE_CREDIT_FLOW_CONTROL:
418 pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, 248 if (pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL,
419 PEP_IND_READY, GFP_ATOMIC); 249 PEP_IND_READY, priority) == 0)
420 pn->rx_credits = 1; 250 pn->rx_credits = 1;
421 break; 251 break;
422 case PN_MULTI_CREDIT_FLOW_CONTROL: 252 case PN_MULTI_CREDIT_FLOW_CONTROL:
423 if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) 253 if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX)
424 break; 254 break;
425 if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, 255 if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS,
426 CREDITS_MAX - pn->rx_credits, 256 CREDITS_MAX - pn->rx_credits,
427 GFP_ATOMIC) == 0) 257 priority) == 0)
428 pn->rx_credits = CREDITS_MAX; 258 pn->rx_credits = CREDITS_MAX;
429 break; 259 break;
430 } 260 }
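pipe_grant_credits() now takes the allocation priority from its caller and only updates the bookkeeping when the status indication was actually sent: one-credit mode re-arms a single credit, multi-credit mode tops the peer up to CREDITS_MAX once outstanding credits fall below the threshold. A sketch of the accounting, with the actual PNS_PEP_STATUS_IND transmission stubbed out:

    /* Sketch (assumption: CREDITS_MAX/CREDITS_THR as above; send_grant() is a
     * stand-in for sending the status indication). */
    #define CREDITS_MAX 10
    #define CREDITS_THR 7

    enum fc_mode { FC_NONE, FC_LEGACY, FC_ONE_CREDIT, FC_MULTI_CREDIT };

    static int send_grant(unsigned int credits)
    {
            (void)credits;
            return 0;                   /* pretend the indication was sent */
    }

    static void grant_credits(enum fc_mode mode, unsigned int *rx_credits)
    {
            switch (mode) {
            case FC_ONE_CREDIT:
                    if (send_grant(1) == 0)
                            *rx_credits = 1;    /* re-arm a single credit */
                    break;
            case FC_MULTI_CREDIT:
                    if (*rx_credits + CREDITS_THR > CREDITS_MAX)
                            break;              /* peer still has enough */
                    if (send_grant(CREDITS_MAX - *rx_credits) == 0)
                            *rx_credits = CREDITS_MAX;
                    break;
            default:
                    break;
            }
    }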
@@ -522,7 +352,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
522 352
523 switch (hdr->message_id) { 353 switch (hdr->message_id) {
524 case PNS_PEP_CONNECT_REQ: 354 case PNS_PEP_CONNECT_REQ:
525 pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); 355 pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_ATOMIC);
526 break; 356 break;
527 357
528 case PNS_PEP_DISCONNECT_REQ: 358 case PNS_PEP_DISCONNECT_REQ:
@@ -532,35 +362,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
532 sk->sk_state_change(sk); 362 sk->sk_state_change(sk);
533 break; 363 break;
534 364
535#ifdef CONFIG_PHONET_PIPECTRLR
536 case PNS_PEP_DISCONNECT_RESP:
537 pn->pipe_state = PIPE_IDLE;
538 sk->sk_state = TCP_CLOSE;
539 break;
540#endif
541
542 case PNS_PEP_ENABLE_REQ: 365 case PNS_PEP_ENABLE_REQ:
543 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ 366 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
544 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 367 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
545 break; 368 break;
546 369
547#ifdef CONFIG_PHONET_PIPECTRLR
548 case PNS_PEP_ENABLE_RESP:
549 pn->pipe_state = PIPE_ENABLED;
550 pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID,
551 PNS_PIPE_ENABLED_IND);
552
553 if (!pn_flow_safe(pn->tx_fc)) {
554 atomic_set(&pn->tx_credits, 1);
555 sk->sk_write_space(sk);
556 }
557 if (sk->sk_state == TCP_ESTABLISHED)
558 break; /* Nothing to do */
559 sk->sk_state = TCP_ESTABLISHED;
560 pipe_grant_credits(sk);
561 break;
562#endif
563
564 case PNS_PEP_RESET_REQ: 370 case PNS_PEP_RESET_REQ:
565 switch (hdr->state_after_reset) { 371 switch (hdr->state_after_reset) {
566 case PN_PIPE_DISABLE: 372 case PN_PIPE_DISABLE:
@@ -579,17 +385,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
579 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 385 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
580 break; 386 break;
581 387
582#ifdef CONFIG_PHONET_PIPECTRLR
583 case PNS_PEP_DISABLE_RESP:
584 pn->pipe_state = PIPE_DISABLED;
585 atomic_set(&pn->tx_credits, 0);
586 pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID,
587 PNS_PIPE_DISABLED_IND);
588 sk->sk_state = TCP_SYN_RECV;
589 pn->rx_credits = 0;
590 break;
591#endif
592
593 case PNS_PEP_CTRL_REQ: 388 case PNS_PEP_CTRL_REQ:
594 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { 389 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
595 atomic_inc(&sk->sk_drops); 390 atomic_inc(&sk->sk_drops);
@@ -607,7 +402,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
607 if (!pn_flow_safe(pn->rx_fc)) { 402 if (!pn_flow_safe(pn->rx_fc)) {
608 err = sock_queue_rcv_skb(sk, skb); 403 err = sock_queue_rcv_skb(sk, skb);
609 if (!err) 404 if (!err)
610 return 0; 405 return NET_RX_SUCCESS;
406 err = -ENOBUFS;
611 break; 407 break;
612 } 408 }
613 409
@@ -645,7 +441,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
645 if (sk->sk_state == TCP_ESTABLISHED) 441 if (sk->sk_state == TCP_ESTABLISHED)
646 break; /* Nothing to do */ 442 break; /* Nothing to do */
647 sk->sk_state = TCP_ESTABLISHED; 443 sk->sk_state = TCP_ESTABLISHED;
648 pipe_grant_credits(sk); 444 pipe_grant_credits(sk, GFP_ATOMIC);
649 break; 445 break;
650 446
651 case PNS_PIPE_DISABLED_IND: 447 case PNS_PIPE_DISABLED_IND:
@@ -660,7 +456,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
660 } 456 }
661out: 457out:
662 kfree_skb(skb); 458 kfree_skb(skb);
663 return err; 459 return (err == -ENOBUFS) ? NET_RX_DROP : NET_RX_SUCCESS;
664 460
665queue: 461queue:
666 skb->dev = NULL; 462 skb->dev = NULL;
@@ -669,7 +465,7 @@ queue:
669 skb_queue_tail(queue, skb); 465 skb_queue_tail(queue, skb);
670 if (!sock_flag(sk, SOCK_DEAD)) 466 if (!sock_flag(sk, SOCK_DEAD))
671 sk->sk_data_ready(sk, err); 467 sk->sk_data_ready(sk, err);
672 return 0; 468 return NET_RX_SUCCESS;
673} 469}
674 470
675/* Destroy connected sock. */ 471/* Destroy connected sock. */
@@ -681,133 +477,126 @@ static void pipe_destruct(struct sock *sk)
681 skb_queue_purge(&pn->ctrlreq_queue); 477 skb_queue_purge(&pn->ctrlreq_queue);
682} 478}
683 479
684#ifdef CONFIG_PHONET_PIPECTRLR 480static u8 pipe_negotiate_fc(const u8 *fcs, unsigned n)
685static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
686{ 481{
687 struct pep_sock *pn = pep_sk(sk); 482 unsigned i;
688 u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1}; 483 u8 final_fc = PN_NO_FLOW_CONTROL;
689 u8 remote_pref_rx_fc[3], remote_req_tx_fc[3];
690 u8 negotiated_rx_fc, negotiated_tx_fc;
691 int ret;
692
693 pipe_get_flow_info(sk, skb, remote_pref_rx_fc,
694 remote_req_tx_fc);
695 negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc,
696 host_pref_rx_fc,
697 sizeof(host_pref_rx_fc));
698 negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc,
699 remote_pref_rx_fc,
700 sizeof(host_pref_rx_fc));
701
702 pn->pipe_state = PIPE_DISABLED;
703 sk->sk_state = TCP_SYN_RECV;
704 sk->sk_backlog_rcv = pipe_do_rcv;
705 sk->sk_destruct = pipe_destruct;
706 pn->rx_credits = 0;
707 pn->rx_fc = negotiated_rx_fc;
708 pn->tx_fc = negotiated_tx_fc;
709 sk->sk_state_change(sk);
710 484
711 ret = pipe_handler_send_created_ind(sk, 485 for (i = 0; i < n; i++) {
712 PNS_PIPE_CREATED_IND_UTID, 486 u8 fc = fcs[i];
713 PNS_PIPE_CREATED_IND
714 );
715 487
716 return ret; 488 if (fc > final_fc && fc < PN_MAX_FLOW_CONTROL)
489 final_fc = fc;
490 }
491 return final_fc;
717} 492}
718#endif
719 493
720static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) 494static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
721{ 495{
722 struct sock *newsk; 496 struct pep_sock *pn = pep_sk(sk);
723 struct pep_sock *newpn, *pn = pep_sk(sk);
724 struct pnpipehdr *hdr; 497 struct pnpipehdr *hdr;
725 struct sockaddr_pn dst; 498 u8 n_sb;
726 u16 peer_type;
727 u8 pipe_handle, enabled, n_sb;
728 u8 aligned = 0;
729 499
730 if (!pskb_pull(skb, sizeof(*hdr) + 4)) 500 if (!pskb_pull(skb, sizeof(*hdr) + 4))
731 return -EINVAL; 501 return -EINVAL;
732 502
733 hdr = pnp_hdr(skb); 503 hdr = pnp_hdr(skb);
734 pipe_handle = hdr->pipe_handle; 504 if (hdr->error_code != PN_PIPE_NO_ERROR)
735 switch (hdr->state_after_connect) { 505 return -ECONNREFUSED;
736 case PN_PIPE_DISABLE:
737 enabled = 0;
738 break;
739 case PN_PIPE_ENABLE:
740 enabled = 1;
741 break;
742 default:
743 pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM);
744 return -EINVAL;
745 }
746 peer_type = hdr->other_pep_type << 8;
747
748 if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) {
749 pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
750 return -ENOBUFS;
751 }
752 506
753 /* Parse sub-blocks (options) */ 507 /* Parse sub-blocks */
754 n_sb = hdr->data[4]; 508 n_sb = hdr->data[4];
755 while (n_sb > 0) { 509 while (n_sb > 0) {
756 u8 type, buf[1], len = sizeof(buf); 510 u8 type, buf[6], len = sizeof(buf);
757 const u8 *data = pep_get_sb(skb, &type, &len, buf); 511 const u8 *data = pep_get_sb(skb, &type, &len, buf);
758 512
759 if (data == NULL) 513 if (data == NULL)
760 return -EINVAL; 514 return -EINVAL;
515
761 switch (type) { 516 switch (type) {
762 case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: 517 case PN_PIPE_SB_REQUIRED_FC_TX:
763 if (len < 1) 518 if (len < 2 || len < data[0])
764 return -EINVAL; 519 break;
765 peer_type = (peer_type & 0xff00) | data[0]; 520 pn->tx_fc = pipe_negotiate_fc(data + 2, len - 2);
766 break; 521 break;
767 case PN_PIPE_SB_ALIGNED_DATA: 522
768 aligned = data[0] != 0; 523 case PN_PIPE_SB_PREFERRED_FC_RX:
524 if (len < 2 || len < data[0])
525 break;
526 pn->rx_fc = pipe_negotiate_fc(data + 2, len - 2);
769 break; 527 break;
528
770 } 529 }
771 n_sb--; 530 n_sb--;
772 } 531 }
773 532
774 skb = skb_clone(skb, GFP_ATOMIC); 533 return pipe_handler_send_created_ind(sk);
775 if (!skb) 534}
776 return -ENOMEM;
777 535
778 /* Create a new to-be-accepted sock */ 536/* Queue an skb to an actively connected sock.
779 newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); 537 * Socket lock must be held. */
780 if (!newsk) { 538static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
781 kfree_skb(skb); 539{
782 return -ENOMEM; 540 struct pep_sock *pn = pep_sk(sk);
783 } 541 struct pnpipehdr *hdr = pnp_hdr(skb);
784 sock_init_data(NULL, newsk); 542 int err = NET_RX_SUCCESS;
785 newsk->sk_state = TCP_SYN_RECV;
786 newsk->sk_backlog_rcv = pipe_do_rcv;
787 newsk->sk_protocol = sk->sk_protocol;
788 newsk->sk_destruct = pipe_destruct;
789 543
790 newpn = pep_sk(newsk); 544 switch (hdr->message_id) {
791 pn_skb_get_dst_sockaddr(skb, &dst); 545 case PNS_PIPE_ALIGNED_DATA:
792 newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); 546 __skb_pull(skb, 1);
793 newpn->pn_sk.resource = pn->pn_sk.resource; 547 /* fall through */
794 skb_queue_head_init(&newpn->ctrlreq_queue); 548 case PNS_PIPE_DATA:
795 newpn->pipe_handle = pipe_handle; 549 __skb_pull(skb, 3); /* Pipe data header */
796 atomic_set(&newpn->tx_credits, 0); 550 if (!pn_flow_safe(pn->rx_fc)) {
797 newpn->peer_type = peer_type; 551 err = sock_queue_rcv_skb(sk, skb);
798 newpn->rx_credits = 0; 552 if (!err)
799 newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; 553 return NET_RX_SUCCESS;
800 newpn->init_enable = enabled; 554 err = NET_RX_DROP;
801 newpn->aligned = aligned; 555 break;
556 }
802 557
803 BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); 558 if (pn->rx_credits == 0) {
804 skb_queue_head(&newsk->sk_receive_queue, skb); 559 atomic_inc(&sk->sk_drops);
805 if (!sock_flag(sk, SOCK_DEAD)) 560 err = NET_RX_DROP;
806 sk->sk_data_ready(sk, 0); 561 break;
562 }
563 pn->rx_credits--;
564 skb->dev = NULL;
565 skb_set_owner_r(skb, sk);
566 err = skb->len;
567 skb_queue_tail(&sk->sk_receive_queue, skb);
568 if (!sock_flag(sk, SOCK_DEAD))
569 sk->sk_data_ready(sk, err);
570 return NET_RX_SUCCESS;
807 571
808 sk_acceptq_added(sk); 572 case PNS_PEP_CONNECT_RESP:
809 sk_add_node(newsk, &pn->ackq); 573 if (sk->sk_state != TCP_SYN_SENT)
810 return 0; 574 break;
575 if (!sock_flag(sk, SOCK_DEAD))
576 sk->sk_state_change(sk);
577 if (pep_connresp_rcv(sk, skb)) {
578 sk->sk_state = TCP_CLOSE_WAIT;
579 break;
580 }
581
582 sk->sk_state = TCP_ESTABLISHED;
583 if (!pn_flow_safe(pn->tx_fc)) {
584 atomic_set(&pn->tx_credits, 1);
585 sk->sk_write_space(sk);
586 }
587 pipe_grant_credits(sk, GFP_ATOMIC);
588 break;
589
590 case PNS_PEP_DISCONNECT_RESP:
591 /* sock should already be dead, nothing to do */
592 break;
593
594 case PNS_PEP_STATUS_IND:
595 pipe_rcv_status(sk, skb);
596 break;
597 }
598 kfree_skb(skb);
599 return err;
811} 600}
812 601
813/* Listening sock must be locked */ 602/* Listening sock must be locked */
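On the active side, pep_connresp_rcv() walks the REQUIRED_FC_TX and PREFERRED_FC_RX sub-blocks of the connect response, and the new pipe_negotiate_fc() simply keeps the strongest flow-control method advertised; data and control messages on an established pipe are then routed through pipe_handler_do_rcv(). A sketch of the negotiation (assumption: flow-control values ordered none < legacy < one-credit < multi-credit, as in the Phonet headers):

    /* Sketch: pick the strongest recognised flow-control method. */
    enum pn_fc {
            PN_NO_FLOW_CONTROL = 0,
            PN_LEGACY_FLOW_CONTROL,
            PN_ONE_CREDIT_FLOW_CONTROL,
            PN_MULTI_CREDIT_FLOW_CONTROL,
            PN_MAX_FLOW_CONTROL,
    };

    static enum pn_fc negotiate_fc(const unsigned char *fcs, unsigned int n)
    {
            enum pn_fc final_fc = PN_NO_FLOW_CONTROL;
            unsigned int i;

            for (i = 0; i < n; i++) {
                    unsigned char fc = fcs[i];

                    /* keep the strongest advertised method we recognise */
                    if (fc > final_fc && fc < PN_MAX_FLOW_CONTROL)
                            final_fc = (enum pn_fc)fc;
            }
            return final_fc;
    }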
@@ -847,7 +636,6 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
847 struct sock *sknode; 636 struct sock *sknode;
848 struct pnpipehdr *hdr; 637 struct pnpipehdr *hdr;
849 struct sockaddr_pn dst; 638 struct sockaddr_pn dst;
850 int err = NET_RX_SUCCESS;
851 u8 pipe_handle; 639 u8 pipe_handle;
852 640
853 if (!pskb_may_pull(skb, sizeof(*hdr))) 641 if (!pskb_may_pull(skb, sizeof(*hdr)))
@@ -865,26 +653,18 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
865 if (sknode) 653 if (sknode)
866 return sk_receive_skb(sknode, skb, 1); 654 return sk_receive_skb(sknode, skb, 1);
867 655
868 /* Look for a pipe handle pending accept */
869 sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle);
870 if (sknode) {
871 sock_put(sknode);
872 if (net_ratelimit())
873 printk(KERN_WARNING"Phonet unconnected PEP ignored");
874 err = NET_RX_DROP;
875 goto drop;
876 }
877
878 switch (hdr->message_id) { 656 switch (hdr->message_id) {
879 case PNS_PEP_CONNECT_REQ: 657 case PNS_PEP_CONNECT_REQ:
880 err = pep_connreq_rcv(sk, skb); 658 if (sk->sk_state != TCP_LISTEN || sk_acceptq_is_full(sk)) {
881 break; 659 pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE,
882 660 GFP_ATOMIC);
883#ifdef CONFIG_PHONET_PIPECTRLR 661 break;
884 case PNS_PEP_CONNECT_RESP: 662 }
885 err = pep_connresp_rcv(sk, skb); 663 skb_queue_head(&sk->sk_receive_queue, skb);
886 break; 664 sk_acceptq_added(sk);
887#endif 665 if (!sock_flag(sk, SOCK_DEAD))
666 sk->sk_data_ready(sk, 0);
667 return NET_RX_SUCCESS;
888 668
889 case PNS_PEP_DISCONNECT_REQ: 669 case PNS_PEP_DISCONNECT_REQ:
890 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 670 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
@@ -898,12 +678,17 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
898 case PNS_PEP_ENABLE_REQ: 678 case PNS_PEP_ENABLE_REQ:
899 case PNS_PEP_DISABLE_REQ: 679 case PNS_PEP_DISABLE_REQ:
900 /* invalid handle is not even allowed here! */ 680 /* invalid handle is not even allowed here! */
681 break;
682
901 default: 683 default:
902 err = NET_RX_DROP; 684 if ((1 << sk->sk_state)
685 & ~(TCPF_CLOSE|TCPF_LISTEN|TCPF_CLOSE_WAIT))
686 /* actively connected socket */
687 return pipe_handler_do_rcv(sk, skb);
903 } 688 }
904drop: 689drop:
905 kfree_skb(skb); 690 kfree_skb(skb);
906 return err; 691 return NET_RX_SUCCESS;
907} 692}
908 693
909static int pipe_do_remove(struct sock *sk) 694static int pipe_do_remove(struct sock *sk)
@@ -912,20 +697,16 @@ static int pipe_do_remove(struct sock *sk)
912 struct pnpipehdr *ph; 697 struct pnpipehdr *ph;
913 struct sk_buff *skb; 698 struct sk_buff *skb;
914 699
915 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL); 700 skb = pep_alloc_skb(sk, NULL, 0, GFP_KERNEL);
916 if (!skb) 701 if (!skb)
917 return -ENOMEM; 702 return -ENOMEM;
918 703
919 skb_reserve(skb, MAX_PNPIPE_HEADER);
920 __skb_push(skb, sizeof(*ph));
921 skb_reset_transport_header(skb);
922 ph = pnp_hdr(skb); 704 ph = pnp_hdr(skb);
923 ph->utid = 0; 705 ph->utid = 0;
924 ph->message_id = PNS_PIPE_REMOVE_REQ; 706 ph->message_id = PNS_PIPE_REMOVE_REQ;
925 ph->pipe_handle = pn->pipe_handle; 707 ph->pipe_handle = pn->pipe_handle;
926 ph->data[0] = PAD; 708 ph->data[0] = PAD;
927 709 return pn_skb_send(sk, skb, NULL);
928 return pn_skb_send(sk, skb, &pipe_srv);
929} 710}
930 711
931/* associated socket ceases to exist */ 712/* associated socket ceases to exist */
@@ -938,29 +719,15 @@ static void pep_sock_close(struct sock *sk, long timeout)
938 sk_common_release(sk); 719 sk_common_release(sk);
939 720
940 lock_sock(sk); 721 lock_sock(sk);
941 if (sk->sk_state == TCP_LISTEN) { 722 if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) {
942 /* Destroy the listen queue */ 723 if (sk->sk_backlog_rcv == pipe_do_rcv)
943 struct sock *sknode; 724 /* Forcefully remove dangling Phonet pipe */
944 struct hlist_node *p, *n; 725 pipe_do_remove(sk);
945 726 else
946 sk_for_each_safe(sknode, p, n, &pn->ackq) 727 pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD,
947 sk_del_node_init(sknode); 728 NULL, 0);
948 sk->sk_state = TCP_CLOSE;
949 } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
950 /* Forcefully remove dangling Phonet pipe */
951 pipe_do_remove(sk);
952
953#ifdef CONFIG_PHONET_PIPECTRLR
954 if (pn->pipe_state != PIPE_IDLE) {
955 /* send pep disconnect request */
956 pipe_handler_send_req(sk,
957 PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ,
958 GFP_KERNEL);
959
960 pn->pipe_state = PIPE_IDLE;
961 sk->sk_state = TCP_CLOSE;
962 } 729 }
963#endif 730 sk->sk_state = TCP_CLOSE;
964 731
965 ifindex = pn->ifindex; 732 ifindex = pn->ifindex;
966 pn->ifindex = 0; 733 pn->ifindex = 0;
@@ -971,86 +738,141 @@ static void pep_sock_close(struct sock *sk, long timeout)
971 sock_put(sk); 738 sock_put(sk);
972} 739}
973 740
974static int pep_wait_connreq(struct sock *sk, int noblock) 741static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
975{ 742{
976 struct task_struct *tsk = current; 743 struct pep_sock *pn = pep_sk(sk), *newpn;
977 struct pep_sock *pn = pep_sk(sk); 744 struct sock *newsk = NULL;
978 long timeo = sock_rcvtimeo(sk, noblock); 745 struct sk_buff *skb;
979 746 struct pnpipehdr *hdr;
980 for (;;) { 747 struct sockaddr_pn dst, src;
981 DEFINE_WAIT(wait); 748 int err;
749 u16 peer_type;
750 u8 pipe_handle, enabled, n_sb;
751 u8 aligned = 0;
982 752
983 if (sk->sk_state != TCP_LISTEN) 753 skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp);
984 return -EINVAL; 754 if (!skb)
985 if (!hlist_empty(&pn->ackq)) 755 return NULL;
986 break;
987 if (!timeo)
988 return -EWOULDBLOCK;
989 if (signal_pending(tsk))
990 return sock_intr_errno(timeo);
991 756
992 prepare_to_wait_exclusive(sk_sleep(sk), &wait, 757 lock_sock(sk);
993 TASK_INTERRUPTIBLE); 758 if (sk->sk_state != TCP_LISTEN) {
994 release_sock(sk); 759 err = -EINVAL;
995 timeo = schedule_timeout(timeo); 760 goto drop;
996 lock_sock(sk);
997 finish_wait(sk_sleep(sk), &wait);
998 } 761 }
762 sk_acceptq_removed(sk);
999 763
1000 return 0; 764 err = -EPROTO;
1001} 765 if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
766 goto drop;
1002 767
1003static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) 768 hdr = pnp_hdr(skb);
1004{ 769 pipe_handle = hdr->pipe_handle;
1005 struct pep_sock *pn = pep_sk(sk); 770 switch (hdr->state_after_connect) {
1006 struct sock *newsk = NULL; 771 case PN_PIPE_DISABLE:
1007 struct sk_buff *oskb; 772 enabled = 0;
1008 int err; 773 break;
774 case PN_PIPE_ENABLE:
775 enabled = 1;
776 break;
777 default:
778 pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM,
779 GFP_KERNEL);
780 goto drop;
781 }
782 peer_type = hdr->other_pep_type << 8;
1009 783
1010 lock_sock(sk); 784 /* Parse sub-blocks (options) */
1011 err = pep_wait_connreq(sk, flags & O_NONBLOCK); 785 n_sb = hdr->data[4];
1012 if (err) 786 while (n_sb > 0) {
1013 goto out; 787 u8 type, buf[1], len = sizeof(buf);
788 const u8 *data = pep_get_sb(skb, &type, &len, buf);
1014 789
1015 newsk = __sk_head(&pn->ackq); 790 if (data == NULL)
791 goto drop;
792 switch (type) {
793 case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE:
794 if (len < 1)
795 goto drop;
796 peer_type = (peer_type & 0xff00) | data[0];
797 break;
798 case PN_PIPE_SB_ALIGNED_DATA:
799 aligned = data[0] != 0;
800 break;
801 }
802 n_sb--;
803 }
1016 804
1017 oskb = skb_dequeue(&newsk->sk_receive_queue); 805 /* Check for duplicate pipe handle */
1018 err = pep_accept_conn(newsk, oskb); 806 newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
1019 if (err) { 807 if (unlikely(newsk)) {
1020 skb_queue_head(&newsk->sk_receive_queue, oskb); 808 __sock_put(newsk);
1021 newsk = NULL; 809 newsk = NULL;
1022 goto out; 810 pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL);
811 goto drop;
812 }
813
814 /* Create a new to-be-accepted sock */
815 newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot);
816 if (!newsk) {
817 pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
818 err = -ENOBUFS;
819 goto drop;
1023 } 820 }
1024 kfree_skb(oskb);
1025 821
822 sock_init_data(NULL, newsk);
823 newsk->sk_state = TCP_SYN_RECV;
824 newsk->sk_backlog_rcv = pipe_do_rcv;
825 newsk->sk_protocol = sk->sk_protocol;
826 newsk->sk_destruct = pipe_destruct;
827
828 newpn = pep_sk(newsk);
829 pn_skb_get_dst_sockaddr(skb, &dst);
830 pn_skb_get_src_sockaddr(skb, &src);
831 newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
832 newpn->pn_sk.dobject = pn_sockaddr_get_object(&src);
833 newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst);
1026 sock_hold(sk); 834 sock_hold(sk);
1027 pep_sk(newsk)->listener = sk; 835 newpn->listener = sk;
836 skb_queue_head_init(&newpn->ctrlreq_queue);
837 newpn->pipe_handle = pipe_handle;
838 atomic_set(&newpn->tx_credits, 0);
839 newpn->ifindex = 0;
840 newpn->peer_type = peer_type;
841 newpn->rx_credits = 0;
842 newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
843 newpn->init_enable = enabled;
844 newpn->aligned = aligned;
1028 845
1029 sock_hold(newsk); 846 err = pep_accept_conn(newsk, skb);
1030 sk_del_node_init(newsk); 847 if (err) {
1031 sk_acceptq_removed(sk); 848 sock_put(newsk);
849 newsk = NULL;
850 goto drop;
851 }
1032 sk_add_node(newsk, &pn->hlist); 852 sk_add_node(newsk, &pn->hlist);
1033 __sock_put(newsk); 853drop:
1034
1035out:
1036 release_sock(sk); 854 release_sock(sk);
855 kfree_skb(skb);
1037 *errp = err; 856 *errp = err;
1038 return newsk; 857 return newsk;
1039} 858}
1040 859
1041#ifdef CONFIG_PHONET_PIPECTRLR
1042static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) 860static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
1043{ 861{
1044 struct pep_sock *pn = pep_sk(sk); 862 struct pep_sock *pn = pep_sk(sk);
1045 struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; 863 int err;
1046 864 u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
1047 memcpy(&pn->remote_pep, spn, sizeof(struct sockaddr_pn));
1048 865
1049 return pipe_handler_send_req(sk, 866 pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
1050 PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ, 867 err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
1051 GFP_ATOMIC); 868 PN_PIPE_ENABLE, data, 4);
869 if (err) {
870 pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
871 return err;
872 }
873 sk->sk_state = TCP_SYN_SENT;
874 return 0;
1052} 875}
1053#endif
1054 876
1055static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) 877static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
1056{ 878{
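pep_sock_accept() now performs the whole passive handshake itself (dequeue the stored CONNECT_REQ, parse sub-blocks, reject duplicates or overload, create the child socket, send the response), and pep_sock_connect() is the active mirror image: pick a provisional pipe handle, send PNS_PEP_CONNECT_REQ and move to TCP_SYN_SENT, with the CONNECT_RESP later driving the socket to TCP_ESTABLISHED or TCP_CLOSE_WAIT. A self-contained sketch of the active-side state handling (assumption: stand-alone model; the real code sends and receives Phonet messages):

    #include <stdio.h>

    enum pstate { P_CLOSED, P_SYN_SENT, P_ESTABLISHED, P_CLOSE_WAIT };

    struct pipe_ctx {
            enum pstate state;
            unsigned char pipe_handle;
    };

    /* connect(): provisional handle, send CONNECT_REQ, enter SYN_SENT. */
    static int pipe_connect(struct pipe_ctx *p)
    {
            p->pipe_handle = 1;         /* anything but the invalid handle */
            /* ... PNS_PEP_CONNECT_REQ would be sent here ... */
            p->state = P_SYN_SENT;
            return 0;
    }

    /* CONNECT_RESP: negotiate and establish, or give up on error. */
    static void pipe_connect_resp(struct pipe_ctx *p, int error_code)
    {
            if (p->state != P_SYN_SENT)
                    return;
            p->state = error_code ? P_CLOSE_WAIT : P_ESTABLISHED;
    }

    int main(void)
    {
            struct pipe_ctx p = { P_CLOSED, 0 };

            pipe_connect(&p);
            pipe_connect_resp(&p, 0);
            printf("state=%d\n", p.state);      /* 2 == P_ESTABLISHED */
            return 0;
    }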
@@ -1081,10 +903,18 @@ static int pep_init(struct sock *sk)
1081{ 903{
1082 struct pep_sock *pn = pep_sk(sk); 904 struct pep_sock *pn = pep_sk(sk);
1083 905
1084 INIT_HLIST_HEAD(&pn->ackq); 906 sk->sk_destruct = pipe_destruct;
1085 INIT_HLIST_HEAD(&pn->hlist); 907 INIT_HLIST_HEAD(&pn->hlist);
908 pn->listener = NULL;
1086 skb_queue_head_init(&pn->ctrlreq_queue); 909 skb_queue_head_init(&pn->ctrlreq_queue);
910 atomic_set(&pn->tx_credits, 0);
911 pn->ifindex = 0;
912 pn->peer_type = 0;
1087 pn->pipe_handle = PN_PIPE_INVALID_HANDLE; 913 pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
914 pn->rx_credits = 0;
915 pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
916 pn->init_enable = 1;
917 pn->aligned = 0;
1088 return 0; 918 return 0;
1089} 919}
1090 920
@@ -1103,18 +933,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
1103 933
1104 lock_sock(sk); 934 lock_sock(sk);
1105 switch (optname) { 935 switch (optname) {
1106#ifdef CONFIG_PHONET_PIPECTRLR
1107 case PNPIPE_PIPE_HANDLE:
1108 if (val) {
1109 if (pn->pipe_state > PIPE_IDLE) {
1110 err = -EFAULT;
1111 break;
1112 }
1113 pn->pipe_handle = val;
1114 break;
1115 }
1116#endif
1117
1118 case PNPIPE_ENCAP: 936 case PNPIPE_ENCAP:
1119 if (val && val != PNPIPE_ENCAP_IP) { 937 if (val && val != PNPIPE_ENCAP_IP) {
1120 err = -EINVAL; 938 err = -EINVAL;
@@ -1141,16 +959,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
1141 } 959 }
1142 goto out_norel; 960 goto out_norel;
1143 961
1144#ifdef CONFIG_PHONET_PIPECTRLR
1145 case PNPIPE_ENABLE:
1146 if (pn->pipe_state <= PIPE_IDLE) {
1147 err = -ENOTCONN;
1148 break;
1149 }
1150 err = pipe_handler_enable_pipe(sk, val);
1151 break;
1152#endif
1153
1154 default: 962 default:
1155 err = -ENOPROTOOPT; 963 err = -ENOPROTOOPT;
1156 } 964 }
@@ -1180,13 +988,11 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
1180 val = pn->ifindex; 988 val = pn->ifindex;
1181 break; 989 break;
1182 990
1183#ifdef CONFIG_PHONET_PIPECTRLR 991 case PNPIPE_HANDLE:
1184 case PNPIPE_ENABLE: 992 val = pn->pipe_handle;
1185 if (pn->pipe_state <= PIPE_IDLE) 993 if (val == PN_PIPE_INVALID_HANDLE)
1186 return -ENOTCONN; 994 return -EINVAL;
1187 val = pn->pipe_state != PIPE_DISABLED;
1188 break; 995 break;
1189#endif
1190 996
1191 default: 997 default:
1192 return -ENOPROTOOPT; 998 return -ENOPROTOOPT;
@@ -1222,11 +1028,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
1222 } else 1028 } else
1223 ph->message_id = PNS_PIPE_DATA; 1029 ph->message_id = PNS_PIPE_DATA;
1224 ph->pipe_handle = pn->pipe_handle; 1030 ph->pipe_handle = pn->pipe_handle;
1225#ifdef CONFIG_PHONET_PIPECTRLR 1031 err = pn_skb_send(sk, skb, NULL);
1226 err = pn_skb_send(sk, skb, &pn->remote_pep);
1227#else
1228 err = pn_skb_send(sk, skb, &pipe_srv);
1229#endif
1230 1032
1231 if (err && pn_flow_safe(pn->tx_fc)) 1033 if (err && pn_flow_safe(pn->tx_fc))
1232 atomic_inc(&pn->tx_credits); 1034 atomic_inc(&pn->tx_credits);
@@ -1253,7 +1055,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
1253 if (!skb) 1055 if (!skb)
1254 return err; 1056 return err;
1255 1057
1256 skb_reserve(skb, MAX_PHONET_HEADER + 3); 1058 skb_reserve(skb, MAX_PHONET_HEADER + 3 + pn->aligned);
1257 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 1059 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1258 if (err < 0) 1060 if (err < 0)
1259 goto outfree; 1061 goto outfree;
@@ -1355,7 +1157,7 @@ struct sk_buff *pep_read(struct sock *sk)
1355 struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); 1157 struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
1356 1158
1357 if (sk->sk_state == TCP_ESTABLISHED) 1159 if (sk->sk_state == TCP_ESTABLISHED)
1358 pipe_grant_credits(sk); 1160 pipe_grant_credits(sk, GFP_ATOMIC);
1359 return skb; 1161 return skb;
1360} 1162}
1361 1163
@@ -1400,7 +1202,7 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
1400 } 1202 }
1401 1203
1402 if (sk->sk_state == TCP_ESTABLISHED) 1204 if (sk->sk_state == TCP_ESTABLISHED)
1403 pipe_grant_credits(sk); 1205 pipe_grant_credits(sk, GFP_KERNEL);
1404 release_sock(sk); 1206 release_sock(sk);
1405copy: 1207copy:
1406 msg->msg_flags |= MSG_EOR; 1208 msg->msg_flags |= MSG_EOR;
@@ -1424,9 +1226,9 @@ static void pep_sock_unhash(struct sock *sk)
1424 1226
1425 lock_sock(sk); 1227 lock_sock(sk);
1426 1228
1427#ifndef CONFIG_PHONET_PIPECTRLR 1229 if (pn->listener != NULL) {
1428 if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
1429 skparent = pn->listener; 1230 skparent = pn->listener;
1231 pn->listener = NULL;
1430 release_sock(sk); 1232 release_sock(sk);
1431 1233
1432 pn = pep_sk(skparent); 1234 pn = pep_sk(skparent);
@@ -1434,7 +1236,7 @@ static void pep_sock_unhash(struct sock *sk)
1434 sk_del_node_init(sk); 1236 sk_del_node_init(sk);
1435 sk = skparent; 1237 sk = skparent;
1436 } 1238 }
1437#endif 1239
1438 /* Unhash a listening sock only when it is closed 1240 /* Unhash a listening sock only when it is closed
1439 * and all of its active connected pipes are closed. */ 1241 * and all of its active connected pipes are closed. */
1440 if (hlist_empty(&pn->hlist)) 1242 if (hlist_empty(&pn->hlist))
@@ -1448,9 +1250,7 @@ static void pep_sock_unhash(struct sock *sk)
1448static struct proto pep_proto = { 1250static struct proto pep_proto = {
1449 .close = pep_sock_close, 1251 .close = pep_sock_close,
1450 .accept = pep_sock_accept, 1252 .accept = pep_sock_accept,
1451#ifdef CONFIG_PHONET_PIPECTRLR
1452 .connect = pep_sock_connect, 1253 .connect = pep_sock_connect,
1453#endif
1454 .ioctl = pep_ioctl, 1254 .ioctl = pep_ioctl,
1455 .init = pep_init, 1255 .init = pep_init,
1456 .setsockopt = pep_setsockopt, 1256 .setsockopt = pep_setsockopt,
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 947038ddd04c..d2df8f33160b 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -162,14 +162,6 @@ int phonet_address_add(struct net_device *dev, u8 addr)
162 return err; 162 return err;
163} 163}
164 164
165static void phonet_device_rcu_free(struct rcu_head *head)
166{
167 struct phonet_device *pnd;
168
169 pnd = container_of(head, struct phonet_device, rcu);
170 kfree(pnd);
171}
172
173int phonet_address_del(struct net_device *dev, u8 addr) 165int phonet_address_del(struct net_device *dev, u8 addr)
174{ 166{
175 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); 167 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -188,7 +180,7 @@ int phonet_address_del(struct net_device *dev, u8 addr)
188 mutex_unlock(&pndevs->lock); 180 mutex_unlock(&pndevs->lock);
189 181
190 if (pnd) 182 if (pnd)
191 call_rcu(&pnd->rcu, phonet_device_rcu_free); 183 kfree_rcu(pnd, rcu);
192 184
193 return err; 185 return err;
194} 186}
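The open-coded call_rcu() callback whose only job was kfree() is replaced by kfree_rcu(), which frees the object after a grace period without needing a named callback, provided the structure embeds a struct rcu_head. A kernel-style sketch with a hypothetical structure in place of struct phonet_device:

    /* Sketch (assumption: hypothetical object; kfree_rcu() takes the pointer
     * and the name of the rcu_head member inside it). */
    #include <linux/slab.h>
    #include <linux/rcupdate.h>

    struct my_entry {
            int value;
            struct rcu_head rcu;        /* used by kfree_rcu() for deferred free */
    };

    static void my_entry_release(struct my_entry *e)
    {
            /* Equivalent to call_rcu() with a callback that only calls kfree():
             * e is freed once all pre-existing RCU readers have finished. */
            kfree_rcu(e, rcu);
    }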
@@ -426,18 +418,14 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
426 return 0; 418 return 0;
427} 419}
428 420
429struct net_device *phonet_route_get(struct net *net, u8 daddr) 421struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr)
430{ 422{
431 struct phonet_net *pnn = phonet_pernet(net); 423 struct phonet_net *pnn = phonet_pernet(net);
432 struct phonet_routes *routes = &pnn->routes; 424 struct phonet_routes *routes = &pnn->routes;
433 struct net_device *dev; 425 struct net_device *dev;
434 426
435 ASSERT_RTNL(); /* no need to hold the device */
436
437 daddr >>= 2; 427 daddr >>= 2;
438 rcu_read_lock();
439 dev = rcu_dereference(routes->table[daddr]); 428 dev = rcu_dereference(routes->table[daddr]);
440 rcu_read_unlock();
441 return dev; 429 return dev;
442} 430}
443 431
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 58b3b1f991ed..438accb7a5a8 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -264,10 +264,11 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
264 struct net *net = sock_net(skb->sk); 264 struct net *net = sock_net(skb->sk);
265 u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; 265 u8 addr, addr_idx = 0, addr_start_idx = cb->args[0];
266 266
267 rcu_read_lock();
267 for (addr = 0; addr < 64; addr++) { 268 for (addr = 0; addr < 64; addr++) {
268 struct net_device *dev; 269 struct net_device *dev;
269 270
270 dev = phonet_route_get(net, addr << 2); 271 dev = phonet_route_get_rcu(net, addr << 2);
271 if (!dev) 272 if (!dev)
272 continue; 273 continue;
273 274
@@ -279,6 +280,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
279 } 280 }
280 281
281out: 282out:
283 rcu_read_unlock();
282 cb->args[0] = addr_idx; 284 cb->args[0] = addr_idx;
283 cb->args[1] = 0; 285 cb->args[1] = 0;
284 286
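phonet_route_get() becomes phonet_route_get_rcu(): the lookup no longer takes the RCU read lock itself, so a caller such as route_dumpit() must hold rcu_read_lock() across the lookup and for as long as it touches the returned device. A sketch of that caller-held-lock pattern over a hypothetical pointer table:

    /* Sketch (assumption: hypothetical RCU-protected table, not the Phonet
     * routing table; mirrors the "caller holds rcu_read_lock()" rule). */
    #include <linux/rcupdate.h>

    #define TABLE_SIZE 64

    struct entry;
    static struct entry __rcu *table[TABLE_SIZE];

    /* Must be called under rcu_read_lock(); the result is only valid until
     * the matching rcu_read_unlock(). */
    static struct entry *table_get_rcu(unsigned int idx)
    {
            return rcu_dereference(table[idx % TABLE_SIZE]);
    }

    static void dump_all(void (*show)(struct entry *))
    {
            unsigned int i;

            rcu_read_lock();            /* the caller, not the lookup, locks */
            for (i = 0; i < TABLE_SIZE; i++) {
                    struct entry *e = table_get_rcu(i);

                    if (e)
                            show(e);
            }
            rcu_read_unlock();
    }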
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 25f746d20c1f..8c5bfcef92cb 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -52,7 +52,7 @@ static int pn_socket_release(struct socket *sock)
52 52
53static struct { 53static struct {
54 struct hlist_head hlist[PN_HASHSIZE]; 54 struct hlist_head hlist[PN_HASHSIZE];
55 spinlock_t lock; 55 struct mutex lock;
56} pnsocks; 56} pnsocks;
57 57
58void __init pn_sock_init(void) 58void __init pn_sock_init(void)
@@ -61,7 +61,7 @@ void __init pn_sock_init(void)
61 61
62 for (i = 0; i < PN_HASHSIZE; i++) 62 for (i = 0; i < PN_HASHSIZE; i++)
63 INIT_HLIST_HEAD(pnsocks.hlist + i); 63 INIT_HLIST_HEAD(pnsocks.hlist + i);
64 spin_lock_init(&pnsocks.lock); 64 mutex_init(&pnsocks.lock);
65} 65}
66 66
67static struct hlist_head *pn_hash_list(u16 obj) 67static struct hlist_head *pn_hash_list(u16 obj)
@@ -82,9 +82,8 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
82 u8 res = spn->spn_resource; 82 u8 res = spn->spn_resource;
83 struct hlist_head *hlist = pn_hash_list(obj); 83 struct hlist_head *hlist = pn_hash_list(obj);
84 84
85 spin_lock_bh(&pnsocks.lock); 85 rcu_read_lock();
86 86 sk_for_each_rcu(sknode, node, hlist) {
87 sk_for_each(sknode, node, hlist) {
88 struct pn_sock *pn = pn_sk(sknode); 87 struct pn_sock *pn = pn_sk(sknode);
89 BUG_ON(!pn->sobject); /* unbound socket */ 88 BUG_ON(!pn->sobject); /* unbound socket */
90 89
@@ -107,8 +106,7 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
107 sock_hold(sknode); 106 sock_hold(sknode);
108 break; 107 break;
109 } 108 }
110 109 rcu_read_unlock();
111 spin_unlock_bh(&pnsocks.lock);
112 110
113 return rval; 111 return rval;
114} 112}
@@ -119,7 +117,7 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
119 struct hlist_head *hlist = pnsocks.hlist; 117 struct hlist_head *hlist = pnsocks.hlist;
120 unsigned h; 118 unsigned h;
121 119
122 spin_lock(&pnsocks.lock); 120 rcu_read_lock();
123 for (h = 0; h < PN_HASHSIZE; h++) { 121 for (h = 0; h < PN_HASHSIZE; h++) {
124 struct hlist_node *node; 122 struct hlist_node *node;
125 struct sock *sknode; 123 struct sock *sknode;
@@ -140,25 +138,26 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
140 } 138 }
141 hlist++; 139 hlist++;
142 } 140 }
143 spin_unlock(&pnsocks.lock); 141 rcu_read_unlock();
144} 142}
145 143
146void pn_sock_hash(struct sock *sk) 144void pn_sock_hash(struct sock *sk)
147{ 145{
148 struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject); 146 struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
149 147
150 spin_lock_bh(&pnsocks.lock); 148 mutex_lock(&pnsocks.lock);
151 sk_add_node(sk, hlist); 149 sk_add_node_rcu(sk, hlist);
152 spin_unlock_bh(&pnsocks.lock); 150 mutex_unlock(&pnsocks.lock);
153} 151}
154EXPORT_SYMBOL(pn_sock_hash); 152EXPORT_SYMBOL(pn_sock_hash);
155 153
156void pn_sock_unhash(struct sock *sk) 154void pn_sock_unhash(struct sock *sk)
157{ 155{
158 spin_lock_bh(&pnsocks.lock); 156 mutex_lock(&pnsocks.lock);
159 sk_del_node_init(sk); 157 sk_del_node_init_rcu(sk);
160 spin_unlock_bh(&pnsocks.lock); 158 mutex_unlock(&pnsocks.lock);
161 pn_sock_unbind_all_res(sk); 159 pn_sock_unbind_all_res(sk);
160 synchronize_rcu();
162} 161}
163EXPORT_SYMBOL(pn_sock_unhash); 162EXPORT_SYMBOL(pn_sock_unhash);
164 163
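The Phonet socket hash moves from a bottom-half spinlock to RCU on the read side: lookups run under rcu_read_lock() with sk_for_each_rcu(), writers serialise on a mutex and use the _rcu add/del variants, and pn_sock_unhash() finishes with synchronize_rcu() so no reader can still be walking the chain once the socket is gone. A sketch of the same split over a generic hlist (assumption: plain items instead of struct sock; the iterator takes the extra node argument used by kernels of this vintage):

    #include <linux/mutex.h>
    #include <linux/rculist.h>
    #include <linux/rcupdate.h>

    struct item {
            int key, value;
            struct hlist_node node;
    };

    static HLIST_HEAD(items);
    static DEFINE_MUTEX(items_lock);    /* writers only */

    static int item_lookup(int key, int *value_out)
    {
            struct item *it;
            struct hlist_node *pos;
            int found = 0;

            rcu_read_lock();                            /* lockless readers */
            hlist_for_each_entry_rcu(it, pos, &items, node) {
                    if (it->key == key) {
                            *value_out = it->value;     /* use it under the lock */
                            found = 1;
                            break;
                    }
            }
            rcu_read_unlock();
            return found;
    }

    static void item_add(struct item *it)
    {
            mutex_lock(&items_lock);
            hlist_add_head_rcu(&it->node, &items);
            mutex_unlock(&items_lock);
    }

    static void item_del(struct item *it)
    {
            mutex_lock(&items_lock);
            hlist_del_init_rcu(&it->node);
            mutex_unlock(&items_lock);
            synchronize_rcu();          /* wait out readers before freeing */
    }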
@@ -225,15 +224,18 @@ static int pn_socket_autobind(struct socket *sock)
225 return 0; /* socket was already bound */ 224 return 0; /* socket was already bound */
226} 225}
227 226
228#ifdef CONFIG_PHONET_PIPECTRLR
229static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, 227static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
230 int len, int flags) 228 int len, int flags)
231{ 229{
232 struct sock *sk = sock->sk; 230 struct sock *sk = sock->sk;
231 struct pn_sock *pn = pn_sk(sk);
233 struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; 232 struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
234 long timeo; 233 struct task_struct *tsk = current;
234 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
235 int err; 235 int err;
236 236
237 if (pn_socket_autobind(sock))
238 return -ENOBUFS;
237 if (len < sizeof(struct sockaddr_pn)) 239 if (len < sizeof(struct sockaddr_pn))
238 return -EINVAL; 240 return -EINVAL;
239 if (spn->spn_family != AF_PHONET) 241 if (spn->spn_family != AF_PHONET)
@@ -243,82 +245,61 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
243 245
244 switch (sock->state) { 246 switch (sock->state) {
245 case SS_UNCONNECTED: 247 case SS_UNCONNECTED:
246 sk->sk_state = TCP_CLOSE; 248 if (sk->sk_state != TCP_CLOSE) {
247 break;
248 case SS_CONNECTING:
249 switch (sk->sk_state) {
250 case TCP_SYN_RECV:
251 sock->state = SS_CONNECTED;
252 err = -EISCONN;
253 goto out;
254 case TCP_CLOSE:
255 err = -EALREADY;
256 if (flags & O_NONBLOCK)
257 goto out;
258 goto wait_connect;
259 }
260 break;
261 case SS_CONNECTED:
262 switch (sk->sk_state) {
263 case TCP_SYN_RECV:
264 err = -EISCONN; 249 err = -EISCONN;
265 goto out; 250 goto out;
266 case TCP_CLOSE:
267 sock->state = SS_UNCONNECTED;
268 break;
269 } 251 }
270 break; 252 break;
271 case SS_DISCONNECTING: 253 case SS_CONNECTING:
272 case SS_FREE: 254 err = -EALREADY;
273 break; 255 goto out;
256 default:
257 err = -EISCONN;
258 goto out;
274 } 259 }
275 sk->sk_state = TCP_CLOSE;
276 sk_stream_kill_queues(sk);
277 260
261 pn->dobject = pn_sockaddr_get_object(spn);
262 pn->resource = pn_sockaddr_get_resource(spn);
278 sock->state = SS_CONNECTING; 263 sock->state = SS_CONNECTING;
264
279 err = sk->sk_prot->connect(sk, addr, len); 265 err = sk->sk_prot->connect(sk, addr, len);
280 if (err < 0) { 266 if (err) {
281 sock->state = SS_UNCONNECTED; 267 sock->state = SS_UNCONNECTED;
282 sk->sk_state = TCP_CLOSE; 268 pn->dobject = 0;
283 goto out; 269 goto out;
284 } 270 }
285 271
286 err = -EINPROGRESS; 272 while (sk->sk_state == TCP_SYN_SENT) {
287wait_connect: 273 DEFINE_WAIT(wait);
288 if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK))
289 goto out;
290
291 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
292 release_sock(sk);
293
294 err = -ERESTARTSYS;
295 timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
296 sk->sk_state != TCP_CLOSE,
297 timeo);
298 274
299 lock_sock(sk); 275 if (!timeo) {
300 if (timeo < 0) 276 err = -EINPROGRESS;
301 goto out; /* -ERESTARTSYS */ 277 goto out;
302 278 }
303 err = -ETIMEDOUT; 279 if (signal_pending(tsk)) {
304 if (timeo == 0 && sk->sk_state != TCP_SYN_RECV) 280 err = sock_intr_errno(timeo);
305 goto out; 281 goto out;
282 }
306 283
307 if (sk->sk_state != TCP_SYN_RECV) { 284 prepare_to_wait_exclusive(sk_sleep(sk), &wait,
308 sock->state = SS_UNCONNECTED; 285 TASK_INTERRUPTIBLE);
309 err = sock_error(sk); 286 release_sock(sk);
310 if (!err) 287 timeo = schedule_timeout(timeo);
311 err = -ECONNREFUSED; 288 lock_sock(sk);
312 goto out; 289 finish_wait(sk_sleep(sk), &wait);
313 } 290 }
314 sock->state = SS_CONNECTED;
315 err = 0;
316 291
292 if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
293 err = 0;
294 else if (sk->sk_state == TCP_CLOSE_WAIT)
295 err = -ECONNRESET;
296 else
297 err = -ECONNREFUSED;
298 sock->state = err ? SS_UNCONNECTED : SS_CONNECTED;
317out: 299out:
318 release_sock(sk); 300 release_sock(sk);
319 return err; 301 return err;
320} 302}
321#endif
322 303
323static int pn_socket_accept(struct socket *sock, struct socket *newsock, 304static int pn_socket_accept(struct socket *sock, struct socket *newsock,
324 int flags) 305 int flags)
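pn_socket_connect() above is rebuilt around a standard sleeping loop: store the destination, call the protocol connect() (which sends CONNECT_REQ and sets TCP_SYN_SENT), then wait on sk_sleep() until the state leaves SYN_SENT, returning -EINPROGRESS if the timeout runs out, the usual signal errno if interrupted, and mapping the final state to 0, -ECONNRESET or -ECONNREFUSED. A sketch of that final mapping only (assumption: reduced state set and simplified signal handling):

    #include <errno.h>

    enum cstate { C_SYN_SENT, C_SYN_RECV, C_ESTABLISHED, C_CLOSE_WAIT, C_CLOSE };

    static int connect_result(enum cstate st, int interrupted)
    {
            if (st == C_SYN_SENT)               /* still waiting when we gave up */
                    return interrupted ? -EINTR : -EINPROGRESS;
            if (st == C_SYN_RECV || st == C_ESTABLISHED)
                    return 0;                   /* handshake completed */
            if (st == C_CLOSE_WAIT)
                    return -ECONNRESET;         /* peer rejected the pipe */
            return -ECONNREFUSED;
    }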
@@ -327,6 +308,9 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
327 struct sock *newsk; 308 struct sock *newsk;
328 int err; 309 int err;
329 310
311 if (unlikely(sk->sk_state != TCP_LISTEN))
312 return -EINVAL;
313
330 newsk = sk->sk_prot->accept(sk, flags, &err); 314 newsk = sk->sk_prot->accept(sk, flags, &err);
331 if (!newsk) 315 if (!newsk)
332 return err; 316 return err;
@@ -363,13 +347,8 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
363 347
364 poll_wait(file, sk_sleep(sk), wait); 348 poll_wait(file, sk_sleep(sk), wait);
365 349
366 switch (sk->sk_state) { 350 if (sk->sk_state == TCP_CLOSE)
367 case TCP_LISTEN:
368 return hlist_empty(&pn->ackq) ? 0 : POLLIN;
369 case TCP_CLOSE:
370 return POLLERR; 351 return POLLERR;
371 }
372
373 if (!skb_queue_empty(&sk->sk_receive_queue)) 352 if (!skb_queue_empty(&sk->sk_receive_queue))
374 mask |= POLLIN | POLLRDNORM; 353 mask |= POLLIN | POLLRDNORM;
375 if (!skb_queue_empty(&pn->ctrlreq_queue)) 354 if (!skb_queue_empty(&pn->ctrlreq_queue))
@@ -428,19 +407,19 @@ static int pn_socket_listen(struct socket *sock, int backlog)
428 struct sock *sk = sock->sk; 407 struct sock *sk = sock->sk;
429 int err = 0; 408 int err = 0;
430 409
431 if (sock->state != SS_UNCONNECTED)
432 return -EINVAL;
433 if (pn_socket_autobind(sock)) 410 if (pn_socket_autobind(sock))
434 return -ENOBUFS; 411 return -ENOBUFS;
435 412
436 lock_sock(sk); 413 lock_sock(sk);
437 if (sk->sk_state != TCP_CLOSE) { 414 if (sock->state != SS_UNCONNECTED) {
438 err = -EINVAL; 415 err = -EINVAL;
439 goto out; 416 goto out;
440 } 417 }
441 418
442 sk->sk_state = TCP_LISTEN; 419 if (sk->sk_state != TCP_LISTEN) {
443 sk->sk_ack_backlog = 0; 420 sk->sk_state = TCP_LISTEN;
421 sk->sk_ack_backlog = 0;
422 }
444 sk->sk_max_ack_backlog = backlog; 423 sk->sk_max_ack_backlog = backlog;
445out: 424out:
446 release_sock(sk); 425 release_sock(sk);
@@ -488,11 +467,7 @@ const struct proto_ops phonet_stream_ops = {
488 .owner = THIS_MODULE, 467 .owner = THIS_MODULE,
489 .release = pn_socket_release, 468 .release = pn_socket_release,
490 .bind = pn_socket_bind, 469 .bind = pn_socket_bind,
491#ifdef CONFIG_PHONET_PIPECTRLR
492 .connect = pn_socket_connect, 470 .connect = pn_socket_connect,
493#else
494 .connect = sock_no_connect,
495#endif
496 .socketpair = sock_no_socketpair, 471 .socketpair = sock_no_socketpair,
497 .accept = pn_socket_accept, 472 .accept = pn_socket_accept,
498 .getname = pn_socket_getname, 473 .getname = pn_socket_getname,
@@ -572,7 +547,7 @@ static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos)
572 unsigned h; 547 unsigned h;
573 548
574 for (h = 0; h < PN_HASHSIZE; h++) { 549 for (h = 0; h < PN_HASHSIZE; h++) {
575 sk_for_each(sknode, node, hlist) { 550 sk_for_each_rcu(sknode, node, hlist) {
576 if (!net_eq(net, sock_net(sknode))) 551 if (!net_eq(net, sock_net(sknode)))
577 continue; 552 continue;
578 if (!pos) 553 if (!pos)
@@ -596,9 +571,9 @@ static struct sock *pn_sock_get_next(struct seq_file *seq, struct sock *sk)
596} 571}
597 572
598static void *pn_sock_seq_start(struct seq_file *seq, loff_t *pos) 573static void *pn_sock_seq_start(struct seq_file *seq, loff_t *pos)
599 __acquires(pnsocks.lock) 574 __acquires(rcu)
600{ 575{
601 spin_lock_bh(&pnsocks.lock); 576 rcu_read_lock();
602 return *pos ? pn_sock_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 577 return *pos ? pn_sock_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
603} 578}
604 579
@@ -615,9 +590,9 @@ static void *pn_sock_seq_next(struct seq_file *seq, void *v, loff_t *pos)
615} 590}
616 591
617static void pn_sock_seq_stop(struct seq_file *seq, void *v) 592static void pn_sock_seq_stop(struct seq_file *seq, void *v)
618 __releases(pnsocks.lock) 593 __releases(rcu)
619{ 594{
620 spin_unlock_bh(&pnsocks.lock); 595 rcu_read_unlock();
621} 596}
622 597
623static int pn_sock_seq_show(struct seq_file *seq, void *v) 598static int pn_sock_seq_show(struct seq_file *seq, void *v)
@@ -633,8 +608,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
633 608
634 seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu " 609 seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
635 "%d %p %d%n", 610 "%d %p %d%n",
636 sk->sk_protocol, pn->sobject, 0, pn->resource, 611 sk->sk_protocol, pn->sobject, pn->dobject,
637 sk->sk_state, 612 pn->resource, sk->sk_state,
638 sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), 613 sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
639 sock_i_uid(sk), sock_i_ino(sk), 614 sock_i_uid(sk), sock_i_ino(sk),
640 atomic_read(&sk->sk_refcnt), sk, 615 atomic_read(&sk->sk_refcnt), sk,
@@ -745,13 +720,11 @@ void pn_sock_unbind_all_res(struct sock *sk)
745 } 720 }
746 mutex_unlock(&resource_mutex); 721 mutex_unlock(&resource_mutex);
747 722
748 if (match == 0)
749 return;
750 synchronize_rcu();
751 while (match > 0) { 723 while (match > 0) {
752 sock_put(sk); 724 __sock_put(sk);
753 match--; 725 match--;
754 } 726 }
727 /* Caller is responsible for RCU sync before final sock_put() */
755} 728}
756 729
757#ifdef CONFIG_PROC_FS 730#ifdef CONFIG_PROC_FS
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 75ea686f27d5..6daaa49d133f 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -33,8 +33,7 @@
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/types.h> 34#include <linux/types.h>
35#include <linux/rbtree.h> 35#include <linux/rbtree.h>
36 36#include <linux/bitops.h>
37#include <asm-generic/bitops/le.h>
38 37
39#include "rds.h" 38#include "rds.h"
40 39
@@ -285,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
285 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 284 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
286 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 285 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
287 286
288 generic___set_le_bit(off, (void *)map->m_page_addrs[i]); 287 __set_bit_le(off, (void *)map->m_page_addrs[i]);
289} 288}
290 289
291void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) 290void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
@@ -299,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
299 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 298 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
300 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 299 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
301 300
302 generic___clear_le_bit(off, (void *)map->m_page_addrs[i]); 301 __clear_bit_le(off, (void *)map->m_page_addrs[i]);
303} 302}
304 303
305static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) 304static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
@@ -310,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
310 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 309 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
311 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 310 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
312 311
313 return generic_test_le_bit(off, (void *)map->m_page_addrs[i]); 312 return test_bit_le(off, (void *)map->m_page_addrs[i]);
314} 313}
315 314
316void rds_cong_add_socket(struct rds_sock *rs) 315void rds_cong_add_socket(struct rds_sock *rs)
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 4123967d4d65..cce19f95c624 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -364,7 +364,6 @@ void rds_ib_exit(void)
364 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
365 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
366 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
368} 367}
369 368
370struct rds_transport rds_ib_transport = { 369struct rds_transport rds_ib_transport = {
@@ -400,13 +399,9 @@ int rds_ib_init(void)
400 399
401 INIT_LIST_HEAD(&rds_ib_devices); 400 INIT_LIST_HEAD(&rds_ib_devices);
402 401
403 ret = rds_ib_fmr_init();
404 if (ret)
405 goto out;
406
407 ret = ib_register_client(&rds_ib_client); 402 ret = ib_register_client(&rds_ib_client);
408 if (ret) 403 if (ret)
409 goto out_fmr_exit; 404 goto out;
410 405
411 ret = rds_ib_sysctl_init(); 406 ret = rds_ib_sysctl_init();
412 if (ret) 407 if (ret)
@@ -430,8 +425,6 @@ out_sysctl:
430 rds_ib_sysctl_exit(); 425 rds_ib_sysctl_exit();
431out_ibreg: 426out_ibreg:
432 rds_ib_unregister_client(); 427 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
435out: 428out:
436 return ret; 429 return ret;
437} 430}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index e34ad032b66d..4297d92788dc 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
307void rds_ib_sync_mr(void *trans_private, int dir); 307void rds_ib_sync_mr(void *trans_private, int dir);
308void rds_ib_free_mr(void *trans_private, int invalidate); 308void rds_ib_free_mr(void *trans_private, int invalidate);
309void rds_ib_flush_mrs(void); 309void rds_ib_flush_mrs(void);
310int rds_ib_fmr_init(void);
311void rds_ib_fmr_exit(void);
312 310
313/* ib_recv.c */ 311/* ib_recv.c */
314int rds_ib_recv_init(void); 312int rds_ib_recv_init(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 18a833c450c8..819c35a0d9cb 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -38,8 +38,6 @@
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h" 39#include "xlist.h"
40 40
41static struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace); 41static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0 42#define CLEAN_LIST_BUSY_BIT 0
45 43
@@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
307 int err = 0, iter = 0; 305 int err = 0, iter = 0;
308 306
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) 307 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 308 schedule_delayed_work(&pool->flush_worker, 10);
311 309
312 while (1) { 310 while (1) {
313 ibmr = rds_ib_reuse_fmr(pool); 311 ibmr = rds_ib_reuse_fmr(pool);
@@ -696,24 +694,6 @@ out_nolock:
696 return ret; 694 return ret;
697} 695}
698 696
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
717static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 697static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
718{ 698{
719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); 699 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
741 /* If we've pinned too many pages, request a flush */ 721 /* If we've pinned too many pages, request a flush */
742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 722 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
743 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 723 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 724 schedule_delayed_work(&pool->flush_worker, 10);
745 725
746 if (invalidate) { 726 if (invalidate) {
747 if (likely(!in_interrupt())) { 727 if (likely(!in_interrupt())) {
@@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
749 } else { 729 } else {
750 /* We get here if the user created a MR marked 730 /* We get here if the user created a MR marked
751 * as use_once and invalidate at the same time. */ 731 * as use_once and invalidate at the same time. */
752 queue_delayed_work(rds_ib_fmr_wq, 732 schedule_delayed_work(&pool->flush_worker, 10);
753 &pool->flush_worker, 10);
754 } 733 }
755 } 734 }
756 735
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421bc..7c4dce8fa5e6 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -355,7 +355,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
355 * 355 *
356 * Conceptually, we have two counters: 356 * Conceptually, we have two counters:
357 * - send credits: this tells us how many WRs we're allowed 357 * - send credits: this tells us how many WRs we're allowed
358 * to submit without overruning the reciever's queue. For 358 * to submit without overruning the receiver's queue. For
359 * each SEND WR we post, we decrement this by one. 359 * each SEND WR we post, we decrement this by one.
360 * 360 *
361 * - posted credits: this tells us how many WRs we recently 361 * - posted credits: this tells us how many WRs we recently
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
551 if (conn->c_loopback 551 if (conn->c_loopback
552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
554 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 554 scat = &rm->data.op_sg[sg];
555 ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
556 ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
557 return ret;
555 } 558 }
556 559
557 /* FIXME we may overallocate here */ 560 /* FIXME we may overallocate here */
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 712cf2d1f28e..3a60a15d1b4a 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -181,7 +181,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
181 unsigned int send_size, recv_size; 181 unsigned int send_size, recv_size;
182 int ret; 182 int ret;
183 183
184 /* The offset of 1 is to accomodate the additional ACK WR. */ 184 /* The offset of 1 is to accommodate the additional ACK WR. */
185 send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1); 185 send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1);
186 recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1); 186 recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1);
187 rds_iw_ring_resize(send_ring, send_size - 1); 187 rds_iw_ring_resize(send_ring, send_size - 1);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 59509e9a9e72..6deaa77495e3 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -122,7 +122,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
122#else 122#else
123 /* FIXME - needs to compare the local and remote 123 /* FIXME - needs to compare the local and remote
124 * ipaddr/port tuple, but the ipaddr is the only 124 * ipaddr/port tuple, but the ipaddr is the only
125 * available infomation in the rds_sock (as the rest are 125 * available information in the rds_sock (as the rest are
126 * zero'ed. It doesn't appear to be properly populated 126 * zero'ed. It doesn't appear to be properly populated
127 * during connection setup... 127 * during connection setup...
128 */ 128 */
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 6280ea020d4e..545d8ee3efb1 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -307,7 +307,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
307 * 307 *
308 * Conceptually, we have two counters: 308 * Conceptually, we have two counters:
309 * - send credits: this tells us how many WRs we're allowed 309 * - send credits: this tells us how many WRs we're allowed
310 * to submit without overruning the reciever's queue. For 310 * to submit without overruning the receiver's queue. For
311 * each SEND WR we post, we decrement this by one. 311 * each SEND WR we post, we decrement this by one.
312 * 312 *
313 * - posted credits: this tells us how many WRs we recently 313 * - posted credits: this tells us how many WRs we recently
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b17..bca6761a3ca2 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 struct scatterlist *sgp = &rm->data.op_sg[sg];
65 int ret = sizeof(struct rds_header) +
66 be32_to_cpu(rm->m_inc.i_hdr.h_len);
67
64 /* Do not send cong updates to loopback */ 68 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 69 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 70 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 71 ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
72 goto out;
68 } 73 }
69 74
70 BUG_ON(hdr_off || sg || off); 75 BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
80 NULL); 85 NULL);
81 86
82 rds_inc_put(&rm->m_inc); 87 rds_inc_put(&rm->m_inc);
83 88out:
84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 89 return ret;
85} 90}
86 91
87/* 92/*
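A note on the matching clamps added in rds_ib_xmit() and rds_loop_xmit() above: the congestion bitmap does not fit in a single page-sized scatterlist fragment, and the send path appears to treat the xmit hook's return value as bytes consumed from the current fragment, so returning the full map size in one call could advance the transmit cursor past the fragment. Rough arithmetic, assuming 4 KiB pages:

    RDS_CONG_MAP_BYTES     = 65536 / 8 = 8192 bytes                 /* whole bitmap  */
    bytes left in fragment = scat->length - conn->c_xmit_data_off   /* at most 4096  */
    old return value       = sizeof(struct rds_header) + 8192       /* overshoots    */
    new return value       = min(old return value, bytes left in fragment)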
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 9542449c0720..da8adac2bf06 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...)
50#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) 50#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
51 51
52#define RDS_CONG_MAP_BYTES (65536 / 8) 52#define RDS_CONG_MAP_BYTES (65536 / 8)
53#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
54#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) 53#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
55#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 54#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
56 55
diff --git a/net/rds/send.c b/net/rds/send.c
index 35b9c2e9caf1..d58ae5f9339e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -116,7 +116,7 @@ static void release_in_xmit(struct rds_connection *conn)
116} 116}
117 117
118/* 118/*
119 * We're making the concious trade-off here to only send one message 119 * We're making the conscious trade-off here to only send one message
120 * down the connection at a time. 120 * down the connection at a time.
121 * Pro: 121 * Pro:
122 * - tx queueing is a simple fifo list 122 * - tx queueing is a simple fifo list
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 7fce6dfd2180..48464ca13b24 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -22,3 +22,14 @@ config RFKILL_INPUT
22 depends on RFKILL 22 depends on RFKILL
23 depends on INPUT = y || RFKILL = INPUT 23 depends on INPUT = y || RFKILL = INPUT
24 default y if !EXPERT 24 default y if !EXPERT
25
26config RFKILL_REGULATOR
27 tristate "Generic rfkill regulator driver"
28 depends on RFKILL || !RFKILL
29 depends on REGULATOR
30 help
31 This option enables controlling radio transmitters connected to a
32 voltage regulator using the regulator framework.
33
34 To compile this driver as a module, choose M here: the module will
35 be called rfkill-regulator.
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile
index 662105352691..d9a5a58ffd8c 100644
--- a/net/rfkill/Makefile
+++ b/net/rfkill/Makefile
@@ -5,3 +5,4 @@
5rfkill-y += core.o 5rfkill-y += core.o
6rfkill-$(CONFIG_RFKILL_INPUT) += input.o 6rfkill-$(CONFIG_RFKILL_INPUT) += input.o
7obj-$(CONFIG_RFKILL) += rfkill.o 7obj-$(CONFIG_RFKILL) += rfkill.o
8obj-$(CONFIG_RFKILL_REGULATOR) += rfkill-regulator.o
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 0198191b756d..be90640a2774 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1024,7 +1024,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
1024 * start getting events from elsewhere but hold mtx to get 1024 * start getting events from elsewhere but hold mtx to get
1025 * startup events added first 1025 * startup events added first
1026 */ 1026 */
1027 list_add(&data->list, &rfkill_fds);
1028 1027
1029 list_for_each_entry(rfkill, &rfkill_list, node) { 1028 list_for_each_entry(rfkill, &rfkill_list, node) {
1030 ev = kzalloc(sizeof(*ev), GFP_KERNEL); 1029 ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -1033,6 +1032,7 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
1033 rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); 1032 rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD);
1034 list_add_tail(&ev->list, &data->events); 1033 list_add_tail(&ev->list, &data->events);
1035 } 1034 }
1035 list_add(&data->list, &rfkill_fds);
1036 mutex_unlock(&data->mtx); 1036 mutex_unlock(&data->mtx);
1037 mutex_unlock(&rfkill_global_mutex); 1037 mutex_unlock(&rfkill_global_mutex);
1038 1038
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
new file mode 100644
index 000000000000..18dc512a10f3
--- /dev/null
+++ b/net/rfkill/rfkill-regulator.c
@@ -0,0 +1,164 @@
1/*
2 * rfkill-regulator.c - Regulator consumer driver for rfkill
3 *
4 * Copyright (C) 2009 Guiming Zhuo <gmzhuo@gmail.com>
5 * Copyright (C) 2011 Antonio Ospite <ospite@studenti.unina.it>
6 *
7 * Implementation inspired by leds-regulator driver.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/err.h>
17#include <linux/slab.h>
18#include <linux/platform_device.h>
19#include <linux/regulator/consumer.h>
20#include <linux/rfkill.h>
21#include <linux/rfkill-regulator.h>
22
23struct rfkill_regulator_data {
24 struct rfkill *rf_kill;
25 bool reg_enabled;
26
27 struct regulator *vcc;
28};
29
30static int rfkill_regulator_set_block(void *data, bool blocked)
31{
32 struct rfkill_regulator_data *rfkill_data = data;
33
34 pr_debug("%s: blocked: %d\n", __func__, blocked);
35
36 if (blocked) {
37 if (rfkill_data->reg_enabled) {
38 regulator_disable(rfkill_data->vcc);
39 rfkill_data->reg_enabled = 0;
40 }
41 } else {
42 if (!rfkill_data->reg_enabled) {
43 regulator_enable(rfkill_data->vcc);
44 rfkill_data->reg_enabled = 1;
45 }
46 }
47
48 pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__,
49 regulator_is_enabled(rfkill_data->vcc));
50
51 return 0;
52}
53
54struct rfkill_ops rfkill_regulator_ops = {
55 .set_block = rfkill_regulator_set_block,
56};
57
58static int __devinit rfkill_regulator_probe(struct platform_device *pdev)
59{
60 struct rfkill_regulator_platform_data *pdata = pdev->dev.platform_data;
61 struct rfkill_regulator_data *rfkill_data;
62 struct regulator *vcc;
63 struct rfkill *rf_kill;
64 int ret = 0;
65
66 if (pdata == NULL) {
67 dev_err(&pdev->dev, "no platform data\n");
68 return -ENODEV;
69 }
70
71 if (pdata->name == NULL || pdata->type == 0) {
72 dev_err(&pdev->dev, "invalid name or type in platform data\n");
73 return -EINVAL;
74 }
75
76 vcc = regulator_get_exclusive(&pdev->dev, "vrfkill");
77 if (IS_ERR(vcc)) {
78 dev_err(&pdev->dev, "Cannot get vcc for %s\n", pdata->name);
79 ret = PTR_ERR(vcc);
80 goto out;
81 }
82
83 rfkill_data = kzalloc(sizeof(*rfkill_data), GFP_KERNEL);
84 if (rfkill_data == NULL) {
85 ret = -ENOMEM;
86 goto err_data_alloc;
87 }
88
89 rf_kill = rfkill_alloc(pdata->name, &pdev->dev,
90 pdata->type,
91 &rfkill_regulator_ops, rfkill_data);
92 if (rf_kill == NULL) {
93 dev_err(&pdev->dev, "Cannot alloc rfkill device\n");
94 ret = -ENOMEM;
95 goto err_rfkill_alloc;
96 }
97
98 if (regulator_is_enabled(vcc)) {
99 dev_dbg(&pdev->dev, "Regulator already enabled\n");
100 rfkill_data->reg_enabled = 1;
101 }
102 rfkill_data->vcc = vcc;
103 rfkill_data->rf_kill = rf_kill;
104
105 ret = rfkill_register(rf_kill);
106 if (ret) {
107 dev_err(&pdev->dev, "Cannot register rfkill device\n");
108 goto err_rfkill_register;
109 }
110
111 platform_set_drvdata(pdev, rfkill_data);
112 dev_info(&pdev->dev, "%s initialized\n", pdata->name);
113
114 return 0;
115
116err_rfkill_register:
117 rfkill_destroy(rf_kill);
118err_rfkill_alloc:
119 kfree(rfkill_data);
120err_data_alloc:
121 regulator_put(vcc);
122out:
123 return ret;
124}
125
126static int __devexit rfkill_regulator_remove(struct platform_device *pdev)
127{
128 struct rfkill_regulator_data *rfkill_data = platform_get_drvdata(pdev);
129 struct rfkill *rf_kill = rfkill_data->rf_kill;
130
131 rfkill_unregister(rf_kill);
132 rfkill_destroy(rf_kill);
133 regulator_put(rfkill_data->vcc);
134 kfree(rfkill_data);
135
136 return 0;
137}
138
139static struct platform_driver rfkill_regulator_driver = {
140 .probe = rfkill_regulator_probe,
141 .remove = __devexit_p(rfkill_regulator_remove),
142 .driver = {
143 .name = "rfkill-regulator",
144 .owner = THIS_MODULE,
145 },
146};
147
148static int __init rfkill_regulator_init(void)
149{
150 return platform_driver_register(&rfkill_regulator_driver);
151}
152module_init(rfkill_regulator_init);
153
154static void __exit rfkill_regulator_exit(void)
155{
156 platform_driver_unregister(&rfkill_regulator_driver);
157}
158module_exit(rfkill_regulator_exit);
159
160MODULE_AUTHOR("Guiming Zhuo <gmzhuo@gmail.com>");
161MODULE_AUTHOR("Antonio Ospite <ospite@studenti.unina.it>");
162MODULE_DESCRIPTION("Regulator consumer driver for rfkill");
163MODULE_LICENSE("GPL");
164MODULE_ALIAS("platform:rfkill-regulator");
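The new driver binds by platform device name and takes its rfkill name and type from platform data. A minimal board-file sketch of how it might be hooked up; the rfkill_regulator_platform_data field names are inferred from the probe code above and the <linux/rfkill-regulator.h> include, and the "vrfkill" supply mapping is an assumption, not a quotation of that header:

    #include <linux/platform_device.h>
    #include <linux/rfkill.h>
    #include <linux/rfkill-regulator.h>

    /* Assumed layout, mirroring pdata->name / pdata->type used in probe() */
    static struct rfkill_regulator_platform_data wlan_rfkill_pdata = {
            .name = "wlan-rfkill",
            .type = RFKILL_TYPE_WLAN,
    };

    static struct platform_device wlan_rfkill_device = {
            .name = "rfkill-regulator",      /* matches driver.name above */
            .id   = -1,
            .dev  = {
                    .platform_data = &wlan_rfkill_pdata,
            },
    };

    /* Board code would also register a regulator consumer supply named
     * "vrfkill" (requested via regulator_get_exclusive() in probe) mapping
     * onto the regulator that actually powers the radio, then call
     * platform_device_register(&wlan_rfkill_device). */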
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index d952e7eac188..f9ea925ad9cb 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -682,10 +682,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
682 if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) 682 if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
683 return -EINVAL; 683 return -EINVAL;
684 684
685 if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) { 685 if ((dev = rose_dev_get(&addr->srose_addr)) == NULL)
686 SOCK_DEBUG(sk, "ROSE: bind failed: invalid address\n");
687 return -EADDRNOTAVAIL; 686 return -EADDRNOTAVAIL;
688 }
689 687
690 source = &addr->srose_call; 688 source = &addr->srose_call;
691 689
@@ -716,7 +714,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
716 rose_insert_socket(sk); 714 rose_insert_socket(sk);
717 715
718 sock_reset_flag(sk, SOCK_ZAPPED); 716 sock_reset_flag(sk, SOCK_ZAPPED);
719 SOCK_DEBUG(sk, "ROSE: socket is bound\n"); 717
720 return 0; 718 return 0;
721} 719}
722 720
@@ -803,7 +801,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
803 801
804 rose_insert_socket(sk); /* Finish the bind */ 802 rose_insert_socket(sk); /* Finish the bind */
805 } 803 }
806rose_try_next_neigh:
807 rose->dest_addr = addr->srose_addr; 804 rose->dest_addr = addr->srose_addr;
808 rose->dest_call = addr->srose_call; 805 rose->dest_call = addr->srose_call;
809 rose->rand = ((long)rose & 0xFFFF) + rose->lci; 806 rose->rand = ((long)rose & 0xFFFF) + rose->lci;
@@ -865,12 +862,6 @@ rose_try_next_neigh:
865 } 862 }
866 863
867 if (sk->sk_state != TCP_ESTABLISHED) { 864 if (sk->sk_state != TCP_ESTABLISHED) {
868 /* Try next neighbour */
869 rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0);
870 if (rose->neighbour)
871 goto rose_try_next_neigh;
872
873 /* No more neighbours */
874 sock->state = SS_UNCONNECTED; 865 sock->state = SS_UNCONNECTED;
875 err = sock_error(sk); /* Always set at this point */ 866 err = sock_error(sk); /* Always set at this point */
876 goto out_release; 867 goto out_release;
@@ -985,7 +976,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
985 struct sock *make; 976 struct sock *make;
986 struct rose_sock *make_rose; 977 struct rose_sock *make_rose;
987 struct rose_facilities_struct facilities; 978 struct rose_facilities_struct facilities;
988 int n, len; 979 int n;
989 980
990 skb->sk = NULL; /* Initially we don't know who it's for */ 981 skb->sk = NULL; /* Initially we don't know who it's for */
991 982
@@ -994,9 +985,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
994 */ 985 */
995 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 986 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
996 987
997 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; 988 if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
998 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; 989 skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
999 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { 990 &facilities)) {
1000 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); 991 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
1001 return 0; 992 return 0;
1002 } 993 }
@@ -1116,10 +1107,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1116 srose.srose_digis[n] = rose->dest_digis[n]; 1107 srose.srose_digis[n] = rose->dest_digis[n];
1117 } 1108 }
1118 1109
1119 SOCK_DEBUG(sk, "ROSE: sendto: Addresses built.\n");
1120
1121 /* Build a packet */ 1110 /* Build a packet */
1122 SOCK_DEBUG(sk, "ROSE: sendto: building packet.\n");
1123 /* Sanity check the packet size */ 1111 /* Sanity check the packet size */
1124 if (len > 65535) 1112 if (len > 65535)
1125 return -EMSGSIZE; 1113 return -EMSGSIZE;
@@ -1134,7 +1122,6 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1134 /* 1122 /*
1135 * Put the data on the end 1123 * Put the data on the end
1136 */ 1124 */
1137 SOCK_DEBUG(sk, "ROSE: Appending user data\n");
1138 1125
1139 skb_reset_transport_header(skb); 1126 skb_reset_transport_header(skb);
1140 skb_put(skb, len); 1127 skb_put(skb, len);
@@ -1159,8 +1146,6 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1159 */ 1146 */
1160 asmptr = skb_push(skb, ROSE_MIN_LEN); 1147 asmptr = skb_push(skb, ROSE_MIN_LEN);
1161 1148
1162 SOCK_DEBUG(sk, "ROSE: Building Network Header.\n");
1163
1164 /* Build a ROSE Network header */ 1149 /* Build a ROSE Network header */
1165 asmptr[0] = ((rose->lci >> 8) & 0x0F) | ROSE_GFI; 1150 asmptr[0] = ((rose->lci >> 8) & 0x0F) | ROSE_GFI;
1166 asmptr[1] = (rose->lci >> 0) & 0xFF; 1151 asmptr[1] = (rose->lci >> 0) & 0xFF;
@@ -1169,10 +1154,6 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1169 if (qbit) 1154 if (qbit)
1170 asmptr[0] |= ROSE_Q_BIT; 1155 asmptr[0] |= ROSE_Q_BIT;
1171 1156
1172 SOCK_DEBUG(sk, "ROSE: Built header.\n");
1173
1174 SOCK_DEBUG(sk, "ROSE: Transmitting buffer\n");
1175
1176 if (sk->sk_state != TCP_ESTABLISHED) { 1157 if (sk->sk_state != TCP_ESTABLISHED) {
1177 kfree_skb(skb); 1158 kfree_skb(skb);
1178 return -ENOTCONN; 1159 return -ENOTCONN;
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index ae4a9d99aec7..344456206b70 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -73,9 +73,20 @@ static void rose_loopback_timer(unsigned long param)
73 unsigned int lci_i, lci_o; 73 unsigned int lci_i, lci_o;
74 74
75 while ((skb = skb_dequeue(&loopback_queue)) != NULL) { 75 while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
76 if (skb->len < ROSE_MIN_LEN) {
77 kfree_skb(skb);
78 continue;
79 }
76 lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); 80 lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
77 frametype = skb->data[2]; 81 frametype = skb->data[2];
78 dest = (rose_address *)(skb->data + 4); 82 if (frametype == ROSE_CALL_REQUEST &&
83 (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
84 skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
85 ROSE_CALL_REQ_ADDR_LEN_VAL)) {
86 kfree_skb(skb);
87 continue;
88 }
89 dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
79 lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i; 90 lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i;
80 91
81 skb_reset_transport_header(skb); 92 skb_reset_transport_header(skb);
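The bounds checks in the af_rose.c and rose_loopback.c hunks above, and in the rose_route.c hunk below, rely on new ROSE_CALL_REQ_* constants defined elsewhere in this series (include/net/rose.h) and not shown here. Judging from the open-coded offsets they replace (destination at data + 4, source at data + 9, facilities at data + len + 4 with len = 10 for two full addresses, and the 0xAA address-length byte that rose_subr.c now writes as ROSE_CALL_REQ_ADDR_LEN_VAL), the CALL REQUEST layout is roughly the following; treat the exact values as an inference, not a quotation of the header:

    /* Assumed ROSE CALL REQUEST layout behind the new constants */
    #define ROSE_CALL_REQ_ADDR_LEN_OFF    3    /* address-length byte        */
    #define ROSE_CALL_REQ_ADDR_LEN_VAL    0xAA /* two 10-digit BCD addresses */
    #define ROSE_CALL_REQ_DEST_ADDR_OFF   4    /* 5-byte destination address */
    #define ROSE_CALL_REQ_SRC_ADDR_OFF    9    /* 5-byte source address      */
    #define ROSE_CALL_REQ_FACILITIES_OFF  14   /* start of facilities field  */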
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index b4fdaac233f7..479cae57d187 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -587,7 +587,7 @@ static int rose_clear_routes(void)
587 587
588/* 588/*
589 * Check that the device given is a valid AX.25 interface that is "up". 589 * Check that the device given is a valid AX.25 interface that is "up".
590 * called whith RTNL 590 * called with RTNL
591 */ 591 */
592static struct net_device *rose_ax25_dev_find(char *devname) 592static struct net_device *rose_ax25_dev_find(char *devname)
593{ 593{
@@ -674,29 +674,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig
674 * Find a neighbour or a route given a ROSE address. 674 * Find a neighbour or a route given a ROSE address.
675 */ 675 */
676struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, 676struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
677 unsigned char *diagnostic, int new) 677 unsigned char *diagnostic, int route_frame)
678{ 678{
679 struct rose_neigh *res = NULL; 679 struct rose_neigh *res = NULL;
680 struct rose_node *node; 680 struct rose_node *node;
681 int failed = 0; 681 int failed = 0;
682 int i; 682 int i;
683 683
684 if (!new) spin_lock_bh(&rose_node_list_lock); 684 if (!route_frame) spin_lock_bh(&rose_node_list_lock);
685 for (node = rose_node_list; node != NULL; node = node->next) { 685 for (node = rose_node_list; node != NULL; node = node->next) {
686 if (rosecmpm(addr, &node->address, node->mask) == 0) { 686 if (rosecmpm(addr, &node->address, node->mask) == 0) {
687 for (i = 0; i < node->count; i++) { 687 for (i = 0; i < node->count; i++) {
688 if (new) { 688 if (node->neighbour[i]->restarted) {
689 if (node->neighbour[i]->restarted) { 689 res = node->neighbour[i];
690 res = node->neighbour[i]; 690 goto out;
691 goto out;
692 }
693 } 691 }
694 else { 692 }
693 }
694 }
695 if (!route_frame) { /* connect request */
696 for (node = rose_node_list; node != NULL; node = node->next) {
697 if (rosecmpm(addr, &node->address, node->mask) == 0) {
698 for (i = 0; i < node->count; i++) {
695 if (!rose_ftimer_running(node->neighbour[i])) { 699 if (!rose_ftimer_running(node->neighbour[i])) {
696 res = node->neighbour[i]; 700 res = node->neighbour[i];
701 failed = 0;
697 goto out; 702 goto out;
698 } else 703 }
699 failed = 1; 704 failed = 1;
700 } 705 }
701 } 706 }
702 } 707 }
@@ -711,8 +716,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
711 } 716 }
712 717
713out: 718out:
714 if (!new) spin_unlock_bh(&rose_node_list_lock); 719 if (!route_frame) spin_unlock_bh(&rose_node_list_lock);
715
716 return res; 720 return res;
717} 721}
718 722
@@ -857,7 +861,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
857 unsigned int lci, new_lci; 861 unsigned int lci, new_lci;
858 unsigned char cause, diagnostic; 862 unsigned char cause, diagnostic;
859 struct net_device *dev; 863 struct net_device *dev;
860 int len, res = 0; 864 int res = 0;
861 char buf[11]; 865 char buf[11];
862 866
863#if 0 867#if 0
@@ -865,10 +869,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
865 return res; 869 return res;
866#endif 870#endif
867 871
872 if (skb->len < ROSE_MIN_LEN)
873 return res;
868 frametype = skb->data[2]; 874 frametype = skb->data[2];
869 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); 875 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
870 src_addr = (rose_address *)(skb->data + 9); 876 if (frametype == ROSE_CALL_REQUEST &&
871 dest_addr = (rose_address *)(skb->data + 4); 877 (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
878 skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
879 ROSE_CALL_REQ_ADDR_LEN_VAL))
880 return res;
881 src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF);
882 dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
872 883
873 spin_lock_bh(&rose_neigh_list_lock); 884 spin_lock_bh(&rose_neigh_list_lock);
874 spin_lock_bh(&rose_route_list_lock); 885 spin_lock_bh(&rose_route_list_lock);
@@ -1006,12 +1017,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
1006 goto out; 1017 goto out;
1007 } 1018 }
1008 1019
1009 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
1010 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
1011
1012 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 1020 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
1013 1021
1014 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { 1022 if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
1023 skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
1024 &facilities)) {
1015 rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); 1025 rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76);
1016 goto out; 1026 goto out;
1017 } 1027 }
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 1734abba26a2..f6c71caa94b9 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -142,7 +142,7 @@ void rose_write_internal(struct sock *sk, int frametype)
142 *dptr++ = ROSE_GFI | lci1; 142 *dptr++ = ROSE_GFI | lci1;
143 *dptr++ = lci2; 143 *dptr++ = lci2;
144 *dptr++ = frametype; 144 *dptr++ = frametype;
145 *dptr++ = 0xAA; 145 *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL;
146 memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN); 146 memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN);
147 dptr += ROSE_ADDR_LEN; 147 dptr += ROSE_ADDR_LEN;
148 memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); 148 memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
@@ -246,12 +246,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
246 do { 246 do {
247 switch (*p & 0xC0) { 247 switch (*p & 0xC0) {
248 case 0x00: 248 case 0x00:
249 if (len < 2)
250 return -1;
249 p += 2; 251 p += 2;
250 n += 2; 252 n += 2;
251 len -= 2; 253 len -= 2;
252 break; 254 break;
253 255
254 case 0x40: 256 case 0x40:
257 if (len < 3)
258 return -1;
255 if (*p == FAC_NATIONAL_RAND) 259 if (*p == FAC_NATIONAL_RAND)
256 facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF); 260 facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF);
257 p += 3; 261 p += 3;
@@ -260,40 +264,61 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
260 break; 264 break;
261 265
262 case 0x80: 266 case 0x80:
267 if (len < 4)
268 return -1;
263 p += 4; 269 p += 4;
264 n += 4; 270 n += 4;
265 len -= 4; 271 len -= 4;
266 break; 272 break;
267 273
268 case 0xC0: 274 case 0xC0:
275 if (len < 2)
276 return -1;
269 l = p[1]; 277 l = p[1];
278 if (len < 2 + l)
279 return -1;
270 if (*p == FAC_NATIONAL_DEST_DIGI) { 280 if (*p == FAC_NATIONAL_DEST_DIGI) {
271 if (!fac_national_digis_received) { 281 if (!fac_national_digis_received) {
282 if (l < AX25_ADDR_LEN)
283 return -1;
272 memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN); 284 memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN);
273 facilities->source_ndigis = 1; 285 facilities->source_ndigis = 1;
274 } 286 }
275 } 287 }
276 else if (*p == FAC_NATIONAL_SRC_DIGI) { 288 else if (*p == FAC_NATIONAL_SRC_DIGI) {
277 if (!fac_national_digis_received) { 289 if (!fac_national_digis_received) {
290 if (l < AX25_ADDR_LEN)
291 return -1;
278 memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN); 292 memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN);
279 facilities->dest_ndigis = 1; 293 facilities->dest_ndigis = 1;
280 } 294 }
281 } 295 }
282 else if (*p == FAC_NATIONAL_FAIL_CALL) { 296 else if (*p == FAC_NATIONAL_FAIL_CALL) {
297 if (l < AX25_ADDR_LEN)
298 return -1;
283 memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN); 299 memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN);
284 } 300 }
285 else if (*p == FAC_NATIONAL_FAIL_ADD) { 301 else if (*p == FAC_NATIONAL_FAIL_ADD) {
302 if (l < 1 + ROSE_ADDR_LEN)
303 return -1;
286 memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN); 304 memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN);
287 } 305 }
288 else if (*p == FAC_NATIONAL_DIGIS) { 306 else if (*p == FAC_NATIONAL_DIGIS) {
307 if (l % AX25_ADDR_LEN)
308 return -1;
289 fac_national_digis_received = 1; 309 fac_national_digis_received = 1;
290 facilities->source_ndigis = 0; 310 facilities->source_ndigis = 0;
291 facilities->dest_ndigis = 0; 311 facilities->dest_ndigis = 0;
292 for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) { 312 for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) {
293 if (pt[6] & AX25_HBIT) 313 if (pt[6] & AX25_HBIT) {
314 if (facilities->dest_ndigis >= ROSE_MAX_DIGIS)
315 return -1;
294 memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN); 316 memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN);
295 else 317 } else {
318 if (facilities->source_ndigis >= ROSE_MAX_DIGIS)
319 return -1;
296 memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN); 320 memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN);
321 }
297 } 322 }
298 } 323 }
299 p += l + 2; 324 p += l + 2;
@@ -314,25 +339,38 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
314 do { 339 do {
315 switch (*p & 0xC0) { 340 switch (*p & 0xC0) {
316 case 0x00: 341 case 0x00:
342 if (len < 2)
343 return -1;
317 p += 2; 344 p += 2;
318 n += 2; 345 n += 2;
319 len -= 2; 346 len -= 2;
320 break; 347 break;
321 348
322 case 0x40: 349 case 0x40:
350 if (len < 3)
351 return -1;
323 p += 3; 352 p += 3;
324 n += 3; 353 n += 3;
325 len -= 3; 354 len -= 3;
326 break; 355 break;
327 356
328 case 0x80: 357 case 0x80:
358 if (len < 4)
359 return -1;
329 p += 4; 360 p += 4;
330 n += 4; 361 n += 4;
331 len -= 4; 362 len -= 4;
332 break; 363 break;
333 364
334 case 0xC0: 365 case 0xC0:
366 if (len < 2)
367 return -1;
335 l = p[1]; 368 l = p[1];
369
370 /* Prevent overflows*/
371 if (l < 10 || l > 20)
372 return -1;
373
336 if (*p == FAC_CCITT_DEST_NSAP) { 374 if (*p == FAC_CCITT_DEST_NSAP) {
337 memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); 375 memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN);
338 memcpy(callsign, p + 12, l - 10); 376 memcpy(callsign, p + 12, l - 10);
@@ -355,45 +393,44 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
355 return n; 393 return n;
356} 394}
357 395
358int rose_parse_facilities(unsigned char *p, 396int rose_parse_facilities(unsigned char *p, unsigned packet_len,
359 struct rose_facilities_struct *facilities) 397 struct rose_facilities_struct *facilities)
360{ 398{
361 int facilities_len, len; 399 int facilities_len, len;
362 400
363 facilities_len = *p++; 401 facilities_len = *p++;
364 402
365 if (facilities_len == 0) 403 if (facilities_len == 0 || (unsigned)facilities_len > packet_len)
366 return 0; 404 return 0;
367 405
368 while (facilities_len > 0) { 406 while (facilities_len >= 3 && *p == 0x00) {
369 if (*p == 0x00) { 407 facilities_len--;
370 facilities_len--; 408 p++;
371 p++; 409
372 410 switch (*p) {
373 switch (*p) { 411 case FAC_NATIONAL: /* National */
374 case FAC_NATIONAL: /* National */ 412 len = rose_parse_national(p + 1, facilities, facilities_len - 1);
375 len = rose_parse_national(p + 1, facilities, facilities_len - 1); 413 break;
376 facilities_len -= len + 1; 414
377 p += len + 1; 415 case FAC_CCITT: /* CCITT */
378 break; 416 len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
379 417 break;
380 case FAC_CCITT: /* CCITT */ 418
381 len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); 419 default:
382 facilities_len -= len + 1; 420 printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
383 p += len + 1; 421 len = 1;
384 break; 422 break;
385 423 }
386 default: 424
387 printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); 425 if (len < 0)
388 facilities_len--; 426 return 0;
389 p++; 427 if (WARN_ON(len >= facilities_len))
390 break; 428 return 0;
391 } 429 facilities_len -= len + 1;
392 } else 430 p += len + 1;
393 break; /* Error in facilities format */
394 } 431 }
395 432
396 return 1; 433 return facilities_len == 0;
397} 434}
398 435
399static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) 436static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose)
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index b6ffe4e1b84a..f99cfce7ca97 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -375,7 +375,6 @@ protocol_error:
375 */ 375 */
376static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) 376static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
377{ 377{
378 struct rxrpc_skb_priv *sp;
379 unsigned long _skb; 378 unsigned long _skb;
380 int tail = call->acks_tail, old_tail; 379 int tail = call->acks_tail, old_tail;
381 int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); 380 int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
@@ -387,7 +386,6 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
387 while (call->acks_hard < hard) { 386 while (call->acks_hard < hard) {
388 smp_read_barrier_depends(); 387 smp_read_barrier_depends();
389 _skb = call->acks_window[tail] & ~1; 388 _skb = call->acks_window[tail] & ~1;
390 sp = rxrpc_skb((struct sk_buff *) _skb);
391 rxrpc_free_skb((struct sk_buff *) _skb); 389 rxrpc_free_skb((struct sk_buff *) _skb);
392 old_tail = tail; 390 old_tail = tail;
393 tail = (tail + 1) & (call->acks_winsz - 1); 391 tail = (tail + 1) & (call->acks_winsz - 1);
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
index 0505cdc4d6d4..e7ed43a54c41 100644
--- a/net/rxrpc/ar-connevent.c
+++ b/net/rxrpc/ar-connevent.c
@@ -259,7 +259,6 @@ void rxrpc_process_connection(struct work_struct *work)
259{ 259{
260 struct rxrpc_connection *conn = 260 struct rxrpc_connection *conn =
261 container_of(work, struct rxrpc_connection, processor); 261 container_of(work, struct rxrpc_connection, processor);
262 struct rxrpc_skb_priv *sp;
263 struct sk_buff *skb; 262 struct sk_buff *skb;
264 u32 abort_code = RX_PROTOCOL_ERROR; 263 u32 abort_code = RX_PROTOCOL_ERROR;
265 int ret; 264 int ret;
@@ -276,8 +275,6 @@ void rxrpc_process_connection(struct work_struct *work)
276 /* go through the conn-level event packets, releasing the ref on this 275 /* go through the conn-level event packets, releasing the ref on this
277 * connection that each one has when we've finished with it */ 276 * connection that each one has when we've finished with it */
278 while ((skb = skb_dequeue(&conn->rx_queue))) { 277 while ((skb = skb_dequeue(&conn->rx_queue))) {
279 sp = rxrpc_skb(skb);
280
281 ret = rxrpc_process_event(conn, skb, &abort_code); 278 ret = rxrpc_process_event(conn, skb, &abort_code);
282 switch (ret) { 279 switch (ret) {
283 case -EPROTO: 280 case -EPROTO:
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index d4d1ae26d293..5d6b572a6704 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -139,7 +139,7 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
139 struct rxrpc_transport *trans = 139 struct rxrpc_transport *trans =
140 container_of(work, struct rxrpc_transport, error_handler); 140 container_of(work, struct rxrpc_transport, error_handler);
141 struct sk_buff *skb; 141 struct sk_buff *skb;
142 int local, err; 142 int err;
143 143
144 _enter(""); 144 _enter("");
145 145
@@ -157,7 +157,6 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
157 157
158 switch (ee->ee_origin) { 158 switch (ee->ee_origin) {
159 case SO_EE_ORIGIN_ICMP: 159 case SO_EE_ORIGIN_ICMP:
160 local = 0;
161 switch (ee->ee_type) { 160 switch (ee->ee_type) {
162 case ICMP_DEST_UNREACH: 161 case ICMP_DEST_UNREACH:
163 switch (ee->ee_code) { 162 switch (ee->ee_code) {
@@ -207,7 +206,6 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
207 case SO_EE_ORIGIN_LOCAL: 206 case SO_EE_ORIGIN_LOCAL:
208 _proto("Rx Received local error { error=%d }", 207 _proto("Rx Received local error { error=%d }",
209 ee->ee_errno); 208 ee->ee_errno);
210 local = 1;
211 break; 209 break;
212 210
213 case SO_EE_ORIGIN_NONE: 211 case SO_EE_ORIGIN_NONE:
@@ -215,7 +213,6 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
215 default: 213 default:
216 _proto("Rx Received error report { orig=%u }", 214 _proto("Rx Received error report { orig=%u }",
217 ee->ee_origin); 215 ee->ee_origin);
218 local = 0;
219 break; 216 break;
220 } 217 }
221 218
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 89315009bab1..1a2b0633fece 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
423 goto protocol_error; 423 goto protocol_error;
424 } 424 }
425 425
426 case RXRPC_PACKET_TYPE_ACKALL:
426 case RXRPC_PACKET_TYPE_ACK: 427 case RXRPC_PACKET_TYPE_ACK:
427 /* ACK processing is done in process context */ 428 /* ACK processing is done in process context */
428 read_lock_bh(&call->state_lock); 429 read_lock_bh(&call->state_lock);
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index d763793d39de..43ea7de2fc8e 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -25,6 +25,7 @@
25#include <keys/user-type.h> 25#include <keys/user-type.h>
26#include "ar-internal.h" 26#include "ar-internal.h"
27 27
28static int rxrpc_vet_description_s(const char *);
28static int rxrpc_instantiate(struct key *, const void *, size_t); 29static int rxrpc_instantiate(struct key *, const void *, size_t);
29static int rxrpc_instantiate_s(struct key *, const void *, size_t); 30static int rxrpc_instantiate_s(struct key *, const void *, size_t);
30static void rxrpc_destroy(struct key *); 31static void rxrpc_destroy(struct key *);
@@ -52,6 +53,7 @@ EXPORT_SYMBOL(key_type_rxrpc);
52 */ 53 */
53struct key_type key_type_rxrpc_s = { 54struct key_type key_type_rxrpc_s = {
54 .name = "rxrpc_s", 55 .name = "rxrpc_s",
56 .vet_description = rxrpc_vet_description_s,
55 .instantiate = rxrpc_instantiate_s, 57 .instantiate = rxrpc_instantiate_s,
56 .match = user_match, 58 .match = user_match,
57 .destroy = rxrpc_destroy_s, 59 .destroy = rxrpc_destroy_s,
@@ -59,6 +61,23 @@ struct key_type key_type_rxrpc_s = {
59}; 61};
60 62
61/* 63/*
64 * Vet the description for an RxRPC server key
65 */
66static int rxrpc_vet_description_s(const char *desc)
67{
68 unsigned long num;
69 char *p;
70
71 num = simple_strtoul(desc, &p, 10);
72 if (*p != ':' || num > 65535)
73 return -EINVAL;
74 num = simple_strtoul(p + 1, &p, 10);
75 if (*p || num < 1 || num > 255)
76 return -EINVAL;
77 return 0;
78}
79
80/*
62 * parse an RxKAD type XDR format token 81 * parse an RxKAD type XDR format token
63 * - the caller guarantees we have at least 4 words 82 * - the caller guarantees we have at least 4 words
64 */ 83 */
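The new vet hook only constrains the server key's description: it must parse as "<service>:<security-index>", with the service id in 0..65535 and the security index in 1..255. A hedged userspace sketch using keyutils' add_key(); the 8-byte payload for security index 2 (rxkad) is an assumption based on rxrpc_instantiate_s(), which is not shown in this hunk:

    /* build with: cc example.c -lkeyutils */
    #include <keyutils.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned char secret[8] = { 0 };  /* the server's session key */

            /* "52:2" = service 52, security index 2; passes the new check */
            key_serial_t key = add_key("rxrpc_s", "52:2",
                                       secret, sizeof(secret),
                                       KEY_SPEC_PROCESS_KEYRING);
            if (key == -1) {
                    perror("add_key");
                    return 1;
            }
            printf("installed rxrpc_s key %d\n", key);
            return 0;
    }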
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index a53fb25a64ed..2754f098d436 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -36,31 +36,16 @@ static void rxrpc_destroy_peer(struct work_struct *work);
36static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) 36static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
37{ 37{
38 struct rtable *rt; 38 struct rtable *rt;
39 struct flowi fl; 39 struct flowi4 fl4;
40 int ret;
41 40
42 peer->if_mtu = 1500; 41 peer->if_mtu = 1500;
43 42
44 memset(&fl, 0, sizeof(fl)); 43 rt = ip_route_output_ports(&init_net, &fl4, NULL,
45 44 peer->srx.transport.sin.sin_addr.s_addr, 0,
46 switch (peer->srx.transport.family) { 45 htons(7000), htons(7001),
47 case AF_INET: 46 IPPROTO_UDP, 0, 0);
48 fl.oif = 0; 47 if (IS_ERR(rt)) {
49 fl.proto = IPPROTO_UDP, 48 _leave(" [route err %ld]", PTR_ERR(rt));
50 fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr;
51 fl.fl4_src = 0;
52 fl.fl4_tos = 0;
53 /* assume AFS.CM talking to AFS.FS */
54 fl.fl_ip_sport = htons(7001);
55 fl.fl_ip_dport = htons(7000);
56 break;
57 default:
58 BUG();
59 }
60
61 ret = ip_route_output_key(&init_net, &rt, &fl);
62 if (ret < 0) {
63 _leave(" [route err %d]", ret);
64 return; 49 return;
65 } 50 }
66 51
@@ -172,6 +157,7 @@ struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp)
172 /* we can now add the new candidate to the list */ 157 /* we can now add the new candidate to the list */
173 peer = candidate; 158 peer = candidate;
174 candidate = NULL; 159 candidate = NULL;
160 usage = atomic_read(&peer->usage);
175 161
176 list_add_tail(&peer->link, &rxrpc_peers); 162 list_add_tail(&peer->link, &rxrpc_peers);
177 write_unlock_bh(&rxrpc_peer_lock); 163 write_unlock_bh(&rxrpc_peer_lock);
@@ -186,7 +172,7 @@ success:
186 &peer->srx.transport.sin.sin_addr, 172 &peer->srx.transport.sin.sin_addr,
187 ntohs(peer->srx.transport.sin.sin_port)); 173 ntohs(peer->srx.transport.sin.sin_port));
188 174
189 _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); 175 _leave(" = %p {u=%d}", peer, usage);
190 return peer; 176 return peer;
191 177
192 /* we found the peer in the list immediately */ 178 /* we found the peer in the list immediately */
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 5e0226fe587e..92df566930b9 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -111,6 +111,7 @@ struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local,
111 /* we can now add the new candidate to the list */ 111 /* we can now add the new candidate to the list */
112 trans = candidate; 112 trans = candidate;
113 candidate = NULL; 113 candidate = NULL;
114 usage = atomic_read(&trans->usage);
114 115
115 rxrpc_get_local(trans->local); 116 rxrpc_get_local(trans->local);
116 atomic_inc(&trans->peer->usage); 117 atomic_inc(&trans->peer->usage);
@@ -125,7 +126,7 @@ success:
125 trans->local->debug_id, 126 trans->local->debug_id,
126 trans->peer->debug_id); 127 trans->peer->debug_id);
127 128
128 _leave(" = %p {u=%d}", trans, atomic_read(&trans->usage)); 129 _leave(" = %p {u=%d}", trans, usage);
129 return trans; 130 return trans;
130 131
131 /* we found the transport in the list immediately */ 132 /* we found the transport in the list immediately */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f04d4a484d53..2590e91b3289 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -126,6 +126,17 @@ config NET_SCH_RED
126 To compile this code as a module, choose M here: the 126 To compile this code as a module, choose M here: the
127 module will be called sch_red. 127 module will be called sch_red.
128 128
129config NET_SCH_SFB
130 tristate "Stochastic Fair Blue (SFB)"
131 ---help---
132 Say Y here if you want to use the Stochastic Fair Blue (SFB)
133 packet scheduling algorithm.
134
135 See the top of <file:net/sched/sch_sfb.c> for more details.
136
137 To compile this code as a module, choose M here: the
138 module will be called sch_sfb.
139
129config NET_SCH_SFQ 140config NET_SCH_SFQ
130 tristate "Stochastic Fairness Queueing (SFQ)" 141 tristate "Stochastic Fairness Queueing (SFQ)"
131 ---help--- 142 ---help---
@@ -205,6 +216,40 @@ config NET_SCH_DRR
205 216
206 If unsure, say N. 217 If unsure, say N.
207 218
219config NET_SCH_MQPRIO
220 tristate "Multi-queue priority scheduler (MQPRIO)"
221 help
222 Say Y here if you want to use the Multi-queue Priority scheduler.
223 This scheduler allows QOS to be offloaded on NICs that have support
224 for offloading QOS schedulers.
225
226 To compile this driver as a module, choose M here: the module will
227 be called sch_mqprio.
228
229 If unsure, say N.
230
231config NET_SCH_CHOKE
232 tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
233 help
234 Say Y here if you want to use the CHOKe packet scheduler (CHOose
235 and Keep for responsive flows, CHOose and Kill for unresponsive
236 flows). This is a variation of RED which tries to penalize flows
237 that monopolize the queue.
238
239 To compile this code as a module, choose M here: the
240 module will be called sch_choke.
241
242config NET_SCH_QFQ
243 tristate "Quick Fair Queueing scheduler (QFQ)"
244 help
245 Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ)
246 packet scheduling algorithm.
247
248 To compile this driver as a module, choose M here: the module
249 will be called sch_qfq.
250
251 If unsure, say N.
252
208config NET_SCH_INGRESS 253config NET_SCH_INGRESS
209 tristate "Ingress Qdisc" 254 tristate "Ingress Qdisc"
210 depends on NET_CLS_ACT 255 depends on NET_CLS_ACT
@@ -243,7 +288,8 @@ config NET_CLS_TCINDEX
243 288
244config NET_CLS_ROUTE4 289config NET_CLS_ROUTE4
245 tristate "Routing decision (ROUTE)" 290 tristate "Routing decision (ROUTE)"
246 select NET_CLS_ROUTE 291 depends on INET
292 select IP_ROUTE_CLASSID
247 select NET_CLS 293 select NET_CLS
248 ---help--- 294 ---help---
249 If you say Y here, you will be able to classify packets 295 If you say Y here, you will be able to classify packets
@@ -252,9 +298,6 @@ config NET_CLS_ROUTE4
252 To compile this code as a module, choose M here: the 298 To compile this code as a module, choose M here: the
253 module will be called cls_route. 299 module will be called cls_route.
254 300
255config NET_CLS_ROUTE
256 bool
257
258config NET_CLS_FW 301config NET_CLS_FW
259 tristate "Netfilter mark (FW)" 302 tristate "Netfilter mark (FW)"
260 select NET_CLS 303 select NET_CLS
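The CHOKe entry above names the mechanism; here is a minimal standalone sketch of the drop decision it refers to (purely illustrative, not the sch_choke.c implementation, which keys on real flow classification and RED-style average queue length):

    #include <stdbool.h>
    #include <stdlib.h>

    /* Under congestion, compare the arriving packet's flow id with that of
     * one packet drawn at random from the queue; a match drops both, so a
     * flow is penalized roughly in proportion to its share of the queue. */
    static bool choke_match_drop(const unsigned int *queued_flow_ids,
                                 unsigned int qlen,
                                 unsigned int incoming_flow_id)
    {
            if (qlen == 0)
                    return false;
            return queued_flow_ids[rand() % qlen] == incoming_flow_id;
    }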
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 960f5dba6304..dc5889c0a15a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o
24obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o 24obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
25obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o 25obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
26obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o 26obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
27obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
27obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o 28obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
28obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o 29obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
29obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o 30obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
@@ -32,6 +33,10 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
32obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o 33obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
33obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o 34obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
34obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o 35obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
36obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
37obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
38obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
39
35obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 40obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
36obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 41obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
37obj-$(CONFIG_NET_CLS_FW) += cls_fw.o 42obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f89e7e0..a606025814a1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,11 +26,6 @@
26#include <net/act_api.h> 26#include <net/act_api.h>
27#include <net/netlink.h> 27#include <net/netlink.h>
28 28
29static void tcf_common_free_rcu(struct rcu_head *head)
30{
31 kfree(container_of(head, struct tcf_common, tcfc_rcu));
32}
33
34void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) 29void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
35{ 30{
36 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 31 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -47,7 +42,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
47 * gen_estimator est_timer() might access p->tcfc_lock 42 * gen_estimator est_timer() might access p->tcfc_lock
48 * or bstats, wait a RCU grace period before freeing p 43 * or bstats, wait a RCU grace period before freeing p
49 */ 44 */
50 call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); 45 kfree_rcu(p, tcfc_rcu);
51 return; 46 return;
52 } 47 }
53 } 48 }
@@ -78,7 +73,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
78 struct tc_action *a, struct tcf_hashinfo *hinfo) 73 struct tc_action *a, struct tcf_hashinfo *hinfo)
79{ 74{
80 struct tcf_common *p; 75 struct tcf_common *p;
81 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; 76 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
82 struct nlattr *nest; 77 struct nlattr *nest;
83 78
84 read_lock_bh(hinfo->lock); 79 read_lock_bh(hinfo->lock);
@@ -126,7 +121,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
126{ 121{
127 struct tcf_common *p, *s_p; 122 struct tcf_common *p, *s_p;
128 struct nlattr *nest; 123 struct nlattr *nest;
129 int i= 0, n_i = 0; 124 int i = 0, n_i = 0;
130 125
131 nest = nla_nest_start(skb, a->order); 126 nest = nla_nest_start(skb, a->order);
132 if (nest == NULL) 127 if (nest == NULL)
@@ -138,7 +133,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
138 while (p != NULL) { 133 while (p != NULL) {
139 s_p = p->tcfc_next; 134 s_p = p->tcfc_next;
140 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) 135 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
141 module_put(a->ops->owner); 136 module_put(a->ops->owner);
142 n_i++; 137 n_i++;
143 p = s_p; 138 p = s_p;
144 } 139 }
@@ -447,7 +442,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
447 nest = nla_nest_start(skb, TCA_OPTIONS); 442 nest = nla_nest_start(skb, TCA_OPTIONS);
448 if (nest == NULL) 443 if (nest == NULL)
449 goto nla_put_failure; 444 goto nla_put_failure;
450 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { 445 err = tcf_action_dump_old(skb, a, bind, ref);
446 if (err > 0) {
451 nla_nest_end(skb, nest); 447 nla_nest_end(skb, nest);
452 return err; 448 return err;
453 } 449 }
@@ -491,7 +487,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
491 struct tc_action *a; 487 struct tc_action *a;
492 struct tc_action_ops *a_o; 488 struct tc_action_ops *a_o;
493 char act_name[IFNAMSIZ]; 489 char act_name[IFNAMSIZ];
494 struct nlattr *tb[TCA_ACT_MAX+1]; 490 struct nlattr *tb[TCA_ACT_MAX + 1];
495 struct nlattr *kind; 491 struct nlattr *kind;
496 int err; 492 int err;
497 493
@@ -549,9 +545,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
549 goto err_free; 545 goto err_free;
550 546
551 /* module count goes up only when brand new policy is created 547 /* module count goes up only when brand new policy is created
552 if it exists and is only bound to in a_o->init() then 548 * if it exists and is only bound to in a_o->init() then
553 ACT_P_CREATED is not returned (a zero is). 549 * ACT_P_CREATED is not returned (a zero is).
554 */ 550 */
555 if (err != ACT_P_CREATED) 551 if (err != ACT_P_CREATED)
556 module_put(a_o->owner); 552 module_put(a_o->owner);
557 a->ops = a_o; 553 a->ops = a_o;
@@ -569,7 +565,7 @@ err_out:
569struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, 565struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
570 char *name, int ovr, int bind) 566 char *name, int ovr, int bind)
571{ 567{
572 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 568 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
573 struct tc_action *head = NULL, *act, *act_prev = NULL; 569 struct tc_action *head = NULL, *act, *act_prev = NULL;
574 int err; 570 int err;
575 int i; 571 int i;
@@ -697,7 +693,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
697static struct tc_action * 693static struct tc_action *
698tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) 694tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
699{ 695{
700 struct nlattr *tb[TCA_ACT_MAX+1]; 696 struct nlattr *tb[TCA_ACT_MAX + 1];
701 struct tc_action *a; 697 struct tc_action *a;
702 int index; 698 int index;
703 int err; 699 int err;
@@ -770,7 +766,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
770 struct tcamsg *t; 766 struct tcamsg *t;
771 struct netlink_callback dcb; 767 struct netlink_callback dcb;
772 struct nlattr *nest; 768 struct nlattr *nest;
773 struct nlattr *tb[TCA_ACT_MAX+1]; 769 struct nlattr *tb[TCA_ACT_MAX + 1];
774 struct nlattr *kind; 770 struct nlattr *kind;
775 struct tc_action *a = create_a(0); 771 struct tc_action *a = create_a(0);
776 int err = -ENOMEM; 772 int err = -ENOMEM;
@@ -821,7 +817,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
821 nlh->nlmsg_flags |= NLM_F_ROOT; 817 nlh->nlmsg_flags |= NLM_F_ROOT;
822 module_put(a->ops->owner); 818 module_put(a->ops->owner);
823 kfree(a); 819 kfree(a);
824 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 820 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
821 n->nlmsg_flags & NLM_F_ECHO);
825 if (err > 0) 822 if (err > 0)
826 return 0; 823 return 0;
827 824
@@ -842,14 +839,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
842 u32 pid, int event) 839 u32 pid, int event)
843{ 840{
844 int i, ret; 841 int i, ret;
845 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 842 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
846 struct tc_action *head = NULL, *act, *act_prev = NULL; 843 struct tc_action *head = NULL, *act, *act_prev = NULL;
847 844
848 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); 845 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
849 if (ret < 0) 846 if (ret < 0)
850 return ret; 847 return ret;
851 848
852 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 849 if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
853 if (tb[1] != NULL) 850 if (tb[1] != NULL)
854 return tca_action_flush(net, tb[1], n, pid); 851 return tca_action_flush(net, tb[1], n, pid);
855 else 852 else
@@ -892,7 +889,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
892 /* now do the delete */ 889 /* now do the delete */
893 tcf_action_destroy(head, 0); 890 tcf_action_destroy(head, 0);
894 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, 891 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
895 n->nlmsg_flags&NLM_F_ECHO); 892 n->nlmsg_flags & NLM_F_ECHO);
896 if (ret > 0) 893 if (ret > 0)
897 return 0; 894 return 0;
898 return ret; 895 return ret;
@@ -936,7 +933,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
936 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 933 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
937 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 934 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
938 935
939 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 936 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
940 if (err > 0) 937 if (err > 0)
941 err = 0; 938 err = 0;
942 return err; 939 return err;
@@ -967,7 +964,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
967 964
968 /* dump then free all the actions after update; inserted policy 965 /* dump then free all the actions after update; inserted policy
969 * stays intact 966 * stays intact
970 * */ 967 */
971 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); 968 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
972 for (a = act; a; a = act) { 969 for (a = act; a; a = act) {
973 act = a->next; 970 act = a->next;
@@ -993,17 +990,16 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
993 return -EINVAL; 990 return -EINVAL;
994 } 991 }
995 992
996 /* n->nlmsg_flags&NLM_F_CREATE 993 /* n->nlmsg_flags & NLM_F_CREATE */
997 * */
998 switch (n->nlmsg_type) { 994 switch (n->nlmsg_type) {
999 case RTM_NEWACTION: 995 case RTM_NEWACTION:
1000 /* we are going to assume all other flags 996 /* we are going to assume all other flags
1001 * imply create only if it doesnt exist 997 * imply create only if it doesn't exist
1002 * Note that CREATE | EXCL implies that 998 * Note that CREATE | EXCL implies that
1003 * but since we want avoid ambiguity (eg when flags 999 * but since we want avoid ambiguity (eg when flags
1004 * is zero) then just set this 1000 * is zero) then just set this
1005 */ 1001 */
1006 if (n->nlmsg_flags&NLM_F_REPLACE) 1002 if (n->nlmsg_flags & NLM_F_REPLACE)
1007 ovr = 1; 1003 ovr = 1;
1008replay: 1004replay:
1009 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); 1005 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1024,7 @@ replay:
1028static struct nlattr * 1024static struct nlattr *
1029find_dump_kind(const struct nlmsghdr *n) 1025find_dump_kind(const struct nlmsghdr *n)
1030{ 1026{
1031 struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; 1027 struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
1032 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; 1028 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
1033 struct nlattr *nla[TCAA_MAX + 1]; 1029 struct nlattr *nla[TCAA_MAX + 1];
1034 struct nlattr *kind; 1030 struct nlattr *kind;
@@ -1071,9 +1067,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1071 } 1067 }
1072 1068
1073 a_o = tc_lookup_action(kind); 1069 a_o = tc_lookup_action(kind);
1074 if (a_o == NULL) { 1070 if (a_o == NULL)
1075 return 0; 1071 return 0;
1076 }
1077 1072
1078 memset(&a, 0, sizeof(struct tc_action)); 1073 memset(&a, 0, sizeof(struct tc_action));
1079 a.ops = a_o; 1074 a.ops = a_o;
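
The act_api.c hunks above drop tcf_common_free_rcu() and call kfree_rcu() instead: when an RCU callback does nothing but kfree() the enclosing object, the helper can queue the free directly. A minimal sketch of the before/after pattern, using an illustrative structure (example_obj is not from this patch):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_obj {
	int data;
	struct rcu_head rcu;
};

/* Old pattern: a dedicated callback handed to call_rcu(). */
static void example_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct example_obj, rcu));
}

static void example_release_old(struct example_obj *p)
{
	call_rcu(&p->rcu, example_free_rcu);
}

/* New pattern: kfree_rcu() frees p after a grace period, no callback needed. */
static void example_release_new(struct example_obj *p)
{
	kfree_rcu(p, rcu);
}

Because kfree_rcu() does not point the RCU callback at module code, the rcu_barrier() that act_police.c used to run on module unload becomes unnecessary, which is why the police cleanup hunk further down removes it as well.
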
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 83ddfc07e45d..6cdf9abe475f 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
63 if (nla == NULL) 63 if (nla == NULL)
64 return -EINVAL; 64 return -EINVAL;
65 65
66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); 66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0) 67 if (err < 0)
68 return err; 68 return err;
69 69
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a4c0b4..2b4ab4b05ce8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
50} 50}
51 51
52typedef int (*g_rand)(struct tcf_gact *gact); 52typedef int (*g_rand)(struct tcf_gact *gact);
53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; 53static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
54#endif /* CONFIG_GACT_PROB */ 54#endif /* CONFIG_GACT_PROB */
55 55
56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { 56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), 89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
90 bind, &gact_idx_gen, &gact_hash_info); 90 bind, &gact_idx_gen, &gact_hash_info);
91 if (IS_ERR(pc)) 91 if (IS_ERR(pc))
92 return PTR_ERR(pc); 92 return PTR_ERR(pc);
93 ret = ACT_P_CREATED; 93 ret = ACT_P_CREATED;
94 } else { 94 } else {
95 if (!ovr) { 95 if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
205static int __init gact_init_module(void) 205static int __init gact_init_module(void)
206{ 206{
207#ifdef CONFIG_GACT_PROB 207#ifdef CONFIG_GACT_PROB
208 printk(KERN_INFO "GACT probability on\n"); 208 pr_info("GACT probability on\n");
209#else 209#else
210 printk(KERN_INFO "GACT probability NOT on\n"); 210 pr_info("GACT probability NOT on\n");
211#endif 211#endif
212 return tcf_register_action(&act_gact_ops); 212 return tcf_register_action(&act_gact_ops);
213} 213}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c2a7c20e81c1..9fc211a1b20e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, 138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
139 &ipt_idx_gen, &ipt_hash_info); 139 &ipt_idx_gen, &ipt_hash_info);
140 if (IS_ERR(pc)) 140 if (IS_ERR(pc))
141 return PTR_ERR(pc); 141 return PTR_ERR(pc);
142 ret = ACT_P_CREATED; 142 ret = ACT_P_CREATED;
143 } else { 143 } else {
144 if (!ovr) { 144 if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
162 if (unlikely(!t)) 162 if (unlikely(!t))
163 goto err2; 163 goto err2;
164 164
165 if ((err = ipt_init_target(t, tname, hook)) < 0) 165 err = ipt_init_target(t, tname, hook);
166 if (err < 0)
166 goto err3; 167 goto err3;
167 168
168 spin_lock_bh(&ipt->tcf_lock); 169 spin_lock_bh(&ipt->tcf_lock);
@@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
212 bstats_update(&ipt->tcf_bstats, skb); 213 bstats_update(&ipt->tcf_bstats, skb);
213 214
214 /* yes, we have to worry about both in and out dev 215 /* yes, we have to worry about both in and out dev
215 worry later - danger - this API seems to have changed 216 * worry later - danger - this API seems to have changed
216 from earlier kernels */ 217 * from earlier kernels
218 */
217 par.in = skb->dev; 219 par.in = skb->dev;
218 par.out = NULL; 220 par.out = NULL;
219 par.hooknum = ipt->tcfi_hook; 221 par.hooknum = ipt->tcfi_hook;
@@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
253 struct tc_cnt c; 255 struct tc_cnt c;
254 256
255 /* for simple targets kernel size == user size 257 /* for simple targets kernel size == user size
256 ** user name = target name 258 * user name = target name
257 ** for foolproof you need to not assume this 259 * for foolproof you need to not assume this
258 */ 260 */
259 261
260 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); 262 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
261 if (unlikely(!t)) 263 if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index d765067e99db..961386e2f2c0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
41 .lock = &mirred_lock, 41 .lock = &mirred_lock,
42}; 42};
43 43
44static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) 44static int tcf_mirred_release(struct tcf_mirred *m, int bind)
45{ 45{
46 if (m) { 46 if (m) {
47 if (bind) 47 if (bind)
48 m->tcf_bindcnt--; 48 m->tcf_bindcnt--;
49 m->tcf_refcnt--; 49 m->tcf_refcnt--;
50 if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { 50 if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
51 list_del(&m->tcfm_list); 51 list_del(&m->tcfm_list);
52 if (m->tcfm_dev) 52 if (m->tcfm_dev)
53 dev_put(m->tcfm_dev); 53 dev_put(m->tcfm_dev);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 178a4bd7b7cb..762b027650a9 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
70 &nat_idx_gen, &nat_hash_info); 70 &nat_idx_gen, &nat_hash_info);
71 if (IS_ERR(pc)) 71 if (IS_ERR(pc))
72 return PTR_ERR(pc); 72 return PTR_ERR(pc);
73 p = to_tcf_nat(pc); 73 p = to_tcf_nat(pc);
74 ret = ACT_P_CREATED; 74 ret = ACT_P_CREATED;
75 } else { 75 } else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 445bef716f77..7affe9a92757 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
71 &pedit_idx_gen, &pedit_hash_info); 71 &pedit_idx_gen, &pedit_hash_info);
72 if (IS_ERR(pc)) 72 if (IS_ERR(pc))
73 return PTR_ERR(pc); 73 return PTR_ERR(pc);
74 p = to_pedit(pc); 74 p = to_pedit(pc);
75 keys = kmalloc(ksize, GFP_KERNEL); 75 keys = kmalloc(ksize, GFP_KERNEL);
76 if (keys == NULL) { 76 if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
127 int i, munged = 0; 127 int i, munged = 0;
128 unsigned int off; 128 unsigned int off;
129 129
130 if (skb_cloned(skb)) { 130 if (skb_cloned(skb) &&
131 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 131 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
132 return p->tcf_action; 132 return p->tcf_action;
133 }
134 }
135 133
136 off = skb_network_offset(skb); 134 off = skb_network_offset(skb);
137 135
@@ -163,7 +161,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
163 } 161 }
164 if (offset > 0 && offset > skb->len) { 162 if (offset > 0 && offset > skb->len) {
165 pr_info("tc filter pedit" 163 pr_info("tc filter pedit"
166 " offset %d cant exceed pkt length %d\n", 164 " offset %d can't exceed pkt length %d\n",
167 offset, skb->len); 165 offset, skb->len);
168 goto bad; 166 goto bad;
169 } 167 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e2f08b1e2e58..b3b9b32f4e00 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
22#include <net/act_api.h> 22#include <net/act_api.h>
23#include <net/netlink.h> 23#include <net/netlink.h>
24 24
25#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) 25#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L)
26#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) 26#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
27 27
28#define POL_TAB_MASK 15 28#define POL_TAB_MASK 15
29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
37}; 37};
38 38
39/* old policer structure from before tc actions */ 39/* old policer structure from before tc actions */
40struct tc_police_compat 40struct tc_police_compat {
41{
42 u32 index; 41 u32 index;
43 int action; 42 int action;
44 u32 limit; 43 u32 limit;
@@ -97,11 +96,6 @@ nla_put_failure:
97 goto done; 96 goto done;
98} 97}
99 98
100static void tcf_police_free_rcu(struct rcu_head *head)
101{
102 kfree(container_of(head, struct tcf_police, tcf_rcu));
103}
104
105static void tcf_police_destroy(struct tcf_police *p) 99static void tcf_police_destroy(struct tcf_police *p)
106{ 100{
107 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 101 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -122,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p)
122 * gen_estimator est_timer() might access p->tcf_lock 116 * gen_estimator est_timer() might access p->tcf_lock
123 * or bstats, wait a RCU grace period before freeing p 117 * or bstats, wait a RCU grace period before freeing p
124 */ 118 */
125 call_rcu(&p->tcf_rcu, tcf_police_free_rcu); 119 kfree_rcu(p, tcf_rcu);
126 return; 120 return;
127 } 121 }
128 } 122 }
@@ -139,7 +133,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
139static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, 133static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
140 struct tc_action *a, int ovr, int bind) 134 struct tc_action *a, int ovr, int bind)
141{ 135{
142 unsigned h; 136 unsigned int h;
143 int ret = 0, err; 137 int ret = 0, err;
144 struct nlattr *tb[TCA_POLICE_MAX + 1]; 138 struct nlattr *tb[TCA_POLICE_MAX + 1];
145 struct tc_police *parm; 139 struct tc_police *parm;
@@ -402,7 +396,6 @@ static void __exit
402police_cleanup_module(void) 396police_cleanup_module(void)
403{ 397{
404 tcf_unregister_action(&act_police_ops); 398 tcf_unregister_action(&act_police_ops);
405 rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
406} 399}
407 400
408module_init(police_init_module); 401module_init(police_init_module);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 7287cff7af3e..a34a22de60b3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
47 /* print policy string followed by _ then packet count 47 /* print policy string followed by _ then packet count
48 * Example if this was the 3rd packet and the string was "hello" 48 * Example if this was the 3rd packet and the string was "hello"
49 * then it would look like "hello_3" (without quotes) 49 * then it would look like "hello_3" (without quotes)
50 **/ 50 */
51 pr_info("simple: %s_%d\n", 51 pr_info("simple: %s_%d\n",
52 (char *)d->tcfd_defdata, d->tcf_bstats.packets); 52 (char *)d->tcfd_defdata, d->tcf_bstats.packets);
53 spin_unlock(&d->tcf_lock); 53 spin_unlock(&d->tcf_lock);
@@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
126 &simp_idx_gen, &simp_hash_info); 126 &simp_idx_gen, &simp_hash_info);
127 if (IS_ERR(pc)) 127 if (IS_ERR(pc))
128 return PTR_ERR(pc); 128 return PTR_ERR(pc);
129 129
130 d = to_defact(pc); 130 d = to_defact(pc);
131 ret = alloc_defdata(d, defdata); 131 ret = alloc_defdata(d, defdata);
@@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
149 return ret; 149 return ret;
150} 150}
151 151
152static inline int tcf_simp_cleanup(struct tc_action *a, int bind) 152static int tcf_simp_cleanup(struct tc_action *a, int bind)
153{ 153{
154 struct tcf_defact *d = a->priv; 154 struct tcf_defact *d = a->priv;
155 155
@@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
158 return 0; 158 return 0;
159} 159}
160 160
161static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, 161static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
162 int bind, int ref) 162 int bind, int ref)
163{ 163{
164 unsigned char *b = skb_tail_pointer(skb); 164 unsigned char *b = skb_tail_pointer(skb);
165 struct tcf_defact *d = a->priv; 165 struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 836f5fee9e58..5f6f0c7c3905 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
113 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 113 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
114 &skbedit_idx_gen, &skbedit_hash_info); 114 &skbedit_idx_gen, &skbedit_hash_info);
115 if (IS_ERR(pc)) 115 if (IS_ERR(pc))
116 return PTR_ERR(pc); 116 return PTR_ERR(pc);
117 117
118 d = to_skbedit(pc); 118 d = to_skbedit(pc);
119 ret = ACT_P_CREATED; 119 ret = ACT_P_CREATED;
@@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
144 return ret; 144 return ret;
145} 145}
146 146
147static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) 147static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
148{ 148{
149 struct tcf_skbedit *d = a->priv; 149 struct tcf_skbedit *d = a->priv;
150 150
@@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
153 return 0; 153 return 0;
154} 154}
155 155
156static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, 156static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
157 int bind, int ref) 157 int bind, int ref)
158{ 158{
159 unsigned char *b = skb_tail_pointer(skb); 159 unsigned char *b = skb_tail_pointer(skb);
160 struct tcf_skbedit *d = a->priv; 160 struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5fd0c28ef79a..bb2c523f8158 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
85 int rc = -ENOENT; 85 int rc = -ENOENT;
86 86
87 write_lock(&cls_mod_lock); 87 write_lock(&cls_mod_lock);
88 for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) 88 for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
89 if (t == ops) 89 if (t == ops)
90 break; 90 break;
91 91
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
111 u32 first = TC_H_MAKE(0xC0000000U, 0U); 111 u32 first = TC_H_MAKE(0xC0000000U, 0U);
112 112
113 if (tp) 113 if (tp)
114 first = tp->prio-1; 114 first = tp->prio - 1;
115 115
116 return first; 116 return first;
117} 117}
@@ -149,7 +149,8 @@ replay:
149 149
150 if (prio == 0) { 150 if (prio == 0) {
151 /* If no priority is given, user wants we allocated it. */ 151 /* If no priority is given, user wants we allocated it. */
152 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 152 if (n->nlmsg_type != RTM_NEWTFILTER ||
153 !(n->nlmsg_flags & NLM_F_CREATE))
153 return -ENOENT; 154 return -ENOENT;
154 prio = TC_H_MAKE(0x80000000U, 0U); 155 prio = TC_H_MAKE(0x80000000U, 0U);
155 } 156 }
@@ -176,7 +177,8 @@ replay:
176 } 177 }
177 178
178 /* Is it classful? */ 179 /* Is it classful? */
179 if ((cops = q->ops->cl_ops) == NULL) 180 cops = q->ops->cl_ops;
181 if (!cops)
180 return -EINVAL; 182 return -EINVAL;
181 183
182 if (cops->tcf_chain == NULL) 184 if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
196 goto errout; 198 goto errout;
197 199
198 /* Check the chain for existence of proto-tcf with this priority */ 200 /* Check the chain for existence of proto-tcf with this priority */
199 for (back = chain; (tp=*back) != NULL; back = &tp->next) { 201 for (back = chain; (tp = *back) != NULL; back = &tp->next) {
200 if (tp->prio >= prio) { 202 if (tp->prio >= prio) {
201 if (tp->prio == prio) { 203 if (tp->prio == prio) {
202 if (!nprio || (tp->protocol != protocol && protocol)) 204 if (!nprio ||
205 (tp->protocol != protocol && protocol))
203 goto errout; 206 goto errout;
204 } else 207 } else
205 tp = NULL; 208 tp = NULL;
@@ -216,7 +219,8 @@ replay:
216 goto errout; 219 goto errout;
217 220
218 err = -ENOENT; 221 err = -ENOENT;
219 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 222 if (n->nlmsg_type != RTM_NEWTFILTER ||
223 !(n->nlmsg_flags & NLM_F_CREATE))
220 goto errout; 224 goto errout;
221 225
222 226
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
420 424
421 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 425 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
422 return skb->len; 426 return skb->len;
423 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 427 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
428 if (!dev)
424 return skb->len; 429 return skb->len;
425 430
426 if (!tcm->tcm_parent) 431 if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
429 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 434 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
430 if (!q) 435 if (!q)
431 goto out; 436 goto out;
432 if ((cops = q->ops->cl_ops) == NULL) 437 cops = q->ops->cl_ops;
438 if (!cops)
433 goto errout; 439 goto errout;
434 if (cops->tcf_chain == NULL) 440 if (cops->tcf_chain == NULL)
435 goto errout; 441 goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
444 450
445 s_t = cb->args[0]; 451 s_t = cb->args[0];
446 452
447 for (tp=*chain, t=0; tp; tp = tp->next, t++) { 453 for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
448 if (t < s_t) continue; 454 if (t < s_t)
455 continue;
449 if (TC_H_MAJ(tcm->tcm_info) && 456 if (TC_H_MAJ(tcm->tcm_info) &&
450 TC_H_MAJ(tcm->tcm_info) != tp->prio) 457 TC_H_MAJ(tcm->tcm_info) != tp->prio)
451 continue; 458 continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
468 arg.skb = skb; 475 arg.skb = skb;
469 arg.cb = cb; 476 arg.cb = cb;
470 arg.w.stop = 0; 477 arg.w.stop = 0;
471 arg.w.skip = cb->args[1]-1; 478 arg.w.skip = cb->args[1] - 1;
472 arg.w.count = 0; 479 arg.w.count = 0;
473 tp->ops->walk(tp, &arg.w); 480 tp->ops->walk(tp, &arg.w);
474 cb->args[1] = arg.w.count+1; 481 cb->args[1] = arg.w.count + 1;
475 if (arg.w.stop) 482 if (arg.w.stop)
476 break; 483 break;
477 } 484 }
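
Most of the classifier changes that follow are mechanical coding-style cleanups of the kind checkpatch flags: assignments are hoisted out of if-conditions, binary operators gain surrounding spaces, "unsigned" becomes "unsigned int", and redundant inline markers on static functions in .c files are dropped or spelled as plain inline. A hedged before/after illustration with made-up names (struct example_entry, lookup_old, lookup_new are not from this patch):

#include <linux/types.h>

struct example_entry {
	u32 key;
	struct example_entry *next;
};

/* Before: assignment buried in the if-condition, no spaces around '&'. */
static struct example_entry *lookup_old(struct example_entry **ht, u32 key)
{
	struct example_entry *e;

	if ((e = ht[key&0xF]) == NULL)
		return NULL;
	return e;
}

/* After: same behaviour, with the assignment as its own statement. */
static struct example_entry *lookup_new(struct example_entry **ht, u32 key)
{
	struct example_entry *e;

	e = ht[key & 0xF];
	if (!e)
		return NULL;
	return e;
}

The two forms are semantically identical; the change is purely about readability and matching Documentation/CodingStyle.
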
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f23d9155b1ef..8be8872dd571 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
21#include <net/act_api.h> 21#include <net/act_api.h>
22#include <net/pkt_cls.h> 22#include <net/pkt_cls.h>
23 23
24struct basic_head 24struct basic_head {
25{
26 u32 hgenerator; 25 u32 hgenerator;
27 struct list_head flist; 26 struct list_head flist;
28}; 27};
29 28
30struct basic_filter 29struct basic_filter {
31{
32 u32 handle; 30 u32 handle;
33 struct tcf_exts exts; 31 struct tcf_exts exts;
34 struct tcf_ematch_tree ematches; 32 struct tcf_ematch_tree ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
92 return 0; 90 return 0;
93} 91}
94 92
95static inline void basic_delete_filter(struct tcf_proto *tp, 93static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
96 struct basic_filter *f)
97{ 94{
98 tcf_unbind_filter(tp, &f->res); 95 tcf_unbind_filter(tp, &f->res);
99 tcf_exts_destroy(tp, &f->exts); 96 tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
135 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, 132 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
136}; 133};
137 134
138static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, 135static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
139 unsigned long base, struct nlattr **tb, 136 unsigned long base, struct nlattr **tb,
140 struct nlattr *est) 137 struct nlattr *est)
141{ 138{
142 int err = -EINVAL; 139 int err = -EINVAL;
143 struct tcf_exts e; 140 struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
203 } while (--i > 0 && basic_get(tp, head->hgenerator)); 200 } while (--i > 0 && basic_get(tp, head->hgenerator));
204 201
205 if (i <= 0) { 202 if (i <= 0) {
206 printk(KERN_ERR "Insufficient number of handles\n"); 203 pr_err("Insufficient number of handles\n");
207 goto errout; 204 goto errout;
208 } 205 }
209 206
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d49c40fb7e09..32a335194ca5 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
56{ 56{
57 struct cgroup_cls_state *cs; 57 struct cgroup_cls_state *cs;
58 58
59 if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) 59 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
60 if (!cs)
60 return ERR_PTR(-ENOMEM); 61 return ERR_PTR(-ENOMEM);
61 62
62 if (cgrp->parent) 63 if (cgrp->parent)
@@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
94 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 95 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
95} 96}
96 97
97struct cls_cgroup_head 98struct cls_cgroup_head {
98{
99 u32 handle; 99 u32 handle;
100 struct tcf_exts exts; 100 struct tcf_exts exts;
101 struct tcf_ematch_tree ematches; 101 struct tcf_ematch_tree ematches;
@@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
166 u32 handle, struct nlattr **tca, 166 u32 handle, struct nlattr **tca,
167 unsigned long *arg) 167 unsigned long *arg)
168{ 168{
169 struct nlattr *tb[TCA_CGROUP_MAX+1]; 169 struct nlattr *tb[TCA_CGROUP_MAX + 1];
170 struct cls_cgroup_head *head = tp->root; 170 struct cls_cgroup_head *head = tp->root;
171 struct tcf_ematch_tree t; 171 struct tcf_ematch_tree t;
172 struct tcf_exts e; 172 struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5b271a18bc3a..8ec01391d988 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb)
121 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
122 break; 122 break;
123 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
124 if (iph->frag_off & htons(IP_MF|IP_OFFSET)) 124 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
125 break; 125 break;
126 poff = proto_ports_offset(iph->protocol); 126 poff = proto_ports_offset(iph->protocol);
127 if (poff >= 0 && 127 if (poff >= 0 &&
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
163 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
164 break; 164 break;
165 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
166 if (iph->frag_off & htons(IP_MF|IP_OFFSET)) 166 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
167 break; 167 break;
168 poff = proto_ports_offset(iph->protocol); 168 poff = proto_ports_offset(iph->protocol);
169 if (poff >= 0 && 169 if (poff >= 0 &&
@@ -276,7 +276,7 @@ fallback:
276 276
277static u32 flow_get_rtclassid(const struct sk_buff *skb) 277static u32 flow_get_rtclassid(const struct sk_buff *skb)
278{ 278{
279#ifdef CONFIG_NET_CLS_ROUTE 279#ifdef CONFIG_IP_ROUTE_CLASSID
280 if (skb_dst(skb)) 280 if (skb_dst(skb))
281 return skb_dst(skb)->tclassid; 281 return skb_dst(skb)->tclassid;
282#endif 282#endif
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 93b0a7b6f9b4..26e7bc4ffb79 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
31 31
32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) 32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
33 33
34struct fw_head 34struct fw_head {
35{
36 struct fw_filter *ht[HTSIZE]; 35 struct fw_filter *ht[HTSIZE];
37 u32 mask; 36 u32 mask;
38}; 37};
39 38
40struct fw_filter 39struct fw_filter {
41{
42 struct fw_filter *next; 40 struct fw_filter *next;
43 u32 id; 41 u32 id;
44 struct tcf_result res; 42 struct tcf_result res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
53 .police = TCA_FW_POLICE 51 .police = TCA_FW_POLICE
54}; 52};
55 53
56static __inline__ int fw_hash(u32 handle) 54static inline int fw_hash(u32 handle)
57{ 55{
58 if (HTSIZE == 4096) 56 if (HTSIZE == 4096)
59 return ((handle >> 24) & 0xFFF) ^ 57 return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
82static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, 80static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
83 struct tcf_result *res) 81 struct tcf_result *res)
84{ 82{
85 struct fw_head *head = (struct fw_head*)tp->root; 83 struct fw_head *head = (struct fw_head *)tp->root;
86 struct fw_filter *f; 84 struct fw_filter *f;
87 int r; 85 int r;
88 u32 id = skb->mark; 86 u32 id = skb->mark;
89 87
90 if (head != NULL) { 88 if (head != NULL) {
91 id &= head->mask; 89 id &= head->mask;
92 for (f=head->ht[fw_hash(id)]; f; f=f->next) { 90 for (f = head->ht[fw_hash(id)]; f; f = f->next) {
93 if (f->id == id) { 91 if (f->id == id) {
94 *res = f->res; 92 *res = f->res;
95#ifdef CONFIG_NET_CLS_IND 93#ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
105 } 103 }
106 } else { 104 } else {
107 /* old method */ 105 /* old method */
108 if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) { 106 if (id && (TC_H_MAJ(id) == 0 ||
107 !(TC_H_MAJ(id ^ tp->q->handle)))) {
109 res->classid = id; 108 res->classid = id;
110 res->class = 0; 109 res->class = 0;
111 return 0; 110 return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
117 116
118static unsigned long fw_get(struct tcf_proto *tp, u32 handle) 117static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
119{ 118{
120 struct fw_head *head = (struct fw_head*)tp->root; 119 struct fw_head *head = (struct fw_head *)tp->root;
121 struct fw_filter *f; 120 struct fw_filter *f;
122 121
123 if (head == NULL) 122 if (head == NULL)
124 return 0; 123 return 0;
125 124
126 for (f=head->ht[fw_hash(handle)]; f; f=f->next) { 125 for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
127 if (f->id == handle) 126 if (f->id == handle)
128 return (unsigned long)f; 127 return (unsigned long)f;
129 } 128 }
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
139 return 0; 138 return 0;
140} 139}
141 140
142static inline void 141static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
143fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
144{ 142{
145 tcf_unbind_filter(tp, &f->res); 143 tcf_unbind_filter(tp, &f->res);
146 tcf_exts_destroy(tp, &f->exts); 144 tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
156 if (head == NULL) 154 if (head == NULL)
157 return; 155 return;
158 156
159 for (h=0; h<HTSIZE; h++) { 157 for (h = 0; h < HTSIZE; h++) {
160 while ((f=head->ht[h]) != NULL) { 158 while ((f = head->ht[h]) != NULL) {
161 head->ht[h] = f->next; 159 head->ht[h] = f->next;
162 fw_delete_filter(tp, f); 160 fw_delete_filter(tp, f);
163 } 161 }
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
167 165
168static int fw_delete(struct tcf_proto *tp, unsigned long arg) 166static int fw_delete(struct tcf_proto *tp, unsigned long arg)
169{ 167{
170 struct fw_head *head = (struct fw_head*)tp->root; 168 struct fw_head *head = (struct fw_head *)tp->root;
171 struct fw_filter *f = (struct fw_filter*)arg; 169 struct fw_filter *f = (struct fw_filter *)arg;
172 struct fw_filter **fp; 170 struct fw_filter **fp;
173 171
174 if (head == NULL || f == NULL) 172 if (head == NULL || f == NULL)
175 goto out; 173 goto out;
176 174
177 for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { 175 for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
178 if (*fp == f) { 176 if (*fp == f) {
179 tcf_tree_lock(tp); 177 tcf_tree_lock(tp);
180 *fp = f->next; 178 *fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
240 struct nlattr **tca, 238 struct nlattr **tca,
241 unsigned long *arg) 239 unsigned long *arg)
242{ 240{
243 struct fw_head *head = (struct fw_head*)tp->root; 241 struct fw_head *head = (struct fw_head *)tp->root;
244 struct fw_filter *f = (struct fw_filter *) *arg; 242 struct fw_filter *f = (struct fw_filter *) *arg;
245 struct nlattr *opt = tca[TCA_OPTIONS]; 243 struct nlattr *opt = tca[TCA_OPTIONS];
246 struct nlattr *tb[TCA_FW_MAX + 1]; 244 struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
302 300
303static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) 301static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
304{ 302{
305 struct fw_head *head = (struct fw_head*)tp->root; 303 struct fw_head *head = (struct fw_head *)tp->root;
306 int h; 304 int h;
307 305
308 if (head == NULL) 306 if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
332 struct sk_buff *skb, struct tcmsg *t) 330 struct sk_buff *skb, struct tcmsg *t)
333{ 331{
334 struct fw_head *head = (struct fw_head *)tp->root; 332 struct fw_head *head = (struct fw_head *)tp->root;
335 struct fw_filter *f = (struct fw_filter*)fh; 333 struct fw_filter *f = (struct fw_filter *)fh;
336 unsigned char *b = skb_tail_pointer(skb); 334 unsigned char *b = skb_tail_pointer(skb);
337 struct nlattr *nest; 335 struct nlattr *nest;
338 336
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 694dcd85dec8..a907905376df 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
23#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
24 24
25/* 25/*
26 1. For now we assume that route tags < 256. 26 * 1. For now we assume that route tags < 256.
27 It allows to use direct table lookups, instead of hash tables. 27 * It allows to use direct table lookups, instead of hash tables.
28 2. For now we assume that "from TAG" and "fromdev DEV" statements 28 * 2. For now we assume that "from TAG" and "fromdev DEV" statements
29 are mutually exclusive. 29 * are mutually exclusive.
30 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" 30 * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
31 */ 31 */
32 32
33struct route4_fastmap 33struct route4_fastmap {
34{
35 struct route4_filter *filter; 34 struct route4_filter *filter;
36 u32 id; 35 u32 id;
37 int iif; 36 int iif;
38}; 37};
39 38
40struct route4_head 39struct route4_head {
41{
42 struct route4_fastmap fastmap[16]; 40 struct route4_fastmap fastmap[16];
43 struct route4_bucket *table[256+1]; 41 struct route4_bucket *table[256 + 1];
44}; 42};
45 43
46struct route4_bucket 44struct route4_bucket {
47{
48 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ 45 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
49 struct route4_filter *ht[16+16+1]; 46 struct route4_filter *ht[16 + 16 + 1];
50}; 47};
51 48
52struct route4_filter 49struct route4_filter {
53{
54 struct route4_filter *next; 50 struct route4_filter *next;
55 u32 id; 51 u32 id;
56 int iif; 52 int iif;
@@ -61,20 +57,20 @@ struct route4_filter
61 struct route4_bucket *bkt; 57 struct route4_bucket *bkt;
62}; 58};
63 59
64#define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) 60#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
65 61
66static const struct tcf_ext_map route_ext_map = { 62static const struct tcf_ext_map route_ext_map = {
67 .police = TCA_ROUTE4_POLICE, 63 .police = TCA_ROUTE4_POLICE,
68 .action = TCA_ROUTE4_ACT 64 .action = TCA_ROUTE4_ACT
69}; 65};
70 66
71static __inline__ int route4_fastmap_hash(u32 id, int iif) 67static inline int route4_fastmap_hash(u32 id, int iif)
72{ 68{
73 return id&0xF; 69 return id & 0xF;
74} 70}
75 71
76static inline 72static void
77void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) 73route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
78{ 74{
79 spinlock_t *root_lock = qdisc_root_sleeping_lock(q); 75 spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
80 76
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
83 spin_unlock_bh(root_lock); 79 spin_unlock_bh(root_lock);
84} 80}
85 81
86static inline void 82static void
87route4_set_fastmap(struct route4_head *head, u32 id, int iif, 83route4_set_fastmap(struct route4_head *head, u32 id, int iif,
88 struct route4_filter *f) 84 struct route4_filter *f)
89{ 85{
90 int h = route4_fastmap_hash(id, iif); 86 int h = route4_fastmap_hash(id, iif);
87
91 head->fastmap[h].id = id; 88 head->fastmap[h].id = id;
92 head->fastmap[h].iif = iif; 89 head->fastmap[h].iif = iif;
93 head->fastmap[h].filter = f; 90 head->fastmap[h].filter = f;
94} 91}
95 92
96static __inline__ int route4_hash_to(u32 id) 93static inline int route4_hash_to(u32 id)
97{ 94{
98 return id&0xFF; 95 return id & 0xFF;
99} 96}
100 97
101static __inline__ int route4_hash_from(u32 id) 98static inline int route4_hash_from(u32 id)
102{ 99{
103 return (id>>16)&0xF; 100 return (id >> 16) & 0xF;
104} 101}
105 102
106static __inline__ int route4_hash_iif(int iif) 103static inline int route4_hash_iif(int iif)
107{ 104{
108 return 16 + ((iif>>16)&0xF); 105 return 16 + ((iif >> 16) & 0xF);
109} 106}
110 107
111static __inline__ int route4_hash_wild(void) 108static inline int route4_hash_wild(void)
112{ 109{
113 return 32; 110 return 32;
114} 111}
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
131static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, 128static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
132 struct tcf_result *res) 129 struct tcf_result *res)
133{ 130{
134 struct route4_head *head = (struct route4_head*)tp->root; 131 struct route4_head *head = (struct route4_head *)tp->root;
135 struct dst_entry *dst; 132 struct dst_entry *dst;
136 struct route4_bucket *b; 133 struct route4_bucket *b;
137 struct route4_filter *f; 134 struct route4_filter *f;
138 u32 id, h; 135 u32 id, h;
139 int iif, dont_cache = 0; 136 int iif, dont_cache = 0;
140 137
141 if ((dst = skb_dst(skb)) == NULL) 138 dst = skb_dst(skb);
139 if (!dst)
142 goto failure; 140 goto failure;
143 141
144 id = dst->tclassid; 142 id = dst->tclassid;
145 if (head == NULL) 143 if (head == NULL)
146 goto old_method; 144 goto old_method;
147 145
148 iif = ((struct rtable*)dst)->fl.iif; 146 iif = ((struct rtable *)dst)->rt_iif;
149 147
150 h = route4_fastmap_hash(id, iif); 148 h = route4_fastmap_hash(id, iif);
151 if (id == head->fastmap[h].id && 149 if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
161 h = route4_hash_to(id); 159 h = route4_hash_to(id);
162 160
163restart: 161restart:
164 if ((b = head->table[h]) != NULL) { 162 b = head->table[h];
163 if (b) {
165 for (f = b->ht[route4_hash_from(id)]; f; f = f->next) 164 for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
166 if (f->id == id) 165 if (f->id == id)
167 ROUTE4_APPLY_RESULT(); 166 ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
197 196
198static inline u32 to_hash(u32 id) 197static inline u32 to_hash(u32 id)
199{ 198{
200 u32 h = id&0xFF; 199 u32 h = id & 0xFF;
201 if (id&0x8000) 200
201 if (id & 0x8000)
202 h += 256; 202 h += 256;
203 return h; 203 return h;
204} 204}
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
211 if (!(id & 0x8000)) { 211 if (!(id & 0x8000)) {
212 if (id > 255) 212 if (id > 255)
213 return 256; 213 return 256;
214 return id&0xF; 214 return id & 0xF;
215 } 215 }
216 return 16 + (id&0xF); 216 return 16 + (id & 0xF);
217} 217}
218 218
219static unsigned long route4_get(struct tcf_proto *tp, u32 handle) 219static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
220{ 220{
221 struct route4_head *head = (struct route4_head*)tp->root; 221 struct route4_head *head = (struct route4_head *)tp->root;
222 struct route4_bucket *b; 222 struct route4_bucket *b;
223 struct route4_filter *f; 223 struct route4_filter *f;
224 unsigned h1, h2; 224 unsigned int h1, h2;
225 225
226 if (!head) 226 if (!head)
227 return 0; 227 return 0;
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
230 if (h1 > 256) 230 if (h1 > 256)
231 return 0; 231 return 0;
232 232
233 h2 = from_hash(handle>>16); 233 h2 = from_hash(handle >> 16);
234 if (h2 > 32) 234 if (h2 > 32)
235 return 0; 235 return 0;
236 236
237 if ((b = head->table[h1]) != NULL) { 237 b = head->table[h1];
238 if (b) {
238 for (f = b->ht[h2]; f; f = f->next) 239 for (f = b->ht[h2]; f; f = f->next)
239 if (f->handle == handle) 240 if (f->handle == handle)
240 return (unsigned long)f; 241 return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
251 return 0; 252 return 0;
252} 253}
253 254
254static inline void 255static void
255route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) 256route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
256{ 257{
257 tcf_unbind_filter(tp, &f->res); 258 tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
267 if (head == NULL) 268 if (head == NULL)
268 return; 269 return;
269 270
270 for (h1=0; h1<=256; h1++) { 271 for (h1 = 0; h1 <= 256; h1++) {
271 struct route4_bucket *b; 272 struct route4_bucket *b;
272 273
273 if ((b = head->table[h1]) != NULL) { 274 b = head->table[h1];
274 for (h2=0; h2<=32; h2++) { 275 if (b) {
276 for (h2 = 0; h2 <= 32; h2++) {
275 struct route4_filter *f; 277 struct route4_filter *f;
276 278
277 while ((f = b->ht[h2]) != NULL) { 279 while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
287 289
288static int route4_delete(struct tcf_proto *tp, unsigned long arg) 290static int route4_delete(struct tcf_proto *tp, unsigned long arg)
289{ 291{
290 struct route4_head *head = (struct route4_head*)tp->root; 292 struct route4_head *head = (struct route4_head *)tp->root;
291 struct route4_filter **fp, *f = (struct route4_filter*)arg; 293 struct route4_filter **fp, *f = (struct route4_filter *)arg;
292 unsigned h = 0; 294 unsigned int h = 0;
293 struct route4_bucket *b; 295 struct route4_bucket *b;
294 int i; 296 int i;
295 297
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
299 h = f->handle; 301 h = f->handle;
300 b = f->bkt; 302 b = f->bkt;
301 303
302 for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { 304 for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
303 if (*fp == f) { 305 if (*fp == f) {
304 tcf_tree_lock(tp); 306 tcf_tree_lock(tp);
305 *fp = f->next; 307 *fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
310 312
311 /* Strip tree */ 313 /* Strip tree */
312 314
313 for (i=0; i<=32; i++) 315 for (i = 0; i <= 32; i++)
314 if (b->ht[i]) 316 if (b->ht[i])
315 return 0; 317 return 0;
316 318
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
380 } 382 }
381 383
382 h1 = to_hash(nhandle); 384 h1 = to_hash(nhandle);
383 if ((b = head->table[h1]) == NULL) { 385 b = head->table[h1];
386 if (!b) {
384 err = -ENOBUFS; 387 err = -ENOBUFS;
385 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); 388 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
386 if (b == NULL) 389 if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
391 tcf_tree_unlock(tp); 394 tcf_tree_unlock(tp);
392 } else { 395 } else {
393 unsigned int h2 = from_hash(nhandle >> 16); 396 unsigned int h2 = from_hash(nhandle >> 16);
397
394 err = -EEXIST; 398 err = -EEXIST;
395 for (fp = b->ht[h2]; fp; fp = fp->next) 399 for (fp = b->ht[h2]; fp; fp = fp->next)
396 if (fp->handle == f->handle) 400 if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
444 if (err < 0) 448 if (err < 0)
445 return err; 449 return err;
446 450
447 if ((f = (struct route4_filter*)*arg) != NULL) { 451 f = (struct route4_filter *)*arg;
452 if (f) {
448 if (f->handle != handle && handle) 453 if (f->handle != handle && handle)
449 return -EINVAL; 454 return -EINVAL;
450 455
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
481 486
482reinsert: 487reinsert:
483 h = from_hash(f->handle >> 16); 488 h = from_hash(f->handle >> 16);
484 for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next) 489 for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
485 if (f->handle < f1->handle) 490 if (f->handle < f1->handle)
486 break; 491 break;
487 492
@@ -492,7 +497,8 @@ reinsert:
492 if (old_handle && f->handle != old_handle) { 497 if (old_handle && f->handle != old_handle) {
493 th = to_hash(old_handle); 498 th = to_hash(old_handle);
494 h = from_hash(old_handle >> 16); 499 h = from_hash(old_handle >> 16);
495 if ((b = head->table[th]) != NULL) { 500 b = head->table[th];
501 if (b) {
496 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { 502 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
497 if (*fp == f) { 503 if (*fp == f) {
498 *fp = f->next; 504 *fp = f->next;
@@ -515,7 +521,7 @@ errout:
515static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) 521static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
516{ 522{
517 struct route4_head *head = tp->root; 523 struct route4_head *head = tp->root;
518 unsigned h, h1; 524 unsigned int h, h1;
519 525
520 if (head == NULL) 526 if (head == NULL)
521 arg->stop = 1; 527 arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
549static int route4_dump(struct tcf_proto *tp, unsigned long fh, 555static int route4_dump(struct tcf_proto *tp, unsigned long fh,
550 struct sk_buff *skb, struct tcmsg *t) 556 struct sk_buff *skb, struct tcmsg *t)
551{ 557{
552 struct route4_filter *f = (struct route4_filter*)fh; 558 struct route4_filter *f = (struct route4_filter *)fh;
553 unsigned char *b = skb_tail_pointer(skb); 559 unsigned char *b = skb_tail_pointer(skb);
554 struct nlattr *nest; 560 struct nlattr *nest;
555 u32 id; 561 u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
563 if (nest == NULL) 569 if (nest == NULL)
564 goto nla_put_failure; 570 goto nla_put_failure;
565 571
566 if (!(f->handle&0x8000)) { 572 if (!(f->handle & 0x8000)) {
567 id = f->id&0xFF; 573 id = f->id & 0xFF;
568 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); 574 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
569 } 575 }
570 if (f->handle&0x80000000) { 576 if (f->handle & 0x80000000) {
571 if ((f->handle>>16) != 0xFFFF) 577 if ((f->handle >> 16) != 0xFFFF)
572 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); 578 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
573 } else { 579 } else {
574 id = f->id>>16; 580 id = f->id >> 16;
575 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); 581 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
576 } 582 }
577 if (f->res.classid) 583 if (f->res.classid)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 425a1790b048..402c44b241a3 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
66 powerful classification engine. */ 66 powerful classification engine. */
67 67
68 68
69struct rsvp_head 69struct rsvp_head {
70{
71 u32 tmap[256/32]; 70 u32 tmap[256/32];
72 u32 hgenerator; 71 u32 hgenerator;
73 u8 tgenerator; 72 u8 tgenerator;
74 struct rsvp_session *ht[256]; 73 struct rsvp_session *ht[256];
75}; 74};
76 75
77struct rsvp_session 76struct rsvp_session {
78{
79 struct rsvp_session *next; 77 struct rsvp_session *next;
80 __be32 dst[RSVP_DST_LEN]; 78 __be32 dst[RSVP_DST_LEN];
81 struct tc_rsvp_gpi dpi; 79 struct tc_rsvp_gpi dpi;
82 u8 protocol; 80 u8 protocol;
83 u8 tunnelid; 81 u8 tunnelid;
84 /* 16 (src,sport) hash slots, and one wildcard source slot */ 82 /* 16 (src,sport) hash slots, and one wildcard source slot */
85 struct rsvp_filter *ht[16+1]; 83 struct rsvp_filter *ht[16 + 1];
86}; 84};
87 85
88 86
89struct rsvp_filter 87struct rsvp_filter {
90{
91 struct rsvp_filter *next; 88 struct rsvp_filter *next;
92 __be32 src[RSVP_DST_LEN]; 89 __be32 src[RSVP_DST_LEN];
93 struct tc_rsvp_gpi spi; 90 struct tc_rsvp_gpi spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
100 struct rsvp_session *sess; 97 struct rsvp_session *sess;
101}; 98};
102 99
103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) 100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{ 101{
105 unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; 102 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
103
106 h ^= h>>16; 104 h ^= h>>16;
107 h ^= h>>8; 105 h ^= h>>8;
108 return (h ^ protocol ^ tunnelid) & 0xFF; 106 return (h ^ protocol ^ tunnelid) & 0xFF;
109} 107}
110 108
111static __inline__ unsigned hash_src(__be32 *src) 109static inline unsigned int hash_src(__be32 *src)
112{ 110{
113 unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; 111 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
112
114 h ^= h>>16; 113 h ^= h>>16;
115 h ^= h>>8; 114 h ^= h>>8;
116 h ^= h>>4; 115 h ^= h>>4;
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, 133static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135 struct tcf_result *res) 134 struct tcf_result *res)
136{ 135{
137 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 136 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
138 struct rsvp_session *s; 137 struct rsvp_session *s;
139 struct rsvp_filter *f; 138 struct rsvp_filter *f;
140 unsigned h1, h2; 139 unsigned int h1, h2;
141 __be32 *dst, *src; 140 __be32 *dst, *src;
142 u8 protocol; 141 u8 protocol;
143 u8 tunnelid = 0; 142 u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
162 src = &nhptr->saddr.s6_addr32[0]; 161 src = &nhptr->saddr.s6_addr32[0];
163 dst = &nhptr->daddr.s6_addr32[0]; 162 dst = &nhptr->daddr.s6_addr32[0];
164 protocol = nhptr->nexthdr; 163 protocol = nhptr->nexthdr;
165 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); 164 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
166#else 165#else
167 src = &nhptr->saddr; 166 src = &nhptr->saddr;
168 dst = &nhptr->daddr; 167 dst = &nhptr->daddr;
169 protocol = nhptr->protocol; 168 protocol = nhptr->protocol;
170 xprt = ((u8*)nhptr) + (nhptr->ihl<<2); 169 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
171 if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) 170 if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
172 return -1; 171 return -1;
173#endif 172#endif
174 173
@@ -176,10 +175,10 @@ restart:
176 h2 = hash_src(src); 175 h2 = hash_src(src);
177 176
178 for (s = sht[h1]; s; s = s->next) { 177 for (s = sht[h1]; s; s = s->next) {
179 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
180 protocol == s->protocol && 179 protocol == s->protocol &&
181 !(s->dpi.mask & 180 !(s->dpi.mask &
182 (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && 181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
183#if RSVP_DST_LEN == 4 182#if RSVP_DST_LEN == 4
184 dst[0] == s->dst[0] && 183 dst[0] == s->dst[0] &&
185 dst[1] == s->dst[1] && 184 dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
188 tunnelid == s->tunnelid) { 187 tunnelid == s->tunnelid) {
189 188
190 for (f = s->ht[h2]; f; f = f->next) { 189 for (f = s->ht[h2]; f; f = f->next) {
191 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && 190 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
192 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) 191 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
193#if RSVP_DST_LEN == 4 192#if RSVP_DST_LEN == 4
194 && 193 &&
195 src[0] == f->src[0] && 194 src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
205 return 0; 204 return 0;
206 205
207 tunnelid = f->res.classid; 206 tunnelid = f->res.classid;
208 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); 207 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
209 goto restart; 208 goto restart;
210 } 209 }
211 } 210 }
@@ -224,11 +223,11 @@ matched:
224 223
225static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) 224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
226{ 225{
227 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 226 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
228 struct rsvp_session *s; 227 struct rsvp_session *s;
229 struct rsvp_filter *f; 228 struct rsvp_filter *f;
230 unsigned h1 = handle&0xFF; 229 unsigned int h1 = handle & 0xFF;
231 unsigned h2 = (handle>>8)&0xFF; 230 unsigned int h2 = (handle >> 8) & 0xFF;
232 231
233 if (h2 > 16) 232 if (h2 > 16)
234 return 0; 233 return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
258 return -ENOBUFS; 257 return -ENOBUFS;
259} 258}
260 259
261static inline void 260static void
262rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) 261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
263{ 262{
264 tcf_unbind_filter(tp, &f->res); 263 tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
277 276
278 sht = data->ht; 277 sht = data->ht;
279 278
280 for (h1=0; h1<256; h1++) { 279 for (h1 = 0; h1 < 256; h1++) {
281 struct rsvp_session *s; 280 struct rsvp_session *s;
282 281
283 while ((s = sht[h1]) != NULL) { 282 while ((s = sht[h1]) != NULL) {
284 sht[h1] = s->next; 283 sht[h1] = s->next;
285 284
286 for (h2=0; h2<=16; h2++) { 285 for (h2 = 0; h2 <= 16; h2++) {
287 struct rsvp_filter *f; 286 struct rsvp_filter *f;
288 287
289 while ((f = s->ht[h2]) != NULL) { 288 while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
299 298
300static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) 299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
301{ 300{
302 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; 301 struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
303 unsigned h = f->handle; 302 unsigned int h = f->handle;
304 struct rsvp_session **sp; 303 struct rsvp_session **sp;
305 struct rsvp_session *s = f->sess; 304 struct rsvp_session *s = f->sess;
306 int i; 305 int i;
307 306
308 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { 307 for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
309 if (*fp == f) { 308 if (*fp == f) {
310 tcf_tree_lock(tp); 309 tcf_tree_lock(tp);
311 *fp = f->next; 310 *fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
314 313
315 /* Strip tree */ 314 /* Strip tree */
316 315
317 for (i=0; i<=16; i++) 316 for (i = 0; i <= 16; i++)
318 if (s->ht[i]) 317 if (s->ht[i])
319 return 0; 318 return 0;
320 319
321 /* OK, session has no flows */ 320 /* OK, session has no flows */
322 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; 321 for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
323 *sp; sp = &(*sp)->next) { 322 *sp; sp = &(*sp)->next) {
324 if (*sp == s) { 323 if (*sp == s) {
325 tcf_tree_lock(tp); 324 tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
337 return 0; 336 return 0;
338} 337}
339 338
340static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) 339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
341{ 340{
342 struct rsvp_head *data = tp->root; 341 struct rsvp_head *data = tp->root;
343 int i = 0xFFFF; 342 int i = 0xFFFF;
344 343
345 while (i-- > 0) { 344 while (i-- > 0) {
346 u32 h; 345 u32 h;
346
347 if ((data->hgenerator += 0x10000) == 0) 347 if ((data->hgenerator += 0x10000) == 0)
348 data->hgenerator = 0x10000; 348 data->hgenerator = 0x10000;
349 h = data->hgenerator|salt; 349 h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
355 355
356static int tunnel_bts(struct rsvp_head *data) 356static int tunnel_bts(struct rsvp_head *data)
357{ 357{
358 int n = data->tgenerator>>5; 358 int n = data->tgenerator >> 5;
359 u32 b = 1<<(data->tgenerator&0x1F); 359 u32 b = 1 << (data->tgenerator & 0x1F);
360 360
361 if (data->tmap[n]&b) 361 if (data->tmap[n] & b)
362 return 0; 362 return 0;
363 data->tmap[n] |= b; 363 data->tmap[n] |= b;
364 return 1; 364 return 1;
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
372 372
373 memset(tmap, 0, sizeof(tmap)); 373 memset(tmap, 0, sizeof(tmap));
374 374
375 for (h1=0; h1<256; h1++) { 375 for (h1 = 0; h1 < 256; h1++) {
376 struct rsvp_session *s; 376 struct rsvp_session *s;
377 for (s = sht[h1]; s; s = s->next) { 377 for (s = sht[h1]; s; s = s->next) {
378 for (h2=0; h2<=16; h2++) { 378 for (h2 = 0; h2 <= 16; h2++) {
379 struct rsvp_filter *f; 379 struct rsvp_filter *f;
380 380
381 for (f = s->ht[h2]; f; f = f->next) { 381 for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
395{ 395{
396 int i, k; 396 int i, k;
397 397
398 for (k=0; k<2; k++) { 398 for (k = 0; k < 2; k++) {
399 for (i=255; i>0; i--) { 399 for (i = 255; i > 0; i--) {
400 if (++data->tgenerator == 0) 400 if (++data->tgenerator == 0)
401 data->tgenerator = 1; 401 data->tgenerator = 1;
402 if (tunnel_bts(data)) 402 if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
428 struct nlattr *opt = tca[TCA_OPTIONS-1]; 428 struct nlattr *opt = tca[TCA_OPTIONS-1];
429 struct nlattr *tb[TCA_RSVP_MAX + 1]; 429 struct nlattr *tb[TCA_RSVP_MAX + 1];
430 struct tcf_exts e; 430 struct tcf_exts e;
431 unsigned h1, h2; 431 unsigned int h1, h2;
432 __be32 *dst; 432 __be32 *dst;
433 int err; 433 int err;
434 434
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
443 if (err < 0) 443 if (err < 0)
444 return err; 444 return err;
445 445
446 if ((f = (struct rsvp_filter*)*arg) != NULL) { 446 f = (struct rsvp_filter *)*arg;
447 if (f) {
447 /* Node exists: adjust only classid */ 448 /* Node exists: adjust only classid */
448 449
449 if (f->handle != handle && handle) 450 if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
500 goto errout; 501 goto errout;
501 } 502 }
502 503
503 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { 504 for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
504 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 505 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
505 pinfo && pinfo->protocol == s->protocol && 506 pinfo && pinfo->protocol == s->protocol &&
506 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && 507 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
523 tcf_exts_change(tp, &f->exts, &e); 524 tcf_exts_change(tp, &f->exts, &e);
524 525
525 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) 526 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
526 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) 527 if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
527 break; 528 break;
528 f->next = *fp; 529 f->next = *fp;
529 wmb(); 530 wmb();
@@ -567,7 +568,7 @@ errout2:
567static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) 568static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
568{ 569{
569 struct rsvp_head *head = tp->root; 570 struct rsvp_head *head = tp->root;
570 unsigned h, h1; 571 unsigned int h, h1;
571 572
572 if (arg->stop) 573 if (arg->stop)
573 return; 574 return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
598static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, 599static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
599 struct sk_buff *skb, struct tcmsg *t) 600 struct sk_buff *skb, struct tcmsg *t)
600{ 601{
601 struct rsvp_filter *f = (struct rsvp_filter*)fh; 602 struct rsvp_filter *f = (struct rsvp_filter *)fh;
602 struct rsvp_session *s; 603 struct rsvp_session *s;
603 unsigned char *b = skb_tail_pointer(skb); 604 unsigned char *b = skb_tail_pointer(skb);
604 struct nlattr *nest; 605 struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
624 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); 625 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
625 if (f->res.classid) 626 if (f->res.classid)
626 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); 627 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
627 if (((f->handle>>8)&0xFF) != 16) 628 if (((f->handle >> 8) & 0xFF) != 16)
628 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); 629 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
629 630
630 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) 631 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
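For reference, the two fold hashes in the patched cls_rsvp.h reduce the last 32 bits of the destination or source address to a bucket index. The sketch below restates them as plain userspace C; the final mask in hash_src() is not visible in the hunk and is assumed from the 16 + 1 slot source table, so treat this as an illustration rather than a copy of the kernel functions.

#include <stdint.h>
#include <stdio.h>

/* Byte-fold hashes as in the patched cls_rsvp.h, restated in userspace. */
static unsigned int hash_dst(uint32_t dst_last, uint8_t protocol, uint8_t tunnelid)
{
	unsigned int h = dst_last;

	h ^= h >> 16;
	h ^= h >> 8;
	return (h ^ protocol ^ tunnelid) & 0xFF;	/* 256 session buckets */
}

static unsigned int hash_src(uint32_t src_last)
{
	unsigned int h = src_last;

	h ^= h >> 16;
	h ^= h >> 8;
	h ^= h >> 4;
	return h & 0xF;		/* assumed mask, suggested by the 16 + 1 slot table */
}

int main(void)
{
	printf("dst bucket %u, src bucket %u\n",
	       hash_dst(0x0a000001, 46, 0), hash_src(0x0a000002));
	return 0;
}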
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 20ef330bb918..36667fa64237 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
249 * of the hashing index is below the threshold. 249 * of the hashing index is below the threshold.
250 */ 250 */
251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) 251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
252 cp.hash = (cp.mask >> cp.shift)+1; 252 cp.hash = (cp.mask >> cp.shift) + 1;
253 else 253 else
254 cp.hash = DEFAULT_HASH_SIZE; 254 cp.hash = DEFAULT_HASH_SIZE;
255 } 255 }
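The cls_tcindex.c hunk sizes the hash table as a perfect hash when the masked index space is small enough and falls back to a fixed size otherwise. A small sketch of that sizing decision; the two constants are placeholders chosen for illustration, since their definitions are not shown in the hunk.

#include <stdio.h>

/* Placeholder values; the hunk only shows the sizing logic, not the
 * constants it compares against. */
#define PERFECT_HASH_THRESHOLD	64
#define DEFAULT_HASH_SIZE	64

static unsigned int tcindex_hash_size(unsigned int mask, unsigned int shift)
{
	if ((mask >> shift) < PERFECT_HASH_THRESHOLD)
		return (mask >> shift) + 1;	/* perfect hash: one slot per index */
	return DEFAULT_HASH_SIZE;
}

int main(void)
{
	printf("%u %u\n", tcindex_hash_size(0x3F, 0), tcindex_hash_size(0xFFFF, 4));
	return 0;
}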
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b0c2a82178af..3b93fc0c8955 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
42#include <net/act_api.h> 42#include <net/act_api.h>
43#include <net/pkt_cls.h> 43#include <net/pkt_cls.h>
44 44
45struct tc_u_knode 45struct tc_u_knode {
46{
47 struct tc_u_knode *next; 46 struct tc_u_knode *next;
48 u32 handle; 47 u32 handle;
49 struct tc_u_hnode *ht_up; 48 struct tc_u_hnode *ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
63 struct tc_u32_sel sel; 62 struct tc_u32_sel sel;
64}; 63};
65 64
66struct tc_u_hnode 65struct tc_u_hnode {
67{
68 struct tc_u_hnode *next; 66 struct tc_u_hnode *next;
69 u32 handle; 67 u32 handle;
70 u32 prio; 68 u32 prio;
71 struct tc_u_common *tp_c; 69 struct tc_u_common *tp_c;
72 int refcnt; 70 int refcnt;
73 unsigned divisor; 71 unsigned int divisor;
74 struct tc_u_knode *ht[1]; 72 struct tc_u_knode *ht[1];
75}; 73};
76 74
77struct tc_u_common 75struct tc_u_common {
78{
79 struct tc_u_hnode *hlist; 76 struct tc_u_hnode *hlist;
80 struct Qdisc *q; 77 struct Qdisc *q;
81 int refcnt; 78 int refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
87 .police = TCA_U32_POLICE 84 .police = TCA_U32_POLICE
88}; 85};
89 86
90static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) 87static inline unsigned int u32_hash_fold(__be32 key,
88 const struct tc_u32_sel *sel,
89 u8 fshift)
91{ 90{
92 unsigned h = ntohl(key & sel->hmask)>>fshift; 91 unsigned int h = ntohl(key & sel->hmask) >> fshift;
93 92
94 return h; 93 return h;
95} 94}
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
101 unsigned int off; 100 unsigned int off;
102 } stack[TC_U32_MAXDEPTH]; 101 } stack[TC_U32_MAXDEPTH];
103 102
104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; 103 struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
105 unsigned int off = skb_network_offset(skb); 104 unsigned int off = skb_network_offset(skb);
106 struct tc_u_knode *n; 105 struct tc_u_knode *n;
107 int sdepth = 0; 106 int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
120 struct tc_u32_key *key = n->sel.keys; 119 struct tc_u32_key *key = n->sel.keys;
121 120
122#ifdef CONFIG_CLS_U32_PERF 121#ifdef CONFIG_CLS_U32_PERF
123 n->pf->rcnt +=1; 122 n->pf->rcnt += 1;
124 j = 0; 123 j = 0;
125#endif 124#endif
126 125
@@ -133,14 +132,14 @@ next_knode:
133 } 132 }
134#endif 133#endif
135 134
136 for (i = n->sel.nkeys; i>0; i--, key++) { 135 for (i = n->sel.nkeys; i > 0; i--, key++) {
137 int toff = off + key->off + (off2 & key->offmask); 136 int toff = off + key->off + (off2 & key->offmask);
138 __be32 *data, _data; 137 __be32 *data, hdata;
139 138
140 if (skb_headroom(skb) + toff > INT_MAX) 139 if (skb_headroom(skb) + toff > INT_MAX)
141 goto out; 140 goto out;
142 141
143 data = skb_header_pointer(skb, toff, 4, &_data); 142 data = skb_header_pointer(skb, toff, 4, &hdata);
144 if (!data) 143 if (!data)
145 goto out; 144 goto out;
146 if ((*data ^ key->val) & key->mask) { 145 if ((*data ^ key->val) & key->mask) {
@@ -148,13 +147,13 @@ next_knode:
148 goto next_knode; 147 goto next_knode;
149 } 148 }
150#ifdef CONFIG_CLS_U32_PERF 149#ifdef CONFIG_CLS_U32_PERF
151 n->pf->kcnts[j] +=1; 150 n->pf->kcnts[j] += 1;
152 j++; 151 j++;
153#endif 152#endif
154 } 153 }
155 if (n->ht_down == NULL) { 154 if (n->ht_down == NULL) {
156check_terminal: 155check_terminal:
157 if (n->sel.flags&TC_U32_TERMINAL) { 156 if (n->sel.flags & TC_U32_TERMINAL) {
158 157
159 *res = n->res; 158 *res = n->res;
160#ifdef CONFIG_NET_CLS_IND 159#ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
164 } 163 }
165#endif 164#endif
166#ifdef CONFIG_CLS_U32_PERF 165#ifdef CONFIG_CLS_U32_PERF
167 n->pf->rhit +=1; 166 n->pf->rhit += 1;
168#endif 167#endif
169 r = tcf_exts_exec(skb, &n->exts, res); 168 r = tcf_exts_exec(skb, &n->exts, res);
170 if (r < 0) { 169 if (r < 0) {
@@ -188,26 +187,26 @@ check_terminal:
188 ht = n->ht_down; 187 ht = n->ht_down;
189 sel = 0; 188 sel = 0;
190 if (ht->divisor) { 189 if (ht->divisor) {
191 __be32 *data, _data; 190 __be32 *data, hdata;
192 191
193 data = skb_header_pointer(skb, off + n->sel.hoff, 4, 192 data = skb_header_pointer(skb, off + n->sel.hoff, 4,
194 &_data); 193 &hdata);
195 if (!data) 194 if (!data)
196 goto out; 195 goto out;
197 sel = ht->divisor & u32_hash_fold(*data, &n->sel, 196 sel = ht->divisor & u32_hash_fold(*data, &n->sel,
198 n->fshift); 197 n->fshift);
199 } 198 }
200 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) 199 if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
201 goto next_ht; 200 goto next_ht;
202 201
203 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { 202 if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
204 off2 = n->sel.off + 3; 203 off2 = n->sel.off + 3;
205 if (n->sel.flags & TC_U32_VAROFFSET) { 204 if (n->sel.flags & TC_U32_VAROFFSET) {
206 __be16 *data, _data; 205 __be16 *data, hdata;
207 206
208 data = skb_header_pointer(skb, 207 data = skb_header_pointer(skb,
209 off + n->sel.offoff, 208 off + n->sel.offoff,
210 2, &_data); 209 2, &hdata);
211 if (!data) 210 if (!data)
212 goto out; 211 goto out;
213 off2 += ntohs(n->sel.offmask & *data) >> 212 off2 += ntohs(n->sel.offmask & *data) >>
@@ -215,7 +214,7 @@ check_terminal:
215 } 214 }
216 off2 &= ~3; 215 off2 &= ~3;
217 } 216 }
218 if (n->sel.flags&TC_U32_EAT) { 217 if (n->sel.flags & TC_U32_EAT) {
219 off += off2; 218 off += off2;
220 off2 = 0; 219 off2 = 0;
221 } 220 }
@@ -236,11 +235,11 @@ out:
236 235
237deadloop: 236deadloop:
238 if (net_ratelimit()) 237 if (net_ratelimit())
239 printk(KERN_WARNING "cls_u32: dead loop\n"); 238 pr_warning("cls_u32: dead loop\n");
240 return -1; 239 return -1;
241} 240}
242 241
243static __inline__ struct tc_u_hnode * 242static struct tc_u_hnode *
244u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) 243u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
245{ 244{
246 struct tc_u_hnode *ht; 245 struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
252 return ht; 251 return ht;
253} 252}
254 253
255static __inline__ struct tc_u_knode * 254static struct tc_u_knode *
256u32_lookup_key(struct tc_u_hnode *ht, u32 handle) 255u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
257{ 256{
258 unsigned sel; 257 unsigned int sel;
259 struct tc_u_knode *n = NULL; 258 struct tc_u_knode *n = NULL;
260 259
261 sel = TC_U32_HASH(handle); 260 sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
300 do { 299 do {
301 if (++tp_c->hgenerator == 0x7FF) 300 if (++tp_c->hgenerator == 0x7FF)
302 tp_c->hgenerator = 1; 301 tp_c->hgenerator = 1;
303 } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); 302 } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
304 303
305 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; 304 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
306} 305}
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
378static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 377static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
379{ 378{
380 struct tc_u_knode *n; 379 struct tc_u_knode *n;
381 unsigned h; 380 unsigned int h;
382 381
383 for (h=0; h<=ht->divisor; h++) { 382 for (h = 0; h <= ht->divisor; h++) {
384 while ((n = ht->ht[h]) != NULL) { 383 while ((n = ht->ht[h]) != NULL) {
385 ht->ht[h] = n->next; 384 ht->ht[h] = n->next;
386 385
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
446 445
447static int u32_delete(struct tcf_proto *tp, unsigned long arg) 446static int u32_delete(struct tcf_proto *tp, unsigned long arg)
448{ 447{
449 struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; 448 struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
450 449
451 if (ht == NULL) 450 if (ht == NULL)
452 return 0; 451 return 0;
453 452
454 if (TC_U32_KEY(ht->handle)) 453 if (TC_U32_KEY(ht->handle))
455 return u32_delete_key(tp, (struct tc_u_knode*)ht); 454 return u32_delete_key(tp, (struct tc_u_knode *)ht);
456 455
457 if (tp->root == ht) 456 if (tp->root == ht)
458 return -EINVAL; 457 return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
470static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) 469static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
471{ 470{
472 struct tc_u_knode *n; 471 struct tc_u_knode *n;
473 unsigned i = 0x7FF; 472 unsigned int i = 0x7FF;
474 473
475 for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) 474 for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
476 if (i < TC_U32_NODE(n->handle)) 475 if (i < TC_U32_NODE(n->handle))
477 i = TC_U32_NODE(n->handle); 476 i = TC_U32_NODE(n->handle);
478 i++; 477 i++;
479 478
480 return handle|(i>0xFFF ? 0xFFF : i); 479 return handle | (i > 0xFFF ? 0xFFF : i);
481} 480}
482 481
483static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 482static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
566 if (err < 0) 565 if (err < 0)
567 return err; 566 return err;
568 567
569 if ((n = (struct tc_u_knode*)*arg) != NULL) { 568 n = (struct tc_u_knode *)*arg;
569 if (n) {
570 if (TC_U32_KEY(n->handle) == 0) 570 if (TC_U32_KEY(n->handle) == 0)
571 return -EINVAL; 571 return -EINVAL;
572 572
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
574 } 574 }
575 575
576 if (tb[TCA_U32_DIVISOR]) { 576 if (tb[TCA_U32_DIVISOR]) {
577 unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); 577 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
578 578
579 if (--divisor > 0x100) 579 if (--divisor > 0x100)
580 return -EINVAL; 580 return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
585 if (handle == 0) 585 if (handle == 0)
586 return -ENOMEM; 586 return -ENOMEM;
587 } 587 }
588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); 588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
589 if (ht == NULL) 589 if (ht == NULL)
590 return -ENOBUFS; 590 return -ENOBUFS;
591 ht->tp_c = tp_c; 591 ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
683 struct tc_u_common *tp_c = tp->data; 683 struct tc_u_common *tp_c = tp->data;
684 struct tc_u_hnode *ht; 684 struct tc_u_hnode *ht;
685 struct tc_u_knode *n; 685 struct tc_u_knode *n;
686 unsigned h; 686 unsigned int h;
687 687
688 if (arg->stop) 688 if (arg->stop)
689 return; 689 return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
717static int u32_dump(struct tcf_proto *tp, unsigned long fh, 717static int u32_dump(struct tcf_proto *tp, unsigned long fh,
718 struct sk_buff *skb, struct tcmsg *t) 718 struct sk_buff *skb, struct tcmsg *t)
719{ 719{
720 struct tc_u_knode *n = (struct tc_u_knode*)fh; 720 struct tc_u_knode *n = (struct tc_u_knode *)fh;
721 struct nlattr *nest; 721 struct nlattr *nest;
722 722
723 if (n == NULL) 723 if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
730 goto nla_put_failure; 730 goto nla_put_failure;
731 731
732 if (TC_U32_KEY(n->handle) == 0) { 732 if (TC_U32_KEY(n->handle) == 0) {
733 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; 733 struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
734 u32 divisor = ht->divisor+1; 734 u32 divisor = ht->divisor + 1;
735
735 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); 736 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
736 } else { 737 } else {
737 NLA_PUT(skb, TCA_U32_SEL, 738 NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
755 goto nla_put_failure; 756 goto nla_put_failure;
756 757
757#ifdef CONFIG_NET_CLS_IND 758#ifdef CONFIG_NET_CLS_IND
758 if(strlen(n->indev)) 759 if (strlen(n->indev))
759 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); 760 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
760#endif 761#endif
761#ifdef CONFIG_CLS_U32_PERF 762#ifdef CONFIG_CLS_U32_PERF
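The u32 classifier hunks above keep the per-key test ((*data ^ key->val) & key->mask) and the bucket fold ntohl(key & hmask) >> fshift intact while only cleaning up their spelling. A userspace transcription of those two operations, with invented key and mask values:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* The per-key test and the bucket fold from cls_u32.c, transplanted to
 * userspace; the key and mask values below are invented. */
struct u32_key {
	uint32_t val;	/* big-endian, as matched against packet data */
	uint32_t mask;	/* big-endian */
};

static int key_matches(uint32_t data_be, const struct u32_key *key)
{
	return ((data_be ^ key->val) & key->mask) == 0;
}

static unsigned int hash_fold(uint32_t key_be, uint32_t hmask_be, uint8_t fshift)
{
	return ntohl(key_be & hmask_be) >> fshift;
}

int main(void)
{
	struct u32_key k = { htonl(0x0a000000), htonl(0xff000000) };

	printf("match=%d bucket=%u\n",
	       key_matches(htonl(0x0a000001), &k),
	       hash_fold(htonl(0x0a0000c3), htonl(0x000000ff), 0));
	return 0;
}

In the kernel path the fold is additionally masked with ht->divisor before it selects a bucket, as the hunk above shows.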
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bc450397487a..1c8360a2752a 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
33 return 0; 33 return 0;
34 34
35 switch (cmp->align) { 35 switch (cmp->align) {
36 case TCF_EM_ALIGN_U8: 36 case TCF_EM_ALIGN_U8:
37 val = *ptr; 37 val = *ptr;
38 break; 38 break;
39 39
40 case TCF_EM_ALIGN_U16: 40 case TCF_EM_ALIGN_U16:
41 val = get_unaligned_be16(ptr); 41 val = get_unaligned_be16(ptr);
42 42
43 if (cmp_needs_transformation(cmp)) 43 if (cmp_needs_transformation(cmp))
44 val = be16_to_cpu(val); 44 val = be16_to_cpu(val);
45 break; 45 break;
46 46
47 case TCF_EM_ALIGN_U32: 47 case TCF_EM_ALIGN_U32:
48 /* Worth checking boundaries? The branching seems 48 /* Worth checking boundaries? The branching seems
49 * to get worse. Visit again. */ 49 * to get worse. Visit again.
50 val = get_unaligned_be32(ptr); 50 */
51 val = get_unaligned_be32(ptr);
51 52
52 if (cmp_needs_transformation(cmp)) 53 if (cmp_needs_transformation(cmp))
53 val = be32_to_cpu(val); 54 val = be32_to_cpu(val);
54 break; 55 break;
55 56
56 default: 57 default:
57 return 0; 58 return 0;
58 } 59 }
59 60
60 if (cmp->mask) 61 if (cmp->mask)
61 val &= cmp->mask; 62 val &= cmp->mask;
62 63
63 switch (cmp->opnd) { 64 switch (cmp->opnd) {
64 case TCF_EM_OPND_EQ: 65 case TCF_EM_OPND_EQ:
65 return val == cmp->val; 66 return val == cmp->val;
66 case TCF_EM_OPND_LT: 67 case TCF_EM_OPND_LT:
67 return val < cmp->val; 68 return val < cmp->val;
68 case TCF_EM_OPND_GT: 69 case TCF_EM_OPND_GT:
69 return val > cmp->val; 70 return val > cmp->val;
70 } 71 }
71 72
72 return 0; 73 return 0;
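The em_cmp hunk re-flows the switch statements but leaves the matching logic as is: load the value at the configured alignment, apply the optional mask, then compare according to the operand. A reduced sketch of the mask-and-compare step on plain integers (the alignment-specific loads are omitted):

#include <stdint.h>
#include <stdio.h>

enum { OPND_EQ, OPND_LT, OPND_GT };	/* stand-ins for TCF_EM_OPND_* */

/* Mask-then-compare step of the cmp ematch, reduced to plain integers. */
static int cmp_match(uint32_t val, uint32_t mask, uint32_t ref, int opnd)
{
	if (mask)
		val &= mask;

	switch (opnd) {
	case OPND_EQ: return val == ref;
	case OPND_LT: return val < ref;
	case OPND_GT: return val > ref;
	}
	return 0;
}

int main(void)
{
	printf("%d %d\n", cmp_match(0x1234, 0x00ff, 0x34, OPND_EQ),
	       cmp_match(100, 0, 200, OPND_LT));
	return 0;
}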
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 34da5e29ea1a..49130e8abff0 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -47,7 +47,7 @@
47 * on the meta type. Obviously, the length of the data must also 47 * on the meta type. Obviously, the length of the data must also
48 * be provided for non-numeric types. 48 * be provided for non-numeric types.
49 * 49 *
50 * Additionaly, type dependant modifiers such as shift operators 50 * Additionally, type dependent modifiers such as shift operators
51 * or mask may be applied to extend the functionality. As of now, 51 * or mask may be applied to extend the functionality. As of now,
52 * the variable length type supports shifting the byte string to 52 * the variable length type supports shifting the byte string to
53 * the right, eating up any number of octets and thus supporting 53 * the right, eating up any number of octets and thus supporting
@@ -73,21 +73,18 @@
73#include <net/pkt_cls.h> 73#include <net/pkt_cls.h>
74#include <net/sock.h> 74#include <net/sock.h>
75 75
76struct meta_obj 76struct meta_obj {
77{
78 unsigned long value; 77 unsigned long value;
79 unsigned int len; 78 unsigned int len;
80}; 79};
81 80
82struct meta_value 81struct meta_value {
83{
84 struct tcf_meta_val hdr; 82 struct tcf_meta_val hdr;
85 unsigned long val; 83 unsigned long val;
86 unsigned int len; 84 unsigned int len;
87}; 85};
88 86
89struct meta_match 87struct meta_match {
90{
91 struct meta_value lvalue; 88 struct meta_value lvalue;
92 struct meta_value rvalue; 89 struct meta_value rvalue;
93}; 90};
@@ -255,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
255 if (unlikely(skb_dst(skb) == NULL)) 252 if (unlikely(skb_dst(skb) == NULL))
256 *err = -1; 253 *err = -1;
257 else 254 else
258#ifdef CONFIG_NET_CLS_ROUTE 255#ifdef CONFIG_IP_ROUTE_CLASSID
259 dst->value = skb_dst(skb)->tclassid; 256 dst->value = skb_dst(skb)->tclassid;
260#else 257#else
261 dst->value = 0; 258 dst->value = 0;
@@ -267,7 +264,7 @@ META_COLLECTOR(int_rtiif)
267 if (unlikely(skb_rtable(skb) == NULL)) 264 if (unlikely(skb_rtable(skb) == NULL))
268 *err = -1; 265 *err = -1;
269 else 266 else
270 dst->value = skb_rtable(skb)->fl.iif; 267 dst->value = skb_rtable(skb)->rt_iif;
271} 268}
272 269
273/************************************************************************** 270/**************************************************************************
@@ -404,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf)
404META_COLLECTOR(int_sk_alloc) 401META_COLLECTOR(int_sk_alloc)
405{ 402{
406 SKIP_NONLOCAL(skb); 403 SKIP_NONLOCAL(skb);
407 dst->value = skb->sk->sk_allocation; 404 dst->value = (__force int) skb->sk->sk_allocation;
408} 405}
409 406
410META_COLLECTOR(int_sk_route_caps) 407META_COLLECTOR(int_sk_route_caps)
@@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
483 * Meta value collectors assignment table 480 * Meta value collectors assignment table
484 **************************************************************************/ 481 **************************************************************************/
485 482
486struct meta_ops 483struct meta_ops {
487{
488 void (*get)(struct sk_buff *, struct tcf_pkt_info *, 484 void (*get)(struct sk_buff *, struct tcf_pkt_info *,
489 struct meta_value *, struct meta_obj *, int *); 485 struct meta_value *, struct meta_obj *, int *);
490}; 486};
@@ -494,7 +490,7 @@ struct meta_ops
494 490
495/* Meta value operations table listing all meta value collectors and 491/* Meta value operations table listing all meta value collectors and
496 * assigns them to a type and meta id. */ 492 * assigns them to a type and meta id. */
497static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 493static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
498 [TCF_META_TYPE_VAR] = { 494 [TCF_META_TYPE_VAR] = {
499 [META_ID(DEV)] = META_FUNC(var_dev), 495 [META_ID(DEV)] = META_FUNC(var_dev),
500 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), 496 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
@@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
550 } 546 }
551}; 547};
552 548
553static inline struct meta_ops * meta_ops(struct meta_value *val) 549static inline struct meta_ops *meta_ops(struct meta_value *val)
554{ 550{
555 return &__meta_ops[meta_type(val)][meta_id(val)]; 551 return &__meta_ops[meta_type(val)][meta_id(val)];
556} 552}
@@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
649{ 645{
650 if (v->len == sizeof(unsigned long)) 646 if (v->len == sizeof(unsigned long))
651 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); 647 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
652 else if (v->len == sizeof(u32)) { 648 else if (v->len == sizeof(u32))
653 NLA_PUT_U32(skb, tlv, v->val); 649 NLA_PUT_U32(skb, tlv, v->val);
654 }
655 650
656 return 0; 651 return 0;
657 652
@@ -663,8 +658,7 @@ nla_put_failure:
663 * Type specific operations table 658 * Type specific operations table
664 **************************************************************************/ 659 **************************************************************************/
665 660
666struct meta_type_ops 661struct meta_type_ops {
667{
668 void (*destroy)(struct meta_value *); 662 void (*destroy)(struct meta_value *);
669 int (*compare)(struct meta_obj *, struct meta_obj *); 663 int (*compare)(struct meta_obj *, struct meta_obj *);
670 int (*change)(struct meta_value *, struct nlattr *); 664 int (*change)(struct meta_value *, struct nlattr *);
@@ -672,7 +666,7 @@ struct meta_type_ops
672 int (*dump)(struct sk_buff *, struct meta_value *, int); 666 int (*dump)(struct sk_buff *, struct meta_value *, int);
673}; 667};
674 668
675static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { 669static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
676 [TCF_META_TYPE_VAR] = { 670 [TCF_META_TYPE_VAR] = {
677 .destroy = meta_var_destroy, 671 .destroy = meta_var_destroy,
678 .compare = meta_var_compare, 672 .compare = meta_var_compare,
@@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
688 } 682 }
689}; 683};
690 684
691static inline struct meta_type_ops * meta_type_ops(struct meta_value *v) 685static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
692{ 686{
693 return &__meta_type_ops[meta_type(v)]; 687 return &__meta_type_ops[meta_type(v)];
694} 688}
@@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
713 return err; 707 return err;
714 708
715 if (meta_type_ops(v)->apply_extras) 709 if (meta_type_ops(v)->apply_extras)
716 meta_type_ops(v)->apply_extras(v, dst); 710 meta_type_ops(v)->apply_extras(v, dst);
717 711
718 return 0; 712 return 0;
719} 713}
@@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
732 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); 726 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
733 727
734 switch (meta->lvalue.hdr.op) { 728 switch (meta->lvalue.hdr.op) {
735 case TCF_EM_OPND_EQ: 729 case TCF_EM_OPND_EQ:
736 return !r; 730 return !r;
737 case TCF_EM_OPND_LT: 731 case TCF_EM_OPND_LT:
738 return r < 0; 732 return r < 0;
739 case TCF_EM_OPND_GT: 733 case TCF_EM_OPND_GT:
740 return r > 0; 734 return r > 0;
741 } 735 }
742 736
743 return 0; 737 return 0;
@@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
771 765
772static inline int meta_is_supported(struct meta_value *val) 766static inline int meta_is_supported(struct meta_value *val)
773{ 767{
774 return (!meta_id(val) || meta_ops(val)->get); 768 return !meta_id(val) || meta_ops(val)->get;
775} 769}
776 770
777static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { 771static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
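Earlier in this file the __meta_ops table is reindexed with the spaced-out TCF_META_TYPE_MAX + 1 bounds; the lookup itself stays a two-level array indexed by meta type and meta id. The sketch below mimics that dispatch shape with invented types, ids and collector bodies, purely to illustrate the table layout:

#include <stdio.h>

/* Two-level dispatch in the style of __meta_ops[type][id]; the types,
 * ids and collector bodies here are invented for illustration. */
enum { TYPE_VAR, TYPE_INT, TYPE_MAX };
enum { ID_DEV, ID_MARK, ID_MAX };

struct meta_ops {
	long (*get)(void);
};

static long get_dev(void)  { return 1; }
static long get_mark(void) { return 42; }

static const struct meta_ops meta_ops_tbl[TYPE_MAX][ID_MAX] = {
	[TYPE_VAR] = { [ID_DEV]  = { .get = get_dev  } },
	[TYPE_INT] = { [ID_MARK] = { .get = get_mark } },
};

int main(void)
{
	const struct meta_ops *ops = &meta_ops_tbl[TYPE_INT][ID_MARK];

	if (ops->get)	/* same guard as meta_is_supported() */
		printf("value = %ld\n", ops->get());
	return 0;
}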
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 1a4176aee6e5..a3bed07a008b 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
18#include <linux/tc_ematch/tc_em_nbyte.h> 18#include <linux/tc_ematch/tc_em_nbyte.h>
19#include <net/pkt_cls.h> 19#include <net/pkt_cls.h>
20 20
21struct nbyte_data 21struct nbyte_data {
22{
23 struct tcf_em_nbyte hdr; 22 struct tcf_em_nbyte hdr;
24 char pattern[0]; 23 char pattern[0];
25}; 24};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index ea8f566e720c..15d353d2e4be 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
19#include <linux/tc_ematch/tc_em_text.h> 19#include <linux/tc_ematch/tc_em_text.h>
20#include <net/pkt_cls.h> 20#include <net/pkt_cls.h>
21 21
22struct text_match 22struct text_match {
23{
24 u16 from_offset; 23 u16 from_offset;
25 u16 to_offset; 24 u16 to_offset;
26 u8 from_layer; 25 u8 from_layer;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 953f1479f7da..797bdb88c010 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
35 if (!tcf_valid_offset(skb, ptr, sizeof(u32))) 35 if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
36 return 0; 36 return 0;
37 37
38 return !(((*(__be32*) ptr) ^ key->val) & key->mask); 38 return !(((*(__be32 *) ptr) ^ key->val) & key->mask);
39} 39}
40 40
41static struct tcf_ematch_ops em_u32_ops = { 41static struct tcf_ematch_ops em_u32_ops = {
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e37da961f80..88d93eb92507 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
93static LIST_HEAD(ematch_ops); 93static LIST_HEAD(ematch_ops);
94static DEFINE_RWLOCK(ematch_mod_lock); 94static DEFINE_RWLOCK(ematch_mod_lock);
95 95
96static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind) 96static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
97{ 97{
98 struct tcf_ematch_ops *e = NULL; 98 struct tcf_ematch_ops *e = NULL;
99 99
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
163} 163}
164EXPORT_SYMBOL(tcf_em_unregister); 164EXPORT_SYMBOL(tcf_em_unregister);
165 165
166static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, 166static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
167 int index) 167 int index)
168{ 168{
169 return &tree->matches[index]; 169 return &tree->matches[index];
170} 170}
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
184 184
185 if (em_hdr->kind == TCF_EM_CONTAINER) { 185 if (em_hdr->kind == TCF_EM_CONTAINER) {
186 /* Special ematch called "container", carries an index 186 /* Special ematch called "container", carries an index
187 * referencing an external ematch sequence. */ 187 * referencing an external ematch sequence.
188 */
188 u32 ref; 189 u32 ref;
189 190
190 if (data_len < sizeof(ref)) 191 if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
195 goto errout; 196 goto errout;
196 197
197 /* We do not allow backward jumps to avoid loops and jumps 198 /* We do not allow backward jumps to avoid loops and jumps
198 * to our own position are of course illegal. */ 199 * to our own position are of course illegal.
200 */
199 if (ref <= idx) 201 if (ref <= idx)
200 goto errout; 202 goto errout;
201 203
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
208 * which automatically releases the reference again, therefore 210 * which automatically releases the reference again, therefore
209 * the module MUST not be given back under any circumstances 211 * the module MUST not be given back under any circumstances
210 * here. Be aware, the destroy function assumes that the 212 * here. Be aware, the destroy function assumes that the
211 * module is held if the ops field is non zero. */ 213 * module is held if the ops field is non zero.
214 */
212 em->ops = tcf_em_lookup(em_hdr->kind); 215 em->ops = tcf_em_lookup(em_hdr->kind);
213 216
214 if (em->ops == NULL) { 217 if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
221 if (em->ops) { 224 if (em->ops) {
222 /* We dropped the RTNL mutex in order to 225 /* We dropped the RTNL mutex in order to
223 * perform the module load. Tell the caller 226 * perform the module load. Tell the caller
224 * to replay the request. */ 227 * to replay the request.
228 */
225 module_put(em->ops->owner); 229 module_put(em->ops->owner);
226 err = -EAGAIN; 230 err = -EAGAIN;
227 } 231 }
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
230 } 234 }
231 235
232 /* ematch module provides expected length of data, so we 236 /* ematch module provides expected length of data, so we
233 * can do a basic sanity check. */ 237 * can do a basic sanity check.
238 */
234 if (em->ops->datalen && data_len < em->ops->datalen) 239 if (em->ops->datalen && data_len < em->ops->datalen)
235 goto errout; 240 goto errout;
236 241
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
246 * TCF_EM_SIMPLE may be specified stating that the 251 * TCF_EM_SIMPLE may be specified stating that the
247 * data only consists of a u32 integer and the module 252 * data only consists of a u32 integer and the module
248 * does not expect a memory reference but rather 253 * does not expect a memory reference but rather
249 * the value carried. */ 254 * the value carried.
255 */
250 if (em_hdr->flags & TCF_EM_SIMPLE) { 256 if (em_hdr->flags & TCF_EM_SIMPLE) {
251 if (data_len < sizeof(u32)) 257 if (data_len < sizeof(u32))
252 goto errout; 258 goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
334 * The array of rt attributes is parsed in the order as they are 340 * The array of rt attributes is parsed in the order as they are
335 * provided, their type must be incremental from 1 to n. Even 341 * provided, their type must be incremental from 1 to n. Even
336 * if it does not serve any real purpose, a failure of sticking 342 * if it does not serve any real purpose, a failure of sticking
337 * to this policy will result in parsing failure. */ 343 * to this policy will result in parsing failure.
344 */
338 for (idx = 0; nla_ok(rt_match, list_len); idx++) { 345 for (idx = 0; nla_ok(rt_match, list_len); idx++) {
339 err = -EINVAL; 346 err = -EINVAL;
340 347
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
359 /* Check if the number of matches provided by userspace actually 366 /* Check if the number of matches provided by userspace actually
360 * complies with the array of matches. The number was used for 367 * complies with the array of matches. The number was used for
361 * the validation of references and a mismatch could lead to 368 * the validation of references and a mismatch could lead to
362 * undefined references during the matching process. */ 369 * undefined references during the matching process.
370 */
363 if (idx != tree_hdr->nmatches) { 371 if (idx != tree_hdr->nmatches) {
364 err = -EINVAL; 372 err = -EINVAL;
365 goto errout_abort; 373 goto errout_abort;
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
449 .flags = em->flags 457 .flags = em->flags
450 }; 458 };
451 459
452 NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); 460 NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
453 461
454 if (em->ops && em->ops->dump) { 462 if (em->ops && em->ops->dump) {
455 if (em->ops->dump(skb, em) < 0) 463 if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
478 struct tcf_pkt_info *info) 486 struct tcf_pkt_info *info)
479{ 487{
480 int r = em->ops->match(skb, em, info); 488 int r = em->ops->match(skb, em, info);
489
481 return tcf_em_is_inverted(em) ? !r : r; 490 return tcf_em_is_inverted(em) ? !r : r;
482} 491}
483 492
@@ -527,8 +536,8 @@ pop_stack:
527 536
528stack_overflow: 537stack_overflow:
529 if (net_ratelimit()) 538 if (net_ratelimit())
530 printk(KERN_WARNING "tc ematch: local stack overflow," 539 pr_warning("tc ematch: local stack overflow,"
531 " increase NET_EMATCH_STACK\n"); 540 " increase NET_EMATCH_STACK\n");
532 return -1; 541 return -1;
533} 542}
534EXPORT_SYMBOL(__tcf_em_tree_match); 543EXPORT_SYMBOL(__tcf_em_tree_match);
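tcf_em_match() in the hunk above gains a blank line but keeps its behaviour: run the ematch's own match callback, then invert the result when the ematch is flagged as inverted. A standalone sketch of that inversion, using a made-up flag bit and match callback:

#include <stdio.h>

#define FLAG_INVERT	0x1	/* stand-in for the ematch invert flag */

struct ematch {
	unsigned int flags;
	int (*match)(int pkt);
};

/* Result inversion as in tcf_em_match(): run the match, then flip the
 * result if the ematch carries the invert flag. */
static int em_match(const struct ematch *em, int pkt)
{
	int r = em->match(pkt);

	return (em->flags & FLAG_INVERT) ? !r : r;
}

static int is_even(int pkt) { return (pkt & 1) == 0; }

int main(void)
{
	struct ematch plain = { 0, is_even };
	struct ematch inv   = { FLAG_INVERT, is_even };

	printf("%d %d\n", em_match(&plain, 4), em_match(&inv, 4));
	return 0;
}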
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b22ca2d1cebc..6b8627661c98 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
187 int err = -ENOENT; 187 int err = -ENOENT;
188 188
189 write_lock(&qdisc_mod_lock); 189 write_lock(&qdisc_mod_lock);
190 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) 190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
191 if (q == qops) 191 if (q == qops)
192 break; 192 break;
193 if (q) { 193 if (q) {
@@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
321 if (!tab || --tab->refcnt) 321 if (!tab || --tab->refcnt)
322 return; 322 return;
323 323
324 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { 324 for (rtabp = &qdisc_rtab_list;
325 (rtab = *rtabp) != NULL;
326 rtabp = &rtab->next) {
325 if (rtab == tab) { 327 if (rtab == tab) {
326 *rtabp = rtab->next; 328 *rtabp = rtab->next;
327 kfree(rtab); 329 kfree(rtab);
@@ -396,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
396 return stab; 398 return stab;
397} 399}
398 400
401static void stab_kfree_rcu(struct rcu_head *head)
402{
403 kfree(container_of(head, struct qdisc_size_table, rcu));
404}
405
399void qdisc_put_stab(struct qdisc_size_table *tab) 406void qdisc_put_stab(struct qdisc_size_table *tab)
400{ 407{
401 if (!tab) 408 if (!tab)
@@ -405,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
405 412
406 if (--tab->refcnt == 0) { 413 if (--tab->refcnt == 0) {
407 list_del(&tab->list); 414 list_del(&tab->list);
408 kfree(tab); 415 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
409 } 416 }
410 417
411 spin_unlock(&qdisc_stab_lock); 418 spin_unlock(&qdisc_stab_lock);
@@ -428,7 +435,7 @@ nla_put_failure:
428 return -1; 435 return -1;
429} 436}
430 437
431void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) 438void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
432{ 439{
433 int pkt_len, slot; 440 int pkt_len, slot;
434 441
@@ -454,14 +461,13 @@ out:
454 pkt_len = 1; 461 pkt_len = 1;
455 qdisc_skb_cb(skb)->pkt_len = pkt_len; 462 qdisc_skb_cb(skb)->pkt_len = pkt_len;
456} 463}
457EXPORT_SYMBOL(qdisc_calculate_pkt_len); 464EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
458 465
459void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) 466void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
460{ 467{
461 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { 468 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
462 printk(KERN_WARNING 469 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
463 "%s: %s qdisc %X: is non-work-conserving?\n", 470 txt, qdisc->ops->id, qdisc->handle >> 16);
464 txt, qdisc->ops->id, qdisc->handle >> 16);
465 qdisc->flags |= TCQ_F_WARN_NONWC; 471 qdisc->flags |= TCQ_F_WARN_NONWC;
466 } 472 }
467} 473}
@@ -472,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
472 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 478 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
473 timer); 479 timer);
474 480
475 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 481 qdisc_unthrottled(wd->qdisc);
476 __netif_schedule(qdisc_root(wd->qdisc)); 482 __netif_schedule(qdisc_root(wd->qdisc));
477 483
478 return HRTIMER_NORESTART; 484 return HRTIMER_NORESTART;
@@ -494,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
494 &qdisc_root_sleeping(wd->qdisc)->state)) 500 &qdisc_root_sleeping(wd->qdisc)->state))
495 return; 501 return;
496 502
497 wd->qdisc->flags |= TCQ_F_THROTTLED; 503 qdisc_throttled(wd->qdisc);
498 time = ktime_set(0, 0); 504 time = ktime_set(0, 0);
499 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); 505 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
500 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); 506 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -504,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
504void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) 510void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
505{ 511{
506 hrtimer_cancel(&wd->timer); 512 hrtimer_cancel(&wd->timer);
507 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 513 qdisc_unthrottled(wd->qdisc);
508} 514}
509EXPORT_SYMBOL(qdisc_watchdog_cancel); 515EXPORT_SYMBOL(qdisc_watchdog_cancel);
510 516
@@ -625,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
625 autohandle = TC_H_MAKE(0x80000000U, 0); 631 autohandle = TC_H_MAKE(0x80000000U, 0);
626 } while (qdisc_lookup(dev, autohandle) && --i > 0); 632 } while (qdisc_lookup(dev, autohandle) && --i > 0);
627 633
628 return i>0 ? autohandle : 0; 634 return i > 0 ? autohandle : 0;
629} 635}
630 636
631void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) 637void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -834,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
834 err = PTR_ERR(stab); 840 err = PTR_ERR(stab);
835 goto err_out4; 841 goto err_out4;
836 } 842 }
837 sch->stab = stab; 843 rcu_assign_pointer(sch->stab, stab);
838 } 844 }
839 if (tca[TCA_RATE]) { 845 if (tca[TCA_RATE]) {
840 spinlock_t *root_lock; 846 spinlock_t *root_lock;
@@ -874,7 +880,7 @@ err_out4:
874 * Any broken qdiscs that would require a ops->reset() here? 880 * Any broken qdiscs that would require a ops->reset() here?
875 * The qdisc was never in action so it shouldn't be necessary. 881 * The qdisc was never in action so it shouldn't be necessary.
876 */ 882 */
877 qdisc_put_stab(sch->stab); 883 qdisc_put_stab(rtnl_dereference(sch->stab));
878 if (ops->destroy) 884 if (ops->destroy)
879 ops->destroy(sch); 885 ops->destroy(sch);
880 goto err_out3; 886 goto err_out3;
@@ -882,7 +888,7 @@ err_out4:
882 888
883static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) 889static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
884{ 890{
885 struct qdisc_size_table *stab = NULL; 891 struct qdisc_size_table *ostab, *stab = NULL;
886 int err = 0; 892 int err = 0;
887 893
888 if (tca[TCA_OPTIONS]) { 894 if (tca[TCA_OPTIONS]) {
@@ -899,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
899 return PTR_ERR(stab); 905 return PTR_ERR(stab);
900 } 906 }
901 907
902 qdisc_put_stab(sch->stab); 908 ostab = rtnl_dereference(sch->stab);
903 sch->stab = stab; 909 rcu_assign_pointer(sch->stab, stab);
910 qdisc_put_stab(ostab);
904 911
905 if (tca[TCA_RATE]) { 912 if (tca[TCA_RATE]) {
906 /* NB: ignores errors from replace_estimator 913 /* NB: ignores errors from replace_estimator
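The stab changes in this file together form a publish-then-defer-free pattern: qdisc_change() reads the old table with rtnl_dereference(), publishes the replacement with rcu_assign_pointer(), and qdisc_put_stab() hands the old table to call_rcu_bh() so it is only freed after a grace period. The userspace analogue below uses a C11 release store in place of rcu_assign_pointer() and, for brevity, frees immediately where the kernel would wait for the grace period; it illustrates the ordering, not RCU itself.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Reduced stand-in for the size table; only the fields the demo needs. */
struct size_table {
	int refcnt;
	int cell_size;
};

static _Atomic(struct size_table *) current_tab;

static void replace_table(struct size_table *new_tab)
{
	struct size_table *old = atomic_load_explicit(&current_tab,
						      memory_order_relaxed);

	/* Publish the new table before readers can see it half-built. */
	atomic_store_explicit(&current_tab, new_tab, memory_order_release);

	/* The kernel defers this free via call_rcu_bh(); here we simply
	 * assume no concurrent readers for the sake of the demo. */
	if (old && --old->refcnt == 0)
		free(old);
}

int main(void)
{
	struct size_table *t = malloc(sizeof(*t));

	t->refcnt = 1;
	t->cell_size = 64;
	replace_table(t);
	printf("cell_size=%d\n",
	       atomic_load_explicit(&current_tab, memory_order_acquire)->cell_size);
	replace_table(NULL);
	return 0;
}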
@@ -915,9 +922,8 @@ out:
915 return 0; 922 return 0;
916} 923}
917 924
918struct check_loop_arg 925struct check_loop_arg {
919{ 926 struct qdisc_walker w;
920 struct qdisc_walker w;
921 struct Qdisc *p; 927 struct Qdisc *p;
922 int depth; 928 int depth;
923}; 929};
@@ -970,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
970 struct Qdisc *p = NULL; 976 struct Qdisc *p = NULL;
971 int err; 977 int err;
972 978
973 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 979 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
980 if (!dev)
974 return -ENODEV; 981 return -ENODEV;
975 982
976 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 983 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -980,12 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
980 if (clid) { 987 if (clid) {
981 if (clid != TC_H_ROOT) { 988 if (clid != TC_H_ROOT) {
982 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 989 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
983 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 990 p = qdisc_lookup(dev, TC_H_MAJ(clid));
991 if (!p)
984 return -ENOENT; 992 return -ENOENT;
985 q = qdisc_leaf(p, clid); 993 q = qdisc_leaf(p, clid);
986 } else { /* ingress */ 994 } else if (dev_ingress_queue(dev)) {
987 if (dev_ingress_queue(dev)) 995 q = dev_ingress_queue(dev)->qdisc_sleeping;
988 q = dev_ingress_queue(dev)->qdisc_sleeping;
989 } 996 }
990 } else { 997 } else {
991 q = dev->qdisc; 998 q = dev->qdisc;
@@ -996,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
996 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) 1003 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
997 return -EINVAL; 1004 return -EINVAL;
998 } else { 1005 } else {
999 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) 1006 q = qdisc_lookup(dev, tcm->tcm_handle);
1007 if (!q)
1000 return -ENOENT; 1008 return -ENOENT;
1001 } 1009 }
1002 1010
@@ -1008,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1008 return -EINVAL; 1016 return -EINVAL;
1009 if (q->handle == 0) 1017 if (q->handle == 0)
1010 return -ENOENT; 1018 return -ENOENT;
1011 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) 1019 err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1020 if (err != 0)
1012 return err; 1021 return err;
1013 } else { 1022 } else {
1014 qdisc_notify(net, skb, n, clid, NULL, q); 1023 qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1017,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1017} 1026}
1018 1027
1019/* 1028/*
1020 Create/change qdisc. 1029 * Create/change qdisc.
1021 */ 1030 */
1022 1031
1023static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 1032static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1036,7 +1045,8 @@ replay:
1036 clid = tcm->tcm_parent; 1045 clid = tcm->tcm_parent;
1037 q = p = NULL; 1046 q = p = NULL;
1038 1047
1039 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1048 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1049 if (!dev)
1040 return -ENODEV; 1050 return -ENODEV;
1041 1051
1042 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1052 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1046,12 +1056,12 @@ replay:
1046 if (clid) { 1056 if (clid) {
1047 if (clid != TC_H_ROOT) { 1057 if (clid != TC_H_ROOT) {
1048 if (clid != TC_H_INGRESS) { 1058 if (clid != TC_H_INGRESS) {
1049 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 1059 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1060 if (!p)
1050 return -ENOENT; 1061 return -ENOENT;
1051 q = qdisc_leaf(p, clid); 1062 q = qdisc_leaf(p, clid);
1052 } else { /* ingress */ 1063 } else if (dev_ingress_queue_create(dev)) {
1053 if (dev_ingress_queue_create(dev)) 1064 q = dev_ingress_queue(dev)->qdisc_sleeping;
1054 q = dev_ingress_queue(dev)->qdisc_sleeping;
1055 } 1065 }
1056 } else { 1066 } else {
1057 q = dev->qdisc; 1067 q = dev->qdisc;
@@ -1063,13 +1073,14 @@ replay:
1063 1073
1064 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { 1074 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1065 if (tcm->tcm_handle) { 1075 if (tcm->tcm_handle) {
1066 if (q && !(n->nlmsg_flags&NLM_F_REPLACE)) 1076 if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1067 return -EEXIST; 1077 return -EEXIST;
1068 if (TC_H_MIN(tcm->tcm_handle)) 1078 if (TC_H_MIN(tcm->tcm_handle))
1069 return -EINVAL; 1079 return -EINVAL;
1070 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) 1080 q = qdisc_lookup(dev, tcm->tcm_handle);
1081 if (!q)
1071 goto create_n_graft; 1082 goto create_n_graft;
1072 if (n->nlmsg_flags&NLM_F_EXCL) 1083 if (n->nlmsg_flags & NLM_F_EXCL)
1073 return -EEXIST; 1084 return -EEXIST;
1074 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1085 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1075 return -EINVAL; 1086 return -EINVAL;
@@ -1079,7 +1090,7 @@ replay:
1079 atomic_inc(&q->refcnt); 1090 atomic_inc(&q->refcnt);
1080 goto graft; 1091 goto graft;
1081 } else { 1092 } else {
1082 if (q == NULL) 1093 if (!q)
1083 goto create_n_graft; 1094 goto create_n_graft;
1084 1095
1085 /* This magic test requires explanation. 1096 /* This magic test requires explanation.
@@ -1101,9 +1112,9 @@ replay:
1101 * For now we select create/graft, if 1112 * For now we select create/graft, if
1102 * user gave KIND, which does not match existing. 1113 * user gave KIND, which does not match existing.
1103 */ 1114 */
1104 if ((n->nlmsg_flags&NLM_F_CREATE) && 1115 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1105 (n->nlmsg_flags&NLM_F_REPLACE) && 1116 (n->nlmsg_flags & NLM_F_REPLACE) &&
1106 ((n->nlmsg_flags&NLM_F_EXCL) || 1117 ((n->nlmsg_flags & NLM_F_EXCL) ||
1107 (tca[TCA_KIND] && 1118 (tca[TCA_KIND] &&
1108 nla_strcmp(tca[TCA_KIND], q->ops->id)))) 1119 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1109 goto create_n_graft; 1120 goto create_n_graft;
@@ -1118,7 +1129,7 @@ replay:
1118 /* Change qdisc parameters */ 1129 /* Change qdisc parameters */
1119 if (q == NULL) 1130 if (q == NULL)
1120 return -ENOENT; 1131 return -ENOENT;
1121 if (n->nlmsg_flags&NLM_F_EXCL) 1132 if (n->nlmsg_flags & NLM_F_EXCL)
1122 return -EEXIST; 1133 return -EEXIST;
1123 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1134 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1124 return -EINVAL; 1135 return -EINVAL;
@@ -1128,7 +1139,7 @@ replay:
1128 return err; 1139 return err;
1129 1140
1130create_n_graft: 1141create_n_graft:
1131 if (!(n->nlmsg_flags&NLM_F_CREATE)) 1142 if (!(n->nlmsg_flags & NLM_F_CREATE))
1132 return -ENOENT; 1143 return -ENOENT;
1133 if (clid == TC_H_INGRESS) { 1144 if (clid == TC_H_INGRESS) {
1134 if (dev_ingress_queue(dev)) 1145 if (dev_ingress_queue(dev))
@@ -1175,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1175 struct nlmsghdr *nlh; 1186 struct nlmsghdr *nlh;
1176 unsigned char *b = skb_tail_pointer(skb); 1187 unsigned char *b = skb_tail_pointer(skb);
1177 struct gnet_dump d; 1188 struct gnet_dump d;
1189 struct qdisc_size_table *stab;
1178 1190
1179 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); 1191 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1180 tcm = NLMSG_DATA(nlh); 1192 tcm = NLMSG_DATA(nlh);
@@ -1190,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1190 goto nla_put_failure; 1202 goto nla_put_failure;
1191 q->qstats.qlen = q->q.qlen; 1203 q->qstats.qlen = q->q.qlen;
1192 1204
1193 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) 1205 stab = rtnl_dereference(q->stab);
1206 if (stab && qdisc_dump_stab(skb, stab) < 0)
1194 goto nla_put_failure; 1207 goto nla_put_failure;
1195 1208
1196 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1209 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1234,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1234 return -ENOBUFS; 1247 return -ENOBUFS;
1235 1248
1236 if (old && !tc_qdisc_dump_ignore(old)) { 1249 if (old && !tc_qdisc_dump_ignore(old)) {
1237 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) 1250 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
1251 0, RTM_DELQDISC) < 0)
1238 goto err_out; 1252 goto err_out;
1239 } 1253 }
1240 if (new && !tc_qdisc_dump_ignore(new)) { 1254 if (new && !tc_qdisc_dump_ignore(new)) {
1241 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 1255 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
1256 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1242 goto err_out; 1257 goto err_out;
1243 } 1258 }
1244 1259
1245 if (skb->len) 1260 if (skb->len)
1246 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1261 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
1262 n->nlmsg_flags & NLM_F_ECHO);
1247 1263
1248err_out: 1264err_out:
1249 kfree_skb(skb); 1265 kfree_skb(skb);
@@ -1275,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1275 q_idx++; 1291 q_idx++;
1276 continue; 1292 continue;
1277 } 1293 }
1278 if (!tc_qdisc_dump_ignore(q) && 1294 if (!tc_qdisc_dump_ignore(q) &&
1279 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, 1295 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1280 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) 1296 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1281 goto done; 1297 goto done;
@@ -1356,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1356 u32 qid = TC_H_MAJ(clid); 1372 u32 qid = TC_H_MAJ(clid);
1357 int err; 1373 int err;
1358 1374
1359 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1375 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1376 if (!dev)
1360 return -ENODEV; 1377 return -ENODEV;
1361 1378
1362 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1379 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1391,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1391 qid = dev->qdisc->handle; 1408 qid = dev->qdisc->handle;
1392 1409
1393 /* Now qid is genuine qdisc handle consistent 1410 /* Now qid is genuine qdisc handle consistent
1394 both with parent and child. 1411 * both with parent and child.
1395 1412 *
1396 TC_H_MAJ(pid) still may be unspecified, complete it now. 1413 * TC_H_MAJ(pid) still may be unspecified, complete it now.
1397 */ 1414 */
1398 if (pid) 1415 if (pid)
1399 pid = TC_H_MAKE(qid, pid); 1416 pid = TC_H_MAKE(qid, pid);
@@ -1403,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1403 } 1420 }
1404 1421
1405 /* OK. Locate qdisc */ 1422 /* OK. Locate qdisc */
1406 if ((q = qdisc_lookup(dev, qid)) == NULL) 1423 q = qdisc_lookup(dev, qid);
1424 if (!q)
1407 return -ENOENT; 1425 return -ENOENT;
1408 1426
1409 /* An check that it supports classes */ 1427 /* An check that it supports classes */
@@ -1423,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1423 1441
1424 if (cl == 0) { 1442 if (cl == 0) {
1425 err = -ENOENT; 1443 err = -ENOENT;
1426 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE)) 1444 if (n->nlmsg_type != RTM_NEWTCLASS ||
1445 !(n->nlmsg_flags & NLM_F_CREATE))
1427 goto out; 1446 goto out;
1428 } else { 1447 } else {
1429 switch (n->nlmsg_type) { 1448 switch (n->nlmsg_type) {
1430 case RTM_NEWTCLASS: 1449 case RTM_NEWTCLASS:
1431 err = -EEXIST; 1450 err = -EEXIST;
1432 if (n->nlmsg_flags&NLM_F_EXCL) 1451 if (n->nlmsg_flags & NLM_F_EXCL)
1433 goto out; 1452 goto out;
1434 break; 1453 break;
1435 case RTM_DELTCLASS: 1454 case RTM_DELTCLASS:
@@ -1521,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
1521 return -EINVAL; 1540 return -EINVAL;
1522 } 1541 }
1523 1542
1524 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1543 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
1544 n->nlmsg_flags & NLM_F_ECHO);
1525} 1545}
1526 1546
1527struct qdisc_dump_args 1547struct qdisc_dump_args {
1528{ 1548 struct qdisc_walker w;
1529 struct qdisc_walker w; 1549 struct sk_buff *skb;
1530 struct sk_buff *skb; 1550 struct netlink_callback *cb;
1531 struct netlink_callback *cb;
1532}; 1551};
1533 1552
1534static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) 1553static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1590,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1590 1609
1591static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 1610static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1592{ 1611{
1593 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); 1612 struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
1594 struct net *net = sock_net(skb->sk); 1613 struct net *net = sock_net(skb->sk);
1595 struct netdev_queue *dev_queue; 1614 struct netdev_queue *dev_queue;
1596 struct net_device *dev; 1615 struct net_device *dev;
@@ -1598,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1598 1617
1599 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1618 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1600 return 0; 1619 return 0;
1601 if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1620 dev = dev_get_by_index(net, tcm->tcm_ifindex);
1621 if (!dev)
1602 return 0; 1622 return 0;
1603 1623
1604 s_t = cb->args[0]; 1624 s_t = cb->args[0];
@@ -1621,19 +1641,22 @@ done:
1621} 1641}
1622 1642
1623/* Main classifier routine: scans classifier chain attached 1643/* Main classifier routine: scans classifier chain attached
1624 to this qdisc, (optionally) tests for protocol and asks 1644 * to this qdisc, (optionally) tests for protocol and asks
1625 specific classifiers. 1645 * specific classifiers.
1626 */ 1646 */
1627int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, 1647int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1628 struct tcf_result *res) 1648 struct tcf_result *res)
1629{ 1649{
1630 __be16 protocol = skb->protocol; 1650 __be16 protocol = skb->protocol;
1631 int err = 0; 1651 int err;
1632 1652
1633 for (; tp; tp = tp->next) { 1653 for (; tp; tp = tp->next) {
1634 if ((tp->protocol == protocol || 1654 if (tp->protocol != protocol &&
1635 tp->protocol == htons(ETH_P_ALL)) && 1655 tp->protocol != htons(ETH_P_ALL))
1636 (err = tp->classify(skb, tp, res)) >= 0) { 1656 continue;
1657 err = tp->classify(skb, tp, res);
1658
1659 if (err >= 0) {
1637#ifdef CONFIG_NET_CLS_ACT 1660#ifdef CONFIG_NET_CLS_ACT
1638 if (err != TC_ACT_RECLASSIFY && skb->tc_verd) 1661 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1639 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); 1662 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1649,12 +1672,10 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
1649 struct tcf_result *res) 1672 struct tcf_result *res)
1650{ 1673{
1651 int err = 0; 1674 int err = 0;
1652 __be16 protocol;
1653#ifdef CONFIG_NET_CLS_ACT 1675#ifdef CONFIG_NET_CLS_ACT
1654 struct tcf_proto *otp = tp; 1676 struct tcf_proto *otp = tp;
1655reclassify: 1677reclassify:
1656#endif 1678#endif
1657 protocol = skb->protocol;
1658 1679
1659 err = tc_classify_compat(skb, tp, res); 1680 err = tc_classify_compat(skb, tp, res);
1660#ifdef CONFIG_NET_CLS_ACT 1681#ifdef CONFIG_NET_CLS_ACT
@@ -1664,11 +1685,11 @@ reclassify:
1664 1685
1665 if (verd++ >= MAX_REC_LOOP) { 1686 if (verd++ >= MAX_REC_LOOP) {
1666 if (net_ratelimit()) 1687 if (net_ratelimit())
1667 printk(KERN_NOTICE 1688 pr_notice("%s: packet reclassify loop"
1668 "%s: packet reclassify loop"
1669 " rule prio %u protocol %02x\n", 1689 " rule prio %u protocol %02x\n",
1670 tp->q->ops->id, 1690 tp->q->ops->id,
1671 tp->prio & 0xffff, ntohs(tp->protocol)); 1691 tp->prio & 0xffff,
1692 ntohs(tp->protocol));
1672 return TC_ACT_SHOT; 1693 return TC_ACT_SHOT;
1673 } 1694 }
1674 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); 1695 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1761,7 +1782,7 @@ static int __init pktsched_init(void)
1761 1782
1762 err = register_pernet_subsys(&psched_net_ops); 1783 err = register_pernet_subsys(&psched_net_ops);
1763 if (err) { 1784 if (err) {
1764 printk(KERN_ERR "pktsched_init: " 1785 pr_err("pktsched_init: "
1765 "cannot initialize per netns operations\n"); 1786 "cannot initialize per netns operations\n");
1766 return err; 1787 return err;
1767 } 1788 }
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 943d733409d0..3f08158b8688 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
319 * creation), and one for the reference held when calling delete. 319 * creation), and one for the reference held when calling delete.
320 */ 320 */
321 if (flow->ref < 2) { 321 if (flow->ref < 2) {
322 printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); 322 pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
323 return -EINVAL; 323 return -EINVAL;
324 } 324 }
325 if (flow->ref > 2) 325 if (flow->ref > 2)
@@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
384 } 384 }
385 } 385 }
386 flow = NULL; 386 flow = NULL;
387 done: 387done:
388 ; 388 ;
389 } 389 }
390 if (!flow) 390 if (!flow) {
391 flow = &p->link; 391 flow = &p->link;
392 else { 392 } else {
393 if (flow->vcc) 393 if (flow->vcc)
394 ATM_SKB(skb)->atm_options = flow->vcc->atm_options; 394 ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
395 /*@@@ looks good ... but it's not supposed to work :-) */ 395 /*@@@ looks good ... but it's not supposed to work :-) */
@@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
576 576
577 list_for_each_entry_safe(flow, tmp, &p->flows, list) { 577 list_for_each_entry_safe(flow, tmp, &p->flows, list) {
578 if (flow->ref > 1) 578 if (flow->ref > 1)
579 printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, 579 pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
580 flow->ref);
581 atm_tc_put(sch, (unsigned long)flow); 580 atm_tc_put(sch, (unsigned long)flow);
582 } 581 }
583 tasklet_kill(&p->task); 582 tasklet_kill(&p->task);
@@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
616 } 615 }
617 if (flow->excess) 616 if (flow->excess)
618 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); 617 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
619 else { 618 else
620 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); 619 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
621 }
622 620
623 nla_nest_end(skb, nest); 621 nla_nest_end(skb, nest);
624 return skb->len; 622 return skb->len;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 5f63ec58942c..24d94c097b35 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
72struct cbq_sched_data; 72struct cbq_sched_data;
73 73
74 74
75struct cbq_class 75struct cbq_class {
76{
77 struct Qdisc_class_common common; 76 struct Qdisc_class_common common;
78 struct cbq_class *next_alive; /* next class with backlog in this priority band */ 77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
79 78
@@ -139,19 +138,18 @@ struct cbq_class
139 int refcnt; 138 int refcnt;
140 int filters; 139 int filters;
141 140
142 struct cbq_class *defaults[TC_PRIO_MAX+1]; 141 struct cbq_class *defaults[TC_PRIO_MAX + 1];
143}; 142};
144 143
145struct cbq_sched_data 144struct cbq_sched_data {
146{
147 struct Qdisc_class_hash clhash; /* Hash table of all classes */ 145 struct Qdisc_class_hash clhash; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1]; 146 int nclasses[TC_CBQ_MAXPRIO + 1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1]; 147 unsigned int quanta[TC_CBQ_MAXPRIO + 1];
150 148
151 struct cbq_class link; 149 struct cbq_class link;
152 150
153 unsigned activemask; 151 unsigned int activemask;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes 152 struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
155 with backlog */ 153 with backlog */
156 154
157#ifdef CONFIG_NET_CLS_ACT 155#ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
162 int tx_len; 160 int tx_len;
163 psched_time_t now; /* Cached timestamp */ 161 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */ 162 psched_time_t now_rt; /* Cached real time */
165 unsigned pmask; 163 unsigned int pmask;
166 164
167 struct hrtimer delay_timer; 165 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer, 166 struct qdisc_watchdog watchdog; /* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
175}; 173};
176 174
177 175
178#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) 176#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
179 177
180static __inline__ struct cbq_class * 178static inline struct cbq_class *
181cbq_class_lookup(struct cbq_sched_data *q, u32 classid) 179cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182{ 180{
183 struct Qdisc_class_common *clc; 181 struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
193static struct cbq_class * 191static struct cbq_class *
194cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) 192cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195{ 193{
196 struct cbq_class *cl, *new; 194 struct cbq_class *cl;
197 195
198 for (cl = this->tparent; cl; cl = cl->tparent) 196 for (cl = this->tparent; cl; cl = cl->tparent) {
199 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) 197 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
200 return new;
201 198
199 if (new != NULL && new != this)
200 return new;
201 }
202 return NULL; 202 return NULL;
203} 203}
204 204
205#endif 205#endif
206 206
207/* Classify packet. The procedure is pretty complicated, but 207/* Classify packet. The procedure is pretty complicated, but
208 it allows us to combine link sharing and priority scheduling 208 * it allows us to combine link sharing and priority scheduling
209 transparently. 209 * transparently.
210 210 *
211 Namely, you can put link sharing rules (f.e. route based) at root of CBQ, 211 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
212 so that it resolves to split nodes. Then packets are classified 212 * so that it resolves to split nodes. Then packets are classified
213 by logical priority, or a more specific classifier may be attached 213 * by logical priority, or a more specific classifier may be attached
214 to the split node. 214 * to the split node.
215 */ 215 */
216 216
217static struct cbq_class * 217static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227 /* 227 /*
228 * Step 1. If skb->priority points to one of our classes, use it. 228 * Step 1. If skb->priority points to one of our classes, use it.
229 */ 229 */
230 if (TC_H_MAJ(prio^sch->handle) == 0 && 230 if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
231 (cl = cbq_class_lookup(q, prio)) != NULL) 231 (cl = cbq_class_lookup(q, prio)) != NULL)
232 return cl; 232 return cl;
233 233
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) 243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
244 goto fallback; 244 goto fallback;
245 245
246 if ((cl = (void*)res.class) == NULL) { 246 cl = (void *)res.class;
247 if (!cl) {
247 if (TC_H_MAJ(res.classid)) 248 if (TC_H_MAJ(res.classid))
248 cl = cbq_class_lookup(q, res.classid); 249 cl = cbq_class_lookup(q, res.classid);
249 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) 250 else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
250 cl = defmap[TC_PRIO_BESTEFFORT]; 251 cl = defmap[TC_PRIO_BESTEFFORT];
251 252
252 if (cl == NULL || cl->level >= head->level) 253 if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@ fallback:
282 * Step 4. No success... 283 * Step 4. No success...
283 */ 284 */
284 if (TC_H_MAJ(prio) == 0 && 285 if (TC_H_MAJ(prio) == 0 &&
285 !(cl = head->defaults[prio&TC_PRIO_MAX]) && 286 !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
286 !(cl = head->defaults[TC_PRIO_BESTEFFORT])) 287 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
287 return head; 288 return head;
288 289
@@ -290,12 +291,12 @@ fallback:
290} 291}
291 292
292/* 293/*
293 A packet has just been enqueued on the empty class. 294 * A packet has just been enqueued on the empty class.
294 cbq_activate_class adds it to the tail of active class list 295 * cbq_activate_class adds it to the tail of active class list
295 of its priority band. 296 * of its priority band.
296 */ 297 */
297 298
298static __inline__ void cbq_activate_class(struct cbq_class *cl) 299static inline void cbq_activate_class(struct cbq_class *cl)
299{ 300{
300 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 301 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
301 int prio = cl->cpriority; 302 int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
314} 315}
315 316
316/* 317/*
317 Unlink class from active chain. 318 * Unlink class from active chain.
318 Note that this same procedure is done directly in cbq_dequeue* 319 * Note that this same procedure is done directly in cbq_dequeue*
319 during round-robin procedure. 320 * during round-robin procedure.
320 */ 321 */
321 322
322static void cbq_deactivate_class(struct cbq_class *this) 323static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350{ 351{
351 int toplevel = q->toplevel; 352 int toplevel = q->toplevel;
352 353
353 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { 354 if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
354 psched_time_t now; 355 psched_time_t now;
355 psched_tdiff_t incr; 356 psched_tdiff_t incr;
356 357
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
363 q->toplevel = cl->level; 364 q->toplevel = cl->level;
364 return; 365 return;
365 } 366 }
366 } while ((cl=cl->borrow) != NULL && toplevel > cl->level); 367 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
367 } 368 }
368} 369}
369 370
@@ -417,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
417 delay += cl->offtime; 418 delay += cl->offtime;
418 419
419 /* 420 /*
420 Class goes to sleep, so that it will have no 421 * Class goes to sleep, so that it will have no
421 chance to work avgidle. Let's forgive it 8) 422 * chance to work avgidle. Let's forgive it 8)
422 423 *
423 BTW cbq-2.0 has a crap in this 424 * BTW cbq-2.0 has a crap in this
424 place, apparently they forgot to shift it by cl->ewma_log. 425 * place, apparently they forgot to shift it by cl->ewma_log.
425 */ 426 */
426 if (cl->avgidle < 0) 427 if (cl->avgidle < 0)
427 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); 428 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -438,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
438 q->wd_expires = delay; 439 q->wd_expires = delay;
439 440
440 /* Dirty work! We must schedule wakeups based on 441 /* Dirty work! We must schedule wakeups based on
441 real available rate, rather than leaf rate, 442 * real available rate, rather than leaf rate,
442 which may be tiny (even zero). 443 * which may be tiny (even zero).
443 */ 444 */
444 if (q->toplevel == TC_CBQ_MAXLEVEL) { 445 if (q->toplevel == TC_CBQ_MAXLEVEL) {
445 struct cbq_class *b; 446 struct cbq_class *b;
@@ -459,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
459} 460}
460 461
461/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when 462/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
462 they go overlimit 463 * they go overlimit
463 */ 464 */
464 465
465static void cbq_ovl_rclassic(struct cbq_class *cl) 466static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -594,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
594 struct Qdisc *sch = q->watchdog.qdisc; 595 struct Qdisc *sch = q->watchdog.qdisc;
595 psched_time_t now; 596 psched_time_t now;
596 psched_tdiff_t delay = 0; 597 psched_tdiff_t delay = 0;
597 unsigned pmask; 598 unsigned int pmask;
598 599
599 now = psched_get_time(); 600 now = psched_get_time();
600 601
@@ -623,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
623 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); 624 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
624 } 625 }
625 626
626 sch->flags &= ~TCQ_F_THROTTLED; 627 qdisc_unthrottled(sch);
627 __netif_schedule(qdisc_root(sch)); 628 __netif_schedule(qdisc_root(sch));
628 return HRTIMER_NORESTART; 629 return HRTIMER_NORESTART;
629} 630}
@@ -663,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
663#endif 664#endif
664 665
665/* 666/*
666 It is mission critical procedure. 667 * It is mission critical procedure.
667 668 *
668 We "regenerate" toplevel cutoff, if transmitting class 669 * We "regenerate" toplevel cutoff, if transmitting class
669 has backlog and it is not regulated. It is not part of 670 * has backlog and it is not regulated. It is not part of
670 original CBQ description, but looks more reasonable. 671 * original CBQ description, but looks more reasonable.
671 Probably, it is wrong. This question needs further investigation. 672 * Probably, it is wrong. This question needs further investigation.
672*/ 673 */
673 674
674static __inline__ void 675static inline void
675cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, 676cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
676 struct cbq_class *borrowed) 677 struct cbq_class *borrowed)
677{ 678{
@@ -682,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
682 q->toplevel = borrowed->level; 683 q->toplevel = borrowed->level;
683 return; 684 return;
684 } 685 }
685 } while ((borrowed=borrowed->borrow) != NULL); 686 } while ((borrowed = borrowed->borrow) != NULL);
686 } 687 }
687#if 0 688#if 0
688 /* It is not necessary now. Uncommenting it 689 /* It is not necessary now. Uncommenting it
@@ -710,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
710 cl->bstats.bytes += len; 711 cl->bstats.bytes += len;
711 712
712 /* 713 /*
713 (now - last) is total time between packet right edges. 714 * (now - last) is total time between packet right edges.
714 (last_pktlen/rate) is "virtual" busy time, so that 715 * (last_pktlen/rate) is "virtual" busy time, so that
715 716 *
716 idle = (now - last) - last_pktlen/rate 717 * idle = (now - last) - last_pktlen/rate
717 */ 718 */
718 719
719 idle = q->now - cl->last; 720 idle = q->now - cl->last;
@@ -723,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
723 idle -= L2T(cl, len); 724 idle -= L2T(cl, len);
724 725
725 /* true_avgidle := (1-W)*true_avgidle + W*idle, 726 /* true_avgidle := (1-W)*true_avgidle + W*idle,
726 where W=2^{-ewma_log}. But cl->avgidle is scaled: 727 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
727 cl->avgidle == true_avgidle/W, 728 * cl->avgidle == true_avgidle/W,
728 hence: 729 * hence:
729 */ 730 */
730 avgidle += idle - (avgidle>>cl->ewma_log); 731 avgidle += idle - (avgidle>>cl->ewma_log);
731 } 732 }
@@ -739,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
739 cl->avgidle = avgidle; 740 cl->avgidle = avgidle;
740 741
741 /* Calculate expected time, when this class 742 /* Calculate expected time, when this class
742 will be allowed to send. 743 * will be allowed to send.
743 It will occur, when: 744 * It will occur, when:
744 (1-W)*true_avgidle + W*delay = 0, i.e. 745 * (1-W)*true_avgidle + W*delay = 0, i.e.
745 idle = (1/W - 1)*(-true_avgidle) 746 * idle = (1/W - 1)*(-true_avgidle)
746 or 747 * or
747 idle = (1 - W)*(-cl->avgidle); 748 * idle = (1 - W)*(-cl->avgidle);
748 */ 749 */
749 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); 750 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
750 751
751 /* 752 /*
752 That is not all. 753 * That is not all.
753 To maintain the rate allocated to the class, 754 * To maintain the rate allocated to the class,
754 we add to undertime virtual clock, 755 * we add to undertime virtual clock,
755 necessary to complete transmitted packet. 756 * necessary to complete transmitted packet.
756 (len/phys_bandwidth has been already passed 757 * (len/phys_bandwidth has been already passed
757 to the moment of cbq_update) 758 * to the moment of cbq_update)
758 */ 759 */
759 760
760 idle -= L2T(&q->link, len); 761 idle -= L2T(&q->link, len);
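The shift arithmetic in this hunk follows from the comments once written out; with W = 2^{-ewma_log} and cl->avgidle holding the scaled value true_avgidle/W, the algebra behind the two expressions above is (a sketch in the comments' own notation):

        \mathrm{true\_avgidle}' = (1-W)\,\mathrm{true\_avgidle} + W\,\mathrm{idle}
        \;\Longrightarrow\;
        \mathrm{avgidle}' = \mathrm{avgidle} + \mathrm{idle} - (\mathrm{avgidle} \gg \mathrm{ewma\_log})

        (1-W)\,\mathrm{true\_avgidle} + W\,\mathrm{delay} = 0
        \;\Longrightarrow\;
        \mathrm{delay} = (1-W)(-\mathrm{avgidle}) = (-\mathrm{avgidle}) - ((-\mathrm{avgidle}) \gg \mathrm{ewma\_log})

which is exactly the avgidle update a few lines earlier and the expected wake-up time computed here.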
@@ -776,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
776 cbq_update_toplevel(q, this, q->tx_borrowed); 777 cbq_update_toplevel(q, this, q->tx_borrowed);
777} 778}
778 779
779static __inline__ struct cbq_class * 780static inline struct cbq_class *
780cbq_under_limit(struct cbq_class *cl) 781cbq_under_limit(struct cbq_class *cl)
781{ 782{
782 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 783 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -792,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
792 793
793 do { 794 do {
794 /* It is very suspicious place. Now overlimit 795 /* It is very suspicious place. Now overlimit
795 action is generated for not bounded classes 796 * action is generated for not bounded classes
796 only if link is completely congested. 797 * only if link is completely congested.
797 Though it is in agree with ancestor-only paradigm, 798 * Though it is in agree with ancestor-only paradigm,
798 it looks very stupid. Particularly, 799 * it looks very stupid. Particularly,
799 it means that this chunk of code will either 800 * it means that this chunk of code will either
800 never be called or result in strong amplification 801 * never be called or result in strong amplification
801 of burstiness. Dangerous, silly, and, however, 802 * of burstiness. Dangerous, silly, and, however,
802 no another solution exists. 803 * no another solution exists.
803 */ 804 */
804 if ((cl = cl->borrow) == NULL) { 805 cl = cl->borrow;
806 if (!cl) {
805 this_cl->qstats.overlimits++; 807 this_cl->qstats.overlimits++;
806 this_cl->overlimit(this_cl); 808 this_cl->overlimit(this_cl);
807 return NULL; 809 return NULL;
@@ -814,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
814 return cl; 816 return cl;
815} 817}
816 818
817static __inline__ struct sk_buff * 819static inline struct sk_buff *
818cbq_dequeue_prio(struct Qdisc *sch, int prio) 820cbq_dequeue_prio(struct Qdisc *sch, int prio)
819{ 821{
820 struct cbq_sched_data *q = qdisc_priv(sch); 822 struct cbq_sched_data *q = qdisc_priv(sch);
@@ -838,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
838 840
839 if (cl->deficit <= 0) { 841 if (cl->deficit <= 0) {
840 /* Class exhausted its allotment per 842 /* Class exhausted its allotment per
841 this round. Switch to the next one. 843 * this round. Switch to the next one.
842 */ 844 */
843 deficit = 1; 845 deficit = 1;
844 cl->deficit += cl->quantum; 846 cl->deficit += cl->quantum;
@@ -848,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
848 skb = cl->q->dequeue(cl->q); 850 skb = cl->q->dequeue(cl->q);
849 851
850 /* Class did not give us any skb :-( 852 /* Class did not give us any skb :-(
851 It could occur even if cl->q->q.qlen != 0 853 * It could occur even if cl->q->q.qlen != 0
852 f.e. if cl->q == "tbf" 854 * f.e. if cl->q == "tbf"
853 */ 855 */
854 if (skb == NULL) 856 if (skb == NULL)
855 goto skip_class; 857 goto skip_class;
@@ -878,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
878skip_class: 880skip_class:
879 if (cl->q->q.qlen == 0 || prio != cl->cpriority) { 881 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
880 /* Class is empty or penalized. 882 /* Class is empty or penalized.
881 Unlink it from active chain. 883 * Unlink it from active chain.
882 */ 884 */
883 cl_prev->next_alive = cl->next_alive; 885 cl_prev->next_alive = cl->next_alive;
884 cl->next_alive = NULL; 886 cl->next_alive = NULL;
@@ -917,14 +919,14 @@ next_class:
917 return NULL; 919 return NULL;
918} 920}
919 921
920static __inline__ struct sk_buff * 922static inline struct sk_buff *
921cbq_dequeue_1(struct Qdisc *sch) 923cbq_dequeue_1(struct Qdisc *sch)
922{ 924{
923 struct cbq_sched_data *q = qdisc_priv(sch); 925 struct cbq_sched_data *q = qdisc_priv(sch);
924 struct sk_buff *skb; 926 struct sk_buff *skb;
925 unsigned activemask; 927 unsigned int activemask;
926 928
927 activemask = q->activemask&0xFF; 929 activemask = q->activemask & 0xFF;
928 while (activemask) { 930 while (activemask) {
929 int prio = ffz(~activemask); 931 int prio = ffz(~activemask);
930 activemask &= ~(1<<prio); 932 activemask &= ~(1<<prio);
@@ -949,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
949 if (q->tx_class) { 951 if (q->tx_class) {
950 psched_tdiff_t incr2; 952 psched_tdiff_t incr2;
951 /* Time integrator. We calculate EOS time 953 /* Time integrator. We calculate EOS time
952 by adding expected packet transmission time. 954 * by adding expected packet transmission time.
953 If real time is greater, we warp artificial clock, 955 * If real time is greater, we warp artificial clock,
954 so that: 956 * so that:
955 957 *
956 cbq_time = max(real_time, work); 958 * cbq_time = max(real_time, work);
957 */ 959 */
958 incr2 = L2T(&q->link, q->tx_len); 960 incr2 = L2T(&q->link, q->tx_len);
959 q->now += incr2; 961 q->now += incr2;
@@ -971,27 +973,27 @@ cbq_dequeue(struct Qdisc *sch)
971 if (skb) { 973 if (skb) {
972 qdisc_bstats_update(sch, skb); 974 qdisc_bstats_update(sch, skb);
973 sch->q.qlen--; 975 sch->q.qlen--;
974 sch->flags &= ~TCQ_F_THROTTLED; 976 qdisc_unthrottled(sch);
975 return skb; 977 return skb;
976 } 978 }
977 979
978 /* All the classes are overlimit. 980 /* All the classes are overlimit.
979 981 *
980 It is possible, if: 982 * It is possible, if:
981 983 *
982 1. Scheduler is empty. 984 * 1. Scheduler is empty.
983 2. Toplevel cutoff inhibited borrowing. 985 * 2. Toplevel cutoff inhibited borrowing.
984 3. Root class is overlimit. 986 * 3. Root class is overlimit.
985 987 *
986 Reset 2d and 3d conditions and retry. 988 * Reset 2d and 3d conditions and retry.
987 989 *
988 Note, that NS and cbq-2.0 are buggy, peeking 990 * Note, that NS and cbq-2.0 are buggy, peeking
989 an arbitrary class is appropriate for ancestor-only 991 * an arbitrary class is appropriate for ancestor-only
990 sharing, but not for toplevel algorithm. 992 * sharing, but not for toplevel algorithm.
991 993 *
992 Our version is better, but slower, because it requires 994 * Our version is better, but slower, because it requires
993 two passes, but it is unavoidable with top-level sharing. 995 * two passes, but it is unavoidable with top-level sharing.
994 */ 996 */
995 997
996 if (q->toplevel == TC_CBQ_MAXLEVEL && 998 if (q->toplevel == TC_CBQ_MAXLEVEL &&
997 q->link.undertime == PSCHED_PASTPERFECT) 999 q->link.undertime == PSCHED_PASTPERFECT)
@@ -1002,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
1002 } 1004 }
1003 1005
1004 /* No packets in scheduler or nobody wants to give them to us :-( 1006 /* No packets in scheduler or nobody wants to give them to us :-(
1005 Sigh... start watchdog timer in the last case. */ 1007 * Sigh... start watchdog timer in the last case.
1008 */
1006 1009
1007 if (sch->q.qlen) { 1010 if (sch->q.qlen) {
1008 sch->qstats.overlimits++; 1011 sch->qstats.overlimits++;
@@ -1024,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
1024 int level = 0; 1027 int level = 0;
1025 struct cbq_class *cl; 1028 struct cbq_class *cl;
1026 1029
1027 if ((cl = this->children) != NULL) { 1030 cl = this->children;
1031 if (cl) {
1028 do { 1032 do {
1029 if (cl->level > level) 1033 if (cl->level > level)
1030 level = cl->level; 1034 level = cl->level;
1031 } while ((cl = cl->sibling) != this->children); 1035 } while ((cl = cl->sibling) != this->children);
1032 } 1036 }
1033 this->level = level+1; 1037 this->level = level + 1;
1034 } while ((this = this->tparent) != NULL); 1038 } while ((this = this->tparent) != NULL);
1035} 1039}
1036 1040
@@ -1046,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1046 for (h = 0; h < q->clhash.hashsize; h++) { 1050 for (h = 0; h < q->clhash.hashsize; h++) {
1047 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { 1051 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1048 /* BUGGGG... Beware! This expression suffer of 1052 /* BUGGGG... Beware! This expression suffer of
1049 arithmetic overflows! 1053 * arithmetic overflows!
1050 */ 1054 */
1051 if (cl->priority == prio) { 1055 if (cl->priority == prio) {
1052 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ 1056 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1053 q->quanta[prio]; 1057 q->quanta[prio];
1054 } 1058 }
1055 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { 1059 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1056 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); 1060 pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1061 cl->common.classid, cl->quantum);
1057 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; 1062 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1058 } 1063 }
1059 } 1064 }
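The overflow the BUGGGG comment warns about sits in the intermediate product: on a 32-bit long, weight * allot * nclasses[prio] can exceed LONG_MAX long before the division by quanta[prio] brings the result back down. With purely illustrative numbers, weight = 65535, allot = 65535 and nclasses[prio] = 8 give a product of roughly 3.4e10, well past 2^31 - 1, so quantum can wrap negative and trip the "bad quantum" repair warning in this hunk.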
@@ -1064,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
1064{ 1069{
1065 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 1070 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1066 struct cbq_class *split = cl->split; 1071 struct cbq_class *split = cl->split;
1067 unsigned h; 1072 unsigned int h;
1068 int i; 1073 int i;
1069 1074
1070 if (split == NULL) 1075 if (split == NULL)
1071 return; 1076 return;
1072 1077
1073 for (i=0; i<=TC_PRIO_MAX; i++) { 1078 for (i = 0; i <= TC_PRIO_MAX; i++) {
1074 if (split->defaults[i] == cl && !(cl->defmap&(1<<i))) 1079 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1075 split->defaults[i] = NULL; 1080 split->defaults[i] = NULL;
1076 } 1081 }
1077 1082
1078 for (i=0; i<=TC_PRIO_MAX; i++) { 1083 for (i = 0; i <= TC_PRIO_MAX; i++) {
1079 int level = split->level; 1084 int level = split->level;
1080 1085
1081 if (split->defaults[i]) 1086 if (split->defaults[i])
@@ -1088,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
1088 hlist_for_each_entry(c, n, &q->clhash.hash[h], 1093 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1089 common.hnode) { 1094 common.hnode) {
1090 if (c->split == split && c->level < level && 1095 if (c->split == split && c->level < level &&
1091 c->defmap&(1<<i)) { 1096 c->defmap & (1<<i)) {
1092 split->defaults[i] = c; 1097 split->defaults[i] = c;
1093 level = c->level; 1098 level = c->level;
1094 } 1099 }
@@ -1102,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
1102 struct cbq_class *split = NULL; 1107 struct cbq_class *split = NULL;
1103 1108
1104 if (splitid == 0) { 1109 if (splitid == 0) {
1105 if ((split = cl->split) == NULL) 1110 split = cl->split;
1111 if (!split)
1106 return; 1112 return;
1107 splitid = split->common.classid; 1113 splitid = split->common.classid;
1108 } 1114 }
@@ -1120,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
1120 cl->defmap = 0; 1126 cl->defmap = 0;
1121 cbq_sync_defmap(cl); 1127 cbq_sync_defmap(cl);
1122 cl->split = split; 1128 cl->split = split;
1123 cl->defmap = def&mask; 1129 cl->defmap = def & mask;
1124 } else 1130 } else
1125 cl->defmap = (cl->defmap&~mask)|(def&mask); 1131 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1126 1132
1127 cbq_sync_defmap(cl); 1133 cbq_sync_defmap(cl);
1128} 1134}
@@ -1135,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
1135 qdisc_class_hash_remove(&q->clhash, &this->common); 1141 qdisc_class_hash_remove(&q->clhash, &this->common);
1136 1142
1137 if (this->tparent) { 1143 if (this->tparent) {
1138 clp=&this->sibling; 1144 clp = &this->sibling;
1139 cl = *clp; 1145 cl = *clp;
1140 do { 1146 do {
1141 if (cl == this) { 1147 if (cl == this) {
@@ -1174,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
1174 } 1180 }
1175} 1181}
1176 1182
1177static unsigned int cbq_drop(struct Qdisc* sch) 1183static unsigned int cbq_drop(struct Qdisc *sch)
1178{ 1184{
1179 struct cbq_sched_data *q = qdisc_priv(sch); 1185 struct cbq_sched_data *q = qdisc_priv(sch);
1180 struct cbq_class *cl, *cl_head; 1186 struct cbq_class *cl, *cl_head;
@@ -1182,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
1182 unsigned int len; 1188 unsigned int len;
1183 1189
1184 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { 1190 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1185 if ((cl_head = q->active[prio]) == NULL) 1191 cl_head = q->active[prio];
1192 if (!cl_head)
1186 continue; 1193 continue;
1187 1194
1188 cl = cl_head; 1195 cl = cl_head;
@@ -1199,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
1199} 1206}
1200 1207
1201static void 1208static void
1202cbq_reset(struct Qdisc* sch) 1209cbq_reset(struct Qdisc *sch)
1203{ 1210{
1204 struct cbq_sched_data *q = qdisc_priv(sch); 1211 struct cbq_sched_data *q = qdisc_priv(sch);
1205 struct cbq_class *cl; 1212 struct cbq_class *cl;
1206 struct hlist_node *n; 1213 struct hlist_node *n;
1207 int prio; 1214 int prio;
1208 unsigned h; 1215 unsigned int h;
1209 1216
1210 q->activemask = 0; 1217 q->activemask = 0;
1211 q->pmask = 0; 1218 q->pmask = 0;
@@ -1237,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
1237 1244
1238static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) 1245static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1239{ 1246{
1240 if (lss->change&TCF_CBQ_LSS_FLAGS) { 1247 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1241 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; 1248 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1242 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; 1249 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1243 } 1250 }
1244 if (lss->change&TCF_CBQ_LSS_EWMA) 1251 if (lss->change & TCF_CBQ_LSS_EWMA)
1245 cl->ewma_log = lss->ewma_log; 1252 cl->ewma_log = lss->ewma_log;
1246 if (lss->change&TCF_CBQ_LSS_AVPKT) 1253 if (lss->change & TCF_CBQ_LSS_AVPKT)
1247 cl->avpkt = lss->avpkt; 1254 cl->avpkt = lss->avpkt;
1248 if (lss->change&TCF_CBQ_LSS_MINIDLE) 1255 if (lss->change & TCF_CBQ_LSS_MINIDLE)
1249 cl->minidle = -(long)lss->minidle; 1256 cl->minidle = -(long)lss->minidle;
1250 if (lss->change&TCF_CBQ_LSS_MAXIDLE) { 1257 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1251 cl->maxidle = lss->maxidle; 1258 cl->maxidle = lss->maxidle;
1252 cl->avgidle = lss->maxidle; 1259 cl->avgidle = lss->maxidle;
1253 } 1260 }
1254 if (lss->change&TCF_CBQ_LSS_OFFTIME) 1261 if (lss->change & TCF_CBQ_LSS_OFFTIME)
1255 cl->offtime = lss->offtime; 1262 cl->offtime = lss->offtime;
1256 return 0; 1263 return 0;
1257} 1264}
@@ -1279,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1279 if (wrr->weight) 1286 if (wrr->weight)
1280 cl->weight = wrr->weight; 1287 cl->weight = wrr->weight;
1281 if (wrr->priority) { 1288 if (wrr->priority) {
1282 cl->priority = wrr->priority-1; 1289 cl->priority = wrr->priority - 1;
1283 cl->cpriority = cl->priority; 1290 cl->cpriority = cl->priority;
1284 if (cl->priority >= cl->priority2) 1291 if (cl->priority >= cl->priority2)
1285 cl->priority2 = TC_CBQ_MAXPRIO-1; 1292 cl->priority2 = TC_CBQ_MAXPRIO - 1;
1286 } 1293 }
1287 1294
1288 cbq_addprio(q, cl); 1295 cbq_addprio(q, cl);
@@ -1299,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1299 cl->overlimit = cbq_ovl_delay; 1306 cl->overlimit = cbq_ovl_delay;
1300 break; 1307 break;
1301 case TC_CBQ_OVL_LOWPRIO: 1308 case TC_CBQ_OVL_LOWPRIO:
1302 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO || 1309 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1303 ovl->priority2-1 <= cl->priority) 1310 ovl->priority2 - 1 <= cl->priority)
1304 return -EINVAL; 1311 return -EINVAL;
1305 cl->priority2 = ovl->priority2-1; 1312 cl->priority2 = ovl->priority2 - 1;
1306 cl->overlimit = cbq_ovl_lowprio; 1313 cl->overlimit = cbq_ovl_lowprio;
1307 break; 1314 break;
1308 case TC_CBQ_OVL_DROP: 1315 case TC_CBQ_OVL_DROP:
@@ -1381,9 +1388,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1381 if (!q->link.q) 1388 if (!q->link.q)
1382 q->link.q = &noop_qdisc; 1389 q->link.q = &noop_qdisc;
1383 1390
1384 q->link.priority = TC_CBQ_MAXPRIO-1; 1391 q->link.priority = TC_CBQ_MAXPRIO - 1;
1385 q->link.priority2 = TC_CBQ_MAXPRIO-1; 1392 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1386 q->link.cpriority = TC_CBQ_MAXPRIO-1; 1393 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1387 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; 1394 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1388 q->link.overlimit = cbq_ovl_classic; 1395 q->link.overlimit = cbq_ovl_classic;
1389 q->link.allot = psched_mtu(qdisc_dev(sch)); 1396 q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1414,7 +1421,7 @@ put_rtab:
1414 return err; 1421 return err;
1415} 1422}
1416 1423
1417static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) 1424static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1418{ 1425{
1419 unsigned char *b = skb_tail_pointer(skb); 1426 unsigned char *b = skb_tail_pointer(skb);
1420 1427
@@ -1426,7 +1433,7 @@ nla_put_failure:
1426 return -1; 1433 return -1;
1427} 1434}
1428 1435
1429static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) 1436static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1430{ 1437{
1431 unsigned char *b = skb_tail_pointer(skb); 1438 unsigned char *b = skb_tail_pointer(skb);
1432 struct tc_cbq_lssopt opt; 1439 struct tc_cbq_lssopt opt;
@@ -1451,15 +1458,15 @@ nla_put_failure:
1451 return -1; 1458 return -1;
1452} 1459}
1453 1460
1454static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) 1461static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1455{ 1462{
1456 unsigned char *b = skb_tail_pointer(skb); 1463 unsigned char *b = skb_tail_pointer(skb);
1457 struct tc_cbq_wrropt opt; 1464 struct tc_cbq_wrropt opt;
1458 1465
1459 opt.flags = 0; 1466 opt.flags = 0;
1460 opt.allot = cl->allot; 1467 opt.allot = cl->allot;
1461 opt.priority = cl->priority+1; 1468 opt.priority = cl->priority + 1;
1462 opt.cpriority = cl->cpriority+1; 1469 opt.cpriority = cl->cpriority + 1;
1463 opt.weight = cl->weight; 1470 opt.weight = cl->weight;
1464 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); 1471 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1465 return skb->len; 1472 return skb->len;
@@ -1469,13 +1476,13 @@ nla_put_failure:
1469 return -1; 1476 return -1;
1470} 1477}
1471 1478
1472static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) 1479static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1473{ 1480{
1474 unsigned char *b = skb_tail_pointer(skb); 1481 unsigned char *b = skb_tail_pointer(skb);
1475 struct tc_cbq_ovl opt; 1482 struct tc_cbq_ovl opt;
1476 1483
1477 opt.strategy = cl->ovl_strategy; 1484 opt.strategy = cl->ovl_strategy;
1478 opt.priority2 = cl->priority2+1; 1485 opt.priority2 = cl->priority2 + 1;
1479 opt.pad = 0; 1486 opt.pad = 0;
1480 opt.penalty = cl->penalty; 1487 opt.penalty = cl->penalty;
1481 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); 1488 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1486,7 +1493,7 @@ nla_put_failure:
1486 return -1; 1493 return -1;
1487} 1494}
1488 1495
1489static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) 1496static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1490{ 1497{
1491 unsigned char *b = skb_tail_pointer(skb); 1498 unsigned char *b = skb_tail_pointer(skb);
1492 struct tc_cbq_fopt opt; 1499 struct tc_cbq_fopt opt;
@@ -1505,7 +1512,7 @@ nla_put_failure:
1505} 1512}
1506 1513
1507#ifdef CONFIG_NET_CLS_ACT 1514#ifdef CONFIG_NET_CLS_ACT
1508static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) 1515static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1509{ 1516{
1510 unsigned char *b = skb_tail_pointer(skb); 1517 unsigned char *b = skb_tail_pointer(skb);
1511 struct tc_cbq_police opt; 1518 struct tc_cbq_police opt;
@@ -1569,7 +1576,7 @@ static int
1569cbq_dump_class(struct Qdisc *sch, unsigned long arg, 1576cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1570 struct sk_buff *skb, struct tcmsg *tcm) 1577 struct sk_buff *skb, struct tcmsg *tcm)
1571{ 1578{
1572 struct cbq_class *cl = (struct cbq_class*)arg; 1579 struct cbq_class *cl = (struct cbq_class *)arg;
1573 struct nlattr *nest; 1580 struct nlattr *nest;
1574 1581
1575 if (cl->tparent) 1582 if (cl->tparent)
@@ -1597,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1597 struct gnet_dump *d) 1604 struct gnet_dump *d)
1598{ 1605{
1599 struct cbq_sched_data *q = qdisc_priv(sch); 1606 struct cbq_sched_data *q = qdisc_priv(sch);
1600 struct cbq_class *cl = (struct cbq_class*)arg; 1607 struct cbq_class *cl = (struct cbq_class *)arg;
1601 1608
1602 cl->qstats.qlen = cl->q->q.qlen; 1609 cl->qstats.qlen = cl->q->q.qlen;
1603 cl->xstats.avgidle = cl->avgidle; 1610 cl->xstats.avgidle = cl->avgidle;
@@ -1617,7 +1624,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1617static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1624static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1618 struct Qdisc **old) 1625 struct Qdisc **old)
1619{ 1626{
1620 struct cbq_class *cl = (struct cbq_class*)arg; 1627 struct cbq_class *cl = (struct cbq_class *)arg;
1621 1628
1622 if (new == NULL) { 1629 if (new == NULL) {
1623 new = qdisc_create_dflt(sch->dev_queue, 1630 new = qdisc_create_dflt(sch->dev_queue,
@@ -1640,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1640 return 0; 1647 return 0;
1641} 1648}
1642 1649
1643static struct Qdisc * 1650static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1644cbq_leaf(struct Qdisc *sch, unsigned long arg)
1645{ 1651{
1646 struct cbq_class *cl = (struct cbq_class*)arg; 1652 struct cbq_class *cl = (struct cbq_class *)arg;
1647 1653
1648 return cl->q; 1654 return cl->q;
1649} 1655}
@@ -1682,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1682 kfree(cl); 1688 kfree(cl);
1683} 1689}
1684 1690
1685static void 1691static void cbq_destroy(struct Qdisc *sch)
1686cbq_destroy(struct Qdisc* sch)
1687{ 1692{
1688 struct cbq_sched_data *q = qdisc_priv(sch); 1693 struct cbq_sched_data *q = qdisc_priv(sch);
1689 struct hlist_node *n, *next; 1694 struct hlist_node *n, *next;
1690 struct cbq_class *cl; 1695 struct cbq_class *cl;
1691 unsigned h; 1696 unsigned int h;
1692 1697
1693#ifdef CONFIG_NET_CLS_ACT 1698#ifdef CONFIG_NET_CLS_ACT
1694 q->rx_class = NULL; 1699 q->rx_class = NULL;
@@ -1712,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
1712 1717
1713static void cbq_put(struct Qdisc *sch, unsigned long arg) 1718static void cbq_put(struct Qdisc *sch, unsigned long arg)
1714{ 1719{
1715 struct cbq_class *cl = (struct cbq_class*)arg; 1720 struct cbq_class *cl = (struct cbq_class *)arg;
1716 1721
1717 if (--cl->refcnt == 0) { 1722 if (--cl->refcnt == 0) {
1718#ifdef CONFIG_NET_CLS_ACT 1723#ifdef CONFIG_NET_CLS_ACT
@@ -1735,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1735{ 1740{
1736 int err; 1741 int err;
1737 struct cbq_sched_data *q = qdisc_priv(sch); 1742 struct cbq_sched_data *q = qdisc_priv(sch);
1738 struct cbq_class *cl = (struct cbq_class*)*arg; 1743 struct cbq_class *cl = (struct cbq_class *)*arg;
1739 struct nlattr *opt = tca[TCA_OPTIONS]; 1744 struct nlattr *opt = tca[TCA_OPTIONS];
1740 struct nlattr *tb[TCA_CBQ_MAX + 1]; 1745 struct nlattr *tb[TCA_CBQ_MAX + 1];
1741 struct cbq_class *parent; 1746 struct cbq_class *parent;
@@ -1827,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1827 1832
1828 if (classid) { 1833 if (classid) {
1829 err = -EINVAL; 1834 err = -EINVAL;
1830 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) 1835 if (TC_H_MAJ(classid ^ sch->handle) ||
1836 cbq_class_lookup(q, classid))
1831 goto failure; 1837 goto failure;
1832 } else { 1838 } else {
1833 int i; 1839 int i;
1834 classid = TC_H_MAKE(sch->handle,0x8000); 1840 classid = TC_H_MAKE(sch->handle, 0x8000);
1835 1841
1836 for (i=0; i<0x8000; i++) { 1842 for (i = 0; i < 0x8000; i++) {
1837 if (++q->hgenerator >= 0x8000) 1843 if (++q->hgenerator >= 0x8000)
1838 q->hgenerator = 1; 1844 q->hgenerator = 1;
1839 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) 1845 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1890,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1890 cl->minidle = -0x7FFFFFFF; 1896 cl->minidle = -0x7FFFFFFF;
1891 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); 1897 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1892 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); 1898 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1893 if (cl->ewma_log==0) 1899 if (cl->ewma_log == 0)
1894 cl->ewma_log = q->link.ewma_log; 1900 cl->ewma_log = q->link.ewma_log;
1895 if (cl->maxidle==0) 1901 if (cl->maxidle == 0)
1896 cl->maxidle = q->link.maxidle; 1902 cl->maxidle = q->link.maxidle;
1897 if (cl->avpkt==0) 1903 if (cl->avpkt == 0)
1898 cl->avpkt = q->link.avpkt; 1904 cl->avpkt = q->link.avpkt;
1899 cl->overlimit = cbq_ovl_classic; 1905 cl->overlimit = cbq_ovl_classic;
1900 if (tb[TCA_CBQ_OVL_STRATEGY]) 1906 if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1920,7 +1926,7 @@ failure:
1920static int cbq_delete(struct Qdisc *sch, unsigned long arg) 1926static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1921{ 1927{
1922 struct cbq_sched_data *q = qdisc_priv(sch); 1928 struct cbq_sched_data *q = qdisc_priv(sch);
1923 struct cbq_class *cl = (struct cbq_class*)arg; 1929 struct cbq_class *cl = (struct cbq_class *)arg;
1924 unsigned int qlen; 1930 unsigned int qlen;
1925 1931
1926 if (cl->filters || cl->children || cl == &q->link) 1932 if (cl->filters || cl->children || cl == &q->link)
@@ -1978,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1978 u32 classid) 1984 u32 classid)
1979{ 1985{
1980 struct cbq_sched_data *q = qdisc_priv(sch); 1986 struct cbq_sched_data *q = qdisc_priv(sch);
1981 struct cbq_class *p = (struct cbq_class*)parent; 1987 struct cbq_class *p = (struct cbq_class *)parent;
1982 struct cbq_class *cl = cbq_class_lookup(q, classid); 1988 struct cbq_class *cl = cbq_class_lookup(q, classid);
1983 1989
1984 if (cl) { 1990 if (cl) {
@@ -1992,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1992 1998
1993static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) 1999static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1994{ 2000{
1995 struct cbq_class *cl = (struct cbq_class*)arg; 2001 struct cbq_class *cl = (struct cbq_class *)arg;
1996 2002
1997 cl->filters--; 2003 cl->filters--;
1998} 2004}
@@ -2002,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2002 struct cbq_sched_data *q = qdisc_priv(sch); 2008 struct cbq_sched_data *q = qdisc_priv(sch);
2003 struct cbq_class *cl; 2009 struct cbq_class *cl;
2004 struct hlist_node *n; 2010 struct hlist_node *n;
2005 unsigned h; 2011 unsigned int h;
2006 2012
2007 if (arg->stop) 2013 if (arg->stop)
2008 return; 2014 return;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
new file mode 100644
index 000000000000..06afbaeb4c88
--- /dev/null
+++ b/net/sched/sch_choke.c
@@ -0,0 +1,688 @@
1/*
2 * net/sched/sch_choke.c CHOKE scheduler
3 *
4 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
17#include <linux/reciprocal_div.h>
18#include <linux/vmalloc.h>
19#include <net/pkt_sched.h>
20#include <net/inet_ecn.h>
21#include <net/red.h>
22#include <linux/ip.h>
23#include <net/ip.h>
24#include <linux/ipv6.h>
25#include <net/ipv6.h>
26
27/*
28 CHOKe stateless AQM for fair bandwidth allocation
29 =================================================
30
31 CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
32 unresponsive flows) is a variant of RED that penalizes misbehaving flows but
33 maintains no flow state. The difference from RED is an additional step
34 during the enqueuing process. If average queue size is over the
35 low threshold (qmin), a packet is chosen at random from the queue.
36 If both the new and chosen packet are from the same flow, both
37 are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
38 needs to access packets in queue randomly. It has a minimal class
39 interface to allow overriding the builtin flow classifier with
40 filters.
41
42 Source:
43 R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
44 Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
45 IEEE INFOCOM, 2000.
46
47 A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
48 Characteristics", IEEE/ACM Transactions on Networking, 2004
49
50 */
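The enqueue step described above boils down to: once the average queue size passes the minimum threshold, pick one queued packet at random and, if it belongs to the same flow as the arrival, drop both. A minimal user-space sketch under simplifying assumptions (an integer flow id instead of header matching, a plain length threshold instead of the RED average, illustrative constants; not kernel code):

        #include <stdio.h>
        #include <stdlib.h>

        #define QSIZE   8       /* stand-in for the packet table           */
        #define QMIN    4       /* stand-in for the RED minimum threshold  */

        static int queue[QSIZE];        /* flow id per slot, 0 = empty     */
        static int qlen;

        static int choke_enqueue(int flow)
        {
                int i;

                if (qlen >= QMIN) {
                        int victim = rand() % QSIZE;

                        /* CHOKe step: compare arrival with a random queued packet */
                        if (queue[victim] && queue[victim] == flow) {
                                queue[victim] = 0;      /* drop the queued packet */
                                qlen--;
                                return -1;              /* ...and the arrival too */
                        }
                }
                if (qlen == QSIZE)
                        return -1;                      /* plain tail drop when full */

                for (i = 0; i < QSIZE; i++) {
                        if (!queue[i]) {
                                queue[i] = flow;
                                qlen++;
                                break;
                        }
                }
                return 0;
        }

        int main(void)
        {
                static const int arrivals[] = { 1, 1, 1, 1, 2, 1, 1, 3, 1 };
                unsigned int i;

                for (i = 0; i < sizeof(arrivals) / sizeof(arrivals[0]); i++)
                        printf("flow %d -> %s\n", arrivals[i],
                               choke_enqueue(arrivals[i]) ? "dropped" : "queued");
                return 0;
        }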
51
52/* Upper bound on size of sk_buff table (packets) */
53#define CHOKE_MAX_QUEUE (128*1024 - 1)
54
55struct choke_sched_data {
56/* Parameters */
57 u32 limit;
58 unsigned char flags;
59
60 struct red_parms parms;
61
62/* Variables */
63 struct tcf_proto *filter_list;
64 struct {
65 u32 prob_drop; /* Early probability drops */
66 u32 prob_mark; /* Early probability marks */
67 u32 forced_drop; /* Forced drops, qavg > max_thresh */
68 u32 forced_mark; /* Forced marks, qavg > max_thresh */
69 u32 pdrop; /* Drops due to queue limits */
70 u32 other; /* Drops due to drop() calls */
71 u32 matched; /* Drops to flow match */
72 } stats;
73
74 unsigned int head;
75 unsigned int tail;
76
77 unsigned int tab_mask; /* size - 1 */
78
79 struct sk_buff **tab;
80};
81
82/* deliver a random number between 0 and N - 1 */
83static u32 random_N(unsigned int N)
84{
85 return reciprocal_divide(random32(), N);
86}
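At the time of this patch reciprocal_divide(a, b) computes (u32)(((u64)a * b) >> 32), so random_N(N) maps the uniform 32-bit value from random32() onto [0, N) by scaling rather than by modulo: the result is floor(random32() * N / 2^32). With N = 5, for example, any random32() value below 2^32 / 5 selects index 0.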
87
88/* number of elements in queue including holes */
89static unsigned int choke_len(const struct choke_sched_data *q)
90{
91 return (q->tail - q->head) & q->tab_mask;
92}
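Because the table size is kept a power of two and tab_mask is size - 1, the unsigned subtraction wraps correctly even after tail has lapped head: with tab_mask = 7, head = 6 and tail = 2, for example, (2 - 6) & 7 = 4, i.e. slots 6, 7, 0 and 1 are in use (some of them possibly holes).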
93
94/* Is ECN parameter configured */
95static int use_ecn(const struct choke_sched_data *q)
96{
97 return q->flags & TC_RED_ECN;
98}
99
100/* Should packets over max just be dropped (versus marked) */
101static int use_harddrop(const struct choke_sched_data *q)
102{
103 return q->flags & TC_RED_HARDDROP;
104}
105
106/* Move head pointer forward to skip over holes */
107static void choke_zap_head_holes(struct choke_sched_data *q)
108{
109 do {
110 q->head = (q->head + 1) & q->tab_mask;
111 if (q->head == q->tail)
112 break;
113 } while (q->tab[q->head] == NULL);
114}
115
116/* Move tail pointer backwards to reuse holes */
117static void choke_zap_tail_holes(struct choke_sched_data *q)
118{
119 do {
120 q->tail = (q->tail - 1) & q->tab_mask;
121 if (q->head == q->tail)
122 break;
123 } while (q->tab[q->tail] == NULL);
124}
125
126/* Drop packet from queue array by creating a "hole" */
127static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
128{
129 struct choke_sched_data *q = qdisc_priv(sch);
130 struct sk_buff *skb = q->tab[idx];
131
132 q->tab[idx] = NULL;
133
134 if (idx == q->head)
135 choke_zap_head_holes(q);
136 if (idx == q->tail)
137 choke_zap_tail_holes(q);
138
139 sch->qstats.backlog -= qdisc_pkt_len(skb);
140 qdisc_drop(skb, sch);
141 qdisc_tree_decrease_qlen(sch, 1);
142 --sch->q.qlen;
143}
144
145/*
146 * Compare flow of two packets
147 * Returns true only if source and destination address and port match.
148 * false for special cases
149 */
150static bool choke_match_flow(struct sk_buff *skb1,
151 struct sk_buff *skb2)
152{
153 int off1, off2, poff;
154 const u32 *ports1, *ports2;
155 u8 ip_proto;
156 __u32 hash1;
157
158 if (skb1->protocol != skb2->protocol)
159 return false;
160
161 /* Use hash value as quick check
162 * Assumes that __skb_get_rxhash makes IP header and ports linear
163 */
164 hash1 = skb_get_rxhash(skb1);
165 if (!hash1 || hash1 != skb_get_rxhash(skb2))
166 return false;
167
168 /* Probably match, but be sure to avoid hash collisions */
169 off1 = skb_network_offset(skb1);
170 off2 = skb_network_offset(skb2);
171
172 switch (skb1->protocol) {
173 case __constant_htons(ETH_P_IP): {
174 const struct iphdr *ip1, *ip2;
175
176 ip1 = (const struct iphdr *) (skb1->data + off1);
177 ip2 = (const struct iphdr *) (skb2->data + off2);
178
179 ip_proto = ip1->protocol;
180 if (ip_proto != ip2->protocol ||
181 ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
182 return false;
183
184 if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
185 ip_proto = 0;
186 off1 += ip1->ihl * 4;
187 off2 += ip2->ihl * 4;
188 break;
189 }
190
191 case __constant_htons(ETH_P_IPV6): {
192 const struct ipv6hdr *ip1, *ip2;
193
194 ip1 = (const struct ipv6hdr *) (skb1->data + off1);
195 ip2 = (const struct ipv6hdr *) (skb2->data + off2);
196
197 ip_proto = ip1->nexthdr;
198 if (ip_proto != ip2->nexthdr ||
199 ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
200 ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
201 return false;
202 off1 += 40;
 203 off2 += 40;
 break;
 204 }
205
206 default: /* Maybe compare MAC header here? */
207 return false;
208 }
209
210 poff = proto_ports_offset(ip_proto);
211 if (poff < 0)
212 return true;
213
214 off1 += poff;
215 off2 += poff;
216
217 ports1 = (__force u32 *)(skb1->data + off1);
218 ports2 = (__force u32 *)(skb2->data + off2);
219 return *ports1 == *ports2;
220}
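
choke_match_flow() follows a common prefilter-then-verify pattern: a cheap hash comparison rejects most non-matches, and the exact header fields are compared only when the hashes agree, to rule out collisions. A minimal sketch of that pattern, with an assumed flow-key layout that is illustrative only:

#include <stdbool.h>
#include <stdint.h>

struct flow_key {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	uint8_t  proto;
};

static bool same_flow(const struct flow_key *a, uint32_t hash_a,
		      const struct flow_key *b, uint32_t hash_b)
{
	if (hash_a != hash_b)		/* cheap reject for most non-matches */
		return false;

	/* hashes can collide; verify the actual fields */
	return a->saddr == b->saddr && a->daddr == b->daddr &&
	       a->sport == b->sport && a->dport == b->dport &&
	       a->proto == b->proto;
}
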
221
222struct choke_skb_cb {
223 u16 classid;
224};
225
226static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
227{
228 BUILD_BUG_ON(sizeof(skb->cb) <
229 sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb));
230 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
231}
232
233static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
234{
235 choke_skb_cb(skb)->classid = classid;
236}
237
238static u16 choke_get_classid(const struct sk_buff *skb)
239{
240 return choke_skb_cb(skb)->classid;
241}
242
243/*
244 * Classify flow using either:
245 * 1. pre-existing classification result in skb
246 * 2. fast internal classification
 247 * 3. TC filter based classification
248 */
249static bool choke_classify(struct sk_buff *skb,
250 struct Qdisc *sch, int *qerr)
251
252{
253 struct choke_sched_data *q = qdisc_priv(sch);
254 struct tcf_result res;
255 int result;
256
257 result = tc_classify(skb, q->filter_list, &res);
258 if (result >= 0) {
259#ifdef CONFIG_NET_CLS_ACT
260 switch (result) {
261 case TC_ACT_STOLEN:
262 case TC_ACT_QUEUED:
263 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
264 case TC_ACT_SHOT:
265 return false;
266 }
267#endif
268 choke_set_classid(skb, TC_H_MIN(res.classid));
269 return true;
270 }
271
272 return false;
273}
274
275/*
276 * Select a packet at random from queue
 277 * HACK: since the queue can have holes from previous deletions, retry
 278 * several times to find a random skb; if that fails, give up and return the head.
 279 * Returns NULL if the queue is empty (q->head == q->tail).
280 */
281static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
282 unsigned int *pidx)
283{
284 struct sk_buff *skb;
285 int retrys = 3;
286
287 do {
288 *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
289 skb = q->tab[*pidx];
290 if (skb)
291 return skb;
292 } while (--retrys > 0);
293
294 return q->tab[*pidx = q->head];
295}
296
297/*
298 * Compare new packet with random packet in queue
299 * returns true if matched and sets *pidx
300 */
301static bool choke_match_random(const struct choke_sched_data *q,
302 struct sk_buff *nskb,
303 unsigned int *pidx)
304{
305 struct sk_buff *oskb;
306
307 if (q->head == q->tail)
308 return false;
309
310 oskb = choke_peek_random(q, pidx);
311 if (q->filter_list)
312 return choke_get_classid(nskb) == choke_get_classid(oskb);
313
314 return choke_match_flow(oskb, nskb);
315}
316
317static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
318{
319 struct choke_sched_data *q = qdisc_priv(sch);
320 struct red_parms *p = &q->parms;
321 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
322
323 if (q->filter_list) {
324 /* If using external classifiers, get result and record it. */
325 if (!choke_classify(skb, sch, &ret))
326 goto other_drop; /* Packet was eaten by filter */
327 }
328
329 /* Compute average queue usage (see RED) */
330 p->qavg = red_calc_qavg(p, sch->q.qlen);
331 if (red_is_idling(p))
332 red_end_of_idle_period(p);
333
334 /* Is queue small? */
335 if (p->qavg <= p->qth_min)
336 p->qcount = -1;
337 else {
338 unsigned int idx;
339
340 /* Draw a packet at random from queue and compare flow */
341 if (choke_match_random(q, skb, &idx)) {
342 q->stats.matched++;
343 choke_drop_by_idx(sch, idx);
344 goto congestion_drop;
345 }
346
347 /* Queue is large, always mark/drop */
348 if (p->qavg > p->qth_max) {
349 p->qcount = -1;
350
351 sch->qstats.overlimits++;
352 if (use_harddrop(q) || !use_ecn(q) ||
353 !INET_ECN_set_ce(skb)) {
354 q->stats.forced_drop++;
355 goto congestion_drop;
356 }
357
358 q->stats.forced_mark++;
359 } else if (++p->qcount) {
360 if (red_mark_probability(p, p->qavg)) {
361 p->qcount = 0;
362 p->qR = red_random(p);
363
364 sch->qstats.overlimits++;
365 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
366 q->stats.prob_drop++;
367 goto congestion_drop;
368 }
369
370 q->stats.prob_mark++;
371 }
372 } else
373 p->qR = red_random(p);
374 }
375
376 /* Admit new packet */
377 if (sch->q.qlen < q->limit) {
378 q->tab[q->tail] = skb;
379 q->tail = (q->tail + 1) & q->tab_mask;
380 ++sch->q.qlen;
381 sch->qstats.backlog += qdisc_pkt_len(skb);
382 return NET_XMIT_SUCCESS;
383 }
384
385 q->stats.pdrop++;
386 sch->qstats.drops++;
387 kfree_skb(skb);
388 return NET_XMIT_DROP;
389
390 congestion_drop:
391 qdisc_drop(skb, sch);
392 return NET_XMIT_CN;
393
394 other_drop:
395 if (ret & __NET_XMIT_BYPASS)
396 sch->qstats.drops++;
397 kfree_skb(skb);
398 return ret;
399}
400
401static struct sk_buff *choke_dequeue(struct Qdisc *sch)
402{
403 struct choke_sched_data *q = qdisc_priv(sch);
404 struct sk_buff *skb;
405
406 if (q->head == q->tail) {
407 if (!red_is_idling(&q->parms))
408 red_start_of_idle_period(&q->parms);
409 return NULL;
410 }
411
412 skb = q->tab[q->head];
413 q->tab[q->head] = NULL;
414 choke_zap_head_holes(q);
415 --sch->q.qlen;
416 sch->qstats.backlog -= qdisc_pkt_len(skb);
417 qdisc_bstats_update(sch, skb);
418
419 return skb;
420}
421
422static unsigned int choke_drop(struct Qdisc *sch)
423{
424 struct choke_sched_data *q = qdisc_priv(sch);
425 unsigned int len;
426
427 len = qdisc_queue_drop(sch);
428 if (len > 0)
429 q->stats.other++;
430 else {
431 if (!red_is_idling(&q->parms))
432 red_start_of_idle_period(&q->parms);
433 }
434
435 return len;
436}
437
438static void choke_reset(struct Qdisc *sch)
439{
440 struct choke_sched_data *q = qdisc_priv(sch);
441
442 red_restart(&q->parms);
443}
444
445static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
446 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
447 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
448};
449
450
451static void choke_free(void *addr)
452{
453 if (addr) {
454 if (is_vmalloc_addr(addr))
455 vfree(addr);
456 else
457 kfree(addr);
458 }
459}
460
461static int choke_change(struct Qdisc *sch, struct nlattr *opt)
462{
463 struct choke_sched_data *q = qdisc_priv(sch);
464 struct nlattr *tb[TCA_CHOKE_MAX + 1];
465 const struct tc_red_qopt *ctl;
466 int err;
467 struct sk_buff **old = NULL;
468 unsigned int mask;
469
470 if (opt == NULL)
471 return -EINVAL;
472
473 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
474 if (err < 0)
475 return err;
476
477 if (tb[TCA_CHOKE_PARMS] == NULL ||
478 tb[TCA_CHOKE_STAB] == NULL)
479 return -EINVAL;
480
481 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
482
483 if (ctl->limit > CHOKE_MAX_QUEUE)
484 return -EINVAL;
485
486 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
487 if (mask != q->tab_mask) {
488 struct sk_buff **ntab;
489
490 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
491 if (!ntab)
492 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
493 if (!ntab)
494 return -ENOMEM;
495
496 sch_tree_lock(sch);
497 old = q->tab;
498 if (old) {
499 unsigned int oqlen = sch->q.qlen, tail = 0;
500
501 while (q->head != q->tail) {
502 struct sk_buff *skb = q->tab[q->head];
503
504 q->head = (q->head + 1) & q->tab_mask;
505 if (!skb)
506 continue;
507 if (tail < mask) {
508 ntab[tail++] = skb;
509 continue;
510 }
511 sch->qstats.backlog -= qdisc_pkt_len(skb);
512 --sch->q.qlen;
513 qdisc_drop(skb, sch);
514 }
515 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
516 q->head = 0;
517 q->tail = tail;
518 }
519
520 q->tab_mask = mask;
521 q->tab = ntab;
522 } else
523 sch_tree_lock(sch);
524
525 q->flags = ctl->flags;
526 q->limit = ctl->limit;
527
528 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
529 ctl->Plog, ctl->Scell_log,
530 nla_data(tb[TCA_CHOKE_STAB]));
531
532 if (q->head == q->tail)
533 red_end_of_idle_period(&q->parms);
534
535 sch_tree_unlock(sch);
536 choke_free(old);
537 return 0;
538}
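
The table sizing in choke_change() rounds the configured limit up to a power of two so that slot indices can be masked with tab_mask instead of taken modulo the table size; for example, limit = 1000 gives roundup_pow_of_two(1001) = 1024 and tab_mask = 1023. A small sketch of the equivalent computation (illustrative, not the kernel helper):

#include <stdint.h>

/* Equivalent of roundup_pow_of_two(limit + 1) - 1 for 32-bit limits > 0. */
static uint32_t table_mask_for_limit(uint32_t limit)
{
	uint32_t size = 1;

	while (size < limit + 1)
		size <<= 1;
	return size - 1;		/* e.g. limit 1000 -> mask 1023 */
}
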
539
540static int choke_init(struct Qdisc *sch, struct nlattr *opt)
541{
542 return choke_change(sch, opt);
543}
544
545static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
546{
547 struct choke_sched_data *q = qdisc_priv(sch);
548 struct nlattr *opts = NULL;
549 struct tc_red_qopt opt = {
550 .limit = q->limit,
551 .flags = q->flags,
552 .qth_min = q->parms.qth_min >> q->parms.Wlog,
553 .qth_max = q->parms.qth_max >> q->parms.Wlog,
554 .Wlog = q->parms.Wlog,
555 .Plog = q->parms.Plog,
556 .Scell_log = q->parms.Scell_log,
557 };
558
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 if (opts == NULL)
561 goto nla_put_failure;
562
563 NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
564 return nla_nest_end(skb, opts);
565
566nla_put_failure:
567 nla_nest_cancel(skb, opts);
568 return -EMSGSIZE;
569}
570
571static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
572{
573 struct choke_sched_data *q = qdisc_priv(sch);
574 struct tc_choke_xstats st = {
575 .early = q->stats.prob_drop + q->stats.forced_drop,
576 .marked = q->stats.prob_mark + q->stats.forced_mark,
577 .pdrop = q->stats.pdrop,
578 .other = q->stats.other,
579 .matched = q->stats.matched,
580 };
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static void choke_destroy(struct Qdisc *sch)
586{
587 struct choke_sched_data *q = qdisc_priv(sch);
588
589 tcf_destroy_chain(&q->filter_list);
590 choke_free(q->tab);
591}
592
593static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
594{
595 return NULL;
596}
597
598static unsigned long choke_get(struct Qdisc *sch, u32 classid)
599{
600 return 0;
601}
602
603static void choke_put(struct Qdisc *q, unsigned long cl)
604{
605}
606
607static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
608 u32 classid)
609{
610 return 0;
611}
612
613static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
614{
615 struct choke_sched_data *q = qdisc_priv(sch);
616
617 if (cl)
618 return NULL;
619 return &q->filter_list;
620}
621
622static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
623 struct sk_buff *skb, struct tcmsg *tcm)
624{
625 tcm->tcm_handle |= TC_H_MIN(cl);
626 return 0;
627}
628
629static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
630{
631 if (!arg->stop) {
632 if (arg->fn(sch, 1, arg) < 0) {
633 arg->stop = 1;
634 return;
635 }
636 arg->count++;
637 }
638}
639
640static const struct Qdisc_class_ops choke_class_ops = {
641 .leaf = choke_leaf,
642 .get = choke_get,
643 .put = choke_put,
644 .tcf_chain = choke_find_tcf,
645 .bind_tcf = choke_bind,
646 .unbind_tcf = choke_put,
647 .dump = choke_dump_class,
648 .walk = choke_walk,
649};
650
651static struct sk_buff *choke_peek_head(struct Qdisc *sch)
652{
653 struct choke_sched_data *q = qdisc_priv(sch);
654
655 return (q->head != q->tail) ? q->tab[q->head] : NULL;
656}
657
658static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
659 .id = "choke",
660 .priv_size = sizeof(struct choke_sched_data),
661
662 .enqueue = choke_enqueue,
663 .dequeue = choke_dequeue,
664 .peek = choke_peek_head,
665 .drop = choke_drop,
666 .init = choke_init,
667 .destroy = choke_destroy,
668 .reset = choke_reset,
669 .change = choke_change,
670 .dump = choke_dump,
671 .dump_stats = choke_dump_stats,
672 .owner = THIS_MODULE,
673};
674
675static int __init choke_module_init(void)
676{
677 return register_qdisc(&choke_qdisc_ops);
678}
679
680static void __exit choke_module_exit(void)
681{
682 unregister_qdisc(&choke_qdisc_ops);
683}
684
685module_init(choke_module_init)
686module_exit(choke_module_exit)
687
688MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0f7bf3fdfea5..2c790204d042 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
137 mask = nla_get_u8(tb[TCA_DSMARK_MASK]); 137 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
138 138
139 if (tb[TCA_DSMARK_VALUE]) 139 if (tb[TCA_DSMARK_VALUE])
140 p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); 140 p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
141 141
142 if (tb[TCA_DSMARK_MASK]) 142 if (tb[TCA_DSMARK_MASK])
143 p->mask[*arg-1] = mask; 143 p->mask[*arg - 1] = mask;
144 144
145 err = 0; 145 err = 0;
146 146
@@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
155 if (!dsmark_valid_index(p, arg)) 155 if (!dsmark_valid_index(p, arg))
156 return -EINVAL; 156 return -EINVAL;
157 157
158 p->mask[arg-1] = 0xff; 158 p->mask[arg - 1] = 0xff;
159 p->value[arg-1] = 0; 159 p->value[arg - 1] = 0;
160 160
161 return 0; 161 return 0;
162} 162}
@@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
175 if (p->mask[i] == 0xff && !p->value[i]) 175 if (p->mask[i] == 0xff && !p->value[i])
176 goto ignore; 176 goto ignore;
177 if (walker->count >= walker->skip) { 177 if (walker->count >= walker->skip) {
178 if (walker->fn(sch, i+1, walker) < 0) { 178 if (walker->fn(sch, i + 1, walker) < 0) {
179 walker->stop = 1; 179 walker->stop = 1;
180 break; 180 break;
181 } 181 }
@@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
304 * and don't need yet another qdisc as a bypass. 304 * and don't need yet another qdisc as a bypass.
305 */ 305 */
306 if (p->mask[index] != 0xff || p->value[index]) 306 if (p->mask[index] != 0xff || p->value[index])
307 printk(KERN_WARNING 307 pr_warning("dsmark_dequeue: unsupported protocol %d\n",
308 "dsmark_dequeue: unsupported protocol %d\n", 308 ntohs(skb->protocol));
309 ntohs(skb->protocol));
310 break; 309 break;
311 } 310 }
312 311
@@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
424 if (!dsmark_valid_index(p, cl)) 423 if (!dsmark_valid_index(p, cl))
425 return -EINVAL; 424 return -EINVAL;
426 425
427 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); 426 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
428 tcm->tcm_info = p->q->handle; 427 tcm->tcm_info = p->q->handle;
429 428
430 opts = nla_nest_start(skb, TCA_OPTIONS); 429 opts = nla_nest_start(skb, TCA_OPTIONS);
431 if (opts == NULL) 430 if (opts == NULL)
432 goto nla_put_failure; 431 goto nla_put_failure;
433 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]); 432 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
434 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]); 433 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
435 434
436 return nla_nest_end(skb, opts); 435 return nla_nest_end(skb, opts);
437 436
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index d468b479aa93..66effe2da8e0 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,36 +19,25 @@
19 19
20/* 1 band FIFO pseudo-"scheduler" */ 20/* 1 band FIFO pseudo-"scheduler" */
21 21
22struct fifo_sched_data 22static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
23{ 23{
24 u32 limit; 24 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
25};
26
27static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
28{
29 struct fifo_sched_data *q = qdisc_priv(sch);
30
31 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
32 return qdisc_enqueue_tail(skb, sch); 25 return qdisc_enqueue_tail(skb, sch);
33 26
34 return qdisc_reshape_fail(skb, sch); 27 return qdisc_reshape_fail(skb, sch);
35} 28}
36 29
37static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) 30static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
38{ 31{
39 struct fifo_sched_data *q = qdisc_priv(sch); 32 if (likely(skb_queue_len(&sch->q) < sch->limit))
40
41 if (likely(skb_queue_len(&sch->q) < q->limit))
42 return qdisc_enqueue_tail(skb, sch); 33 return qdisc_enqueue_tail(skb, sch);
43 34
44 return qdisc_reshape_fail(skb, sch); 35 return qdisc_reshape_fail(skb, sch);
45} 36}
46 37
47static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) 38static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
48{ 39{
49 struct fifo_sched_data *q = qdisc_priv(sch); 40 if (likely(skb_queue_len(&sch->q) < sch->limit))
50
51 if (likely(skb_queue_len(&sch->q) < q->limit))
52 return qdisc_enqueue_tail(skb, sch); 41 return qdisc_enqueue_tail(skb, sch);
53 42
54 /* queue full, remove one skb to fulfill the limit */ 43 /* queue full, remove one skb to fulfill the limit */
@@ -61,31 +50,40 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
61 50
62static int fifo_init(struct Qdisc *sch, struct nlattr *opt) 51static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
63{ 52{
64 struct fifo_sched_data *q = qdisc_priv(sch); 53 bool bypass;
54 bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
65 55
66 if (opt == NULL) { 56 if (opt == NULL) {
67 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; 57 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
68 58
69 if (sch->ops == &bfifo_qdisc_ops) 59 if (is_bfifo)
70 limit *= psched_mtu(qdisc_dev(sch)); 60 limit *= psched_mtu(qdisc_dev(sch));
71 61
72 q->limit = limit; 62 sch->limit = limit;
73 } else { 63 } else {
74 struct tc_fifo_qopt *ctl = nla_data(opt); 64 struct tc_fifo_qopt *ctl = nla_data(opt);
75 65
76 if (nla_len(opt) < sizeof(*ctl)) 66 if (nla_len(opt) < sizeof(*ctl))
77 return -EINVAL; 67 return -EINVAL;
78 68
79 q->limit = ctl->limit; 69 sch->limit = ctl->limit;
80 } 70 }
81 71
72 if (is_bfifo)
73 bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
74 else
75 bypass = sch->limit >= 1;
76
77 if (bypass)
78 sch->flags |= TCQ_F_CAN_BYPASS;
79 else
80 sch->flags &= ~TCQ_F_CAN_BYPASS;
82 return 0; 81 return 0;
83} 82}
84 83
85static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 84static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
86{ 85{
87 struct fifo_sched_data *q = qdisc_priv(sch); 86 struct tc_fifo_qopt opt = { .limit = sch->limit };
88 struct tc_fifo_qopt opt = { .limit = q->limit };
89 87
90 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 88 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
91 return skb->len; 89 return skb->len;
@@ -96,7 +94,7 @@ nla_put_failure:
96 94
97struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { 95struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
98 .id = "pfifo", 96 .id = "pfifo",
99 .priv_size = sizeof(struct fifo_sched_data), 97 .priv_size = 0,
100 .enqueue = pfifo_enqueue, 98 .enqueue = pfifo_enqueue,
101 .dequeue = qdisc_dequeue_head, 99 .dequeue = qdisc_dequeue_head,
102 .peek = qdisc_peek_head, 100 .peek = qdisc_peek_head,
@@ -111,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops);
111 109
112struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { 110struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
113 .id = "bfifo", 111 .id = "bfifo",
114 .priv_size = sizeof(struct fifo_sched_data), 112 .priv_size = 0,
115 .enqueue = bfifo_enqueue, 113 .enqueue = bfifo_enqueue,
116 .dequeue = qdisc_dequeue_head, 114 .dequeue = qdisc_dequeue_head,
117 .peek = qdisc_peek_head, 115 .peek = qdisc_peek_head,
@@ -126,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops);
126 124
127struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { 125struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
128 .id = "pfifo_head_drop", 126 .id = "pfifo_head_drop",
129 .priv_size = sizeof(struct fifo_sched_data), 127 .priv_size = 0,
130 .enqueue = pfifo_tail_enqueue, 128 .enqueue = pfifo_tail_enqueue,
131 .dequeue = qdisc_dequeue_head, 129 .dequeue = qdisc_dequeue_head,
132 .peek = qdisc_peek_head, 130 .peek = qdisc_peek_head,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1bc698039ae2..c84b65920d1b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
87 */ 87 */
88 kfree_skb(skb); 88 kfree_skb(skb);
89 if (net_ratelimit()) 89 if (net_ratelimit())
90 printk(KERN_WARNING "Dead loop on netdevice %s, " 90 pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
91 "fix it urgently!\n", dev_queue->dev->name); 91 dev_queue->dev->name);
92 ret = qdisc_qlen(q); 92 ret = qdisc_qlen(q);
93 } else { 93 } else {
94 /* 94 /*
@@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
137 } else { 137 } else {
138 /* Driver returned NETDEV_TX_BUSY - requeue skb */ 138 /* Driver returned NETDEV_TX_BUSY - requeue skb */
139 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) 139 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
140 printk(KERN_WARNING "BUG %s code %d qlen %d\n", 140 pr_warning("BUG %s code %d qlen %d\n",
141 dev->name, ret, q->q.qlen); 141 dev->name, ret, q->q.qlen);
142 142
143 ret = dev_requeue_skb(skb, q); 143 ret = dev_requeue_skb(skb, q);
144 } 144 }
@@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = {
412}; 412};
413 413
414 414
415static const u8 prio2band[TC_PRIO_MAX+1] = 415static const u8 prio2band[TC_PRIO_MAX + 1] = {
416 { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; 416 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
417};
417 418
418/* 3-band FIFO queue: old style, but should be a bit faster than 419/* 3-band FIFO queue: old style, but should be a bit faster than
419 generic prio+fifo combination. 420 generic prio+fifo combination.
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
445 return priv->q + band; 446 return priv->q + band;
446} 447}
447 448
448static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 449static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
449{ 450{
450 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { 451 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
451 int band = prio2band[skb->priority & TC_PRIO_MAX]; 452 int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
460 return qdisc_drop(skb, qdisc); 461 return qdisc_drop(skb, qdisc);
461} 462}
462 463
463static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) 464static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
464{ 465{
465 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 466 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
466 int band = bitmap2band[priv->bitmap]; 467 int band = bitmap2band[priv->bitmap];
@@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
479 return NULL; 480 return NULL;
480} 481}
481 482
482static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) 483static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
483{ 484{
484 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 485 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
485 int band = bitmap2band[priv->bitmap]; 486 int band = bitmap2band[priv->bitmap];
@@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
493 return NULL; 494 return NULL;
494} 495}
495 496
496static void pfifo_fast_reset(struct Qdisc* qdisc) 497static void pfifo_fast_reset(struct Qdisc *qdisc)
497{ 498{
498 int prio; 499 int prio;
499 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 500 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
510{ 511{
511 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; 512 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
512 513
513 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 514 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
514 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 515 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
515 return skb->len; 516 return skb->len;
516 517
@@ -526,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
526 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) 527 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
527 skb_queue_head_init(band2list(priv, prio)); 528 skb_queue_head_init(band2list(priv, prio));
528 529
530 /* Can by-pass the queue discipline */
531 qdisc->flags |= TCQ_F_CAN_BYPASS;
529 return 0; 532 return 0;
530} 533}
531 534
@@ -540,27 +543,32 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
540 .dump = pfifo_fast_dump, 543 .dump = pfifo_fast_dump,
541 .owner = THIS_MODULE, 544 .owner = THIS_MODULE,
542}; 545};
546EXPORT_SYMBOL(pfifo_fast_ops);
543 547
544struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, 548struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
545 struct Qdisc_ops *ops) 549 struct Qdisc_ops *ops)
546{ 550{
547 void *p; 551 void *p;
548 struct Qdisc *sch; 552 struct Qdisc *sch;
549 unsigned int size; 553 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
550 int err = -ENOBUFS; 554 int err = -ENOBUFS;
551 555
552 /* ensure that the Qdisc and the private data are 64-byte aligned */
553 size = QDISC_ALIGN(sizeof(*sch));
554 size += ops->priv_size + (QDISC_ALIGNTO - 1);
555
556 p = kzalloc_node(size, GFP_KERNEL, 556 p = kzalloc_node(size, GFP_KERNEL,
557 netdev_queue_numa_node_read(dev_queue)); 557 netdev_queue_numa_node_read(dev_queue));
558 558
559 if (!p) 559 if (!p)
560 goto errout; 560 goto errout;
561 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); 561 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
562 sch->padded = (char *) sch - (char *) p; 562 /* if we got non aligned memory, ask more and do alignment ourself */
563 563 if (sch != p) {
564 kfree(p);
565 p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
566 netdev_queue_numa_node_read(dev_queue));
567 if (!p)
568 goto errout;
569 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
570 sch->padded = (char *) sch - (char *) p;
571 }
564 INIT_LIST_HEAD(&sch->list); 572 INIT_LIST_HEAD(&sch->list);
565 skb_queue_head_init(&sch->q); 573 skb_queue_head_init(&sch->q);
566 spin_lock_init(&sch->busylock); 574 spin_lock_init(&sch->busylock);
@@ -630,7 +638,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
630#ifdef CONFIG_NET_SCHED 638#ifdef CONFIG_NET_SCHED
631 qdisc_list_del(qdisc); 639 qdisc_list_del(qdisc);
632 640
633 qdisc_put_stab(qdisc->stab); 641 qdisc_put_stab(rtnl_dereference(qdisc->stab));
634#endif 642#endif
635 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); 643 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
636 if (ops->reset) 644 if (ops->reset)
@@ -674,25 +682,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
674 682
675 return oqdisc; 683 return oqdisc;
676} 684}
685EXPORT_SYMBOL(dev_graft_qdisc);
677 686
678static void attach_one_default_qdisc(struct net_device *dev, 687static void attach_one_default_qdisc(struct net_device *dev,
679 struct netdev_queue *dev_queue, 688 struct netdev_queue *dev_queue,
680 void *_unused) 689 void *_unused)
681{ 690{
682 struct Qdisc *qdisc; 691 struct Qdisc *qdisc = &noqueue_qdisc;
683 692
684 if (dev->tx_queue_len) { 693 if (dev->tx_queue_len) {
685 qdisc = qdisc_create_dflt(dev_queue, 694 qdisc = qdisc_create_dflt(dev_queue,
686 &pfifo_fast_ops, TC_H_ROOT); 695 &pfifo_fast_ops, TC_H_ROOT);
687 if (!qdisc) { 696 if (!qdisc) {
688 printk(KERN_INFO "%s: activation failed\n", dev->name); 697 netdev_info(dev, "activation failed\n");
689 return; 698 return;
690 } 699 }
691
692 /* Can by-pass the queue discipline for default qdisc */
693 qdisc->flags |= TCQ_F_CAN_BYPASS;
694 } else {
695 qdisc = &noqueue_qdisc;
696 } 700 }
697 dev_queue->qdisc_sleeping = qdisc; 701 dev_queue->qdisc_sleeping = qdisc;
698} 702}
@@ -761,6 +765,7 @@ void dev_activate(struct net_device *dev)
761 dev_watchdog_up(dev); 765 dev_watchdog_up(dev);
762 } 766 }
763} 767}
768EXPORT_SYMBOL(dev_activate);
764 769
765static void dev_deactivate_queue(struct net_device *dev, 770static void dev_deactivate_queue(struct net_device *dev,
766 struct netdev_queue *dev_queue, 771 struct netdev_queue *dev_queue,
@@ -841,6 +846,7 @@ void dev_deactivate(struct net_device *dev)
841 dev_deactivate_many(&single); 846 dev_deactivate_many(&single);
842 list_del(&single); 847 list_del(&single);
843} 848}
849EXPORT_SYMBOL(dev_deactivate);
844 850
845static void dev_init_scheduler_queue(struct net_device *dev, 851static void dev_init_scheduler_queue(struct net_device *dev,
846 struct netdev_queue *dev_queue, 852 struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2aa5c92..b9493a09a870 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
32struct gred_sched_data; 32struct gred_sched_data;
33struct gred_sched; 33struct gred_sched;
34 34
35struct gred_sched_data 35struct gred_sched_data {
36{
37 u32 limit; /* HARD maximal queue length */ 36 u32 limit; /* HARD maximal queue length */
38 u32 DP; /* the drop pramaters */ 37 u32 DP; /* the drop pramaters */
39 u32 bytesin; /* bytes seen on virtualQ so far*/ 38 u32 bytesin; /* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
50 GRED_RIO_MODE, 49 GRED_RIO_MODE,
51}; 50};
52 51
53struct gred_sched 52struct gred_sched {
54{
55 struct gred_sched_data *tab[MAX_DPs]; 53 struct gred_sched_data *tab[MAX_DPs];
56 unsigned long flags; 54 unsigned long flags;
57 u32 red_flags; 55 u32 red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
150 return t->red_flags & TC_RED_HARDDROP; 148 return t->red_flags & TC_RED_HARDDROP;
151} 149}
152 150
153static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 151static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
154{ 152{
155 struct gred_sched_data *q=NULL; 153 struct gred_sched_data *q = NULL;
156 struct gred_sched *t= qdisc_priv(sch); 154 struct gred_sched *t = qdisc_priv(sch);
157 unsigned long qavg = 0; 155 unsigned long qavg = 0;
158 u16 dp = tc_index_to_dp(skb); 156 u16 dp = tc_index_to_dp(skb);
159 157
160 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 158 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
161 dp = t->def; 159 dp = t->def;
162 160
163 if ((q = t->tab[dp]) == NULL) { 161 q = t->tab[dp];
162 if (!q) {
164 /* Pass through packets not assigned to a DP 163 /* Pass through packets not assigned to a DP
165 * if no default DP has been configured. This 164 * if no default DP has been configured. This
166 * allows for DP flows to be left untouched. 165 * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
183 for (i = 0; i < t->DPs; i++) { 182 for (i = 0; i < t->DPs; i++) {
184 if (t->tab[i] && t->tab[i]->prio < q->prio && 183 if (t->tab[i] && t->tab[i]->prio < q->prio &&
185 !red_is_idling(&t->tab[i]->parms)) 184 !red_is_idling(&t->tab[i]->parms))
186 qavg +=t->tab[i]->parms.qavg; 185 qavg += t->tab[i]->parms.qavg;
187 } 186 }
188 187
189 } 188 }
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
203 gred_store_wred_set(t, q); 202 gred_store_wred_set(t, q);
204 203
205 switch (red_action(&q->parms, q->parms.qavg + qavg)) { 204 switch (red_action(&q->parms, q->parms.qavg + qavg)) {
206 case RED_DONT_MARK: 205 case RED_DONT_MARK:
207 break; 206 break;
208 207
209 case RED_PROB_MARK: 208 case RED_PROB_MARK:
210 sch->qstats.overlimits++; 209 sch->qstats.overlimits++;
211 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { 210 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
212 q->stats.prob_drop++; 211 q->stats.prob_drop++;
213 goto congestion_drop; 212 goto congestion_drop;
214 } 213 }
215 214
216 q->stats.prob_mark++; 215 q->stats.prob_mark++;
217 break; 216 break;
218 217
219 case RED_HARD_MARK: 218 case RED_HARD_MARK:
220 sch->qstats.overlimits++; 219 sch->qstats.overlimits++;
221 if (gred_use_harddrop(t) || !gred_use_ecn(t) || 220 if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
222 !INET_ECN_set_ce(skb)) { 221 !INET_ECN_set_ce(skb)) {
223 q->stats.forced_drop++; 222 q->stats.forced_drop++;
224 goto congestion_drop; 223 goto congestion_drop;
225 } 224 }
226 q->stats.forced_mark++; 225 q->stats.forced_mark++;
227 break; 226 break;
228 } 227 }
229 228
230 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { 229 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
241 return NET_XMIT_CN; 240 return NET_XMIT_CN;
242} 241}
243 242
244static struct sk_buff *gred_dequeue(struct Qdisc* sch) 243static struct sk_buff *gred_dequeue(struct Qdisc *sch)
245{ 244{
246 struct sk_buff *skb; 245 struct sk_buff *skb;
247 struct gred_sched *t = qdisc_priv(sch); 246 struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
254 253
255 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 254 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
256 if (net_ratelimit()) 255 if (net_ratelimit())
257 printk(KERN_WARNING "GRED: Unable to relocate " 256 pr_warning("GRED: Unable to relocate VQ 0x%x "
258 "VQ 0x%x after dequeue, screwing up " 257 "after dequeue, screwing up "
259 "backlog.\n", tc_index_to_dp(skb)); 258 "backlog.\n", tc_index_to_dp(skb));
260 } else { 259 } else {
261 q->backlog -= qdisc_pkt_len(skb); 260 q->backlog -= qdisc_pkt_len(skb);
262 261
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
273 return NULL; 272 return NULL;
274} 273}
275 274
276static unsigned int gred_drop(struct Qdisc* sch) 275static unsigned int gred_drop(struct Qdisc *sch)
277{ 276{
278 struct sk_buff *skb; 277 struct sk_buff *skb;
279 struct gred_sched *t = qdisc_priv(sch); 278 struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
286 285
287 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 286 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
288 if (net_ratelimit()) 287 if (net_ratelimit())
289 printk(KERN_WARNING "GRED: Unable to relocate " 288 pr_warning("GRED: Unable to relocate VQ 0x%x "
290 "VQ 0x%x while dropping, screwing up " 289 "while dropping, screwing up "
291 "backlog.\n", tc_index_to_dp(skb)); 290 "backlog.\n", tc_index_to_dp(skb));
292 } else { 291 } else {
293 q->backlog -= len; 292 q->backlog -= len;
294 q->stats.other++; 293 q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
308 307
309} 308}
310 309
311static void gred_reset(struct Qdisc* sch) 310static void gred_reset(struct Qdisc *sch)
312{ 311{
313 int i; 312 int i;
314 struct gred_sched *t = qdisc_priv(sch); 313 struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
369 368
370 for (i = table->DPs; i < MAX_DPs; i++) { 369 for (i = table->DPs; i < MAX_DPs; i++) {
371 if (table->tab[i]) { 370 if (table->tab[i]) {
372 printk(KERN_WARNING "GRED: Warning: Destroying " 371 pr_warning("GRED: Warning: Destroying "
373 "shadowed VQ 0x%x\n", i); 372 "shadowed VQ 0x%x\n", i);
374 gred_destroy_vq(table->tab[i]); 373 gred_destroy_vq(table->tab[i]);
375 table->tab[i] = NULL; 374 table->tab[i] = NULL;
376 } 375 }
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 14a799de1c35..6488e6425652 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
81 * that are expensive on 32-bit architectures. 81 * that are expensive on 32-bit architectures.
82 */ 82 */
83 83
84struct internal_sc 84struct internal_sc {
85{
86 u64 sm1; /* scaled slope of the 1st segment */ 85 u64 sm1; /* scaled slope of the 1st segment */
87 u64 ism1; /* scaled inverse-slope of the 1st segment */ 86 u64 ism1; /* scaled inverse-slope of the 1st segment */
88 u64 dx; /* the x-projection of the 1st segment */ 87 u64 dx; /* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
92}; 91};
93 92
94/* runtime service curve */ 93/* runtime service curve */
95struct runtime_sc 94struct runtime_sc {
96{
97 u64 x; /* current starting position on x-axis */ 95 u64 x; /* current starting position on x-axis */
98 u64 y; /* current starting position on y-axis */ 96 u64 y; /* current starting position on y-axis */
99 u64 sm1; /* scaled slope of the 1st segment */ 97 u64 sm1; /* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
104 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 102 u64 ism2; /* scaled inverse-slope of the 2nd segment */
105}; 103};
106 104
107enum hfsc_class_flags 105enum hfsc_class_flags {
108{
109 HFSC_RSC = 0x1, 106 HFSC_RSC = 0x1,
110 HFSC_FSC = 0x2, 107 HFSC_FSC = 0x2,
111 HFSC_USC = 0x4 108 HFSC_USC = 0x4
112}; 109};
113 110
114struct hfsc_class 111struct hfsc_class {
115{
116 struct Qdisc_class_common cl_common; 112 struct Qdisc_class_common cl_common;
117 unsigned int refcnt; /* usage count */ 113 unsigned int refcnt; /* usage count */
118 114
@@ -140,8 +136,8 @@ struct hfsc_class
140 u64 cl_cumul; /* cumulative work in bytes done by 136 u64 cl_cumul; /* cumulative work in bytes done by
141 real-time criteria */ 137 real-time criteria */
142 138
143 u64 cl_d; /* deadline*/ 139 u64 cl_d; /* deadline*/
144 u64 cl_e; /* eligible time */ 140 u64 cl_e; /* eligible time */
145 u64 cl_vt; /* virtual time */ 141 u64 cl_vt; /* virtual time */
146 u64 cl_f; /* time when this class will fit for 142 u64 cl_f; /* time when this class will fit for
147 link-sharing, max(myf, cfmin) */ 143 link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
176 unsigned long cl_nactive; /* number of active children */ 172 unsigned long cl_nactive; /* number of active children */
177}; 173};
178 174
179struct hfsc_sched 175struct hfsc_sched {
180{
181 u16 defcls; /* default class id */ 176 u16 defcls; /* default class id */
182 struct hfsc_class root; /* root class */ 177 struct hfsc_class root; /* root class */
183 struct Qdisc_class_hash clhash; /* class hash */ 178 struct Qdisc_class_hash clhash; /* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
693 if (go_active) { 688 if (go_active) {
694 n = rb_last(&cl->cl_parent->vt_tree); 689 n = rb_last(&cl->cl_parent->vt_tree);
695 if (n != NULL) { 690 if (n != NULL) {
696 max_cl = rb_entry(n, struct hfsc_class,vt_node); 691 max_cl = rb_entry(n, struct hfsc_class, vt_node);
697 /* 692 /*
698 * set vt to the average of the min and max 693 * set vt to the average of the min and max
699 * classes. if the parent's period didn't 694 * classes. if the parent's period didn't
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1177 return NULL; 1172 return NULL;
1178 } 1173 }
1179#endif 1174#endif
1180 if ((cl = (struct hfsc_class *)res.class) == NULL) { 1175 cl = (struct hfsc_class *)res.class;
1181 if ((cl = hfsc_find_class(res.classid, sch)) == NULL) 1176 if (!cl) {
1177 cl = hfsc_find_class(res.classid, sch);
1178 if (!cl)
1182 break; /* filter selected invalid classid */ 1179 break; /* filter selected invalid classid */
1183 if (cl->level >= head->level) 1180 if (cl->level >= head->level)
1184 break; /* filter may only point downwards */ 1181 break; /* filter may only point downwards */
@@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
1316 return -1; 1313 return -1;
1317} 1314}
1318 1315
1319static inline int 1316static int
1320hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) 1317hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
1321{ 1318{
1322 if ((cl->cl_flags & HFSC_RSC) && 1319 if ((cl->cl_flags & HFSC_RSC) &&
@@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1420 struct hfsc_class *cl; 1417 struct hfsc_class *cl;
1421 u64 next_time = 0; 1418 u64 next_time = 0;
1422 1419
1423 if ((cl = eltree_get_minel(q)) != NULL) 1420 cl = eltree_get_minel(q);
1421 if (cl)
1424 next_time = cl->cl_e; 1422 next_time = cl->cl_e;
1425 if (q->root.cl_cfmin != 0) { 1423 if (q->root.cl_cfmin != 0) {
1426 if (next_time == 0 || next_time > q->root.cl_cfmin) 1424 if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1625,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
1625 * find the class with the minimum deadline among 1623 * find the class with the minimum deadline among
1626 * the eligible classes. 1624 * the eligible classes.
1627 */ 1625 */
1628 if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { 1626 cl = eltree_get_mindl(q, cur_time);
1627 if (cl) {
1629 realtime = 1; 1628 realtime = 1;
1630 } else { 1629 } else {
1631 /* 1630 /*
@@ -1664,7 +1663,7 @@ hfsc_dequeue(struct Qdisc *sch)
1664 set_passive(cl); 1663 set_passive(cl);
1665 } 1664 }
1666 1665
1667 sch->flags &= ~TCQ_F_THROTTLED; 1666 qdisc_unthrottled(sch);
1668 qdisc_bstats_update(sch, skb); 1667 qdisc_bstats_update(sch, skb);
1669 sch->q.qlen--; 1668 sch->q.qlen--;
1670 1669
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index fc12fe6f5597..29b942ce9e82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@ struct htb_class {
99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ 99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ 100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
101 /* When class changes from state 1->2 and disconnects from 101 /* When class changes from state 1->2 and disconnects from
102 parent's feed then we lost ptr value and start from the 102 * parent's feed then we lost ptr value and start from the
103 first child again. Here we store classid of the 103 * first child again. Here we store classid of the
104 last valid ptr (used when ptr is NULL). */ 104 * last valid ptr (used when ptr is NULL).
105 */
105 u32 last_ptr_id[TC_HTB_NUMPRIO]; 106 u32 last_ptr_id[TC_HTB_NUMPRIO];
106 } inner; 107 } inner;
107 } un; 108 } un;
@@ -182,10 +183,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
182 * filters in qdisc and in inner nodes (if higher filter points to the inner 183 * filters in qdisc and in inner nodes (if higher filter points to the inner
183 * node). If we end up with classid MAJOR:0 we enqueue the skb into special 184 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
184 * internal fifo (direct). These packets then go directly thru. If we still 185 * internal fifo (direct). These packets then go directly thru. If we still
185 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull 186 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
186 * then finish and return direct queue. 187 * then finish and return direct queue.
187 */ 188 */
188#define HTB_DIRECT (struct htb_class*)-1 189#define HTB_DIRECT ((struct htb_class *)-1L)
189 190
190static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, 191static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
191 int *qerr) 192 int *qerr)
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
197 int result; 198 int result;
198 199
199 /* allow to select class by setting skb->priority to valid classid; 200 /* allow to select class by setting skb->priority to valid classid;
200 note that nfmark can be used too by attaching filter fw with no 201 * note that nfmark can be used too by attaching filter fw with no
201 rules in it */ 202 * rules in it
203 */
202 if (skb->priority == sch->handle) 204 if (skb->priority == sch->handle)
203 return HTB_DIRECT; /* X:0 (direct flow) selected */ 205 return HTB_DIRECT; /* X:0 (direct flow) selected */
204 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) 206 cl = htb_find(skb->priority, sch);
207 if (cl && cl->level == 0)
205 return cl; 208 return cl;
206 209
207 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 210 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 return NULL; 219 return NULL;
217 } 220 }
218#endif 221#endif
219 if ((cl = (void *)res.class) == NULL) { 222 cl = (void *)res.class;
223 if (!cl) {
220 if (res.classid == sch->handle) 224 if (res.classid == sch->handle)
221 return HTB_DIRECT; /* X:0 (direct flow) */ 225 return HTB_DIRECT; /* X:0 (direct flow) */
222 if ((cl = htb_find(res.classid, sch)) == NULL) 226 cl = htb_find(res.classid, sch);
227 if (!cl)
223 break; /* filter selected invalid classid */ 228 break; /* filter selected invalid classid */
224 } 229 }
225 if (!cl->level) 230 if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
378 383
379 if (p->un.inner.feed[prio].rb_node) 384 if (p->un.inner.feed[prio].rb_node)
380 /* parent already has its feed in use so that 385 /* parent already has its feed in use so that
381 reset bit in mask as parent is already ok */ 386 * reset bit in mask as parent is already ok
387 */
382 mask &= ~(1 << prio); 388 mask &= ~(1 << prio);
383 389
384 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); 390 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
413 419
414 if (p->un.inner.ptr[prio] == cl->node + prio) { 420 if (p->un.inner.ptr[prio] == cl->node + prio) {
415 /* we are removing child which is pointed to from 421 /* we are removing child which is pointed to from
416 parent feed - forget the pointer but remember 422 * parent feed - forget the pointer but remember
417 classid */ 423 * classid
424 */
418 p->un.inner.last_ptr_id[prio] = cl->common.classid; 425 p->un.inner.last_ptr_id[prio] = cl->common.classid;
419 p->un.inner.ptr[prio] = NULL; 426 p->un.inner.ptr[prio] = NULL;
420 } 427 }
@@ -663,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
663 unsigned long start) 670 unsigned long start)
664{ 671{
665 /* don't run for longer than 2 jiffies; 2 is used instead of 672 /* don't run for longer than 2 jiffies; 2 is used instead of
666 1 to simplify things when jiffy is going to be incremented 673 * 1 to simplify things when jiffy is going to be incremented
667 too soon */ 674 * too soon
675 */
668 unsigned long stop_at = start + 2; 676 unsigned long stop_at = start + 2;
669 while (time_before(jiffies, stop_at)) { 677 while (time_before(jiffies, stop_at)) {
670 struct htb_class *cl; 678 struct htb_class *cl;
@@ -687,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
687 695
688 /* too much load - let's continue after a break for scheduling */ 696 /* too much load - let's continue after a break for scheduling */
689 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { 697 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
690 printk(KERN_WARNING "htb: too many events!\n"); 698 pr_warning("htb: too many events!\n");
691 q->warned |= HTB_WARN_TOOMANYEVENTS; 699 q->warned |= HTB_WARN_TOOMANYEVENTS;
692 } 700 }
693 701
@@ -695,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
695} 703}
696 704
697/* Returns class->node+prio from id-tree where classe's id is >= id. NULL 705/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
698 is no such one exists. */ 706 * is no such one exists.
707 */
699static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, 708static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
700 u32 id) 709 u32 id)
701{ 710{
@@ -739,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
739 for (i = 0; i < 65535; i++) { 748 for (i = 0; i < 65535; i++) {
740 if (!*sp->pptr && *sp->pid) { 749 if (!*sp->pptr && *sp->pid) {
741 /* ptr was invalidated but id is valid - try to recover 750 /* ptr was invalidated but id is valid - try to recover
742 the original or next ptr */ 751 * the original or next ptr
752 */
743 *sp->pptr = 753 *sp->pptr =
744 htb_id_find_next_upper(prio, sp->root, *sp->pid); 754 htb_id_find_next_upper(prio, sp->root, *sp->pid);
745 } 755 }
746 *sp->pid = 0; /* ptr is valid now so that remove this hint as it 756 *sp->pid = 0; /* ptr is valid now so that remove this hint as it
747 can become out of date quickly */ 757 * can become out of date quickly
758 */
748 if (!*sp->pptr) { /* we are at right end; rewind & go up */ 759 if (!*sp->pptr) { /* we are at right end; rewind & go up */
749 *sp->pptr = sp->root; 760 *sp->pptr = sp->root;
750 while ((*sp->pptr)->rb_left) 761 while ((*sp->pptr)->rb_left)
@@ -772,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
772} 783}
773 784
774/* dequeues packet at given priority and level; call only if 785/* dequeues packet at given priority and level; call only if
775 you are sure that there is active class at prio/level */ 786 * you are sure that there is active class at prio/level
787 */
776static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, 788static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
777 int level) 789 int level)
778{ 790{
@@ -789,9 +801,10 @@ next:
789 return NULL; 801 return NULL;
790 802
791 /* class can be empty - it is unlikely but can be true if leaf 803 /* class can be empty - it is unlikely but can be true if leaf
792 qdisc drops packets in enqueue routine or if someone used 804 * qdisc drops packets in enqueue routine or if someone used
793 graft operation on the leaf since last dequeue; 805 * graft operation on the leaf since last dequeue;
794 simply deactivate and skip such class */ 806 * simply deactivate and skip such class
807 */
795 if (unlikely(cl->un.leaf.q->q.qlen == 0)) { 808 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
796 struct htb_class *next; 809 struct htb_class *next;
797 htb_deactivate(q, cl); 810 htb_deactivate(q, cl);
@@ -831,7 +844,8 @@ next:
831 ptr[0]) + prio); 844 ptr[0]) + prio);
832 } 845 }
833 /* this used to be after charge_class but this constelation 846 /* this used to be after charge_class but this constelation
834 gives us slightly better performance */ 847 * gives us slightly better performance
848 */
835 if (!cl->un.leaf.q->q.qlen) 849 if (!cl->un.leaf.q->q.qlen)
836 htb_deactivate(q, cl); 850 htb_deactivate(q, cl);
837 htb_charge_class(q, cl, level, skb); 851 htb_charge_class(q, cl, level, skb);
@@ -852,7 +866,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
852 if (skb != NULL) { 866 if (skb != NULL) {
853ok: 867ok:
854 qdisc_bstats_update(sch, skb); 868 qdisc_bstats_update(sch, skb);
855 sch->flags &= ~TCQ_F_THROTTLED; 869 qdisc_unthrottled(sch);
856 sch->q.qlen--; 870 sch->q.qlen--;
857 return skb; 871 return skb;
858 } 872 }
@@ -883,6 +897,7 @@ ok:
883 m = ~q->row_mask[level]; 897 m = ~q->row_mask[level];
884 while (m != (int)(-1)) { 898 while (m != (int)(-1)) {
885 int prio = ffz(m); 899 int prio = ffz(m);
900
886 m |= 1 << prio; 901 m |= 1 << prio;
887 skb = htb_dequeue_tree(q, prio, level); 902 skb = htb_dequeue_tree(q, prio, level);
888 if (likely(skb != NULL)) 903 if (likely(skb != NULL))
@@ -987,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
987 return err; 1002 return err;
988 1003
989 if (tb[TCA_HTB_INIT] == NULL) { 1004 if (tb[TCA_HTB_INIT] == NULL) {
990 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); 1005 pr_err("HTB: hey probably you have bad tc tool ?\n");
991 return -EINVAL; 1006 return -EINVAL;
992 } 1007 }
993 gopt = nla_data(tb[TCA_HTB_INIT]); 1008 gopt = nla_data(tb[TCA_HTB_INIT]);
994 if (gopt->version != HTB_VER >> 16) { 1009 if (gopt->version != HTB_VER >> 16) {
995 printk(KERN_ERR 1010 pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
996 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
997 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); 1011 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
998 return -EINVAL; 1012 return -EINVAL;
999 } 1013 }
@@ -1206,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
1206 cancel_work_sync(&q->work); 1220 cancel_work_sync(&q->work);
1207 qdisc_watchdog_cancel(&q->watchdog); 1221 qdisc_watchdog_cancel(&q->watchdog);
1208 /* This line used to be after htb_destroy_class call below 1222 /* This line used to be after htb_destroy_class call below
1209 and surprisingly it worked in 2.4. But it must precede it 1223 * and surprisingly it worked in 2.4. But it must precede it
1210 because filter need its target class alive to be able to call 1224 * because filter need its target class alive to be able to call
1211 unbind_filter on it (without Oops). */ 1225 * unbind_filter on it (without Oops).
1226 */
1212 tcf_destroy_chain(&q->filter_list); 1227 tcf_destroy_chain(&q->filter_list);
1213 1228
1214 for (i = 0; i < q->clhash.hashsize; i++) { 1229 for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1342,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1342 1357
1343 /* check maximal depth */ 1358 /* check maximal depth */
1344 if (parent && parent->parent && parent->parent->level < 2) { 1359 if (parent && parent->parent && parent->parent->level < 2) {
1345 printk(KERN_ERR "htb: tree is too deep\n"); 1360 pr_err("htb: tree is too deep\n");
1346 goto failure; 1361 goto failure;
1347 } 1362 }
1348 err = -ENOBUFS; 1363 err = -ENOBUFS;
1349 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) 1364 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1365 if (!cl)
1350 goto failure; 1366 goto failure;
1351 1367
1352 err = gen_new_estimator(&cl->bstats, &cl->rate_est, 1368 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1366,8 +1382,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1366 RB_CLEAR_NODE(&cl->node[prio]); 1382 RB_CLEAR_NODE(&cl->node[prio]);
1367 1383
1368 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1384 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1369 so that can't be used inside of sch_tree_lock 1385 * so that can't be used inside of sch_tree_lock
1370 -- thanks to Karlis Peisenieks */ 1386 * -- thanks to Karlis Peisenieks
1387 */
1371 new_q = qdisc_create_dflt(sch->dev_queue, 1388 new_q = qdisc_create_dflt(sch->dev_queue,
1372 &pfifo_qdisc_ops, classid); 1389 &pfifo_qdisc_ops, classid);
1373 sch_tree_lock(sch); 1390 sch_tree_lock(sch);
@@ -1419,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1419 } 1436 }
1420 1437
1421 /* it used to be a nasty bug here, we have to check that node 1438 /* it used to be a nasty bug here, we have to check that node
1422 is really leaf before changing cl->un.leaf ! */ 1439 * is really leaf before changing cl->un.leaf !
1440 */
1423 if (!cl->level) { 1441 if (!cl->level) {
1424 cl->quantum = rtab->rate.rate / q->rate2quantum; 1442 cl->quantum = rtab->rate.rate / q->rate2quantum;
1425 if (!hopt->quantum && cl->quantum < 1000) { 1443 if (!hopt->quantum && cl->quantum < 1000) {
1426 printk(KERN_WARNING 1444 pr_warning(
1427 "HTB: quantum of class %X is small. Consider r2q change.\n", 1445 "HTB: quantum of class %X is small. Consider r2q change.\n",
1428 cl->common.classid); 1446 cl->common.classid);
1429 cl->quantum = 1000; 1447 cl->quantum = 1000;
1430 } 1448 }
1431 if (!hopt->quantum && cl->quantum > 200000) { 1449 if (!hopt->quantum && cl->quantum > 200000) {
1432 printk(KERN_WARNING 1450 pr_warning(
1433 "HTB: quantum of class %X is big. Consider r2q change.\n", 1451 "HTB: quantum of class %X is big. Consider r2q change.\n",
1434 cl->common.classid); 1452 cl->common.classid);
1435 cl->quantum = 200000; 1453 cl->quantum = 200000;
@@ -1478,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1478 struct htb_class *cl = htb_find(classid, sch); 1496 struct htb_class *cl = htb_find(classid, sch);
1479 1497
1480 /*if (cl && !cl->level) return 0; 1498 /*if (cl && !cl->level) return 0;
1481 The line above used to be there to prevent attaching filters to 1499 * The line above used to be there to prevent attaching filters to
1482 leaves. But at least tc_index filter uses this just to get class 1500 * leaves. But at least tc_index filter uses this just to get class
1483 for other reasons so that we have to allow for it. 1501 * for other reasons so that we have to allow for it.
1484 ---- 1502 * ----
1485 19.6.2002 As Werner explained it is ok - bind filter is just 1503 * 19.6.2002 As Werner explained it is ok - bind filter is just
1486 another way to "lock" the class - unlike "get" this lock can 1504 * another way to "lock" the class - unlike "get" this lock can
1487 be broken by class during destroy IIUC. 1505 * be broken by class during destroy IIUC.
1488 */ 1506 */
1489 if (cl) 1507 if (cl)
1490 cl->filter_cnt++; 1508 cl->filter_cnt++;
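
The two pr_warning() calls above fire when the per-class quantum derived from the class rate and the qdisc's r2q divisor falls outside the 1000..200000 byte window. A minimal user-space sketch of the same derivation and clamping (the bits-to-bytes conversion and the r2q value of 10 are illustrative assumptions, not taken from the hunk):

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: mirrors quantum = rate / r2q and the clamping done in
 * htb_change_class() above; not kernel code. */
static unsigned int htb_quantum(uint64_t rate_bits_per_sec, unsigned int r2q)
{
	unsigned int quantum = rate_bits_per_sec / 8 / r2q; /* bytes/s over r2q */

	if (quantum < 1000)		/* "quantum ... is small" case */
		quantum = 1000;
	else if (quantum > 200000)	/* "quantum ... is big" case */
		quantum = 200000;
	return quantum;
}

int main(void)
{
	/* 100 Mbit/s with r2q = 10 gives 1,250,000 bytes, clamped to 200000. */
	printf("quantum = %u\n", htb_quantum(100000000ULL, 10));
	return 0;
}
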
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index ecc302f4d2a1..ec5cbc848963 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
61 TC_H_MIN(ntx + 1))); 61 TC_H_MIN(ntx + 1)));
62 if (qdisc == NULL) 62 if (qdisc == NULL)
63 goto err; 63 goto err;
64 qdisc->flags |= TCQ_F_CAN_BYPASS;
65 priv->qdiscs[ntx] = qdisc; 64 priv->qdiscs[ntx] = qdisc;
66 } 65 }
67 66
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 000000000000..ea17cbed29ef
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,418 @@
1/*
2 * net/sched/sch_mqprio.c
3 *
4 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/types.h>
12#include <linux/slab.h>
13#include <linux/kernel.h>
14#include <linux/string.h>
15#include <linux/errno.h>
16#include <linux/skbuff.h>
17#include <net/netlink.h>
18#include <net/pkt_sched.h>
19#include <net/sch_generic.h>
20
21struct mqprio_sched {
22 struct Qdisc **qdiscs;
23 int hw_owned;
24};
25
26static void mqprio_destroy(struct Qdisc *sch)
27{
28 struct net_device *dev = qdisc_dev(sch);
29 struct mqprio_sched *priv = qdisc_priv(sch);
30 unsigned int ntx;
31
32 if (priv->qdiscs) {
33 for (ntx = 0;
34 ntx < dev->num_tx_queues && priv->qdiscs[ntx];
35 ntx++)
36 qdisc_destroy(priv->qdiscs[ntx]);
37 kfree(priv->qdiscs);
38 }
39
40 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
41 dev->netdev_ops->ndo_setup_tc(dev, 0);
42 else
43 netdev_set_num_tc(dev, 0);
44}
45
46static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
47{
48 int i, j;
49
50 /* Verify num_tc is not out of max range */
51 if (qopt->num_tc > TC_MAX_QUEUE)
52 return -EINVAL;
53
54 /* Verify priority mapping uses valid tcs */
55 for (i = 0; i < TC_BITMASK + 1; i++) {
56 if (qopt->prio_tc_map[i] >= qopt->num_tc)
57 return -EINVAL;
58 }
59
60 /* net_device does not support requested operation */
61 if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
62 return -EINVAL;
63
 64 /* If hw owned, qcount and qoffset are taken from the LLD, so
 65 * there is no reason to verify them here.
 66 */
67 if (qopt->hw)
68 return 0;
69
70 for (i = 0; i < qopt->num_tc; i++) {
71 unsigned int last = qopt->offset[i] + qopt->count[i];
72
 73 /* Verify that the queue count is within the tx range; a count
 74 * reaching real_num_tx_queues means the last queue is in use.
 75 */
76 if (qopt->offset[i] >= dev->real_num_tx_queues ||
77 !qopt->count[i] ||
78 last > dev->real_num_tx_queues)
79 return -EINVAL;
80
81 /* Verify that the offset and counts do not overlap */
82 for (j = i + 1; j < qopt->num_tc; j++) {
83 if (last > qopt->offset[j])
84 return -EINVAL;
85 }
86 }
87
88 return 0;
89}
90
91static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
92{
93 struct net_device *dev = qdisc_dev(sch);
94 struct mqprio_sched *priv = qdisc_priv(sch);
95 struct netdev_queue *dev_queue;
96 struct Qdisc *qdisc;
97 int i, err = -EOPNOTSUPP;
98 struct tc_mqprio_qopt *qopt = NULL;
99
100 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
101 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
102
103 if (sch->parent != TC_H_ROOT)
104 return -EOPNOTSUPP;
105
106 if (!netif_is_multiqueue(dev))
107 return -EOPNOTSUPP;
108
109 if (nla_len(opt) < sizeof(*qopt))
110 return -EINVAL;
111
112 qopt = nla_data(opt);
113 if (mqprio_parse_opt(dev, qopt))
114 return -EINVAL;
115
116 /* pre-allocate qdisc, attachment can't fail */
117 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
118 GFP_KERNEL);
119 if (priv->qdiscs == NULL) {
120 err = -ENOMEM;
121 goto err;
122 }
123
124 for (i = 0; i < dev->num_tx_queues; i++) {
125 dev_queue = netdev_get_tx_queue(dev, i);
126 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
127 TC_H_MAKE(TC_H_MAJ(sch->handle),
128 TC_H_MIN(i + 1)));
129 if (qdisc == NULL) {
130 err = -ENOMEM;
131 goto err;
132 }
133 priv->qdiscs[i] = qdisc;
134 }
135
136 /* If the mqprio options indicate that hardware should own
137 * the queue mapping then run ndo_setup_tc otherwise use the
138 * supplied and verified mapping
139 */
140 if (qopt->hw) {
141 priv->hw_owned = 1;
142 err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
143 if (err)
144 goto err;
145 } else {
146 netdev_set_num_tc(dev, qopt->num_tc);
147 for (i = 0; i < qopt->num_tc; i++)
148 netdev_set_tc_queue(dev, i,
149 qopt->count[i], qopt->offset[i]);
150 }
151
152 /* Always use supplied priority mappings */
153 for (i = 0; i < TC_BITMASK + 1; i++)
154 netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
155
156 sch->flags |= TCQ_F_MQROOT;
157 return 0;
158
159err:
160 mqprio_destroy(sch);
161 return err;
162}
163
164static void mqprio_attach(struct Qdisc *sch)
165{
166 struct net_device *dev = qdisc_dev(sch);
167 struct mqprio_sched *priv = qdisc_priv(sch);
168 struct Qdisc *qdisc;
169 unsigned int ntx;
170
171 /* Attach underlying qdisc */
172 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
173 qdisc = priv->qdiscs[ntx];
174 qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
175 if (qdisc)
176 qdisc_destroy(qdisc);
177 }
178 kfree(priv->qdiscs);
179 priv->qdiscs = NULL;
180}
181
182static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
183 unsigned long cl)
184{
185 struct net_device *dev = qdisc_dev(sch);
186 unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
187
188 if (ntx >= dev->num_tx_queues)
189 return NULL;
190 return netdev_get_tx_queue(dev, ntx);
191}
192
193static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
194 struct Qdisc **old)
195{
196 struct net_device *dev = qdisc_dev(sch);
197 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
198
199 if (!dev_queue)
200 return -EINVAL;
201
202 if (dev->flags & IFF_UP)
203 dev_deactivate(dev);
204
205 *old = dev_graft_qdisc(dev_queue, new);
206
207 if (dev->flags & IFF_UP)
208 dev_activate(dev);
209
210 return 0;
211}
212
213static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
214{
215 struct net_device *dev = qdisc_dev(sch);
216 struct mqprio_sched *priv = qdisc_priv(sch);
217 unsigned char *b = skb_tail_pointer(skb);
218 struct tc_mqprio_qopt opt = { 0 };
219 struct Qdisc *qdisc;
220 unsigned int i;
221
222 sch->q.qlen = 0;
223 memset(&sch->bstats, 0, sizeof(sch->bstats));
224 memset(&sch->qstats, 0, sizeof(sch->qstats));
225
226 for (i = 0; i < dev->num_tx_queues; i++) {
227 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
228 spin_lock_bh(qdisc_lock(qdisc));
229 sch->q.qlen += qdisc->q.qlen;
230 sch->bstats.bytes += qdisc->bstats.bytes;
231 sch->bstats.packets += qdisc->bstats.packets;
232 sch->qstats.qlen += qdisc->qstats.qlen;
233 sch->qstats.backlog += qdisc->qstats.backlog;
234 sch->qstats.drops += qdisc->qstats.drops;
235 sch->qstats.requeues += qdisc->qstats.requeues;
236 sch->qstats.overlimits += qdisc->qstats.overlimits;
237 spin_unlock_bh(qdisc_lock(qdisc));
238 }
239
240 opt.num_tc = netdev_get_num_tc(dev);
241 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
242 opt.hw = priv->hw_owned;
243
244 for (i = 0; i < netdev_get_num_tc(dev); i++) {
245 opt.count[i] = dev->tc_to_txq[i].count;
246 opt.offset[i] = dev->tc_to_txq[i].offset;
247 }
248
249 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
250
251 return skb->len;
252nla_put_failure:
253 nlmsg_trim(skb, b);
254 return -1;
255}
256
257static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
258{
259 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
260
261 if (!dev_queue)
262 return NULL;
263
264 return dev_queue->qdisc_sleeping;
265}
266
267static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
268{
269 struct net_device *dev = qdisc_dev(sch);
270 unsigned int ntx = TC_H_MIN(classid);
271
272 if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
273 return 0;
274 return ntx;
275}
276
277static void mqprio_put(struct Qdisc *sch, unsigned long cl)
278{
279}
280
281static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
282 struct sk_buff *skb, struct tcmsg *tcm)
283{
284 struct net_device *dev = qdisc_dev(sch);
285
286 if (cl <= netdev_get_num_tc(dev)) {
287 tcm->tcm_parent = TC_H_ROOT;
288 tcm->tcm_info = 0;
289 } else {
290 int i;
291 struct netdev_queue *dev_queue;
292
293 dev_queue = mqprio_queue_get(sch, cl);
294 tcm->tcm_parent = 0;
295 for (i = 0; i < netdev_get_num_tc(dev); i++) {
296 struct netdev_tc_txq tc = dev->tc_to_txq[i];
297 int q_idx = cl - netdev_get_num_tc(dev);
298
299 if (q_idx > tc.offset &&
300 q_idx <= tc.offset + tc.count) {
301 tcm->tcm_parent =
302 TC_H_MAKE(TC_H_MAJ(sch->handle),
303 TC_H_MIN(i + 1));
304 break;
305 }
306 }
307 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
308 }
309 tcm->tcm_handle |= TC_H_MIN(cl);
310 return 0;
311}
312
313static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
314 struct gnet_dump *d)
315 __releases(d->lock)
316 __acquires(d->lock)
317{
318 struct net_device *dev = qdisc_dev(sch);
319
320 if (cl <= netdev_get_num_tc(dev)) {
321 int i;
322 struct Qdisc *qdisc;
323 struct gnet_stats_queue qstats = {0};
324 struct gnet_stats_basic_packed bstats = {0};
325 struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
326
 327 /* Drop the lock here; it will be reclaimed before touching
 328 * statistics. This is required because the d->lock we hold
 329 * here is the lock on dev_queue->qdisc_sleeping, which is
 330 * also acquired below.
 331 */
332 spin_unlock_bh(d->lock);
333
334 for (i = tc.offset; i < tc.offset + tc.count; i++) {
335 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
336 spin_lock_bh(qdisc_lock(qdisc));
337 bstats.bytes += qdisc->bstats.bytes;
338 bstats.packets += qdisc->bstats.packets;
339 qstats.qlen += qdisc->qstats.qlen;
340 qstats.backlog += qdisc->qstats.backlog;
341 qstats.drops += qdisc->qstats.drops;
342 qstats.requeues += qdisc->qstats.requeues;
343 qstats.overlimits += qdisc->qstats.overlimits;
344 spin_unlock_bh(qdisc_lock(qdisc));
345 }
346 /* Reclaim root sleeping lock before completing stats */
347 spin_lock_bh(d->lock);
348 if (gnet_stats_copy_basic(d, &bstats) < 0 ||
349 gnet_stats_copy_queue(d, &qstats) < 0)
350 return -1;
351 } else {
352 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
353
354 sch = dev_queue->qdisc_sleeping;
355 sch->qstats.qlen = sch->q.qlen;
356 if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
357 gnet_stats_copy_queue(d, &sch->qstats) < 0)
358 return -1;
359 }
360 return 0;
361}
362
363static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
364{
365 struct net_device *dev = qdisc_dev(sch);
366 unsigned long ntx;
367
368 if (arg->stop)
369 return;
370
371 /* Walk hierarchy with a virtual class per tc */
372 arg->count = arg->skip;
373 for (ntx = arg->skip;
374 ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
375 ntx++) {
376 if (arg->fn(sch, ntx + 1, arg) < 0) {
377 arg->stop = 1;
378 break;
379 }
380 arg->count++;
381 }
382}
383
384static const struct Qdisc_class_ops mqprio_class_ops = {
385 .graft = mqprio_graft,
386 .leaf = mqprio_leaf,
387 .get = mqprio_get,
388 .put = mqprio_put,
389 .walk = mqprio_walk,
390 .dump = mqprio_dump_class,
391 .dump_stats = mqprio_dump_class_stats,
392};
393
394static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
395 .cl_ops = &mqprio_class_ops,
396 .id = "mqprio",
397 .priv_size = sizeof(struct mqprio_sched),
398 .init = mqprio_init,
399 .destroy = mqprio_destroy,
400 .attach = mqprio_attach,
401 .dump = mqprio_dump,
402 .owner = THIS_MODULE,
403};
404
405static int __init mqprio_module_init(void)
406{
407 return register_qdisc(&mqprio_qdisc_ops);
408}
409
410static void __exit mqprio_module_exit(void)
411{
412 unregister_qdisc(&mqprio_qdisc_ops);
413}
414
415module_init(mqprio_module_init);
416module_exit(mqprio_module_exit);
417
418MODULE_LICENSE("GPL");
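
mqprio_parse_opt() above accepts a software queue mapping only when every traffic class owns a non-empty block of real TX queues that stays inside real_num_tx_queues and does not overlap a later class, and every priority maps to a valid class. A small user-space sketch of the same offset/count checks (the struct name, the queue total, and the example mapping are made up for illustration):

#include <stdio.h>

#define NUM_TC	3
#define NUM_TXQ	8	/* stands in for dev->real_num_tx_queues */

struct tc_map { unsigned int offset, count; };

/* Same idea as the validation loop in mqprio_parse_opt(): each class must
 * fit inside the TX queue range and must not overlap a later class.
 * Returns 0 if the mapping is acceptable. */
static int check_map(const struct tc_map *tc, int num_tc)
{
	int i, j;

	for (i = 0; i < num_tc; i++) {
		unsigned int last = tc[i].offset + tc[i].count;

		if (!tc[i].count || tc[i].offset >= NUM_TXQ || last > NUM_TXQ)
			return -1;
		for (j = i + 1; j < num_tc; j++)
			if (last > tc[j].offset)
				return -1;
	}
	return 0;
}

int main(void)
{
	/* tc0 -> queues 0-3, tc1 -> queues 4-5, tc2 -> queues 6-7 */
	struct tc_map map[NUM_TC] = { {0, 4}, {4, 2}, {6, 2} };

	printf("mapping %s\n", check_map(map, NUM_TC) ? "rejected" : "accepted");
	return 0;
}
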
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 436a2e75b322..edc1950e0e77 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
156 unsigned int len; 156 unsigned int len;
157 struct Qdisc *qdisc; 157 struct Qdisc *qdisc;
158 158
159 for (band = q->bands-1; band >= 0; band--) { 159 for (band = q->bands - 1; band >= 0; band--) {
160 qdisc = q->queues[band]; 160 qdisc = q->queues[band];
161 if (qdisc->ops->drop) { 161 if (qdisc->ops->drop) {
162 len = qdisc->ops->drop(qdisc); 162 len = qdisc->ops->drop(qdisc);
@@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
265 for (i = 0; i < q->max_bands; i++) 265 for (i = 0; i < q->max_bands; i++)
266 q->queues[i] = &noop_qdisc; 266 q->queues[i] = &noop_qdisc;
267 267
268 err = multiq_tune(sch,opt); 268 err = multiq_tune(sch, opt);
269 269
270 if (err) 270 if (err)
271 kfree(q->queues); 271 kfree(q->queues);
@@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
346 struct multiq_sched_data *q = qdisc_priv(sch); 346 struct multiq_sched_data *q = qdisc_priv(sch);
347 347
348 tcm->tcm_handle |= TC_H_MIN(cl); 348 tcm->tcm_handle |= TC_H_MIN(cl);
349 tcm->tcm_info = q->queues[cl-1]->handle; 349 tcm->tcm_info = q->queues[cl - 1]->handle;
350 return 0; 350 return 0;
351} 351}
352 352
@@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
378 arg->count++; 378 arg->count++;
379 continue; 379 continue;
380 } 380 }
381 if (arg->fn(sch, band+1, arg) < 0) { 381 if (arg->fn(sch, band + 1, arg) < 0) {
382 arg->stop = 1; 382 arg->stop = 1;
383 break; 383 break;
384 } 384 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6a3006b38dc5..69c35f6cd13f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -19,12 +19,13 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/errno.h> 20#include <linux/errno.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/vmalloc.h>
22#include <linux/rtnetlink.h> 23#include <linux/rtnetlink.h>
23 24
24#include <net/netlink.h> 25#include <net/netlink.h>
25#include <net/pkt_sched.h> 26#include <net/pkt_sched.h>
26 27
27#define VERSION "1.2" 28#define VERSION "1.3"
28 29
29/* Network Emulation Queuing algorithm. 30/* Network Emulation Queuing algorithm.
30 ==================================== 31 ====================================
@@ -47,6 +48,20 @@
47 layering other disciplines. It does not need to do bandwidth 48 layering other disciplines. It does not need to do bandwidth
48 control either since that can be handled by using token 49 control either since that can be handled by using token
49 bucket or other rate control. 50 bucket or other rate control.
51
52 Correlated Loss Generator models
53
 54 Added generation of correlated losses according to a 4-state
 55 Markov model and to the "Gilbert-Elliot" model.
56
57 References:
58 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
59 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
60 and intuitive loss model for packet networks and its implementation
61 in the Netem module in the Linux kernel", available in [1]
62
 63 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
64 Fabio Ludovici <fabio.ludovici at yahoo.it>
50*/ 65*/
51 66
52struct netem_sched_data { 67struct netem_sched_data {
@@ -73,6 +88,26 @@ struct netem_sched_data {
73 u32 size; 88 u32 size;
74 s16 table[0]; 89 s16 table[0];
75 } *delay_dist; 90 } *delay_dist;
91
92 enum {
93 CLG_RANDOM,
94 CLG_4_STATES,
95 CLG_GILB_ELL,
96 } loss_model;
97
98 /* Correlated Loss Generation models */
99 struct clgstate {
100 /* state of the Markov chain */
101 u8 state;
102
103 /* 4-states and Gilbert-Elliot models */
104 u32 a1; /* p13 for 4-states or p for GE */
105 u32 a2; /* p31 for 4-states or r for GE */
106 u32 a3; /* p32 for 4-states or h for GE */
107 u32 a4; /* p14 for 4-states or 1-k for GE */
108 u32 a5; /* p23 used only in 4-states */
109 } clg;
110
76}; 111};
77 112
78/* Time stamp put into socket buffer control block */ 113/* Time stamp put into socket buffer control block */
@@ -115,6 +150,122 @@ static u32 get_crandom(struct crndstate *state)
115 return answer; 150 return answer;
116} 151}
117 152
153/* loss_4state - 4-state model loss generator
154 * Generates losses according to the 4-state Markov chain adopted in
155 * the GI (General and Intuitive) loss model.
156 */
157static bool loss_4state(struct netem_sched_data *q)
158{
159 struct clgstate *clg = &q->clg;
160 u32 rnd = net_random();
161
162 /*
163 * Makes a comparison between rnd and the transition
164 * probabilities outgoing from the current state, then decides the
165 * next state and if the next packet has to be transmitted or lost.
166 * The four states correspond to:
167 * 1 => successfully transmitted packets within a gap period
168 * 4 => isolated losses within a gap period
169 * 3 => lost packets within a burst period
170 * 2 => successfully transmitted packets within a burst period
171 */
172 switch (clg->state) {
173 case 1:
174 if (rnd < clg->a4) {
175 clg->state = 4;
176 return true;
177 } else if (clg->a4 < rnd && rnd < clg->a1) {
178 clg->state = 3;
179 return true;
180 } else if (clg->a1 < rnd)
181 clg->state = 1;
182
183 break;
184 case 2:
185 if (rnd < clg->a5) {
186 clg->state = 3;
187 return true;
188 } else
189 clg->state = 2;
190
191 break;
192 case 3:
193 if (rnd < clg->a3)
194 clg->state = 2;
195 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
196 clg->state = 1;
197 return true;
198 } else if (clg->a2 + clg->a3 < rnd) {
199 clg->state = 3;
200 return true;
201 }
202 break;
203 case 4:
204 clg->state = 1;
205 break;
206 }
207
208 return false;
209}
210
211/* loss_gilb_ell - Gilbert-Elliot model loss generator
212 * Generates losses according to the Gilbert-Elliot loss model or
213 * its special cases (Gilbert or Simple Gilbert)
214 *
215 * Makes a comparison between random number and the transition
216 * probabilities outgoing from the current state, then decides the
217 * next state. A second random number is extracted and the comparison
218 * with the loss probability of the current state decides if the next
219 * packet will be transmitted or lost.
220 */
221static bool loss_gilb_ell(struct netem_sched_data *q)
222{
223 struct clgstate *clg = &q->clg;
224
225 switch (clg->state) {
226 case 1:
227 if (net_random() < clg->a1)
228 clg->state = 2;
229 if (net_random() < clg->a4)
230 return true;
231 case 2:
232 if (net_random() < clg->a2)
233 clg->state = 1;
234 if (clg->a3 > net_random())
235 return true;
236 }
237
238 return false;
239}
240
241static bool loss_event(struct netem_sched_data *q)
242{
243 switch (q->loss_model) {
244 case CLG_RANDOM:
245 /* Random packet drop 0 => none, ~0 => all */
246 return q->loss && q->loss >= get_crandom(&q->loss_cor);
247
248 case CLG_4_STATES:
249 /* 4state loss model algorithm (used also for GI model)
250 * Extracts a value from the markov 4 state loss generator,
251 * if it is 1 drops a packet and if needed writes the event in
252 * the kernel logs
253 */
254 return loss_4state(q);
255
256 case CLG_GILB_ELL:
257 /* Gilbert-Elliot loss model algorithm
258 * Extracts a value from the Gilbert-Elliot loss generator,
259 * if it is 1 drops a packet and if needed writes the event in
260 * the kernel logs
261 */
262 return loss_gilb_ell(q);
263 }
264
265 return false; /* not reached */
266}
267
268
118/* tabledist - return a pseudo-randomly distributed value with mean mu and 269/* tabledist - return a pseudo-randomly distributed value with mean mu and
119 * std deviation sigma. Uses table lookup to approximate the desired 270 * std deviation sigma. Uses table lookup to approximate the desired
120 * distribution, and a uniformly-distributed pseudo-random source. 271 * distribution, and a uniformly-distributed pseudo-random source.
@@ -161,14 +312,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
161 int ret; 312 int ret;
162 int count = 1; 313 int count = 1;
163 314
164 pr_debug("netem_enqueue skb=%p\n", skb);
165
166 /* Random duplication */ 315 /* Random duplication */
167 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 316 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
168 ++count; 317 ++count;
169 318
170 /* Random packet drop 0 => none, ~0 => all */ 319 /* Drop packet? */
171 if (q->loss && q->loss >= get_crandom(&q->loss_cor)) 320 if (loss_event(q))
172 --count; 321 --count;
173 322
174 if (count == 0) { 323 if (count == 0) {
@@ -211,8 +360,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
211 } 360 }
212 361
213 cb = netem_skb_cb(skb); 362 cb = netem_skb_cb(skb);
214 if (q->gap == 0 || /* not doing reordering */ 363 if (q->gap == 0 || /* not doing reordering */
215 q->counter < q->gap || /* inside last reordering gap */ 364 q->counter < q->gap || /* inside last reordering gap */
216 q->reorder < get_crandom(&q->reorder_cor)) { 365 q->reorder < get_crandom(&q->reorder_cor)) {
217 psched_time_t now; 366 psched_time_t now;
218 psched_tdiff_t delay; 367 psched_tdiff_t delay;
@@ -238,17 +387,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
238 ret = NET_XMIT_SUCCESS; 387 ret = NET_XMIT_SUCCESS;
239 } 388 }
240 389
241 if (likely(ret == NET_XMIT_SUCCESS)) { 390 if (ret != NET_XMIT_SUCCESS) {
242 sch->q.qlen++; 391 if (net_xmit_drop_count(ret)) {
243 } else if (net_xmit_drop_count(ret)) { 392 sch->qstats.drops++;
244 sch->qstats.drops++; 393 return ret;
394 }
245 } 395 }
246 396
247 pr_debug("netem: enqueue ret %d\n", ret); 397 sch->q.qlen++;
248 return ret; 398 return NET_XMIT_SUCCESS;
249} 399}
250 400
251static unsigned int netem_drop(struct Qdisc* sch) 401static unsigned int netem_drop(struct Qdisc *sch)
252{ 402{
253 struct netem_sched_data *q = qdisc_priv(sch); 403 struct netem_sched_data *q = qdisc_priv(sch);
254 unsigned int len = 0; 404 unsigned int len = 0;
@@ -265,7 +415,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
265 struct netem_sched_data *q = qdisc_priv(sch); 415 struct netem_sched_data *q = qdisc_priv(sch);
266 struct sk_buff *skb; 416 struct sk_buff *skb;
267 417
268 if (sch->flags & TCQ_F_THROTTLED) 418 if (qdisc_is_throttled(sch))
269 return NULL; 419 return NULL;
270 420
271 skb = q->qdisc->ops->peek(q->qdisc); 421 skb = q->qdisc->ops->peek(q->qdisc);
@@ -287,9 +437,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
287 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) 437 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
288 skb->tstamp.tv64 = 0; 438 skb->tstamp.tv64 = 0;
289#endif 439#endif
290 pr_debug("netem_dequeue: return skb=%p\n", skb); 440
291 qdisc_bstats_update(sch, skb);
292 sch->q.qlen--; 441 sch->q.qlen--;
442 qdisc_unthrottled(sch);
443 qdisc_bstats_update(sch, skb);
293 return skb; 444 return skb;
294 } 445 }
295 446
@@ -308,6 +459,16 @@ static void netem_reset(struct Qdisc *sch)
308 qdisc_watchdog_cancel(&q->watchdog); 459 qdisc_watchdog_cancel(&q->watchdog);
309} 460}
310 461
462static void dist_free(struct disttable *d)
463{
464 if (d) {
465 if (is_vmalloc_addr(d))
466 vfree(d);
467 else
468 kfree(d);
469 }
470}
471
311/* 472/*
312 * Distribution data is a variable size payload containing 473 * Distribution data is a variable size payload containing
313 * signed 16 bit values. 474 * signed 16 bit values.
@@ -315,16 +476,20 @@ static void netem_reset(struct Qdisc *sch)
315static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 476static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
316{ 477{
317 struct netem_sched_data *q = qdisc_priv(sch); 478 struct netem_sched_data *q = qdisc_priv(sch);
318 unsigned long n = nla_len(attr)/sizeof(__s16); 479 size_t n = nla_len(attr)/sizeof(__s16);
319 const __s16 *data = nla_data(attr); 480 const __s16 *data = nla_data(attr);
320 spinlock_t *root_lock; 481 spinlock_t *root_lock;
321 struct disttable *d; 482 struct disttable *d;
322 int i; 483 int i;
484 size_t s;
323 485
324 if (n > 65536) 486 if (n > NETEM_DIST_MAX)
325 return -EINVAL; 487 return -EINVAL;
326 488
327 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 489 s = sizeof(struct disttable) + n * sizeof(s16);
490 d = kmalloc(s, GFP_KERNEL);
491 if (!d)
492 d = vmalloc(s);
328 if (!d) 493 if (!d)
329 return -ENOMEM; 494 return -ENOMEM;
330 495
@@ -335,7 +500,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
335 root_lock = qdisc_root_sleeping_lock(sch); 500 root_lock = qdisc_root_sleeping_lock(sch);
336 501
337 spin_lock_bh(root_lock); 502 spin_lock_bh(root_lock);
338 kfree(q->delay_dist); 503 dist_free(q->delay_dist);
339 q->delay_dist = d; 504 q->delay_dist = d;
340 spin_unlock_bh(root_lock); 505 spin_unlock_bh(root_lock);
341 return 0; 506 return 0;
@@ -369,10 +534,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
369 init_crandom(&q->corrupt_cor, r->correlation); 534 init_crandom(&q->corrupt_cor, r->correlation);
370} 535}
371 536
537static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
538{
539 struct netem_sched_data *q = qdisc_priv(sch);
540 const struct nlattr *la;
541 int rem;
542
543 nla_for_each_nested(la, attr, rem) {
544 u16 type = nla_type(la);
545
546 switch(type) {
547 case NETEM_LOSS_GI: {
548 const struct tc_netem_gimodel *gi = nla_data(la);
549
550 if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
551 pr_info("netem: incorrect gi model size\n");
552 return -EINVAL;
553 }
554
555 q->loss_model = CLG_4_STATES;
556
557 q->clg.state = 1;
558 q->clg.a1 = gi->p13;
559 q->clg.a2 = gi->p31;
560 q->clg.a3 = gi->p32;
561 q->clg.a4 = gi->p14;
562 q->clg.a5 = gi->p23;
563 break;
564 }
565
566 case NETEM_LOSS_GE: {
567 const struct tc_netem_gemodel *ge = nla_data(la);
568
569 if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
570 pr_info("netem: incorrect gi model size\n");
571 return -EINVAL;
572 }
573
574 q->loss_model = CLG_GILB_ELL;
575 q->clg.state = 1;
576 q->clg.a1 = ge->p;
577 q->clg.a2 = ge->r;
578 q->clg.a3 = ge->h;
579 q->clg.a4 = ge->k1;
580 break;
581 }
582
583 default:
584 pr_info("netem: unknown loss type %u\n", type);
585 return -EINVAL;
586 }
587 }
588
589 return 0;
590}
591
372static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 592static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
373 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 593 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
374 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 594 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
375 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 595 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
596 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
376}; 597};
377 598
378static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 599static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -380,11 +601,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
380{ 601{
381 int nested_len = nla_len(nla) - NLA_ALIGN(len); 602 int nested_len = nla_len(nla) - NLA_ALIGN(len);
382 603
383 if (nested_len < 0) 604 if (nested_len < 0) {
605 pr_info("netem: invalid attributes len %d\n", nested_len);
384 return -EINVAL; 606 return -EINVAL;
607 }
608
385 if (nested_len >= nla_attr_size(0)) 609 if (nested_len >= nla_attr_size(0))
386 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 610 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
387 nested_len, policy); 611 nested_len, policy);
612
388 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 613 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
389 return 0; 614 return 0;
390} 615}
@@ -407,7 +632,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
407 632
408 ret = fifo_set_limit(q->qdisc, qopt->limit); 633 ret = fifo_set_limit(q->qdisc, qopt->limit);
409 if (ret) { 634 if (ret) {
410 pr_debug("netem: can't set fifo limit\n"); 635 pr_info("netem: can't set fifo limit\n");
411 return ret; 636 return ret;
412 } 637 }
413 638
@@ -440,7 +665,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
440 if (tb[TCA_NETEM_CORRUPT]) 665 if (tb[TCA_NETEM_CORRUPT])
441 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 666 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
442 667
443 return 0; 668 q->loss_model = CLG_RANDOM;
669 if (tb[TCA_NETEM_LOSS])
670 ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
671
672 return ret;
444} 673}
445 674
446/* 675/*
@@ -535,16 +764,17 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
535 764
536 qdisc_watchdog_init(&q->watchdog, sch); 765 qdisc_watchdog_init(&q->watchdog, sch);
537 766
767 q->loss_model = CLG_RANDOM;
538 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, 768 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
539 TC_H_MAKE(sch->handle, 1)); 769 TC_H_MAKE(sch->handle, 1));
540 if (!q->qdisc) { 770 if (!q->qdisc) {
541 pr_debug("netem: qdisc create failed\n"); 771 pr_notice("netem: qdisc create tfifo qdisc failed\n");
542 return -ENOMEM; 772 return -ENOMEM;
543 } 773 }
544 774
545 ret = netem_change(sch, opt); 775 ret = netem_change(sch, opt);
546 if (ret) { 776 if (ret) {
547 pr_debug("netem: change failed\n"); 777 pr_info("netem: change failed\n");
548 qdisc_destroy(q->qdisc); 778 qdisc_destroy(q->qdisc);
549 } 779 }
550 return ret; 780 return ret;
@@ -556,14 +786,61 @@ static void netem_destroy(struct Qdisc *sch)
556 786
557 qdisc_watchdog_cancel(&q->watchdog); 787 qdisc_watchdog_cancel(&q->watchdog);
558 qdisc_destroy(q->qdisc); 788 qdisc_destroy(q->qdisc);
559 kfree(q->delay_dist); 789 dist_free(q->delay_dist);
790}
791
792static int dump_loss_model(const struct netem_sched_data *q,
793 struct sk_buff *skb)
794{
795 struct nlattr *nest;
796
797 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
798 if (nest == NULL)
799 goto nla_put_failure;
800
801 switch (q->loss_model) {
802 case CLG_RANDOM:
803 /* legacy loss model */
804 nla_nest_cancel(skb, nest);
805 return 0; /* no data */
806
807 case CLG_4_STATES: {
808 struct tc_netem_gimodel gi = {
809 .p13 = q->clg.a1,
810 .p31 = q->clg.a2,
811 .p32 = q->clg.a3,
812 .p14 = q->clg.a4,
813 .p23 = q->clg.a5,
814 };
815
816 NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
817 break;
818 }
819 case CLG_GILB_ELL: {
820 struct tc_netem_gemodel ge = {
821 .p = q->clg.a1,
822 .r = q->clg.a2,
823 .h = q->clg.a3,
824 .k1 = q->clg.a4,
825 };
826
827 NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
828 break;
829 }
830 }
831
832 nla_nest_end(skb, nest);
833 return 0;
834
835nla_put_failure:
836 nla_nest_cancel(skb, nest);
837 return -1;
560} 838}
561 839
562static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 840static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
563{ 841{
564 const struct netem_sched_data *q = qdisc_priv(sch); 842 const struct netem_sched_data *q = qdisc_priv(sch);
565 unsigned char *b = skb_tail_pointer(skb); 843 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
566 struct nlattr *nla = (struct nlattr *) b;
567 struct tc_netem_qopt qopt; 844 struct tc_netem_qopt qopt;
568 struct tc_netem_corr cor; 845 struct tc_netem_corr cor;
569 struct tc_netem_reorder reorder; 846 struct tc_netem_reorder reorder;
@@ -590,17 +867,87 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
590 corrupt.correlation = q->corrupt_cor.rho; 867 corrupt.correlation = q->corrupt_cor.rho;
591 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 868 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
592 869
593 nla->nla_len = skb_tail_pointer(skb) - b; 870 if (dump_loss_model(q, skb) != 0)
871 goto nla_put_failure;
594 872
595 return skb->len; 873 return nla_nest_end(skb, nla);
596 874
597nla_put_failure: 875nla_put_failure:
598 nlmsg_trim(skb, b); 876 nlmsg_trim(skb, nla);
599 return -1; 877 return -1;
600} 878}
601 879
880static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
881 struct sk_buff *skb, struct tcmsg *tcm)
882{
883 struct netem_sched_data *q = qdisc_priv(sch);
884
885 if (cl != 1) /* only one class */
886 return -ENOENT;
887
888 tcm->tcm_handle |= TC_H_MIN(1);
889 tcm->tcm_info = q->qdisc->handle;
890
891 return 0;
892}
893
894static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
895 struct Qdisc **old)
896{
897 struct netem_sched_data *q = qdisc_priv(sch);
898
899 if (new == NULL)
900 new = &noop_qdisc;
901
902 sch_tree_lock(sch);
903 *old = q->qdisc;
904 q->qdisc = new;
905 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
906 qdisc_reset(*old);
907 sch_tree_unlock(sch);
908
909 return 0;
910}
911
912static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
913{
914 struct netem_sched_data *q = qdisc_priv(sch);
915 return q->qdisc;
916}
917
918static unsigned long netem_get(struct Qdisc *sch, u32 classid)
919{
920 return 1;
921}
922
923static void netem_put(struct Qdisc *sch, unsigned long arg)
924{
925}
926
927static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
928{
929 if (!walker->stop) {
930 if (walker->count >= walker->skip)
931 if (walker->fn(sch, 1, walker) < 0) {
932 walker->stop = 1;
933 return;
934 }
935 walker->count++;
936 }
937}
938
939static const struct Qdisc_class_ops netem_class_ops = {
940 .graft = netem_graft,
941 .leaf = netem_leaf,
942 .get = netem_get,
943 .put = netem_put,
944 .walk = netem_walk,
945 .dump = netem_dump_class,
946};
947
602static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 948static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
603 .id = "netem", 949 .id = "netem",
950 .cl_ops = &netem_class_ops,
604 .priv_size = sizeof(struct netem_sched_data), 951 .priv_size = sizeof(struct netem_sched_data),
605 .enqueue = netem_enqueue, 952 .enqueue = netem_enqueue,
606 .dequeue = netem_dequeue, 953 .dequeue = netem_dequeue,
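
The netem changes above replace the single Bernoulli drop test with loss_event(), which can also drive the new 4-state and Gilbert-Elliot Markov loss generators configured via TCA_NETEM_LOSS. A self-contained user-space sketch of the two-state Gilbert-Elliot generator, following the a1..a4 field roles set up in get_loss_clg() and consumed in loss_gilb_ell() above (rand() stands in for net_random(), and all probability values are invented for the demo):

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Probabilities scaled to RAND_MAX: p = P(good -> bad), r = P(bad -> good),
 * h used here as the loss probability while in the bad state and k1 as the
 * loss probability while in the good state, matching how loss_gilb_ell()
 * consumes a3 and a4 above. */
struct ge_state {
	int state;		/* 1 = good, 2 = bad */
	long p, r, h, k1;
};

static bool ge_loss(struct ge_state *clg)
{
	switch (clg->state) {
	case 1:
		if (rand() < clg->p)
			clg->state = 2;
		return rand() < clg->k1;
	case 2:
		if (rand() < clg->r)
			clg->state = 1;
		return rand() < clg->h;
	}
	return false;
}

int main(void)
{
	struct ge_state clg = {
		.state = 1,
		.p  = RAND_MAX / 100,	/* 1% chance of entering the bad state */
		.r  = RAND_MAX / 10,	/* 10% chance of leaving it */
		.h  = RAND_MAX / 2,	/* 50% loss while bad */
		.k1 = RAND_MAX / 1000,	/* ~0.1% loss while good */
	};
	int i, losses = 0;

	for (i = 0; i < 100000; i++)
		losses += ge_loss(&clg);
	printf("lost %d of 100000 packets\n", losses);
	return 0;
}
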
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fbd710d619bf..2a318f2dc3e5 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23 23
24 24
25struct prio_sched_data 25struct prio_sched_data {
26{
27 int bands; 26 int bands;
28 struct tcf_proto *filter_list; 27 struct tcf_proto *filter_list;
29 u8 prio2band[TC_PRIO_MAX+1]; 28 u8 prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
54 if (!q->filter_list || err < 0) { 53 if (!q->filter_list || err < 0) {
55 if (TC_H_MAJ(band)) 54 if (TC_H_MAJ(band))
56 band = 0; 55 band = 0;
57 return q->queues[q->prio2band[band&TC_PRIO_MAX]]; 56 return q->queues[q->prio2band[band & TC_PRIO_MAX]];
58 } 57 }
59 band = res.classid; 58 band = res.classid;
60 } 59 }
@@ -106,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
106 return NULL; 105 return NULL;
107} 106}
108 107
109static struct sk_buff *prio_dequeue(struct Qdisc* sch) 108static struct sk_buff *prio_dequeue(struct Qdisc *sch)
110{ 109{
111 struct prio_sched_data *q = qdisc_priv(sch); 110 struct prio_sched_data *q = qdisc_priv(sch);
112 int prio; 111 int prio;
@@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
124 123
125} 124}
126 125
127static unsigned int prio_drop(struct Qdisc* sch) 126static unsigned int prio_drop(struct Qdisc *sch)
128{ 127{
129 struct prio_sched_data *q = qdisc_priv(sch); 128 struct prio_sched_data *q = qdisc_priv(sch);
130 int prio; 129 int prio;
@@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
143 142
144 143
145static void 144static void
146prio_reset(struct Qdisc* sch) 145prio_reset(struct Qdisc *sch)
147{ 146{
148 int prio; 147 int prio;
149 struct prio_sched_data *q = qdisc_priv(sch); 148 struct prio_sched_data *q = qdisc_priv(sch);
150 149
151 for (prio=0; prio<q->bands; prio++) 150 for (prio = 0; prio < q->bands; prio++)
152 qdisc_reset(q->queues[prio]); 151 qdisc_reset(q->queues[prio]);
153 sch->q.qlen = 0; 152 sch->q.qlen = 0;
154} 153}
155 154
156static void 155static void
157prio_destroy(struct Qdisc* sch) 156prio_destroy(struct Qdisc *sch)
158{ 157{
159 int prio; 158 int prio;
160 struct prio_sched_data *q = qdisc_priv(sch); 159 struct prio_sched_data *q = qdisc_priv(sch);
161 160
162 tcf_destroy_chain(&q->filter_list); 161 tcf_destroy_chain(&q->filter_list);
163 for (prio=0; prio<q->bands; prio++) 162 for (prio = 0; prio < q->bands; prio++)
164 qdisc_destroy(q->queues[prio]); 163 qdisc_destroy(q->queues[prio]);
165} 164}
166 165
@@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
177 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) 176 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
178 return -EINVAL; 177 return -EINVAL;
179 178
180 for (i=0; i<=TC_PRIO_MAX; i++) { 179 for (i = 0; i <= TC_PRIO_MAX; i++) {
181 if (qopt->priomap[i] >= qopt->bands) 180 if (qopt->priomap[i] >= qopt->bands)
182 return -EINVAL; 181 return -EINVAL;
183 } 182 }
@@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
186 q->bands = qopt->bands; 185 q->bands = qopt->bands;
187 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); 186 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
188 187
189 for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { 188 for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
190 struct Qdisc *child = q->queues[i]; 189 struct Qdisc *child = q->queues[i];
191 q->queues[i] = &noop_qdisc; 190 q->queues[i] = &noop_qdisc;
192 if (child != &noop_qdisc) { 191 if (child != &noop_qdisc) {
@@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
196 } 195 }
197 sch_tree_unlock(sch); 196 sch_tree_unlock(sch);
198 197
199 for (i=0; i<q->bands; i++) { 198 for (i = 0; i < q->bands; i++) {
200 if (q->queues[i] == &noop_qdisc) { 199 if (q->queues[i] == &noop_qdisc) {
201 struct Qdisc *child, *old; 200 struct Qdisc *child, *old;
201
202 child = qdisc_create_dflt(sch->dev_queue, 202 child = qdisc_create_dflt(sch->dev_queue,
203 &pfifo_qdisc_ops, 203 &pfifo_qdisc_ops,
204 TC_H_MAKE(sch->handle, i + 1)); 204 TC_H_MAKE(sch->handle, i + 1));
@@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
224 struct prio_sched_data *q = qdisc_priv(sch); 224 struct prio_sched_data *q = qdisc_priv(sch);
225 int i; 225 int i;
226 226
227 for (i=0; i<TCQ_PRIO_BANDS; i++) 227 for (i = 0; i < TCQ_PRIO_BANDS; i++)
228 q->queues[i] = &noop_qdisc; 228 q->queues[i] = &noop_qdisc;
229 229
230 if (opt == NULL) { 230 if (opt == NULL) {
@@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
232 } else { 232 } else {
233 int err; 233 int err;
234 234
235 if ((err= prio_tune(sch, opt)) != 0) 235 if ((err = prio_tune(sch, opt)) != 0)
236 return err; 236 return err;
237 } 237 }
238 return 0; 238 return 0;
@@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
245 struct tc_prio_qopt opt; 245 struct tc_prio_qopt opt;
246 246
247 opt.bands = q->bands; 247 opt.bands = q->bands;
248 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); 248 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
249 249
250 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 250 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
251 251
@@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
342 arg->count++; 342 arg->count++;
343 continue; 343 continue;
344 } 344 }
345 if (arg->fn(sch, prio+1, arg) < 0) { 345 if (arg->fn(sch, prio + 1, arg) < 0) {
346 arg->stop = 1; 346 arg->stop = 1;
347 break; 347 break;
348 } 348 }
@@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
350 } 350 }
351} 351}
352 352
353static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) 353static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
354{ 354{
355 struct prio_sched_data *q = qdisc_priv(sch); 355 struct prio_sched_data *q = qdisc_priv(sch);
356 356
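
The prio_classify() hunk above selects a band by indexing the qdisc's 16-entry priomap with skb->priority masked to TC_PRIO_MAX. A minimal user-space sketch of that lookup (the priomap values below are the conventional pfifo_fast-style defaults, used purely as an example):

#include <stdio.h>

#define TC_PRIO_MAX 15

/* Example priomap with the same shape as tc_prio_qopt.priomap: each
 * priority 0..15 maps to one of three bands, band 0 being served first. */
static const unsigned char prio2band[TC_PRIO_MAX + 1] = {
	1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};

int main(void)
{
	unsigned int prio;

	for (prio = 0; prio <= TC_PRIO_MAX; prio++)
		printf("priority %2u -> band %u\n",
		       prio, prio2band[prio & TC_PRIO_MAX]);
	return 0;
}
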
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
new file mode 100644
index 000000000000..103343408593
--- /dev/null
+++ b/net/sched/sch_qfq.c
@@ -0,0 +1,1137 @@
1/*
2 * net/sched/sch_qfq.c Quick Fair Queueing Scheduler.
3 *
4 * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/bitops.h>
14#include <linux/errno.h>
15#include <linux/netdevice.h>
16#include <linux/pkt_sched.h>
17#include <net/sch_generic.h>
18#include <net/pkt_sched.h>
19#include <net/pkt_cls.h>
20
21
22/* Quick Fair Queueing
23 ===================
24
25 Sources:
26
27 Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
28 Packet Scheduling with Tight Bandwidth Distribution Guarantees."
29
30 See also:
31 http://retis.sssup.it/~fabio/linux/qfq/
32 */
33
34/*
35
36 Virtual time computations.
37
38 S, F and V are all computed in fixed point arithmetic with
39 FRAC_BITS decimal bits.
40
41 QFQ_MAX_INDEX is the maximum index allowed for a group. We need
42 one bit per index.
43 QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
44
45 The layout of the bits is as below:
46
47 [ MTU_SHIFT ][ FRAC_BITS ]
48 [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
49 ^.__grp->index = 0
50 *.__grp->slot_shift
51
52 where MIN_SLOT_SHIFT is derived by difference from the others.
53
54 The max group index corresponds to Lmax/w_min, where
55 Lmax=1<<MTU_SHIFT, w_min = 1 .
56 From this, and knowing how many groups (MAX_INDEX) we want,
57 we can derive the shift corresponding to each group.
58
59 Because we often need to compute
60 F = S + len/w_i and V = V + len/wsum
61 instead of storing w_i store the value
62 inv_w = (1<<FRAC_BITS)/w_i
63 so we can do F = S + len * inv_w * wsum.
64 We use W_TOT in the formulas so we can easily move between
65 static and adaptive weight sum.
66
67 The per-scheduler-instance data contain all the data structures
68 for the scheduler: bitmaps and bucket lists.
69
70 */
71
72/*
73 * Maximum number of consecutive slots occupied by backlogged classes
74 * inside a group.
75 */
76#define QFQ_MAX_SLOTS 32
77
78/*
79 * Shifts used for class<->group mapping. We allow class weights that are
80 * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the
81 * group with the smallest index that can support the L_i / r_i configured
82 * for the class.
83 *
84 * grp->index is the index of the group; and grp->slot_shift
85 * is the shift for the corresponding (scaled) sigma_i.
86 */
87#define QFQ_MAX_INDEX 19
88#define QFQ_MAX_WSHIFT 16
89
90#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
91#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
92
93#define FRAC_BITS 30 /* fixed point arithmetic */
94#define ONE_FP (1UL << FRAC_BITS)
95#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
96
97#define QFQ_MTU_SHIFT 11
98#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
99
100/*
101 * Possible group states. These values are used as indexes for the bitmaps
102 * array of struct qfq_queue.
103 */
104enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
105
106struct qfq_group;
107
108struct qfq_class {
109 struct Qdisc_class_common common;
110
111 unsigned int refcnt;
112 unsigned int filter_cnt;
113
114 struct gnet_stats_basic_packed bstats;
115 struct gnet_stats_queue qstats;
116 struct gnet_stats_rate_est rate_est;
117 struct Qdisc *qdisc;
118
119 struct hlist_node next; /* Link for the slot list. */
120 u64 S, F; /* flow timestamps (exact) */
121
122 /* group we belong to. In principle we would need the index,
123 * which is log_2(lmax/weight), but we never reference it
124 * directly, only the group.
125 */
126 struct qfq_group *grp;
127
128 /* these are copied from the flowset. */
129 u32 inv_w; /* ONE_FP/weight */
130 u32 lmax; /* Max packet size for this flow. */
131};
132
133struct qfq_group {
134 u64 S, F; /* group timestamps (approx). */
135 unsigned int slot_shift; /* Slot shift. */
136 unsigned int index; /* Group index. */
137 unsigned int front; /* Index of the front slot. */
138 unsigned long full_slots; /* non-empty slots */
139
140 /* Array of RR lists of active classes. */
141 struct hlist_head slots[QFQ_MAX_SLOTS];
142};
143
144struct qfq_sched {
145 struct tcf_proto *filter_list;
146 struct Qdisc_class_hash clhash;
147
148 u64 V; /* Precise virtual time. */
149 u32 wsum; /* weight sum */
150
151 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
152 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
153};
154
155static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
156{
157 struct qfq_sched *q = qdisc_priv(sch);
158 struct Qdisc_class_common *clc;
159
160 clc = qdisc_class_find(&q->clhash, classid);
161 if (clc == NULL)
162 return NULL;
163 return container_of(clc, struct qfq_class, common);
164}
165
166static void qfq_purge_queue(struct qfq_class *cl)
167{
168 unsigned int len = cl->qdisc->q.qlen;
169
170 qdisc_reset(cl->qdisc);
171 qdisc_tree_decrease_qlen(cl->qdisc, len);
172}
173
174static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
175 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
176 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
177};
178
179/*
180 * Calculate a flow index, given its weight and maximum packet length.
181 * index = log_2(maxlen/weight) but we need to apply the scaling.
182 * This is used only once at flow creation.
183 */
184static int qfq_calc_index(u32 inv_w, unsigned int maxlen)
185{
186 u64 slot_size = (u64)maxlen * inv_w;
187 unsigned long size_map;
188 int index = 0;
189
190 size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
191 if (!size_map)
192 goto out;
193
194 index = __fls(size_map) + 1; /* basically a log_2 */
195 index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
196
197 if (index < 0)
198 index = 0;
199out:
200 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
201 (unsigned long) ONE_FP/inv_w, maxlen, index);
202
203 return index;
204}
205
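/*
 * Editorial illustration, not part of the patch: with the constants above,
 * QFQ_MIN_SLOT_SHIFT = 30 + 11 - 19 = 22. For weight = 1 (inv_w = ONE_FP)
 * and maxlen = 1500, slot_size = 1500 << 30 and size_map = slot_size >> 22
 * = 384000, __fls(384000) = 18, so qfq_calc_index() returns 19 (the largest
 * group). For weight = 16 the same packet length gives size_map = 24000,
 * __fls = 14, index = 15 -- roughly 19 - log2(weight), i.e. the scaled
 * log_2(maxlen/weight) described in the comment block at the top of the file.
 */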
206static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
207 struct nlattr **tca, unsigned long *arg)
208{
209 struct qfq_sched *q = qdisc_priv(sch);
210 struct qfq_class *cl = (struct qfq_class *)*arg;
211 struct nlattr *tb[TCA_QFQ_MAX + 1];
212 u32 weight, lmax, inv_w;
213 int i, err;
214
215 if (tca[TCA_OPTIONS] == NULL) {
216 pr_notice("qfq: no options\n");
217 return -EINVAL;
218 }
219
220 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
221 if (err < 0)
222 return err;
223
224 if (tb[TCA_QFQ_WEIGHT]) {
225 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
226 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
227 pr_notice("qfq: invalid weight %u\n", weight);
228 return -EINVAL;
229 }
230 } else
231 weight = 1;
232
233 inv_w = ONE_FP / weight;
234 weight = ONE_FP / inv_w;
235 if (q->wsum + weight > QFQ_MAX_WSUM) {
236 pr_notice("qfq: total weight out of range (%u + %u)\n",
237 weight, q->wsum);
238 return -EINVAL;
239 }
240
241 if (tb[TCA_QFQ_LMAX]) {
242 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
243 if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
244 pr_notice("qfq: invalid max length %u\n", lmax);
245 return -EINVAL;
246 }
247 } else
248 lmax = 1UL << QFQ_MTU_SHIFT;
249
250 if (cl != NULL) {
251 if (tca[TCA_RATE]) {
252 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
253 qdisc_root_sleeping_lock(sch),
254 tca[TCA_RATE]);
255 if (err)
256 return err;
257 }
258
259 sch_tree_lock(sch);
260 if (tb[TCA_QFQ_WEIGHT]) {
261 q->wsum = weight - ONE_FP / cl->inv_w;
262 cl->inv_w = inv_w;
263 }
264 sch_tree_unlock(sch);
265
266 return 0;
267 }
268
269 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
270 if (cl == NULL)
271 return -ENOBUFS;
272
273 cl->refcnt = 1;
274 cl->common.classid = classid;
275 cl->lmax = lmax;
276 cl->inv_w = inv_w;
277 i = qfq_calc_index(cl->inv_w, cl->lmax);
278
279 cl->grp = &q->groups[i];
280 q->wsum += weight;
281
282 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
283 &pfifo_qdisc_ops, classid);
284 if (cl->qdisc == NULL)
285 cl->qdisc = &noop_qdisc;
286
287 if (tca[TCA_RATE]) {
288 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
289 qdisc_root_sleeping_lock(sch),
290 tca[TCA_RATE]);
291 if (err) {
292 qdisc_destroy(cl->qdisc);
293 kfree(cl);
294 return err;
295 }
296 }
297
298 sch_tree_lock(sch);
299 qdisc_class_hash_insert(&q->clhash, &cl->common);
300 sch_tree_unlock(sch);
301
302 qdisc_class_hash_grow(sch, &q->clhash);
303
304 *arg = (unsigned long)cl;
305 return 0;
306}
307
308static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
309{
310 struct qfq_sched *q = qdisc_priv(sch);
311
312 if (cl->inv_w) {
313 q->wsum -= ONE_FP / cl->inv_w;
314 cl->inv_w = 0;
315 }
316
317 gen_kill_estimator(&cl->bstats, &cl->rate_est);
318 qdisc_destroy(cl->qdisc);
319 kfree(cl);
320}
321
322static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
323{
324 struct qfq_sched *q = qdisc_priv(sch);
325 struct qfq_class *cl = (struct qfq_class *)arg;
326
327 if (cl->filter_cnt > 0)
328 return -EBUSY;
329
330 sch_tree_lock(sch);
331
332 qfq_purge_queue(cl);
333 qdisc_class_hash_remove(&q->clhash, &cl->common);
334
335 BUG_ON(--cl->refcnt == 0);
336 /*
337 * This shouldn't happen: we "hold" one cops->get() when called
338 * from tc_ctl_tclass; the destroy method is done from cops->put().
339 */
340
341 sch_tree_unlock(sch);
342 return 0;
343}
344
345static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
346{
347 struct qfq_class *cl = qfq_find_class(sch, classid);
348
349 if (cl != NULL)
350 cl->refcnt++;
351
352 return (unsigned long)cl;
353}
354
355static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
356{
357 struct qfq_class *cl = (struct qfq_class *)arg;
358
359 if (--cl->refcnt == 0)
360 qfq_destroy_class(sch, cl);
361}
362
363static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
364{
365 struct qfq_sched *q = qdisc_priv(sch);
366
367 if (cl)
368 return NULL;
369
370 return &q->filter_list;
371}
372
373static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
374 u32 classid)
375{
376 struct qfq_class *cl = qfq_find_class(sch, classid);
377
378 if (cl != NULL)
379 cl->filter_cnt++;
380
381 return (unsigned long)cl;
382}
383
384static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
385{
386 struct qfq_class *cl = (struct qfq_class *)arg;
387
388 cl->filter_cnt--;
389}
390
391static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
392 struct Qdisc *new, struct Qdisc **old)
393{
394 struct qfq_class *cl = (struct qfq_class *)arg;
395
396 if (new == NULL) {
397 new = qdisc_create_dflt(sch->dev_queue,
398 &pfifo_qdisc_ops, cl->common.classid);
399 if (new == NULL)
400 new = &noop_qdisc;
401 }
402
403 sch_tree_lock(sch);
404 qfq_purge_queue(cl);
405 *old = cl->qdisc;
406 cl->qdisc = new;
407 sch_tree_unlock(sch);
408 return 0;
409}
410
411static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
412{
413 struct qfq_class *cl = (struct qfq_class *)arg;
414
415 return cl->qdisc;
416}
417
418static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
419 struct sk_buff *skb, struct tcmsg *tcm)
420{
421 struct qfq_class *cl = (struct qfq_class *)arg;
422 struct nlattr *nest;
423
424 tcm->tcm_parent = TC_H_ROOT;
425 tcm->tcm_handle = cl->common.classid;
426 tcm->tcm_info = cl->qdisc->handle;
427
428 nest = nla_nest_start(skb, TCA_OPTIONS);
429 if (nest == NULL)
430 goto nla_put_failure;
431 NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w);
432 NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax);
433 return nla_nest_end(skb, nest);
434
435nla_put_failure:
436 nla_nest_cancel(skb, nest);
437 return -EMSGSIZE;
438}
439
440static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
441 struct gnet_dump *d)
442{
443 struct qfq_class *cl = (struct qfq_class *)arg;
444 struct tc_qfq_stats xstats;
445
446 memset(&xstats, 0, sizeof(xstats));
447 cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
448
449 xstats.weight = ONE_FP/cl->inv_w;
450 xstats.lmax = cl->lmax;
451
452 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
453 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
454 gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
455 return -1;
456
457 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
458}
459
460static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
461{
462 struct qfq_sched *q = qdisc_priv(sch);
463 struct qfq_class *cl;
464 struct hlist_node *n;
465 unsigned int i;
466
467 if (arg->stop)
468 return;
469
470 for (i = 0; i < q->clhash.hashsize; i++) {
471 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
472 if (arg->count < arg->skip) {
473 arg->count++;
474 continue;
475 }
476 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
477 arg->stop = 1;
478 return;
479 }
480 arg->count++;
481 }
482 }
483}
484
485static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
486 int *qerr)
487{
488 struct qfq_sched *q = qdisc_priv(sch);
489 struct qfq_class *cl;
490 struct tcf_result res;
491 int result;
492
493 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
494 pr_debug("qfq_classify: found %d\n", skb->priority);
495 cl = qfq_find_class(sch, skb->priority);
496 if (cl != NULL)
497 return cl;
498 }
499
500 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
501 result = tc_classify(skb, q->filter_list, &res);
502 if (result >= 0) {
503#ifdef CONFIG_NET_CLS_ACT
504 switch (result) {
505 case TC_ACT_QUEUED:
506 case TC_ACT_STOLEN:
507 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
508 case TC_ACT_SHOT:
509 return NULL;
510 }
511#endif
512 cl = (struct qfq_class *)res.class;
513 if (cl == NULL)
514 cl = qfq_find_class(sch, res.classid);
515 return cl;
516 }
517
518 return NULL;
519}
520
521/* Generic comparison function, handling wraparound. */
522static inline int qfq_gt(u64 a, u64 b)
523{
524 return (s64)(a - b) > 0;
525}
526
527/* Round a precise timestamp to its slotted value. */
528static inline u64 qfq_round_down(u64 ts, unsigned int shift)
529{
530 return ts & ~((1ULL << shift) - 1);
531}
532
533/* return the pointer to the group with lowest index in the bitmap */
534static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
535 unsigned long bitmap)
536{
537 int index = __ffs(bitmap);
538 return &q->groups[index];
539}
540/* Calculate a mask to mimic what would be ffs_from(). */
541static inline unsigned long mask_from(unsigned long bitmap, int from)
542{
543 return bitmap & ~((1UL << from) - 1);
544}
545
546/*
547 * The state computation relies on ER=0, IR=1, EB=2, IB=3
548 * First compute eligibility comparing grp->S, q->V,
549 * then check if someone is blocking us and possibly add EB
550 */
551static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
552{
553 /* if S > V we are not eligible */
554 unsigned int state = qfq_gt(grp->S, q->V);
555 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
556 struct qfq_group *next;
557
558 if (mask) {
559 next = qfq_ffs(q, mask);
560 if (qfq_gt(grp->F, next->F))
561 state |= EB;
562 }
563
564 return state;
565}
566
567
568/*
569 * In principle
570 * q->bitmaps[dst] |= q->bitmaps[src] & mask;
571 * q->bitmaps[src] &= ~mask;
572 * but we should make sure that src != dst
573 */
574static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
575 int src, int dst)
576{
577 q->bitmaps[dst] |= q->bitmaps[src] & mask;
578 q->bitmaps[src] &= ~mask;
579}
580
581static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
582{
583 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
584 struct qfq_group *next;
585
586 if (mask) {
587 next = qfq_ffs(q, mask);
588 if (!qfq_gt(next->F, old_F))
589 return;
590 }
591
592 mask = (1UL << index) - 1;
593 qfq_move_groups(q, mask, EB, ER);
594 qfq_move_groups(q, mask, IB, IR);
595}
596
597/*
598 * perhaps
599 *
600 old_V ^= q->V;
601 old_V >>= QFQ_MIN_SLOT_SHIFT;
602 if (old_V) {
603 ...
604 }
605 *
606 */
607static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
608{
609 unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
610 unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
611
612 if (vslot != old_vslot) {
613 unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1;
614 qfq_move_groups(q, mask, IR, ER);
615 qfq_move_groups(q, mask, IB, EB);
616 }
617}
618
619
620/*
621 * XXX we should make sure that slot becomes less than 32.
622 * This is guaranteed by the input values.
623 * roundedS is always cl->S rounded on grp->slot_shift bits.
624 */
625static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
626 u64 roundedS)
627{
628 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
629 unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
630
631 hlist_add_head(&cl->next, &grp->slots[i]);
632 __set_bit(slot, &grp->full_slots);
633}
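/*
 * Worked example: with grp->S = 0 and slot_shift = 5, a class whose
 * roundedS is 64 lands in slot (64 - 0) >> 5 = 2, i.e. bucket
 * (grp->front + 2) % QFQ_MAX_SLOTS, and bit 2 of full_slots is set.
 */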
634
635/* Maybe introduce hlist_first_entry?? */
636static struct qfq_class *qfq_slot_head(struct qfq_group *grp)
637{
638 return hlist_entry(grp->slots[grp->front].first,
639 struct qfq_class, next);
640}
641
642/*
643 * Remove the head entry from the group's front slot.
644 */
645static void qfq_front_slot_remove(struct qfq_group *grp)
646{
647 struct qfq_class *cl = qfq_slot_head(grp);
648
649 BUG_ON(!cl);
650 hlist_del(&cl->next);
651 if (hlist_empty(&grp->slots[grp->front]))
652 __clear_bit(0, &grp->full_slots);
653}
654
655/*
656 * Returns the first full queue in a group. As a side effect,
657 * adjusts the bucket list so that the first non-empty bucket is at
658 * position 0 in full_slots.
659 */
660static struct qfq_class *qfq_slot_scan(struct qfq_group *grp)
661{
662 unsigned int i;
663
664 pr_debug("qfq slot_scan: grp %u full %#lx\n",
665 grp->index, grp->full_slots);
666
667 if (grp->full_slots == 0)
668 return NULL;
669
670 i = __ffs(grp->full_slots); /* zero based */
671 if (i > 0) {
672 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
673 grp->full_slots >>= i;
674 }
675
676 return qfq_slot_head(grp);
677}
678
679/*
680 * Adjust the bucket list. When the start time of a group decreases,
681 * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
682 * move the objects. The mask of occupied slots must be shifted
683 * because we use ffs() to find the first non-empty slot.
684 * This covers decreases in the group's start time, but what about
685 * increases of the start time?
686 * Here too we should make sure that i is less than 32.
687 */
688static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
689{
690 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
691
692 grp->full_slots <<= i;
693 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
694}
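/*
 * Worked example: if grp->S = 96, slot_shift = 5 and the new roundedS is
 * 64, then i = (96 - 64) >> 5 = 1: full_slots is shifted left by one and
 * front moves back by one, so the existing buckets keep their contents
 * while bucket 0 becomes free for the earlier start time.
 */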
695
696static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
697{
698 struct qfq_group *grp;
699 unsigned long ineligible;
700
701 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
702 if (ineligible) {
703 if (!q->bitmaps[ER]) {
704 grp = qfq_ffs(q, ineligible);
705 if (qfq_gt(grp->S, q->V))
706 q->V = grp->S;
707 }
708 qfq_make_eligible(q, old_V);
709 }
710}
711
712/* Return the length of the next packet in the queue (0 if the queue is empty). */
713static unsigned int qdisc_peek_len(struct Qdisc *sch)
714{
715 struct sk_buff *skb;
716
717 skb = sch->ops->peek(sch);
718 return skb ? qdisc_pkt_len(skb) : 0;
719}
720
721/*
722 * Updates the class; returns true if the group also needs to be updated.
723 */
724static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl)
725{
726 unsigned int len = qdisc_peek_len(cl->qdisc);
727
728 cl->S = cl->F;
729 if (!len)
730 qfq_front_slot_remove(grp); /* queue is empty */
731 else {
732 u64 roundedS;
733
734 cl->F = cl->S + (u64)len * cl->inv_w;
735 roundedS = qfq_round_down(cl->S, grp->slot_shift);
736 if (roundedS == grp->S)
737 return false;
738
739 qfq_front_slot_remove(grp);
740 qfq_slot_insert(grp, cl, roundedS);
741 }
742
743 return true;
744}
745
746static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
747{
748 struct qfq_sched *q = qdisc_priv(sch);
749 struct qfq_group *grp;
750 struct qfq_class *cl;
751 struct sk_buff *skb;
752 unsigned int len;
753 u64 old_V;
754
755 if (!q->bitmaps[ER])
756 return NULL;
757
758 grp = qfq_ffs(q, q->bitmaps[ER]);
759
760 cl = qfq_slot_head(grp);
761 skb = qdisc_dequeue_peeked(cl->qdisc);
762 if (!skb) {
763 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
764 return NULL;
765 }
766
767 sch->q.qlen--;
768 qdisc_bstats_update(sch, skb);
769
770 old_V = q->V;
771 len = qdisc_pkt_len(skb);
772 q->V += (u64)len * IWSUM;
773 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
774 len, (unsigned long long) cl->F, (unsigned long long) q->V);
775
776 if (qfq_update_class(grp, cl)) {
777 u64 old_F = grp->F;
778
779 cl = qfq_slot_scan(grp);
780 if (!cl)
781 __clear_bit(grp->index, &q->bitmaps[ER]);
782 else {
783 u64 roundedS = qfq_round_down(cl->S, grp->slot_shift);
784 unsigned int s;
785
786 if (grp->S == roundedS)
787 goto skip_unblock;
788 grp->S = roundedS;
789 grp->F = roundedS + (2ULL << grp->slot_shift);
790 __clear_bit(grp->index, &q->bitmaps[ER]);
791 s = qfq_calc_state(q, grp);
792 __set_bit(grp->index, &q->bitmaps[s]);
793 }
794
795 qfq_unblock_groups(q, grp->index, old_F);
796 }
797
798skip_unblock:
799 qfq_update_eligible(q, old_V);
800
801 return skb;
802}
803
804/*
805 * Assign a reasonable start time for a new flow k in group i.
806 * Admissible values for \hat(F) are multiples of \sigma_i
807 * no greater than V+\sigma_i. Larger values mean that
808 * we had a wraparound so we consider the timestamp to be stale.
809 *
810 * If F is not stale and F >= V then we set S = F.
811 * Otherwise we should assign S = V, but this may violate
812 * the ordering in ER. So, if we have groups in ER, set S to
813 * the F_j of the first group j which would be blocking us.
814 * We are guaranteed not to move S backward because
815 * otherwise our group i would still be blocked.
816 */
817static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
818{
819 unsigned long mask;
820 uint32_t limit, roundedF;
821 int slot_shift = cl->grp->slot_shift;
822
823 roundedF = qfq_round_down(cl->F, slot_shift);
824 limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
825
826 if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
827 /* timestamp was stale */
828 mask = mask_from(q->bitmaps[ER], cl->grp->index);
829 if (mask) {
830 struct qfq_group *next = qfq_ffs(q, mask);
831 if (qfq_gt(roundedF, next->F)) {
832 cl->S = next->F;
833 return;
834 }
835 }
836 cl->S = q->V;
837 } else /* timestamp is not stale */
838 cl->S = cl->F;
839}
840
841static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
842{
843 struct qfq_sched *q = qdisc_priv(sch);
844 struct qfq_group *grp;
845 struct qfq_class *cl;
846 int err;
847 u64 roundedS;
848 int s;
849
850 cl = qfq_classify(skb, sch, &err);
851 if (cl == NULL) {
852 if (err & __NET_XMIT_BYPASS)
853 sch->qstats.drops++;
854 kfree_skb(skb);
855 return err;
856 }
857 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
858
859 err = qdisc_enqueue(skb, cl->qdisc);
860 if (unlikely(err != NET_XMIT_SUCCESS)) {
861 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
862 if (net_xmit_drop_count(err)) {
863 cl->qstats.drops++;
864 sch->qstats.drops++;
865 }
866 return err;
867 }
868
869 bstats_update(&cl->bstats, skb);
870 ++sch->q.qlen;
871
872	/* If the new skb is not the head of the queue, we are done here. */
873 if (cl->qdisc->q.qlen != 1)
874 return err;
875
876	/* If we reach this point, the class's queue was idle. */
877 grp = cl->grp;
878 qfq_update_start(q, cl);
879
880 /* compute new finish time and rounded start. */
881 cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
882 roundedS = qfq_round_down(cl->S, grp->slot_shift);
883
884 /*
885 * insert cl in the correct bucket.
886 * If cl->S >= grp->S we don't need to adjust the
887 * bucket list and simply go to the insertion phase.
888 * Otherwise grp->S is decreasing, we must make room
889 * in the bucket list, and also recompute the group state.
890 * Finally, if there were no flows in this group and nobody
891 * was in ER make sure to adjust V.
892 */
893 if (grp->full_slots) {
894 if (!qfq_gt(grp->S, cl->S))
895 goto skip_update;
896
897 /* create a slot for this cl->S */
898 qfq_slot_rotate(grp, roundedS);
899		/* group was surely ineligible; remove it from IR and IB */
900 __clear_bit(grp->index, &q->bitmaps[IR]);
901 __clear_bit(grp->index, &q->bitmaps[IB]);
902 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
903 q->V = roundedS;
904
905 grp->S = roundedS;
906 grp->F = roundedS + (2ULL << grp->slot_shift);
907 s = qfq_calc_state(q, grp);
908 __set_bit(grp->index, &q->bitmaps[s]);
909
910 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
911 s, q->bitmaps[s],
912 (unsigned long long) cl->S,
913 (unsigned long long) cl->F,
914 (unsigned long long) q->V);
915
916skip_update:
917 qfq_slot_insert(grp, cl, roundedS);
918
919 return err;
920}
921
922
923static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
924 struct qfq_class *cl)
925{
926 unsigned int i, offset;
927 u64 roundedS;
928
929 roundedS = qfq_round_down(cl->S, grp->slot_shift);
930 offset = (roundedS - grp->S) >> grp->slot_shift;
931 i = (grp->front + offset) % QFQ_MAX_SLOTS;
932
933 hlist_del(&cl->next);
934 if (hlist_empty(&grp->slots[i]))
935 __clear_bit(offset, &grp->full_slots);
936}
937
938/*
939 * Called to forcibly destroy a queue.
940 * If the queue is not in the front bucket, or if it has
941 * other queues in the front bucket, we can simply remove
942 * the queue with no other side effects.
943 * Otherwise we must propagate the event up.
944 */
945static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
946{
947 struct qfq_group *grp = cl->grp;
948 unsigned long mask;
949 u64 roundedS;
950 int s;
951
952 cl->F = cl->S;
953 qfq_slot_remove(q, grp, cl);
954
955 if (!grp->full_slots) {
956 __clear_bit(grp->index, &q->bitmaps[IR]);
957 __clear_bit(grp->index, &q->bitmaps[EB]);
958 __clear_bit(grp->index, &q->bitmaps[IB]);
959
960 if (test_bit(grp->index, &q->bitmaps[ER]) &&
961 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
962 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
963 if (mask)
964 mask = ~((1UL << __fls(mask)) - 1);
965 else
966 mask = ~0UL;
967 qfq_move_groups(q, mask, EB, ER);
968 qfq_move_groups(q, mask, IB, IR);
969 }
970 __clear_bit(grp->index, &q->bitmaps[ER]);
971 } else if (hlist_empty(&grp->slots[grp->front])) {
972 cl = qfq_slot_scan(grp);
973 roundedS = qfq_round_down(cl->S, grp->slot_shift);
974 if (grp->S != roundedS) {
975 __clear_bit(grp->index, &q->bitmaps[ER]);
976 __clear_bit(grp->index, &q->bitmaps[IR]);
977 __clear_bit(grp->index, &q->bitmaps[EB]);
978 __clear_bit(grp->index, &q->bitmaps[IB]);
979 grp->S = roundedS;
980 grp->F = roundedS + (2ULL << grp->slot_shift);
981 s = qfq_calc_state(q, grp);
982 __set_bit(grp->index, &q->bitmaps[s]);
983 }
984 }
985
986 qfq_update_eligible(q, q->V);
987}
988
989static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
990{
991 struct qfq_sched *q = qdisc_priv(sch);
992 struct qfq_class *cl = (struct qfq_class *)arg;
993
994 if (cl->qdisc->q.qlen == 0)
995 qfq_deactivate_class(q, cl);
996}
997
998static unsigned int qfq_drop(struct Qdisc *sch)
999{
1000 struct qfq_sched *q = qdisc_priv(sch);
1001 struct qfq_group *grp;
1002 unsigned int i, j, len;
1003
1004 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1005 grp = &q->groups[i];
1006 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1007 struct qfq_class *cl;
1008 struct hlist_node *n;
1009
1010 hlist_for_each_entry(cl, n, &grp->slots[j], next) {
1011
1012 if (!cl->qdisc->ops->drop)
1013 continue;
1014
1015 len = cl->qdisc->ops->drop(cl->qdisc);
1016 if (len > 0) {
1017 sch->q.qlen--;
1018 if (!cl->qdisc->q.qlen)
1019 qfq_deactivate_class(q, cl);
1020
1021 return len;
1022 }
1023 }
1024 }
1025 }
1026
1027 return 0;
1028}
1029
1030static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1031{
1032 struct qfq_sched *q = qdisc_priv(sch);
1033 struct qfq_group *grp;
1034 int i, j, err;
1035
1036 err = qdisc_class_hash_init(&q->clhash);
1037 if (err < 0)
1038 return err;
1039
1040 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1041 grp = &q->groups[i];
1042 grp->index = i;
1043 grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS
1044 - (QFQ_MAX_INDEX - i);
1045 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1046 INIT_HLIST_HEAD(&grp->slots[j]);
1047 }
1048
1049 return 0;
1050}
1051
1052static void qfq_reset_qdisc(struct Qdisc *sch)
1053{
1054 struct qfq_sched *q = qdisc_priv(sch);
1055 struct qfq_group *grp;
1056 struct qfq_class *cl;
1057 struct hlist_node *n, *tmp;
1058 unsigned int i, j;
1059
1060 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1061 grp = &q->groups[i];
1062 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1063 hlist_for_each_entry_safe(cl, n, tmp,
1064 &grp->slots[j], next) {
1065 qfq_deactivate_class(q, cl);
1066 }
1067 }
1068 }
1069
1070 for (i = 0; i < q->clhash.hashsize; i++) {
1071 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1072 qdisc_reset(cl->qdisc);
1073 }
1074 sch->q.qlen = 0;
1075}
1076
1077static void qfq_destroy_qdisc(struct Qdisc *sch)
1078{
1079 struct qfq_sched *q = qdisc_priv(sch);
1080 struct qfq_class *cl;
1081 struct hlist_node *n, *next;
1082 unsigned int i;
1083
1084 tcf_destroy_chain(&q->filter_list);
1085
1086 for (i = 0; i < q->clhash.hashsize; i++) {
1087 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1088 common.hnode) {
1089 qfq_destroy_class(sch, cl);
1090 }
1091 }
1092 qdisc_class_hash_destroy(&q->clhash);
1093}
1094
1095static const struct Qdisc_class_ops qfq_class_ops = {
1096 .change = qfq_change_class,
1097 .delete = qfq_delete_class,
1098 .get = qfq_get_class,
1099 .put = qfq_put_class,
1100 .tcf_chain = qfq_tcf_chain,
1101 .bind_tcf = qfq_bind_tcf,
1102 .unbind_tcf = qfq_unbind_tcf,
1103 .graft = qfq_graft_class,
1104 .leaf = qfq_class_leaf,
1105 .qlen_notify = qfq_qlen_notify,
1106 .dump = qfq_dump_class,
1107 .dump_stats = qfq_dump_class_stats,
1108 .walk = qfq_walk,
1109};
1110
1111static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1112 .cl_ops = &qfq_class_ops,
1113 .id = "qfq",
1114 .priv_size = sizeof(struct qfq_sched),
1115 .enqueue = qfq_enqueue,
1116 .dequeue = qfq_dequeue,
1117 .peek = qdisc_peek_dequeued,
1118 .drop = qfq_drop,
1119 .init = qfq_init_qdisc,
1120 .reset = qfq_reset_qdisc,
1121 .destroy = qfq_destroy_qdisc,
1122 .owner = THIS_MODULE,
1123};
1124
1125static int __init qfq_init(void)
1126{
1127 return register_qdisc(&qfq_qdisc_ops);
1128}
1129
1130static void __exit qfq_exit(void)
1131{
1132 unregister_qdisc(&qfq_qdisc_ops);
1133}
1134
1135module_init(qfq_init);
1136module_exit(qfq_exit);
1137MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9f98dbd32d4c..6649463da1b6 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
36 if RED works correctly. 36 if RED works correctly.
37 */ 37 */
38 38
39struct red_sched_data 39struct red_sched_data {
40{
41 u32 limit; /* HARD maximal queue length */ 40 u32 limit; /* HARD maximal queue length */
42 unsigned char flags; 41 unsigned char flags;
43 struct red_parms parms; 42 struct red_parms parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
55 return q->flags & TC_RED_HARDDROP; 54 return q->flags & TC_RED_HARDDROP;
56} 55}
57 56
58static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) 57static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
59{ 58{
60 struct red_sched_data *q = qdisc_priv(sch); 59 struct red_sched_data *q = qdisc_priv(sch);
61 struct Qdisc *child = q->qdisc; 60 struct Qdisc *child = q->qdisc;
@@ -67,29 +66,29 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
67 red_end_of_idle_period(&q->parms); 66 red_end_of_idle_period(&q->parms);
68 67
69 switch (red_action(&q->parms, q->parms.qavg)) { 68 switch (red_action(&q->parms, q->parms.qavg)) {
70 case RED_DONT_MARK: 69 case RED_DONT_MARK:
71 break; 70 break;
72 71
73 case RED_PROB_MARK: 72 case RED_PROB_MARK:
74 sch->qstats.overlimits++; 73 sch->qstats.overlimits++;
75 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { 74 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76 q->stats.prob_drop++; 75 q->stats.prob_drop++;
77 goto congestion_drop; 76 goto congestion_drop;
78 } 77 }
79 78
80 q->stats.prob_mark++; 79 q->stats.prob_mark++;
81 break; 80 break;
82 81
83 case RED_HARD_MARK: 82 case RED_HARD_MARK:
84 sch->qstats.overlimits++; 83 sch->qstats.overlimits++;
85 if (red_use_harddrop(q) || !red_use_ecn(q) || 84 if (red_use_harddrop(q) || !red_use_ecn(q) ||
86 !INET_ECN_set_ce(skb)) { 85 !INET_ECN_set_ce(skb)) {
87 q->stats.forced_drop++; 86 q->stats.forced_drop++;
88 goto congestion_drop; 87 goto congestion_drop;
89 } 88 }
90 89
91 q->stats.forced_mark++; 90 q->stats.forced_mark++;
92 break; 91 break;
93 } 92 }
94 93
95 ret = qdisc_enqueue(skb, child); 94 ret = qdisc_enqueue(skb, child);
@@ -106,7 +105,7 @@ congestion_drop:
106 return NET_XMIT_CN; 105 return NET_XMIT_CN;
107} 106}
108 107
109static struct sk_buff * red_dequeue(struct Qdisc* sch) 108static struct sk_buff *red_dequeue(struct Qdisc *sch)
110{ 109{
111 struct sk_buff *skb; 110 struct sk_buff *skb;
112 struct red_sched_data *q = qdisc_priv(sch); 111 struct red_sched_data *q = qdisc_priv(sch);
@@ -123,7 +122,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
123 return skb; 122 return skb;
124} 123}
125 124
126static struct sk_buff * red_peek(struct Qdisc* sch) 125static struct sk_buff *red_peek(struct Qdisc *sch)
127{ 126{
128 struct red_sched_data *q = qdisc_priv(sch); 127 struct red_sched_data *q = qdisc_priv(sch);
129 struct Qdisc *child = q->qdisc; 128 struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
131 return child->ops->peek(child); 130 return child->ops->peek(child);
132} 131}
133 132
134static unsigned int red_drop(struct Qdisc* sch) 133static unsigned int red_drop(struct Qdisc *sch)
135{ 134{
136 struct red_sched_data *q = qdisc_priv(sch); 135 struct red_sched_data *q = qdisc_priv(sch);
137 struct Qdisc *child = q->qdisc; 136 struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
150 return 0; 149 return 0;
151} 150}
152 151
153static void red_reset(struct Qdisc* sch) 152static void red_reset(struct Qdisc *sch)
154{ 153{
155 struct red_sched_data *q = qdisc_priv(sch); 154 struct red_sched_data *q = qdisc_priv(sch);
156 155
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
217 return 0; 216 return 0;
218} 217}
219 218
220static int red_init(struct Qdisc* sch, struct nlattr *opt) 219static int red_init(struct Qdisc *sch, struct nlattr *opt)
221{ 220{
222 struct red_sched_data *q = qdisc_priv(sch); 221 struct red_sched_data *q = qdisc_priv(sch);
223 222
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
new file mode 100644
index 000000000000..0a833d0c1f61
--- /dev/null
+++ b/net/sched/sch_sfb.c
@@ -0,0 +1,709 @@
1/*
2 * net/sched/sch_sfb.c Stochastic Fair Blue
3 *
4 * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue:
12 * A New Class of Active Queue Management Algorithms.
13 * U. Michigan CSE-TR-387-99, April 1999.
14 *
15 * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
16 *
17 */
18
19#include <linux/module.h>
20#include <linux/types.h>
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/skbuff.h>
24#include <linux/random.h>
25#include <linux/jhash.h>
26#include <net/ip.h>
27#include <net/pkt_sched.h>
28#include <net/inet_ecn.h>
29
30/*
31 * SFB uses two B[l][n]: L x N arrays of bins (L levels, N bins per level).
32 * This implementation uses L = 8 and N = 16.
33 * This permits us to split one 32-bit hash (provided per packet by rxhash or
34 * external classifier) into 8 subhashes of 4 bits.
35 */
36#define SFB_BUCKET_SHIFT 4
37#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */
38#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1)
39#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */
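/*
 * For example, a 32-bit hash of 0xA3C1B2D4 is consumed 4 bits at a time
 * (low bits first): level 0 uses bin 0x4, level 1 bin 0xD, level 2 bin
 * 0x2, ... up to level 7 which uses bin 0xA.
 */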
40
41/* The SFB algorithm uses virtual queues, named "bins" */
42struct sfb_bucket {
43 u16 qlen; /* length of virtual queue */
44 u16 p_mark; /* marking probability */
45};
46
47/* We use double buffering right before a hash change
48 * (Section 4.4 of the SFB reference: moving hash functions)
49 */
50struct sfb_bins {
51 u32 perturbation; /* jhash perturbation */
52 struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
53};
54
55struct sfb_sched_data {
56 struct Qdisc *qdisc;
57 struct tcf_proto *filter_list;
58 unsigned long rehash_interval;
59 unsigned long warmup_time; /* double buffering warmup time in jiffies */
60 u32 max;
61 u32 bin_size; /* maximum queue length per bin */
62 u32 increment; /* d1 */
63 u32 decrement; /* d2 */
64 u32 limit; /* HARD maximal queue length */
65 u32 penalty_rate;
66 u32 penalty_burst;
67 u32 tokens_avail;
68 unsigned long rehash_time;
69 unsigned long token_time;
70
71 u8 slot; /* current active bins (0 or 1) */
72 bool double_buffering;
73 struct sfb_bins bins[2];
74
75 struct {
76 u32 earlydrop;
77 u32 penaltydrop;
78 u32 bucketdrop;
79 u32 queuedrop;
80 u32 childdrop; /* drops in child qdisc */
81 u32 marked; /* ECN mark */
82 } stats;
83};
84
85/*
86 * Each queued skb might be hashed into one or two bins.
87 * We store the two hash values in skb_cb.
88 * (A zero value means double buffering was not used.)
89 */
90struct sfb_skb_cb {
91 u32 hashes[2];
92};
93
94static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
95{
96 BUILD_BUG_ON(sizeof(skb->cb) <
97 sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb));
98 return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
99}
100
101/*
102 * If using the 'internal' SFB flow classifier, the hash comes from the skb rxhash.
103 * If using an external classifier, the hash comes from the classid.
104 */
105static u32 sfb_hash(const struct sk_buff *skb, u32 slot)
106{
107 return sfb_skb_cb(skb)->hashes[slot];
108}
109
110/* Probabilities are coded as Q0.16 fixed-point values,
111 * with 0xFFFF representing 65535/65536 (almost 1.0)
112 * Addition and subtraction are saturating in [0, 65535]
113 */
114static u32 prob_plus(u32 p1, u32 p2)
115{
116 u32 res = p1 + p2;
117
118 return min_t(u32, res, SFB_MAX_PROB);
119}
120
121static u32 prob_minus(u32 p1, u32 p2)
122{
123 return p1 > p2 ? p1 - p2 : 0;
124}
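/*
 * Example of the saturating Q0.16 arithmetic (assuming SFB_MAX_PROB is
 * 0xFFFF, i.e. 65535/65536 as described above): prob_plus(0xF000, 0x2000)
 * clamps to 0xFFFF instead of overflowing, and prob_minus(0x1000, 0x2000)
 * returns 0 instead of going negative.
 */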
125
126static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
127{
128 int i;
129 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
130
131 for (i = 0; i < SFB_LEVELS; i++) {
132 u32 hash = sfbhash & SFB_BUCKET_MASK;
133
134 sfbhash >>= SFB_BUCKET_SHIFT;
135 if (b[hash].qlen < 0xFFFF)
136 b[hash].qlen++;
137 b += SFB_NUMBUCKETS; /* next level */
138 }
139}
140
141static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
142{
143 u32 sfbhash;
144
145 sfbhash = sfb_hash(skb, 0);
146 if (sfbhash)
147 increment_one_qlen(sfbhash, 0, q);
148
149 sfbhash = sfb_hash(skb, 1);
150 if (sfbhash)
151 increment_one_qlen(sfbhash, 1, q);
152}
153
154static void decrement_one_qlen(u32 sfbhash, u32 slot,
155 struct sfb_sched_data *q)
156{
157 int i;
158 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
159
160 for (i = 0; i < SFB_LEVELS; i++) {
161 u32 hash = sfbhash & SFB_BUCKET_MASK;
162
163 sfbhash >>= SFB_BUCKET_SHIFT;
164 if (b[hash].qlen > 0)
165 b[hash].qlen--;
166 b += SFB_NUMBUCKETS; /* next level */
167 }
168}
169
170static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
171{
172 u32 sfbhash;
173
174 sfbhash = sfb_hash(skb, 0);
175 if (sfbhash)
176 decrement_one_qlen(sfbhash, 0, q);
177
178 sfbhash = sfb_hash(skb, 1);
179 if (sfbhash)
180 decrement_one_qlen(sfbhash, 1, q);
181}
182
183static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
184{
185 b->p_mark = prob_minus(b->p_mark, q->decrement);
186}
187
188static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
189{
190 b->p_mark = prob_plus(b->p_mark, q->increment);
191}
192
193static void sfb_zero_all_buckets(struct sfb_sched_data *q)
194{
195 memset(&q->bins, 0, sizeof(q->bins));
196}
197
198/*
199 * Compute the max qlen, max p_mark, and average p_mark over the current bins.
200 */
201static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q)
202{
203 int i;
204 u32 qlen = 0, prob = 0, totalpm = 0;
205 const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0];
206
207 for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) {
208 if (qlen < b->qlen)
209 qlen = b->qlen;
210 totalpm += b->p_mark;
211 if (prob < b->p_mark)
212 prob = b->p_mark;
213 b++;
214 }
215 *prob_r = prob;
216 *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS);
217 return qlen;
218}
219
220
221static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
222{
223 q->bins[slot].perturbation = net_random();
224}
225
226static void sfb_swap_slot(struct sfb_sched_data *q)
227{
228 sfb_init_perturbation(q->slot, q);
229 q->slot ^= 1;
230 q->double_buffering = false;
231}
232
233/* Non-elastic flows are allowed to use part of the bandwidth, expressed
234 * in "penalty_rate" packets per second, with a burst of "penalty_burst" packets.
235 */
236static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
237{
238 if (q->penalty_rate == 0 || q->penalty_burst == 0)
239 return true;
240
241 if (q->tokens_avail < 1) {
242 unsigned long age = min(10UL * HZ, jiffies - q->token_time);
243
244 q->tokens_avail = (age * q->penalty_rate) / HZ;
245 if (q->tokens_avail > q->penalty_burst)
246 q->tokens_avail = q->penalty_burst;
247 q->token_time = jiffies;
248 if (q->tokens_avail < 1)
249 return true;
250 }
251
252 q->tokens_avail--;
253 return false;
254}
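/*
 * Example with the default parameters (penalty_rate = 10, penalty_burst = 20,
 * see sfb_default_ops below): after one second without penalty traffic the
 * bucket refills min(1s * 10, 20) = 10 tokens; each packet let through then
 * consumes one token, and once the bucket is empty sfb_rate_limit() returns
 * true and the caller drops the packet as a penalty drop.
 */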
255
256static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
257 int *qerr, u32 *salt)
258{
259 struct tcf_result res;
260 int result;
261
262 result = tc_classify(skb, q->filter_list, &res);
263 if (result >= 0) {
264#ifdef CONFIG_NET_CLS_ACT
265 switch (result) {
266 case TC_ACT_STOLEN:
267 case TC_ACT_QUEUED:
268 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
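			/* fall through */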
269 case TC_ACT_SHOT:
270 return false;
271 }
272#endif
273 *salt = TC_H_MIN(res.classid);
274 return true;
275 }
276 return false;
277}
278
279static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
280{
281
282 struct sfb_sched_data *q = qdisc_priv(sch);
283 struct Qdisc *child = q->qdisc;
284 int i;
285 u32 p_min = ~0;
286 u32 minqlen = ~0;
287 u32 r, slot, salt, sfbhash;
288 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
289
290 if (q->rehash_interval > 0) {
291 unsigned long limit = q->rehash_time + q->rehash_interval;
292
293 if (unlikely(time_after(jiffies, limit))) {
294 sfb_swap_slot(q);
295 q->rehash_time = jiffies;
296 } else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
297 time_after(jiffies, limit - q->warmup_time))) {
298 q->double_buffering = true;
299 }
300 }
301
302 if (q->filter_list) {
303		/* If using an external classifier, get the result and record it. */
304 if (!sfb_classify(skb, q, &ret, &salt))
305 goto other_drop;
306 } else {
307 salt = skb_get_rxhash(skb);
308 }
309
310 slot = q->slot;
311
312 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
313 if (!sfbhash)
314 sfbhash = 1;
315 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
316
317 for (i = 0; i < SFB_LEVELS; i++) {
318 u32 hash = sfbhash & SFB_BUCKET_MASK;
319 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
320
321 sfbhash >>= SFB_BUCKET_SHIFT;
322 if (b->qlen == 0)
323 decrement_prob(b, q);
324 else if (b->qlen >= q->bin_size)
325 increment_prob(b, q);
326 if (minqlen > b->qlen)
327 minqlen = b->qlen;
328 if (p_min > b->p_mark)
329 p_min = b->p_mark;
330 }
331
332 slot ^= 1;
333 sfb_skb_cb(skb)->hashes[slot] = 0;
334
335 if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) {
336 sch->qstats.overlimits++;
337 if (minqlen >= q->max)
338 q->stats.bucketdrop++;
339 else
340 q->stats.queuedrop++;
341 goto drop;
342 }
343
344 if (unlikely(p_min >= SFB_MAX_PROB)) {
345 /* Inelastic flow */
346 if (q->double_buffering) {
347 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
348 if (!sfbhash)
349 sfbhash = 1;
350 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
351
352 for (i = 0; i < SFB_LEVELS; i++) {
353 u32 hash = sfbhash & SFB_BUCKET_MASK;
354 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
355
356 sfbhash >>= SFB_BUCKET_SHIFT;
357 if (b->qlen == 0)
358 decrement_prob(b, q);
359 else if (b->qlen >= q->bin_size)
360 increment_prob(b, q);
361 }
362 }
363 if (sfb_rate_limit(skb, q)) {
364 sch->qstats.overlimits++;
365 q->stats.penaltydrop++;
366 goto drop;
367 }
368 goto enqueue;
369 }
370
371 r = net_random() & SFB_MAX_PROB;
372
373 if (unlikely(r < p_min)) {
374 if (unlikely(p_min > SFB_MAX_PROB / 2)) {
375 /* If we're marking that many packets, then either
376 * this flow is unresponsive, or we're badly congested.
377 * In either case, we want to start dropping packets.
378 */
379 if (r < (p_min - SFB_MAX_PROB / 2) * 2) {
380 q->stats.earlydrop++;
381 goto drop;
382 }
383 }
384 if (INET_ECN_set_ce(skb)) {
385 q->stats.marked++;
386 } else {
387 q->stats.earlydrop++;
388 goto drop;
389 }
390 }
391
392enqueue:
393 ret = qdisc_enqueue(skb, child);
394 if (likely(ret == NET_XMIT_SUCCESS)) {
395 sch->q.qlen++;
396 increment_qlen(skb, q);
397 } else if (net_xmit_drop_count(ret)) {
398 q->stats.childdrop++;
399 sch->qstats.drops++;
400 }
401 return ret;
402
403drop:
404 qdisc_drop(skb, sch);
405 return NET_XMIT_CN;
406other_drop:
407 if (ret & __NET_XMIT_BYPASS)
408 sch->qstats.drops++;
409 kfree_skb(skb);
410 return ret;
411}
412
413static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
414{
415 struct sfb_sched_data *q = qdisc_priv(sch);
416 struct Qdisc *child = q->qdisc;
417 struct sk_buff *skb;
418
419 skb = child->dequeue(q->qdisc);
420
421 if (skb) {
422 qdisc_bstats_update(sch, skb);
423 sch->q.qlen--;
424 decrement_qlen(skb, q);
425 }
426
427 return skb;
428}
429
430static struct sk_buff *sfb_peek(struct Qdisc *sch)
431{
432 struct sfb_sched_data *q = qdisc_priv(sch);
433 struct Qdisc *child = q->qdisc;
434
435 return child->ops->peek(child);
436}
437
438/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */
439
440static void sfb_reset(struct Qdisc *sch)
441{
442 struct sfb_sched_data *q = qdisc_priv(sch);
443
444 qdisc_reset(q->qdisc);
445 sch->q.qlen = 0;
446 q->slot = 0;
447 q->double_buffering = false;
448 sfb_zero_all_buckets(q);
449 sfb_init_perturbation(0, q);
450}
451
452static void sfb_destroy(struct Qdisc *sch)
453{
454 struct sfb_sched_data *q = qdisc_priv(sch);
455
456 tcf_destroy_chain(&q->filter_list);
457 qdisc_destroy(q->qdisc);
458}
459
460static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
461 [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) },
462};
463
464static const struct tc_sfb_qopt sfb_default_ops = {
465 .rehash_interval = 600 * MSEC_PER_SEC,
466 .warmup_time = 60 * MSEC_PER_SEC,
467 .limit = 0,
468 .max = 25,
469 .bin_size = 20,
470 .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */
471 .decrement = (SFB_MAX_PROB + 3000) / 6000,
472 .penalty_rate = 10,
473 .penalty_burst = 20,
474};
475
476static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
477{
478 struct sfb_sched_data *q = qdisc_priv(sch);
479 struct Qdisc *child;
480 struct nlattr *tb[TCA_SFB_MAX + 1];
481 const struct tc_sfb_qopt *ctl = &sfb_default_ops;
482 u32 limit;
483 int err;
484
485 if (opt) {
486 err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy);
487 if (err < 0)
488 return -EINVAL;
489
490 if (tb[TCA_SFB_PARMS] == NULL)
491 return -EINVAL;
492
493 ctl = nla_data(tb[TCA_SFB_PARMS]);
494 }
495
496 limit = ctl->limit;
497 if (limit == 0)
498 limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
499
500 child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
501 if (IS_ERR(child))
502 return PTR_ERR(child);
503
504 sch_tree_lock(sch);
505
506 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
507 qdisc_destroy(q->qdisc);
508 q->qdisc = child;
509
510 q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
511 q->warmup_time = msecs_to_jiffies(ctl->warmup_time);
512 q->rehash_time = jiffies;
513 q->limit = limit;
514 q->increment = ctl->increment;
515 q->decrement = ctl->decrement;
516 q->max = ctl->max;
517 q->bin_size = ctl->bin_size;
518 q->penalty_rate = ctl->penalty_rate;
519 q->penalty_burst = ctl->penalty_burst;
520 q->tokens_avail = ctl->penalty_burst;
521 q->token_time = jiffies;
522
523 q->slot = 0;
524 q->double_buffering = false;
525 sfb_zero_all_buckets(q);
526 sfb_init_perturbation(0, q);
527 sfb_init_perturbation(1, q);
528
529 sch_tree_unlock(sch);
530
531 return 0;
532}
533
534static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
535{
536 struct sfb_sched_data *q = qdisc_priv(sch);
537
538 q->qdisc = &noop_qdisc;
539 return sfb_change(sch, opt);
540}
541
542static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
543{
544 struct sfb_sched_data *q = qdisc_priv(sch);
545 struct nlattr *opts;
546 struct tc_sfb_qopt opt = {
547 .rehash_interval = jiffies_to_msecs(q->rehash_interval),
548 .warmup_time = jiffies_to_msecs(q->warmup_time),
549 .limit = q->limit,
550 .max = q->max,
551 .bin_size = q->bin_size,
552 .increment = q->increment,
553 .decrement = q->decrement,
554 .penalty_rate = q->penalty_rate,
555 .penalty_burst = q->penalty_burst,
556 };
557
558 sch->qstats.backlog = q->qdisc->qstats.backlog;
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt);
561 return nla_nest_end(skb, opts);
562
563nla_put_failure:
564 nla_nest_cancel(skb, opts);
565 return -EMSGSIZE;
566}
567
568static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
569{
570 struct sfb_sched_data *q = qdisc_priv(sch);
571 struct tc_sfb_xstats st = {
572 .earlydrop = q->stats.earlydrop,
573 .penaltydrop = q->stats.penaltydrop,
574 .bucketdrop = q->stats.bucketdrop,
575 .queuedrop = q->stats.queuedrop,
576 .childdrop = q->stats.childdrop,
577 .marked = q->stats.marked,
578 };
579
580 st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q);
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
586 struct sk_buff *skb, struct tcmsg *tcm)
587{
588 return -ENOSYS;
589}
590
591static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
592 struct Qdisc **old)
593{
594 struct sfb_sched_data *q = qdisc_priv(sch);
595
596 if (new == NULL)
597 new = &noop_qdisc;
598
599 sch_tree_lock(sch);
600 *old = q->qdisc;
601 q->qdisc = new;
602 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
603 qdisc_reset(*old);
604 sch_tree_unlock(sch);
605 return 0;
606}
607
608static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
609{
610 struct sfb_sched_data *q = qdisc_priv(sch);
611
612 return q->qdisc;
613}
614
615static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
616{
617 return 1;
618}
619
620static void sfb_put(struct Qdisc *sch, unsigned long arg)
621{
622}
623
624static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
625 struct nlattr **tca, unsigned long *arg)
626{
627 return -ENOSYS;
628}
629
630static int sfb_delete(struct Qdisc *sch, unsigned long cl)
631{
632 return -ENOSYS;
633}
634
635static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
636{
637 if (!walker->stop) {
638 if (walker->count >= walker->skip)
639 if (walker->fn(sch, 1, walker) < 0) {
640 walker->stop = 1;
641 return;
642 }
643 walker->count++;
644 }
645}
646
647static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
648{
649 struct sfb_sched_data *q = qdisc_priv(sch);
650
651 if (cl)
652 return NULL;
653 return &q->filter_list;
654}
655
656static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
657 u32 classid)
658{
659 return 0;
660}
661
662
663static const struct Qdisc_class_ops sfb_class_ops = {
664 .graft = sfb_graft,
665 .leaf = sfb_leaf,
666 .get = sfb_get,
667 .put = sfb_put,
668 .change = sfb_change_class,
669 .delete = sfb_delete,
670 .walk = sfb_walk,
671 .tcf_chain = sfb_find_tcf,
672 .bind_tcf = sfb_bind,
673 .unbind_tcf = sfb_put,
674 .dump = sfb_dump_class,
675};
676
677static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
678 .id = "sfb",
679 .priv_size = sizeof(struct sfb_sched_data),
680 .cl_ops = &sfb_class_ops,
681 .enqueue = sfb_enqueue,
682 .dequeue = sfb_dequeue,
683 .peek = sfb_peek,
684 .init = sfb_init,
685 .reset = sfb_reset,
686 .destroy = sfb_destroy,
687 .change = sfb_change,
688 .dump = sfb_dump,
689 .dump_stats = sfb_dump_stats,
690 .owner = THIS_MODULE,
691};
692
693static int __init sfb_module_init(void)
694{
695 return register_qdisc(&sfb_qdisc_ops);
696}
697
698static void __exit sfb_module_exit(void)
699{
700 unregister_qdisc(&sfb_qdisc_ops);
701}
702
703module_init(sfb_module_init)
704module_exit(sfb_module_exit)
705
706MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline");
707MODULE_AUTHOR("Juliusz Chroboczek");
708MODULE_AUTHOR("Eric Dumazet");
709MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index edea8cefec6c..7ef87f9eb675 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/jhash.h> 22#include <linux/jhash.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/vmalloc.h>
24#include <net/ip.h> 25#include <net/ip.h>
25#include <net/netlink.h> 26#include <net/netlink.h>
26#include <net/pkt_sched.h> 27#include <net/pkt_sched.h>
@@ -76,7 +77,8 @@
76#define SFQ_DEPTH 128 /* max number of packets per flow */ 77#define SFQ_DEPTH 128 /* max number of packets per flow */
77#define SFQ_SLOTS 128 /* max number of flows */ 78#define SFQ_SLOTS 128 /* max number of flows */
78#define SFQ_EMPTY_SLOT 255 79#define SFQ_EMPTY_SLOT 255
79#define SFQ_HASH_DIVISOR 1024 80#define SFQ_DEFAULT_HASH_DIVISOR 1024
81
80/* We use 16 bits to store allot, and want to handle packets up to 64K 82/* We use 16 bits to store allot, and want to handle packets up to 64K
81 * Scale allot by 8 (1<<3) so that no overflow occurs. 83 * Scale allot by 8 (1<<3) so that no overflow occurs.
82 */ 84 */
@@ -92,8 +94,7 @@ typedef unsigned char sfq_index;
92 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] 94 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
93 * are 'pointers' to dep[] array 95 * are 'pointers' to dep[] array
94 */ 96 */
95struct sfq_head 97struct sfq_head {
96{
97 sfq_index next; 98 sfq_index next;
98 sfq_index prev; 99 sfq_index prev;
99}; 100};
@@ -108,13 +109,12 @@ struct sfq_slot {
108 short allot; /* credit for this slot */ 109 short allot; /* credit for this slot */
109}; 110};
110 111
111struct sfq_sched_data 112struct sfq_sched_data {
112{
113/* Parameters */ 113/* Parameters */
114 int perturb_period; 114 int perturb_period;
115 unsigned quantum; /* Allotment per round: MUST BE >= MTU */ 115 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
116 int limit; 116 int limit;
117 117 unsigned int divisor; /* number of slots in hash table */
118/* Variables */ 118/* Variables */
119 struct tcf_proto *filter_list; 119 struct tcf_proto *filter_list;
120 struct timer_list perturb_timer; 120 struct timer_list perturb_timer;
@@ -122,7 +122,7 @@ struct sfq_sched_data
122 sfq_index cur_depth; /* depth of longest slot */ 122 sfq_index cur_depth; /* depth of longest slot */
123 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ 123 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
124 struct sfq_slot *tail; /* current slot in round */ 124 struct sfq_slot *tail; /* current slot in round */
125 sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ 125 sfq_index *ht; /* Hash table (divisor slots) */
126 struct sfq_slot slots[SFQ_SLOTS]; 126 struct sfq_slot slots[SFQ_SLOTS];
127 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ 127 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
128}; 128};
@@ -137,12 +137,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
137 return &q->dep[val - SFQ_SLOTS]; 137 return &q->dep[val - SFQ_SLOTS];
138} 138}
139 139
140static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) 140static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
141{ 141{
142 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); 142 return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
143} 143}
144 144
145static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) 145static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
146{ 146{
147 u32 h, h2; 147 u32 h, h2;
148 148
@@ -157,19 +157,19 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
157 iph = ip_hdr(skb); 157 iph = ip_hdr(skb);
158 h = (__force u32)iph->daddr; 158 h = (__force u32)iph->daddr;
159 h2 = (__force u32)iph->saddr ^ iph->protocol; 159 h2 = (__force u32)iph->saddr ^ iph->protocol;
160 if (iph->frag_off & htons(IP_MF|IP_OFFSET)) 160 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
161 break; 161 break;
162 poff = proto_ports_offset(iph->protocol); 162 poff = proto_ports_offset(iph->protocol);
163 if (poff >= 0 && 163 if (poff >= 0 &&
164 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { 164 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
165 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
166 h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff); 166 h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
167 } 167 }
168 break; 168 break;
169 } 169 }
170 case htons(ETH_P_IPV6): 170 case htons(ETH_P_IPV6):
171 { 171 {
172 struct ipv6hdr *iph; 172 const struct ipv6hdr *iph;
173 int poff; 173 int poff;
174 174
175 if (!pskb_network_may_pull(skb, sizeof(*iph))) 175 if (!pskb_network_may_pull(skb, sizeof(*iph)))
@@ -181,7 +181,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
181 if (poff >= 0 && 181 if (poff >= 0 &&
182 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { 182 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
183 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
184 h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff); 184 h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
185 } 185 }
186 break; 186 break;
187 } 187 }
@@ -203,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
203 203
204 if (TC_H_MAJ(skb->priority) == sch->handle && 204 if (TC_H_MAJ(skb->priority) == sch->handle &&
205 TC_H_MIN(skb->priority) > 0 && 205 TC_H_MIN(skb->priority) > 0 &&
206 TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) 206 TC_H_MIN(skb->priority) <= q->divisor)
207 return TC_H_MIN(skb->priority); 207 return TC_H_MIN(skb->priority);
208 208
209 if (!q->filter_list) 209 if (!q->filter_list)
@@ -221,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
221 return 0; 221 return 0;
222 } 222 }
223#endif 223#endif
224 if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) 224 if (TC_H_MIN(res.classid) <= q->divisor)
225 return TC_H_MIN(res.classid); 225 return TC_H_MIN(res.classid);
226 } 226 }
227 return 0; 227 return 0;
@@ -491,13 +491,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
491 if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 491 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
492 return -EINVAL; 492 return -EINVAL;
493 493
494 if (ctl->divisor &&
495 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
496 return -EINVAL;
497
494 sch_tree_lock(sch); 498 sch_tree_lock(sch);
495 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); 499 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
496 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 500 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
497 q->perturb_period = ctl->perturb_period * HZ; 501 q->perturb_period = ctl->perturb_period * HZ;
498 if (ctl->limit) 502 if (ctl->limit)
499 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); 503 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
500 504 if (ctl->divisor)
505 q->divisor = ctl->divisor;
501 qlen = sch->q.qlen; 506 qlen = sch->q.qlen;
502 while (sch->q.qlen > q->limit) 507 while (sch->q.qlen > q->limit)
503 sfq_drop(sch); 508 sfq_drop(sch);
@@ -515,15 +520,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
515static int sfq_init(struct Qdisc *sch, struct nlattr *opt) 520static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
516{ 521{
517 struct sfq_sched_data *q = qdisc_priv(sch); 522 struct sfq_sched_data *q = qdisc_priv(sch);
523 size_t sz;
518 int i; 524 int i;
519 525
520 q->perturb_timer.function = sfq_perturbation; 526 q->perturb_timer.function = sfq_perturbation;
521 q->perturb_timer.data = (unsigned long)sch; 527 q->perturb_timer.data = (unsigned long)sch;
522 init_timer_deferrable(&q->perturb_timer); 528 init_timer_deferrable(&q->perturb_timer);
523 529
524 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
525 q->ht[i] = SFQ_EMPTY_SLOT;
526
527 for (i = 0; i < SFQ_DEPTH; i++) { 530 for (i = 0; i < SFQ_DEPTH; i++) {
528 q->dep[i].next = i + SFQ_SLOTS; 531 q->dep[i].next = i + SFQ_SLOTS;
529 q->dep[i].prev = i + SFQ_SLOTS; 532 q->dep[i].prev = i + SFQ_SLOTS;
@@ -532,6 +535,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
532 q->limit = SFQ_DEPTH - 1; 535 q->limit = SFQ_DEPTH - 1;
533 q->cur_depth = 0; 536 q->cur_depth = 0;
534 q->tail = NULL; 537 q->tail = NULL;
538 q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
535 if (opt == NULL) { 539 if (opt == NULL) {
536 q->quantum = psched_mtu(qdisc_dev(sch)); 540 q->quantum = psched_mtu(qdisc_dev(sch));
537 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 541 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
@@ -543,10 +547,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
543 return err; 547 return err;
544 } 548 }
545 549
550 sz = sizeof(q->ht[0]) * q->divisor;
551 q->ht = kmalloc(sz, GFP_KERNEL);
552 if (!q->ht && sz > PAGE_SIZE)
553 q->ht = vmalloc(sz);
554 if (!q->ht)
555 return -ENOMEM;
556 for (i = 0; i < q->divisor; i++)
557 q->ht[i] = SFQ_EMPTY_SLOT;
558
546 for (i = 0; i < SFQ_SLOTS; i++) { 559 for (i = 0; i < SFQ_SLOTS; i++) {
547 slot_queue_init(&q->slots[i]); 560 slot_queue_init(&q->slots[i]);
548 sfq_link(q, i); 561 sfq_link(q, i);
549 } 562 }
563 if (q->limit >= 1)
564 sch->flags |= TCQ_F_CAN_BYPASS;
565 else
566 sch->flags &= ~TCQ_F_CAN_BYPASS;
550 return 0; 567 return 0;
551} 568}
552 569
@@ -557,6 +574,10 @@ static void sfq_destroy(struct Qdisc *sch)
557 tcf_destroy_chain(&q->filter_list); 574 tcf_destroy_chain(&q->filter_list);
558 q->perturb_period = 0; 575 q->perturb_period = 0;
559 del_timer_sync(&q->perturb_timer); 576 del_timer_sync(&q->perturb_timer);
577 if (is_vmalloc_addr(q->ht))
578 vfree(q->ht);
579 else
580 kfree(q->ht);
560} 581}
561 582
562static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) 583static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -569,7 +590,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
569 opt.perturb_period = q->perturb_period / HZ; 590 opt.perturb_period = q->perturb_period / HZ;
570 591
571 opt.limit = q->limit; 592 opt.limit = q->limit;
572 opt.divisor = SFQ_HASH_DIVISOR; 593 opt.divisor = q->divisor;
573 opt.flows = q->limit; 594 opt.flows = q->limit;
574 595
575 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 596 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -594,6 +615,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
594static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, 615static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
595 u32 classid) 616 u32 classid)
596{ 617{
618 /* we cannot bypass queue discipline anymore */
619 sch->flags &= ~TCQ_F_CAN_BYPASS;
597 return 0; 620 return 0;
598} 621}
599 622
@@ -647,7 +670,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
647 if (arg->stop) 670 if (arg->stop)
648 return; 671 return;
649 672
650 for (i = 0; i < SFQ_HASH_DIVISOR; i++) { 673 for (i = 0; i < q->divisor; i++) {
651 if (q->ht[i] == SFQ_EMPTY_SLOT || 674 if (q->ht[i] == SFQ_EMPTY_SLOT ||
652 arg->count < arg->skip) { 675 arg->count < arg->skip) {
653 arg->count++; 676 arg->count++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index e93165820c3f..1dcfb5223a86 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
97 changed the limit is not effective anymore. 97 changed the limit is not effective anymore.
98*/ 98*/
99 99
100struct tbf_sched_data 100struct tbf_sched_data {
101{
102/* Parameters */ 101/* Parameters */
103 u32 limit; /* Maximal length of backlog: bytes */ 102 u32 limit; /* Maximal length of backlog: bytes */
104 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ 103 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
115 struct qdisc_watchdog watchdog; /* Watchdog timer */ 114 struct qdisc_watchdog watchdog; /* Watchdog timer */
116}; 115};
117 116
118#define L2T(q,L) qdisc_l2t((q)->R_tab,L) 117#define L2T(q, L) qdisc_l2t((q)->R_tab, L)
119#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) 118#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
120 119
121static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) 120static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
122{ 121{
123 struct tbf_sched_data *q = qdisc_priv(sch); 122 struct tbf_sched_data *q = qdisc_priv(sch);
124 int ret; 123 int ret;
@@ -137,7 +136,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
137 return NET_XMIT_SUCCESS; 136 return NET_XMIT_SUCCESS;
138} 137}
139 138
140static unsigned int tbf_drop(struct Qdisc* sch) 139static unsigned int tbf_drop(struct Qdisc *sch)
141{ 140{
142 struct tbf_sched_data *q = qdisc_priv(sch); 141 struct tbf_sched_data *q = qdisc_priv(sch);
143 unsigned int len = 0; 142 unsigned int len = 0;
@@ -149,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
149 return len; 148 return len;
150} 149}
151 150
152static struct sk_buff *tbf_dequeue(struct Qdisc* sch) 151static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
153{ 152{
154 struct tbf_sched_data *q = qdisc_priv(sch); 153 struct tbf_sched_data *q = qdisc_priv(sch);
155 struct sk_buff *skb; 154 struct sk_buff *skb;
@@ -185,7 +184,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
185 q->tokens = toks; 184 q->tokens = toks;
186 q->ptokens = ptoks; 185 q->ptokens = ptoks;
187 sch->q.qlen--; 186 sch->q.qlen--;
188 sch->flags &= ~TCQ_F_THROTTLED; 187 qdisc_unthrottled(sch);
189 qdisc_bstats_update(sch, skb); 188 qdisc_bstats_update(sch, skb);
190 return skb; 189 return skb;
191 } 190 }
@@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
209 return NULL; 208 return NULL;
210} 209}
211 210
212static void tbf_reset(struct Qdisc* sch) 211static void tbf_reset(struct Qdisc *sch)
213{ 212{
214 struct tbf_sched_data *q = qdisc_priv(sch); 213 struct tbf_sched_data *q = qdisc_priv(sch);
215 214
@@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
227 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 226 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
228}; 227};
229 228
230static int tbf_change(struct Qdisc* sch, struct nlattr *opt) 229static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
231{ 230{
232 int err; 231 int err;
233 struct tbf_sched_data *q = qdisc_priv(sch); 232 struct tbf_sched_data *q = qdisc_priv(sch);
@@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
236 struct qdisc_rate_table *rtab = NULL; 235 struct qdisc_rate_table *rtab = NULL;
237 struct qdisc_rate_table *ptab = NULL; 236 struct qdisc_rate_table *ptab = NULL;
238 struct Qdisc *child = NULL; 237 struct Qdisc *child = NULL;
239 int max_size,n; 238 int max_size, n;
240 239
241 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); 240 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
242 if (err < 0) 241 if (err < 0)
@@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
259 } 258 }
260 259
261 for (n = 0; n < 256; n++) 260 for (n = 0; n < 256; n++)
262 if (rtab->data[n] > qopt->buffer) break; 261 if (rtab->data[n] > qopt->buffer)
263 max_size = (n << qopt->rate.cell_log)-1; 262 break;
263 max_size = (n << qopt->rate.cell_log) - 1;
264 if (ptab) { 264 if (ptab) {
265 int size; 265 int size;
266 266
267 for (n = 0; n < 256; n++) 267 for (n = 0; n < 256; n++)
268 if (ptab->data[n] > qopt->mtu) break; 268 if (ptab->data[n] > qopt->mtu)
269 size = (n << qopt->peakrate.cell_log)-1; 269 break;
270 if (size < max_size) max_size = size; 270 size = (n << qopt->peakrate.cell_log) - 1;
271 if (size < max_size)
272 max_size = size;
271 } 273 }
272 if (max_size < 0) 274 if (max_size < 0)
273 goto done; 275 goto done;
@@ -310,7 +312,7 @@ done:
310 return err; 312 return err;
311} 313}
312 314
313static int tbf_init(struct Qdisc* sch, struct nlattr *opt) 315static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
314{ 316{
315 struct tbf_sched_data *q = qdisc_priv(sch); 317 struct tbf_sched_data *q = qdisc_priv(sch);
316 318
@@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
422 } 424 }
423} 425}
424 426
425static const struct Qdisc_class_ops tbf_class_ops = 427static const struct Qdisc_class_ops tbf_class_ops = {
426{
427 .graft = tbf_graft, 428 .graft = tbf_graft,
428 .leaf = tbf_leaf, 429 .leaf = tbf_leaf,
429 .get = tbf_get, 430 .get = tbf_get,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d84e7329660f..45cd30098e34 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,8 +53,7 @@
53 which will not break load balancing, though native slave 53 which will not break load balancing, though native slave
54 traffic will have the highest priority. */ 54 traffic will have the highest priority. */
55 55
56struct teql_master 56struct teql_master {
57{
58 struct Qdisc_ops qops; 57 struct Qdisc_ops qops;
59 struct net_device *dev; 58 struct net_device *dev;
60 struct Qdisc *slaves; 59 struct Qdisc *slaves;
@@ -65,22 +64,21 @@ struct teql_master
65 unsigned long tx_dropped; 64 unsigned long tx_dropped;
66}; 65};
67 66
68struct teql_sched_data 67struct teql_sched_data {
69{
70 struct Qdisc *next; 68 struct Qdisc *next;
71 struct teql_master *m; 69 struct teql_master *m;
72 struct neighbour *ncache; 70 struct neighbour *ncache;
73 struct sk_buff_head q; 71 struct sk_buff_head q;
74}; 72};
75 73
76#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) 74#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
77 75
78#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT) 76#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
79 77
80/* "teql*" qdisc routines */ 78/* "teql*" qdisc routines */
81 79
82static int 80static int
83teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) 81teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
84{ 82{
85 struct net_device *dev = qdisc_dev(sch); 83 struct net_device *dev = qdisc_dev(sch);
86 struct teql_sched_data *q = qdisc_priv(sch); 84 struct teql_sched_data *q = qdisc_priv(sch);
@@ -96,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
96} 94}
97 95
98static struct sk_buff * 96static struct sk_buff *
99teql_dequeue(struct Qdisc* sch) 97teql_dequeue(struct Qdisc *sch)
100{ 98{
101 struct teql_sched_data *dat = qdisc_priv(sch); 99 struct teql_sched_data *dat = qdisc_priv(sch);
102 struct netdev_queue *dat_queue; 100 struct netdev_queue *dat_queue;
@@ -118,13 +116,13 @@ teql_dequeue(struct Qdisc* sch)
118} 116}
119 117
120static struct sk_buff * 118static struct sk_buff *
121teql_peek(struct Qdisc* sch) 119teql_peek(struct Qdisc *sch)
122{ 120{
123 /* teql is meant to be used as root qdisc */ 121 /* teql is meant to be used as root qdisc */
124 return NULL; 122 return NULL;
125} 123}
126 124
127static __inline__ void 125static inline void
128teql_neigh_release(struct neighbour *n) 126teql_neigh_release(struct neighbour *n)
129{ 127{
130 if (n) 128 if (n)
@@ -132,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
132} 130}
133 131
134static void 132static void
135teql_reset(struct Qdisc* sch) 133teql_reset(struct Qdisc *sch)
136{ 134{
137 struct teql_sched_data *dat = qdisc_priv(sch); 135 struct teql_sched_data *dat = qdisc_priv(sch);
138 136
@@ -142,13 +140,14 @@ teql_reset(struct Qdisc* sch)
142} 140}
143 141
144static void 142static void
145teql_destroy(struct Qdisc* sch) 143teql_destroy(struct Qdisc *sch)
146{ 144{
147 struct Qdisc *q, *prev; 145 struct Qdisc *q, *prev;
148 struct teql_sched_data *dat = qdisc_priv(sch); 146 struct teql_sched_data *dat = qdisc_priv(sch);
149 struct teql_master *master = dat->m; 147 struct teql_master *master = dat->m;
150 148
151 if ((prev = master->slaves) != NULL) { 149 prev = master->slaves;
150 if (prev) {
152 do { 151 do {
153 q = NEXT_SLAVE(prev); 152 q = NEXT_SLAVE(prev);
154 if (q == sch) { 153 if (q == sch) {
@@ -180,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
180static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) 179static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
181{ 180{
182 struct net_device *dev = qdisc_dev(sch); 181 struct net_device *dev = qdisc_dev(sch);
183 struct teql_master *m = (struct teql_master*)sch->ops; 182 struct teql_master *m = (struct teql_master *)sch->ops;
184 struct teql_sched_data *q = qdisc_priv(sch); 183 struct teql_sched_data *q = qdisc_priv(sch);
185 184
186 if (dev->hard_header_len > m->dev->hard_header_len) 185 if (dev->hard_header_len > m->dev->hard_header_len)
@@ -291,7 +290,8 @@ restart:
291 nores = 0; 290 nores = 0;
292 busy = 0; 291 busy = 0;
293 292
294 if ((q = start) == NULL) 293 q = start;
294 if (!q)
295 goto drop; 295 goto drop;
296 296
297 do { 297 do {
@@ -356,10 +356,10 @@ drop:
356 356
357static int teql_master_open(struct net_device *dev) 357static int teql_master_open(struct net_device *dev)
358{ 358{
359 struct Qdisc * q; 359 struct Qdisc *q;
360 struct teql_master *m = netdev_priv(dev); 360 struct teql_master *m = netdev_priv(dev);
361 int mtu = 0xFFFE; 361 int mtu = 0xFFFE;
362 unsigned flags = IFF_NOARP|IFF_MULTICAST; 362 unsigned int flags = IFF_NOARP | IFF_MULTICAST;
363 363
364 if (m->slaves == NULL) 364 if (m->slaves == NULL)
365 return -EUNATCH; 365 return -EUNATCH;
@@ -427,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
427 do { 427 do {
428 if (new_mtu > qdisc_dev(q)->mtu) 428 if (new_mtu > qdisc_dev(q)->mtu)
429 return -EINVAL; 429 return -EINVAL;
430 } while ((q=NEXT_SLAVE(q)) != m->slaves); 430 } while ((q = NEXT_SLAVE(q)) != m->slaves);
431 } 431 }
432 432
433 dev->mtu = new_mtu; 433 dev->mtu = new_mtu;
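
The sch_teql.c hunks above are style cleanups (brace placement, pointer spacing, splitting embedded assignments out of conditions), and they all touch the same structure: the master qdisc keeps its slave qdiscs on a circular singly linked list reached through NEXT_SLAVE(). Below is a minimal standalone sketch of that ring walk, mirroring the teql_master_mtu() check; the names and the program around it are invented for illustration, not kernel code.

/* Minimal userspace sketch of a circular slave ring like teql's;
 * names (slave, ring_allows_mtu) are made up for illustration. */
#include <stdio.h>

struct slave {
	int mtu;
	struct slave *next;	/* circular: last->next == first */
};

/* Mirror of the teql_master_mtu() check: a new master MTU is only
 * acceptable if no slave has a smaller MTU. */
static int ring_allows_mtu(struct slave *head, int new_mtu)
{
	struct slave *s = head;

	if (!s)
		return 0;	/* no slaves attached: reject, like -EUNATCH */
	do {
		if (new_mtu > s->mtu)
			return 0;
		s = s->next;
	} while (s != head);
	return 1;
}

int main(void)
{
	struct slave a = { 1500, NULL }, b = { 1400, NULL };

	a.next = &b;
	b.next = &a;	/* close the ring */
	printf("1400 ok? %d\n", ring_allows_mtu(&a, 1400));	/* 1 */
	printf("1500 ok? %d\n", ring_allows_mtu(&a, 1500));	/* 0, b is smaller */
	return 0;
}
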
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5f1fb8bd862d..1a21c571aa03 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -569,6 +569,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
569 sctp_assoc_set_primary(asoc, transport); 569 sctp_assoc_set_primary(asoc, transport);
570 if (asoc->peer.active_path == peer) 570 if (asoc->peer.active_path == peer)
571 asoc->peer.active_path = transport; 571 asoc->peer.active_path = transport;
572 if (asoc->peer.retran_path == peer)
573 asoc->peer.retran_path = transport;
572 if (asoc->peer.last_data_from == peer) 574 if (asoc->peer.last_data_from == peer)
573 asoc->peer.last_data_from = transport; 575 asoc->peer.last_data_from = transport;
574 576
@@ -1089,7 +1091,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
1089 base.inqueue.immediate); 1091 base.inqueue.immediate);
1090 struct sctp_endpoint *ep; 1092 struct sctp_endpoint *ep;
1091 struct sctp_chunk *chunk; 1093 struct sctp_chunk *chunk;
1092 struct sock *sk;
1093 struct sctp_inq *inqueue; 1094 struct sctp_inq *inqueue;
1094 int state; 1095 int state;
1095 sctp_subtype_t subtype; 1096 sctp_subtype_t subtype;
@@ -1097,7 +1098,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
1097 1098
1098 /* The association should be held so we should be safe. */ 1099 /* The association should be held so we should be safe. */
1099 ep = asoc->ep; 1100 ep = asoc->ep;
1100 sk = asoc->base.sk;
1101 1101
1102 inqueue = &asoc->base.inqueue; 1102 inqueue = &asoc->base.inqueue;
1103 sctp_association_hold(asoc); 1103 sctp_association_hold(asoc);
@@ -1325,6 +1325,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1325 1325
1326 if (t) 1326 if (t)
1327 asoc->peer.retran_path = t; 1327 asoc->peer.retran_path = t;
1328 else
1329 t = asoc->peer.retran_path;
1328 1330
1329 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" 1331 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
1330 " %p addr: ", 1332 " %p addr: ",
@@ -1595,7 +1597,7 @@ void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc)
1595 struct sctp_chunk *ack; 1597 struct sctp_chunk *ack;
1596 struct sctp_chunk *tmp; 1598 struct sctp_chunk *tmp;
1597 1599
1598 /* We can remove all the entries from the queue upto 1600 /* We can remove all the entries from the queue up to
1599 * the "Peer-Sequence-Number". 1601 * the "Peer-Sequence-Number".
1600 */ 1602 */
1601 list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list, 1603 list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
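
The first associola.c hunk adds retran_path to the set of cached transport pointers that get repointed when a peer is removed, alongside active_path and last_data_from. A small hypothetical sketch of that rule, with invented struct names rather than the kernel types:

/* Hypothetical sketch of the "repoint cached paths" rule the first
 * associola.c hunk enforces. */
#include <assert.h>

struct transport { int id; };

struct assoc {
	struct transport *active_path;
	struct transport *retran_path;
	struct transport *last_data_from;
};

/* When 'gone' is removed, every cached pointer that referenced it must
 * be moved to 'replacement', otherwise it dangles. */
static void rm_peer(struct assoc *a, struct transport *gone,
		    struct transport *replacement)
{
	if (a->active_path == gone)
		a->active_path = replacement;
	if (a->retran_path == gone)		/* the check the patch adds */
		a->retran_path = replacement;
	if (a->last_data_from == gone)
		a->last_data_from = replacement;
}

int main(void)
{
	struct transport t1 = { 1 }, t2 = { 2 };
	struct assoc a = { &t1, &t1, &t2 };

	rm_peer(&a, &t1, &t2);
	assert(a.active_path == &t2 && a.retran_path == &t2);
	return 0;
}
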
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index ddbbf7c81fa1..865e68fef21c 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -113,7 +113,7 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
113 return new; 113 return new;
114} 114}
115 115
116/* Free the shared key stucture */ 116/* Free the shared key structure */
117static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) 117static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
118{ 118{
119 BUG_ON(!list_empty(&sh_key->key_list)); 119 BUG_ON(!list_empty(&sh_key->key_list));
@@ -122,7 +122,7 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
122 kfree(sh_key); 122 kfree(sh_key);
123} 123}
124 124
125/* Destory the entire key list. This is done during the 125/* Destroy the entire key list. This is done during the
126 * associon and endpoint free process. 126 * associon and endpoint free process.
127 */ 127 */
128void sctp_auth_destroy_keys(struct list_head *keys) 128void sctp_auth_destroy_keys(struct list_head *keys)
@@ -324,7 +324,7 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret(
324 if (!peer_key_vector || !local_key_vector) 324 if (!peer_key_vector || !local_key_vector)
325 goto out; 325 goto out;
326 326
327 /* Figure out the order in wich the key_vectors will be 327 /* Figure out the order in which the key_vectors will be
328 * added to the endpoint shared key. 328 * added to the endpoint shared key.
329 * SCTP-AUTH, Section 6.1: 329 * SCTP-AUTH, Section 6.1:
330 * This is performed by selecting the numerically smaller key 330 * This is performed by selecting the numerically smaller key
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index faf71d179e46..83e3011c19ca 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -140,14 +140,12 @@ void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port)
140/* Dispose of the address list. */ 140/* Dispose of the address list. */
141static void sctp_bind_addr_clean(struct sctp_bind_addr *bp) 141static void sctp_bind_addr_clean(struct sctp_bind_addr *bp)
142{ 142{
143 struct sctp_sockaddr_entry *addr; 143 struct sctp_sockaddr_entry *addr, *temp;
144 struct list_head *pos, *temp;
145 144
146 /* Empty the bind address list. */ 145 /* Empty the bind address list. */
147 list_for_each_safe(pos, temp, &bp->address_list) { 146 list_for_each_entry_safe(addr, temp, &bp->address_list, list) {
148 addr = list_entry(pos, struct sctp_sockaddr_entry, list); 147 list_del_rcu(&addr->list);
149 list_del(pos); 148 kfree_rcu(addr, rcu);
150 kfree(addr);
151 SCTP_DBG_OBJCNT_DEC(addr); 149 SCTP_DBG_OBJCNT_DEC(addr);
152 } 150 }
153} 151}
@@ -219,7 +217,7 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
219 } 217 }
220 218
221 if (found) { 219 if (found) {
222 call_rcu(&addr->rcu, sctp_local_addr_free); 220 kfree_rcu(addr, rcu);
223 SCTP_DBG_OBJCNT_DEC(addr); 221 SCTP_DBG_OBJCNT_DEC(addr);
224 return 0; 222 return 0;
225 } 223 }
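
The bind_addr.c hunks (and matching ones later in ipv6.c and protocol.c) replace call_rcu() plus a callback that only calls kfree() with kfree_rcu(). A rough before/after sketch with a made-up struct; these are kernel-only APIs, so this fragment is illustrative rather than a buildable standalone program:

/* Illustrative kernel-style code, not standalone-buildable; the struct
 * name is invented. */
struct addr_entry {
	struct list_head list;
	struct rcu_head rcu;
};

/* Old pattern: a callback whose only job is to kfree() the container. */
static void addr_entry_free(struct rcu_head *head)
{
	kfree(container_of(head, struct addr_entry, rcu));
}

static void remove_entry_old(struct addr_entry *e)
{
	list_del_rcu(&e->list);
	call_rcu(&e->rcu, addr_entry_free);
}

/* New pattern: kfree_rcu(obj, rcu_head_member) queues the kfree() for
 * after a grace period, so the boilerplate callback goes away. */
static void remove_entry_new(struct addr_entry *e)
{
	list_del_rcu(&e->list);
	kfree_rcu(e, rcu);
}
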
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index bf24fa697de2..ec997cfe0a7e 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -98,7 +98,6 @@ const char *sctp_cname(const sctp_subtype_t cid)
98 98
99/* These are printable forms of the states. */ 99/* These are printable forms of the states. */
100const char *const sctp_state_tbl[SCTP_STATE_NUM_STATES] = { 100const char *const sctp_state_tbl[SCTP_STATE_NUM_STATES] = {
101 "STATE_EMPTY",
102 "STATE_CLOSED", 101 "STATE_CLOSED",
103 "STATE_COOKIE_WAIT", 102 "STATE_COOKIE_WAIT",
104 "STATE_COOKIE_ECHOED", 103 "STATE_COOKIE_ECHOED",
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index e10acc01c75f..c8cc24e282c3 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -325,6 +325,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
325 struct sctp_transport **transport) 325 struct sctp_transport **transport)
326{ 326{
327 struct sctp_association *asoc = NULL; 327 struct sctp_association *asoc = NULL;
328 struct sctp_association *tmp;
328 struct sctp_transport *t = NULL; 329 struct sctp_transport *t = NULL;
329 struct sctp_hashbucket *head; 330 struct sctp_hashbucket *head;
330 struct sctp_ep_common *epb; 331 struct sctp_ep_common *epb;
@@ -333,25 +334,32 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
333 int rport; 334 int rport;
334 335
335 *transport = NULL; 336 *transport = NULL;
337
338 /* If the local port is not set, there can't be any associations
339 * on this endpoint.
340 */
341 if (!ep->base.bind_addr.port)
342 goto out;
343
336 rport = ntohs(paddr->v4.sin_port); 344 rport = ntohs(paddr->v4.sin_port);
337 345
338 hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport); 346 hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport);
339 head = &sctp_assoc_hashtable[hash]; 347 head = &sctp_assoc_hashtable[hash];
340 read_lock(&head->lock); 348 read_lock(&head->lock);
341 sctp_for_each_hentry(epb, node, &head->chain) { 349 sctp_for_each_hentry(epb, node, &head->chain) {
342 asoc = sctp_assoc(epb); 350 tmp = sctp_assoc(epb);
343 if (asoc->ep != ep || rport != asoc->peer.port) 351 if (tmp->ep != ep || rport != tmp->peer.port)
344 goto next; 352 continue;
345 353
346 t = sctp_assoc_lookup_paddr(asoc, paddr); 354 t = sctp_assoc_lookup_paddr(tmp, paddr);
347 if (t) { 355 if (t) {
356 asoc = tmp;
348 *transport = t; 357 *transport = t;
349 break; 358 break;
350 } 359 }
351next:
352 asoc = NULL;
353 } 360 }
354 read_unlock(&head->lock); 361 read_unlock(&head->lock);
362out:
355 return asoc; 363 return asoc;
356} 364}
357 365
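
The endpointola.c hunk rewrites the association lookup so the scan uses a temporary and only assigns the result on a confirmed match, instead of assigning speculatively and undoing it through a goto label; it also bails out early when the endpoint has no bound port. A tiny userspace sketch of the same scanning pattern, with invented names:

/* Userspace sketch of the lookup rewrite in __sctp_endpoint_lookup_assoc():
 * scan with a temporary and assign the result only on a real match. */
#include <stdio.h>
#include <stddef.h>

struct record { int owner; int port; };

static const struct record *lookup(const struct record *tbl, size_t n,
				   int owner, int port)
{
	const struct record *found = NULL;

	for (size_t i = 0; i < n; i++) {
		const struct record *tmp = &tbl[i];

		if (tmp->owner != owner || tmp->port != port)
			continue;	/* keep 'found' untouched */
		found = tmp;
		break;
	}
	return found;
}

int main(void)
{
	const struct record tbl[] = { { 1, 80 }, { 2, 443 }, { 1, 443 } };

	printf("%td\n", lookup(tbl, 3, 1, 443) - tbl);	/* prints 2 */
	printf("%p\n", (void *)lookup(tbl, 3, 3, 80));	/* NULL: no match */
	return 0;
}
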
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ea2192444ce6..741ed1648838 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -565,7 +565,7 @@ void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
565 */ 565 */
566void sctp_v4_err(struct sk_buff *skb, __u32 info) 566void sctp_v4_err(struct sk_buff *skb, __u32 info)
567{ 567{
568 struct iphdr *iph = (struct iphdr *)skb->data; 568 const struct iphdr *iph = (const struct iphdr *)skb->data;
569 const int ihlen = iph->ihl * 4; 569 const int ihlen = iph->ihl * 4;
570 const int type = icmp_hdr(skb)->type; 570 const int type = icmp_hdr(skb)->type;
571 const int code = icmp_hdr(skb)->code; 571 const int code = icmp_hdr(skb)->code;
@@ -661,7 +661,6 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
661{ 661{
662 sctp_chunkhdr_t *ch; 662 sctp_chunkhdr_t *ch;
663 __u8 *ch_end; 663 __u8 *ch_end;
664 sctp_errhdr_t *err;
665 664
666 ch = (sctp_chunkhdr_t *) skb->data; 665 ch = (sctp_chunkhdr_t *) skb->data;
667 666
@@ -697,20 +696,6 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
697 if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data) 696 if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data)
698 goto discard; 697 goto discard;
699 698
700 /* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR
701 * or a COOKIE ACK the SCTP Packet should be silently
702 * discarded.
703 */
704 if (SCTP_CID_COOKIE_ACK == ch->type)
705 goto discard;
706
707 if (SCTP_CID_ERROR == ch->type) {
708 sctp_walk_errors(err, ch) {
709 if (SCTP_ERROR_STALE_COOKIE == err->cause)
710 goto discard;
711 }
712 }
713
714 ch = (sctp_chunkhdr_t *) ch_end; 699 ch = (sctp_chunkhdr_t *) ch_end;
715 } while (ch_end < skb_tail_pointer(skb)); 700 } while (ch_end < skb_tail_pointer(skb));
716 701
@@ -948,14 +933,11 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
948 union sctp_addr addr; 933 union sctp_addr addr;
949 union sctp_addr *paddr = &addr; 934 union sctp_addr *paddr = &addr;
950 struct sctphdr *sh = sctp_hdr(skb); 935 struct sctphdr *sh = sctp_hdr(skb);
951 sctp_chunkhdr_t *ch;
952 union sctp_params params; 936 union sctp_params params;
953 sctp_init_chunk_t *init; 937 sctp_init_chunk_t *init;
954 struct sctp_transport *transport; 938 struct sctp_transport *transport;
955 struct sctp_af *af; 939 struct sctp_af *af;
956 940
957 ch = (sctp_chunkhdr_t *) skb->data;
958
959 /* 941 /*
960 * This code will NOT touch anything inside the chunk--it is 942 * This code will NOT touch anything inside the chunk--it is
961 * strictly READ-ONLY. 943 * strictly READ-ONLY.
@@ -1020,7 +1002,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
1020 /* Skip over the ADDIP header and find the Address parameter */ 1002 /* Skip over the ADDIP header and find the Address parameter */
1021 param = (union sctp_addr_param *)(asconf + 1); 1003 param = (union sctp_addr_param *)(asconf + 1);
1022 1004
1023 af = sctp_get_af_specific(param_type2af(param->v4.param_hdr.type)); 1005 af = sctp_get_af_specific(param_type2af(param->p.type));
1024 if (unlikely(!af)) 1006 if (unlikely(!af))
1025 return NULL; 1007 return NULL;
1026 1008
@@ -1037,7 +1019,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
1037* association. 1019* association.
1038* 1020*
1039* This means that any chunks that can help us identify the association need 1021* This means that any chunks that can help us identify the association need
1040* to be looked at to find this assocation. 1022* to be looked at to find this association.
1041*/ 1023*/
1042static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, 1024static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
1043 const union sctp_addr *laddr, 1025 const union sctp_addr *laddr,
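
Several hunks in input.c (and later in sm_make_chunk.c) switch from param->v4.param_hdr.type to param->p.type: the union carries a member for the bare parameter header shared by all variants, so code that only needs the type or length does not have to reach through one arbitrary address family. A simplified stand-in for that idiom follows; these are not the real sctp_addr_param layouts.

/* Sketch of the union-with-common-header idiom behind the
 * param->v4.param_hdr.type -> param->p.type change. */
#include <stdint.h>
#include <stdio.h>

struct param_hdr { uint16_t type; uint16_t length; };

struct v4_param { struct param_hdr hdr; uint32_t addr; };
struct v6_param { struct param_hdr hdr; uint8_t addr[16]; };

union addr_param {
	struct param_hdr p;	/* common header, valid for either variant */
	struct v4_param v4;
	struct v6_param v6;
};

int main(void)
{
	union addr_param ap = { .v6 = { { 6, 20 }, { 0 } } };

	/* Reading the shared leading header through .p avoids picking an
	 * address family just to get at type/length. */
	printf("type=%u len=%u\n", (unsigned)ap.p.type, (unsigned)ap.p.length);
	return 0;
}
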
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 95e0c8eda1a0..0bb0d7cb9f10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -80,6 +80,13 @@
80 80
81#include <asm/uaccess.h> 81#include <asm/uaccess.h>
82 82
83static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
84 union sctp_addr *s2);
85static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
86 __be16 port);
87static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
88 const union sctp_addr *addr2);
89
83/* Event handler for inet6 address addition/deletion events. 90/* Event handler for inet6 address addition/deletion events.
84 * The sctp_local_addr_list needs to be protocted by a spin lock since 91 * The sctp_local_addr_list needs to be protocted by a spin lock since
85 * multiple notifiers (say IPv4 and IPv6) may be running at the same 92 * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -123,7 +130,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
123 } 130 }
124 spin_unlock_bh(&sctp_local_addr_lock); 131 spin_unlock_bh(&sctp_local_addr_lock);
125 if (found) 132 if (found)
126 call_rcu(&addr->rcu, sctp_local_addr_free); 133 kfree_rcu(addr, rcu);
127 break; 134 break;
128 } 135 }
129 136
@@ -201,76 +208,146 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
201{ 208{
202 struct sock *sk = skb->sk; 209 struct sock *sk = skb->sk;
203 struct ipv6_pinfo *np = inet6_sk(sk); 210 struct ipv6_pinfo *np = inet6_sk(sk);
204 struct flowi fl; 211 struct flowi6 fl6;
205 212
206 memset(&fl, 0, sizeof(fl)); 213 memset(&fl6, 0, sizeof(fl6));
207 214
208 fl.proto = sk->sk_protocol; 215 fl6.flowi6_proto = sk->sk_protocol;
209 216
210 /* Fill in the dest address from the route entry passed with the skb 217 /* Fill in the dest address from the route entry passed with the skb
211 * and the source address from the transport. 218 * and the source address from the transport.
212 */ 219 */
213 ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr); 220 ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr);
214 ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr); 221 ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr);
215 222
216 fl.fl6_flowlabel = np->flow_label; 223 fl6.flowlabel = np->flow_label;
217 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); 224 IP6_ECN_flow_xmit(sk, fl6.flowlabel);
218 if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) 225 if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL)
219 fl.oif = transport->saddr.v6.sin6_scope_id; 226 fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id;
220 else 227 else
221 fl.oif = sk->sk_bound_dev_if; 228 fl6.flowi6_oif = sk->sk_bound_dev_if;
222 229
223 if (np->opt && np->opt->srcrt) { 230 if (np->opt && np->opt->srcrt) {
224 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; 231 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
225 ipv6_addr_copy(&fl.fl6_dst, rt0->addr); 232 ipv6_addr_copy(&fl6.daddr, rt0->addr);
226 } 233 }
227 234
228 SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", 235 SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
229 __func__, skb, skb->len, 236 __func__, skb, skb->len,
230 &fl.fl6_src, &fl.fl6_dst); 237 &fl6.saddr, &fl6.daddr);
231 238
232 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); 239 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
233 240
234 if (!(transport->param_flags & SPP_PMTUD_ENABLE)) 241 if (!(transport->param_flags & SPP_PMTUD_ENABLE))
235 skb->local_df = 1; 242 skb->local_df = 1;
236 243
237 return ip6_xmit(sk, skb, &fl, np->opt); 244 return ip6_xmit(sk, skb, &fl6, np->opt);
238} 245}
239 246
240/* Returns the dst cache entry for the given source and destination ip 247/* Returns the dst cache entry for the given source and destination ip
241 * addresses. 248 * addresses.
242 */ 249 */
243static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, 250static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
244 union sctp_addr *daddr, 251 struct flowi *fl, struct sock *sk)
245 union sctp_addr *saddr)
246{ 252{
247 struct dst_entry *dst; 253 struct sctp_association *asoc = t->asoc;
248 struct flowi fl; 254 struct dst_entry *dst = NULL;
255 struct flowi6 *fl6 = &fl->u.ip6;
256 struct sctp_bind_addr *bp;
257 struct sctp_sockaddr_entry *laddr;
258 union sctp_addr *baddr = NULL;
259 union sctp_addr *daddr = &t->ipaddr;
260 union sctp_addr dst_saddr;
261 __u8 matchlen = 0;
262 __u8 bmatchlen;
263 sctp_scope_t scope;
249 264
250 memset(&fl, 0, sizeof(fl)); 265 memset(fl6, 0, sizeof(struct flowi6));
251 ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); 266 ipv6_addr_copy(&fl6->daddr, &daddr->v6.sin6_addr);
267 fl6->fl6_dport = daddr->v6.sin6_port;
268 fl6->flowi6_proto = IPPROTO_SCTP;
252 if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) 269 if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
253 fl.oif = daddr->v6.sin6_scope_id; 270 fl6->flowi6_oif = daddr->v6.sin6_scope_id;
254 271
272 SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6->daddr);
255 273
256 SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); 274 if (asoc)
275 fl6->fl6_sport = htons(asoc->base.bind_addr.port);
257 276
258 if (saddr) { 277 if (saddr) {
259 ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); 278 ipv6_addr_copy(&fl6->saddr, &saddr->v6.sin6_addr);
260 SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); 279 fl6->fl6_sport = saddr->v6.sin6_port;
280 SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr);
261 } 281 }
262 282
263 dst = ip6_route_output(&init_net, NULL, &fl); 283 dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
264 if (!dst->error) { 284 if (!asoc || saddr)
285 goto out;
286
287 bp = &asoc->base.bind_addr;
288 scope = sctp_scope(daddr);
289 /* ip6_dst_lookup has filled in the fl6->saddr for us. Check
290 * to see if we can use it.
291 */
292 if (!IS_ERR(dst)) {
293 /* Walk through the bind address list and look for a bind
294 * address that matches the source address of the returned dst.
295 */
296 sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port));
297 rcu_read_lock();
298 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
299 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
300 continue;
301
302 /* Do not compare against v4 addrs */
303 if ((laddr->a.sa.sa_family == AF_INET6) &&
304 (sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) {
305 rcu_read_unlock();
306 goto out;
307 }
308 }
309 rcu_read_unlock();
310 /* None of the bound addresses match the source address of the
311 * dst. So release it.
312 */
313 dst_release(dst);
314 dst = NULL;
315 }
316
317 /* Walk through the bind address list and try to get the
318 * best source address for a given destination.
319 */
320 rcu_read_lock();
321 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
322 if (!laddr->valid && laddr->state != SCTP_ADDR_SRC)
323 continue;
324 if ((laddr->a.sa.sa_family == AF_INET6) &&
325 (scope <= sctp_scope(&laddr->a))) {
326 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
327 if (!baddr || (matchlen < bmatchlen)) {
328 baddr = &laddr->a;
329 matchlen = bmatchlen;
330 }
331 }
332 }
333 rcu_read_unlock();
334 if (baddr) {
335 ipv6_addr_copy(&fl6->saddr, &baddr->v6.sin6_addr);
336 fl6->fl6_sport = baddr->v6.sin6_port;
337 dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
338 }
339
340out:
341 if (!IS_ERR(dst)) {
265 struct rt6_info *rt; 342 struct rt6_info *rt;
266 rt = (struct rt6_info *)dst; 343 rt = (struct rt6_info *)dst;
344 t->dst = dst;
267 SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", 345 SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n",
268 &rt->rt6i_dst.addr, &rt->rt6i_src.addr); 346 &rt->rt6i_dst.addr, &fl6->saddr);
269 return dst; 347 } else {
348 t->dst = NULL;
349 SCTP_DEBUG_PRINTK("NO ROUTE\n");
270 } 350 }
271 SCTP_DEBUG_PRINTK("NO ROUTE\n");
272 dst_release(dst);
273 return NULL;
274} 351}
275 352
276/* Returns the number of consecutive initial bits that match in the 2 ipv6 353/* Returns the number of consecutive initial bits that match in the 2 ipv6
@@ -286,64 +363,18 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
286 * and asoc's bind address list. 363 * and asoc's bind address list.
287 */ 364 */
288static void sctp_v6_get_saddr(struct sctp_sock *sk, 365static void sctp_v6_get_saddr(struct sctp_sock *sk,
289 struct sctp_association *asoc, 366 struct sctp_transport *t,
290 struct dst_entry *dst, 367 struct flowi *fl)
291 union sctp_addr *daddr,
292 union sctp_addr *saddr)
293{ 368{
294 struct sctp_bind_addr *bp; 369 struct flowi6 *fl6 = &fl->u.ip6;
295 struct sctp_sockaddr_entry *laddr; 370 union sctp_addr *saddr = &t->saddr;
296 sctp_scope_t scope;
297 union sctp_addr *baddr = NULL;
298 __u8 matchlen = 0;
299 __u8 bmatchlen;
300
301 SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%pI6 ",
302 __func__, asoc, dst, &daddr->v6.sin6_addr);
303
304 if (!asoc) {
305 ipv6_dev_get_saddr(sock_net(sctp_opt2sk(sk)),
306 dst ? ip6_dst_idev(dst)->dev : NULL,
307 &daddr->v6.sin6_addr,
308 inet6_sk(&sk->inet.sk)->srcprefs,
309 &saddr->v6.sin6_addr);
310 SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n",
311 &saddr->v6.sin6_addr);
312 return;
313 }
314
315 scope = sctp_scope(daddr);
316
317 bp = &asoc->base.bind_addr;
318 371
319 /* Go through the bind address list and find the best source address 372 SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst);
320 * that matches the scope of the destination address.
321 */
322 rcu_read_lock();
323 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
324 if (!laddr->valid)
325 continue;
326 if ((laddr->state == SCTP_ADDR_SRC) &&
327 (laddr->a.sa.sa_family == AF_INET6) &&
328 (scope <= sctp_scope(&laddr->a))) {
329 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
330 if (!baddr || (matchlen < bmatchlen)) {
331 baddr = &laddr->a;
332 matchlen = bmatchlen;
333 }
334 }
335 }
336 373
337 if (baddr) { 374 if (t->dst) {
338 memcpy(saddr, baddr, sizeof(union sctp_addr)); 375 saddr->v6.sin6_family = AF_INET6;
339 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); 376 ipv6_addr_copy(&saddr->v6.sin6_addr, &fl6->saddr);
340 } else {
341 pr_err("%s: asoc:%p Could not find a valid source "
342 "address for the dest:%pI6\n",
343 __func__, asoc, &daddr->v6.sin6_addr);
344 } 377 }
345
346 rcu_read_unlock();
347} 378}
348 379
349/* Make a copy of all potential local addresses. */ 380/* Make a copy of all potential local addresses. */
@@ -465,14 +496,13 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr,
465 return length; 496 return length;
466} 497}
467 498
468/* Initialize a sctp_addr from a dst_entry. */ 499/* Initialize a sctp_addr from struct in6_addr. */
469static void sctp_v6_dst_saddr(union sctp_addr *addr, struct dst_entry *dst, 500static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
470 __be16 port) 501 __be16 port)
471{ 502{
472 struct rt6_info *rt = (struct rt6_info *)dst;
473 addr->sa.sa_family = AF_INET6; 503 addr->sa.sa_family = AF_INET6;
474 addr->v6.sin6_port = port; 504 addr->v6.sin6_port = port;
475 ipv6_addr_copy(&addr->v6.sin6_addr, &rt->rt6i_src.addr); 505 ipv6_addr_copy(&addr->v6.sin6_addr, saddr);
476} 506}
477 507
478/* Compare addresses exactly. 508/* Compare addresses exactly.
@@ -531,7 +561,7 @@ static int sctp_v6_is_any(const union sctp_addr *addr)
531static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) 561static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
532{ 562{
533 int type; 563 int type;
534 struct in6_addr *in6 = (struct in6_addr *)&addr->v6.sin6_addr; 564 const struct in6_addr *in6 = (const struct in6_addr *)&addr->v6.sin6_addr;
535 565
536 type = ipv6_addr_type(in6); 566 type = ipv6_addr_type(in6);
537 if (IPV6_ADDR_ANY == type) 567 if (IPV6_ADDR_ANY == type)
@@ -959,7 +989,6 @@ static struct sctp_af sctp_af_inet6 = {
959 .to_sk_daddr = sctp_v6_to_sk_daddr, 989 .to_sk_daddr = sctp_v6_to_sk_daddr,
960 .from_addr_param = sctp_v6_from_addr_param, 990 .from_addr_param = sctp_v6_from_addr_param,
961 .to_addr_param = sctp_v6_to_addr_param, 991 .to_addr_param = sctp_v6_to_addr_param,
962 .dst_saddr = sctp_v6_dst_saddr,
963 .cmp_addr = sctp_v6_cmp_addr, 992 .cmp_addr = sctp_v6_cmp_addr,
964 .scope = sctp_v6_scope, 993 .scope = sctp_v6_scope,
965 .addr_valid = sctp_v6_addr_valid, 994 .addr_valid = sctp_v6_addr_valid,
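
The rewritten sctp_v6_get_dst() above now performs source address selection itself: it walks the bind address list and prefers a candidate whose prefix match against the destination is longest (the job of sctp_v6_addr_match_len). A standalone sketch of that prefix comparison, with the scope checks omitted and all names invented:

/* Standalone sketch of longest-prefix-match source selection over a set
 * of candidate local addresses. */
#include <arpa/inet.h>
#include <stdio.h>

/* Number of leading bits two IPv6 addresses have in common (same idea
 * as sctp_v6_addr_match_len()). */
static int match_len(const struct in6_addr *a, const struct in6_addr *b)
{
	int bits = 0;

	for (int i = 0; i < 16; i++) {
		unsigned char x = a->s6_addr[i] ^ b->s6_addr[i];

		if (!x) {
			bits += 8;
			continue;
		}
		while (!(x & 0x80)) {	/* count matching leading bits */
			bits++;
			x <<= 1;
		}
		break;
	}
	return bits;
}

int main(void)
{
	struct in6_addr dst, c1, c2;

	inet_pton(AF_INET6, "2001:db8:aa::1", &dst);
	inet_pton(AF_INET6, "2001:db8:aa::42", &c1);	/* same prefix */
	inet_pton(AF_INET6, "fe80::1", &c2);		/* link-local */

	const struct in6_addr *best =
		match_len(&c1, &dst) >= match_len(&c2, &dst) ? &c1 : &c2;
	printf("best candidate matches %d leading bits\n", match_len(best, &dst));
	return 0;
}
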
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 60600d337a3a..b4f3cf06d8da 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -510,7 +510,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
510 sh->checksum = sctp_end_cksum(crc32); 510 sh->checksum = sctp_end_cksum(crc32);
511 } else { 511 } else {
512 if (dst->dev->features & NETIF_F_SCTP_CSUM) { 512 if (dst->dev->features & NETIF_F_SCTP_CSUM) {
513 /* no need to seed psuedo checksum for SCTP */ 513 /* no need to seed pseudo checksum for SCTP */
514 nskb->ip_summed = CHECKSUM_PARTIAL; 514 nskb->ip_summed = CHECKSUM_PARTIAL;
515 nskb->csum_start = (skb_transport_header(nskb) - 515 nskb->csum_start = (skb_transport_header(nskb) -
516 nskb->head); 516 nskb->head);
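
The output.c hunk only corrects the wording of the checksum-offload comment, but for context: when the device cannot offload it, the checksum an SCTP packet carries is CRC-32C. A bit-at-a-time sketch of that CRC follows; the kernel uses lib/crc32c or hardware offload, so this is only meant to show the arithmetic, and the byte order of the final insertion into the header is not covered here.

/* Bit-at-a-time CRC-32C (Castagnoli) sketch. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c(uint32_t crc, const unsigned char *p, size_t len)
{
	crc = ~crc;
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)	/* reflected poly of 0x1EDC6F41 */
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
	}
	return ~crc;
}

int main(void)
{
	const char *msg = "123456789";

	/* Well-known CRC-32C check value for "123456789" is 0xE3069283. */
	printf("%08X\n", (unsigned)crc32c(0, (const unsigned char *)msg, strlen(msg)));
	return 0;
}
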
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 8c6d379b4bb6..1c88c8911dc5 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -131,7 +131,8 @@ static inline int sctp_cacc_skip_3_1_d(struct sctp_transport *primary,
131static inline int sctp_cacc_skip_3_1_f(struct sctp_transport *transport, 131static inline int sctp_cacc_skip_3_1_f(struct sctp_transport *transport,
132 int count_of_newacks) 132 int count_of_newacks)
133{ 133{
134 if (count_of_newacks < 2 && !transport->cacc.cacc_saw_newack) 134 if (count_of_newacks < 2 &&
135 (transport && !transport->cacc.cacc_saw_newack))
135 return 1; 136 return 1;
136 return 0; 137 return 0;
137} 138}
@@ -177,13 +178,13 @@ static inline int sctp_cacc_skip_3_2(struct sctp_transport *primary, __u32 tsn)
177 * 3) If the missing report count for TSN t is to be 178 * 3) If the missing report count for TSN t is to be
178 * incremented according to [RFC2960] and 179 * incremented according to [RFC2960] and
179 * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set, 180 * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set,
180 * then the sender MUST futher execute steps 3.1 and 181 * then the sender MUST further execute steps 3.1 and
181 * 3.2 to determine if the missing report count for 182 * 3.2 to determine if the missing report count for
182 * TSN t SHOULD NOT be incremented. 183 * TSN t SHOULD NOT be incremented.
183 * 184 *
184 * 3.3) If 3.1 and 3.2 do not dictate that the missing 185 * 3.3) If 3.1 and 3.2 do not dictate that the missing
185 * report count for t should not be incremented, then 186 * report count for t should not be incremented, then
186 * the sender SOULD increment missing report count for 187 * the sender SHOULD increment missing report count for
187 * t (according to [RFC2960] and [SCTP_STEWART_2002]). 188 * t (according to [RFC2960] and [SCTP_STEWART_2002]).
188 */ 189 */
189static inline int sctp_cacc_skip(struct sctp_transport *primary, 190static inline int sctp_cacc_skip(struct sctp_transport *primary,
@@ -319,7 +320,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
319 * chunk. 320 * chunk.
320 */ 321 */
321 switch (q->asoc->state) { 322 switch (q->asoc->state) {
322 case SCTP_STATE_EMPTY:
323 case SCTP_STATE_CLOSED: 323 case SCTP_STATE_CLOSED:
324 case SCTP_STATE_SHUTDOWN_PENDING: 324 case SCTP_STATE_SHUTDOWN_PENDING:
325 case SCTP_STATE_SHUTDOWN_SENT: 325 case SCTP_STATE_SHUTDOWN_SENT:
@@ -545,13 +545,11 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
545 struct sctp_transport *transport = pkt->transport; 545 struct sctp_transport *transport = pkt->transport;
546 sctp_xmit_t status; 546 sctp_xmit_t status;
547 struct sctp_chunk *chunk, *chunk1; 547 struct sctp_chunk *chunk, *chunk1;
548 struct sctp_association *asoc;
549 int fast_rtx; 548 int fast_rtx;
550 int error = 0; 549 int error = 0;
551 int timer = 0; 550 int timer = 0;
552 int done = 0; 551 int done = 0;
553 552
554 asoc = q->asoc;
555 lqueue = &q->retransmit; 553 lqueue = &q->retransmit;
556 fast_rtx = q->fast_rtx; 554 fast_rtx = q->fast_rtx;
557 555
@@ -579,6 +577,13 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
579 * try to send as much as possible. 577 * try to send as much as possible.
580 */ 578 */
581 list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) { 579 list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) {
580 /* If the chunk is abandoned, move it to abandoned list. */
581 if (sctp_chunk_abandoned(chunk)) {
582 list_del_init(&chunk->transmitted_list);
583 sctp_insert_list(&q->abandoned,
584 &chunk->transmitted_list);
585 continue;
586 }
582 587
583 /* Make sure that Gap Acked TSNs are not retransmitted. A 588 /* Make sure that Gap Acked TSNs are not retransmitted. A
584 * simple approach is just to move such TSNs out of the 589 * simple approach is just to move such TSNs out of the
@@ -620,9 +625,12 @@ redo:
620 625
621 /* If we are retransmitting, we should only 626 /* If we are retransmitting, we should only
622 * send a single packet. 627 * send a single packet.
628 * Otherwise, try appending this chunk again.
623 */ 629 */
624 if (rtx_timeout || fast_rtx) 630 if (rtx_timeout || fast_rtx)
625 done = 1; 631 done = 1;
632 else
633 goto redo;
626 634
627 /* Bundle next chunk in the next round. */ 635 /* Bundle next chunk in the next round. */
628 break; 636 break;
@@ -845,7 +853,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
845 case SCTP_CID_ECN_CWR: 853 case SCTP_CID_ECN_CWR:
846 case SCTP_CID_ASCONF_ACK: 854 case SCTP_CID_ASCONF_ACK:
847 one_packet = 1; 855 one_packet = 1;
848 /* Fall throught */ 856 /* Fall through */
849 857
850 case SCTP_CID_SACK: 858 case SCTP_CID_SACK:
851 case SCTP_CID_HEARTBEAT: 859 case SCTP_CID_HEARTBEAT:
@@ -1685,8 +1693,9 @@ static void sctp_mark_missing(struct sctp_outq *q,
1685 /* SFR-CACC may require us to skip marking 1693 /* SFR-CACC may require us to skip marking
1686 * this chunk as missing. 1694 * this chunk as missing.
1687 */ 1695 */
1688 if (!transport || !sctp_cacc_skip(primary, transport, 1696 if (!transport || !sctp_cacc_skip(primary,
1689 count_of_newacks, tsn)) { 1697 chunk->transport,
1698 count_of_newacks, tsn)) {
1690 chunk->tsn_missing_report++; 1699 chunk->tsn_missing_report++;
1691 1700
1692 SCTP_DEBUG_PRINTK( 1701 SCTP_DEBUG_PRINTK(
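
The sctp_outq_flush_rtx() hunk adds a pass that unlinks abandoned chunks from the retransmit list and appends them to the abandoned list while the list is being walked with the _safe iterator. A userspace sketch of that move-while-iterating pattern, with invented list and field names:

/* Userspace sketch: while walking a pending list, nodes flagged as
 * abandoned are unlinked and appended to a separate list instead of
 * being processed. A pointer-to-pointer walk keeps unlinking safe,
 * the job list_for_each_entry_safe() does in the kernel hunk. */
#include <stdio.h>

struct chunk {
	int tsn;
	int abandoned;
	struct chunk *next;
};

static void move_abandoned(struct chunk **src, struct chunk **dst)
{
	while (*dst)
		dst = &(*dst)->next;		/* find tail of dst */
	while (*src) {
		if ((*src)->abandoned) {
			struct chunk *c = *src;

			*src = c->next;		/* unlink from src */
			c->next = NULL;
			*dst = c;		/* append to dst */
			dst = &c->next;
		} else {
			src = &(*src)->next;
		}
	}
}

int main(void)
{
	struct chunk c3 = { 3, 0, NULL }, c2 = { 2, 1, &c3 }, c1 = { 1, 0, &c2 };
	struct chunk *rtx = &c1, *abandoned = NULL;

	move_abandoned(&rtx, &abandoned);
	for (struct chunk *c = rtx; c; c = c->next)
		printf("rtx %d\n", c->tsn);		/* 1, 3 */
	for (struct chunk *c = abandoned; c; c = c->next)
		printf("abandoned %d\n", c->tsn);	/* 2 */
	return 0;
}
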
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e58f9476f29c..67380a29e2e9 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -230,13 +230,6 @@ static void sctp_free_local_addr_list(void)
230 } 230 }
231} 231}
232 232
233void sctp_local_addr_free(struct rcu_head *head)
234{
235 struct sctp_sockaddr_entry *e = container_of(head,
236 struct sctp_sockaddr_entry, rcu);
237 kfree(e);
238}
239
240/* Copy the local addresses which are valid for 'scope' into 'bp'. */ 233/* Copy the local addresses which are valid for 'scope' into 'bp'. */
241int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, 234int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
242 gfp_t gfp, int copy_flags) 235 gfp_t gfp, int copy_flags)
@@ -339,13 +332,12 @@ static int sctp_v4_to_addr_param(const union sctp_addr *addr,
339} 332}
340 333
341/* Initialize a sctp_addr from a dst_entry. */ 334/* Initialize a sctp_addr from a dst_entry. */
342static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct dst_entry *dst, 335static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4,
343 __be16 port) 336 __be16 port)
344{ 337{
345 struct rtable *rt = (struct rtable *)dst;
346 saddr->v4.sin_family = AF_INET; 338 saddr->v4.sin_family = AF_INET;
347 saddr->v4.sin_port = port; 339 saddr->v4.sin_port = port;
348 saddr->v4.sin_addr.s_addr = rt->rt_src; 340 saddr->v4.sin_addr.s_addr = fl4->saddr;
349} 341}
350 342
351/* Compare two addresses exactly. */ 343/* Compare two addresses exactly. */
@@ -463,37 +455,38 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
463 * addresses. If an association is passed, trys to get a dst entry with a 455 * addresses. If an association is passed, trys to get a dst entry with a
464 * source address that matches an address in the bind address list. 456 * source address that matches an address in the bind address list.
465 */ 457 */
466static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, 458static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
467 union sctp_addr *daddr, 459 struct flowi *fl, struct sock *sk)
468 union sctp_addr *saddr)
469{ 460{
461 struct sctp_association *asoc = t->asoc;
470 struct rtable *rt; 462 struct rtable *rt;
471 struct flowi fl; 463 struct flowi4 *fl4 = &fl->u.ip4;
472 struct sctp_bind_addr *bp; 464 struct sctp_bind_addr *bp;
473 struct sctp_sockaddr_entry *laddr; 465 struct sctp_sockaddr_entry *laddr;
474 struct dst_entry *dst = NULL; 466 struct dst_entry *dst = NULL;
467 union sctp_addr *daddr = &t->ipaddr;
475 union sctp_addr dst_saddr; 468 union sctp_addr dst_saddr;
476 469
477 memset(&fl, 0x0, sizeof(struct flowi)); 470 memset(fl4, 0x0, sizeof(struct flowi4));
478 fl.fl4_dst = daddr->v4.sin_addr.s_addr; 471 fl4->daddr = daddr->v4.sin_addr.s_addr;
479 fl.fl_ip_dport = daddr->v4.sin_port; 472 fl4->fl4_dport = daddr->v4.sin_port;
480 fl.proto = IPPROTO_SCTP; 473 fl4->flowi4_proto = IPPROTO_SCTP;
481 if (asoc) { 474 if (asoc) {
482 fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); 475 fl4->flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
483 fl.oif = asoc->base.sk->sk_bound_dev_if; 476 fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
484 fl.fl_ip_sport = htons(asoc->base.bind_addr.port); 477 fl4->fl4_sport = htons(asoc->base.bind_addr.port);
485 } 478 }
486 if (saddr) { 479 if (saddr) {
487 fl.fl4_src = saddr->v4.sin_addr.s_addr; 480 fl4->saddr = saddr->v4.sin_addr.s_addr;
488 fl.fl_ip_sport = saddr->v4.sin_port; 481 fl4->fl4_sport = saddr->v4.sin_port;
489 } 482 }
490 483
491 SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", 484 SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
492 __func__, &fl.fl4_dst, &fl.fl4_src); 485 __func__, &fl4->daddr, &fl4->saddr);
493 486
494 if (!ip_route_output_key(&init_net, &rt, &fl)) { 487 rt = ip_route_output_key(&init_net, fl4);
488 if (!IS_ERR(rt))
495 dst = &rt->dst; 489 dst = &rt->dst;
496 }
497 490
498 /* If there is no association or if a source address is passed, no 491 /* If there is no association or if a source address is passed, no
499 * more validation is required. 492 * more validation is required.
@@ -507,7 +500,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
507 /* Walk through the bind address list and look for a bind 500 /* Walk through the bind address list and look for a bind
508 * address that matches the source address of the returned dst. 501 * address that matches the source address of the returned dst.
509 */ 502 */
510 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); 503 sctp_v4_dst_saddr(&dst_saddr, fl4, htons(bp->port));
511 rcu_read_lock(); 504 rcu_read_lock();
512 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 505 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
513 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) 506 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
@@ -533,9 +526,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
533 continue; 526 continue;
534 if ((laddr->state == SCTP_ADDR_SRC) && 527 if ((laddr->state == SCTP_ADDR_SRC) &&
535 (AF_INET == laddr->a.sa.sa_family)) { 528 (AF_INET == laddr->a.sa.sa_family)) {
536 fl.fl4_src = laddr->a.v4.sin_addr.s_addr; 529 fl4->saddr = laddr->a.v4.sin_addr.s_addr;
537 fl.fl_ip_sport = laddr->a.v4.sin_port; 530 fl4->fl4_sport = laddr->a.v4.sin_port;
538 if (!ip_route_output_key(&init_net, &rt, &fl)) { 531 rt = ip_route_output_key(&init_net, fl4);
532 if (!IS_ERR(rt)) {
539 dst = &rt->dst; 533 dst = &rt->dst;
540 goto out_unlock; 534 goto out_unlock;
541 } 535 }
@@ -545,33 +539,27 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
545out_unlock: 539out_unlock:
546 rcu_read_unlock(); 540 rcu_read_unlock();
547out: 541out:
542 t->dst = dst;
548 if (dst) 543 if (dst)
549 SCTP_DEBUG_PRINTK("rt_dst:%pI4, rt_src:%pI4\n", 544 SCTP_DEBUG_PRINTK("rt_dst:%pI4, rt_src:%pI4\n",
550 &rt->rt_dst, &rt->rt_src); 545 &fl4->daddr, &fl4->saddr);
551 else 546 else
552 SCTP_DEBUG_PRINTK("NO ROUTE\n"); 547 SCTP_DEBUG_PRINTK("NO ROUTE\n");
553
554 return dst;
555} 548}
556 549
557/* For v4, the source address is cached in the route entry(dst). So no need 550/* For v4, the source address is cached in the route entry(dst). So no need
558 * to cache it separately and hence this is an empty routine. 551 * to cache it separately and hence this is an empty routine.
559 */ 552 */
560static void sctp_v4_get_saddr(struct sctp_sock *sk, 553static void sctp_v4_get_saddr(struct sctp_sock *sk,
561 struct sctp_association *asoc, 554 struct sctp_transport *t,
562 struct dst_entry *dst, 555 struct flowi *fl)
563 union sctp_addr *daddr,
564 union sctp_addr *saddr)
565{ 556{
566 struct rtable *rt = (struct rtable *)dst; 557 union sctp_addr *saddr = &t->saddr;
567 558 struct rtable *rt = (struct rtable *)t->dst;
568 if (!asoc)
569 return;
570 559
571 if (rt) { 560 if (rt) {
572 saddr->v4.sin_family = AF_INET; 561 saddr->v4.sin_family = AF_INET;
573 saddr->v4.sin_port = htons(asoc->base.bind_addr.port); 562 saddr->v4.sin_addr.s_addr = fl->u.ip4.saddr;
574 saddr->v4.sin_addr.s_addr = rt->rt_src;
575 } 563 }
576} 564}
577 565
@@ -680,7 +668,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
680 } 668 }
681 spin_unlock_bh(&sctp_local_addr_lock); 669 spin_unlock_bh(&sctp_local_addr_lock);
682 if (found) 670 if (found)
683 call_rcu(&addr->rcu, sctp_local_addr_free); 671 kfree_rcu(addr, rcu);
684 break; 672 break;
685 } 673 }
686 674
@@ -853,14 +841,14 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
853 841
854 SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", 842 SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n",
855 __func__, skb, skb->len, 843 __func__, skb, skb->len,
856 &skb_rtable(skb)->rt_src, 844 &transport->fl.u.ip4.saddr,
857 &skb_rtable(skb)->rt_dst); 845 &transport->fl.u.ip4.daddr);
858 846
859 inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? 847 inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
860 IP_PMTUDISC_DO : IP_PMTUDISC_DONT; 848 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
861 849
862 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); 850 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
863 return ip_queue_xmit(skb); 851 return ip_queue_xmit(skb, &transport->fl);
864} 852}
865 853
866static struct sctp_af sctp_af_inet; 854static struct sctp_af sctp_af_inet;
@@ -949,7 +937,6 @@ static struct sctp_af sctp_af_inet = {
949 .to_sk_daddr = sctp_v4_to_sk_daddr, 937 .to_sk_daddr = sctp_v4_to_sk_daddr,
950 .from_addr_param = sctp_v4_from_addr_param, 938 .from_addr_param = sctp_v4_from_addr_param,
951 .to_addr_param = sctp_v4_to_addr_param, 939 .to_addr_param = sctp_v4_to_addr_param,
952 .dst_saddr = sctp_v4_dst_saddr,
953 .cmp_addr = sctp_v4_cmp_addr, 940 .cmp_addr = sctp_v4_cmp_addr,
954 .addr_valid = sctp_v4_addr_valid, 941 .addr_valid = sctp_v4_addr_valid,
955 .inaddr_any = sctp_v4_inaddr_any, 942 .inaddr_any = sctp_v4_inaddr_any,
@@ -1204,7 +1191,7 @@ SCTP_STATIC __init int sctp_init(void)
1204 if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) 1191 if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
1205 continue; 1192 continue;
1206 sctp_assoc_hashtable = (struct sctp_hashbucket *) 1193 sctp_assoc_hashtable = (struct sctp_hashbucket *)
1207 __get_free_pages(GFP_ATOMIC, order); 1194 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
1208 } while (!sctp_assoc_hashtable && --order > 0); 1195 } while (!sctp_assoc_hashtable && --order > 0);
1209 if (!sctp_assoc_hashtable) { 1196 if (!sctp_assoc_hashtable) {
1210 pr_err("Failed association hash alloc\n"); 1197 pr_err("Failed association hash alloc\n");
@@ -1237,7 +1224,7 @@ SCTP_STATIC __init int sctp_init(void)
1237 if ((sctp_port_hashsize > (64 * 1024)) && order > 0) 1224 if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
1238 continue; 1225 continue;
1239 sctp_port_hashtable = (struct sctp_bind_hashbucket *) 1226 sctp_port_hashtable = (struct sctp_bind_hashbucket *)
1240 __get_free_pages(GFP_ATOMIC, order); 1227 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
1241 } while (!sctp_port_hashtable && --order > 0); 1228 } while (!sctp_port_hashtable && --order > 0);
1242 if (!sctp_port_hashtable) { 1229 if (!sctp_port_hashtable) {
1243 pr_err("Failed bind hash alloc\n"); 1230 pr_err("Failed bind hash alloc\n");
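
The sctp_init() hunks add __GFP_NOWARN to the hash table allocations; a failed attempt there is expected, because the surrounding loop simply retries with a smaller order until something fits, so warning on every miss would only be log noise. A userspace sketch of that retry-smaller loop, with malloc() standing in for __get_free_pages():

/* Sketch of the retry-with-smaller-size allocation loop. */
#include <stdio.h>
#include <stdlib.h>

static void *alloc_table(size_t *chosen, size_t want, size_t floor)
{
	void *p = NULL;

	/* Keep halving until an allocation succeeds or the floor is hit;
	 * the kernel version decrements the page order instead. */
	while (want >= floor && !(p = malloc(want)))
		want /= 2;
	*chosen = p ? want : 0;
	return p;
}

int main(void)
{
	size_t got;
	void *tbl = alloc_table(&got, 1 << 20, 4096);

	printf("allocated %zu bytes\n", got);
	free(tbl);
	return 0;
}
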
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b23428f3c0dd..58eb27fed4b4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1075,20 +1075,28 @@ nodata:
1075 1075
1076/* Make a HEARTBEAT chunk. */ 1076/* Make a HEARTBEAT chunk. */
1077struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, 1077struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
1078 const struct sctp_transport *transport, 1078 const struct sctp_transport *transport)
1079 const void *payload, const size_t paylen)
1080{ 1079{
1081 struct sctp_chunk *retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT, 1080 struct sctp_chunk *retval;
1082 0, paylen); 1081 sctp_sender_hb_info_t hbinfo;
1082
1083 retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
1083 1084
1084 if (!retval) 1085 if (!retval)
1085 goto nodata; 1086 goto nodata;
1086 1087
1088 hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
1089 hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
1090 hbinfo.daddr = transport->ipaddr;
1091 hbinfo.sent_at = jiffies;
1092 hbinfo.hb_nonce = transport->hb_nonce;
1093
1087 /* Cast away the 'const', as this is just telling the chunk 1094 /* Cast away the 'const', as this is just telling the chunk
1088 * what transport it belongs to. 1095 * what transport it belongs to.
1089 */ 1096 */
1090 retval->transport = (struct sctp_transport *) transport; 1097 retval->transport = (struct sctp_transport *) transport;
1091 retval->subh.hbs_hdr = sctp_addto_chunk(retval, paylen, payload); 1098 retval->subh.hbs_hdr = sctp_addto_chunk(retval, sizeof(hbinfo),
1099 &hbinfo);
1092 1100
1093nodata: 1101nodata:
1094 return retval; 1102 return retval;
@@ -2242,14 +2250,17 @@ int sctp_verify_init(const struct sctp_association *asoc,
2242 * Returns 0 on failure, else success. 2250 * Returns 0 on failure, else success.
2243 * FIXME: This is an association method. 2251 * FIXME: This is an association method.
2244 */ 2252 */
2245int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, 2253int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
2246 const union sctp_addr *peer_addr, 2254 const union sctp_addr *peer_addr,
2247 sctp_init_chunk_t *peer_init, gfp_t gfp) 2255 sctp_init_chunk_t *peer_init, gfp_t gfp)
2248{ 2256{
2249 union sctp_params param; 2257 union sctp_params param;
2250 struct sctp_transport *transport; 2258 struct sctp_transport *transport;
2251 struct list_head *pos, *temp; 2259 struct list_head *pos, *temp;
2260 struct sctp_af *af;
2261 union sctp_addr addr;
2252 char *cookie; 2262 char *cookie;
2263 int src_match = 0;
2253 2264
2254 /* We must include the address that the INIT packet came from. 2265 /* We must include the address that the INIT packet came from.
2255 * This is the only address that matters for an INIT packet. 2266 * This is the only address that matters for an INIT packet.
@@ -2261,18 +2272,31 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
2261 * added as the primary transport. The source address seems to 2272 * added as the primary transport. The source address seems to
2262 * be a a better choice than any of the embedded addresses. 2273 * be a a better choice than any of the embedded addresses.
2263 */ 2274 */
2264 if (peer_addr) { 2275 if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE))
2265 if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) 2276 goto nomem;
2266 goto nomem; 2277
2267 } 2278 if (sctp_cmp_addr_exact(sctp_source(chunk), peer_addr))
2279 src_match = 1;
2268 2280
2269 /* Process the initialization parameters. */ 2281 /* Process the initialization parameters. */
2270 sctp_walk_params(param, peer_init, init_hdr.params) { 2282 sctp_walk_params(param, peer_init, init_hdr.params) {
2283 if (!src_match && (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
2284 param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
2285 af = sctp_get_af_specific(param_type2af(param.p->type));
2286 af->from_addr_param(&addr, param.addr,
2287 chunk->sctp_hdr->source, 0);
2288 if (sctp_cmp_addr_exact(sctp_source(chunk), &addr))
2289 src_match = 1;
2290 }
2271 2291
2272 if (!sctp_process_param(asoc, param, peer_addr, gfp)) 2292 if (!sctp_process_param(asoc, param, peer_addr, gfp))
2273 goto clean_up; 2293 goto clean_up;
2274 } 2294 }
2275 2295
2296 /* source address of chunk may not match any valid address */
2297 if (!src_match)
2298 goto clean_up;
2299
2276 /* AUTH: After processing the parameters, make sure that we 2300 /* AUTH: After processing the parameters, make sure that we
2277 * have all the required info to potentially do authentications. 2301 * have all the required info to potentially do authentications.
2278 */ 2302 */
@@ -2923,7 +2947,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2923 asconf_param->param_hdr.type != SCTP_PARAM_SET_PRIMARY) 2947 asconf_param->param_hdr.type != SCTP_PARAM_SET_PRIMARY)
2924 return SCTP_ERROR_UNKNOWN_PARAM; 2948 return SCTP_ERROR_UNKNOWN_PARAM;
2925 2949
2926 switch (addr_param->v4.param_hdr.type) { 2950 switch (addr_param->p.type) {
2927 case SCTP_PARAM_IPV6_ADDRESS: 2951 case SCTP_PARAM_IPV6_ADDRESS:
2928 if (!asoc->peer.ipv6_address) 2952 if (!asoc->peer.ipv6_address)
2929 return SCTP_ERROR_DNS_FAILED; 2953 return SCTP_ERROR_DNS_FAILED;
@@ -2936,7 +2960,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2936 return SCTP_ERROR_DNS_FAILED; 2960 return SCTP_ERROR_DNS_FAILED;
2937 } 2961 }
2938 2962
2939 af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type)); 2963 af = sctp_get_af_specific(param_type2af(addr_param->p.type));
2940 if (unlikely(!af)) 2964 if (unlikely(!af))
2941 return SCTP_ERROR_DNS_FAILED; 2965 return SCTP_ERROR_DNS_FAILED;
2942 2966
@@ -3100,16 +3124,16 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
3100 /* Skip the address parameter and store a pointer to the first 3124 /* Skip the address parameter and store a pointer to the first
3101 * asconf parameter. 3125 * asconf parameter.
3102 */ 3126 */
3103 length = ntohs(addr_param->v4.param_hdr.length); 3127 length = ntohs(addr_param->p.length);
3104 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); 3128 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length);
3105 chunk_len -= length; 3129 chunk_len -= length;
3106 3130
3107 /* create an ASCONF_ACK chunk. 3131 /* create an ASCONF_ACK chunk.
3108 * Based on the definitions of parameters, we know that the size of 3132 * Based on the definitions of parameters, we know that the size of
3109 * ASCONF_ACK parameters are less than or equal to the twice of ASCONF 3133 * ASCONF_ACK parameters are less than or equal to the fourfold of ASCONF
3110 * parameters. 3134 * parameters.
3111 */ 3135 */
3112 asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 2); 3136 asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 4);
3113 if (!asconf_ack) 3137 if (!asconf_ack)
3114 goto done; 3138 goto done;
3115 3139
@@ -3177,7 +3201,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
3177 ((void *)asconf_param + sizeof(sctp_addip_param_t)); 3201 ((void *)asconf_param + sizeof(sctp_addip_param_t));
3178 3202
3179 /* We have checked the packet before, so we do not check again. */ 3203 /* We have checked the packet before, so we do not check again. */
3180 af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type)); 3204 af = sctp_get_af_specific(param_type2af(addr_param->p.type));
3181 af->from_addr_param(&addr, addr_param, htons(bp->port), 0); 3205 af->from_addr_param(&addr, addr_param, htons(bp->port), 0);
3182 3206
3183 switch (asconf_param->param_hdr.type) { 3207 switch (asconf_param->param_hdr.type) {
@@ -3193,11 +3217,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
3193 local_bh_enable(); 3217 local_bh_enable();
3194 list_for_each_entry(transport, &asoc->peer.transport_addr_list, 3218 list_for_each_entry(transport, &asoc->peer.transport_addr_list,
3195 transports) { 3219 transports) {
3196 if (transport->state == SCTP_ACTIVE)
3197 continue;
3198 dst_release(transport->dst); 3220 dst_release(transport->dst);
3199 sctp_transport_route(transport, NULL, 3221 transport->dst = NULL;
3200 sctp_sk(asoc->base.sk));
3201 } 3222 }
3202 break; 3223 break;
3203 case SCTP_PARAM_DEL_IP: 3224 case SCTP_PARAM_DEL_IP:
@@ -3207,8 +3228,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
3207 list_for_each_entry(transport, &asoc->peer.transport_addr_list, 3228 list_for_each_entry(transport, &asoc->peer.transport_addr_list,
3208 transports) { 3229 transports) {
3209 dst_release(transport->dst); 3230 dst_release(transport->dst);
3210 sctp_transport_route(transport, NULL, 3231 transport->dst = NULL;
3211 sctp_sk(asoc->base.sk));
3212 } 3232 }
3213 break; 3233 break;
3214 default: 3234 default:
@@ -3304,7 +3324,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
3304 /* Skip the address parameter in the last asconf sent and store a 3324 /* Skip the address parameter in the last asconf sent and store a
3305 * pointer to the first asconf parameter. 3325 * pointer to the first asconf parameter.
3306 */ 3326 */
3307 length = ntohs(addr_param->v4.param_hdr.length); 3327 length = ntohs(addr_param->p.length);
3308 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); 3328 asconf_param = (sctp_addip_param_t *)((void *)addr_param + length);
3309 asconf_len -= length; 3329 asconf_len -= length;
3310 3330
@@ -3375,7 +3395,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3375 struct sctp_fwdtsn_skip *skiplist) 3395 struct sctp_fwdtsn_skip *skiplist)
3376{ 3396{
3377 struct sctp_chunk *retval = NULL; 3397 struct sctp_chunk *retval = NULL;
3378 struct sctp_fwdtsn_chunk *ftsn_chunk;
3379 struct sctp_fwdtsn_hdr ftsn_hdr; 3398 struct sctp_fwdtsn_hdr ftsn_hdr;
3380 struct sctp_fwdtsn_skip skip; 3399 struct sctp_fwdtsn_skip skip;
3381 size_t hint; 3400 size_t hint;
@@ -3388,8 +3407,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3388 if (!retval) 3407 if (!retval)
3389 return NULL; 3408 return NULL;
3390 3409
3391 ftsn_chunk = (struct sctp_fwdtsn_chunk *)retval->subh.fwdtsn_hdr;
3392
3393 ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn); 3410 ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn);
3394 retval->subh.fwdtsn_hdr = 3411 retval->subh.fwdtsn_hdr =
3395 sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr); 3412 sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr);
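
sctp_make_heartbeat() now builds the sender heartbeat info (parameter header, destination, timestamp, nonce) itself instead of taking a caller-supplied payload, which is why the sm_statefuns.c caller further below shrinks. A userspace sketch of filling such a parameter and appending it to a chunk buffer; the layout here is a simplified stand-in, not the real sctp_sender_hb_info_t.

/* Sketch: build a heartbeat-info parameter and append it to a chunk. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

struct hb_info {
	uint16_t type;		/* parameter type, network byte order */
	uint16_t length;	/* whole parameter length, network byte order */
	uint64_t sent_at;	/* sender-private: timestamp */
	uint64_t nonce;		/* sender-private: anti-spoofing nonce */
};

static size_t append_hb_info(unsigned char *buf, uint64_t nonce)
{
	struct hb_info hb = {
		.type = htons(1),	/* HEARTBEAT INFO parameter */
		.length = htons((uint16_t)sizeof(hb)),
		.sent_at = (uint64_t)time(NULL),
		.nonce = nonce,
	};

	memcpy(buf, &hb, sizeof(hb));	/* like sctp_addto_chunk() */
	return sizeof(hb);
}

int main(void)
{
	unsigned char chunk[64];
	size_t len = append_hb_info(chunk, 0xdeadbeefULL);

	printf("appended %zu bytes of heartbeat info\n", len);
	return 0;
}
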
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b21b218d564f..d612ca1ca6c0 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -482,7 +482,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
482 * If the timer was a heartbeat, we only increment error counts 482 * If the timer was a heartbeat, we only increment error counts
483 * when we already have an outstanding HEARTBEAT that has not 483 * when we already have an outstanding HEARTBEAT that has not
484 * been acknowledged. 484 * been acknowledged.
485 * Additionaly, some tranport states inhibit error increments. 485 * Additionally, some tranport states inhibit error increments.
486 */ 486 */
487 if (!is_hb) { 487 if (!is_hb) {
488 asoc->overall_error_count++; 488 asoc->overall_error_count++;
@@ -595,8 +595,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
595 * fail during INIT processing (due to malloc problems), 595 * fail during INIT processing (due to malloc problems),
596 * just return the error and stop processing the stack. 596 * just return the error and stop processing the stack.
597 */ 597 */
598 if (!sctp_process_init(asoc, chunk->chunk_hdr->type, 598 if (!sctp_process_init(asoc, chunk, sctp_source(chunk), peer_init, gfp))
599 sctp_source(chunk), peer_init, gfp))
600 error = -ENOMEM; 599 error = -ENOMEM;
601 else 600 else
602 error = 0; 601 error = 0;
@@ -1415,12 +1414,6 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1415 SCTP_RTXR_T3_RTX); 1414 SCTP_RTXR_T3_RTX);
1416 break; 1415 break;
1417 1416
1418 case SCTP_CMD_TRANSMIT:
1419 /* Kick start transmission. */
1420 error = sctp_outq_uncork(&asoc->outqueue);
1421 local_cork = 0;
1422 break;
1423
1424 case SCTP_CMD_ECN_CE: 1417 case SCTP_CMD_ECN_CE:
1425 /* Do delayed CE processing. */ 1418 /* Do delayed CE processing. */
1426 sctp_do_ecn_ce_work(asoc, cmd->obj.u32); 1419 sctp_do_ecn_ce_work(asoc, cmd->obj.u32);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4b4eb7c96bbd..7f4a4f8368ee 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -393,8 +393,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
393 goto nomem_init; 393 goto nomem_init;
394 394
395 /* The call, sctp_process_init(), can fail on memory allocation. */ 395 /* The call, sctp_process_init(), can fail on memory allocation. */
396 if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, 396 if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
397 sctp_source(chunk),
398 (sctp_init_chunk_t *)chunk->chunk_hdr, 397 (sctp_init_chunk_t *)chunk->chunk_hdr,
399 GFP_ATOMIC)) 398 GFP_ATOMIC))
400 goto nomem_init; 399 goto nomem_init;
@@ -551,7 +550,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
551 * 550 *
552 * This means that if we only want to abort associations 551 * This means that if we only want to abort associations
553 * in an authenticated way (i.e AUTH+ABORT), then we 552 * in an authenticated way (i.e AUTH+ABORT), then we
554 * can't destroy this association just becuase the packet 553 * can't destroy this association just because the packet
555 * was malformed. 554 * was malformed.
556 */ 555 */
557 if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) 556 if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
@@ -725,7 +724,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
725 */ 724 */
726 peer_init = &chunk->subh.cookie_hdr->c.peer_init[0]; 725 peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
727 726
728 if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, 727 if (!sctp_process_init(new_asoc, chunk,
729 &chunk->subh.cookie_hdr->c.peer_addr, 728 &chunk->subh.cookie_hdr->c.peer_addr,
730 peer_init, GFP_ATOMIC)) 729 peer_init, GFP_ATOMIC))
731 goto nomem_init; 730 goto nomem_init;
@@ -942,18 +941,9 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
942{ 941{
943 struct sctp_transport *transport = (struct sctp_transport *) arg; 942 struct sctp_transport *transport = (struct sctp_transport *) arg;
944 struct sctp_chunk *reply; 943 struct sctp_chunk *reply;
945 sctp_sender_hb_info_t hbinfo;
946 size_t paylen = 0;
947
948 hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
949 hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
950 hbinfo.daddr = transport->ipaddr;
951 hbinfo.sent_at = jiffies;
952 hbinfo.hb_nonce = transport->hb_nonce;
953 944
954 /* Send a heartbeat to our peer. */ 945 /* Send a heartbeat to our peer. */
955 paylen = sizeof(sctp_sender_hb_info_t); 946 reply = sctp_make_heartbeat(asoc, transport);
956 reply = sctp_make_heartbeat(asoc, transport, &hbinfo, paylen);
957 if (!reply) 947 if (!reply)
958 return SCTP_DISPOSITION_NOMEM; 948 return SCTP_DISPOSITION_NOMEM;
959 949
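With the heartbeat-info setup removed from the caller, sctp_make_heartbeat() is left to build the sctp_sender_hb_info_t payload itself from the transport. A sketch of what the chunk builder presumably does now, reusing the fields the removed caller code used to fill; sctp_make_chunk() and sctp_addto_chunk() are the usual sm_make_chunk.c helpers and their use here is an assumption:

        struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
                                               const struct sctp_transport *transport)
        {
                struct sctp_chunk *retval;
                sctp_sender_hb_info_t hbinfo;

                /* Allocate a HEARTBEAT chunk sized for one heartbeat-info parameter. */
                retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
                if (!retval)
                        return NULL;

                /* Same fields the state function used to fill before this change. */
                hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
                hbinfo.param_hdr.length = htons(sizeof(hbinfo));
                hbinfo.daddr = transport->ipaddr;
                hbinfo.sent_at = jiffies;
                hbinfo.hb_nonce = transport->hb_nonce;

                sctp_addto_chunk(retval, sizeof(hbinfo), &hbinfo);
                return retval;
        }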
@@ -1464,8 +1454,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
1464 * Verification Tag and Peers Verification tag into a reserved 1454 * Verification Tag and Peers Verification tag into a reserved
1465 * place (local tie-tag and per tie-tag) within the state cookie. 1455 * place (local tie-tag and per tie-tag) within the state cookie.
1466 */ 1456 */
1467 if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, 1457 if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
1468 sctp_source(chunk),
1469 (sctp_init_chunk_t *)chunk->chunk_hdr, 1458 (sctp_init_chunk_t *)chunk->chunk_hdr,
1470 GFP_ATOMIC)) 1459 GFP_ATOMIC))
1471 goto nomem; 1460 goto nomem;
@@ -1546,7 +1535,7 @@ cleanup:
1546} 1535}
1547 1536
1548/* 1537/*
1549 * Handle simultanous INIT. 1538 * Handle simultaneous INIT.
1550 * This means we started an INIT and then we got an INIT request from 1539 * This means we started an INIT and then we got an INIT request from
1551 * our peer. 1540 * our peer.
1552 * 1541 *
@@ -1694,8 +1683,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
1694 */ 1683 */
1695 peer_init = &chunk->subh.cookie_hdr->c.peer_init[0]; 1684 peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
1696 1685
1697 if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, 1686 if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), peer_init,
1698 sctp_source(chunk), peer_init,
1699 GFP_ATOMIC)) 1687 GFP_ATOMIC))
1700 goto nomem; 1688 goto nomem;
1701 1689
@@ -1780,8 +1768,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
1780 * side effects--it is safe to run them here. 1768 * side effects--it is safe to run them here.
1781 */ 1769 */
1782 peer_init = &chunk->subh.cookie_hdr->c.peer_init[0]; 1770 peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
1783 if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, 1771 if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), peer_init,
1784 sctp_source(chunk), peer_init,
1785 GFP_ATOMIC)) 1772 GFP_ATOMIC))
1786 goto nomem; 1773 goto nomem;
1787 1774
@@ -2079,7 +2066,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
2079 * RFC 2960, Section 3.3.7 2066 * RFC 2960, Section 3.3.7
2080 * If an endpoint receives an ABORT with a format error or for an 2067 * If an endpoint receives an ABORT with a format error or for an
2081 * association that doesn't exist, it MUST silently discard it. 2068 * association that doesn't exist, it MUST silently discard it.
2082 * Becasue the length is "invalid", we can't really discard just 2069 * Because the length is "invalid", we can't really discard just
2083 * as we do not know its true length. So, to be safe, discard the 2070 * as we do not know its true length. So, to be safe, discard the
2084 * packet. 2071 * packet.
2085 */ 2072 */
@@ -2120,7 +2107,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
2120 * RFC 2960, Section 3.3.7 2107 * RFC 2960, Section 3.3.7
2121 * If an endpoint receives an ABORT with a format error or for an 2108 * If an endpoint receives an ABORT with a format error or for an
2122 * association that doesn't exist, it MUST silently discard it. 2109 * association that doesn't exist, it MUST silently discard it.
2123 * Becasue the length is "invalid", we can't really discard just 2110 * Because the length is "invalid", we can't really discard just
2124 * as we do not know its true length. So, to be safe, discard the 2111 * as we do not know its true length. So, to be safe, discard the
2125 * packet. 2112 * packet.
2126 */ 2113 */
@@ -2381,7 +2368,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2381 * RFC 2960, Section 3.3.7 2368 * RFC 2960, Section 3.3.7
2382 * If an endpoint receives an ABORT with a format error or for an 2369 * If an endpoint receives an ABORT with a format error or for an
2383 * association that doesn't exist, it MUST silently discard it. 2370 * association that doesn't exist, it MUST silently discard it.
2384 * Becasue the length is "invalid", we can't really discard just 2371 * Because the length is "invalid", we can't really discard just
2385 * as we do not know its true length. So, to be safe, discard the 2372 * as we do not know its true length. So, to be safe, discard the
2386 * packet. 2373 * packet.
2387 */ 2374 */
@@ -2412,8 +2399,15 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2412 2399
2413 /* See if we have an error cause code in the chunk. */ 2400 /* See if we have an error cause code in the chunk. */
2414 len = ntohs(chunk->chunk_hdr->length); 2401 len = ntohs(chunk->chunk_hdr->length);
2415 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) 2402 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
2403
2404 sctp_errhdr_t *err;
2405 sctp_walk_errors(err, chunk->chunk_hdr);
2406 if ((void *)err != (void *)chunk->chunk_end)
2407 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
2408
2416 error = ((sctp_errhdr_t *)chunk->skb->data)->cause; 2409 error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
2410 }
2417 2411
2418 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); 2412 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
2419 /* ASSOC_FAILED will DELETE_TCB. */ 2413 /* ASSOC_FAILED will DELETE_TCB. */
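Both this hunk and the sctp_sf_operr_notify() change further down use sctp_walk_errors() purely as a bounds check: the macro advances through the chunk one error cause at a time using each cause's declared length, so the cursor lands exactly on chunk_end only if every length was sane. An illustrative reimplementation of that check (not the kernel's macro, just the idea; the helper name is made up):

        /* Walk TLV-style error causes and confirm their declared lengths tile
         * the chunk exactly; WORD_ROUND pads each cause to a 4-byte boundary
         * as SCTP requires.
         */
        static int error_causes_are_sane(sctp_chunkhdr_t *ch, void *chunk_end)
        {
                sctp_errhdr_t *err = (sctp_errhdr_t *)((u8 *)ch + sizeof(*ch));

                while ((void *)err < chunk_end && ntohs(err->length) >= sizeof(*err))
                        err = (sctp_errhdr_t *)((u8 *)err +
                                                WORD_ROUND(ntohs(err->length)));

                /* Stopping anywhere but chunk_end means some length was bogus. */
                return (void *)err == chunk_end;
        }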
@@ -2448,7 +2442,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
2448 * RFC 2960, Section 3.3.7 2442 * RFC 2960, Section 3.3.7
2449 * If an endpoint receives an ABORT with a format error or for an 2443 * If an endpoint receives an ABORT with a format error or for an
2450 * association that doesn't exist, it MUST silently discard it. 2444 * association that doesn't exist, it MUST silently discard it.
2451 * Becasue the length is "invalid", we can't really discard just 2445 * Because the length is "invalid", we can't really discard just
2452 * as we do not know its true length. So, to be safe, discard the 2446 * as we do not know its true length. So, to be safe, discard the
2453 * packet. 2447 * packet.
2454 */ 2448 */
@@ -3204,6 +3198,7 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
3204 sctp_cmd_seq_t *commands) 3198 sctp_cmd_seq_t *commands)
3205{ 3199{
3206 struct sctp_chunk *chunk = arg; 3200 struct sctp_chunk *chunk = arg;
3201 sctp_errhdr_t *err;
3207 3202
3208 if (!sctp_vtag_verify(chunk, asoc)) 3203 if (!sctp_vtag_verify(chunk, asoc))
3209 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 3204 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -3212,6 +3207,10 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
3212 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) 3207 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
3213 return sctp_sf_violation_chunklen(ep, asoc, type, arg, 3208 return sctp_sf_violation_chunklen(ep, asoc, type, arg,
3214 commands); 3209 commands);
3210 sctp_walk_errors(err, chunk->chunk_hdr);
3211 if ((void *)err != (void *)chunk->chunk_end)
3212 return sctp_sf_violation_paramlen(ep, asoc, type, arg,
3213 (void *)err, commands);
3215 3214
3216 sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, 3215 sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR,
3217 SCTP_CHUNK(chunk)); 3216 SCTP_CHUNK(chunk));
@@ -3320,8 +3319,10 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
3320 struct sctp_chunk *chunk = arg; 3319 struct sctp_chunk *chunk = arg;
3321 struct sk_buff *skb = chunk->skb; 3320 struct sk_buff *skb = chunk->skb;
3322 sctp_chunkhdr_t *ch; 3321 sctp_chunkhdr_t *ch;
3322 sctp_errhdr_t *err;
3323 __u8 *ch_end; 3323 __u8 *ch_end;
3324 int ootb_shut_ack = 0; 3324 int ootb_shut_ack = 0;
3325 int ootb_cookie_ack = 0;
3325 3326
3326 SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); 3327 SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
3327 3328
@@ -3346,6 +3347,23 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
3346 if (SCTP_CID_ABORT == ch->type) 3347 if (SCTP_CID_ABORT == ch->type)
3347 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 3348 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
3348 3349
3350 /* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR
3351 * or a COOKIE ACK the SCTP Packet should be silently
3352 * discarded.
3353 */
3354
3355 if (SCTP_CID_COOKIE_ACK == ch->type)
3356 ootb_cookie_ack = 1;
3357
3358 if (SCTP_CID_ERROR == ch->type) {
3359 sctp_walk_errors(err, ch) {
3360 if (SCTP_ERROR_STALE_COOKIE == err->cause) {
3361 ootb_cookie_ack = 1;
3362 break;
3363 }
3364 }
3365 }
3366
3349 /* Report violation if chunk len overflows */ 3367 /* Report violation if chunk len overflows */
3350 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); 3368 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
3351 if (ch_end > skb_tail_pointer(skb)) 3369 if (ch_end > skb_tail_pointer(skb))
@@ -3357,6 +3375,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
3357 3375
3358 if (ootb_shut_ack) 3376 if (ootb_shut_ack)
3359 return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); 3377 return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
3378 else if (ootb_cookie_ack)
3379 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
3360 else 3380 else
3361 return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); 3381 return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
3362} 3382}
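The new OOTB branch implements RFC 2960 section 8.4 rule 7: an out-of-the-blue packet carrying a COOKIE ACK, or an ERROR chunk containing a "Stale Cookie" cause, is dropped silently instead of being answered with an ABORT. The added flags condense to a classification like this (helper name is illustrative; the types and constants are the ones used in the hunk):

        /* Return 1 when rule 7 asks for a silent discard of this OOTB chunk. */
        static int ootb_wants_silent_discard(sctp_chunkhdr_t *ch)
        {
                sctp_errhdr_t *err;

                if (SCTP_CID_COOKIE_ACK == ch->type)
                        return 1;

                if (SCTP_CID_ERROR == ch->type) {
                        sctp_walk_errors(err, ch) {
                                if (SCTP_ERROR_STALE_COOKIE == err->cause)
                                        return 1;
                        }
                }
                return 0;       /* anything else still gets the 8.4.8 ABORT */
        }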
@@ -3855,7 +3875,7 @@ gen_shutdown:
3855} 3875}
3856 3876
3857/* 3877/*
3858 * SCTP-AUTH Section 6.3 Receving authenticated chukns 3878 * SCTP-AUTH Section 6.3 Receiving authenticated chunks
3859 * 3879 *
3860 * The receiver MUST use the HMAC algorithm indicated in the HMAC 3880 * The receiver MUST use the HMAC algorithm indicated in the HMAC
3861 * Identifier field. If this algorithm was not specified by the 3881 * Identifier field. If this algorithm was not specified by the
@@ -4231,7 +4251,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
4231 * 4251 *
4232 * This means that if we only want to abort associations 4252 * This means that if we only want to abort associations
4233 * in an authenticated way (i.e AUTH+ABORT), then we 4253 * in an authenticated way (i.e AUTH+ABORT), then we
4234 * can't destroy this association just becuase the packet 4254 * can't destroy this association just because the packet
4235 * was malformed. 4255 * was malformed.
4236 */ 4256 */
4237 if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) 4257 if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
@@ -4343,8 +4363,9 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
4343 4363
4344/* 4364/*
4345 * Handle a protocol violation when the parameter length is invalid. 4365 * Handle a protocol violation when the parameter length is invalid.
4346 * "Invalid" length is identified as smaller than the minimal length a 4366 * If the length is smaller than the minimum length of a given parameter,
4347 * given parameter can be. 4367 * or the accumulated length of multiple parameters exceeds the end of the chunk,
4368 * the length is considered as invalid.
4348 */ 4369 */
4349static sctp_disposition_t sctp_sf_violation_paramlen( 4370static sctp_disposition_t sctp_sf_violation_paramlen(
4350 const struct sctp_endpoint *ep, 4371 const struct sctp_endpoint *ep,
@@ -4402,9 +4423,9 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
4402} 4423}
4403 4424
4404/* Handle protocol violation of an invalid chunk bundling. For example, 4425/* Handle protocol violation of an invalid chunk bundling. For example,
4405 * when we have an association and we recieve bundled INIT-ACK, or 4426 * when we have an association and we receive bundled INIT-ACK, or
4406 * SHUDOWN-COMPLETE, our peer is clearly violationg the "MUST NOT bundle" 4427 * SHUDOWN-COMPLETE, our peer is clearly violationg the "MUST NOT bundle"
4407 * statement from the specs. Additinally, there might be an attacker 4428 * statement from the specs. Additionally, there might be an attacker
4408 * on the path and we may not want to continue this communication. 4429 * on the path and we may not want to continue this communication.
4409 */ 4430 */
4410static sctp_disposition_t sctp_sf_violation_chunk( 4431static sctp_disposition_t sctp_sf_violation_chunk(
@@ -5056,6 +5077,30 @@ sctp_disposition_t sctp_sf_ignore_primitive(
5056 ***************************************************************************/ 5077 ***************************************************************************/
5057 5078
5058/* 5079/*
5080 * When the SCTP stack has no more user data to send or retransmit, this
5081 * notification is given to the user. Also, at the time when a user app
5082 * subscribes to this event, if there is no data to be sent or
5083 * retransmit, the stack will immediately send up this notification.
5084 */
5085sctp_disposition_t sctp_sf_do_no_pending_tsn(
5086 const struct sctp_endpoint *ep,
5087 const struct sctp_association *asoc,
5088 const sctp_subtype_t type,
5089 void *arg,
5090 sctp_cmd_seq_t *commands)
5091{
5092 struct sctp_ulpevent *event;
5093
5094 event = sctp_ulpevent_make_sender_dry_event(asoc, GFP_ATOMIC);
5095 if (!event)
5096 return SCTP_DISPOSITION_NOMEM;
5097
5098 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(event));
5099
5100 return SCTP_DISPOSITION_CONSUME;
5101}
5102
5103/*
5059 * Start the shutdown negotiation. 5104 * Start the shutdown negotiation.
5060 * 5105 *
5061 * From Section 9.2: 5106 * From Section 9.2:
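sctp_sf_do_no_pending_tsn() above turns the "no outstanding data" condition into an SCTP_SENDER_DRY_EVENT notification, built by sctp_ulpevent_make_sender_dry_event() in the ulpevent.c hunk further down. The event only reaches user space if the application subscribed to it; a hedged user-space sketch of that subscription, assuming the sctp_sender_dry_event member added to struct sctp_event_subscribe by the companion header change, which is not part of this diff:

        #include <string.h>
        #include <netinet/in.h>
        #include <netinet/sctp.h>
        #include <sys/socket.h>

        /* Enable sender-dry notifications on an SCTP socket. */
        static int subscribe_sender_dry(int fd)
        {
                struct sctp_event_subscribe ev;

                memset(&ev, 0, sizeof(ev));
                ev.sctp_data_io_event = 1;      /* keep per-message sndrcvinfo */
                ev.sctp_sender_dry_event = 1;   /* event added here (assumed field name) */

                return setsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &ev, sizeof(ev));
        }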
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 546d4387fb3c..0338dc6fdc9d 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -107,8 +107,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
107#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func} 107#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
108 108
109#define TYPE_SCTP_DATA { \ 109#define TYPE_SCTP_DATA { \
110 /* SCTP_STATE_EMPTY */ \
111 TYPE_SCTP_FUNC(sctp_sf_ootb), \
112 /* SCTP_STATE_CLOSED */ \ 110 /* SCTP_STATE_CLOSED */ \
113 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 111 TYPE_SCTP_FUNC(sctp_sf_ootb), \
114 /* SCTP_STATE_COOKIE_WAIT */ \ 112 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -128,8 +126,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
128} /* TYPE_SCTP_DATA */ 126} /* TYPE_SCTP_DATA */
129 127
130#define TYPE_SCTP_INIT { \ 128#define TYPE_SCTP_INIT { \
131 /* SCTP_STATE_EMPTY */ \
132 TYPE_SCTP_FUNC(sctp_sf_bug), \
133 /* SCTP_STATE_CLOSED */ \ 129 /* SCTP_STATE_CLOSED */ \
134 TYPE_SCTP_FUNC(sctp_sf_do_5_1B_init), \ 130 TYPE_SCTP_FUNC(sctp_sf_do_5_1B_init), \
135 /* SCTP_STATE_COOKIE_WAIT */ \ 131 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -149,8 +145,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
149} /* TYPE_SCTP_INIT */ 145} /* TYPE_SCTP_INIT */
150 146
151#define TYPE_SCTP_INIT_ACK { \ 147#define TYPE_SCTP_INIT_ACK { \
152 /* SCTP_STATE_EMPTY */ \
153 TYPE_SCTP_FUNC(sctp_sf_ootb), \
154 /* SCTP_STATE_CLOSED */ \ 148 /* SCTP_STATE_CLOSED */ \
155 TYPE_SCTP_FUNC(sctp_sf_do_5_2_3_initack), \ 149 TYPE_SCTP_FUNC(sctp_sf_do_5_2_3_initack), \
156 /* SCTP_STATE_COOKIE_WAIT */ \ 150 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -170,8 +164,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
170} /* TYPE_SCTP_INIT_ACK */ 164} /* TYPE_SCTP_INIT_ACK */
171 165
172#define TYPE_SCTP_SACK { \ 166#define TYPE_SCTP_SACK { \
173 /* SCTP_STATE_EMPTY */ \
174 TYPE_SCTP_FUNC(sctp_sf_ootb), \
175 /* SCTP_STATE_CLOSED */ \ 167 /* SCTP_STATE_CLOSED */ \
176 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 168 TYPE_SCTP_FUNC(sctp_sf_ootb), \
177 /* SCTP_STATE_COOKIE_WAIT */ \ 169 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -191,8 +183,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
191} /* TYPE_SCTP_SACK */ 183} /* TYPE_SCTP_SACK */
192 184
193#define TYPE_SCTP_HEARTBEAT { \ 185#define TYPE_SCTP_HEARTBEAT { \
194 /* SCTP_STATE_EMPTY */ \
195 TYPE_SCTP_FUNC(sctp_sf_ootb), \
196 /* SCTP_STATE_CLOSED */ \ 186 /* SCTP_STATE_CLOSED */ \
197 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 187 TYPE_SCTP_FUNC(sctp_sf_ootb), \
198 /* SCTP_STATE_COOKIE_WAIT */ \ 188 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -213,8 +203,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
213} /* TYPE_SCTP_HEARTBEAT */ 203} /* TYPE_SCTP_HEARTBEAT */
214 204
215#define TYPE_SCTP_HEARTBEAT_ACK { \ 205#define TYPE_SCTP_HEARTBEAT_ACK { \
216 /* SCTP_STATE_EMPTY */ \
217 TYPE_SCTP_FUNC(sctp_sf_ootb), \
218 /* SCTP_STATE_CLOSED */ \ 206 /* SCTP_STATE_CLOSED */ \
219 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 207 TYPE_SCTP_FUNC(sctp_sf_ootb), \
220 /* SCTP_STATE_COOKIE_WAIT */ \ 208 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -234,8 +222,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
234} /* TYPE_SCTP_HEARTBEAT_ACK */ 222} /* TYPE_SCTP_HEARTBEAT_ACK */
235 223
236#define TYPE_SCTP_ABORT { \ 224#define TYPE_SCTP_ABORT { \
237 /* SCTP_STATE_EMPTY */ \
238 TYPE_SCTP_FUNC(sctp_sf_ootb), \
239 /* SCTP_STATE_CLOSED */ \ 225 /* SCTP_STATE_CLOSED */ \
240 TYPE_SCTP_FUNC(sctp_sf_pdiscard), \ 226 TYPE_SCTP_FUNC(sctp_sf_pdiscard), \
241 /* SCTP_STATE_COOKIE_WAIT */ \ 227 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -255,8 +241,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
255} /* TYPE_SCTP_ABORT */ 241} /* TYPE_SCTP_ABORT */
256 242
257#define TYPE_SCTP_SHUTDOWN { \ 243#define TYPE_SCTP_SHUTDOWN { \
258 /* SCTP_STATE_EMPTY */ \
259 TYPE_SCTP_FUNC(sctp_sf_ootb), \
260 /* SCTP_STATE_CLOSED */ \ 244 /* SCTP_STATE_CLOSED */ \
261 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 245 TYPE_SCTP_FUNC(sctp_sf_ootb), \
262 /* SCTP_STATE_COOKIE_WAIT */ \ 246 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -276,8 +260,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
276} /* TYPE_SCTP_SHUTDOWN */ 260} /* TYPE_SCTP_SHUTDOWN */
277 261
278#define TYPE_SCTP_SHUTDOWN_ACK { \ 262#define TYPE_SCTP_SHUTDOWN_ACK { \
279 /* SCTP_STATE_EMPTY */ \
280 TYPE_SCTP_FUNC(sctp_sf_ootb), \
281 /* SCTP_STATE_CLOSED */ \ 263 /* SCTP_STATE_CLOSED */ \
282 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 264 TYPE_SCTP_FUNC(sctp_sf_ootb), \
283 /* SCTP_STATE_COOKIE_WAIT */ \ 265 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -297,8 +279,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
297} /* TYPE_SCTP_SHUTDOWN_ACK */ 279} /* TYPE_SCTP_SHUTDOWN_ACK */
298 280
299#define TYPE_SCTP_ERROR { \ 281#define TYPE_SCTP_ERROR { \
300 /* SCTP_STATE_EMPTY */ \
301 TYPE_SCTP_FUNC(sctp_sf_ootb), \
302 /* SCTP_STATE_CLOSED */ \ 282 /* SCTP_STATE_CLOSED */ \
303 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 283 TYPE_SCTP_FUNC(sctp_sf_ootb), \
304 /* SCTP_STATE_COOKIE_WAIT */ \ 284 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -318,8 +298,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
318} /* TYPE_SCTP_ERROR */ 298} /* TYPE_SCTP_ERROR */
319 299
320#define TYPE_SCTP_COOKIE_ECHO { \ 300#define TYPE_SCTP_COOKIE_ECHO { \
321 /* SCTP_STATE_EMPTY */ \
322 TYPE_SCTP_FUNC(sctp_sf_bug), \
323 /* SCTP_STATE_CLOSED */ \ 301 /* SCTP_STATE_CLOSED */ \
324 TYPE_SCTP_FUNC(sctp_sf_do_5_1D_ce), \ 302 TYPE_SCTP_FUNC(sctp_sf_do_5_1D_ce), \
325 /* SCTP_STATE_COOKIE_WAIT */ \ 303 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -339,8 +317,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
339} /* TYPE_SCTP_COOKIE_ECHO */ 317} /* TYPE_SCTP_COOKIE_ECHO */
340 318
341#define TYPE_SCTP_COOKIE_ACK { \ 319#define TYPE_SCTP_COOKIE_ACK { \
342 /* SCTP_STATE_EMPTY */ \
343 TYPE_SCTP_FUNC(sctp_sf_ootb), \
344 /* SCTP_STATE_CLOSED */ \ 320 /* SCTP_STATE_CLOSED */ \
345 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 321 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
346 /* SCTP_STATE_COOKIE_WAIT */ \ 322 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -360,8 +336,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
360} /* TYPE_SCTP_COOKIE_ACK */ 336} /* TYPE_SCTP_COOKIE_ACK */
361 337
362#define TYPE_SCTP_ECN_ECNE { \ 338#define TYPE_SCTP_ECN_ECNE { \
363 /* SCTP_STATE_EMPTY */ \
364 TYPE_SCTP_FUNC(sctp_sf_ootb), \
365 /* SCTP_STATE_CLOSED */ \ 339 /* SCTP_STATE_CLOSED */ \
366 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 340 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
367 /* SCTP_STATE_COOKIE_WAIT */ \ 341 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -381,8 +355,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
381} /* TYPE_SCTP_ECN_ECNE */ 355} /* TYPE_SCTP_ECN_ECNE */
382 356
383#define TYPE_SCTP_ECN_CWR { \ 357#define TYPE_SCTP_ECN_CWR { \
384 /* SCTP_STATE_EMPTY */ \
385 TYPE_SCTP_FUNC(sctp_sf_ootb), \
386 /* SCTP_STATE_CLOSED */ \ 358 /* SCTP_STATE_CLOSED */ \
387 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 359 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
388 /* SCTP_STATE_COOKIE_WAIT */ \ 360 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -402,8 +374,6 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
402} /* TYPE_SCTP_ECN_CWR */ 374} /* TYPE_SCTP_ECN_CWR */
403 375
404#define TYPE_SCTP_SHUTDOWN_COMPLETE { \ 376#define TYPE_SCTP_SHUTDOWN_COMPLETE { \
405 /* SCTP_STATE_EMPTY */ \
406 TYPE_SCTP_FUNC(sctp_sf_ootb), \
407 /* SCTP_STATE_CLOSED */ \ 377 /* SCTP_STATE_CLOSED */ \
408 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 378 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
409 /* SCTP_STATE_COOKIE_WAIT */ \ 379 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -446,8 +416,6 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
446}; /* state_fn_t chunk_event_table[][] */ 416}; /* state_fn_t chunk_event_table[][] */
447 417
448#define TYPE_SCTP_ASCONF { \ 418#define TYPE_SCTP_ASCONF { \
449 /* SCTP_STATE_EMPTY */ \
450 TYPE_SCTP_FUNC(sctp_sf_ootb), \
451 /* SCTP_STATE_CLOSED */ \ 419 /* SCTP_STATE_CLOSED */ \
452 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 420 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
453 /* SCTP_STATE_COOKIE_WAIT */ \ 421 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -467,8 +435,6 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
467} /* TYPE_SCTP_ASCONF */ 435} /* TYPE_SCTP_ASCONF */
468 436
469#define TYPE_SCTP_ASCONF_ACK { \ 437#define TYPE_SCTP_ASCONF_ACK { \
470 /* SCTP_STATE_EMPTY */ \
471 TYPE_SCTP_FUNC(sctp_sf_ootb), \
472 /* SCTP_STATE_CLOSED */ \ 438 /* SCTP_STATE_CLOSED */ \
473 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 439 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
474 /* SCTP_STATE_COOKIE_WAIT */ \ 440 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -496,8 +462,6 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_
496}; /*state_fn_t addip_chunk_event_table[][] */ 462}; /*state_fn_t addip_chunk_event_table[][] */
497 463
498#define TYPE_SCTP_FWD_TSN { \ 464#define TYPE_SCTP_FWD_TSN { \
499 /* SCTP_STATE_EMPTY */ \
500 TYPE_SCTP_FUNC(sctp_sf_ootb), \
501 /* SCTP_STATE_CLOSED */ \ 465 /* SCTP_STATE_CLOSED */ \
502 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 466 TYPE_SCTP_FUNC(sctp_sf_ootb), \
503 /* SCTP_STATE_COOKIE_WAIT */ \ 467 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -524,8 +488,6 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN
524}; /*state_fn_t prsctp_chunk_event_table[][] */ 488}; /*state_fn_t prsctp_chunk_event_table[][] */
525 489
526#define TYPE_SCTP_AUTH { \ 490#define TYPE_SCTP_AUTH { \
527 /* SCTP_STATE_EMPTY */ \
528 TYPE_SCTP_FUNC(sctp_sf_ootb), \
529 /* SCTP_STATE_CLOSED */ \ 491 /* SCTP_STATE_CLOSED */ \
530 TYPE_SCTP_FUNC(sctp_sf_ootb), \ 492 TYPE_SCTP_FUNC(sctp_sf_ootb), \
531 /* SCTP_STATE_COOKIE_WAIT */ \ 493 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -553,8 +515,6 @@ static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TY
553 515
554static const sctp_sm_table_entry_t 516static const sctp_sm_table_entry_t
555chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { 517chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
556 /* SCTP_STATE_EMPTY */
557 TYPE_SCTP_FUNC(sctp_sf_ootb),
558 /* SCTP_STATE_CLOSED */ 518 /* SCTP_STATE_CLOSED */
559 TYPE_SCTP_FUNC(sctp_sf_ootb), 519 TYPE_SCTP_FUNC(sctp_sf_ootb),
560 /* SCTP_STATE_COOKIE_WAIT */ 520 /* SCTP_STATE_COOKIE_WAIT */
@@ -575,8 +535,6 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
575 535
576 536
577#define TYPE_SCTP_PRIMITIVE_ASSOCIATE { \ 537#define TYPE_SCTP_PRIMITIVE_ASSOCIATE { \
578 /* SCTP_STATE_EMPTY */ \
579 TYPE_SCTP_FUNC(sctp_sf_bug), \
580 /* SCTP_STATE_CLOSED */ \ 538 /* SCTP_STATE_CLOSED */ \
581 TYPE_SCTP_FUNC(sctp_sf_do_prm_asoc), \ 539 TYPE_SCTP_FUNC(sctp_sf_do_prm_asoc), \
582 /* SCTP_STATE_COOKIE_WAIT */ \ 540 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -596,8 +554,6 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
596} /* TYPE_SCTP_PRIMITIVE_ASSOCIATE */ 554} /* TYPE_SCTP_PRIMITIVE_ASSOCIATE */
597 555
598#define TYPE_SCTP_PRIMITIVE_SHUTDOWN { \ 556#define TYPE_SCTP_PRIMITIVE_SHUTDOWN { \
599 /* SCTP_STATE_EMPTY */ \
600 TYPE_SCTP_FUNC(sctp_sf_bug), \
601 /* SCTP_STATE_CLOSED */ \ 557 /* SCTP_STATE_CLOSED */ \
602 TYPE_SCTP_FUNC(sctp_sf_error_closed), \ 558 TYPE_SCTP_FUNC(sctp_sf_error_closed), \
603 /* SCTP_STATE_COOKIE_WAIT */ \ 559 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -617,8 +573,6 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
617} /* TYPE_SCTP_PRIMITIVE_SHUTDOWN */ 573} /* TYPE_SCTP_PRIMITIVE_SHUTDOWN */
618 574
619#define TYPE_SCTP_PRIMITIVE_ABORT { \ 575#define TYPE_SCTP_PRIMITIVE_ABORT { \
620 /* SCTP_STATE_EMPTY */ \
621 TYPE_SCTP_FUNC(sctp_sf_bug), \
622 /* SCTP_STATE_CLOSED */ \ 576 /* SCTP_STATE_CLOSED */ \
623 TYPE_SCTP_FUNC(sctp_sf_error_closed), \ 577 TYPE_SCTP_FUNC(sctp_sf_error_closed), \
624 /* SCTP_STATE_COOKIE_WAIT */ \ 578 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -638,8 +592,6 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
638} /* TYPE_SCTP_PRIMITIVE_ABORT */ 592} /* TYPE_SCTP_PRIMITIVE_ABORT */
639 593
640#define TYPE_SCTP_PRIMITIVE_SEND { \ 594#define TYPE_SCTP_PRIMITIVE_SEND { \
641 /* SCTP_STATE_EMPTY */ \
642 TYPE_SCTP_FUNC(sctp_sf_bug), \
643 /* SCTP_STATE_CLOSED */ \ 595 /* SCTP_STATE_CLOSED */ \
644 TYPE_SCTP_FUNC(sctp_sf_error_closed), \ 596 TYPE_SCTP_FUNC(sctp_sf_error_closed), \
645 /* SCTP_STATE_COOKIE_WAIT */ \ 597 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -659,8 +611,6 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
659} /* TYPE_SCTP_PRIMITIVE_SEND */ 611} /* TYPE_SCTP_PRIMITIVE_SEND */
660 612
661#define TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT { \ 613#define TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT { \
662 /* SCTP_STATE_EMPTY */ \
663 TYPE_SCTP_FUNC(sctp_sf_bug), \
664 /* SCTP_STATE_CLOSED */ \ 614 /* SCTP_STATE_CLOSED */ \
665 TYPE_SCTP_FUNC(sctp_sf_error_closed), \ 615 TYPE_SCTP_FUNC(sctp_sf_error_closed), \
666 /* SCTP_STATE_COOKIE_WAIT */ \ 616 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -680,8 +630,6 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
680} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */ 630} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
681 631
682#define TYPE_SCTP_PRIMITIVE_ASCONF { \ 632#define TYPE_SCTP_PRIMITIVE_ASCONF { \
683 /* SCTP_STATE_EMPTY */ \
684 TYPE_SCTP_FUNC(sctp_sf_bug), \
685 /* SCTP_STATE_CLOSED */ \ 633 /* SCTP_STATE_CLOSED */ \
686 TYPE_SCTP_FUNC(sctp_sf_error_closed), \ 634 TYPE_SCTP_FUNC(sctp_sf_error_closed), \
687 /* SCTP_STATE_COOKIE_WAIT */ \ 635 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -713,8 +661,6 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
713}; 661};
714 662
715#define TYPE_SCTP_OTHER_NO_PENDING_TSN { \ 663#define TYPE_SCTP_OTHER_NO_PENDING_TSN { \
716 /* SCTP_STATE_EMPTY */ \
717 TYPE_SCTP_FUNC(sctp_sf_bug), \
718 /* SCTP_STATE_CLOSED */ \ 664 /* SCTP_STATE_CLOSED */ \
719 TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ 665 TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
720 /* SCTP_STATE_COOKIE_WAIT */ \ 666 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -722,7 +668,7 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
722 /* SCTP_STATE_COOKIE_ECHOED */ \ 668 /* SCTP_STATE_COOKIE_ECHOED */ \
723 TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ 669 TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
724 /* SCTP_STATE_ESTABLISHED */ \ 670 /* SCTP_STATE_ESTABLISHED */ \
725 TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ 671 TYPE_SCTP_FUNC(sctp_sf_do_no_pending_tsn), \
726 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 672 /* SCTP_STATE_SHUTDOWN_PENDING */ \
727 TYPE_SCTP_FUNC(sctp_sf_do_9_2_start_shutdown), \ 673 TYPE_SCTP_FUNC(sctp_sf_do_9_2_start_shutdown), \
728 /* SCTP_STATE_SHUTDOWN_SENT */ \ 674 /* SCTP_STATE_SHUTDOWN_SENT */ \
@@ -734,8 +680,6 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
734} 680}
735 681
736#define TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH { \ 682#define TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH { \
737 /* SCTP_STATE_EMPTY */ \
738 TYPE_SCTP_FUNC(sctp_sf_bug), \
739 /* SCTP_STATE_CLOSED */ \ 683 /* SCTP_STATE_CLOSED */ \
740 TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ 684 TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
741 /* SCTP_STATE_COOKIE_WAIT */ \ 685 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -760,8 +704,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
760}; 704};
761 705
762#define TYPE_SCTP_EVENT_TIMEOUT_NONE { \ 706#define TYPE_SCTP_EVENT_TIMEOUT_NONE { \
763 /* SCTP_STATE_EMPTY */ \
764 TYPE_SCTP_FUNC(sctp_sf_bug), \
765 /* SCTP_STATE_CLOSED */ \ 707 /* SCTP_STATE_CLOSED */ \
766 TYPE_SCTP_FUNC(sctp_sf_bug), \ 708 TYPE_SCTP_FUNC(sctp_sf_bug), \
767 /* SCTP_STATE_COOKIE_WAIT */ \ 709 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -781,8 +723,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
781} 723}
782 724
783#define TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE { \ 725#define TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE { \
784 /* SCTP_STATE_EMPTY */ \
785 TYPE_SCTP_FUNC(sctp_sf_bug), \
786 /* SCTP_STATE_CLOSED */ \ 726 /* SCTP_STATE_CLOSED */ \
787 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 727 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
788 /* SCTP_STATE_COOKIE_WAIT */ \ 728 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -802,8 +742,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
802} 742}
803 743
804#define TYPE_SCTP_EVENT_TIMEOUT_T1_INIT { \ 744#define TYPE_SCTP_EVENT_TIMEOUT_T1_INIT { \
805 /* SCTP_STATE_EMPTY */ \
806 TYPE_SCTP_FUNC(sctp_sf_bug), \
807 /* SCTP_STATE_CLOSED */ \ 745 /* SCTP_STATE_CLOSED */ \
808 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 746 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
809 /* SCTP_STATE_COOKIE_WAIT */ \ 747 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -823,8 +761,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
823} 761}
824 762
825#define TYPE_SCTP_EVENT_TIMEOUT_T2_SHUTDOWN { \ 763#define TYPE_SCTP_EVENT_TIMEOUT_T2_SHUTDOWN { \
826 /* SCTP_STATE_EMPTY */ \
827 TYPE_SCTP_FUNC(sctp_sf_bug), \
828 /* SCTP_STATE_CLOSED */ \ 764 /* SCTP_STATE_CLOSED */ \
829 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 765 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
830 /* SCTP_STATE_COOKIE_WAIT */ \ 766 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -844,8 +780,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
844} 780}
845 781
846#define TYPE_SCTP_EVENT_TIMEOUT_T3_RTX { \ 782#define TYPE_SCTP_EVENT_TIMEOUT_T3_RTX { \
847 /* SCTP_STATE_EMPTY */ \
848 TYPE_SCTP_FUNC(sctp_sf_bug), \
849 /* SCTP_STATE_CLOSED */ \ 783 /* SCTP_STATE_CLOSED */ \
850 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 784 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
851 /* SCTP_STATE_COOKIE_WAIT */ \ 785 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -865,8 +799,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
865} 799}
866 800
867#define TYPE_SCTP_EVENT_TIMEOUT_T4_RTO { \ 801#define TYPE_SCTP_EVENT_TIMEOUT_T4_RTO { \
868 /* SCTP_STATE_EMPTY */ \
869 TYPE_SCTP_FUNC(sctp_sf_bug), \
870 /* SCTP_STATE_CLOSED */ \ 802 /* SCTP_STATE_CLOSED */ \
871 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 803 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
872 /* SCTP_STATE_COOKIE_WAIT */ \ 804 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -886,8 +818,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
886} 818}
887 819
888#define TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD { \ 820#define TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD { \
889 /* SCTP_STATE_EMPTY */ \
890 TYPE_SCTP_FUNC(sctp_sf_bug), \
891 /* SCTP_STATE_CLOSED */ \ 821 /* SCTP_STATE_CLOSED */ \
892 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 822 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
893 /* SCTP_STATE_COOKIE_WAIT */ \ 823 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -907,8 +837,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
907} 837}
908 838
909#define TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT { \ 839#define TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT { \
910 /* SCTP_STATE_EMPTY */ \
911 TYPE_SCTP_FUNC(sctp_sf_bug), \
912 /* SCTP_STATE_CLOSED */ \ 840 /* SCTP_STATE_CLOSED */ \
913 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 841 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
914 /* SCTP_STATE_COOKIE_WAIT */ \ 842 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -928,8 +856,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
928} 856}
929 857
930#define TYPE_SCTP_EVENT_TIMEOUT_SACK { \ 858#define TYPE_SCTP_EVENT_TIMEOUT_SACK { \
931 /* SCTP_STATE_EMPTY */ \
932 TYPE_SCTP_FUNC(sctp_sf_bug), \
933 /* SCTP_STATE_CLOSED */ \ 859 /* SCTP_STATE_CLOSED */ \
934 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 860 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
935 /* SCTP_STATE_COOKIE_WAIT */ \ 861 /* SCTP_STATE_COOKIE_WAIT */ \
@@ -949,8 +875,6 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
949} 875}
950 876
951#define TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE { \ 877#define TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE { \
952 /* SCTP_STATE_EMPTY */ \
953 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
954 /* SCTP_STATE_CLOSED */ \ 878 /* SCTP_STATE_CLOSED */ \
955 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 879 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
956 /* SCTP_STATE_COOKIE_WAIT */ \ 880 /* SCTP_STATE_COOKIE_WAIT */ \
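Every table in this file loses its SCTP_STATE_EMPTY entry in the same mechanical way; this stays consistent only because the state machine picks entries by indexing [event subtype][state], so dropping the enum value shrinks SCTP_STATE_NUM_STATES and every row simply loses one column. A simplified view of the lookup, modelled on sctp_sm_lookup_event(); the extension-chunk tables and bounds checks are omitted, so treat it as a sketch:

        static const sctp_sm_table_entry_t *lookup(sctp_event_t event_type,
                                                   sctp_state_t state,
                                                   sctp_subtype_t subtype)
        {
                switch (event_type) {
                case SCTP_EVENT_T_CHUNK:
                        /* Base chunk types index the big table; anything else
                         * falls back to the per-state "unknown chunk" row.
                         */
                        if (subtype.chunk <= SCTP_CID_BASE_MAX)
                                return &chunk_event_table[subtype.chunk][state];
                        return &chunk_event_table_unknown[state];
                case SCTP_EVENT_T_TIMEOUT:
                        return &timeout_event_table[subtype.timeout][state];
                default:
                        return NULL;
                }
        }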
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 8e02550ff3e8..6766913a53e6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -658,11 +658,15 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
658 goto err_bindx_rem; 658 goto err_bindx_rem;
659 } 659 }
660 660
661 if (sa_addr->v4.sin_port != htons(bp->port)) { 661 if (sa_addr->v4.sin_port &&
662 sa_addr->v4.sin_port != htons(bp->port)) {
662 retval = -EINVAL; 663 retval = -EINVAL;
663 goto err_bindx_rem; 664 goto err_bindx_rem;
664 } 665 }
665 666
667 if (!sa_addr->v4.sin_port)
668 sa_addr->v4.sin_port = htons(bp->port);
669
666 /* FIXME - There is probably a need to check if sk->sk_saddr and 670 /* FIXME - There is probably a need to check if sk->sk_saddr and
667 * sk->sk_rcv_addr are currently set to one of the addresses to 671 * sk->sk_rcv_addr are currently set to one of the addresses to
668 * be removed. This is something which needs to be looked into 672 * be removed. This is something which needs to be looked into
@@ -1193,7 +1197,7 @@ out_free:
1193 * an endpoint that is multi-homed. Much like sctp_bindx() this call 1197 * an endpoint that is multi-homed. Much like sctp_bindx() this call
1194 * allows a caller to specify multiple addresses at which a peer can be 1198 * allows a caller to specify multiple addresses at which a peer can be
1195 * reached. The way the SCTP stack uses the list of addresses to set up 1199 * reached. The way the SCTP stack uses the list of addresses to set up
1196 * the association is implementation dependant. This function only 1200 * the association is implementation dependent. This function only
1197 * specifies that the stack will try to make use of all the addresses in 1201 * specifies that the stack will try to make use of all the addresses in
1198 * the list when needed. 1202 * the list when needed.
1199 * 1203 *
@@ -1492,7 +1496,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1492 struct sctp_chunk *chunk; 1496 struct sctp_chunk *chunk;
1493 union sctp_addr to; 1497 union sctp_addr to;
1494 struct sockaddr *msg_name = NULL; 1498 struct sockaddr *msg_name = NULL;
1495 struct sctp_sndrcvinfo default_sinfo = { 0 }; 1499 struct sctp_sndrcvinfo default_sinfo;
1496 struct sctp_sndrcvinfo *sinfo; 1500 struct sctp_sndrcvinfo *sinfo;
1497 struct sctp_initmsg *sinit; 1501 struct sctp_initmsg *sinit;
1498 sctp_assoc_t associd = 0; 1502 sctp_assoc_t associd = 0;
@@ -1756,6 +1760,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1756 /* If the user didn't specify SNDRCVINFO, make up one with 1760 /* If the user didn't specify SNDRCVINFO, make up one with
1757 * some defaults. 1761 * some defaults.
1758 */ 1762 */
1763 memset(&default_sinfo, 0, sizeof(default_sinfo));
1759 default_sinfo.sinfo_stream = asoc->default_stream; 1764 default_sinfo.sinfo_stream = asoc->default_stream;
1760 default_sinfo.sinfo_flags = asoc->default_flags; 1765 default_sinfo.sinfo_flags = asoc->default_flags;
1761 default_sinfo.sinfo_ppid = asoc->default_ppid; 1766 default_sinfo.sinfo_ppid = asoc->default_ppid;
@@ -1786,12 +1791,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1786 goto out_free; 1791 goto out_free;
1787 } 1792 }
1788 1793
1789 if (sinfo) { 1794 /* Check for invalid stream. */
1790 /* Check for invalid stream. */ 1795 if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) {
1791 if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { 1796 err = -EINVAL;
1792 err = -EINVAL; 1797 goto out_free;
1793 goto out_free;
1794 }
1795 } 1798 }
1796 1799
1797 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1800 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -2283,7 +2286,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
2283 trans->param_flags = 2286 trans->param_flags =
2284 (trans->param_flags & ~SPP_PMTUD) | pmtud_change; 2287 (trans->param_flags & ~SPP_PMTUD) | pmtud_change;
2285 if (update) { 2288 if (update) {
2286 sctp_transport_pmtu(trans); 2289 sctp_transport_pmtu(trans, sctp_opt2sk(sp));
2287 sctp_assoc_sync_pmtu(asoc); 2290 sctp_assoc_sync_pmtu(asoc);
2288 } 2291 }
2289 } else if (asoc) { 2292 } else if (asoc) {
@@ -2928,7 +2931,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
2928 unsigned int optlen) 2931 unsigned int optlen)
2929{ 2932{
2930 struct sctp_sock *sp; 2933 struct sctp_sock *sp;
2931 struct sctp_endpoint *ep;
2932 struct sctp_association *asoc = NULL; 2934 struct sctp_association *asoc = NULL;
2933 struct sctp_setpeerprim prim; 2935 struct sctp_setpeerprim prim;
2934 struct sctp_chunk *chunk; 2936 struct sctp_chunk *chunk;
@@ -2936,7 +2938,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
2936 int err; 2938 int err;
2937 2939
2938 sp = sctp_sk(sk); 2940 sp = sctp_sk(sk);
2939 ep = sp->ep;
2940 2941
2941 if (!sctp_addip_enable) 2942 if (!sctp_addip_enable)
2942 return -EPERM; 2943 return -EPERM;
@@ -3217,14 +3218,9 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
3217 if (optlen < sizeof(struct sctp_hmacalgo)) 3218 if (optlen < sizeof(struct sctp_hmacalgo))
3218 return -EINVAL; 3219 return -EINVAL;
3219 3220
3220 hmacs = kmalloc(optlen, GFP_KERNEL); 3221 hmacs= memdup_user(optval, optlen);
3221 if (!hmacs) 3222 if (IS_ERR(hmacs))
3222 return -ENOMEM; 3223 return PTR_ERR(hmacs);
3223
3224 if (copy_from_user(hmacs, optval, optlen)) {
3225 err = -EFAULT;
3226 goto out;
3227 }
3228 3224
3229 idents = hmacs->shmac_num_idents; 3225 idents = hmacs->shmac_num_idents;
3230 if (idents == 0 || idents > SCTP_AUTH_NUM_HMACS || 3226 if (idents == 0 || idents > SCTP_AUTH_NUM_HMACS ||
@@ -3259,14 +3255,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
3259 if (optlen <= sizeof(struct sctp_authkey)) 3255 if (optlen <= sizeof(struct sctp_authkey))
3260 return -EINVAL; 3256 return -EINVAL;
3261 3257
3262 authkey = kmalloc(optlen, GFP_KERNEL); 3258 authkey= memdup_user(optval, optlen);
3263 if (!authkey) 3259 if (IS_ERR(authkey))
3264 return -ENOMEM; 3260 return PTR_ERR(authkey);
3265
3266 if (copy_from_user(authkey, optval, optlen)) {
3267 ret = -EFAULT;
3268 goto out;
3269 }
3270 3261
3271 if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) { 3262 if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) {
3272 ret = -EINVAL; 3263 ret = -EINVAL;
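Both setsockopt handlers switch to memdup_user(), which folds kmalloc() plus copy_from_user() plus the -EFAULT unwinding into a single call that returns either the kernel copy or an ERR_PTR. The pattern, reduced to a self-contained sketch (the helper name and the validation shown are illustrative):

        static int copy_hmacs_from_user(char __user *optval, unsigned int optlen)
        {
                struct sctp_hmacalgo *hmacs;
                int err = 0;

                /* One call replaces kmalloc + copy_from_user + the error unwind. */
                hmacs = memdup_user(optval, optlen);
                if (IS_ERR(hmacs))
                        return PTR_ERR(hmacs);  /* nothing to free on this path */

                if (hmacs->shmac_num_idents == 0 ||
                    hmacs->shmac_num_idents > SCTP_AUTH_NUM_HMACS)
                        err = -EINVAL;

                kfree(hmacs);                   /* real pointer here, safe to free */
                return err;
        }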
@@ -5285,6 +5276,55 @@ static int sctp_getsockopt_assoc_number(struct sock *sk, int len,
5285 return 0; 5276 return 0;
5286} 5277}
5287 5278
5279/*
5280 * 8.2.6. Get the Current Identifiers of Associations
5281 * (SCTP_GET_ASSOC_ID_LIST)
5282 *
5283 * This option gets the current list of SCTP association identifiers of
5284 * the SCTP associations handled by a one-to-many style socket.
5285 */
5286static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
5287 char __user *optval, int __user *optlen)
5288{
5289 struct sctp_sock *sp = sctp_sk(sk);
5290 struct sctp_association *asoc;
5291 struct sctp_assoc_ids *ids;
5292 u32 num = 0;
5293
5294 if (sctp_style(sk, TCP))
5295 return -EOPNOTSUPP;
5296
5297 if (len < sizeof(struct sctp_assoc_ids))
5298 return -EINVAL;
5299
5300 list_for_each_entry(asoc, &(sp->ep->asocs), asocs) {
5301 num++;
5302 }
5303
5304 if (len < sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num)
5305 return -EINVAL;
5306
5307 len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num;
5308
5309 ids = kmalloc(len, GFP_KERNEL);
5310 if (unlikely(!ids))
5311 return -ENOMEM;
5312
5313 ids->gaids_number_of_ids = num;
5314 num = 0;
5315 list_for_each_entry(asoc, &(sp->ep->asocs), asocs) {
5316 ids->gaids_assoc_id[num++] = asoc->assoc_id;
5317 }
5318
5319 if (put_user(len, optlen) || copy_to_user(optval, ids, len)) {
5320 kfree(ids);
5321 return -EFAULT;
5322 }
5323
5324 kfree(ids);
5325 return 0;
5326}
5327
5288SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, 5328SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
5289 char __user *optval, int __user *optlen) 5329 char __user *optval, int __user *optlen)
5290{ 5330{
@@ -5417,6 +5457,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
5417 case SCTP_GET_ASSOC_NUMBER: 5457 case SCTP_GET_ASSOC_NUMBER:
5418 retval = sctp_getsockopt_assoc_number(sk, len, optval, optlen); 5458 retval = sctp_getsockopt_assoc_number(sk, len, optval, optlen);
5419 break; 5459 break;
5460 case SCTP_GET_ASSOC_ID_LIST:
5461 retval = sctp_getsockopt_assoc_ids(sk, len, optval, optlen);
5462 break;
5420 default: 5463 default:
5421 retval = -ENOPROTOOPT; 5464 retval = -ENOPROTOOPT;
5422 break; 5465 break;
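The new sctp_getsockopt_assoc_ids() sizes its reply from the live association list and returns -EINVAL when the caller's buffer is too small, so the natural user-space pattern is to query SCTP_GET_ASSOC_NUMBER first (handled just above) and allocate accordingly. A hedged user-space sketch; the struct sctp_assoc_ids layout follows the gaids_* fields used in the hunk, and the option constants are assumed to be exposed by the installed SCTP headers:

        #include <stdint.h>
        #include <stdlib.h>
        #include <netinet/in.h>
        #include <netinet/sctp.h>
        #include <sys/socket.h>

        /* Fetch the association id list from a one-to-many SCTP socket. */
        static struct sctp_assoc_ids *get_assoc_ids(int fd)
        {
                uint32_t num = 0;
                socklen_t len = sizeof(num);
                struct sctp_assoc_ids *ids;

                if (getsockopt(fd, IPPROTO_SCTP, SCTP_GET_ASSOC_NUMBER, &num, &len) < 0)
                        return NULL;

                /* Mirror the kernel's sizing: header plus one id per association. */
                len = sizeof(*ids) + num * sizeof(sctp_assoc_t);
                ids = malloc(len);
                if (!ids)
                        return NULL;

                if (getsockopt(fd, IPPROTO_SCTP, SCTP_GET_ASSOC_ID_LIST, ids, &len) < 0) {
                        free(ids);
                        return NULL;
                }
                return ids;     /* ids->gaids_number_of_ids entries in gaids_assoc_id[] */
        }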
@@ -6102,15 +6145,16 @@ static void __sctp_write_space(struct sctp_association *asoc)
6102 wake_up_interruptible(&asoc->wait); 6145 wake_up_interruptible(&asoc->wait);
6103 6146
6104 if (sctp_writeable(sk)) { 6147 if (sctp_writeable(sk)) {
6105 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 6148 wait_queue_head_t *wq = sk_sleep(sk);
6106 wake_up_interruptible(sk_sleep(sk)); 6149
6150 if (wq && waitqueue_active(wq))
6151 wake_up_interruptible(wq);
6107 6152
6108 /* Note that we try to include the Async I/O support 6153 /* Note that we try to include the Async I/O support
6109 * here by modeling from the current TCP/UDP code. 6154 * here by modeling from the current TCP/UDP code.
6110 * We have not tested with it yet. 6155 * We have not tested with it yet.
6111 */ 6156 */
6112 if (sock->wq->fasync_list && 6157 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
6113 !(sk->sk_shutdown & SEND_SHUTDOWN))
6114 sock_wake_async(sock, 6158 sock_wake_async(sock,
6115 SOCK_WAKE_SPACE, POLL_OUT); 6159 SOCK_WAKE_SPACE, POLL_OUT);
6116 } 6160 }
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d3ae493d234a..394c57ca2f54 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -211,15 +211,17 @@ void sctp_transport_set_owner(struct sctp_transport *transport,
211} 211}
212 212
213/* Initialize the pmtu of a transport. */ 213/* Initialize the pmtu of a transport. */
214void sctp_transport_pmtu(struct sctp_transport *transport) 214void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
215{ 215{
216 struct dst_entry *dst; 216 /* If we don't have a fresh route, look one up */
217 217 if (!transport->dst || transport->dst->obsolete > 1) {
218 dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); 218 dst_release(transport->dst);
219 transport->af_specific->get_dst(transport, &transport->saddr,
220 &transport->fl, sk);
221 }
219 222
220 if (dst) { 223 if (transport->dst) {
221 transport->pathmtu = dst_mtu(dst); 224 transport->pathmtu = dst_mtu(transport->dst);
222 dst_release(dst);
223 } else 225 } else
224 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; 226 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
225} 227}
@@ -270,22 +272,19 @@ void sctp_transport_route(struct sctp_transport *transport,
270{ 272{
271 struct sctp_association *asoc = transport->asoc; 273 struct sctp_association *asoc = transport->asoc;
272 struct sctp_af *af = transport->af_specific; 274 struct sctp_af *af = transport->af_specific;
273 union sctp_addr *daddr = &transport->ipaddr;
274 struct dst_entry *dst;
275 275
276 dst = af->get_dst(asoc, daddr, saddr); 276 af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));
277 277
278 if (saddr) 278 if (saddr)
279 memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); 279 memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
280 else 280 else
281 af->get_saddr(opt, asoc, dst, daddr, &transport->saddr); 281 af->get_saddr(opt, transport, &transport->fl);
282 282
283 transport->dst = dst;
284 if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { 283 if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
285 return; 284 return;
286 } 285 }
287 if (dst) { 286 if (transport->dst) {
288 transport->pathmtu = dst_mtu(dst); 287 transport->pathmtu = dst_mtu(transport->dst);
289 288
290 /* Initialize sk->sk_rcv_saddr, if the transport is the 289 /* Initialize sk->sk_rcv_saddr, if the transport is the
291 * association's active path for getsockname(). 290 * association's active path for getsockname().
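The rewritten sctp_transport_pmtu() and sctp_transport_route() stop returning a dst from get_dst() and instead leave the result cached on the transport (transport->dst, with the flow key in transport->fl), re-routing only when the cached entry is missing or marked obsolete. The caching rule condenses to the following sketch (function name illustrative; dst_release() tolerates NULL):

        void transport_refresh_pmtu(struct sctp_transport *t, struct sock *sk)
        {
                /* Re-route only when there is no cached dst or it has gone stale. */
                if (!t->dst || t->dst->obsolete > 1) {
                        dst_release(t->dst);
                        t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
                }

                /* Read the path MTU from whatever the lookup left behind. */
                t->pathmtu = t->dst ? dst_mtu(t->dst) : SCTP_DEFAULT_MAXSEGMENT;
        }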
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 747d5412c463..f1e40cebc981 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -344,7 +344,7 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map,
344 344
345 /* Refresh the gap ack information. */ 345 /* Refresh the gap ack information. */
346 if (sctp_tsnmap_has_gap(map)) { 346 if (sctp_tsnmap_has_gap(map)) {
347 __u16 start, end; 347 __u16 start = 0, end = 0;
348 sctp_tsnmap_iter_init(map, &iter); 348 sctp_tsnmap_iter_init(map, &iter);
349 while (sctp_tsnmap_next_gap_ack(map, &iter, 349 while (sctp_tsnmap_next_gap_ack(map, &iter,
350 &start, 350 &start,
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index aa72e89c3ee1..e70e5fc87890 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
554 memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo)); 554 memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo));
555 555
556 /* Per TSVWG discussion with Randy. Allow the application to 556 /* Per TSVWG discussion with Randy. Allow the application to
557 * ressemble a fragmented message. 557 * reassemble a fragmented message.
558 */ 558 */
559 ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags; 559 ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags;
560 560
@@ -843,7 +843,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_authkey(
843 ak = (struct sctp_authkey_event *) 843 ak = (struct sctp_authkey_event *)
844 skb_put(skb, sizeof(struct sctp_authkey_event)); 844 skb_put(skb, sizeof(struct sctp_authkey_event));
845 845
846 ak->auth_type = SCTP_AUTHENTICATION_INDICATION; 846 ak->auth_type = SCTP_AUTHENTICATION_EVENT;
847 ak->auth_flags = 0; 847 ak->auth_flags = 0;
848 ak->auth_length = sizeof(struct sctp_authkey_event); 848 ak->auth_length = sizeof(struct sctp_authkey_event);
849 849
@@ -862,6 +862,34 @@ fail:
862 return NULL; 862 return NULL;
863} 863}
864 864
865/*
866 * Socket Extensions for SCTP
867 * 6.3.10. SCTP_SENDER_DRY_EVENT
868 */
869struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event(
870 const struct sctp_association *asoc, gfp_t gfp)
871{
872 struct sctp_ulpevent *event;
873 struct sctp_sender_dry_event *sdry;
874 struct sk_buff *skb;
875
876 event = sctp_ulpevent_new(sizeof(struct sctp_sender_dry_event),
877 MSG_NOTIFICATION, gfp);
878 if (!event)
879 return NULL;
880
881 skb = sctp_event2skb(event);
882 sdry = (struct sctp_sender_dry_event *)
883 skb_put(skb, sizeof(struct sctp_sender_dry_event));
884
885 sdry->sender_dry_type = SCTP_SENDER_DRY_EVENT;
886 sdry->sender_dry_flags = 0;
887 sdry->sender_dry_length = sizeof(struct sctp_sender_dry_event);
888 sctp_ulpevent_set_owner(event, asoc);
889 sdry->sender_dry_assoc_id = sctp_assoc2id(asoc);
890
891 return event;
892}
865 893
866/* Return the notification type, assuming this is a notification 894/* Return the notification type, assuming this is a notification
867 * event. 895 * event.
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index c7f7e49609cb..f2d1de7f2ffb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -105,11 +105,8 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
105 gfp_t gfp) 105 gfp_t gfp)
106{ 106{
107 struct sk_buff_head temp; 107 struct sk_buff_head temp;
108 sctp_data_chunk_t *hdr;
109 struct sctp_ulpevent *event; 108 struct sctp_ulpevent *event;
110 109
111 hdr = (sctp_data_chunk_t *) chunk->chunk_hdr;
112
113 /* Create an event from the incoming chunk. */ 110 /* Create an event from the incoming chunk. */
114 event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); 111 event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp);
115 if (!event) 112 if (!event)
@@ -243,7 +240,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
243 } else { 240 } else {
244 /* 241 /*
245 * If fragment interleave is enabled, we 242 * If fragment interleave is enabled, we
246 * can queue this to the recieve queue instead 243 * can queue this to the receive queue instead
247 * of the lobby. 244 * of the lobby.
248 */ 245 */
249 if (sctp_sk(sk)->frag_interleave) 246 if (sctp_sk(sk)->frag_interleave)
@@ -743,11 +740,9 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
743 struct sk_buff *pos, *tmp; 740 struct sk_buff *pos, *tmp;
744 struct sctp_ulpevent *cevent; 741 struct sctp_ulpevent *cevent;
745 struct sctp_stream *in; 742 struct sctp_stream *in;
746 __u16 sid, csid; 743 __u16 sid, csid, cssn;
747 __u16 ssn, cssn;
748 744
749 sid = event->stream; 745 sid = event->stream;
750 ssn = event->ssn;
751 in = &ulpq->asoc->ssnmap->in; 746 in = &ulpq->asoc->ssnmap->in;
752 747
753 event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; 748 event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
diff --git a/net/socket.c b/net/socket.c
index ac2219f90d5d..02dc82db3d23 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly;
240static struct inode *sock_alloc_inode(struct super_block *sb) 240static struct inode *sock_alloc_inode(struct super_block *sb)
241{ 241{
242 struct socket_alloc *ei; 242 struct socket_alloc *ei;
243 struct socket_wq *wq;
243 244
244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
245 if (!ei) 246 if (!ei)
246 return NULL; 247 return NULL;
247 ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); 248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!ei->socket.wq) { 249 if (!wq) {
249 kmem_cache_free(sock_inode_cachep, ei); 250 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL; 251 return NULL;
251 } 252 }
252 init_waitqueue_head(&ei->socket.wq->wait); 253 init_waitqueue_head(&wq->wait);
253 ei->socket.wq->fasync_list = NULL; 254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
254 256
255 ei->socket.state = SS_UNCONNECTED; 257 ei->socket.state = SS_UNCONNECTED;
256 ei->socket.flags = 0; 258 ei->socket.flags = 0;
@@ -261,21 +263,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
261 return &ei->vfs_inode; 263 return &ei->vfs_inode;
262} 264}
263 265
264
265
266static void wq_free_rcu(struct rcu_head *head)
267{
268 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
269
270 kfree(wq);
271}
272
273static void sock_destroy_inode(struct inode *inode) 266static void sock_destroy_inode(struct inode *inode)
274{ 267{
275 struct socket_alloc *ei; 268 struct socket_alloc *ei;
269 struct socket_wq *wq;
276 270
277 ei = container_of(inode, struct socket_alloc, vfs_inode); 271 ei = container_of(inode, struct socket_alloc, vfs_inode);
278 call_rcu(&ei->socket.wq->rcu, wq_free_rcu); 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
273 kfree_rcu(wq, rcu);
279 kmem_cache_free(sock_inode_cachep, ei); 274 kmem_cache_free(sock_inode_cachep, ei);
280} 275}
281 276
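With the rcu_head embedded in struct socket_wq, kfree_rcu(wq, rcu) queues the kfree() through RCU directly, which is exactly what the removed wq_free_rcu() callback did by hand. The equivalence, reduced to a sketch with a stand-in struct so it reads on its own:

        struct wq_like {
                wait_queue_head_t wait;
                struct fasync_struct *fasync_list;
                struct rcu_head rcu;
        };

        static void wq_free_cb(struct rcu_head *head)
        {
                kfree(container_of(head, struct wq_like, rcu));
        }

        static void drop_old(struct wq_like *wq)
        {
                call_rcu(&wq->rcu, wq_free_cb);         /* what the code did before */
        }

        static void drop_new(struct wq_like *wq)
        {
                kfree_rcu(wq, rcu);     /* same grace-period deferral, no callback needed */
        }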
@@ -524,7 +519,7 @@ void sock_release(struct socket *sock)
524 module_put(owner); 519 module_put(owner);
525 } 520 }
526 521
527 if (sock->wq->fasync_list) 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
528 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
529 524
530 percpu_sub(sockets_in_use, 1); 525 percpu_sub(sockets_in_use, 1);
@@ -547,11 +542,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
547} 542}
548EXPORT_SYMBOL(sock_tx_timestamp); 543EXPORT_SYMBOL(sock_tx_timestamp);
549 544
550static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 545static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
551 struct msghdr *msg, size_t size) 546 struct msghdr *msg, size_t size)
552{ 547{
553 struct sock_iocb *si = kiocb_to_siocb(iocb); 548 struct sock_iocb *si = kiocb_to_siocb(iocb);
554 int err;
555 549
556 sock_update_classid(sock->sk); 550 sock_update_classid(sock->sk);
557 551
@@ -560,13 +554,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
560 si->msg = msg; 554 si->msg = msg;
561 si->size = size; 555 si->size = size;
562 556
563 err = security_socket_sendmsg(sock, msg, size);
564 if (err)
565 return err;
566
567 return sock->ops->sendmsg(iocb, sock, msg, size); 557 return sock->ops->sendmsg(iocb, sock, msg, size);
568} 558}
569 559
560static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
561 struct msghdr *msg, size_t size)
562{
563 int err = security_socket_sendmsg(sock, msg, size);
564
565 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
566}
567
570int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 568int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
571{ 569{
572 struct kiocb iocb; 570 struct kiocb iocb;
@@ -582,6 +580,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
582} 580}
583EXPORT_SYMBOL(sock_sendmsg); 581EXPORT_SYMBOL(sock_sendmsg);
584 582
583int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
584{
585 struct kiocb iocb;
586 struct sock_iocb siocb;
587 int ret;
588
589 init_sync_kiocb(&iocb, NULL);
590 iocb.private = &siocb;
591 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
592 if (-EIOCBQUEUED == ret)
593 ret = wait_on_sync_kiocb(&iocb);
594 return ret;
595}
596
585int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 597int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
586 struct kvec *vec, size_t num, size_t size) 598 struct kvec *vec, size_t num, size_t size)
587{ 599{
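Splitting the helper leaves the LSM hook in __sock_sendmsg() while __sock_sendmsg_nosec() and the new sock_sendmsg_nosec() skip it, presumably so a batched caller can run security_socket_sendmsg() once and then send several messages. The layering reduces to the sketch below (names shortened; the ?: form is the GNU extension that yields err when it is non-zero):

        static inline int do_send(struct kiocb *iocb, struct socket *sock,
                                  struct msghdr *msg, size_t size)
        {
                /* No security check here; callers must have done it already. */
                return sock->ops->sendmsg(iocb, sock, msg, size);
        }

        static inline int checked_send(struct kiocb *iocb, struct socket *sock,
                                       struct msghdr *msg, size_t size)
        {
                int err = security_socket_sendmsg(sock, msg, size);

                return err ?: do_send(iocb, sock, msg, size);
        }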
@@ -1108,15 +1120,16 @@ static int sock_fasync(int fd, struct file *filp, int on)
1108{ 1120{
1109 struct socket *sock = filp->private_data; 1121 struct socket *sock = filp->private_data;
1110 struct sock *sk = sock->sk; 1122 struct sock *sk = sock->sk;
1123 struct socket_wq *wq;
1111 1124
1112 if (sk == NULL) 1125 if (sk == NULL)
1113 return -EINVAL; 1126 return -EINVAL;
1114 1127
1115 lock_sock(sk); 1128 lock_sock(sk);
1129 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1130 fasync_helper(fd, filp, on, &wq->fasync_list);
1116 1131
1117 fasync_helper(fd, filp, on, &sock->wq->fasync_list); 1132 if (!wq->fasync_list)
1118
1119 if (!sock->wq->fasync_list)
1120 sock_reset_flag(sk, SOCK_FASYNC); 1133 sock_reset_flag(sk, SOCK_FASYNC);
1121 else 1134 else
1122 sock_set_flag(sk, SOCK_FASYNC); 1135 sock_set_flag(sk, SOCK_FASYNC);
@@ -1858,57 +1871,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1858#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1871#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1859#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1872#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1860 1873
1861/* 1874static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
1862 * BSD sendmsg interface 1875 struct msghdr *msg_sys, unsigned flags, int nosec)
1863 */
1864
1865SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1866{ 1876{
1867 struct compat_msghdr __user *msg_compat = 1877 struct compat_msghdr __user *msg_compat =
1868 (struct compat_msghdr __user *)msg; 1878 (struct compat_msghdr __user *)msg;
1869 struct socket *sock;
1870 struct sockaddr_storage address; 1879 struct sockaddr_storage address;
1871 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1880 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1872 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1881 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1873 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1882 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1874 /* 20 is size of ipv6_pktinfo */ 1883 /* 20 is size of ipv6_pktinfo */
1875 unsigned char *ctl_buf = ctl; 1884 unsigned char *ctl_buf = ctl;
1876 struct msghdr msg_sys;
1877 int err, ctl_len, iov_size, total_len; 1885 int err, ctl_len, iov_size, total_len;
1878 int fput_needed;
1879 1886
1880 err = -EFAULT; 1887 err = -EFAULT;
1881 if (MSG_CMSG_COMPAT & flags) { 1888 if (MSG_CMSG_COMPAT & flags) {
1882 if (get_compat_msghdr(&msg_sys, msg_compat)) 1889 if (get_compat_msghdr(msg_sys, msg_compat))
1883 return -EFAULT; 1890 return -EFAULT;
1884 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1891 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1885 return -EFAULT; 1892 return -EFAULT;
1886 1893
1887 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1888 if (!sock)
1889 goto out;
1890
1891 /* do not move before msg_sys is valid */ 1894 /* do not move before msg_sys is valid */
1892 err = -EMSGSIZE; 1895 err = -EMSGSIZE;
1893 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1896 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1894 goto out_put; 1897 goto out;
1895 1898
1896 /* Check whether to allocate the iovec area */ 1899 /* Check whether to allocate the iovec area */
1897 err = -ENOMEM; 1900 err = -ENOMEM;
1898 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1901 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1899 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1902 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1900 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1903 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1901 if (!iov) 1904 if (!iov)
1902 goto out_put; 1905 goto out;
1903 } 1906 }
1904 1907
1905 /* This will also move the address data into kernel space */ 1908 /* This will also move the address data into kernel space */
1906 if (MSG_CMSG_COMPAT & flags) { 1909 if (MSG_CMSG_COMPAT & flags) {
1907 err = verify_compat_iovec(&msg_sys, iov, 1910 err = verify_compat_iovec(msg_sys, iov,
1908 (struct sockaddr *)&address, 1911 (struct sockaddr *)&address,
1909 VERIFY_READ); 1912 VERIFY_READ);
1910 } else 1913 } else
1911 err = verify_iovec(&msg_sys, iov, 1914 err = verify_iovec(msg_sys, iov,
1912 (struct sockaddr *)&address, 1915 (struct sockaddr *)&address,
1913 VERIFY_READ); 1916 VERIFY_READ);
1914 if (err < 0) 1917 if (err < 0)
@@ -1917,17 +1920,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1917 1920
1918 err = -ENOBUFS; 1921 err = -ENOBUFS;
1919 1922
1920 if (msg_sys.msg_controllen > INT_MAX) 1923 if (msg_sys->msg_controllen > INT_MAX)
1921 goto out_freeiov; 1924 goto out_freeiov;
1922 ctl_len = msg_sys.msg_controllen; 1925 ctl_len = msg_sys->msg_controllen;
1923 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1926 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1924 err = 1927 err =
1925 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1928 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
1926 sizeof(ctl)); 1929 sizeof(ctl));
1927 if (err) 1930 if (err)
1928 goto out_freeiov; 1931 goto out_freeiov;
1929 ctl_buf = msg_sys.msg_control; 1932 ctl_buf = msg_sys->msg_control;
1930 ctl_len = msg_sys.msg_controllen; 1933 ctl_len = msg_sys->msg_controllen;
1931 } else if (ctl_len) { 1934 } else if (ctl_len) {
1932 if (ctl_len > sizeof(ctl)) { 1935 if (ctl_len > sizeof(ctl)) {
1933 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1936 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
@@ -1936,21 +1939,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1936 } 1939 }
1937 err = -EFAULT; 1940 err = -EFAULT;
1938 /* 1941 /*
1939 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1942 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1940 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1943 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1941 * checking falls down on this. 1944 * checking falls down on this.
1942 */ 1945 */
1943 if (copy_from_user(ctl_buf, 1946 if (copy_from_user(ctl_buf,
1944 (void __user __force *)msg_sys.msg_control, 1947 (void __user __force *)msg_sys->msg_control,
1945 ctl_len)) 1948 ctl_len))
1946 goto out_freectl; 1949 goto out_freectl;
1947 msg_sys.msg_control = ctl_buf; 1950 msg_sys->msg_control = ctl_buf;
1948 } 1951 }
1949 msg_sys.msg_flags = flags; 1952 msg_sys->msg_flags = flags;
1950 1953
1951 if (sock->file->f_flags & O_NONBLOCK) 1954 if (sock->file->f_flags & O_NONBLOCK)
1952 msg_sys.msg_flags |= MSG_DONTWAIT; 1955 msg_sys->msg_flags |= MSG_DONTWAIT;
1953 err = sock_sendmsg(sock, &msg_sys, total_len); 1956 err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
1957 total_len);
1954 1958
1955out_freectl: 1959out_freectl:
1956 if (ctl_buf != ctl) 1960 if (ctl_buf != ctl)
@@ -1958,12 +1962,114 @@ out_freectl:
1958out_freeiov: 1962out_freeiov:
1959 if (iov != iovstack) 1963 if (iov != iovstack)
1960 sock_kfree_s(sock->sk, iov, iov_size); 1964 sock_kfree_s(sock->sk, iov, iov_size);
1961out_put: 1965out:
1966 return err;
1967}
1968
1969/*
1970 * BSD sendmsg interface
1971 */
1972
1973SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1974{
1975 int fput_needed, err;
1976 struct msghdr msg_sys;
1977 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
1978
1979 if (!sock)
1980 goto out;
1981
1982 err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
1983
1962 fput_light(sock->file, fput_needed); 1984 fput_light(sock->file, fput_needed);
1963out: 1985out:
1964 return err; 1986 return err;
1965} 1987}
1966 1988
1989/*
1990 * Linux sendmmsg interface
1991 */
1992
1993int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
1994 unsigned int flags)
1995{
1996 int fput_needed, err, datagrams;
1997 struct socket *sock;
1998 struct mmsghdr __user *entry;
1999 struct compat_mmsghdr __user *compat_entry;
2000 struct msghdr msg_sys;
2001
2002 datagrams = 0;
2003
2004 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2005 if (!sock)
2006 return err;
2007
2008 err = sock_error(sock->sk);
2009 if (err)
2010 goto out_put;
2011
2012 entry = mmsg;
2013 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2014
2015 while (datagrams < vlen) {
2016 /*
2017 * No need to ask LSM for more than the first datagram.
2018 */
2019 if (MSG_CMSG_COMPAT & flags) {
2020 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2021 &msg_sys, flags, datagrams);
2022 if (err < 0)
2023 break;
2024 err = __put_user(err, &compat_entry->msg_len);
2025 ++compat_entry;
2026 } else {
2027 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
2028 &msg_sys, flags, datagrams);
2029 if (err < 0)
2030 break;
2031 err = put_user(err, &entry->msg_len);
2032 ++entry;
2033 }
2034
2035 if (err)
2036 break;
2037 ++datagrams;
2038 }
2039
2040out_put:
2041 fput_light(sock->file, fput_needed);
2042
2043 if (err == 0)
2044 return datagrams;
2045
2046 if (datagrams != 0) {
2047 /*
2048 * We may send less entries than requested (vlen) if the
2049 * sock is non blocking...
2050 */
2051 if (err != -EAGAIN) {
2052 /*
2053 * ... or if sendmsg returns an error after we
2054 * send some datagrams, where we record the
2055 * error to return on the next call or if the
2056 * app asks about it using getsockopt(SO_ERROR).
2057 */
2058 sock->sk->sk_err = -err;
2059 }
2060
2061 return datagrams;
2062 }
2063
2064 return err;
2065}
2066
2067SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2068 unsigned int, vlen, unsigned int, flags)
2069{
2070 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2071}
2072
1967static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, 2073static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1968 struct msghdr *msg_sys, unsigned flags, int nosec) 2074 struct msghdr *msg_sys, unsigned flags, int nosec)
1969{ 2075{
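__sys_sendmmsg() above loops over an array of mmsghdr, sends each entry with __sys_sendmsg(), records the per-message byte count in msg_len, and returns the number of datagrams sent. Below is a userspace sketch of the resulting sendmmsg(2) call, assuming a libc that already exposes a sendmmsg() wrapper; the 127.0.0.1:9999 destination and the two sample payloads are illustrative only.

/* Userspace sketch of the new sendmmsg(2) batching interface.
 * Error handling is minimal; the destination address is arbitrary.
 */
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(9999),
	};
	struct iovec iov[2];
	struct mmsghdr msgs[2];
	int i, sent;

	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
	connect(fd, (struct sockaddr *)&dst, sizeof(dst));

	memset(msgs, 0, sizeof(msgs));
	iov[0].iov_base = "one";
	iov[0].iov_len  = 3;
	iov[1].iov_base = "two";
	iov[1].iov_len  = 3;
	for (i = 0; i < 2; i++) {
		msgs[i].msg_hdr.msg_iov    = &iov[i];
		msgs[i].msg_hdr.msg_iovlen = 1;
	}

	/* One system call sends both datagrams; the return value is the
	 * number of messages sent, and msg_len holds per-message bytes. */
	sent = sendmmsg(fd, msgs, 2, 0);
	if (sent < 0)
		perror("sendmmsg");
	for (i = 0; i < sent; i++)
		printf("datagram %d: %u bytes\n", i, msgs[i].msg_len);

	close(fd);
	return 0;
}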
@@ -2117,14 +2223,16 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2117 */ 2223 */
2118 if (MSG_CMSG_COMPAT & flags) { 2224 if (MSG_CMSG_COMPAT & flags) {
2119 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, 2225 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2120 &msg_sys, flags, datagrams); 2226 &msg_sys, flags & ~MSG_WAITFORONE,
2227 datagrams);
2121 if (err < 0) 2228 if (err < 0)
2122 break; 2229 break;
2123 err = __put_user(err, &compat_entry->msg_len); 2230 err = __put_user(err, &compat_entry->msg_len);
2124 ++compat_entry; 2231 ++compat_entry;
2125 } else { 2232 } else {
2126 err = __sys_recvmsg(sock, (struct msghdr __user *)entry, 2233 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2127 &msg_sys, flags, datagrams); 2234 &msg_sys, flags & ~MSG_WAITFORONE,
2235 datagrams);
2128 if (err < 0) 2236 if (err < 0)
2129 break; 2237 break;
2130 err = put_user(err, &entry->msg_len); 2238 err = put_user(err, &entry->msg_len);
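This hunk masks MSG_WAITFORONE out of the flags handed to the per-message __sys_recvmsg(): the flag only tells the outer loop to stop blocking once the first datagram has arrived and should not leak into the individual receive. Below is a userspace sketch of recvmmsg(2) with MSG_WAITFORONE, assuming a libc wrapper; the port and batch size are arbitrary assumptions.

/* Userspace sketch of recvmmsg(2) with MSG_WAITFORONE: block until one
 * datagram arrives, then drain whatever else is already queued.
 */
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#define BATCH 8

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in addr = {
		.sin_family      = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port        = htons(9999),
	};
	static char bufs[BATCH][1500];
	struct iovec iov[BATCH];
	struct mmsghdr msgs[BATCH];
	int i, n;

	bind(fd, (struct sockaddr *)&addr, sizeof(addr));

	memset(msgs, 0, sizeof(msgs));
	for (i = 0; i < BATCH; i++) {
		iov[i].iov_base = bufs[i];
		iov[i].iov_len  = sizeof(bufs[i]);
		msgs[i].msg_hdr.msg_iov    = &iov[i];
		msgs[i].msg_hdr.msg_iovlen = 1;
	}

	/* Blocks until at least one datagram arrives on port 9999. */
	n = recvmmsg(fd, msgs, BATCH, MSG_WAITFORONE, NULL);
	if (n < 0)
		perror("recvmmsg");
	for (i = 0; i < n; i++)
		printf("datagram %d: %u bytes\n", i, msgs[i].msg_len);
	return 0;
}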
@@ -2209,11 +2317,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2209#ifdef __ARCH_WANT_SYS_SOCKETCALL 2317#ifdef __ARCH_WANT_SYS_SOCKETCALL
2210/* Argument list sizes for sys_socketcall */ 2318/* Argument list sizes for sys_socketcall */
2211#define AL(x) ((x) * sizeof(unsigned long)) 2319#define AL(x) ((x) * sizeof(unsigned long))
2212static const unsigned char nargs[20] = { 2320static const unsigned char nargs[21] = {
2213 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 2321 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2214 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 2322 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2215 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 2323 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
2216 AL(4), AL(5) 2324 AL(4), AL(5), AL(4)
2217}; 2325};
2218 2326
2219#undef AL 2327#undef AL
@@ -2233,7 +2341,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2233 int err; 2341 int err;
2234 unsigned int len; 2342 unsigned int len;
2235 2343
2236 if (call < 1 || call > SYS_RECVMMSG) 2344 if (call < 1 || call > SYS_SENDMMSG)
2237 return -EINVAL; 2345 return -EINVAL;
2238 2346
2239 len = nargs[call]; 2347 len = nargs[call];
@@ -2308,6 +2416,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2308 case SYS_SENDMSG: 2416 case SYS_SENDMSG:
2309 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 2417 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2310 break; 2418 break;
2419 case SYS_SENDMMSG:
2420 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2421 break;
2311 case SYS_RECVMSG: 2422 case SYS_RECVMSG:
2312 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2423 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2313 break; 2424 break;
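On architectures that multiplex through socketcall(2), nargs[] tells the kernel how many bytes of arguments to copy for each call number, so the table, the SYS_SENDMMSG upper bound and the dispatch switch above all have to grow together. The throwaway userspace printout below, assuming an LP64 host, only makes the per-call byte counts visible.

/* Userland model (an assumption, not kernel code) of the socketcall()
 * argument table extended above: call number -> argument bytes via AL().
 * Index 19 is recvmmsg (5 args); the new index 20 is sendmmsg (4 args).
 */
#include <stdio.h>

#define AL(x) ((x) * sizeof(unsigned long))

static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

int main(void)
{
	int call;

	for (call = 1; call < 21; call++)
		printf("call %2d copies %d bytes of arguments\n",
		       call, nargs[call]);
	return 0;
}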
@@ -2583,23 +2694,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2583 2694
2584static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) 2695static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2585{ 2696{
2697 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2698 bool convert_in = false, convert_out = false;
2699 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2700 struct ethtool_rxnfc __user *rxnfc;
2586 struct ifreq __user *ifr; 2701 struct ifreq __user *ifr;
2702 u32 rule_cnt = 0, actual_rule_cnt;
2703 u32 ethcmd;
2587 u32 data; 2704 u32 data;
2588 void __user *datap; 2705 int ret;
2589 2706
2590 ifr = compat_alloc_user_space(sizeof(*ifr)); 2707 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2708 return -EFAULT;
2591 2709
2592 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 2710 compat_rxnfc = compat_ptr(data);
2711
2712 if (get_user(ethcmd, &compat_rxnfc->cmd))
2593 return -EFAULT; 2713 return -EFAULT;
2594 2714
2595 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 2715 /* Most ethtool structures are defined without padding.
2716 * Unfortunately struct ethtool_rxnfc is an exception.
2717 */
2718 switch (ethcmd) {
2719 default:
2720 break;
2721 case ETHTOOL_GRXCLSRLALL:
2722 /* Buffer size is variable */
2723 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2724 return -EFAULT;
2725 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2726 return -ENOMEM;
2727 buf_size += rule_cnt * sizeof(u32);
2728 /* fall through */
2729 case ETHTOOL_GRXRINGS:
2730 case ETHTOOL_GRXCLSRLCNT:
2731 case ETHTOOL_GRXCLSRULE:
2732 convert_out = true;
2733 /* fall through */
2734 case ETHTOOL_SRXCLSRLDEL:
2735 case ETHTOOL_SRXCLSRLINS:
2736 buf_size += sizeof(struct ethtool_rxnfc);
2737 convert_in = true;
2738 break;
2739 }
2740
2741 ifr = compat_alloc_user_space(buf_size);
2742 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2743
2744 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2596 return -EFAULT; 2745 return -EFAULT;
2597 2746
2598 datap = compat_ptr(data); 2747 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2599 if (put_user(datap, &ifr->ifr_ifru.ifru_data)) 2748 &ifr->ifr_ifru.ifru_data))
2600 return -EFAULT; 2749 return -EFAULT;
2601 2750
2602 return dev_ioctl(net, SIOCETHTOOL, ifr); 2751 if (convert_in) {
2752 /* We expect there to be holes between fs.m_ext and
2753 * fs.ring_cookie and at the end of fs, but nowhere else.
2754 */
2755 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2756 sizeof(compat_rxnfc->fs.m_ext) !=
2757 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2758 sizeof(rxnfc->fs.m_ext));
2759 BUILD_BUG_ON(
2760 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2761 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2762 offsetof(struct ethtool_rxnfc, fs.location) -
2763 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2764
2765 if (copy_in_user(rxnfc, compat_rxnfc,
2766 (void *)(&rxnfc->fs.m_ext + 1) -
2767 (void *)rxnfc) ||
2768 copy_in_user(&rxnfc->fs.ring_cookie,
2769 &compat_rxnfc->fs.ring_cookie,
2770 (void *)(&rxnfc->fs.location + 1) -
2771 (void *)&rxnfc->fs.ring_cookie) ||
2772 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2773 sizeof(rxnfc->rule_cnt)))
2774 return -EFAULT;
2775 }
2776
2777 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2778 if (ret)
2779 return ret;
2780
2781 if (convert_out) {
2782 if (copy_in_user(compat_rxnfc, rxnfc,
2783 (const void *)(&rxnfc->fs.m_ext + 1) -
2784 (const void *)rxnfc) ||
2785 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2786 &rxnfc->fs.ring_cookie,
2787 (const void *)(&rxnfc->fs.location + 1) -
2788 (const void *)&rxnfc->fs.ring_cookie) ||
2789 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2790 sizeof(rxnfc->rule_cnt)))
2791 return -EFAULT;
2792
2793 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2794 /* As an optimisation, we only copy the actual
2795 * number of rules that the underlying
2796 * function returned. Since Mallory might
2797 * change the rule count in user memory, we
2798 * check that it is less than the rule count
2799 * originally given (as the user buffer size),
2800 * which has been range-checked.
2801 */
2802 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2803 return -EFAULT;
2804 if (actual_rule_cnt < rule_cnt)
2805 rule_cnt = actual_rule_cnt;
2806 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2807 &rxnfc->rule_locs[0],
2808 rule_cnt * sizeof(u32)))
2809 return -EFAULT;
2810 }
2811 }
2812
2813 return 0;
2603} 2814}
2604 2815
2605static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 2816static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
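The rewritten compat ethtool_ioctl() copies struct ethtool_rxnfc field-group by field-group because the 64-bit layout has alignment holes that the 32-bit layout lacks, and it pins those assumptions down with BUILD_BUG_ON(). Below is a standalone sketch of that compile-time technique, assuming gcc or clang for the aligned attribute; native_hdr and compat_hdr are hypothetical structures, not the ethtool ones.

/* Sketch of the compile-time layout checks used above: turn "these two
 * struct layouts must only differ by the holes we expect" into an
 * expression the compiler can reject, so a future field reshuffle breaks
 * the build instead of silently corrupting data.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

struct native_hdr {			/* hypothetical 64-bit layout */
	uint32_t cmd;
	uint64_t cookie __attribute__((aligned(8)));	/* 4-byte hole before this */
	uint32_t len;
};

struct compat_hdr {			/* hypothetical packed 32-bit layout */
	uint32_t cmd;
	uint32_t cookie_lo;
	uint32_t cookie_hi;
	uint32_t len;
};

int main(void)
{
	/* Both layouts place 'cmd' at the same offset ... */
	BUILD_BUG_ON(offsetof(struct native_hdr, cmd) !=
		     offsetof(struct compat_hdr, cmd));
	/* ... and the only difference up to 'len' is the single 4-byte
	 * alignment hole before 'cookie' in the native layout. */
	BUILD_BUG_ON(offsetof(struct native_hdr, len) !=
		     offsetof(struct compat_hdr, len) + 4);
	puts("layout assumptions hold");
	return 0;
}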
@@ -2643,7 +2854,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
2643 2854
2644 old_fs = get_fs(); 2855 old_fs = get_fs();
2645 set_fs(KERNEL_DS); 2856 set_fs(KERNEL_DS);
2646 err = dev_ioctl(net, cmd, &kifr); 2857 err = dev_ioctl(net, cmd,
2858 (struct ifreq __user __force *) &kifr);
2647 set_fs(old_fs); 2859 set_fs(old_fs);
2648 2860
2649 return err; 2861 return err;
@@ -2752,7 +2964,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2752 2964
2753 old_fs = get_fs(); 2965 old_fs = get_fs();
2754 set_fs(KERNEL_DS); 2966 set_fs(KERNEL_DS);
2755 err = dev_ioctl(net, cmd, (void __user *)&ifr); 2967 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
2756 set_fs(old_fs); 2968 set_fs(old_fs);
2757 2969
2758 if (cmd == SIOCGIFMAP && !err) { 2970 if (cmd == SIOCGIFMAP && !err) {
@@ -2857,7 +3069,8 @@ static int routing_ioctl(struct net *net, struct socket *sock,
2857 ret |= __get_user(rtdev, &(ur4->rt_dev)); 3069 ret |= __get_user(rtdev, &(ur4->rt_dev));
2858 if (rtdev) { 3070 if (rtdev) {
2859 ret |= copy_from_user(devname, compat_ptr(rtdev), 15); 3071 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
2860 r4.rt_dev = devname; devname[15] = 0; 3072 r4.rt_dev = (char __user __force *)devname;
3073 devname[15] = 0;
2861 } else 3074 } else
2862 r4.rt_dev = NULL; 3075 r4.rt_dev = NULL;
2863 3076
@@ -2879,7 +3092,7 @@ out:
2879 3092
2880/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE 3093/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
2881 * for some operations; this forces use of the newer bridge-utils that 3094 * for some operations; this forces use of the newer bridge-utils that
2882 * use compatiable ioctls 3095 * use compatible ioctls
2883 */ 3096 */
2884static int old_bridge_ioctl(compat_ulong_t __user *argp) 3097static int old_bridge_ioctl(compat_ulong_t __user *argp)
2885{ 3098{
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 8873fd8ddacd..b2198e65d8bb 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -18,14 +18,13 @@ config SUNRPC_XPRT_RDMA
18 If unsure, say N. 18 If unsure, say N.
19 19
20config RPCSEC_GSS_KRB5 20config RPCSEC_GSS_KRB5
21 tristate 21 tristate "Secure RPC: Kerberos V mechanism"
22 depends on SUNRPC && CRYPTO 22 depends on SUNRPC && CRYPTO
23 prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4) 23 depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
24 depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
25 depends on CRYPTO_ARC4
24 default y 26 default y
25 select SUNRPC_GSS 27 select SUNRPC_GSS
26 select CRYPTO_MD5
27 select CRYPTO_DES
28 select CRYPTO_CBC
29 help 28 help
30 Choose Y here to enable Secure RPC using the Kerberos version 5 29 Choose Y here to enable Secure RPC using the Kerberos version 5
31 GSS-API mechanism (RFC 1964). 30 GSS-API mechanism (RFC 1964).
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 45dbf1521b9a..339ba64cce1e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -417,7 +417,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
417 gss_msg->msg.len += len; 417 gss_msg->msg.len += len;
418 } 418 }
419 if (mech->gm_upcall_enctypes) { 419 if (mech->gm_upcall_enctypes) {
420 len = sprintf(p, mech->gm_upcall_enctypes); 420 len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes);
421 p += len; 421 p += len;
422 gss_msg->msg.len += len; 422 gss_msg->msg.len += len;
423 } 423 }
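The change above stops passing gm_upcall_enctypes straight to sprintf() as the format string and instead formats it through an explicit "enctypes=%s "; the matching gss_krb5_mech hunk further down drops the prefix from the mechanism string itself. Below is a userspace illustration of why data should travel as an argument rather than as a format, using a made-up string.

/* Demonstration of the format-string hazard: when a data string is used
 * as the format, any '%' in it is interpreted as a conversion.
 */
#include <stdio.h>

int main(void)
{
	const char *data = "enctypes=18,17 100%% safe?";	/* note the %% */
	char buf[128];

	/* Hazardous: 'data' becomes the format string, so "%%" collapses
	 * to "%" and a stray single '%' would be undefined behaviour. */
	sprintf(buf, data);
	printf("as format  : %s\n", buf);

	/* Safe: a fixed format string copies the data verbatim. */
	sprintf(buf, "%s", data);
	printf("as argument: %s\n", buf);
	return 0;
}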
@@ -520,7 +520,7 @@ gss_refresh_upcall(struct rpc_task *task)
520 warn_gssd(); 520 warn_gssd();
521 task->tk_timeout = 15*HZ; 521 task->tk_timeout = 15*HZ;
522 rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL); 522 rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
523 return 0; 523 return -EAGAIN;
524 } 524 }
525 if (IS_ERR(gss_msg)) { 525 if (IS_ERR(gss_msg)) {
526 err = PTR_ERR(gss_msg); 526 err = PTR_ERR(gss_msg);
@@ -563,10 +563,12 @@ retry:
563 if (PTR_ERR(gss_msg) == -EAGAIN) { 563 if (PTR_ERR(gss_msg) == -EAGAIN) {
564 err = wait_event_interruptible_timeout(pipe_version_waitqueue, 564 err = wait_event_interruptible_timeout(pipe_version_waitqueue,
565 pipe_version >= 0, 15*HZ); 565 pipe_version >= 0, 15*HZ);
566 if (pipe_version < 0) {
567 warn_gssd();
568 err = -EACCES;
569 }
566 if (err) 570 if (err)
567 goto out; 571 goto out;
568 if (pipe_version < 0)
569 warn_gssd();
570 goto retry; 572 goto retry;
571 } 573 }
572 if (IS_ERR(gss_msg)) { 574 if (IS_ERR(gss_msg)) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index f375decc024b..0a9a2ec2e469 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -427,7 +427,7 @@ static int
427context_derive_keys_rc4(struct krb5_ctx *ctx) 427context_derive_keys_rc4(struct krb5_ctx *ctx)
428{ 428{
429 struct crypto_hash *hmac; 429 struct crypto_hash *hmac;
430 static const char sigkeyconstant[] = "signaturekey"; 430 char sigkeyconstant[] = "signaturekey";
431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ 431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
432 struct hash_desc desc; 432 struct hash_desc desc;
433 struct scatterlist sg[1]; 433 struct scatterlist sg[1];
@@ -750,7 +750,7 @@ static struct gss_api_mech gss_kerberos_mech = {
750 .gm_ops = &gss_kerberos_ops, 750 .gm_ops = &gss_kerberos_ops,
751 .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), 751 .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
752 .gm_pfs = gss_kerberos_pfs, 752 .gm_pfs = gss_kerberos_pfs,
753 .gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ", 753 .gm_upcall_enctypes = "18,17,16,23,3,1,2",
754}; 754};
755 755
756static int __init init_kerberos_module(void) 756static int __init init_kerberos_module(void)
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 8b4061049d76..e3c36a274412 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -160,6 +160,28 @@ gss_mech_get_by_name(const char *name)
160 160
161EXPORT_SYMBOL_GPL(gss_mech_get_by_name); 161EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
162 162
163struct gss_api_mech *
164gss_mech_get_by_OID(struct xdr_netobj *obj)
165{
166 struct gss_api_mech *pos, *gm = NULL;
167
168 spin_lock(&registered_mechs_lock);
169 list_for_each_entry(pos, &registered_mechs, gm_list) {
170 if (obj->len == pos->gm_oid.len) {
171 if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
172 if (try_module_get(pos->gm_owner))
173 gm = pos;
174 break;
175 }
176 }
177 }
178 spin_unlock(&registered_mechs_lock);
179 return gm;
180
181}
182
183EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
184
163static inline int 185static inline int
164mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) 186mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
165{ 187{
@@ -193,6 +215,22 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
193 215
194EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); 216EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
195 217
218int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
219{
220 struct gss_api_mech *pos = NULL;
221 int i = 0;
222
223 spin_lock(&registered_mechs_lock);
224 list_for_each_entry(pos, &registered_mechs, gm_list) {
225 array_ptr[i] = pos->gm_pfs->pseudoflavor;
226 i++;
227 }
228 spin_unlock(&registered_mechs_lock);
229 return i;
230}
231
232EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
233
196u32 234u32
197gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) 235gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
198{ 236{
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index bcdae78fdfc6..8d0f7d3c71c8 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1101,7 +1101,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1101 1101
1102 /* credential is: 1102 /* credential is:
1103 * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle 1103 * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
1104 * at least 5 u32s, and is preceeded by length, so that makes 6. 1104 * at least 5 u32s, and is preceded by length, so that makes 6.
1105 */ 1105 */
1106 1106
1107 if (argv->iov_len < 5 * 4) 1107 if (argv->iov_len < 5 * 4)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 57d344cf2256..8d83f9d48713 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -436,7 +436,9 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
436 if (!(rovr->tk_flags & RPC_TASK_KILLED)) { 436 if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
437 rovr->tk_flags |= RPC_TASK_KILLED; 437 rovr->tk_flags |= RPC_TASK_KILLED;
438 rpc_exit(rovr, -EIO); 438 rpc_exit(rovr, -EIO);
439 rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr); 439 if (RPC_IS_QUEUED(rovr))
440 rpc_wake_up_queued_task(rovr->tk_waitqueue,
441 rovr);
440 } 442 }
441 } 443 }
442 spin_unlock(&clnt->cl_lock); 444 spin_unlock(&clnt->cl_lock);
@@ -597,6 +599,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
597 } 599 }
598} 600}
599 601
602void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
603{
604 rpc_task_release_client(task);
605 rpc_task_set_client(task, clnt);
606}
607EXPORT_SYMBOL_GPL(rpc_task_reset_client);
608
609
600static void 610static void
601rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) 611rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
602{ 612{
@@ -636,12 +646,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
636 rpc_task_set_client(task, task_setup_data->rpc_client); 646 rpc_task_set_client(task, task_setup_data->rpc_client);
637 rpc_task_set_rpc_message(task, task_setup_data->rpc_message); 647 rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
638 648
639 if (task->tk_status != 0) {
640 int ret = task->tk_status;
641 rpc_put_task(task);
642 return ERR_PTR(ret);
643 }
644
645 if (task->tk_action == NULL) 649 if (task->tk_action == NULL)
646 rpc_call_start(task); 650 rpc_call_start(task);
647 651
@@ -1504,7 +1508,10 @@ call_timeout(struct rpc_task *task)
1504 if (clnt->cl_chatty) 1508 if (clnt->cl_chatty)
1505 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 1509 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
1506 clnt->cl_protname, clnt->cl_server); 1510 clnt->cl_protname, clnt->cl_server);
1507 rpc_exit(task, -EIO); 1511 if (task->tk_flags & RPC_TASK_TIMEOUT)
1512 rpc_exit(task, -ETIMEDOUT);
1513 else
1514 rpc_exit(task, -EIO);
1508 return; 1515 return;
1509 } 1516 }
1510 1517
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164e..6b43ee7221d5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
252 252
253/* 253/*
254 * Mark an RPC call as having completed by clearing the 'active' bit 254 * Mark an RPC call as having completed by clearing the 'active' bit
255 * and then waking up all tasks that were sleeping.
255 */ 256 */
256static void rpc_mark_complete_task(struct rpc_task *task) 257static int rpc_complete_task(struct rpc_task *task)
257{ 258{
258 smp_mb__before_clear_bit(); 259 void *m = &task->tk_runstate;
260 wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
261 struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
262 unsigned long flags;
263 int ret;
264
265 spin_lock_irqsave(&wq->lock, flags);
259 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 266 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
260 smp_mb__after_clear_bit(); 267 ret = atomic_dec_and_test(&task->tk_count);
261 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); 268 if (waitqueue_active(wq))
269 __wake_up_locked_key(wq, TASK_NORMAL, &k);
270 spin_unlock_irqrestore(&wq->lock, flags);
271 return ret;
262} 272}
263 273
264/* 274/*
265 * Allow callers to wait for completion of an RPC call 275 * Allow callers to wait for completion of an RPC call
276 *
277 * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
278 * to enforce taking of the wq->lock and hence avoid races with
279 * rpc_complete_task().
266 */ 280 */
267int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) 281int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
268{ 282{
269 if (action == NULL) 283 if (action == NULL)
270 action = rpc_wait_bit_killable; 284 action = rpc_wait_bit_killable;
271 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, 285 return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
272 action, TASK_KILLABLE); 286 action, TASK_KILLABLE);
273} 287}
274EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); 288EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
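rpc_complete_task() now clears RPC_TASK_ACTIVE, drops its reference and wakes waiters while holding the bit-waitqueue lock, and the waiter switches to out_of_line_wait_on_bit() so it tests the bit under that same lock; otherwise a waiter could free the task between the completer's test and its wakeup. Below is a rough pthread analogue of that discipline, making no claims about the kernel waitqueue API; struct task and its helpers are invented for the sketch.

/* Userspace analogue of the race being closed above: the completing
 * thread flags completion, drops its reference and wakes waiters inside
 * one critical section, under the same lock the waiter checks under.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct task {
	pthread_mutex_t lock;
	pthread_cond_t  done;
	int active;
	int refs;
};

static void task_put(struct task *t)	/* drop a reference, free on last */
{
	pthread_mutex_lock(&t->lock);
	if (--t->refs == 0) {
		pthread_mutex_unlock(&t->lock);
		free(t);
		return;
	}
	pthread_mutex_unlock(&t->lock);
}

static void task_complete(struct task *t)
{
	int last;

	pthread_mutex_lock(&t->lock);
	t->active = 0;
	last = (--t->refs == 0);
	pthread_cond_broadcast(&t->done);	/* wake while still locked */
	pthread_mutex_unlock(&t->lock);
	if (last)
		free(t);
}

static void *waiter(void *arg)
{
	struct task *t = arg;

	pthread_mutex_lock(&t->lock);
	while (t->active)
		pthread_cond_wait(&t->done, &t->lock);
	pthread_mutex_unlock(&t->lock);
	task_put(t);				/* release our reference */
	return NULL;
}

int main(void)
{
	struct task *t = calloc(1, sizeof(*t));
	pthread_t thr;

	pthread_mutex_init(&t->lock, NULL);
	pthread_cond_init(&t->done, NULL);
	t->active = 1;
	t->refs = 2;				/* one for main, one for the waiter */

	pthread_create(&thr, NULL, waiter, t);
	task_complete(t);			/* main's reference goes away here */
	pthread_join(thr, NULL);
	puts("completed without touching freed memory");
	return 0;
}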
@@ -285,15 +299,8 @@ static void rpc_make_runnable(struct rpc_task *task)
285 if (rpc_test_and_set_running(task)) 299 if (rpc_test_and_set_running(task))
286 return; 300 return;
287 if (RPC_IS_ASYNC(task)) { 301 if (RPC_IS_ASYNC(task)) {
288 int status;
289
290 INIT_WORK(&task->u.tk_work, rpc_async_schedule); 302 INIT_WORK(&task->u.tk_work, rpc_async_schedule);
291 status = queue_work(rpciod_workqueue, &task->u.tk_work); 303 queue_work(rpciod_workqueue, &task->u.tk_work);
292 if (status < 0) {
293 printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
294 task->tk_status = status;
295 return;
296 }
297 } else 304 } else
298 wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); 305 wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
299} 306}
@@ -623,14 +630,12 @@ static void __rpc_execute(struct rpc_task *task)
623 save_callback = task->tk_callback; 630 save_callback = task->tk_callback;
624 task->tk_callback = NULL; 631 task->tk_callback = NULL;
625 save_callback(task); 632 save_callback(task);
626 } 633 } else {
627 634 /*
628 /* 635 * Perform the next FSM step.
629 * Perform the next FSM step. 636 * tk_action may be NULL when the task has been killed
630 * tk_action may be NULL when the task has been killed 637 * by someone else.
631 * by someone else. 638 */
632 */
633 if (!RPC_IS_QUEUED(task)) {
634 if (task->tk_action == NULL) 639 if (task->tk_action == NULL)
635 break; 640 break;
636 task->tk_action(task); 641 task->tk_action(task);
@@ -829,12 +834,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
829 } 834 }
830 835
831 rpc_init_task(task, setup_data); 836 rpc_init_task(task, setup_data);
832 if (task->tk_status < 0) {
833 int err = task->tk_status;
834 rpc_put_task(task);
835 return ERR_PTR(err);
836 }
837
838 task->tk_flags |= flags; 837 task->tk_flags |= flags;
839 dprintk("RPC: allocated task %p\n", task); 838 dprintk("RPC: allocated task %p\n", task);
840 return task; 839 return task;
@@ -857,34 +856,69 @@ static void rpc_async_release(struct work_struct *work)
857 rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); 856 rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
858} 857}
859 858
860void rpc_put_task(struct rpc_task *task) 859static void rpc_release_resources_task(struct rpc_task *task)
861{ 860{
862 if (!atomic_dec_and_test(&task->tk_count))
863 return;
864 /* Release resources */
865 if (task->tk_rqstp) 861 if (task->tk_rqstp)
866 xprt_release(task); 862 xprt_release(task);
867 if (task->tk_msg.rpc_cred) 863 if (task->tk_msg.rpc_cred) {
868 put_rpccred(task->tk_msg.rpc_cred); 864 put_rpccred(task->tk_msg.rpc_cred);
865 task->tk_msg.rpc_cred = NULL;
866 }
869 rpc_task_release_client(task); 867 rpc_task_release_client(task);
870 if (task->tk_workqueue != NULL) { 868}
869
870static void rpc_final_put_task(struct rpc_task *task,
871 struct workqueue_struct *q)
872{
873 if (q != NULL) {
871 INIT_WORK(&task->u.tk_work, rpc_async_release); 874 INIT_WORK(&task->u.tk_work, rpc_async_release);
872 queue_work(task->tk_workqueue, &task->u.tk_work); 875 queue_work(q, &task->u.tk_work);
873 } else 876 } else
874 rpc_free_task(task); 877 rpc_free_task(task);
875} 878}
879
880static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
881{
882 if (atomic_dec_and_test(&task->tk_count)) {
883 rpc_release_resources_task(task);
884 rpc_final_put_task(task, q);
885 }
886}
887
888void rpc_put_task(struct rpc_task *task)
889{
890 rpc_do_put_task(task, NULL);
891}
876EXPORT_SYMBOL_GPL(rpc_put_task); 892EXPORT_SYMBOL_GPL(rpc_put_task);
877 893
894void rpc_put_task_async(struct rpc_task *task)
895{
896 rpc_do_put_task(task, task->tk_workqueue);
897}
898EXPORT_SYMBOL_GPL(rpc_put_task_async);
899
878static void rpc_release_task(struct rpc_task *task) 900static void rpc_release_task(struct rpc_task *task)
879{ 901{
880 dprintk("RPC: %5u release task\n", task->tk_pid); 902 dprintk("RPC: %5u release task\n", task->tk_pid);
881 903
882 BUG_ON (RPC_IS_QUEUED(task)); 904 BUG_ON (RPC_IS_QUEUED(task));
883 905
884 /* Wake up anyone who is waiting for task completion */ 906 rpc_release_resources_task(task);
885 rpc_mark_complete_task(task);
886 907
887 rpc_put_task(task); 908 /*
909 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
910 * so it should be safe to use task->tk_count as a test for whether
911 * or not any other processes still hold references to our rpc_task.
912 */
913 if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
914 /* Wake up anyone who may be waiting for task completion */
915 if (!rpc_complete_task(task))
916 return;
917 } else {
918 if (!atomic_dec_and_test(&task->tk_count))
919 return;
920 }
921 rpc_final_put_task(task, task->tk_workqueue);
888} 922}
889 923
890int rpciod_up(void) 924int rpciod_up(void)
@@ -908,7 +942,7 @@ static int rpciod_start(void)
908 * Create the rpciod thread and wait for it to start. 942 * Create the rpciod thread and wait for it to start.
909 */ 943 */
910 dprintk("RPC: creating workqueue rpciod\n"); 944 dprintk("RPC: creating workqueue rpciod\n");
911 wq = alloc_workqueue("rpciod", WQ_RESCUER, 0); 945 wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
912 rpciod_workqueue = wq; 946 rpciod_workqueue = wq;
913 return rpciod_workqueue != NULL; 947 return rpciod_workqueue != NULL;
914} 948}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 30916b06c12b..c8e10216c113 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -38,6 +38,14 @@ struct unix_domain {
38 38
39extern struct auth_ops svcauth_unix; 39extern struct auth_ops svcauth_unix;
40 40
41static void svcauth_unix_domain_release(struct auth_domain *dom)
42{
43 struct unix_domain *ud = container_of(dom, struct unix_domain, h);
44
45 kfree(dom->name);
46 kfree(ud);
47}
48
41struct auth_domain *unix_domain_find(char *name) 49struct auth_domain *unix_domain_find(char *name)
42{ 50{
43 struct auth_domain *rv; 51 struct auth_domain *rv;
@@ -47,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name)
47 while(1) { 55 while(1) {
48 if (rv) { 56 if (rv) {
49 if (new && rv != &new->h) 57 if (new && rv != &new->h)
50 auth_domain_put(&new->h); 58 svcauth_unix_domain_release(&new->h);
51 59
52 if (rv->flavour != &svcauth_unix) { 60 if (rv->flavour != &svcauth_unix) {
53 auth_domain_put(rv); 61 auth_domain_put(rv);
@@ -74,14 +82,6 @@ struct auth_domain *unix_domain_find(char *name)
74} 82}
75EXPORT_SYMBOL_GPL(unix_domain_find); 83EXPORT_SYMBOL_GPL(unix_domain_find);
76 84
77static void svcauth_unix_domain_release(struct auth_domain *dom)
78{
79 struct unix_domain *ud = container_of(dom, struct unix_domain, h);
80
81 kfree(dom->name);
82 kfree(ud);
83}
84
85 85
86/************************************************** 86/**************************************************
87 * cache for IP address to unix_domain 87 * cache for IP address to unix_domain
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d802e941d365..b7d435c3f19e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -420,6 +420,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
420static void svc_udp_data_ready(struct sock *sk, int count) 420static void svc_udp_data_ready(struct sock *sk, int count)
421{ 421{
422 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 422 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
423 wait_queue_head_t *wq = sk_sleep(sk);
423 424
424 if (svsk) { 425 if (svsk) {
425 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", 426 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
@@ -428,8 +429,8 @@ static void svc_udp_data_ready(struct sock *sk, int count)
428 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 429 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
429 svc_xprt_enqueue(&svsk->sk_xprt); 430 svc_xprt_enqueue(&svsk->sk_xprt);
430 } 431 }
431 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 432 if (wq && waitqueue_active(wq))
432 wake_up_interruptible(sk_sleep(sk)); 433 wake_up_interruptible(wq);
433} 434}
434 435
435/* 436/*
@@ -438,6 +439,7 @@ static void svc_udp_data_ready(struct sock *sk, int count)
438static void svc_write_space(struct sock *sk) 439static void svc_write_space(struct sock *sk)
439{ 440{
440 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); 441 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
442 wait_queue_head_t *wq = sk_sleep(sk);
441 443
442 if (svsk) { 444 if (svsk) {
443 dprintk("svc: socket %p(inet %p), write_space busy=%d\n", 445 dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
@@ -445,10 +447,10 @@ static void svc_write_space(struct sock *sk)
445 svc_xprt_enqueue(&svsk->sk_xprt); 447 svc_xprt_enqueue(&svsk->sk_xprt);
446 } 448 }
447 449
448 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { 450 if (wq && waitqueue_active(wq)) {
449 dprintk("RPC svc_write_space: someone sleeping on %p\n", 451 dprintk("RPC svc_write_space: someone sleeping on %p\n",
450 svsk); 452 svsk);
451 wake_up_interruptible(sk_sleep(sk)); 453 wake_up_interruptible(wq);
452 } 454 }
453} 455}
454 456
@@ -739,6 +741,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
739static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) 741static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
740{ 742{
741 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 743 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
744 wait_queue_head_t *wq;
742 745
743 dprintk("svc: socket %p TCP (listen) state change %d\n", 746 dprintk("svc: socket %p TCP (listen) state change %d\n",
744 sk, sk->sk_state); 747 sk, sk->sk_state);
@@ -761,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
761 printk("svc: socket %p: no user data\n", sk); 764 printk("svc: socket %p: no user data\n", sk);
762 } 765 }
763 766
764 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 767 wq = sk_sleep(sk);
765 wake_up_interruptible_all(sk_sleep(sk)); 768 if (wq && waitqueue_active(wq))
769 wake_up_interruptible_all(wq);
766} 770}
767 771
768/* 772/*
@@ -771,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
771static void svc_tcp_state_change(struct sock *sk) 775static void svc_tcp_state_change(struct sock *sk)
772{ 776{
773 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 777 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
778 wait_queue_head_t *wq = sk_sleep(sk);
774 779
775 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", 780 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
776 sk, sk->sk_state, sk->sk_user_data); 781 sk, sk->sk_state, sk->sk_user_data);
@@ -781,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk)
781 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 786 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
782 svc_xprt_enqueue(&svsk->sk_xprt); 787 svc_xprt_enqueue(&svsk->sk_xprt);
783 } 788 }
784 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 789 if (wq && waitqueue_active(wq))
785 wake_up_interruptible_all(sk_sleep(sk)); 790 wake_up_interruptible_all(wq);
786} 791}
787 792
788static void svc_tcp_data_ready(struct sock *sk, int count) 793static void svc_tcp_data_ready(struct sock *sk, int count)
789{ 794{
790 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 795 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
796 wait_queue_head_t *wq = sk_sleep(sk);
791 797
792 dprintk("svc: socket %p TCP data ready (svsk %p)\n", 798 dprintk("svc: socket %p TCP data ready (svsk %p)\n",
793 sk, sk->sk_user_data); 799 sk, sk->sk_user_data);
@@ -795,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count)
795 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 801 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
796 svc_xprt_enqueue(&svsk->sk_xprt); 802 svc_xprt_enqueue(&svsk->sk_xprt);
797 } 803 }
798 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 804 if (wq && waitqueue_active(wq))
799 wake_up_interruptible(sk_sleep(sk)); 805 wake_up_interruptible(wq);
800} 806}
801 807
802/* 808/*
@@ -1531,6 +1537,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
1531{ 1537{
1532 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1538 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1533 struct sock *sk = svsk->sk_sk; 1539 struct sock *sk = svsk->sk_sk;
1540 wait_queue_head_t *wq;
1534 1541
1535 dprintk("svc: svc_sock_detach(%p)\n", svsk); 1542 dprintk("svc: svc_sock_detach(%p)\n", svsk);
1536 1543
@@ -1539,8 +1546,9 @@ static void svc_sock_detach(struct svc_xprt *xprt)
1539 sk->sk_data_ready = svsk->sk_odata; 1546 sk->sk_data_ready = svsk->sk_odata;
1540 sk->sk_write_space = svsk->sk_owspace; 1547 sk->sk_write_space = svsk->sk_owspace;
1541 1548
1542 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 1549 wq = sk_sleep(sk);
1543 wake_up_interruptible(sk_sleep(sk)); 1550 if (wq && waitqueue_active(wq))
1551 wake_up_interruptible(wq);
1544} 1552}
1545 1553
1546/* 1554/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 856274d7e85c..ce5eb68a9664 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -202,10 +202,9 @@ int xprt_reserve_xprt(struct rpc_task *task)
202 goto out_sleep; 202 goto out_sleep;
203 } 203 }
204 xprt->snd_task = task; 204 xprt->snd_task = task;
205 if (req) { 205 req->rq_bytes_sent = 0;
206 req->rq_bytes_sent = 0; 206 req->rq_ntrans++;
207 req->rq_ntrans++; 207
208 }
209 return 1; 208 return 1;
210 209
211out_sleep: 210out_sleep:
@@ -213,7 +212,7 @@ out_sleep:
213 task->tk_pid, xprt); 212 task->tk_pid, xprt);
214 task->tk_timeout = 0; 213 task->tk_timeout = 0;
215 task->tk_status = -EAGAIN; 214 task->tk_status = -EAGAIN;
216 if (req && req->rq_ntrans) 215 if (req->rq_ntrans)
217 rpc_sleep_on(&xprt->resend, task, NULL); 216 rpc_sleep_on(&xprt->resend, task, NULL);
218 else 217 else
219 rpc_sleep_on(&xprt->sending, task, NULL); 218 rpc_sleep_on(&xprt->sending, task, NULL);
@@ -907,6 +906,7 @@ void xprt_transmit(struct rpc_task *task)
907 } 906 }
908 907
909 dprintk("RPC: %5u xmit complete\n", task->tk_pid); 908 dprintk("RPC: %5u xmit complete\n", task->tk_pid);
909 task->tk_flags |= RPC_TASK_SENT;
910 spin_lock_bh(&xprt->transport_lock); 910 spin_lock_bh(&xprt->transport_lock);
911 911
912 xprt->ops->set_retrans_timeout(task); 912 xprt->ops->set_retrans_timeout(task);
@@ -965,7 +965,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
965 xprt = kzalloc(size, GFP_KERNEL); 965 xprt = kzalloc(size, GFP_KERNEL);
966 if (xprt == NULL) 966 if (xprt == NULL)
967 goto out; 967 goto out;
968 kref_init(&xprt->kref); 968 atomic_set(&xprt->count, 1);
969 969
970 xprt->max_reqs = max_req; 970 xprt->max_reqs = max_req;
971 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); 971 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
@@ -1145,13 +1145,11 @@ found:
1145 1145
1146/** 1146/**
1147 * xprt_destroy - destroy an RPC transport, killing off all requests. 1147 * xprt_destroy - destroy an RPC transport, killing off all requests.
1148 * @kref: kref for the transport to destroy 1148 * @xprt: transport to destroy
1149 * 1149 *
1150 */ 1150 */
1151static void xprt_destroy(struct kref *kref) 1151static void xprt_destroy(struct rpc_xprt *xprt)
1152{ 1152{
1153 struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
1154
1155 dprintk("RPC: destroying transport %p\n", xprt); 1153 dprintk("RPC: destroying transport %p\n", xprt);
1156 xprt->shutdown = 1; 1154 xprt->shutdown = 1;
1157 del_timer_sync(&xprt->timer); 1155 del_timer_sync(&xprt->timer);
@@ -1175,7 +1173,8 @@ static void xprt_destroy(struct kref *kref)
1175 */ 1173 */
1176void xprt_put(struct rpc_xprt *xprt) 1174void xprt_put(struct rpc_xprt *xprt)
1177{ 1175{
1178 kref_put(&xprt->kref, xprt_destroy); 1176 if (atomic_dec_and_test(&xprt->count))
1177 xprt_destroy(xprt);
1179} 1178}
1180 1179
1181/** 1180/**
@@ -1185,6 +1184,7 @@ void xprt_put(struct rpc_xprt *xprt)
1185 */ 1184 */
1186struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) 1185struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
1187{ 1186{
1188 kref_get(&xprt->kref); 1187 if (atomic_inc_not_zero(&xprt->count))
1189 return xprt; 1188 return xprt;
1189 return NULL;
1190} 1190}
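Replacing the transport's kref with a bare atomic count lets xprt_get() use atomic_inc_not_zero(), so a caller racing with the final put gets NULL instead of resurrecting a dying transport. Below is a userspace sketch of that get/put discipline with C11 atomics; the helper names mirror the kernel ones but are local to the example.

/* Sketch of a "get" that refuses to resurrect an object whose refcount
 * has already reached zero, plus the matching dec-and-test on put.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool inc_not_zero(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 0) {
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return true;	/* took a new reference */
	}
	return false;			/* object already dying */
}

static bool dec_and_test(atomic_int *v)
{
	return atomic_fetch_sub(v, 1) == 1;	/* true on the last put */
}

int main(void)
{
	atomic_int count = 1;

	printf("get while live : %d\n", inc_not_zero(&count));	/* 1 */
	printf("put (not last) : %d\n", dec_and_test(&count));	/* 0 */
	printf("put (last)     : %d\n", dec_and_test(&count));	/* 1 */
	printf("get after death: %d\n", inc_not_zero(&count));	/* 0 */
	return 0;
}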
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2ac3f6e8adff..554d0814c875 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -87,6 +87,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) 87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
88{ 88{
89 int len, n = 0, p; 89 int len, n = 0, p;
90 int page_base;
91 struct page **ppages;
90 92
91 if (pos == 0 && xdrbuf->head[0].iov_len) { 93 if (pos == 0 && xdrbuf->head[0].iov_len) {
92 seg[n].mr_page = NULL; 94 seg[n].mr_page = NULL;
@@ -95,34 +97,32 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
95 ++n; 97 ++n;
96 } 98 }
97 99
98 if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { 100 len = xdrbuf->page_len;
99 if (n == nsegs) 101 ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
100 return 0; 102 page_base = xdrbuf->page_base & ~PAGE_MASK;
101 seg[n].mr_page = xdrbuf->pages[0]; 103 p = 0;
102 seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; 104 while (len && n < nsegs) {
103 seg[n].mr_len = min_t(u32, 105 seg[n].mr_page = ppages[p];
104 PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); 106 seg[n].mr_offset = (void *)(unsigned long) page_base;
105 len = xdrbuf->page_len - seg[n].mr_len; 107 seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
108 BUG_ON(seg[n].mr_len > PAGE_SIZE);
109 len -= seg[n].mr_len;
106 ++n; 110 ++n;
107 p = 1; 111 ++p;
108 while (len > 0) { 112 page_base = 0; /* page offset only applies to first page */
109 if (n == nsegs)
110 return 0;
111 seg[n].mr_page = xdrbuf->pages[p];
112 seg[n].mr_offset = NULL;
113 seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
114 len -= seg[n].mr_len;
115 ++n;
116 ++p;
117 }
118 } 113 }
119 114
115 /* Message overflows the seg array */
116 if (len && n == nsegs)
117 return 0;
118
120 if (xdrbuf->tail[0].iov_len) { 119 if (xdrbuf->tail[0].iov_len) {
121 /* the rpcrdma protocol allows us to omit any trailing 120 /* the rpcrdma protocol allows us to omit any trailing
122 * xdr pad bytes, saving the server an RDMA operation. */ 121 * xdr pad bytes, saving the server an RDMA operation. */
123 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) 122 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
124 return n; 123 return n;
125 if (n == nsegs) 124 if (n == nsegs)
125 /* Tail remains, but we're out of segments */
126 return 0; 126 return 0;
127 seg[n].mr_page = NULL; 127 seg[n].mr_page = NULL;
128 seg[n].mr_offset = xdrbuf->tail[0].iov_base; 128 seg[n].mr_offset = xdrbuf->tail[0].iov_base;
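The rewritten loop walks the xdr_buf page array from (page_base >> PAGE_SHIFT), clamps the first segment to the remainder of that page, and zeroes the offset for every later page. Below is a tiny standalone model of that page-walk arithmetic; the 4096-byte PAGE_SIZE and the sample offsets are assumptions for illustration.

/* Model of covering a byte range that starts part-way into its first
 * page: clamp each chunk to the page boundary, then reset the offset.
 */
#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	unsigned int page_base = 3000;	/* offset into the first page */
	unsigned int len = 10000;	/* total bytes to cover */
	unsigned int page = 0;

	while (len) {
		unsigned int chunk = PAGE_SIZE - page_base;

		if (chunk > len)
			chunk = len;
		printf("page %u: offset %u, %u bytes\n", page, page_base, chunk);
		len -= chunk;
		page++;
		page_base = 0;	/* offset only applies to the first page */
	}
	return 0;
}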
@@ -296,6 +296,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
296 int copy_len; 296 int copy_len;
297 unsigned char *srcp, *destp; 297 unsigned char *srcp, *destp;
298 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 298 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
299 int page_base;
300 struct page **ppages;
299 301
300 destp = rqst->rq_svec[0].iov_base; 302 destp = rqst->rq_svec[0].iov_base;
301 curlen = rqst->rq_svec[0].iov_len; 303 curlen = rqst->rq_svec[0].iov_len;
@@ -324,28 +326,25 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
324 __func__, destp + copy_len, curlen); 326 __func__, destp + copy_len, curlen);
325 rqst->rq_svec[0].iov_len += curlen; 327 rqst->rq_svec[0].iov_len += curlen;
326 } 328 }
327
328 r_xprt->rx_stats.pullup_copy_count += copy_len; 329 r_xprt->rx_stats.pullup_copy_count += copy_len;
329 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; 330
331 page_base = rqst->rq_snd_buf.page_base;
332 ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
333 page_base &= ~PAGE_MASK;
334 npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
330 for (i = 0; copy_len && i < npages; i++) { 335 for (i = 0; copy_len && i < npages; i++) {
331 if (i == 0) 336 curlen = PAGE_SIZE - page_base;
332 curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
333 else
334 curlen = PAGE_SIZE;
335 if (curlen > copy_len) 337 if (curlen > copy_len)
336 curlen = copy_len; 338 curlen = copy_len;
337 dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", 339 dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
338 __func__, i, destp, copy_len, curlen); 340 __func__, i, destp, copy_len, curlen);
339 srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], 341 srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
340 KM_SKB_SUNRPC_DATA); 342 memcpy(destp, srcp+page_base, curlen);
341 if (i == 0)
342 memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
343 else
344 memcpy(destp, srcp, curlen);
345 kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); 343 kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
346 rqst->rq_svec[0].iov_len += curlen; 344 rqst->rq_svec[0].iov_len += curlen;
347 destp += curlen; 345 destp += curlen;
348 copy_len -= curlen; 346 copy_len -= curlen;
347 page_base = 0;
349 } 348 }
350 /* header now contains entire send message */ 349 /* header now contains entire send message */
351 return pad; 350 return pad;
@@ -606,6 +605,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
606{ 605{
607 int i, npages, curlen, olen; 606 int i, npages, curlen, olen;
608 char *destp; 607 char *destp;
608 struct page **ppages;
609 int page_base;
609 610
610 curlen = rqst->rq_rcv_buf.head[0].iov_len; 611 curlen = rqst->rq_rcv_buf.head[0].iov_len;
611 if (curlen > copy_len) { /* write chunk header fixup */ 612 if (curlen > copy_len) { /* write chunk header fixup */
@@ -624,32 +625,29 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
624 olen = copy_len; 625 olen = copy_len;
625 i = 0; 626 i = 0;
626 rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; 627 rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
628 page_base = rqst->rq_rcv_buf.page_base;
629 ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
630 page_base &= ~PAGE_MASK;
631
627 if (copy_len && rqst->rq_rcv_buf.page_len) { 632 if (copy_len && rqst->rq_rcv_buf.page_len) {
628 npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + 633 npages = PAGE_ALIGN(page_base +
629 rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; 634 rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
630 for (; i < npages; i++) { 635 for (; i < npages; i++) {
631 if (i == 0) 636 curlen = PAGE_SIZE - page_base;
632 curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
633 else
634 curlen = PAGE_SIZE;
635 if (curlen > copy_len) 637 if (curlen > copy_len)
636 curlen = copy_len; 638 curlen = copy_len;
637 dprintk("RPC: %s: page %d" 639 dprintk("RPC: %s: page %d"
638 " srcp 0x%p len %d curlen %d\n", 640 " srcp 0x%p len %d curlen %d\n",
639 __func__, i, srcp, copy_len, curlen); 641 __func__, i, srcp, copy_len, curlen);
640 destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], 642 destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
641 KM_SKB_SUNRPC_DATA); 643 memcpy(destp + page_base, srcp, curlen);
642 if (i == 0) 644 flush_dcache_page(ppages[i]);
643 memcpy(destp + rqst->rq_rcv_buf.page_base,
644 srcp, curlen);
645 else
646 memcpy(destp, srcp, curlen);
647 flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
648 kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); 645 kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
649 srcp += curlen; 646 srcp += curlen;
650 copy_len -= curlen; 647 copy_len -= curlen;
651 if (copy_len == 0) 648 if (copy_len == 0)
652 break; 649 break;
650 page_base = 0;
653 } 651 }
654 rqst->rq_rcv_buf.page_len = olen - copy_len; 652 rqst->rq_rcv_buf.page_len = olen - copy_len;
655 } else 653 } else
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912a..1a10dcd999ea 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1335 p, 0, length, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1337 put_page(p); 1337 put_page(p);
1338 svc_rdma_put_context(ctxt, 1);
1338 return; 1339 return;
1339 } 1340 }
1340 atomic_inc(&xprt->sc_dma_used); 1341 atomic_inc(&xprt->sc_dma_used);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5f4c7b3bc711..d4297dc43dc4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
144static inline 144static inline
145void rpcrdma_event_process(struct ib_wc *wc) 145void rpcrdma_event_process(struct ib_wc *wc)
146{ 146{
147 struct rpcrdma_mw *frmr;
147 struct rpcrdma_rep *rep = 148 struct rpcrdma_rep *rep =
148 (struct rpcrdma_rep *)(unsigned long) wc->wr_id; 149 (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
149 150
@@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc)
154 return; 155 return;
155 156
156 if (IB_WC_SUCCESS != wc->status) { 157 if (IB_WC_SUCCESS != wc->status) {
157 dprintk("RPC: %s: %s WC status %X, connection lost\n", 158 dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
158 __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", 159 __func__, wc->opcode, wc->status);
159 wc->status);
160 rep->rr_len = ~0U; 160 rep->rr_len = ~0U;
161 rpcrdma_schedule_tasklet(rep); 161 if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
162 rpcrdma_schedule_tasklet(rep);
162 return; 163 return;
163 } 164 }
164 165
165 switch (wc->opcode) { 166 switch (wc->opcode) {
167 case IB_WC_FAST_REG_MR:
168 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
169 frmr->r.frmr.state = FRMR_IS_VALID;
170 break;
171 case IB_WC_LOCAL_INV:
172 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
173 frmr->r.frmr.state = FRMR_IS_INVALID;
174 break;
166 case IB_WC_RECV: 175 case IB_WC_RECV:
167 rep->rr_len = wc->byte_len; 176 rep->rr_len = wc->byte_len;
168 ib_dma_sync_single_for_cpu( 177 ib_dma_sync_single_for_cpu(
@@ -1450,6 +1459,12 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1450 seg->mr_dma = ib_dma_map_single(ia->ri_id->device, 1459 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1451 seg->mr_offset, 1460 seg->mr_offset,
1452 seg->mr_dmalen, seg->mr_dir); 1461 seg->mr_dmalen, seg->mr_dir);
1462 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1463 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1464 __func__,
1465 (unsigned long long)seg->mr_dma,
1466 seg->mr_offset, seg->mr_dmalen);
1467 }
1453} 1468}
1454 1469
1455static void 1470static void
@@ -1469,7 +1484,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1469 struct rpcrdma_xprt *r_xprt) 1484 struct rpcrdma_xprt *r_xprt)
1470{ 1485{
1471 struct rpcrdma_mr_seg *seg1 = seg; 1486 struct rpcrdma_mr_seg *seg1 = seg;
1472 struct ib_send_wr frmr_wr, *bad_wr; 1487 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1488
1473 u8 key; 1489 u8 key;
1474 int len, pageoff; 1490 int len, pageoff;
1475 int i, rc; 1491 int i, rc;
@@ -1484,6 +1500,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1484 rpcrdma_map_one(ia, seg, writing); 1500 rpcrdma_map_one(ia, seg, writing);
1485 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; 1501 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1486 len += seg->mr_len; 1502 len += seg->mr_len;
1503 BUG_ON(seg->mr_len > PAGE_SIZE);
1487 ++seg; 1504 ++seg;
1488 ++i; 1505 ++i;
1489 /* Check for holes */ 1506 /* Check for holes */
@@ -1494,26 +1511,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1494 dprintk("RPC: %s: Using frmr %p to map %d segments\n", 1511 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1495 __func__, seg1->mr_chunk.rl_mw, i); 1512 __func__, seg1->mr_chunk.rl_mw, i);
1496 1513
1514 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1515 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1516 __func__,
1517 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1518 /* Invalidate before using. */
1519 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1520 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1521 invalidate_wr.next = &frmr_wr;
1522 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1523 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1524 invalidate_wr.ex.invalidate_rkey =
1525 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1526 DECR_CQCOUNT(&r_xprt->rx_ep);
1527 post_wr = &invalidate_wr;
1528 } else
1529 post_wr = &frmr_wr;
1530
1497 /* Bump the key */ 1531 /* Bump the key */
1498 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); 1532 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1499 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); 1533 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1500 1534
1501 /* Prepare FRMR WR */ 1535 /* Prepare FRMR WR */
1502 memset(&frmr_wr, 0, sizeof frmr_wr); 1536 memset(&frmr_wr, 0, sizeof frmr_wr);
1537 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1503 frmr_wr.opcode = IB_WR_FAST_REG_MR; 1538 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1504 frmr_wr.send_flags = 0; /* unsignaled */ 1539 frmr_wr.send_flags = IB_SEND_SIGNALED;
1505 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; 1540 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1506 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; 1541 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1507 frmr_wr.wr.fast_reg.page_list_len = i; 1542 frmr_wr.wr.fast_reg.page_list_len = i;
1508 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1543 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1509 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; 1544 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1545 BUG_ON(frmr_wr.wr.fast_reg.length < len);
1510 frmr_wr.wr.fast_reg.access_flags = (writing ? 1546 frmr_wr.wr.fast_reg.access_flags = (writing ?
1511 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 1547 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1512 IB_ACCESS_REMOTE_READ); 1548 IB_ACCESS_REMOTE_READ);
1513 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1549 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1514 DECR_CQCOUNT(&r_xprt->rx_ep); 1550 DECR_CQCOUNT(&r_xprt->rx_ep);
1515 1551
1516 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); 1552 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
1517 1553
1518 if (rc) { 1554 if (rc) {
1519 dprintk("RPC: %s: failed ib_post_send for register," 1555 dprintk("RPC: %s: failed ib_post_send for register,"
@@ -1542,8 +1578,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1542 rpcrdma_unmap_one(ia, seg++); 1578 rpcrdma_unmap_one(ia, seg++);
1543 1579
1544 memset(&invalidate_wr, 0, sizeof invalidate_wr); 1580 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1581 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1545 invalidate_wr.opcode = IB_WR_LOCAL_INV; 1582 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1546 invalidate_wr.send_flags = 0; /* unsignaled */ 1583 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1547 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1584 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1548 DECR_CQCOUNT(&r_xprt->rx_ep); 1585 DECR_CQCOUNT(&r_xprt->rx_ep);
1549 1586
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c7a7eba991bc..cae761a8536c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
164 struct { 164 struct {
165 struct ib_fast_reg_page_list *fr_pgl; 165 struct ib_fast_reg_page_list *fr_pgl;
166 struct ib_mr *fr_mr; 166 struct ib_mr *fr_mr;
167 enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
167 } frmr; 168 } frmr;
168 } r; 169 } r;
169 struct list_head mw_list; 170 struct list_head mw_list;
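
The verbs.c and xprt_rdma.h hunks above add a per-FRMR valid/invalid state, update it from the now-signaled FAST_REG_MR and LOCAL_INV completions, and, when a new registration finds the FRMR still marked valid, chain a LOCAL_INV work request in front of the FAST_REG_MR one. Below is a minimal standalone sketch of that chaining decision only; struct send_wr and prepare_post() are simplified stand-ins, not the real ib_verbs types.

#include <stdio.h>
#include <string.h>

enum frmr_state { FRMR_IS_INVALID, FRMR_IS_VALID };

struct send_wr {
	const char *opcode;          /* "LOCAL_INV" or "FAST_REG_MR" */
	struct send_wr *next;        /* work requests can be chained and posted together */
};

/* Decide what to post: invalidate first if the FRMR was left valid. */
static struct send_wr *prepare_post(enum frmr_state state,
				    struct send_wr *invalidate_wr,
				    struct send_wr *frmr_wr)
{
	memset(invalidate_wr, 0, sizeof(*invalidate_wr));
	memset(frmr_wr, 0, sizeof(*frmr_wr));
	frmr_wr->opcode = "FAST_REG_MR";

	if (state == FRMR_IS_VALID) {
		invalidate_wr->opcode = "LOCAL_INV";
		invalidate_wr->next = frmr_wr;   /* chain: invalidate, then re-register */
		return invalidate_wr;
	}
	return frmr_wr;
}

int main(void)
{
	struct send_wr inv, reg;

	/* Posts "LOCAL_INV" then "FAST_REG_MR" because the FRMR was left valid. */
	for (struct send_wr *wr = prepare_post(FRMR_IS_VALID, &inv, &reg); wr; wr = wr->next)
		printf("post %s\n", wr->opcode);
	return 0;
}
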
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c431f5a57960..bf005d3c65ef 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -504,7 +504,7 @@ static int xs_nospace(struct rpc_task *task)
504 * EAGAIN: The socket was blocked, please call again later to 504 * EAGAIN: The socket was blocked, please call again later to
505 * complete the request 505 * complete the request
506 * ENOTCONN: Caller needs to invoke connect logic then call again 506 * ENOTCONN: Caller needs to invoke connect logic then call again
507 * other: Some other error occured, the request was not sent 507 * other: Some other error occurred, the request was not sent
508 */ 508 */
509static int xs_udp_send_request(struct rpc_task *task) 509static int xs_udp_send_request(struct rpc_task *task)
510{ 510{
@@ -590,7 +590,7 @@ static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
590 * EAGAIN: The socket was blocked, please call again later to 590 * EAGAIN: The socket was blocked, please call again later to
591 * complete the request 591 * complete the request
592 * ENOTCONN: Caller needs to invoke connect logic then call again 592 * ENOTCONN: Caller needs to invoke connect logic then call again
593 * other: Some other error occured, the request was not sent 593 * other: Some other error occurred, the request was not sent
594 * 594 *
595 * XXX: In the case of soft timeouts, should we eventually give up 595 * XXX: In the case of soft timeouts, should we eventually give up
596 * if sendmsg is not able to make progress? 596 * if sendmsg is not able to make progress?
@@ -710,6 +710,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
710 if (sk == NULL) 710 if (sk == NULL)
711 return; 711 return;
712 712
713 transport->srcport = 0;
714
713 write_lock_bh(&sk->sk_callback_lock); 715 write_lock_bh(&sk->sk_callback_lock);
714 transport->inet = NULL; 716 transport->inet = NULL;
715 transport->sock = NULL; 717 transport->sock = NULL;
@@ -1631,7 +1633,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1631 } 1633 }
1632 xs_reclassify_socket(family, sock); 1634 xs_reclassify_socket(family, sock);
1633 1635
1634 if (xs_bind(transport, sock)) { 1636 err = xs_bind(transport, sock);
1637 if (err) {
1635 sock_release(sock); 1638 sock_release(sock);
1636 goto out; 1639 goto out;
1637 } 1640 }
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 0436927369f3..2c5954b85933 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -29,18 +29,6 @@ config TIPC_ADVANCED
29 Saying Y here will open some advanced configuration for TIPC. 29 Saying Y here will open some advanced configuration for TIPC.
30 Most users do not need to bother; if unsure, just say N. 30 Most users do not need to bother; if unsure, just say N.
31 31
32config TIPC_NODES
33 int "Maximum number of nodes in a cluster"
34 depends on TIPC_ADVANCED
35 range 8 2047
36 default "255"
37 help
38 Specifies how many nodes can be supported in a TIPC cluster.
39 Can range from 8 to 2047 nodes; default is 255.
40
41 Setting this to a smaller value saves some memory;
42 setting it to higher allows for more nodes.
43
44config TIPC_PORTS 32config TIPC_PORTS
45 int "Maximum number of ports in a node" 33 int "Maximum number of ports in a node"
46 depends on TIPC_ADVANCED 34 depends on TIPC_ADVANCED
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 88463d9a6f12..a6fdab33877e 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -2,7 +2,7 @@
2 * net/tipc/addr.c: TIPC address utility routines 2 * net/tipc/addr.c: TIPC address utility routines
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2004-2005, Wind River Systems 5 * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,7 @@
41 * tipc_addr_domain_valid - validates a network domain address 41 * tipc_addr_domain_valid - validates a network domain address
42 * 42 *
43 * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, 43 * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
44 * where Z, C, and N are non-zero and do not exceed the configured limits. 44 * where Z, C, and N are non-zero.
45 * 45 *
46 * Returns 1 if domain address is valid, otherwise 0 46 * Returns 1 if domain address is valid, otherwise 0
47 */ 47 */
@@ -51,10 +51,6 @@ int tipc_addr_domain_valid(u32 addr)
51 u32 n = tipc_node(addr); 51 u32 n = tipc_node(addr);
52 u32 c = tipc_cluster(addr); 52 u32 c = tipc_cluster(addr);
53 u32 z = tipc_zone(addr); 53 u32 z = tipc_zone(addr);
54 u32 max_nodes = tipc_max_nodes;
55
56 if (n > max_nodes)
57 return 0;
58 54
59 if (n && (!z || !c)) 55 if (n && (!z || !c))
60 return 0; 56 return 0;
@@ -66,8 +62,7 @@ int tipc_addr_domain_valid(u32 addr)
66/** 62/**
67 * tipc_addr_node_valid - validates a proposed network address for this node 63 * tipc_addr_node_valid - validates a proposed network address for this node
68 * 64 *
69 * Accepts <Z.C.N>, where Z, C, and N are non-zero and do not exceed 65 * Accepts <Z.C.N>, where Z, C, and N are non-zero.
70 * the configured limits.
71 * 66 *
72 * Returns 1 if address can be used, otherwise 0 67 * Returns 1 if address can be used, otherwise 0
73 */ 68 */
@@ -81,9 +76,9 @@ int tipc_in_scope(u32 domain, u32 addr)
81{ 76{
82 if (!domain || (domain == addr)) 77 if (!domain || (domain == addr))
83 return 1; 78 return 1;
84 if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */ 79 if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
85 return 1; 80 return 1;
86 if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */ 81 if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */
87 return 1; 82 return 1;
88 return 0; 83 return 0;
89} 84}
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 2490fadd0caf..e4f35afe3207 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -37,6 +37,19 @@
37#ifndef _TIPC_ADDR_H 37#ifndef _TIPC_ADDR_H
38#define _TIPC_ADDR_H 38#define _TIPC_ADDR_H
39 39
40#define TIPC_ZONE_MASK 0xff000000u
41#define TIPC_CLUSTER_MASK 0xfffff000u
42
43static inline u32 tipc_zone_mask(u32 addr)
44{
45 return addr & TIPC_ZONE_MASK;
46}
47
48static inline u32 tipc_cluster_mask(u32 addr)
49{
50 return addr & TIPC_CLUSTER_MASK;
51}
52
40static inline int in_own_cluster(u32 addr) 53static inline int in_own_cluster(u32 addr)
41{ 54{
42 return !((addr ^ tipc_own_addr) >> 12); 55 return !((addr ^ tipc_own_addr) >> 12);
@@ -49,14 +62,13 @@ static inline int in_own_cluster(u32 addr)
49 * after a network hop. 62 * after a network hop.
50 */ 63 */
51 64
52static inline int addr_domain(int sc) 65static inline u32 addr_domain(u32 sc)
53{ 66{
54 if (likely(sc == TIPC_NODE_SCOPE)) 67 if (likely(sc == TIPC_NODE_SCOPE))
55 return tipc_own_addr; 68 return tipc_own_addr;
56 if (sc == TIPC_CLUSTER_SCOPE) 69 if (sc == TIPC_CLUSTER_SCOPE)
57 return tipc_addr(tipc_zone(tipc_own_addr), 70 return tipc_cluster_mask(tipc_own_addr);
58 tipc_cluster(tipc_own_addr), 0); 71 return tipc_zone_mask(tipc_own_addr);
59 return tipc_addr(tipc_zone(tipc_own_addr), 0, 0);
60} 72}
61 73
62int tipc_addr_domain_valid(u32); 74int tipc_addr_domain_valid(u32);
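
The addr.h hunk introduces tipc_zone_mask()/tipc_cluster_mask() and rewrites addr_domain() in terms of them. The sketch below exercises that arithmetic in isolation; the <Z.C.N> bit layout (zone in bits 31-24, cluster in 23-12, node in 11-0) is inferred from the masks and the >>12 shift above, and the scope enum values here are illustrative stand-ins rather than the real TIPC constants.

#include <stdio.h>
#include <stdint.h>

#define TIPC_ZONE_MASK    0xff000000u
#define TIPC_CLUSTER_MASK 0xfffff000u

enum { NODE_SCOPE, CLUSTER_SCOPE, ZONE_SCOPE };		/* illustrative values only */

static uint32_t tipc_addr(uint32_t z, uint32_t c, uint32_t n)
{
	return (z << 24) | (c << 12) | n;		/* zone | cluster | node */
}

static uint32_t tipc_zone_mask(uint32_t addr)    { return addr & TIPC_ZONE_MASK; }
static uint32_t tipc_cluster_mask(uint32_t addr) { return addr & TIPC_CLUSTER_MASK; }

/* Mirrors the rewritten addr_domain(): map a scope to the matching domain. */
static uint32_t addr_domain(uint32_t own_addr, int scope)
{
	if (scope == NODE_SCOPE)
		return own_addr;			/* exact node <Z.C.N> */
	if (scope == CLUSTER_SCOPE)
		return tipc_cluster_mask(own_addr);	/* <Z.C.0> */
	return tipc_zone_mask(own_addr);		/* <Z.0.0> */
}

int main(void)
{
	uint32_t own = tipc_addr(1, 1, 10);		/* <1.1.10> */

	printf("node    0x%08x\n", addr_domain(own, NODE_SCOPE));	/* 0x0100100a */
	printf("cluster 0x%08x\n", addr_domain(own, CLUSTER_SCOPE));	/* 0x01001000 */
	printf("zone    0x%08x\n", addr_domain(own, ZONE_SCOPE));	/* 0x01000000 */
	return 0;
}
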
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 70ab5ef48766..fa68d1e9ff4b 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (c) 2004-2006, Ericsson AB 4 * Copyright (c) 2004-2006, Ericsson AB
5 * Copyright (c) 2004, Intel Corporation. 5 * Copyright (c) 2004, Intel Corporation.
6 * Copyright (c) 2005, Wind River Systems 6 * Copyright (c) 2005, 2010-2011, Wind River Systems
7 * All rights reserved. 7 * All rights reserved.
8 * 8 *
9 * Redistribution and use in source and binary forms, with or without 9 * Redistribution and use in source and binary forms, with or without
@@ -44,13 +44,6 @@
44 44
45#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ 45#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
46 46
47/*
48 * Loss rate for incoming broadcast frames; used to test retransmission code.
49 * Set to N to cause every N'th frame to be discarded; 0 => don't discard any.
50 */
51
52#define TIPC_BCAST_LOSS_RATE 0
53
54/** 47/**
55 * struct bcbearer_pair - a pair of bearers used by broadcast link 48 * struct bcbearer_pair - a pair of bearers used by broadcast link
56 * @primary: pointer to primary bearer 49 * @primary: pointer to primary bearer
@@ -61,8 +54,8 @@
61 */ 54 */
62 55
63struct bcbearer_pair { 56struct bcbearer_pair {
64 struct bearer *primary; 57 struct tipc_bearer *primary;
65 struct bearer *secondary; 58 struct tipc_bearer *secondary;
66}; 59};
67 60
68/** 61/**
@@ -81,7 +74,7 @@ struct bcbearer_pair {
81 */ 74 */
82 75
83struct bcbearer { 76struct bcbearer {
84 struct bearer bearer; 77 struct tipc_bearer bearer;
85 struct media media; 78 struct media media;
86 struct bcbearer_pair bpairs[MAX_BEARERS]; 79 struct bcbearer_pair bpairs[MAX_BEARERS];
87 struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; 80 struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
@@ -93,6 +86,7 @@ struct bcbearer {
93 * struct bclink - link used for broadcast messages 86 * struct bclink - link used for broadcast messages
94 * @link: (non-standard) broadcast link structure 87 * @link: (non-standard) broadcast link structure
95 * @node: (non-standard) node structure representing b'cast link's peer node 88 * @node: (non-standard) node structure representing b'cast link's peer node
89 * @retransmit_to: node that most recently requested a retransmit
96 * 90 *
97 * Handles sequence numbering, fragmentation, bundling, etc. 91 * Handles sequence numbering, fragmentation, bundling, etc.
98 */ 92 */
@@ -100,6 +94,7 @@ struct bcbearer {
100struct bclink { 94struct bclink {
101 struct link link; 95 struct link link;
102 struct tipc_node node; 96 struct tipc_node node;
97 struct tipc_node *retransmit_to;
103}; 98};
104 99
105 100
@@ -184,6 +179,17 @@ static int bclink_ack_allowed(u32 n)
184 179
185 180
186/** 181/**
182 * tipc_bclink_retransmit_to - get most recent node to request retransmission
183 *
184 * Called with bc_lock locked
185 */
186
187struct tipc_node *tipc_bclink_retransmit_to(void)
188{
189 return bclink->retransmit_to;
190}
191
192/**
187 * bclink_retransmit_pkt - retransmit broadcast packets 193 * bclink_retransmit_pkt - retransmit broadcast packets
188 * @after: sequence number of last packet to *not* retransmit 194 * @after: sequence number of last packet to *not* retransmit
189 * @to: sequence number of last packet to retransmit 195 * @to: sequence number of last packet to retransmit
@@ -285,6 +291,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
285 msg = buf_msg(buf); 291 msg = buf_msg(buf);
286 tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, 292 tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
287 INT_H_SIZE, n_ptr->addr); 293 INT_H_SIZE, n_ptr->addr);
294 msg_set_non_seq(msg, 1);
288 msg_set_mc_netid(msg, tipc_net_id); 295 msg_set_mc_netid(msg, tipc_net_id);
289 msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); 296 msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in));
290 msg_set_bcgap_after(msg, n_ptr->bclink.gap_after); 297 msg_set_bcgap_after(msg, n_ptr->bclink.gap_after);
@@ -400,13 +407,9 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
400 spin_lock_bh(&bc_lock); 407 spin_lock_bh(&bc_lock);
401 408
402 res = tipc_link_send_buf(bcl, buf); 409 res = tipc_link_send_buf(bcl, buf);
403 if (unlikely(res == -ELINKCONG)) 410 if (likely(res > 0))
404 buf_discard(buf);
405 else
406 bclink_set_last_sent(); 411 bclink_set_last_sent();
407 412
408 if (bcl->out_queue_size > bcl->stats.max_queue_sz)
409 bcl->stats.max_queue_sz = bcl->out_queue_size;
410 bcl->stats.queue_sz_counts++; 413 bcl->stats.queue_sz_counts++;
411 bcl->stats.accu_queue_sz += bcl->out_queue_size; 414 bcl->stats.accu_queue_sz += bcl->out_queue_size;
412 415
@@ -422,9 +425,6 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
422 425
423void tipc_bclink_recv_pkt(struct sk_buff *buf) 426void tipc_bclink_recv_pkt(struct sk_buff *buf)
424{ 427{
425#if (TIPC_BCAST_LOSS_RATE)
426 static int rx_count;
427#endif
428 struct tipc_msg *msg = buf_msg(buf); 428 struct tipc_msg *msg = buf_msg(buf);
429 struct tipc_node *node = tipc_node_find(msg_prevnode(msg)); 429 struct tipc_node *node = tipc_node_find(msg_prevnode(msg));
430 u32 next_in; 430 u32 next_in;
@@ -444,10 +444,9 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
444 tipc_node_unlock(node); 444 tipc_node_unlock(node);
445 spin_lock_bh(&bc_lock); 445 spin_lock_bh(&bc_lock);
446 bcl->stats.recv_nacks++; 446 bcl->stats.recv_nacks++;
447 bcl->owner->next = node; /* remember requestor */ 447 bclink->retransmit_to = node;
448 bclink_retransmit_pkt(msg_bcgap_after(msg), 448 bclink_retransmit_pkt(msg_bcgap_after(msg),
449 msg_bcgap_to(msg)); 449 msg_bcgap_to(msg));
450 bcl->owner->next = NULL;
451 spin_unlock_bh(&bc_lock); 450 spin_unlock_bh(&bc_lock);
452 } else { 451 } else {
453 tipc_bclink_peek_nack(msg_destnode(msg), 452 tipc_bclink_peek_nack(msg_destnode(msg),
@@ -459,14 +458,6 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
459 return; 458 return;
460 } 459 }
461 460
462#if (TIPC_BCAST_LOSS_RATE)
463 if (++rx_count == TIPC_BCAST_LOSS_RATE) {
464 rx_count = 0;
465 buf_discard(buf);
466 return;
467 }
468#endif
469
470 tipc_node_lock(node); 461 tipc_node_lock(node);
471receive: 462receive:
472 deferred = node->bclink.deferred_head; 463 deferred = node->bclink.deferred_head;
@@ -574,8 +565,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
574 bcbearer->remains = tipc_bcast_nmap; 565 bcbearer->remains = tipc_bcast_nmap;
575 566
576 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { 567 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
577 struct bearer *p = bcbearer->bpairs[bp_index].primary; 568 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
578 struct bearer *s = bcbearer->bpairs[bp_index].secondary; 569 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
579 570
580 if (!p) 571 if (!p)
581 break; /* no more bearers to try */ 572 break; /* no more bearers to try */
@@ -584,11 +575,11 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
584 if (bcbearer->remains_new.count == bcbearer->remains.count) 575 if (bcbearer->remains_new.count == bcbearer->remains.count)
585 continue; /* bearer pair doesn't add anything */ 576 continue; /* bearer pair doesn't add anything */
586 577
587 if (p->publ.blocked || 578 if (p->blocked ||
588 p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { 579 p->media->send_msg(buf, p, &p->media->bcast_addr)) {
589 /* unable to send on primary bearer */ 580 /* unable to send on primary bearer */
590 if (!s || s->publ.blocked || 581 if (!s || s->blocked ||
591 s->media->send_msg(buf, &s->publ, 582 s->media->send_msg(buf, s,
592 &s->media->bcast_addr)) { 583 &s->media->bcast_addr)) {
593 /* unable to send on either bearer */ 584 /* unable to send on either bearer */
594 continue; 585 continue;
@@ -633,7 +624,7 @@ void tipc_bcbearer_sort(void)
633 memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); 624 memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
634 625
635 for (b_index = 0; b_index < MAX_BEARERS; b_index++) { 626 for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
636 struct bearer *b = &tipc_bearers[b_index]; 627 struct tipc_bearer *b = &tipc_bearers[b_index];
637 628
638 if (!b->active || !b->nodes.count) 629 if (!b->active || !b->nodes.count)
639 continue; 630 continue;
@@ -682,12 +673,12 @@ void tipc_bcbearer_sort(void)
682 673
683void tipc_bcbearer_push(void) 674void tipc_bcbearer_push(void)
684{ 675{
685 struct bearer *b_ptr; 676 struct tipc_bearer *b_ptr;
686 677
687 spin_lock_bh(&bc_lock); 678 spin_lock_bh(&bc_lock);
688 b_ptr = &bcbearer->bearer; 679 b_ptr = &bcbearer->bearer;
689 if (b_ptr->publ.blocked) { 680 if (b_ptr->blocked) {
690 b_ptr->publ.blocked = 0; 681 b_ptr->blocked = 0;
691 tipc_bearer_lock_push(b_ptr); 682 tipc_bearer_lock_push(b_ptr);
692 } 683 }
693 spin_unlock_bh(&bc_lock); 684 spin_unlock_bh(&bc_lock);
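
The bcast.c changes drop the temporary bcl->owner->next trick and instead record the most recent NACK sender in a dedicated bclink->retransmit_to field, written under bc_lock and read back through tipc_bclink_retransmit_to() while that lock is still held. A rough userspace model of the same pattern follows; the pthread mutex stands in for the kernel spinlock and the names are illustrative only.

#include <pthread.h>
#include <stdio.h>

struct node { unsigned int addr; };

static pthread_mutex_t bc_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *retransmit_to;		/* protected by bc_lock */

/* Caller must hold bc_lock, like tipc_bclink_retransmit_to(). */
static struct node *bclink_retransmit_to(void)
{
	return retransmit_to;
}

/* NACK arrives: remember the requester, then retransmit to it under the lock. */
static void handle_nack(struct node *requester)
{
	pthread_mutex_lock(&bc_lock);
	retransmit_to = requester;
	printf("retransmitting to node 0x%x\n", bclink_retransmit_to()->addr);
	pthread_mutex_unlock(&bc_lock);
}

int main(void)
{
	struct node peer = { 0x01001002 };

	handle_nack(&peer);
	return 0;
}
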
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 51f8c5326ce6..500c97f1c859 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -2,7 +2,7 @@
2 * net/tipc/bcast.h: Include file for TIPC broadcast code 2 * net/tipc/bcast.h: Include file for TIPC broadcast code
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -90,6 +90,7 @@ void tipc_port_list_free(struct port_list *pl_ptr);
90 90
91int tipc_bclink_init(void); 91int tipc_bclink_init(void);
92void tipc_bclink_stop(void); 92void tipc_bclink_stop(void);
93struct tipc_node *tipc_bclink_retransmit_to(void);
93void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); 94void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
94int tipc_bclink_send_msg(struct sk_buff *buf); 95int tipc_bclink_send_msg(struct sk_buff *buf);
95void tipc_bclink_recv_pkt(struct sk_buff *buf); 96void tipc_bclink_recv_pkt(struct sk_buff *buf);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 837b7a467885..85209eadfae6 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -2,7 +2,7 @@
2 * net/tipc/bearer.c: TIPC bearer code 2 * net/tipc/bearer.c: TIPC bearer code
3 * 3 *
4 * Copyright (c) 1996-2006, Ericsson AB 4 * Copyright (c) 1996-2006, Ericsson AB
5 * Copyright (c) 2004-2006, Wind River Systems 5 * Copyright (c) 2004-2006, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -44,7 +44,9 @@
44static struct media media_list[MAX_MEDIA]; 44static struct media media_list[MAX_MEDIA];
45static u32 media_count; 45static u32 media_count;
46 46
47struct bearer tipc_bearers[MAX_BEARERS]; 47struct tipc_bearer tipc_bearers[MAX_BEARERS];
48
49static void bearer_disable(struct tipc_bearer *b_ptr);
48 50
49/** 51/**
50 * media_name_valid - validate media name 52 * media_name_valid - validate media name
@@ -158,7 +160,6 @@ int tipc_register_media(u32 media_type,
158 m_ptr->disable_bearer = disable; 160 m_ptr->disable_bearer = disable;
159 m_ptr->addr2str = addr2str; 161 m_ptr->addr2str = addr2str;
160 memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr)); 162 memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr));
161 m_ptr->bcast = 1;
162 strcpy(m_ptr->name, name); 163 strcpy(m_ptr->name, name);
163 m_ptr->priority = bearer_priority; 164 m_ptr->priority = bearer_priority;
164 m_ptr->tolerance = link_tolerance; 165 m_ptr->tolerance = link_tolerance;
@@ -278,13 +279,13 @@ static int bearer_name_validate(const char *name,
278 * bearer_find - locates bearer object with matching bearer name 279 * bearer_find - locates bearer object with matching bearer name
279 */ 280 */
280 281
281static struct bearer *bearer_find(const char *name) 282static struct tipc_bearer *bearer_find(const char *name)
282{ 283{
283 struct bearer *b_ptr; 284 struct tipc_bearer *b_ptr;
284 u32 i; 285 u32 i;
285 286
286 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { 287 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
287 if (b_ptr->active && (!strcmp(b_ptr->publ.name, name))) 288 if (b_ptr->active && (!strcmp(b_ptr->name, name)))
288 return b_ptr; 289 return b_ptr;
289 } 290 }
290 return NULL; 291 return NULL;
@@ -294,16 +295,16 @@ static struct bearer *bearer_find(const char *name)
294 * tipc_bearer_find_interface - locates bearer object with matching interface name 295 * tipc_bearer_find_interface - locates bearer object with matching interface name
295 */ 296 */
296 297
297struct bearer *tipc_bearer_find_interface(const char *if_name) 298struct tipc_bearer *tipc_bearer_find_interface(const char *if_name)
298{ 299{
299 struct bearer *b_ptr; 300 struct tipc_bearer *b_ptr;
300 char *b_if_name; 301 char *b_if_name;
301 u32 i; 302 u32 i;
302 303
303 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { 304 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
304 if (!b_ptr->active) 305 if (!b_ptr->active)
305 continue; 306 continue;
306 b_if_name = strchr(b_ptr->publ.name, ':') + 1; 307 b_if_name = strchr(b_ptr->name, ':') + 1;
307 if (!strcmp(b_if_name, if_name)) 308 if (!strcmp(b_if_name, if_name))
308 return b_ptr; 309 return b_ptr;
309 } 310 }
@@ -318,7 +319,7 @@ struct sk_buff *tipc_bearer_get_names(void)
318{ 319{
319 struct sk_buff *buf; 320 struct sk_buff *buf;
320 struct media *m_ptr; 321 struct media *m_ptr;
321 struct bearer *b_ptr; 322 struct tipc_bearer *b_ptr;
322 int i, j; 323 int i, j;
323 324
324 buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); 325 buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME));
@@ -331,8 +332,8 @@ struct sk_buff *tipc_bearer_get_names(void)
331 b_ptr = &tipc_bearers[j]; 332 b_ptr = &tipc_bearers[j];
332 if (b_ptr->active && (b_ptr->media == m_ptr)) { 333 if (b_ptr->active && (b_ptr->media == m_ptr)) {
333 tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, 334 tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
334 b_ptr->publ.name, 335 b_ptr->name,
335 strlen(b_ptr->publ.name) + 1); 336 strlen(b_ptr->name) + 1);
336 } 337 }
337 } 338 }
338 } 339 }
@@ -340,18 +341,18 @@ struct sk_buff *tipc_bearer_get_names(void)
340 return buf; 341 return buf;
341} 342}
342 343
343void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest) 344void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
344{ 345{
345 tipc_nmap_add(&b_ptr->nodes, dest); 346 tipc_nmap_add(&b_ptr->nodes, dest);
346 tipc_disc_update_link_req(b_ptr->link_req);
347 tipc_bcbearer_sort(); 347 tipc_bcbearer_sort();
348 tipc_disc_add_dest(b_ptr->link_req);
348} 349}
349 350
350void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest) 351void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
351{ 352{
352 tipc_nmap_remove(&b_ptr->nodes, dest); 353 tipc_nmap_remove(&b_ptr->nodes, dest);
353 tipc_disc_update_link_req(b_ptr->link_req);
354 tipc_bcbearer_sort(); 354 tipc_bcbearer_sort();
355 tipc_disc_remove_dest(b_ptr->link_req);
355} 356}
356 357
357/* 358/*
@@ -362,12 +363,12 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest)
362 * bearer.lock must be taken before calling 363 * bearer.lock must be taken before calling
363 * Returns binary true(1) ore false(0) 364 * Returns binary true(1) ore false(0)
364 */ 365 */
365static int bearer_push(struct bearer *b_ptr) 366static int bearer_push(struct tipc_bearer *b_ptr)
366{ 367{
367 u32 res = 0; 368 u32 res = 0;
368 struct link *ln, *tln; 369 struct link *ln, *tln;
369 370
370 if (b_ptr->publ.blocked) 371 if (b_ptr->blocked)
371 return 0; 372 return 0;
372 373
373 while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) { 374 while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) {
@@ -382,13 +383,13 @@ static int bearer_push(struct bearer *b_ptr)
382 return list_empty(&b_ptr->cong_links); 383 return list_empty(&b_ptr->cong_links);
383} 384}
384 385
385void tipc_bearer_lock_push(struct bearer *b_ptr) 386void tipc_bearer_lock_push(struct tipc_bearer *b_ptr)
386{ 387{
387 int res; 388 int res;
388 389
389 spin_lock_bh(&b_ptr->publ.lock); 390 spin_lock_bh(&b_ptr->lock);
390 res = bearer_push(b_ptr); 391 res = bearer_push(b_ptr);
391 spin_unlock_bh(&b_ptr->publ.lock); 392 spin_unlock_bh(&b_ptr->lock);
392 if (res) 393 if (res)
393 tipc_bcbearer_push(); 394 tipc_bcbearer_push();
394} 395}
@@ -398,16 +399,14 @@ void tipc_bearer_lock_push(struct bearer *b_ptr)
398 * Interrupt enabling new requests after bearer congestion or blocking: 399 * Interrupt enabling new requests after bearer congestion or blocking:
399 * See bearer_send(). 400 * See bearer_send().
400 */ 401 */
401void tipc_continue(struct tipc_bearer *tb_ptr) 402void tipc_continue(struct tipc_bearer *b_ptr)
402{ 403{
403 struct bearer *b_ptr = (struct bearer *)tb_ptr; 404 spin_lock_bh(&b_ptr->lock);
404
405 spin_lock_bh(&b_ptr->publ.lock);
406 b_ptr->continue_count++; 405 b_ptr->continue_count++;
407 if (!list_empty(&b_ptr->cong_links)) 406 if (!list_empty(&b_ptr->cong_links))
408 tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr); 407 tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr);
409 b_ptr->publ.blocked = 0; 408 b_ptr->blocked = 0;
410 spin_unlock_bh(&b_ptr->publ.lock); 409 spin_unlock_bh(&b_ptr->lock);
411} 410}
412 411
413/* 412/*
@@ -418,7 +417,7 @@ void tipc_continue(struct tipc_bearer *tb_ptr)
418 * bearer.lock is busy 417 * bearer.lock is busy
419 */ 418 */
420 419
421static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_ptr) 420static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link *l_ptr)
422{ 421{
423 list_move_tail(&l_ptr->link_list, &b_ptr->cong_links); 422 list_move_tail(&l_ptr->link_list, &b_ptr->cong_links);
424} 423}
@@ -431,11 +430,11 @@ static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_p
431 * bearer.lock is free 430 * bearer.lock is free
432 */ 431 */
433 432
434void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr) 433void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr)
435{ 434{
436 spin_lock_bh(&b_ptr->publ.lock); 435 spin_lock_bh(&b_ptr->lock);
437 tipc_bearer_schedule_unlocked(b_ptr, l_ptr); 436 tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
438 spin_unlock_bh(&b_ptr->publ.lock); 437 spin_unlock_bh(&b_ptr->lock);
439} 438}
440 439
441 440
@@ -444,18 +443,18 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr)
444 * and if there is, try to resolve it before returning. 443 * and if there is, try to resolve it before returning.
445 * 'tipc_net_lock' is read_locked when this function is called 444 * 'tipc_net_lock' is read_locked when this function is called
446 */ 445 */
447int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr) 446int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr)
448{ 447{
449 int res = 1; 448 int res = 1;
450 449
451 if (list_empty(&b_ptr->cong_links)) 450 if (list_empty(&b_ptr->cong_links))
452 return 1; 451 return 1;
453 spin_lock_bh(&b_ptr->publ.lock); 452 spin_lock_bh(&b_ptr->lock);
454 if (!bearer_push(b_ptr)) { 453 if (!bearer_push(b_ptr)) {
455 tipc_bearer_schedule_unlocked(b_ptr, l_ptr); 454 tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
456 res = 0; 455 res = 0;
457 } 456 }
458 spin_unlock_bh(&b_ptr->publ.lock); 457 spin_unlock_bh(&b_ptr->lock);
459 return res; 458 return res;
460} 459}
461 460
@@ -463,9 +462,9 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
463 * tipc_bearer_congested - determines if bearer is currently congested 462 * tipc_bearer_congested - determines if bearer is currently congested
464 */ 463 */
465 464
466int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr) 465int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr)
467{ 466{
468 if (unlikely(b_ptr->publ.blocked)) 467 if (unlikely(b_ptr->blocked))
469 return 1; 468 return 1;
470 if (likely(list_empty(&b_ptr->cong_links))) 469 if (likely(list_empty(&b_ptr->cong_links)))
471 return 0; 470 return 0;
@@ -476,9 +475,9 @@ int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
476 * tipc_enable_bearer - enable bearer with the given name 475 * tipc_enable_bearer - enable bearer with the given name
477 */ 476 */
478 477
479int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) 478int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
480{ 479{
481 struct bearer *b_ptr; 480 struct tipc_bearer *b_ptr;
482 struct media *m_ptr; 481 struct media *m_ptr;
483 struct bearer_name b_name; 482 struct bearer_name b_name;
484 char addr_string[16]; 483 char addr_string[16];
@@ -496,9 +495,16 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
496 warn("Bearer <%s> rejected, illegal name\n", name); 495 warn("Bearer <%s> rejected, illegal name\n", name);
497 return -EINVAL; 496 return -EINVAL;
498 } 497 }
499 if (!tipc_addr_domain_valid(bcast_scope) || 498 if (tipc_addr_domain_valid(disc_domain) &&
500 !tipc_in_scope(bcast_scope, tipc_own_addr)) { 499 (disc_domain != tipc_own_addr)) {
501 warn("Bearer <%s> rejected, illegal broadcast scope\n", name); 500 if (tipc_in_scope(disc_domain, tipc_own_addr)) {
501 disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK;
502 res = 0; /* accept any node in own cluster */
503 } else if (in_own_cluster(disc_domain))
504 res = 0; /* accept specified node in own cluster */
505 }
506 if (res) {
507 warn("Bearer <%s> rejected, illegal discovery domain\n", name);
502 return -EINVAL; 508 return -EINVAL;
503 } 509 }
504 if ((priority < TIPC_MIN_LINK_PRI || 510 if ((priority < TIPC_MIN_LINK_PRI ||
@@ -514,7 +520,7 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
514 if (!m_ptr) { 520 if (!m_ptr) {
515 warn("Bearer <%s> rejected, media <%s> not registered\n", name, 521 warn("Bearer <%s> rejected, media <%s> not registered\n", name,
516 b_name.media_name); 522 b_name.media_name);
517 goto failed; 523 goto exit;
518 } 524 }
519 525
520 if (priority == TIPC_MEDIA_LINK_PRI) 526 if (priority == TIPC_MEDIA_LINK_PRI)
@@ -528,16 +534,16 @@ restart:
528 bearer_id = i; 534 bearer_id = i;
529 continue; 535 continue;
530 } 536 }
531 if (!strcmp(name, tipc_bearers[i].publ.name)) { 537 if (!strcmp(name, tipc_bearers[i].name)) {
532 warn("Bearer <%s> rejected, already enabled\n", name); 538 warn("Bearer <%s> rejected, already enabled\n", name);
533 goto failed; 539 goto exit;
534 } 540 }
535 if ((tipc_bearers[i].priority == priority) && 541 if ((tipc_bearers[i].priority == priority) &&
536 (++with_this_prio > 2)) { 542 (++with_this_prio > 2)) {
537 if (priority-- == 0) { 543 if (priority-- == 0) {
538 warn("Bearer <%s> rejected, duplicate priority\n", 544 warn("Bearer <%s> rejected, duplicate priority\n",
539 name); 545 name);
540 goto failed; 546 goto exit;
541 } 547 }
542 warn("Bearer <%s> priority adjustment required %u->%u\n", 548 warn("Bearer <%s> priority adjustment required %u->%u\n",
543 name, priority + 1, priority); 549 name, priority + 1, priority);
@@ -547,35 +553,36 @@ restart:
547 if (bearer_id >= MAX_BEARERS) { 553 if (bearer_id >= MAX_BEARERS) {
548 warn("Bearer <%s> rejected, bearer limit reached (%u)\n", 554 warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
549 name, MAX_BEARERS); 555 name, MAX_BEARERS);
550 goto failed; 556 goto exit;
551 } 557 }
552 558
553 b_ptr = &tipc_bearers[bearer_id]; 559 b_ptr = &tipc_bearers[bearer_id];
554 strcpy(b_ptr->publ.name, name); 560 strcpy(b_ptr->name, name);
555 res = m_ptr->enable_bearer(&b_ptr->publ); 561 res = m_ptr->enable_bearer(b_ptr);
556 if (res) { 562 if (res) {
557 warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); 563 warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res);
558 goto failed; 564 goto exit;
559 } 565 }
560 566
561 b_ptr->identity = bearer_id; 567 b_ptr->identity = bearer_id;
562 b_ptr->media = m_ptr; 568 b_ptr->media = m_ptr;
563 b_ptr->net_plane = bearer_id + 'A'; 569 b_ptr->net_plane = bearer_id + 'A';
564 b_ptr->active = 1; 570 b_ptr->active = 1;
565 b_ptr->detect_scope = bcast_scope;
566 b_ptr->priority = priority; 571 b_ptr->priority = priority;
567 INIT_LIST_HEAD(&b_ptr->cong_links); 572 INIT_LIST_HEAD(&b_ptr->cong_links);
568 INIT_LIST_HEAD(&b_ptr->links); 573 INIT_LIST_HEAD(&b_ptr->links);
569 if (m_ptr->bcast) { 574 spin_lock_init(&b_ptr->lock);
570 b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, 575
571 bcast_scope, 2); 576 res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain);
577 if (res) {
578 bearer_disable(b_ptr);
579 warn("Bearer <%s> rejected, discovery object creation failed\n",
580 name);
581 goto exit;
572 } 582 }
573 spin_lock_init(&b_ptr->publ.lock);
574 write_unlock_bh(&tipc_net_lock);
575 info("Enabled bearer <%s>, discovery domain %s, priority %u\n", 583 info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
576 name, tipc_addr_string_fill(addr_string, bcast_scope), priority); 584 name, tipc_addr_string_fill(addr_string, disc_domain), priority);
577 return 0; 585exit:
578failed:
579 write_unlock_bh(&tipc_net_lock); 586 write_unlock_bh(&tipc_net_lock);
580 return res; 587 return res;
581} 588}
@@ -587,7 +594,7 @@ failed:
587 594
588int tipc_block_bearer(const char *name) 595int tipc_block_bearer(const char *name)
589{ 596{
590 struct bearer *b_ptr = NULL; 597 struct tipc_bearer *b_ptr = NULL;
591 struct link *l_ptr; 598 struct link *l_ptr;
592 struct link *temp_l_ptr; 599 struct link *temp_l_ptr;
593 600
@@ -600,8 +607,8 @@ int tipc_block_bearer(const char *name)
600 } 607 }
601 608
602 info("Blocking bearer <%s>\n", name); 609 info("Blocking bearer <%s>\n", name);
603 spin_lock_bh(&b_ptr->publ.lock); 610 spin_lock_bh(&b_ptr->lock);
604 b_ptr->publ.blocked = 1; 611 b_ptr->blocked = 1;
605 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { 612 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
606 struct tipc_node *n_ptr = l_ptr->owner; 613 struct tipc_node *n_ptr = l_ptr->owner;
607 614
@@ -609,7 +616,7 @@ int tipc_block_bearer(const char *name)
609 tipc_link_reset(l_ptr); 616 tipc_link_reset(l_ptr);
610 spin_unlock_bh(&n_ptr->lock); 617 spin_unlock_bh(&n_ptr->lock);
611 } 618 }
612 spin_unlock_bh(&b_ptr->publ.lock); 619 spin_unlock_bh(&b_ptr->lock);
613 read_unlock_bh(&tipc_net_lock); 620 read_unlock_bh(&tipc_net_lock);
614 return 0; 621 return 0;
615} 622}
@@ -620,27 +627,27 @@ int tipc_block_bearer(const char *name)
620 * Note: This routine assumes caller holds tipc_net_lock. 627 * Note: This routine assumes caller holds tipc_net_lock.
621 */ 628 */
622 629
623static void bearer_disable(struct bearer *b_ptr) 630static void bearer_disable(struct tipc_bearer *b_ptr)
624{ 631{
625 struct link *l_ptr; 632 struct link *l_ptr;
626 struct link *temp_l_ptr; 633 struct link *temp_l_ptr;
627 634
628 info("Disabling bearer <%s>\n", b_ptr->publ.name); 635 info("Disabling bearer <%s>\n", b_ptr->name);
629 tipc_disc_stop_link_req(b_ptr->link_req); 636 spin_lock_bh(&b_ptr->lock);
630 spin_lock_bh(&b_ptr->publ.lock); 637 b_ptr->blocked = 1;
631 b_ptr->link_req = NULL; 638 b_ptr->media->disable_bearer(b_ptr);
632 b_ptr->publ.blocked = 1;
633 b_ptr->media->disable_bearer(&b_ptr->publ);
634 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { 639 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
635 tipc_link_delete(l_ptr); 640 tipc_link_delete(l_ptr);
636 } 641 }
637 spin_unlock_bh(&b_ptr->publ.lock); 642 if (b_ptr->link_req)
638 memset(b_ptr, 0, sizeof(struct bearer)); 643 tipc_disc_delete(b_ptr->link_req);
644 spin_unlock_bh(&b_ptr->lock);
645 memset(b_ptr, 0, sizeof(struct tipc_bearer));
639} 646}
640 647
641int tipc_disable_bearer(const char *name) 648int tipc_disable_bearer(const char *name)
642{ 649{
643 struct bearer *b_ptr; 650 struct tipc_bearer *b_ptr;
644 int res; 651 int res;
645 652
646 write_lock_bh(&tipc_net_lock); 653 write_lock_bh(&tipc_net_lock);
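
tipc_enable_bearer() now takes a discovery domain rather than a broadcast scope: a domain that covers this node is normalised to the node's own cluster <Z.C.0>, a specific node inside the own cluster is accepted as-is, and anything else is rejected. The standalone sketch below paraphrases that acceptance rule as a pure function; accept_disc_domain() and domain_valid() are hypothetical helper names, and the rejected path assumes res starts out non-zero (-EINVAL) as in the hunk's error branch.

#include <stdio.h>
#include <stdint.h>

#define TIPC_ZONE_MASK    0xff000000u
#define TIPC_CLUSTER_MASK 0xfffff000u

static uint32_t tipc_addr(uint32_t z, uint32_t c, uint32_t n)
{
	return (z << 24) | (c << 12) | n;
}

/* Approximation of tipc_addr_domain_valid(): <Z.C.N>, <Z.C.0>, <Z.0.0>, <0.0.0>. */
static int domain_valid(uint32_t d)
{
	uint32_t n = d & 0xfff, c = (d >> 12) & 0xfff, z = d >> 24;

	if (n && (!z || !c))
		return 0;
	if (c && !z)
		return 0;
	return 1;
}

static int in_scope(uint32_t domain, uint32_t addr)
{
	return !domain || domain == addr ||
	       domain == (addr & TIPC_CLUSTER_MASK) ||
	       domain == (addr & TIPC_ZONE_MASK);
}

static int in_own_cluster(uint32_t addr, uint32_t own)
{
	return !((addr ^ own) >> 12);
}

/* Returns the (possibly normalised) discovery domain, or 0 if it is rejected. */
static uint32_t accept_disc_domain(uint32_t own, uint32_t domain)
{
	if (domain_valid(domain) && domain != own) {
		if (in_scope(domain, own))
			return own & TIPC_CLUSTER_MASK;	/* any node in own cluster */
		if (in_own_cluster(domain, own))
			return domain;			/* specified node in own cluster */
	}
	return 0;					/* illegal discovery domain */
}

int main(void)
{
	uint32_t own = tipc_addr(1, 1, 1);

	printf("0x%08x\n", accept_disc_domain(own, tipc_addr(1, 0, 0)));  /* zone domain -> 0x01001000 */
	printf("0x%08x\n", accept_disc_domain(own, tipc_addr(1, 1, 7)));  /* peer in own cluster -> kept */
	printf("0x%08x\n", accept_disc_domain(own, tipc_addr(2, 1, 7)));  /* other zone -> rejected (0) */
	return 0;
}
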
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 85f451d5aacf..31d6172b20fd 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -2,7 +2,7 @@
2 * net/tipc/bearer.h: Include file for TIPC bearer code 2 * net/tipc/bearer.h: Include file for TIPC bearer code
3 * 3 *
4 * Copyright (c) 1996-2006, Ericsson AB 4 * Copyright (c) 1996-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -61,26 +61,7 @@ struct tipc_media_addr {
61 } dev_addr; 61 } dev_addr;
62}; 62};
63 63
64/** 64struct tipc_bearer;
65 * struct tipc_bearer - TIPC bearer info available to media code
66 * @usr_handle: pointer to additional media-specific information about bearer
67 * @mtu: max packet size bearer can support
68 * @blocked: non-zero if bearer is blocked
69 * @lock: spinlock for controlling access to bearer
70 * @addr: media-specific address associated with bearer
71 * @name: bearer name (format = media:interface)
72 *
73 * Note: TIPC initializes "name" and "lock" fields; media code is responsible
74 * for initialization all other fields when a bearer is enabled.
75 */
76struct tipc_bearer {
77 void *usr_handle;
78 u32 mtu;
79 int blocked;
80 spinlock_t lock;
81 struct tipc_media_addr addr;
82 char name[TIPC_MAX_BEARER_NAME];
83};
84 65
85/** 66/**
86 * struct media - TIPC media information available to internal users 67 * struct media - TIPC media information available to internal users
@@ -89,7 +70,6 @@ struct tipc_bearer {
89 * @disable_bearer: routine which disables a bearer 70 * @disable_bearer: routine which disables a bearer
90 * @addr2str: routine which converts bearer's address to string form 71 * @addr2str: routine which converts bearer's address to string form
91 * @bcast_addr: media address used in broadcasting 72 * @bcast_addr: media address used in broadcasting
92 * @bcast: non-zero if media supports broadcasting [currently mandatory]
93 * @priority: default link (and bearer) priority 73 * @priority: default link (and bearer) priority
94 * @tolerance: default time (in ms) before declaring link failure 74 * @tolerance: default time (in ms) before declaring link failure
95 * @window: default window (in packets) before declaring link congestion 75 * @window: default window (in packets) before declaring link congestion
@@ -106,7 +86,6 @@ struct media {
106 char *(*addr2str)(struct tipc_media_addr *a, 86 char *(*addr2str)(struct tipc_media_addr *a,
107 char *str_buf, int str_size); 87 char *str_buf, int str_size);
108 struct tipc_media_addr bcast_addr; 88 struct tipc_media_addr bcast_addr;
109 int bcast;
110 u32 priority; 89 u32 priority;
111 u32 tolerance; 90 u32 tolerance;
112 u32 window; 91 u32 window;
@@ -115,11 +94,15 @@ struct media {
115}; 94};
116 95
117/** 96/**
118 * struct bearer - TIPC bearer information available to internal users 97 * struct tipc_bearer - TIPC bearer structure
119 * @publ: bearer information available to privileged users 98 * @usr_handle: pointer to additional media-specific information about bearer
99 * @mtu: max packet size bearer can support
100 * @blocked: non-zero if bearer is blocked
101 * @lock: spinlock for controlling access to bearer
102 * @addr: media-specific address associated with bearer
103 * @name: bearer name (format = media:interface)
120 * @media: ptr to media structure associated with bearer 104 * @media: ptr to media structure associated with bearer
121 * @priority: default link priority for bearer 105 * @priority: default link priority for bearer
122 * @detect_scope: network address mask used during automatic link creation
123 * @identity: array index of this bearer within TIPC bearer array 106 * @identity: array index of this bearer within TIPC bearer array
124 * @link_req: ptr to (optional) structure making periodic link setup requests 107 * @link_req: ptr to (optional) structure making periodic link setup requests
125 * @links: list of non-congested links associated with bearer 108 * @links: list of non-congested links associated with bearer
@@ -128,13 +111,20 @@ struct media {
128 * @active: non-zero if bearer structure is represents a bearer 111 * @active: non-zero if bearer structure is represents a bearer
129 * @net_plane: network plane ('A' through 'H') currently associated with bearer 112 * @net_plane: network plane ('A' through 'H') currently associated with bearer
130 * @nodes: indicates which nodes in cluster can be reached through bearer 113 * @nodes: indicates which nodes in cluster can be reached through bearer
114 *
115 * Note: media-specific code is responsible for initialization of the fields
116 * indicated below when a bearer is enabled; TIPC's generic bearer code takes
117 * care of initializing all other fields.
131 */ 118 */
132 119struct tipc_bearer {
133struct bearer { 120 void *usr_handle; /* initalized by media */
134 struct tipc_bearer publ; 121 u32 mtu; /* initalized by media */
122 int blocked; /* initalized by media */
123 struct tipc_media_addr addr; /* initalized by media */
124 char name[TIPC_MAX_BEARER_NAME];
125 spinlock_t lock;
135 struct media *media; 126 struct media *media;
136 u32 priority; 127 u32 priority;
137 u32 detect_scope;
138 u32 identity; 128 u32 identity;
139 struct link_req *link_req; 129 struct link_req *link_req;
140 struct list_head links; 130 struct list_head links;
@@ -152,7 +142,7 @@ struct bearer_name {
152 142
153struct link; 143struct link;
154 144
155extern struct bearer tipc_bearers[]; 145extern struct tipc_bearer tipc_bearers[];
156 146
157/* 147/*
158 * TIPC routines available to supported media types 148 * TIPC routines available to supported media types
@@ -173,7 +163,7 @@ void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
173int tipc_block_bearer(const char *name); 163int tipc_block_bearer(const char *name);
174void tipc_continue(struct tipc_bearer *tb_ptr); 164void tipc_continue(struct tipc_bearer *tb_ptr);
175 165
176int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); 166int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority);
177int tipc_disable_bearer(const char *name); 167int tipc_disable_bearer(const char *name);
178 168
179/* 169/*
@@ -186,14 +176,14 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
186struct sk_buff *tipc_media_get_names(void); 176struct sk_buff *tipc_media_get_names(void);
187 177
188struct sk_buff *tipc_bearer_get_names(void); 178struct sk_buff *tipc_bearer_get_names(void);
189void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest); 179void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
190void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest); 180void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
191void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr); 181void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr);
192struct bearer *tipc_bearer_find_interface(const char *if_name); 182struct tipc_bearer *tipc_bearer_find_interface(const char *if_name);
193int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); 183int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr);
194int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr); 184int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr);
195void tipc_bearer_stop(void); 185void tipc_bearer_stop(void);
196void tipc_bearer_lock_push(struct bearer *b_ptr); 186void tipc_bearer_lock_push(struct tipc_bearer *b_ptr);
197 187
198 188
199/** 189/**
@@ -214,10 +204,11 @@ void tipc_bearer_lock_push(struct bearer *b_ptr);
214 * and let TIPC's link code deal with the undelivered message. 204 * and let TIPC's link code deal with the undelivered message.
215 */ 205 */
216 206
217static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf, 207static inline int tipc_bearer_send(struct tipc_bearer *b_ptr,
208 struct sk_buff *buf,
218 struct tipc_media_addr *dest) 209 struct tipc_media_addr *dest)
219{ 210{
220 return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest); 211 return !b_ptr->media->send_msg(buf, b_ptr, dest);
221} 212}
222 213
223#endif /* _TIPC_BEARER_H */ 214#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index e16750dcf3c1..b25a396b7e1e 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -2,7 +2,7 @@
2 * net/tipc/config.c: TIPC configuration management code 2 * net/tipc/config.c: TIPC configuration management code
3 * 3 *
4 * Copyright (c) 2002-2006, Ericsson AB 4 * Copyright (c) 2002-2006, Ericsson AB
5 * Copyright (c) 2004-2007, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -148,7 +148,7 @@ static struct sk_buff *cfg_enable_bearer(void)
148 148
149 args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); 149 args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area);
150 if (tipc_enable_bearer(args->name, 150 if (tipc_enable_bearer(args->name,
151 ntohl(args->detect_scope), 151 ntohl(args->disc_domain),
152 ntohl(args->priority))) 152 ntohl(args->priority)))
153 return tipc_cfg_reply_error_string("unable to enable bearer"); 153 return tipc_cfg_reply_error_string("unable to enable bearer");
154 154
@@ -260,25 +260,6 @@ static struct sk_buff *cfg_set_max_ports(void)
260 return tipc_cfg_reply_none(); 260 return tipc_cfg_reply_none();
261} 261}
262 262
263static struct sk_buff *cfg_set_max_nodes(void)
264{
265 u32 value;
266
267 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
268 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
269 value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
270 if (value == tipc_max_nodes)
271 return tipc_cfg_reply_none();
272 if (value != delimit(value, 8, 2047))
273 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
274 " (max nodes must be 8-2047)");
275 if (tipc_mode == TIPC_NET_MODE)
276 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
277 " (cannot change max nodes once TIPC has joined a network)");
278 tipc_max_nodes = value;
279 return tipc_cfg_reply_none();
280}
281
282static struct sk_buff *cfg_set_netid(void) 263static struct sk_buff *cfg_set_netid(void)
283{ 264{
284 u32 value; 265 u32 value;
@@ -397,9 +378,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
397 case TIPC_CMD_SET_MAX_SUBSCR: 378 case TIPC_CMD_SET_MAX_SUBSCR:
398 rep_tlv_buf = cfg_set_max_subscriptions(); 379 rep_tlv_buf = cfg_set_max_subscriptions();
399 break; 380 break;
400 case TIPC_CMD_SET_MAX_NODES:
401 rep_tlv_buf = cfg_set_max_nodes();
402 break;
403 case TIPC_CMD_SET_NETID: 381 case TIPC_CMD_SET_NETID:
404 rep_tlv_buf = cfg_set_netid(); 382 rep_tlv_buf = cfg_set_netid();
405 break; 383 break;
@@ -415,9 +393,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
415 case TIPC_CMD_GET_MAX_SUBSCR: 393 case TIPC_CMD_GET_MAX_SUBSCR:
416 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); 394 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
417 break; 395 break;
418 case TIPC_CMD_GET_MAX_NODES:
419 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes);
420 break;
421 case TIPC_CMD_GET_NETID: 396 case TIPC_CMD_GET_NETID:
422 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); 397 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
423 break; 398 break;
@@ -431,6 +406,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
431 case TIPC_CMD_GET_MAX_SLAVES: 406 case TIPC_CMD_GET_MAX_SLAVES:
432 case TIPC_CMD_SET_MAX_CLUSTERS: 407 case TIPC_CMD_SET_MAX_CLUSTERS:
433 case TIPC_CMD_GET_MAX_CLUSTERS: 408 case TIPC_CMD_GET_MAX_CLUSTERS:
409 case TIPC_CMD_SET_MAX_NODES:
410 case TIPC_CMD_GET_MAX_NODES:
434 rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 411 rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
435 " (obsolete command)"); 412 " (obsolete command)");
436 break; 413 break;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e071579e0850..943b6af84265 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -2,7 +2,7 @@
2 * net/tipc/core.c: TIPC module code 2 * net/tipc/core.c: TIPC module code
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005-2006, Wind River Systems 5 * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,6 @@
41#include "config.h" 41#include "config.h"
42 42
43 43
44#ifndef CONFIG_TIPC_NODES
45#define CONFIG_TIPC_NODES 255
46#endif
47
48#ifndef CONFIG_TIPC_PORTS 44#ifndef CONFIG_TIPC_PORTS
49#define CONFIG_TIPC_PORTS 8191 45#define CONFIG_TIPC_PORTS 8191
50#endif 46#endif
@@ -57,7 +53,6 @@
57 53
58int tipc_mode = TIPC_NOT_RUNNING; 54int tipc_mode = TIPC_NOT_RUNNING;
59int tipc_random; 55int tipc_random;
60atomic_t tipc_user_count = ATOMIC_INIT(0);
61 56
62const char tipc_alphabet[] = 57const char tipc_alphabet[] =
63 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."; 58 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.";
@@ -65,7 +60,6 @@ const char tipc_alphabet[] =
65/* configurable TIPC parameters */ 60/* configurable TIPC parameters */
66 61
67u32 tipc_own_addr; 62u32 tipc_own_addr;
68int tipc_max_nodes;
69int tipc_max_ports; 63int tipc_max_ports;
70int tipc_max_subscriptions; 64int tipc_max_subscriptions;
71int tipc_max_publications; 65int tipc_max_publications;
@@ -185,15 +179,13 @@ static int __init tipc_init(void)
185 if (tipc_log_resize(CONFIG_TIPC_LOG) != 0) 179 if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
186 warn("Unable to create log buffer\n"); 180 warn("Unable to create log buffer\n");
187 181
188 info("Activated (version " TIPC_MOD_VER 182 info("Activated (version " TIPC_MOD_VER ")\n");
189 " compiled " __DATE__ " " __TIME__ ")\n");
190 183
191 tipc_own_addr = 0; 184 tipc_own_addr = 0;
192 tipc_remote_management = 1; 185 tipc_remote_management = 1;
193 tipc_max_publications = 10000; 186 tipc_max_publications = 10000;
194 tipc_max_subscriptions = 2000; 187 tipc_max_subscriptions = 2000;
195 tipc_max_ports = CONFIG_TIPC_PORTS; 188 tipc_max_ports = CONFIG_TIPC_PORTS;
196 tipc_max_nodes = CONFIG_TIPC_NODES;
197 tipc_net_id = 4711; 189 tipc_net_id = 4711;
198 190
199 res = tipc_core_start(); 191 res = tipc_core_start();
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 997158546e25..436dda1159d2 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -2,7 +2,7 @@
2 * net/tipc/core.h: Include file for TIPC global declarations 2 * net/tipc/core.h: Include file for TIPC global declarations
3 * 3 *
4 * Copyright (c) 2005-2006, Ericsson AB 4 * Copyright (c) 2005-2006, Ericsson AB
5 * Copyright (c) 2005-2007, Wind River Systems 5 * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -147,7 +147,6 @@ void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *);
147 */ 147 */
148 148
149extern u32 tipc_own_addr; 149extern u32 tipc_own_addr;
150extern int tipc_max_nodes;
151extern int tipc_max_ports; 150extern int tipc_max_ports;
152extern int tipc_max_subscriptions; 151extern int tipc_max_subscriptions;
153extern int tipc_max_publications; 152extern int tipc_max_publications;
@@ -161,7 +160,6 @@ extern int tipc_remote_management;
161extern int tipc_mode; 160extern int tipc_mode;
162extern int tipc_random; 161extern int tipc_random;
163extern const char tipc_alphabet[]; 162extern const char tipc_alphabet[];
164extern atomic_t tipc_user_count;
165 163
166 164
167/* 165/*
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fa026bd91a68..0987933155b9 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -2,7 +2,7 @@
2 * net/tipc/discover.c 2 * net/tipc/discover.c
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005-2006, Wind River Systems 5 * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -39,26 +39,26 @@
39#include "discover.h" 39#include "discover.h"
40 40
41#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */ 41#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */
42#define TIPC_LINK_REQ_FAST 2000 /* normal delay if bearer has no links */ 42#define TIPC_LINK_REQ_FAST 1000 /* max delay if bearer has no links */
43#define TIPC_LINK_REQ_SLOW 600000 /* normal delay if bearer has links */ 43#define TIPC_LINK_REQ_SLOW 60000 /* max delay if bearer has links */
44 44#define TIPC_LINK_REQ_INACTIVE 0xffffffff /* indicates no timer in use */
45/*
46 * TODO: Most of the inter-cluster setup stuff should be
47 * rewritten, and be made conformant with specification.
48 */
49 45
50 46
51/** 47/**
52 * struct link_req - information about an ongoing link setup request 48 * struct link_req - information about an ongoing link setup request
53 * @bearer: bearer issuing requests 49 * @bearer: bearer issuing requests
54 * @dest: destination address for request messages 50 * @dest: destination address for request messages
51 * @domain: network domain to which links can be established
52 * @num_nodes: number of nodes currently discovered (i.e. with an active link)
55 * @buf: request message to be (repeatedly) sent 53 * @buf: request message to be (repeatedly) sent
56 * @timer: timer governing period between requests 54 * @timer: timer governing period between requests
57 * @timer_intv: current interval between requests (in ms) 55 * @timer_intv: current interval between requests (in ms)
58 */ 56 */
59struct link_req { 57struct link_req {
60 struct bearer *bearer; 58 struct tipc_bearer *bearer;
61 struct tipc_media_addr dest; 59 struct tipc_media_addr dest;
60 u32 domain;
61 int num_nodes;
62 struct sk_buff *buf; 62 struct sk_buff *buf;
63 struct timer_list timer; 63 struct timer_list timer;
64 unsigned int timer_intv; 64 unsigned int timer_intv;
@@ -67,27 +67,24 @@ struct link_req {
67/** 67/**
68 * tipc_disc_init_msg - initialize a link setup message 68 * tipc_disc_init_msg - initialize a link setup message
69 * @type: message type (request or response) 69 * @type: message type (request or response)
70 * @req_links: number of links associated with message
71 * @dest_domain: network domain of node(s) which should respond to message 70 * @dest_domain: network domain of node(s) which should respond to message
72 * @b_ptr: ptr to bearer issuing message 71 * @b_ptr: ptr to bearer issuing message
73 */ 72 */
74 73
75static struct sk_buff *tipc_disc_init_msg(u32 type, 74static struct sk_buff *tipc_disc_init_msg(u32 type,
76 u32 req_links,
77 u32 dest_domain, 75 u32 dest_domain,
78 struct bearer *b_ptr) 76 struct tipc_bearer *b_ptr)
79{ 77{
80 struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE); 78 struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE);
81 struct tipc_msg *msg; 79 struct tipc_msg *msg;
82 80
83 if (buf) { 81 if (buf) {
84 msg = buf_msg(buf); 82 msg = buf_msg(buf);
85 tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); 83 tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
86 msg_set_non_seq(msg, 1); 84 msg_set_non_seq(msg, 1);
87 msg_set_req_links(msg, req_links);
88 msg_set_dest_domain(msg, dest_domain); 85 msg_set_dest_domain(msg, dest_domain);
89 msg_set_bc_netid(msg, tipc_net_id); 86 msg_set_bc_netid(msg, tipc_net_id);
90 msg_set_media_addr(msg, &b_ptr->publ.addr); 87 msg_set_media_addr(msg, &b_ptr->addr);
91 } 88 }
92 return buf; 89 return buf;
93} 90}
@@ -99,7 +96,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
99 * @media_addr: media address advertised by duplicated node 96 * @media_addr: media address advertised by duplicated node
100 */ 97 */
101 98
102static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, 99static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
103 struct tipc_media_addr *media_addr) 100 struct tipc_media_addr *media_addr)
104{ 101{
105 char node_addr_str[16]; 102 char node_addr_str[16];
@@ -111,7 +108,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
111 tipc_media_addr_printf(&pb, media_addr); 108 tipc_media_addr_printf(&pb, media_addr);
112 tipc_printbuf_validate(&pb); 109 tipc_printbuf_validate(&pb);
113 warn("Duplicate %s using %s seen on <%s>\n", 110 warn("Duplicate %s using %s seen on <%s>\n",
114 node_addr_str, media_addr_str, b_ptr->publ.name); 111 node_addr_str, media_addr_str, b_ptr->name);
115} 112}
116 113
117/** 114/**
@@ -120,19 +117,23 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
120 * @b_ptr: bearer that message arrived on 117 * @b_ptr: bearer that message arrived on
121 */ 118 */
122 119
123void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) 120void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
124{ 121{
122 struct tipc_node *n_ptr;
125 struct link *link; 123 struct link *link;
126 struct tipc_media_addr media_addr; 124 struct tipc_media_addr media_addr, *addr;
125 struct sk_buff *rbuf;
127 struct tipc_msg *msg = buf_msg(buf); 126 struct tipc_msg *msg = buf_msg(buf);
128 u32 dest = msg_dest_domain(msg); 127 u32 dest = msg_dest_domain(msg);
129 u32 orig = msg_prevnode(msg); 128 u32 orig = msg_prevnode(msg);
130 u32 net_id = msg_bc_netid(msg); 129 u32 net_id = msg_bc_netid(msg);
131 u32 type = msg_type(msg); 130 u32 type = msg_type(msg);
131 int link_fully_up;
132 132
133 msg_get_media_addr(msg, &media_addr); 133 msg_get_media_addr(msg, &media_addr);
134 buf_discard(buf); 134 buf_discard(buf);
135 135
136 /* Validate discovery message from requesting node */
136 if (net_id != tipc_net_id) 137 if (net_id != tipc_net_id)
137 return; 138 return;
138 if (!tipc_addr_domain_valid(dest)) 139 if (!tipc_addr_domain_valid(dest))
@@ -140,104 +141,127 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
140 if (!tipc_addr_node_valid(orig)) 141 if (!tipc_addr_node_valid(orig))
141 return; 142 return;
142 if (orig == tipc_own_addr) { 143 if (orig == tipc_own_addr) {
143 if (memcmp(&media_addr, &b_ptr->publ.addr, sizeof(media_addr))) 144 if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr)))
144 disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); 145 disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
145 return; 146 return;
146 } 147 }
147 if (!tipc_in_scope(dest, tipc_own_addr)) 148 if (!tipc_in_scope(dest, tipc_own_addr))
148 return; 149 return;
149 if (in_own_cluster(orig)) { 150 if (!tipc_in_scope(b_ptr->link_req->domain, orig))
150 /* Always accept link here */ 151 return;
151 struct sk_buff *rbuf;
152 struct tipc_media_addr *addr;
153 struct tipc_node *n_ptr = tipc_node_find(orig);
154 int link_fully_up;
155
156 if (n_ptr == NULL) {
157 n_ptr = tipc_node_create(orig);
158 if (!n_ptr)
159 return;
160 }
161 spin_lock_bh(&n_ptr->lock);
162
163 /* Don't talk to neighbor during cleanup after last session */
164 152
165 if (n_ptr->cleanup_required) { 153 /* Locate structure corresponding to requesting node */
166 spin_unlock_bh(&n_ptr->lock); 154 n_ptr = tipc_node_find(orig);
155 if (!n_ptr) {
156 n_ptr = tipc_node_create(orig);
157 if (!n_ptr)
167 return; 158 return;
168 } 159 }
160 tipc_node_lock(n_ptr);
161
162 /* Don't talk to neighbor during cleanup after last session */
163 if (n_ptr->cleanup_required) {
164 tipc_node_unlock(n_ptr);
165 return;
166 }
167
168 link = n_ptr->links[b_ptr->identity];
169 169
170 link = n_ptr->links[b_ptr->identity]; 170 /* Create a link endpoint for this bearer, if necessary */
171 if (!link) {
172 link = tipc_link_create(n_ptr, b_ptr, &media_addr);
171 if (!link) { 173 if (!link) {
172 link = tipc_link_create(b_ptr, orig, &media_addr); 174 tipc_node_unlock(n_ptr);
173 if (!link) { 175 return;
174 spin_unlock_bh(&n_ptr->lock);
175 return;
176 }
177 }
178 addr = &link->media_addr;
179 if (memcmp(addr, &media_addr, sizeof(*addr))) {
180 if (tipc_link_is_up(link) || (!link->started)) {
181 disc_dupl_alert(b_ptr, orig, &media_addr);
182 spin_unlock_bh(&n_ptr->lock);
183 return;
184 }
185 warn("Resetting link <%s>, peer interface address changed\n",
186 link->name);
187 memcpy(addr, &media_addr, sizeof(*addr));
188 tipc_link_reset(link);
189 } 176 }
190 link_fully_up = link_working_working(link); 177 }
191 spin_unlock_bh(&n_ptr->lock); 178
192 if ((type == DSC_RESP_MSG) || link_fully_up) 179 /*
180 * Ensure requesting node's media address is correct
181 *
182 * If media address doesn't match and the link is working, reject the
183 * request (must be from a duplicate node).
184 *
185 * If media address doesn't match and the link is not working, accept
186 * the new media address and reset the link to ensure it starts up
187 * cleanly.
188 */
189 addr = &link->media_addr;
190 if (memcmp(addr, &media_addr, sizeof(*addr))) {
191 if (tipc_link_is_up(link) || (!link->started)) {
192 disc_dupl_alert(b_ptr, orig, &media_addr);
193 tipc_node_unlock(n_ptr);
193 return; 194 return;
194 rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr); 195 }
195 if (rbuf != NULL) { 196 warn("Resetting link <%s>, peer interface address changed\n",
196 b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr); 197 link->name);
198 memcpy(addr, &media_addr, sizeof(*addr));
199 tipc_link_reset(link);
200 }
201
202 /* Accept discovery message & send response, if necessary */
203 link_fully_up = link_working_working(link);
204
205 if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) {
206 rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr);
207 if (rbuf) {
208 b_ptr->media->send_msg(rbuf, b_ptr, &media_addr);
197 buf_discard(rbuf); 209 buf_discard(rbuf);
198 } 210 }
199 } 211 }
212
213 tipc_node_unlock(n_ptr);
200} 214}
201 215
202/** 216/**
203 * tipc_disc_stop_link_req - stop sending periodic link setup requests 217 * disc_update - update frequency of periodic link setup requests
204 * @req: ptr to link request structure 218 * @req: ptr to link request structure
219 *
220 * Reinitiates discovery process if discovery object has no associated nodes
221 * and is either not currently searching or is searching at a slow rate
205 */ 222 */
206 223
207void tipc_disc_stop_link_req(struct link_req *req) 224static void disc_update(struct link_req *req)
208{ 225{
209 if (!req) 226 if (!req->num_nodes) {
210 return; 227 if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
228 (req->timer_intv > TIPC_LINK_REQ_FAST)) {
229 req->timer_intv = TIPC_LINK_REQ_INIT;
230 k_start_timer(&req->timer, req->timer_intv);
231 }
232 }
233}
211 234
212 k_cancel_timer(&req->timer); 235/**
213 k_term_timer(&req->timer); 236 * tipc_disc_add_dest - increment set of discovered nodes
214 buf_discard(req->buf); 237 * @req: ptr to link request structure
215 kfree(req); 238 */
239
240void tipc_disc_add_dest(struct link_req *req)
241{
242 req->num_nodes++;
216} 243}
217 244
218/** 245/**
219 * tipc_disc_update_link_req - update frequency of periodic link setup requests 246 * tipc_disc_remove_dest - decrement set of discovered nodes
220 * @req: ptr to link request structure 247 * @req: ptr to link request structure
221 */ 248 */
222 249
223void tipc_disc_update_link_req(struct link_req *req) 250void tipc_disc_remove_dest(struct link_req *req)
224{ 251{
225 if (!req) 252 req->num_nodes--;
226 return; 253 disc_update(req);
254}
227 255
228 if (req->timer_intv == TIPC_LINK_REQ_SLOW) { 256/**
229 if (!req->bearer->nodes.count) { 257 * disc_send_msg - send link setup request message
230 req->timer_intv = TIPC_LINK_REQ_FAST; 258 * @req: ptr to link request structure
231 k_start_timer(&req->timer, req->timer_intv); 259 */
232 } 260
233 } else if (req->timer_intv == TIPC_LINK_REQ_FAST) { 261static void disc_send_msg(struct link_req *req)
234 if (req->bearer->nodes.count) { 262{
235 req->timer_intv = TIPC_LINK_REQ_SLOW; 263 if (!req->bearer->blocked)
236 k_start_timer(&req->timer, req->timer_intv); 264 tipc_bearer_send(req->bearer, req->buf, &req->dest);
237 }
238 } else {
239 /* leave timer "as is" if haven't yet reached a "normal" rate */
240 }
241} 265}
242 266
243/** 267/**
@@ -249,58 +273,86 @@ void tipc_disc_update_link_req(struct link_req *req)
249 273
250static void disc_timeout(struct link_req *req) 274static void disc_timeout(struct link_req *req)
251{ 275{
252 spin_lock_bh(&req->bearer->publ.lock); 276 int max_delay;
253 277
254 req->bearer->media->send_msg(req->buf, &req->bearer->publ, &req->dest); 278 spin_lock_bh(&req->bearer->lock);
255 279
256 if ((req->timer_intv == TIPC_LINK_REQ_SLOW) || 280 /* Stop searching if only desired node has been found */
257 (req->timer_intv == TIPC_LINK_REQ_FAST)) { 281
258 /* leave timer interval "as is" if already at a "normal" rate */ 282 if (tipc_node(req->domain) && req->num_nodes) {
259 } else { 283 req->timer_intv = TIPC_LINK_REQ_INACTIVE;
260 req->timer_intv *= 2; 284 goto exit;
261 if (req->timer_intv > TIPC_LINK_REQ_FAST)
262 req->timer_intv = TIPC_LINK_REQ_FAST;
263 if ((req->timer_intv == TIPC_LINK_REQ_FAST) &&
264 (req->bearer->nodes.count))
265 req->timer_intv = TIPC_LINK_REQ_SLOW;
266 } 285 }
267 k_start_timer(&req->timer, req->timer_intv);
268 286
269 spin_unlock_bh(&req->bearer->publ.lock); 287 /*
288 * Send discovery message, then update discovery timer
289 *
290 * Keep doubling time between requests until limit is reached;
291 * hold at fast polling rate if don't have any associated nodes,
292 * otherwise hold at slow polling rate
293 */
294
295 disc_send_msg(req);
296
297 req->timer_intv *= 2;
298 if (req->num_nodes)
299 max_delay = TIPC_LINK_REQ_SLOW;
300 else
301 max_delay = TIPC_LINK_REQ_FAST;
302 if (req->timer_intv > max_delay)
303 req->timer_intv = max_delay;
304
305 k_start_timer(&req->timer, req->timer_intv);
306exit:
307 spin_unlock_bh(&req->bearer->lock);
270} 308}
271 309
272/** 310/**
273 * tipc_disc_init_link_req - start sending periodic link setup requests 311 * tipc_disc_create - create object to send periodic link setup requests
274 * @b_ptr: ptr to bearer issuing requests 312 * @b_ptr: ptr to bearer issuing requests
275 * @dest: destination address for request messages 313 * @dest: destination address for request messages
276 * @dest_domain: network domain of node(s) which should respond to message 314 * @dest_domain: network domain to which links can be established
277 * @req_links: max number of desired links
278 * 315 *
279 * Returns pointer to link request structure, or NULL if unable to create. 316 * Returns 0 if successful, otherwise -errno.
280 */ 317 */
281 318
282struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, 319int tipc_disc_create(struct tipc_bearer *b_ptr,
283 const struct tipc_media_addr *dest, 320 struct tipc_media_addr *dest, u32 dest_domain)
284 u32 dest_domain,
285 u32 req_links)
286{ 321{
287 struct link_req *req; 322 struct link_req *req;
288 323
289 req = kmalloc(sizeof(*req), GFP_ATOMIC); 324 req = kmalloc(sizeof(*req), GFP_ATOMIC);
290 if (!req) 325 if (!req)
291 return NULL; 326 return -ENOMEM;
292 327
293 req->buf = tipc_disc_init_msg(DSC_REQ_MSG, req_links, dest_domain, b_ptr); 328 req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr);
294 if (!req->buf) { 329 if (!req->buf) {
295 kfree(req); 330 kfree(req);
296 return NULL; 331 return -ENOMSG;
297 } 332 }
298 333
299 memcpy(&req->dest, dest, sizeof(*dest)); 334 memcpy(&req->dest, dest, sizeof(*dest));
300 req->bearer = b_ptr; 335 req->bearer = b_ptr;
336 req->domain = dest_domain;
337 req->num_nodes = 0;
301 req->timer_intv = TIPC_LINK_REQ_INIT; 338 req->timer_intv = TIPC_LINK_REQ_INIT;
302 k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); 339 k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req);
303 k_start_timer(&req->timer, req->timer_intv); 340 k_start_timer(&req->timer, req->timer_intv);
304 return req; 341 b_ptr->link_req = req;
342 disc_send_msg(req);
343 return 0;
344}
345
346/**
347 * tipc_disc_delete - destroy object sending periodic link setup requests
348 * @req: ptr to link request structure
349 */
350
351void tipc_disc_delete(struct link_req *req)
352{
353 k_cancel_timer(&req->timer);
354 k_term_timer(&req->timer);
355 buf_discard(req->buf);
356 kfree(req);
305} 357}
306 358
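The reworked discovery logic above replaces the old fixed "fast/slow" request timer with an exponential backoff: every timeout doubles timer_intv, capped at TIPC_LINK_REQ_FAST (1000 ms) while no neighbours have been found and at TIPC_LINK_REQ_SLOW (60000 ms) once at least one link exists, and the timer goes inactive (TIPC_LINK_REQ_INACTIVE) when the single desired node has been discovered. disc_update() restarts the search at TIPC_LINK_REQ_INIT when the discovered-node count drops back to zero. The following stand-alone user-space model is only an illustration of that interval progression, not the kernel code; the timer, spinlocks and the actual message send are omitted.

/*
 * Model of the new disc_timeout()/disc_update() backoff, using plain
 * integers instead of kernel timers.
 */
#include <stdio.h>

#define TIPC_LINK_REQ_INIT      125             /* min delay (ms) */
#define TIPC_LINK_REQ_FAST      1000            /* max delay if no links */
#define TIPC_LINK_REQ_SLOW      60000           /* max delay if links exist */
#define TIPC_LINK_REQ_INACTIVE  0xffffffff      /* no timer in use */

struct model_req {
        unsigned int timer_intv;
        int num_nodes;
};

/* Mirrors disc_timeout(): double the interval, clamp to the fast/slow limit */
static void model_timeout(struct model_req *req)
{
        unsigned int max_delay;

        req->timer_intv *= 2;
        max_delay = req->num_nodes ? TIPC_LINK_REQ_SLOW : TIPC_LINK_REQ_FAST;
        if (req->timer_intv > max_delay)
                req->timer_intv = max_delay;
}

/* Mirrors tipc_disc_remove_dest()+disc_update(): restart fast discovery
 * when the last discovered node is lost */
static void model_remove_dest(struct model_req *req)
{
        req->num_nodes--;
        if (!req->num_nodes &&
            (req->timer_intv == TIPC_LINK_REQ_INACTIVE ||
             req->timer_intv > TIPC_LINK_REQ_FAST))
                req->timer_intv = TIPC_LINK_REQ_INIT;
}

int main(void)
{
        struct model_req req = { .timer_intv = TIPC_LINK_REQ_INIT, .num_nodes = 0 };
        int i;

        for (i = 0; i < 6; i++) {               /* 250 -> 500 -> 1000, then held */
                model_timeout(&req);
                printf("timeout %d: interval %u ms\n", i, req.timer_intv);
        }
        req.num_nodes = 1;                      /* a neighbour was discovered */
        model_timeout(&req);                    /* may now climb toward 60000 ms */
        printf("with node: interval %u ms\n", req.timer_intv);
        model_remove_dest(&req);                /* neighbour lost */
        printf("after loss: interval %u ms\n", req.timer_intv);
        return 0;
}

Run as a normal C program, this prints 250, 500, 1000, 1000, 1000, 1000 ms while searching, 2000 ms after the first neighbour appears, and 125 ms again once that neighbour is lost.
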
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index d2c3cffb79fc..a3af595b86cb 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -2,7 +2,7 @@
2 * net/tipc/discover.h 2 * net/tipc/discover.h
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -39,13 +39,11 @@
39 39
40struct link_req; 40struct link_req;
41 41
42struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, 42int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
43 const struct tipc_media_addr *dest, 43 u32 dest_domain);
44 u32 dest_domain, 44void tipc_disc_delete(struct link_req *req);
45 u32 req_links); 45void tipc_disc_add_dest(struct link_req *req);
46void tipc_disc_update_link_req(struct link_req *req); 46void tipc_disc_remove_dest(struct link_req *req);
47void tipc_disc_stop_link_req(struct link_req *req); 47void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr);
48
49void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr);
50 48
51#endif 49#endif
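The header now exposes a lifecycle-style API: tipc_disc_create() allocates the request object, stores it in b_ptr->link_req and returns an errno-style result, so a bearer enable path can propagate -ENOMEM or -ENOMSG instead of testing for a NULL pointer, and tipc_disc_delete() tears the object down again. The sketch below is a hypothetical, self-contained caller model; the example_* types and helpers are stand-ins so it compiles on its own, and only the create/delete pairing and the bearer-owns-its-request design reflect the header above.

/*
 * Hypothetical caller model for the new discovery lifecycle (not TIPC code).
 */
#include <stdlib.h>
#include <errno.h>

struct example_link_req { int num_nodes; };     /* stand-in for struct link_req */
struct example_bearer { struct example_link_req *link_req; };

/* Mirrors the return-an-errno style of tipc_disc_create() */
static int example_disc_create(struct example_bearer *b)
{
        b->link_req = calloc(1, sizeof(*b->link_req));
        if (!b->link_req)
                return -ENOMEM;
        /* the kernel code also builds the request buffer and starts the timer */
        return 0;
}

static void example_disc_delete(struct example_link_req *req)
{
        free(req);      /* the kernel code also stops the timer and frees the buffer */
}

static int example_enable_bearer(struct example_bearer *b)
{
        int res = example_disc_create(b);

        if (res)                /* failure can now be propagated directly */
                return res;
        return 0;
}

static void example_disable_bearer(struct example_bearer *b)
{
        example_disc_delete(b->link_req);
        b->link_req = NULL;
}

int main(void)
{
        struct example_bearer b = { 0 };

        if (example_enable_bearer(&b) == 0)
                example_disable_bearer(&b);
        return 0;
}
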
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 18702f58d111..5ed4b4f7452d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2,7 +2,7 @@
2 * net/tipc/link.c: TIPC link code 2 * net/tipc/link.c: TIPC link code
3 * 3 *
4 * Copyright (c) 1996-2007, Ericsson AB 4 * Copyright (c) 1996-2007, Ericsson AB
5 * Copyright (c) 2004-2007, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -90,9 +90,10 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr,
90static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf); 90static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
91static int link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf); 91static int link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf);
92static void link_set_supervision_props(struct link *l_ptr, u32 tolerance); 92static void link_set_supervision_props(struct link *l_ptr, u32 tolerance);
93static int link_send_sections_long(struct port *sender, 93static int link_send_sections_long(struct tipc_port *sender,
94 struct iovec const *msg_sect, 94 struct iovec const *msg_sect,
95 u32 num_sect, u32 destnode); 95 u32 num_sect, unsigned int total_len,
96 u32 destnode);
96static void link_check_defragm_bufs(struct link *l_ptr); 97static void link_check_defragm_bufs(struct link *l_ptr);
97static void link_state_event(struct link *l_ptr, u32 event); 98static void link_state_event(struct link *l_ptr, u32 event);
98static void link_reset_statistics(struct link *l_ptr); 99static void link_reset_statistics(struct link *l_ptr);
@@ -113,7 +114,7 @@ static void link_init_max_pkt(struct link *l_ptr)
113{ 114{
114 u32 max_pkt; 115 u32 max_pkt;
115 116
116 max_pkt = (l_ptr->b_ptr->publ.mtu & ~3); 117 max_pkt = (l_ptr->b_ptr->mtu & ~3);
117 if (max_pkt > MAX_MSG_SIZE) 118 if (max_pkt > MAX_MSG_SIZE)
118 max_pkt = MAX_MSG_SIZE; 119 max_pkt = MAX_MSG_SIZE;
119 120
@@ -246,9 +247,6 @@ static void link_timeout(struct link *l_ptr)
246 l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size; 247 l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size;
247 l_ptr->stats.queue_sz_counts++; 248 l_ptr->stats.queue_sz_counts++;
248 249
249 if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
250 l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
251
252 if (l_ptr->first_out) { 250 if (l_ptr->first_out) {
253 struct tipc_msg *msg = buf_msg(l_ptr->first_out); 251 struct tipc_msg *msg = buf_msg(l_ptr->first_out);
254 u32 length = msg_size(msg); 252 u32 length = msg_size(msg);
@@ -296,19 +294,35 @@ static void link_set_timer(struct link *l_ptr, u32 time)
296 294
297/** 295/**
298 * tipc_link_create - create a new link 296 * tipc_link_create - create a new link
297 * @n_ptr: pointer to associated node
299 * @b_ptr: pointer to associated bearer 298 * @b_ptr: pointer to associated bearer
300 * @peer: network address of node at other end of link
301 * @media_addr: media address to use when sending messages over link 299 * @media_addr: media address to use when sending messages over link
302 * 300 *
303 * Returns pointer to link. 301 * Returns pointer to link.
304 */ 302 */
305 303
306struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, 304struct link *tipc_link_create(struct tipc_node *n_ptr,
305 struct tipc_bearer *b_ptr,
307 const struct tipc_media_addr *media_addr) 306 const struct tipc_media_addr *media_addr)
308{ 307{
309 struct link *l_ptr; 308 struct link *l_ptr;
310 struct tipc_msg *msg; 309 struct tipc_msg *msg;
311 char *if_name; 310 char *if_name;
311 char addr_string[16];
312 u32 peer = n_ptr->addr;
313
314 if (n_ptr->link_cnt >= 2) {
315 tipc_addr_string_fill(addr_string, n_ptr->addr);
316 err("Attempt to establish third link to %s\n", addr_string);
317 return NULL;
318 }
319
320 if (n_ptr->links[b_ptr->identity]) {
321 tipc_addr_string_fill(addr_string, n_ptr->addr);
322 err("Attempt to establish second link on <%s> to %s\n",
323 b_ptr->name, addr_string);
324 return NULL;
325 }
312 326
313 l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); 327 l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC);
314 if (!l_ptr) { 328 if (!l_ptr) {
@@ -317,7 +331,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
317 } 331 }
318 332
319 l_ptr->addr = peer; 333 l_ptr->addr = peer;
320 if_name = strchr(b_ptr->publ.name, ':') + 1; 334 if_name = strchr(b_ptr->name, ':') + 1;
321 sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", 335 sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:",
322 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), 336 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
323 tipc_node(tipc_own_addr), 337 tipc_node(tipc_own_addr),
@@ -325,6 +339,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
325 tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); 339 tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
326 /* note: peer i/f is appended to link name by reset/activate */ 340 /* note: peer i/f is appended to link name by reset/activate */
327 memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); 341 memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
342 l_ptr->owner = n_ptr;
328 l_ptr->checkpoint = 1; 343 l_ptr->checkpoint = 1;
329 l_ptr->b_ptr = b_ptr; 344 l_ptr->b_ptr = b_ptr;
330 link_set_supervision_props(l_ptr, b_ptr->media->tolerance); 345 link_set_supervision_props(l_ptr, b_ptr->media->tolerance);
@@ -348,11 +363,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
348 363
349 link_reset_statistics(l_ptr); 364 link_reset_statistics(l_ptr);
350 365
351 l_ptr->owner = tipc_node_attach_link(l_ptr); 366 tipc_node_attach_link(n_ptr, l_ptr);
352 if (!l_ptr->owner) {
353 kfree(l_ptr);
354 return NULL;
355 }
356 367
357 k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); 368 k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
358 list_add_tail(&l_ptr->link_list, &b_ptr->links); 369 list_add_tail(&l_ptr->link_list, &b_ptr->links);
@@ -391,7 +402,9 @@ void tipc_link_delete(struct link *l_ptr)
391 402
392static void link_start(struct link *l_ptr) 403static void link_start(struct link *l_ptr)
393{ 404{
405 tipc_node_lock(l_ptr->owner);
394 link_state_event(l_ptr, STARTING_EVT); 406 link_state_event(l_ptr, STARTING_EVT);
407 tipc_node_unlock(l_ptr->owner);
395} 408}
396 409
397/** 410/**
@@ -406,7 +419,7 @@ static void link_start(struct link *l_ptr)
406 419
407static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) 420static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
408{ 421{
409 struct port *p_ptr; 422 struct tipc_port *p_ptr;
410 423
411 spin_lock_bh(&tipc_port_list_lock); 424 spin_lock_bh(&tipc_port_list_lock);
412 p_ptr = tipc_port_lock(origport); 425 p_ptr = tipc_port_lock(origport);
@@ -415,7 +428,7 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
415 goto exit; 428 goto exit;
416 if (!list_empty(&p_ptr->wait_list)) 429 if (!list_empty(&p_ptr->wait_list))
417 goto exit; 430 goto exit;
418 p_ptr->publ.congested = 1; 431 p_ptr->congested = 1;
419 p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); 432 p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
420 list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); 433 list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
421 l_ptr->stats.link_congs++; 434 l_ptr->stats.link_congs++;
@@ -428,8 +441,8 @@ exit:
428 441
429void tipc_link_wakeup_ports(struct link *l_ptr, int all) 442void tipc_link_wakeup_ports(struct link *l_ptr, int all)
430{ 443{
431 struct port *p_ptr; 444 struct tipc_port *p_ptr;
432 struct port *temp_p_ptr; 445 struct tipc_port *temp_p_ptr;
433 int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; 446 int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size;
434 447
435 if (all) 448 if (all)
@@ -445,11 +458,11 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all)
445 if (win <= 0) 458 if (win <= 0)
446 break; 459 break;
447 list_del_init(&p_ptr->wait_list); 460 list_del_init(&p_ptr->wait_list);
448 spin_lock_bh(p_ptr->publ.lock); 461 spin_lock_bh(p_ptr->lock);
449 p_ptr->publ.congested = 0; 462 p_ptr->congested = 0;
450 p_ptr->wakeup(&p_ptr->publ); 463 p_ptr->wakeup(p_ptr);
451 win -= p_ptr->waiting_pkts; 464 win -= p_ptr->waiting_pkts;
452 spin_unlock_bh(p_ptr->publ.lock); 465 spin_unlock_bh(p_ptr->lock);
453 } 466 }
454 467
455exit: 468exit:
@@ -549,7 +562,7 @@ void tipc_link_reset(struct link *l_ptr)
549 tipc_node_link_down(l_ptr->owner, l_ptr); 562 tipc_node_link_down(l_ptr->owner, l_ptr);
550 tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); 563 tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
551 564
552 if (was_active_link && tipc_node_has_active_links(l_ptr->owner) && 565 if (was_active_link && tipc_node_active_links(l_ptr->owner) &&
553 l_ptr->owner->permit_changeover) { 566 l_ptr->owner->permit_changeover) {
554 l_ptr->reset_checkpoint = checkpoint; 567 l_ptr->reset_checkpoint = checkpoint;
555 l_ptr->exp_msg_count = START_CHANGEOVER; 568 l_ptr->exp_msg_count = START_CHANGEOVER;
@@ -824,7 +837,29 @@ static void link_add_to_outqueue(struct link *l_ptr,
824 l_ptr->last_out = buf; 837 l_ptr->last_out = buf;
825 } else 838 } else
826 l_ptr->first_out = l_ptr->last_out = buf; 839 l_ptr->first_out = l_ptr->last_out = buf;
840
827 l_ptr->out_queue_size++; 841 l_ptr->out_queue_size++;
842 if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
843 l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
844}
845
846static void link_add_chain_to_outqueue(struct link *l_ptr,
847 struct sk_buff *buf_chain,
848 u32 long_msgno)
849{
850 struct sk_buff *buf;
851 struct tipc_msg *msg;
852
853 if (!l_ptr->next_out)
854 l_ptr->next_out = buf_chain;
855 while (buf_chain) {
856 buf = buf_chain;
857 buf_chain = buf_chain->next;
858
859 msg = buf_msg(buf);
860 msg_set_long_msgno(msg, long_msgno);
861 link_add_to_outqueue(l_ptr, buf, msg);
862 }
828} 863}
829 864
830/* 865/*
@@ -849,8 +884,9 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
849 884
850 if (unlikely(queue_size >= queue_limit)) { 885 if (unlikely(queue_size >= queue_limit)) {
851 if (imp <= TIPC_CRITICAL_IMPORTANCE) { 886 if (imp <= TIPC_CRITICAL_IMPORTANCE) {
852 return link_schedule_port(l_ptr, msg_origport(msg), 887 link_schedule_port(l_ptr, msg_origport(msg), size);
853 size); 888 buf_discard(buf);
889 return -ELINKCONG;
854 } 890 }
855 buf_discard(buf); 891 buf_discard(buf);
856 if (imp > CONN_MANAGER) { 892 if (imp > CONN_MANAGER) {
@@ -867,9 +903,6 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
867 903
868 /* Packet can be queued or sent: */ 904 /* Packet can be queued or sent: */
869 905
870 if (queue_size > l_ptr->stats.max_queue_sz)
871 l_ptr->stats.max_queue_sz = queue_size;
872
873 if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && 906 if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) &&
874 !link_congested(l_ptr))) { 907 !link_congested(l_ptr))) {
875 link_add_to_outqueue(l_ptr, buf, msg); 908 link_add_to_outqueue(l_ptr, buf, msg);
@@ -1027,12 +1060,13 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
1027 * except for total message length. 1060 * except for total message length.
1028 * Returns user data length or errno. 1061 * Returns user data length or errno.
1029 */ 1062 */
1030int tipc_link_send_sections_fast(struct port *sender, 1063int tipc_link_send_sections_fast(struct tipc_port *sender,
1031 struct iovec const *msg_sect, 1064 struct iovec const *msg_sect,
1032 const u32 num_sect, 1065 const u32 num_sect,
1066 unsigned int total_len,
1033 u32 destaddr) 1067 u32 destaddr)
1034{ 1068{
1035 struct tipc_msg *hdr = &sender->publ.phdr; 1069 struct tipc_msg *hdr = &sender->phdr;
1036 struct link *l_ptr; 1070 struct link *l_ptr;
1037 struct sk_buff *buf; 1071 struct sk_buff *buf;
1038 struct tipc_node *node; 1072 struct tipc_node *node;
@@ -1045,8 +1079,8 @@ again:
1045 * (Must not hold any locks while building message.) 1079 * (Must not hold any locks while building message.)
1046 */ 1080 */
1047 1081
1048 res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, 1082 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len,
1049 !sender->user_port, &buf); 1083 sender->max_pkt, !sender->user_port, &buf);
1050 1084
1051 read_lock_bh(&tipc_net_lock); 1085 read_lock_bh(&tipc_net_lock);
1052 node = tipc_node_find(destaddr); 1086 node = tipc_node_find(destaddr);
@@ -1056,9 +1090,7 @@ again:
1056 if (likely(l_ptr)) { 1090 if (likely(l_ptr)) {
1057 if (likely(buf)) { 1091 if (likely(buf)) {
1058 res = link_send_buf_fast(l_ptr, buf, 1092 res = link_send_buf_fast(l_ptr, buf,
1059 &sender->publ.max_pkt); 1093 &sender->max_pkt);
1060 if (unlikely(res < 0))
1061 buf_discard(buf);
1062exit: 1094exit:
1063 tipc_node_unlock(node); 1095 tipc_node_unlock(node);
1064 read_unlock_bh(&tipc_net_lock); 1096 read_unlock_bh(&tipc_net_lock);
@@ -1075,7 +1107,7 @@ exit:
1075 if (link_congested(l_ptr) || 1107 if (link_congested(l_ptr) ||
1076 !list_empty(&l_ptr->b_ptr->cong_links)) { 1108 !list_empty(&l_ptr->b_ptr->cong_links)) {
1077 res = link_schedule_port(l_ptr, 1109 res = link_schedule_port(l_ptr,
1078 sender->publ.ref, res); 1110 sender->ref, res);
1079 goto exit; 1111 goto exit;
1080 } 1112 }
1081 1113
@@ -1084,16 +1116,17 @@ exit:
1084 * then re-try fast path or fragment the message 1116 * then re-try fast path or fragment the message
1085 */ 1117 */
1086 1118
1087 sender->publ.max_pkt = l_ptr->max_pkt; 1119 sender->max_pkt = l_ptr->max_pkt;
1088 tipc_node_unlock(node); 1120 tipc_node_unlock(node);
1089 read_unlock_bh(&tipc_net_lock); 1121 read_unlock_bh(&tipc_net_lock);
1090 1122
1091 1123
1092 if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt) 1124 if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
1093 goto again; 1125 goto again;
1094 1126
1095 return link_send_sections_long(sender, msg_sect, 1127 return link_send_sections_long(sender, msg_sect,
1096 num_sect, destaddr); 1128 num_sect, total_len,
1129 destaddr);
1097 } 1130 }
1098 tipc_node_unlock(node); 1131 tipc_node_unlock(node);
1099 } 1132 }
@@ -1105,7 +1138,7 @@ exit:
1105 return tipc_reject_msg(buf, TIPC_ERR_NO_NODE); 1138 return tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
1106 if (res >= 0) 1139 if (res >= 0)
1107 return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect, 1140 return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
1108 TIPC_ERR_NO_NODE); 1141 total_len, TIPC_ERR_NO_NODE);
1109 return res; 1142 return res;
1110} 1143}
1111 1144
@@ -1123,15 +1156,16 @@ exit:
1123 * 1156 *
1124 * Returns user data length or errno. 1157 * Returns user data length or errno.
1125 */ 1158 */
1126static int link_send_sections_long(struct port *sender, 1159static int link_send_sections_long(struct tipc_port *sender,
1127 struct iovec const *msg_sect, 1160 struct iovec const *msg_sect,
1128 u32 num_sect, 1161 u32 num_sect,
1162 unsigned int total_len,
1129 u32 destaddr) 1163 u32 destaddr)
1130{ 1164{
1131 struct link *l_ptr; 1165 struct link *l_ptr;
1132 struct tipc_node *node; 1166 struct tipc_node *node;
1133 struct tipc_msg *hdr = &sender->publ.phdr; 1167 struct tipc_msg *hdr = &sender->phdr;
1134 u32 dsz = msg_data_sz(hdr); 1168 u32 dsz = total_len;
1135 u32 max_pkt, fragm_sz, rest; 1169 u32 max_pkt, fragm_sz, rest;
1136 struct tipc_msg fragm_hdr; 1170 struct tipc_msg fragm_hdr;
1137 struct sk_buff *buf, *buf_chain, *prev; 1171 struct sk_buff *buf, *buf_chain, *prev;
@@ -1142,7 +1176,7 @@ static int link_send_sections_long(struct port *sender,
1142 1176
1143again: 1177again:
1144 fragm_no = 1; 1178 fragm_no = 1;
1145 max_pkt = sender->publ.max_pkt - INT_H_SIZE; 1179 max_pkt = sender->max_pkt - INT_H_SIZE;
1146 /* leave room for tunnel header in case of link changeover */ 1180 /* leave room for tunnel header in case of link changeover */
1147 fragm_sz = max_pkt - INT_H_SIZE; 1181 fragm_sz = max_pkt - INT_H_SIZE;
1148 /* leave room for fragmentation header in each fragment */ 1182 /* leave room for fragmentation header in each fragment */
@@ -1157,7 +1191,6 @@ again:
1157 1191
1158 tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, 1192 tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
1159 INT_H_SIZE, msg_destnode(hdr)); 1193 INT_H_SIZE, msg_destnode(hdr));
1160 msg_set_link_selector(&fragm_hdr, sender->publ.ref);
1161 msg_set_size(&fragm_hdr, max_pkt); 1194 msg_set_size(&fragm_hdr, max_pkt);
1162 msg_set_fragm_no(&fragm_hdr, 1); 1195 msg_set_fragm_no(&fragm_hdr, 1);
1163 1196
@@ -1238,13 +1271,13 @@ error:
1238 node = tipc_node_find(destaddr); 1271 node = tipc_node_find(destaddr);
1239 if (likely(node)) { 1272 if (likely(node)) {
1240 tipc_node_lock(node); 1273 tipc_node_lock(node);
1241 l_ptr = node->active_links[sender->publ.ref & 1]; 1274 l_ptr = node->active_links[sender->ref & 1];
1242 if (!l_ptr) { 1275 if (!l_ptr) {
1243 tipc_node_unlock(node); 1276 tipc_node_unlock(node);
1244 goto reject; 1277 goto reject;
1245 } 1278 }
1246 if (l_ptr->max_pkt < max_pkt) { 1279 if (l_ptr->max_pkt < max_pkt) {
1247 sender->publ.max_pkt = l_ptr->max_pkt; 1280 sender->max_pkt = l_ptr->max_pkt;
1248 tipc_node_unlock(node); 1281 tipc_node_unlock(node);
1249 for (; buf_chain; buf_chain = buf) { 1282 for (; buf_chain; buf_chain = buf) {
1250 buf = buf_chain->next; 1283 buf = buf_chain->next;
@@ -1259,28 +1292,15 @@ reject:
1259 buf_discard(buf_chain); 1292 buf_discard(buf_chain);
1260 } 1293 }
1261 return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect, 1294 return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
1262 TIPC_ERR_NO_NODE); 1295 total_len, TIPC_ERR_NO_NODE);
1263 } 1296 }
1264 1297
1265 /* Append whole chain to send queue: */ 1298 /* Append chain of fragments to send queue & send them */
1266 1299
1267 buf = buf_chain; 1300 l_ptr->long_msg_seq_no++;
1268 l_ptr->long_msg_seq_no = mod(l_ptr->long_msg_seq_no + 1); 1301 link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no);
1269 if (!l_ptr->next_out) 1302 l_ptr->stats.sent_fragments += fragm_no;
1270 l_ptr->next_out = buf_chain;
1271 l_ptr->stats.sent_fragmented++; 1303 l_ptr->stats.sent_fragmented++;
1272 while (buf) {
1273 struct sk_buff *next = buf->next;
1274 struct tipc_msg *msg = buf_msg(buf);
1275
1276 l_ptr->stats.sent_fragments++;
1277 msg_set_long_msgno(msg, l_ptr->long_msg_seq_no);
1278 link_add_to_outqueue(l_ptr, buf, msg);
1279 buf = next;
1280 }
1281
1282 /* Send it, if possible: */
1283
1284 tipc_link_push_queue(l_ptr); 1304 tipc_link_push_queue(l_ptr);
1285 tipc_node_unlock(node); 1305 tipc_node_unlock(node);
1286 return dsz; 1306 return dsz;
@@ -1441,7 +1461,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
1441 info("Outstanding acks: %lu\n", 1461 info("Outstanding acks: %lu\n",
1442 (unsigned long) TIPC_SKB_CB(buf)->handle); 1462 (unsigned long) TIPC_SKB_CB(buf)->handle);
1443 1463
1444 n_ptr = l_ptr->owner->next; 1464 n_ptr = tipc_bclink_retransmit_to();
1445 tipc_node_lock(n_ptr); 1465 tipc_node_lock(n_ptr);
1446 1466
1447 tipc_addr_string_fill(addr_string, n_ptr->addr); 1467 tipc_addr_string_fill(addr_string, n_ptr->addr);
@@ -1595,11 +1615,10 @@ static int link_recv_buf_validate(struct sk_buff *buf)
1595 * structure (i.e. cannot be NULL), but bearer can be inactive. 1615 * structure (i.e. cannot be NULL), but bearer can be inactive.
1596 */ 1616 */
1597 1617
1598void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) 1618void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
1599{ 1619{
1600 read_lock_bh(&tipc_net_lock); 1620 read_lock_bh(&tipc_net_lock);
1601 while (head) { 1621 while (head) {
1602 struct bearer *b_ptr = (struct bearer *)tb_ptr;
1603 struct tipc_node *n_ptr; 1622 struct tipc_node *n_ptr;
1604 struct link *l_ptr; 1623 struct link *l_ptr;
1605 struct sk_buff *crs; 1624 struct sk_buff *crs;
@@ -1735,10 +1754,6 @@ deliver:
1735 tipc_node_unlock(n_ptr); 1754 tipc_node_unlock(n_ptr);
1736 tipc_link_recv_bundle(buf); 1755 tipc_link_recv_bundle(buf);
1737 continue; 1756 continue;
1738 case ROUTE_DISTRIBUTOR:
1739 tipc_node_unlock(n_ptr);
1740 buf_discard(buf);
1741 continue;
1742 case NAME_DISTRIBUTOR: 1757 case NAME_DISTRIBUTOR:
1743 tipc_node_unlock(n_ptr); 1758 tipc_node_unlock(n_ptr);
1744 tipc_named_recv(buf); 1759 tipc_named_recv(buf);
@@ -1765,6 +1780,10 @@ deliver:
1765 goto protocol_check; 1780 goto protocol_check;
1766 } 1781 }
1767 break; 1782 break;
1783 default:
1784 buf_discard(buf);
1785 buf = NULL;
1786 break;
1768 } 1787 }
1769 } 1788 }
1770 tipc_node_unlock(n_ptr); 1789 tipc_node_unlock(n_ptr);
@@ -1900,6 +1919,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
1900 struct sk_buff *buf = NULL; 1919 struct sk_buff *buf = NULL;
1901 struct tipc_msg *msg = l_ptr->pmsg; 1920 struct tipc_msg *msg = l_ptr->pmsg;
1902 u32 msg_size = sizeof(l_ptr->proto_msg); 1921 u32 msg_size = sizeof(l_ptr->proto_msg);
1922 int r_flag;
1903 1923
1904 if (link_blocked(l_ptr)) 1924 if (link_blocked(l_ptr))
1905 return; 1925 return;
@@ -1950,15 +1970,14 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
1950 msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1)); 1970 msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1));
1951 msg_set_seq_gap(msg, 0); 1971 msg_set_seq_gap(msg, 0);
1952 msg_set_next_sent(msg, 1); 1972 msg_set_next_sent(msg, 1);
1973 msg_set_probe(msg, 0);
1953 msg_set_link_tolerance(msg, l_ptr->tolerance); 1974 msg_set_link_tolerance(msg, l_ptr->tolerance);
1954 msg_set_linkprio(msg, l_ptr->priority); 1975 msg_set_linkprio(msg, l_ptr->priority);
1955 msg_set_max_pkt(msg, l_ptr->max_pkt_target); 1976 msg_set_max_pkt(msg, l_ptr->max_pkt_target);
1956 } 1977 }
1957 1978
1958 if (tipc_node_has_redundant_links(l_ptr->owner)) 1979 r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr));
1959 msg_set_redundant_link(msg); 1980 msg_set_redundant_link(msg, r_flag);
1960 else
1961 msg_clear_redundant_link(msg);
1962 msg_set_linkprio(msg, l_ptr->priority); 1981 msg_set_linkprio(msg, l_ptr->priority);
1963 1982
1964 /* Ensure sequence number will not fit : */ 1983 /* Ensure sequence number will not fit : */
@@ -1978,7 +1997,6 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
1978 skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); 1997 skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
1979 return; 1998 return;
1980 } 1999 }
1981 msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
1982 2000
1983 /* Message can be sent */ 2001 /* Message can be sent */
1984 2002
@@ -2066,7 +2084,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
2066 l_ptr->peer_bearer_id = msg_bearer_id(msg); 2084 l_ptr->peer_bearer_id = msg_bearer_id(msg);
2067 2085
2068 /* Synchronize broadcast sequence numbers */ 2086 /* Synchronize broadcast sequence numbers */
2069 if (!tipc_node_has_redundant_links(l_ptr->owner)) 2087 if (!tipc_node_redundant_links(l_ptr->owner))
2070 l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg)); 2088 l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg));
2071 break; 2089 break;
2072 case STATE_MSG: 2090 case STATE_MSG:
@@ -2397,6 +2415,8 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
2397 */ 2415 */
2398static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) 2416static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2399{ 2417{
2418 struct sk_buff *buf_chain = NULL;
2419 struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain;
2400 struct tipc_msg *inmsg = buf_msg(buf); 2420 struct tipc_msg *inmsg = buf_msg(buf);
2401 struct tipc_msg fragm_hdr; 2421 struct tipc_msg fragm_hdr;
2402 u32 insize = msg_size(inmsg); 2422 u32 insize = msg_size(inmsg);
@@ -2405,7 +2425,7 @@ static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2405 u32 rest = insize; 2425 u32 rest = insize;
2406 u32 pack_sz = l_ptr->max_pkt; 2426 u32 pack_sz = l_ptr->max_pkt;
2407 u32 fragm_sz = pack_sz - INT_H_SIZE; 2427 u32 fragm_sz = pack_sz - INT_H_SIZE;
2408 u32 fragm_no = 1; 2428 u32 fragm_no = 0;
2409 u32 destaddr; 2429 u32 destaddr;
2410 2430
2411 if (msg_short(inmsg)) 2431 if (msg_short(inmsg))
@@ -2413,17 +2433,10 @@ static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2413 else 2433 else
2414 destaddr = msg_destnode(inmsg); 2434 destaddr = msg_destnode(inmsg);
2415 2435
2416 if (msg_routed(inmsg))
2417 msg_set_prevnode(inmsg, tipc_own_addr);
2418
2419 /* Prepare reusable fragment header: */ 2436 /* Prepare reusable fragment header: */
2420 2437
2421 tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, 2438 tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
2422 INT_H_SIZE, destaddr); 2439 INT_H_SIZE, destaddr);
2423 msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg));
2424 msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++));
2425 msg_set_fragm_no(&fragm_hdr, fragm_no);
2426 l_ptr->stats.sent_fragmented++;
2427 2440
2428 /* Chop up message: */ 2441 /* Chop up message: */
2429 2442
@@ -2436,27 +2449,37 @@ static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2436 } 2449 }
2437 fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE); 2450 fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
2438 if (fragm == NULL) { 2451 if (fragm == NULL) {
2439 warn("Link unable to fragment message\n"); 2452 buf_discard(buf);
2440 dsz = -ENOMEM; 2453 while (buf_chain) {
2441 goto exit; 2454 buf = buf_chain;
2455 buf_chain = buf_chain->next;
2456 buf_discard(buf);
2457 }
2458 return -ENOMEM;
2442 } 2459 }
2443 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); 2460 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
2461 fragm_no++;
2462 msg_set_fragm_no(&fragm_hdr, fragm_no);
2444 skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE); 2463 skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
2445 skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs, 2464 skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
2446 fragm_sz); 2465 fragm_sz);
2447 /* Send queued messages first, if any: */ 2466 buf_chain_tail->next = fragm;
2467 buf_chain_tail = fragm;
2448 2468
2449 l_ptr->stats.sent_fragments++;
2450 tipc_link_send_buf(l_ptr, fragm);
2451 if (!tipc_link_is_up(l_ptr))
2452 return dsz;
2453 msg_set_fragm_no(&fragm_hdr, ++fragm_no);
2454 rest -= fragm_sz; 2469 rest -= fragm_sz;
2455 crs += fragm_sz; 2470 crs += fragm_sz;
2456 msg_set_type(&fragm_hdr, FRAGMENT); 2471 msg_set_type(&fragm_hdr, FRAGMENT);
2457 } 2472 }
2458exit:
2459 buf_discard(buf); 2473 buf_discard(buf);
2474
2475 /* Append chain of fragments to send queue & send them */
2476
2477 l_ptr->long_msg_seq_no++;
2478 link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no);
2479 l_ptr->stats.sent_fragments += fragm_no;
2480 l_ptr->stats.sent_fragmented++;
2481 tipc_link_push_queue(l_ptr);
2482
2460 return dsz; 2483 return dsz;
2461} 2484}
2462 2485
@@ -2464,7 +2487,7 @@ exit:
2464 * A pending message being re-assembled must store certain values 2487 * A pending message being re-assembled must store certain values
2465 * to handle subsequent fragments correctly. The following functions 2488 * to handle subsequent fragments correctly. The following functions
2466 * help storing these values in unused, available fields in the 2489 * help storing these values in unused, available fields in the
2467 * pending message. This makes dynamic memory allocation unecessary. 2490 * pending message. This makes dynamic memory allocation unnecessary.
2468 */ 2491 */
2469 2492
2470static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno) 2493static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno)
@@ -2618,6 +2641,9 @@ static void link_check_defragm_bufs(struct link *l_ptr)
2618 2641
2619static void link_set_supervision_props(struct link *l_ptr, u32 tolerance) 2642static void link_set_supervision_props(struct link *l_ptr, u32 tolerance)
2620{ 2643{
2644 if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL))
2645 return;
2646
2621 l_ptr->tolerance = tolerance; 2647 l_ptr->tolerance = tolerance;
2622 l_ptr->continuity_interval = 2648 l_ptr->continuity_interval =
2623 ((tolerance / 4) > 500) ? 500 : tolerance / 4; 2649 ((tolerance / 4) > 500) ? 500 : tolerance / 4;
@@ -2658,7 +2684,7 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
2658static struct link *link_find_link(const char *name, struct tipc_node **node) 2684static struct link *link_find_link(const char *name, struct tipc_node **node)
2659{ 2685{
2660 struct link_name link_name_parts; 2686 struct link_name link_name_parts;
2661 struct bearer *b_ptr; 2687 struct tipc_bearer *b_ptr;
2662 struct link *l_ptr; 2688 struct link *l_ptr;
2663 2689
2664 if (!link_name_validate(name, &link_name_parts)) 2690 if (!link_name_validate(name, &link_name_parts))
@@ -2961,7 +2987,7 @@ static void link_print(struct link *l_ptr, const char *str)
2961 2987
2962 tipc_printf(buf, str); 2988 tipc_printf(buf, str);
2963 tipc_printf(buf, "Link %x<%s>:", 2989 tipc_printf(buf, "Link %x<%s>:",
2964 l_ptr->addr, l_ptr->b_ptr->publ.name); 2990 l_ptr->addr, l_ptr->b_ptr->name);
2965 2991
2966#ifdef CONFIG_TIPC_DEBUG 2992#ifdef CONFIG_TIPC_DEBUG
2967 if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr)) 2993 if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr))
@@ -2981,9 +3007,9 @@ static void link_print(struct link *l_ptr, const char *str)
2981 != (l_ptr->out_queue_size - 1)) || 3007 != (l_ptr->out_queue_size - 1)) ||
2982 (l_ptr->last_out->next != NULL)) { 3008 (l_ptr->last_out->next != NULL)) {
2983 tipc_printf(buf, "\nSend queue inconsistency\n"); 3009 tipc_printf(buf, "\nSend queue inconsistency\n");
2984 tipc_printf(buf, "first_out= %x ", l_ptr->first_out); 3010 tipc_printf(buf, "first_out= %p ", l_ptr->first_out);
2985 tipc_printf(buf, "next_out= %x ", l_ptr->next_out); 3011 tipc_printf(buf, "next_out= %p ", l_ptr->next_out);
2986 tipc_printf(buf, "last_out= %x ", l_ptr->last_out); 3012 tipc_printf(buf, "last_out= %p ", l_ptr->last_out);
2987 } 3013 }
2988 } else 3014 } else
2989 tipc_printf(buf, "[]"); 3015 tipc_printf(buf, "[]");
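Both fragmentation paths above (link_send_sections_long() and link_send_long_buf()) now build the complete chain of fragment buffers first and only then queue it via the new link_add_chain_to_outqueue() helper under a single long-message number, instead of queueing and sending fragments one at a time; on allocation failure the partially built chain is discarded rather than leaving a half-sent message behind, and fragments are numbered from 1 with the total added to stats.sent_fragments. The stand-alone sketch below only models how a message is chopped and numbered, assuming the 40-byte internal header; the kernel's buffer handling, headers and queueing are omitted.

/*
 * Model of the fragment-chain construction: chop a message into pieces of
 * at most (max_pkt - INT_H_SIZE) payload bytes and number them from 1.
 */
#include <stdio.h>

#define INT_H_SIZE 40                           /* internal TIPC header size */

static unsigned int build_fragment_chain(unsigned int msg_size,
                                         unsigned int max_pkt)
{
        unsigned int fragm_sz = max_pkt - INT_H_SIZE;   /* payload per fragment */
        unsigned int rest = msg_size;
        unsigned int fragm_no = 0;

        while (rest > 0) {
                unsigned int sz = rest < fragm_sz ? rest : fragm_sz;

                fragm_no++;                     /* fragments are numbered from 1 */
                printf("fragment %u: %u payload bytes + %u header\n",
                       fragm_no, sz, INT_H_SIZE);
                rest -= sz;
        }
        return fragm_no;                        /* total added to sent_fragments */
}

int main(void)
{
        /* e.g. a 3000-byte message over a bearer with a 1500-byte max packet */
        unsigned int n = build_fragment_chain(3000, 1500);

        printf("chain of %u fragments queued under one long_msg_seq_no\n", n);
        return 0;
}
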
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 70967e637027..74fbecab1ea0 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -2,7 +2,7 @@
2 * net/tipc/link.h: Include file for TIPC link code 2 * net/tipc/link.h: Include file for TIPC link code
3 * 3 *
4 * Copyright (c) 1995-2006, Ericsson AB 4 * Copyright (c) 1995-2006, Ericsson AB
5 * Copyright (c) 2004-2005, Wind River Systems 5 * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -122,7 +122,7 @@ struct link {
122 u32 checkpoint; 122 u32 checkpoint;
123 u32 peer_session; 123 u32 peer_session;
124 u32 peer_bearer_id; 124 u32 peer_bearer_id;
125 struct bearer *b_ptr; 125 struct tipc_bearer *b_ptr;
126 u32 tolerance; 126 u32 tolerance;
127 u32 continuity_interval; 127 u32 continuity_interval;
128 u32 abort_limit; 128 u32 abort_limit;
@@ -196,24 +196,19 @@ struct link {
196 u32 bearer_congs; 196 u32 bearer_congs;
197 u32 deferred_recv; 197 u32 deferred_recv;
198 u32 duplicates; 198 u32 duplicates;
199 199 u32 max_queue_sz; /* send queue size high water mark */
200 /* for statistical profiling of send queue size */ 200 u32 accu_queue_sz; /* used for send queue size profiling */
201 201 u32 queue_sz_counts; /* used for send queue size profiling */
202 u32 max_queue_sz; 202 u32 msg_length_counts; /* used for message length profiling */
203 u32 accu_queue_sz; 203 u32 msg_lengths_total; /* used for message length profiling */
204 u32 queue_sz_counts; 204 u32 msg_length_profile[7]; /* used for msg. length profiling */
205
206 /* for statistical profiling of message lengths */
207
208 u32 msg_length_counts;
209 u32 msg_lengths_total;
210 u32 msg_length_profile[7];
211 } stats; 205 } stats;
212}; 206};
213 207
214struct port; 208struct tipc_port;
215 209
216struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, 210struct link *tipc_link_create(struct tipc_node *n_ptr,
211 struct tipc_bearer *b_ptr,
217 const struct tipc_media_addr *media_addr); 212 const struct tipc_media_addr *media_addr);
218void tipc_link_delete(struct link *l_ptr); 213void tipc_link_delete(struct link *l_ptr);
219void tipc_link_changeover(struct link *l_ptr); 214void tipc_link_changeover(struct link *l_ptr);
@@ -230,9 +225,10 @@ void tipc_link_reset(struct link *l_ptr);
230int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); 225int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
231int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf); 226int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
232u32 tipc_link_get_max_pkt(u32 dest, u32 selector); 227u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
233int tipc_link_send_sections_fast(struct port *sender, 228int tipc_link_send_sections_fast(struct tipc_port *sender,
234 struct iovec const *msg_sect, 229 struct iovec const *msg_sect,
235 const u32 num_sect, 230 const u32 num_sect,
231 unsigned int total_len,
236 u32 destnode); 232 u32 destnode);
237void tipc_link_recv_bundle(struct sk_buff *buf); 233void tipc_link_recv_bundle(struct sk_buff *buf);
238int tipc_link_recv_fragment(struct sk_buff **pending, 234int tipc_link_recv_fragment(struct sk_buff **pending,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index bb6180c4fcbb..03e57bf92c73 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -2,7 +2,7 @@
2 * net/tipc/msg.c: TIPC message header routines 2 * net/tipc/msg.c: TIPC message header routines
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -68,20 +68,6 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
68} 68}
69 69
70/** 70/**
71 * tipc_msg_calc_data_size - determine total data size for message
72 */
73
74int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
75{
76 int dsz = 0;
77 int i;
78
79 for (i = 0; i < num_sect; i++)
80 dsz += msg_sect[i].iov_len;
81 return dsz;
82}
83
84/**
85 * tipc_msg_build - create message using specified header and data 71 * tipc_msg_build - create message using specified header and data
86 * 72 *
87 * Note: Caller must not hold any locks in case copy_from_user() is interrupted! 73 * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
@@ -89,18 +75,13 @@ int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
89 * Returns message data size or errno 75 * Returns message data size or errno
90 */ 76 */
91 77
92int tipc_msg_build(struct tipc_msg *hdr, 78int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
93 struct iovec const *msg_sect, u32 num_sect, 79 u32 num_sect, unsigned int total_len,
94 int max_size, int usrmem, struct sk_buff **buf) 80 int max_size, int usrmem, struct sk_buff **buf)
95{ 81{
96 int dsz, sz, hsz, pos, res, cnt; 82 int dsz, sz, hsz, pos, res, cnt;
97 83
98 dsz = tipc_msg_calc_data_size(msg_sect, num_sect); 84 dsz = total_len;
99 if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
100 *buf = NULL;
101 return -EINVAL;
102 }
103
104 pos = hsz = msg_hdr_sz(hdr); 85 pos = hsz = msg_hdr_sz(hdr);
105 sz = hsz + dsz; 86 sz = hsz + dsz;
106 msg_set_size(hdr, sz); 87 msg_set_size(hdr, sz);
@@ -192,8 +173,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
192 default: 173 default:
193 tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg)); 174 tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg));
194 } 175 }
195 if (msg_routed(msg) && !msg_non_seq(msg))
196 tipc_printf(buf, "ROUT:");
197 if (msg_reroute_cnt(msg)) 176 if (msg_reroute_cnt(msg))
198 tipc_printf(buf, "REROUTED(%u):", 177 tipc_printf(buf, "REROUTED(%u):",
199 msg_reroute_cnt(msg)); 178 msg_reroute_cnt(msg));
@@ -210,8 +189,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
210 default: 189 default:
211 tipc_printf(buf, "UNKNOWN:%x", msg_type(msg)); 190 tipc_printf(buf, "UNKNOWN:%x", msg_type(msg));
212 } 191 }
213 if (msg_routed(msg))
214 tipc_printf(buf, "ROUT:");
215 if (msg_reroute_cnt(msg)) 192 if (msg_reroute_cnt(msg))
216 tipc_printf(buf, "REROUTED(%u):", 193 tipc_printf(buf, "REROUTED(%u):",
217 msg_reroute_cnt(msg)); 194 msg_reroute_cnt(msg));
@@ -232,13 +209,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
232 default: 209 default:
233 tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); 210 tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
234 } 211 }
235 if (msg_routed(msg))
236 tipc_printf(buf, "ROUT:");
237 if (msg_reroute_cnt(msg)) 212 if (msg_reroute_cnt(msg))
238 tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); 213 tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg));
239 break; 214 break;
240 case LINK_PROTOCOL: 215 case LINK_PROTOCOL:
241 tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg));
242 switch (msg_type(msg)) { 216 switch (msg_type(msg)) {
243 case STATE_MSG: 217 case STATE_MSG:
244 tipc_printf(buf, "STATE:"); 218 tipc_printf(buf, "STATE:");
@@ -275,33 +249,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
275 tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); 249 tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
276 } 250 }
277 break; 251 break;
278 case ROUTE_DISTRIBUTOR:
279 tipc_printf(buf, "ROUTING_MNG:");
280 switch (msg_type(msg)) {
281 case EXT_ROUTING_TABLE:
282 tipc_printf(buf, "EXT_TBL:");
283 tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
284 break;
285 case LOCAL_ROUTING_TABLE:
286 tipc_printf(buf, "LOCAL_TBL:");
287 tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
288 break;
289 case SLAVE_ROUTING_TABLE:
290 tipc_printf(buf, "DP_TBL:");
291 tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
292 break;
293 case ROUTE_ADDITION:
294 tipc_printf(buf, "ADD:");
295 tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
296 break;
297 case ROUTE_REMOVAL:
298 tipc_printf(buf, "REMOVE:");
299 tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
300 break;
301 default:
302 tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
303 }
304 break;
305 case LINK_CONFIG: 252 case LINK_CONFIG:
306 tipc_printf(buf, "CFG:"); 253 tipc_printf(buf, "CFG:");
307 switch (msg_type(msg)) { 254 switch (msg_type(msg)) {
@@ -381,20 +328,15 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
381 tipc_printf(buf, ":OPRT(%u):", msg_origport(msg)); 328 tipc_printf(buf, ":OPRT(%u):", msg_origport(msg));
382 tipc_printf(buf, ":DPRT(%u):", msg_destport(msg)); 329 tipc_printf(buf, ":DPRT(%u):", msg_destport(msg));
383 } 330 }
384 if (msg_routed(msg) && !msg_non_seq(msg))
385 tipc_printf(buf, ":TSEQN(%u)", msg_transp_seqno(msg));
386 } 331 }
387 if (msg_user(msg) == NAME_DISTRIBUTOR) { 332 if (msg_user(msg) == NAME_DISTRIBUTOR) {
388 tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg)); 333 tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg));
389 tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg)); 334 tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg));
390 if (msg_routed(msg))
391 tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg));
392 } 335 }
393 336
394 if (msg_user(msg) == LINK_CONFIG) { 337 if (msg_user(msg) == LINK_CONFIG) {
395 u32 *raw = (u32 *)msg; 338 u32 *raw = (u32 *)msg;
396 struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5]; 339 struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5];
397 tipc_printf(buf, ":REQL(%u):", msg_req_links(msg));
398 tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg)); 340 tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg));
399 tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg)); 341 tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg));
400 tipc_media_addr_printf(buf, orig); 342 tipc_media_addr_printf(buf, orig);
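
The msg.c hunk above drops tipc_msg_calc_data_size() and has tipc_msg_build() receive the payload length (total_len) from its caller. The fragment below is only an illustrative sketch of what a caller now does; the helper name build_and_send() is hypothetical, while tipc_msg_build() and its new parameter order come from this patch.

static int build_and_send(struct tipc_msg *hdr, struct iovec const *msg_sect,
			  u32 num_sect, int max_size, struct sk_buff **buf)
{
	unsigned int total_len = 0;
	u32 i;

	/* The length is summed once by the caller instead of being
	 * recomputed (and re-validated) inside tipc_msg_build(). */
	for (i = 0; i < num_sect; i++)
		total_len += msg_sect[i].iov_len;

	return tipc_msg_build(hdr, msg_sect, num_sect, total_len,
			      max_size, 1 /* usrmem */, buf);
}
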
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 92c4c4fd7b3f..8452454731fa 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -2,7 +2,7 @@
2 * net/tipc/msg.h: Include file for TIPC message header routines 2 * net/tipc/msg.h: Include file for TIPC message header routines
3 * 3 *
4 * Copyright (c) 2000-2007, Ericsson AB 4 * Copyright (c) 2000-2007, Ericsson AB
5 * Copyright (c) 2005-2008, Wind River Systems 5 * Copyright (c) 2005-2008, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -39,41 +39,24 @@
39 39
40#include "bearer.h" 40#include "bearer.h"
41 41
42/*
43 * Constants and routines used to read and write TIPC payload message headers
44 *
45 * Note: Some items are also used with TIPC internal message headers
46 */
47
42#define TIPC_VERSION 2 48#define TIPC_VERSION 2
43 49
44/* 50/*
45 * TIPC user data message header format, version 2: 51 * Payload message users are defined in TIPC's public API:
46 * 52 * - TIPC_LOW_IMPORTANCE
47 * 53 * - TIPC_MEDIUM_IMPORTANCE
48 * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 54 * - TIPC_HIGH_IMPORTANCE
49 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 55 * - TIPC_CRITICAL_IMPORTANCE
50 * w0:|vers | user |hdr sz |n|d|s|-| message size | 56 */
51 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 57
52 * w1:|mstyp| error |rer cnt|lsc|opt p| broadcast ack no | 58/*
53 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 59 * Payload message types
54 * w2:| link level ack no | broadcast/link level seq no |
55 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
56 * w3:| previous node |
57 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
58 * w4:| originating port |
59 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
60 * w5:| destination port |
61 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
62 * w6:| originating node |
63 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
64 * w7:| destination node |
65 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
66 * w8:| name type / transport sequence number |
67 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
68 * w9:| name instance/multicast lower bound |
69 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
70 * wA:| multicast upper bound |
71 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
72 * / /
73 * \ options \
74 * / /
75 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
76 *
77 */ 60 */
78 61
79#define TIPC_CONN_MSG 0 62#define TIPC_CONN_MSG 0
@@ -81,6 +64,9 @@
81#define TIPC_NAMED_MSG 2 64#define TIPC_NAMED_MSG 2
82#define TIPC_DIRECT_MSG 3 65#define TIPC_DIRECT_MSG 3
83 66
67/*
68 * Message header sizes
69 */
84 70
85#define SHORT_H_SIZE 24 /* Connected, in-cluster messages */ 71#define SHORT_H_SIZE 24 /* Connected, in-cluster messages */
86#define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ 72#define DIR_MSG_H_SIZE 32 /* Directly addressed messages */
@@ -421,13 +407,6 @@ static inline int msg_is_dest(struct tipc_msg *m, u32 d)
421 return msg_short(m) || (msg_destnode(m) == d); 407 return msg_short(m) || (msg_destnode(m) == d);
422} 408}
423 409
424static inline u32 msg_routed(struct tipc_msg *m)
425{
426 if (likely(msg_short(m)))
427 return 0;
428 return (msg_destnode(m) ^ msg_orignode(m)) >> 11;
429}
430
431static inline u32 msg_nametype(struct tipc_msg *m) 410static inline u32 msg_nametype(struct tipc_msg *m)
432{ 411{
433 return msg_word(m, 8); 412 return msg_word(m, 8);
@@ -438,26 +417,6 @@ static inline void msg_set_nametype(struct tipc_msg *m, u32 n)
438 msg_set_word(m, 8, n); 417 msg_set_word(m, 8, n);
439} 418}
440 419
441static inline u32 msg_transp_seqno(struct tipc_msg *m)
442{
443 return msg_word(m, 8);
444}
445
446static inline void msg_set_timestamp(struct tipc_msg *m, u32 n)
447{
448 msg_set_word(m, 8, n);
449}
450
451static inline u32 msg_timestamp(struct tipc_msg *m)
452{
453 return msg_word(m, 8);
454}
455
456static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n)
457{
458 msg_set_word(m, 8, n);
459}
460
461static inline u32 msg_nameinst(struct tipc_msg *m) 420static inline u32 msg_nameinst(struct tipc_msg *m)
462{ 421{
463 return msg_word(m, 9); 422 return msg_word(m, 9);
@@ -500,40 +459,11 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
500 459
501 460
502/* 461/*
503 TIPC internal message header format, version 2 462 * Constants and routines used to read and write TIPC internal message headers
504 463 */
505 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
506 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
507 w0:|vers |msg usr|hdr sz |n|resrv| packet size |
508 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
509 w1:|m typ| sequence gap | broadcast ack no |
510 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
511 w2:| link level ack no/bc_gap_from | seq no / bcast_gap_to |
512 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
513 w3:| previous node |
514 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
515 w4:| next sent broadcast/fragm no | next sent pkt/ fragm msg no |
516 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
517 w5:| session no |rsv=0|r|berid|link prio|netpl|p|
518 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
519 w6:| originating node |
520 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
521 w7:| destination node |
522 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
523 w8:| transport sequence number |
524 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
525 w9:| msg count / bcast tag | link tolerance |
526 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
527 \ \
528 / User Specific Data /
529 \ \
530 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
531
532 NB: CONN_MANAGER use data message format. LINK_CONFIG has own format.
533*/
534 464
535/* 465/*
536 * Internal users 466 * Internal message users
537 */ 467 */
538 468
539#define BCAST_PROTOCOL 5 469#define BCAST_PROTOCOL 5
@@ -545,10 +475,9 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
545#define NAME_DISTRIBUTOR 11 475#define NAME_DISTRIBUTOR 11
546#define MSG_FRAGMENTER 12 476#define MSG_FRAGMENTER 12
547#define LINK_CONFIG 13 477#define LINK_CONFIG 13
548#define DSC_H_SIZE 40
549 478
550/* 479/*
551 * Connection management protocol messages 480 * Connection management protocol message types
552 */ 481 */
553 482
554#define CONN_PROBE 0 483#define CONN_PROBE 0
@@ -556,12 +485,41 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
556#define CONN_ACK 2 485#define CONN_ACK 2
557 486
558/* 487/*
559 * Name distributor messages 488 * Name distributor message types
560 */ 489 */
561 490
562#define PUBLICATION 0 491#define PUBLICATION 0
563#define WITHDRAWAL 1 492#define WITHDRAWAL 1
564 493
494/*
495 * Segmentation message types
496 */
497
498#define FIRST_FRAGMENT 0
499#define FRAGMENT 1
500#define LAST_FRAGMENT 2
501
502/*
503 * Link management protocol message types
504 */
505
506#define STATE_MSG 0
507#define RESET_MSG 1
508#define ACTIVATE_MSG 2
509
510/*
511 * Changeover tunnel message types
512 */
513#define DUPLICATE_MSG 0
514#define ORIGINAL_MSG 1
515
516/*
517 * Config protocol message types
518 */
519
520#define DSC_REQ_MSG 0
521#define DSC_RESP_MSG 1
522
565 523
566/* 524/*
567 * Word 1 525 * Word 1
@@ -577,16 +535,6 @@ static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n)
577 msg_set_bits(m, 1, 16, 0x1fff, n); 535 msg_set_bits(m, 1, 16, 0x1fff, n);
578} 536}
579 537
580static inline u32 msg_req_links(struct tipc_msg *m)
581{
582 return msg_bits(m, 1, 16, 0xfff);
583}
584
585static inline void msg_set_req_links(struct tipc_msg *m, u32 n)
586{
587 msg_set_bits(m, 1, 16, 0xfff, n);
588}
589
590 538
591/* 539/*
592 * Word 2 540 * Word 2
@@ -749,14 +697,9 @@ static inline u32 msg_redundant_link(struct tipc_msg *m)
749 return msg_bits(m, 5, 12, 0x1); 697 return msg_bits(m, 5, 12, 0x1);
750} 698}
751 699
752static inline void msg_set_redundant_link(struct tipc_msg *m) 700static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r)
753{ 701{
754 msg_set_bits(m, 5, 12, 0x1, 1); 702 msg_set_bits(m, 5, 12, 0x1, r);
755}
756
757static inline void msg_clear_redundant_link(struct tipc_msg *m)
758{
759 msg_set_bits(m, 5, 12, 0x1, 0);
760} 703}
761 704
762 705
@@ -804,65 +747,11 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
804 msg_set_bits(m, 9, 0, 0xffff, n); 747 msg_set_bits(m, 9, 0, 0xffff, n);
805} 748}
806 749
807/*
808 * Routing table message data
809 */
810
811
812static inline u32 msg_remote_node(struct tipc_msg *m)
813{
814 return msg_word(m, msg_hdr_sz(m)/4);
815}
816
817static inline void msg_set_remote_node(struct tipc_msg *m, u32 a)
818{
819 msg_set_word(m, msg_hdr_sz(m)/4, a);
820}
821
822/*
823 * Segmentation message types
824 */
825
826#define FIRST_FRAGMENT 0
827#define FRAGMENT 1
828#define LAST_FRAGMENT 2
829
830/*
831 * Link management protocol message types
832 */
833
834#define STATE_MSG 0
835#define RESET_MSG 1
836#define ACTIVATE_MSG 2
837
838/*
839 * Changeover tunnel message types
840 */
841#define DUPLICATE_MSG 0
842#define ORIGINAL_MSG 1
843
844/*
845 * Routing table message types
846 */
847#define EXT_ROUTING_TABLE 0
848#define LOCAL_ROUTING_TABLE 1 /* obsoleted */
849#define SLAVE_ROUTING_TABLE 2
850#define ROUTE_ADDITION 3
851#define ROUTE_REMOVAL 4
852
853/*
854 * Config protocol message types
855 */
856
857#define DSC_REQ_MSG 0
858#define DSC_RESP_MSG 1
859
860u32 tipc_msg_tot_importance(struct tipc_msg *m); 750u32 tipc_msg_tot_importance(struct tipc_msg *m);
861void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, 751void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
862 u32 hsize, u32 destnode); 752 u32 hsize, u32 destnode);
863int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect); 753int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
864int tipc_msg_build(struct tipc_msg *hdr, 754 u32 num_sect, unsigned int total_len,
865 struct iovec const *msg_sect, u32 num_sect,
866 int max_size, int usrmem, struct sk_buff **buf); 755 int max_size, int usrmem, struct sk_buff **buf);
867 756
868static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a) 757static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
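
Most of the msg.h churn above touches inline accessors that pack fields into 32-bit header words, e.g. the consolidated msg_set_redundant_link(m, r) that replaces a separate set/clear pair. A minimal sketch of that accessor pattern follows (user-space style; network byte order handling and the struct tipc_msg wrapper are omitted, so this is not the kernel implementation):

#include <stdint.h>

static inline uint32_t get_bits(const uint32_t *hdr, int w, int pos, uint32_t mask)
{
	return (hdr[w] >> pos) & mask;
}

static inline void set_bits(uint32_t *hdr, int w, int pos, uint32_t mask, uint32_t val)
{
	hdr[w] &= ~(mask << pos);		/* clear the field */
	hdr[w] |= (val & mask) << pos;		/* write the new value */
}

/* Equivalent of the patched msg_set_redundant_link(): word 5, bit 12,
 * one-bit field, value 0 or 1 supplied by the caller. */
static inline void set_redundant_link(uint32_t *hdr, uint32_t r)
{
	set_bits(hdr, 5, 12, 0x1, r);
}
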
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 483c226c9581..80025a1b3bfd 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -2,7 +2,7 @@
2 * net/tipc/name_distr.c: TIPC name distribution code 2 * net/tipc/name_distr.c: TIPC name distribution code
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -109,11 +109,9 @@ static void named_cluster_distribute(struct sk_buff *buf)
109{ 109{
110 struct sk_buff *buf_copy; 110 struct sk_buff *buf_copy;
111 struct tipc_node *n_ptr; 111 struct tipc_node *n_ptr;
112 u32 n_num;
113 112
114 for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { 113 list_for_each_entry(n_ptr, &tipc_node_list, list) {
115 n_ptr = tipc_net.nodes[n_num]; 114 if (tipc_node_active_links(n_ptr)) {
116 if (n_ptr && tipc_node_has_active_links(n_ptr)) {
117 buf_copy = skb_copy(buf, GFP_ATOMIC); 115 buf_copy = skb_copy(buf, GFP_ATOMIC);
118 if (!buf_copy) 116 if (!buf_copy)
119 break; 117 break;
@@ -162,7 +160,7 @@ void tipc_named_withdraw(struct publication *publ)
162 160
163 buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); 161 buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
164 if (!buf) { 162 if (!buf) {
165 warn("Withdrawl distribution failure\n"); 163 warn("Withdrawal distribution failure\n");
166 return; 164 return;
167 } 165 }
168 166
@@ -214,17 +212,16 @@ exit:
214} 212}
215 213
216/** 214/**
217 * node_is_down - remove publication associated with a failed node 215 * named_purge_publ - remove publication associated with a failed node
218 * 216 *
219 * Invoked for each publication issued by a newly failed node. 217 * Invoked for each publication issued by a newly failed node.
220 * Removes publication structure from name table & deletes it. 218 * Removes publication structure from name table & deletes it.
221 * In rare cases the link may have come back up again when this 219 * In rare cases the link may have come back up again when this
222 * function is called, and we have two items representing the same 220 * function is called, and we have two items representing the same
223 * publication. Nudge this item's key to distinguish it from the other. 221 * publication. Nudge this item's key to distinguish it from the other.
224 * (Note: Publication's node subscription is already unsubscribed.)
225 */ 222 */
226 223
227static void node_is_down(struct publication *publ) 224static void named_purge_publ(struct publication *publ)
228{ 225{
229 struct publication *p; 226 struct publication *p;
230 227
@@ -232,6 +229,8 @@ static void node_is_down(struct publication *publ)
232 publ->key += 1222345; 229 publ->key += 1222345;
233 p = tipc_nametbl_remove_publ(publ->type, publ->lower, 230 p = tipc_nametbl_remove_publ(publ->type, publ->lower,
234 publ->node, publ->ref, publ->key); 231 publ->node, publ->ref, publ->key);
232 if (p)
233 tipc_nodesub_unsubscribe(&p->subscr);
235 write_unlock_bh(&tipc_nametbl_lock); 234 write_unlock_bh(&tipc_nametbl_lock);
236 235
237 if (p != publ) { 236 if (p != publ) {
@@ -268,7 +267,8 @@ void tipc_named_recv(struct sk_buff *buf)
268 tipc_nodesub_subscribe(&publ->subscr, 267 tipc_nodesub_subscribe(&publ->subscr,
269 msg_orignode(msg), 268 msg_orignode(msg),
270 publ, 269 publ,
271 (net_ev_handler)node_is_down); 270 (net_ev_handler)
271 named_purge_publ);
272 } 272 }
273 } else if (msg_type(msg) == WITHDRAWAL) { 273 } else if (msg_type(msg) == WITHDRAWAL) {
274 publ = tipc_nametbl_remove_publ(ntohl(item->type), 274 publ = tipc_nametbl_remove_publ(ntohl(item->type),
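
named_cluster_distribute() above now walks the global tipc_node_list instead of indexing a per-cluster node array. A condensed sketch of the new loop is shown below, with the actual link-send call replaced by a hypothetical send_copy_to() helper so that only the iteration idiom is illustrated:

static void distribute_to_cluster(struct sk_buff *buf)
{
	struct tipc_node *n_ptr;
	struct sk_buff *buf_copy;

	list_for_each_entry(n_ptr, &tipc_node_list, list) {
		if (!tipc_node_active_links(n_ptr))
			continue;			/* node currently unreachable */
		buf_copy = skb_copy(buf, GFP_ATOMIC);
		if (!buf_copy)
			break;				/* allocation failure ends the pass */
		send_copy_to(n_ptr, buf_copy);		/* hypothetical stand-in for the link send */
	}
}
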
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 9bacfd00b91e..68b3dd637291 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -2,7 +2,7 @@
2 * net/tipc/net.c: TIPC network routing code 2 * net/tipc/net.c: TIPC network routing code
3 * 3 *
4 * Copyright (c) 1995-2006, Ericsson AB 4 * Copyright (c) 1995-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,7 @@
39#include "name_distr.h" 39#include "name_distr.h"
40#include "subscr.h" 40#include "subscr.h"
41#include "port.h" 41#include "port.h"
42#include "node.h"
42#include "config.h" 43#include "config.h"
43 44
44/* 45/*
@@ -108,26 +109,6 @@
108*/ 109*/
109 110
110DEFINE_RWLOCK(tipc_net_lock); 111DEFINE_RWLOCK(tipc_net_lock);
111struct network tipc_net;
112
113static int net_start(void)
114{
115 tipc_net.nodes = kcalloc(tipc_max_nodes + 1,
116 sizeof(*tipc_net.nodes), GFP_ATOMIC);
117 tipc_net.highest_node = 0;
118
119 return tipc_net.nodes ? 0 : -ENOMEM;
120}
121
122static void net_stop(void)
123{
124 u32 n_num;
125
126 for (n_num = 1; n_num <= tipc_net.highest_node; n_num++)
127 tipc_node_delete(tipc_net.nodes[n_num]);
128 kfree(tipc_net.nodes);
129 tipc_net.nodes = NULL;
130}
131 112
132static void net_route_named_msg(struct sk_buff *buf) 113static void net_route_named_msg(struct sk_buff *buf)
133{ 114{
@@ -217,9 +198,6 @@ int tipc_net_start(u32 addr)
217 tipc_named_reinit(); 198 tipc_named_reinit();
218 tipc_port_reinit(); 199 tipc_port_reinit();
219 200
220 res = net_start();
221 if (res)
222 return res;
223 res = tipc_bclink_init(); 201 res = tipc_bclink_init();
224 if (res) 202 if (res)
225 return res; 203 return res;
@@ -235,14 +213,16 @@ int tipc_net_start(u32 addr)
235 213
236void tipc_net_stop(void) 214void tipc_net_stop(void)
237{ 215{
216 struct tipc_node *node, *t_node;
217
238 if (tipc_mode != TIPC_NET_MODE) 218 if (tipc_mode != TIPC_NET_MODE)
239 return; 219 return;
240 write_lock_bh(&tipc_net_lock); 220 write_lock_bh(&tipc_net_lock);
241 tipc_bearer_stop(); 221 tipc_bearer_stop();
242 tipc_mode = TIPC_NODE_MODE; 222 tipc_mode = TIPC_NODE_MODE;
243 tipc_bclink_stop(); 223 tipc_bclink_stop();
244 net_stop(); 224 list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
225 tipc_node_delete(node);
245 write_unlock_bh(&tipc_net_lock); 226 write_unlock_bh(&tipc_net_lock);
246 info("Left network mode\n"); 227 info("Left network mode\n");
247} 228}
248
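
The reworked tipc_net_stop() above tears the node list down while iterating, which is why it uses the _safe iterator: tipc_node_delete() does list_del() and kfree() on the current entry, so the loop must cache the successor (t_node) before the current node is freed; a plain list_for_each_entry() here would dereference freed memory. A minimal fragment restating just that step:

	struct tipc_node *node, *t_node;

	/* t_node keeps the successor valid across the delete */
	list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
		tipc_node_delete(node);		/* list_del() + kfree() inside */
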
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 4ae59ad04893..9eb4b9e220eb 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -2,7 +2,7 @@
2 * net/tipc/net.h: Include file for TIPC network routing code 2 * net/tipc/net.h: Include file for TIPC network routing code
3 * 3 *
4 * Copyright (c) 1995-2006, Ericsson AB 4 * Copyright (c) 1995-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -37,23 +37,6 @@
37#ifndef _TIPC_NET_H 37#ifndef _TIPC_NET_H
38#define _TIPC_NET_H 38#define _TIPC_NET_H
39 39
40struct tipc_node;
41
42/**
43 * struct network - TIPC network structure
44 * @nodes: array of pointers to all nodes within cluster
45 * @highest_node: id of highest numbered node within cluster
46 * @links: number of (unicast) links to cluster
47 */
48
49struct network {
50 struct tipc_node **nodes;
51 u32 highest_node;
52 u32 links;
53};
54
55
56extern struct network tipc_net;
57extern rwlock_t tipc_net_lock; 40extern rwlock_t tipc_net_lock;
58 41
59void tipc_net_route_msg(struct sk_buff *buf); 42void tipc_net_route_msg(struct sk_buff *buf);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3af53e327f49..2d106ef4fa4c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2,7 +2,7 @@
2 * net/tipc/node.c: TIPC node management routines 2 * net/tipc/node.c: TIPC node management routines
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2005-2006, Wind River Systems 5 * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -44,9 +44,33 @@ static void node_established_contact(struct tipc_node *n_ptr);
44 44
45static DEFINE_SPINLOCK(node_create_lock); 45static DEFINE_SPINLOCK(node_create_lock);
46 46
47static struct hlist_head node_htable[NODE_HTABLE_SIZE];
48LIST_HEAD(tipc_node_list);
49static u32 tipc_num_nodes;
50
51static atomic_t tipc_num_links = ATOMIC_INIT(0);
47u32 tipc_own_tag; 52u32 tipc_own_tag;
48 53
49/** 54/**
55 * tipc_node_find - locate specified node object, if it exists
56 */
57
58struct tipc_node *tipc_node_find(u32 addr)
59{
60 struct tipc_node *node;
61 struct hlist_node *pos;
62
63 if (unlikely(!in_own_cluster(addr)))
64 return NULL;
65
66 hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) {
67 if (node->addr == addr)
68 return node;
69 }
70 return NULL;
71}
72
73/**
50 * tipc_node_create - create neighboring node 74 * tipc_node_create - create neighboring node
51 * 75 *
52 * Currently, this routine is called by neighbor discovery code, which holds 76 * Currently, this routine is called by neighbor discovery code, which holds
@@ -58,8 +82,7 @@ u32 tipc_own_tag;
58 82
59struct tipc_node *tipc_node_create(u32 addr) 83struct tipc_node *tipc_node_create(u32 addr)
60{ 84{
61 struct tipc_node *n_ptr; 85 struct tipc_node *n_ptr, *temp_node;
62 u32 n_num;
63 86
64 spin_lock_bh(&node_create_lock); 87 spin_lock_bh(&node_create_lock);
65 88
@@ -78,12 +101,19 @@ struct tipc_node *tipc_node_create(u32 addr)
78 101
79 n_ptr->addr = addr; 102 n_ptr->addr = addr;
80 spin_lock_init(&n_ptr->lock); 103 spin_lock_init(&n_ptr->lock);
104 INIT_HLIST_NODE(&n_ptr->hash);
105 INIT_LIST_HEAD(&n_ptr->list);
81 INIT_LIST_HEAD(&n_ptr->nsub); 106 INIT_LIST_HEAD(&n_ptr->nsub);
82 107
83 n_num = tipc_node(addr); 108 hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
84 tipc_net.nodes[n_num] = n_ptr; 109
85 if (n_num > tipc_net.highest_node) 110 list_for_each_entry(temp_node, &tipc_node_list, list) {
86 tipc_net.highest_node = n_num; 111 if (n_ptr->addr < temp_node->addr)
112 break;
113 }
114 list_add_tail(&n_ptr->list, &temp_node->list);
115
116 tipc_num_nodes++;
87 117
88 spin_unlock_bh(&node_create_lock); 118 spin_unlock_bh(&node_create_lock);
89 return n_ptr; 119 return n_ptr;
@@ -91,18 +121,11 @@ struct tipc_node *tipc_node_create(u32 addr)
91 121
92void tipc_node_delete(struct tipc_node *n_ptr) 122void tipc_node_delete(struct tipc_node *n_ptr)
93{ 123{
94 u32 n_num; 124 list_del(&n_ptr->list);
95 125 hlist_del(&n_ptr->hash);
96 if (!n_ptr)
97 return;
98
99 n_num = tipc_node(n_ptr->addr);
100 tipc_net.nodes[n_num] = NULL;
101 kfree(n_ptr); 126 kfree(n_ptr);
102 127
103 while (!tipc_net.nodes[tipc_net.highest_node]) 128 tipc_num_nodes--;
104 if (--tipc_net.highest_node == 0)
105 break;
106} 129}
107 130
108 131
@@ -200,54 +223,32 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
200 node_lost_contact(n_ptr); 223 node_lost_contact(n_ptr);
201} 224}
202 225
203int tipc_node_has_active_links(struct tipc_node *n_ptr) 226int tipc_node_active_links(struct tipc_node *n_ptr)
204{ 227{
205 return n_ptr->active_links[0] != NULL; 228 return n_ptr->active_links[0] != NULL;
206} 229}
207 230
208int tipc_node_has_redundant_links(struct tipc_node *n_ptr) 231int tipc_node_redundant_links(struct tipc_node *n_ptr)
209{ 232{
210 return n_ptr->working_links > 1; 233 return n_ptr->working_links > 1;
211} 234}
212 235
213int tipc_node_is_up(struct tipc_node *n_ptr) 236int tipc_node_is_up(struct tipc_node *n_ptr)
214{ 237{
215 return tipc_node_has_active_links(n_ptr); 238 return tipc_node_active_links(n_ptr);
216} 239}
217 240
218struct tipc_node *tipc_node_attach_link(struct link *l_ptr) 241void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr)
219{ 242{
220 struct tipc_node *n_ptr = tipc_node_find(l_ptr->addr); 243 n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
221 244 atomic_inc(&tipc_num_links);
222 if (!n_ptr) 245 n_ptr->link_cnt++;
223 n_ptr = tipc_node_create(l_ptr->addr);
224 if (n_ptr) {
225 u32 bearer_id = l_ptr->b_ptr->identity;
226 char addr_string[16];
227
228 if (n_ptr->link_cnt >= 2) {
229 err("Attempt to create third link to %s\n",
230 tipc_addr_string_fill(addr_string, n_ptr->addr));
231 return NULL;
232 }
233
234 if (!n_ptr->links[bearer_id]) {
235 n_ptr->links[bearer_id] = l_ptr;
236 tipc_net.links++;
237 n_ptr->link_cnt++;
238 return n_ptr;
239 }
240 err("Attempt to establish second link on <%s> to %s\n",
241 l_ptr->b_ptr->publ.name,
242 tipc_addr_string_fill(addr_string, l_ptr->addr));
243 }
244 return NULL;
245} 246}
246 247
247void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) 248void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
248{ 249{
249 n_ptr->links[l_ptr->b_ptr->identity] = NULL; 250 n_ptr->links[l_ptr->b_ptr->identity] = NULL;
250 tipc_net.links--; 251 atomic_dec(&tipc_num_links);
251 n_ptr->link_cnt--; 252 n_ptr->link_cnt--;
252} 253}
253 254
@@ -327,7 +328,6 @@ static void node_cleanup_finished(unsigned long node_addr)
327 328
328static void node_lost_contact(struct tipc_node *n_ptr) 329static void node_lost_contact(struct tipc_node *n_ptr)
329{ 330{
330 struct tipc_node_subscr *ns, *tns;
331 char addr_string[16]; 331 char addr_string[16];
332 u32 i; 332 u32 i;
333 333
@@ -365,12 +365,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
365 } 365 }
366 366
367 /* Notify subscribers */ 367 /* Notify subscribers */
368 list_for_each_entry_safe(ns, tns, &n_ptr->nsub, nodesub_list) { 368 tipc_nodesub_notify(n_ptr);
369 ns->node = NULL;
370 list_del_init(&ns->nodesub_list);
371 tipc_k_signal((Handler)ns->handle_node_down,
372 (unsigned long)ns->usr_handle);
373 }
374 369
375 /* Prevent re-contact with node until all cleanup is done */ 370 /* Prevent re-contact with node until all cleanup is done */
376 371
@@ -385,7 +380,6 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
385 struct tipc_node *n_ptr; 380 struct tipc_node *n_ptr;
386 struct tipc_node_info node_info; 381 struct tipc_node_info node_info;
387 u32 payload_size; 382 u32 payload_size;
388 u32 n_num;
389 383
390 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) 384 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
391 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); 385 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -396,15 +390,14 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
396 " (network address)"); 390 " (network address)");
397 391
398 read_lock_bh(&tipc_net_lock); 392 read_lock_bh(&tipc_net_lock);
399 if (!tipc_net.nodes) { 393 if (!tipc_num_nodes) {
400 read_unlock_bh(&tipc_net_lock); 394 read_unlock_bh(&tipc_net_lock);
401 return tipc_cfg_reply_none(); 395 return tipc_cfg_reply_none();
402 } 396 }
403 397
404 /* For now, get space for all other nodes */ 398 /* For now, get space for all other nodes */
405 399
406 payload_size = TLV_SPACE(sizeof(node_info)) * 400 payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes;
407 (tipc_net.highest_node - 1);
408 if (payload_size > 32768u) { 401 if (payload_size > 32768u) {
409 read_unlock_bh(&tipc_net_lock); 402 read_unlock_bh(&tipc_net_lock);
410 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 403 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
@@ -418,9 +411,8 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
418 411
419 /* Add TLVs for all nodes in scope */ 412 /* Add TLVs for all nodes in scope */
420 413
421 for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { 414 list_for_each_entry(n_ptr, &tipc_node_list, list) {
422 n_ptr = tipc_net.nodes[n_num]; 415 if (!tipc_in_scope(domain, n_ptr->addr))
423 if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr))
424 continue; 416 continue;
425 node_info.addr = htonl(n_ptr->addr); 417 node_info.addr = htonl(n_ptr->addr);
426 node_info.up = htonl(tipc_node_is_up(n_ptr)); 418 node_info.up = htonl(tipc_node_is_up(n_ptr));
@@ -439,7 +431,6 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
439 struct tipc_node *n_ptr; 431 struct tipc_node *n_ptr;
440 struct tipc_link_info link_info; 432 struct tipc_link_info link_info;
441 u32 payload_size; 433 u32 payload_size;
442 u32 n_num;
443 434
444 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) 435 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
445 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); 436 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -456,7 +447,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
456 447
457 /* Get space for all unicast links + multicast link */ 448 /* Get space for all unicast links + multicast link */
458 449
459 payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1); 450 payload_size = TLV_SPACE(sizeof(link_info)) *
451 (atomic_read(&tipc_num_links) + 1);
460 if (payload_size > 32768u) { 452 if (payload_size > 32768u) {
461 read_unlock_bh(&tipc_net_lock); 453 read_unlock_bh(&tipc_net_lock);
462 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 454 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
@@ -470,18 +462,17 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
470 462
471 /* Add TLV for broadcast link */ 463 /* Add TLV for broadcast link */
472 464
473 link_info.dest = htonl(tipc_own_addr & 0xfffff00); 465 link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr));
474 link_info.up = htonl(1); 466 link_info.up = htonl(1);
475 strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); 467 strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME);
476 tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); 468 tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
477 469
478 /* Add TLVs for any other links in scope */ 470 /* Add TLVs for any other links in scope */
479 471
480 for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { 472 list_for_each_entry(n_ptr, &tipc_node_list, list) {
481 u32 i; 473 u32 i;
482 474
483 n_ptr = tipc_net.nodes[n_num]; 475 if (!tipc_in_scope(domain, n_ptr->addr))
484 if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr))
485 continue; 476 continue;
486 tipc_node_lock(n_ptr); 477 tipc_node_lock(n_ptr);
487 for (i = 0; i < MAX_BEARERS; i++) { 478 for (i = 0; i < MAX_BEARERS; i++) {
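
tipc_node_create() above keeps tipc_node_list sorted by address with a compact idiom: scan for the first node whose address is larger, then list_add_tail() the new entry in front of it. The subtle part is the fall-through case, sketched below with the same names as the patch (locking omitted): if no larger address is found, the loop cursor ends up denoting the list head itself, so list_add_tail() appends the node at the end.

static void node_list_insert_sorted(struct tipc_node *n_ptr)
{
	struct tipc_node *temp_node;

	list_for_each_entry(temp_node, &tipc_node_list, list) {
		if (n_ptr->addr < temp_node->addr)
			break;		/* insert before the first larger address */
	}
	/* If the loop ran to completion, &temp_node->list == &tipc_node_list,
	 * so this appends the new node at the tail. */
	list_add_tail(&n_ptr->list, &temp_node->list);
}
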
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 206a8efa410e..5c61afc7a0b9 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -2,7 +2,7 @@
2 * net/tipc/node.h: Include file for TIPC node management routines 2 * net/tipc/node.h: Include file for TIPC node management routines
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -46,7 +46,8 @@
46 * struct tipc_node - TIPC node structure 46 * struct tipc_node - TIPC node structure
47 * @addr: network address of node 47 * @addr: network address of node
48 * @lock: spinlock governing access to structure 48 * @lock: spinlock governing access to structure
49 * @next: pointer to next node in sorted list of cluster's nodes 49 * @hash: links to adjacent nodes in unsorted hash chain
50 * @list: links to adjacent nodes in sorted list of cluster's nodes
50 * @nsub: list of "node down" subscriptions monitoring node 51 * @nsub: list of "node down" subscriptions monitoring node
51 * @active_links: pointers to active links to node 52 * @active_links: pointers to active links to node
52 * @links: pointers to all links to node 53 * @links: pointers to all links to node
@@ -69,7 +70,8 @@
69struct tipc_node { 70struct tipc_node {
70 u32 addr; 71 u32 addr;
71 spinlock_t lock; 72 spinlock_t lock;
72 struct tipc_node *next; 73 struct hlist_node hash;
74 struct list_head list;
73 struct list_head nsub; 75 struct list_head nsub;
74 struct link *active_links[2]; 76 struct link *active_links[2];
75 struct link *links[MAX_BEARERS]; 77 struct link *links[MAX_BEARERS];
@@ -90,27 +92,35 @@ struct tipc_node {
90 } bclink; 92 } bclink;
91}; 93};
92 94
95#define NODE_HTABLE_SIZE 512
96extern struct list_head tipc_node_list;
97
98/*
99 * A trivial power-of-two bitmask technique is used for speed, since this
100 * operation is done for every incoming TIPC packet. The number of hash table
101 * entries has been chosen so that no hash chain exceeds 8 nodes and will
102 * usually be much smaller (typically only a single node).
103 */
104static inline unsigned int tipc_hashfn(u32 addr)
105{
106 return addr & (NODE_HTABLE_SIZE - 1);
107}
108
93extern u32 tipc_own_tag; 109extern u32 tipc_own_tag;
94 110
111struct tipc_node *tipc_node_find(u32 addr);
95struct tipc_node *tipc_node_create(u32 addr); 112struct tipc_node *tipc_node_create(u32 addr);
96void tipc_node_delete(struct tipc_node *n_ptr); 113void tipc_node_delete(struct tipc_node *n_ptr);
97struct tipc_node *tipc_node_attach_link(struct link *l_ptr); 114void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr);
98void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr); 115void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr);
99void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); 116void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr);
100void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); 117void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr);
101int tipc_node_has_active_links(struct tipc_node *n_ptr); 118int tipc_node_active_links(struct tipc_node *n_ptr);
102int tipc_node_has_redundant_links(struct tipc_node *n_ptr); 119int tipc_node_redundant_links(struct tipc_node *n_ptr);
103int tipc_node_is_up(struct tipc_node *n_ptr); 120int tipc_node_is_up(struct tipc_node *n_ptr);
104struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); 121struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
105struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); 122struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
106 123
107static inline struct tipc_node *tipc_node_find(u32 addr)
108{
109 if (likely(in_own_cluster(addr)))
110 return tipc_net.nodes[tipc_node(addr)];
111 return NULL;
112}
113
114static inline void tipc_node_lock(struct tipc_node *n_ptr) 124static inline void tipc_node_lock(struct tipc_node *n_ptr)
115{ 125{
116 spin_lock_bh(&n_ptr->lock); 126 spin_lock_bh(&n_ptr->lock);
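
node.h now pairs the sorted list with a 512-entry hash table so that tipc_node_find(), which runs for every incoming packet, stays O(1): the low bits of the network address select a bucket (power-of-two mask, no division) and a short hlist chain is walked. A sketch of the lookup path, using the names introduced by the patch (the four-argument hlist_for_each_entry() form matches the kernel API of this era; locking and the in_own_cluster() check are omitted):

struct tipc_node *node_lookup(u32 addr)
{
	struct tipc_node *node;
	struct hlist_node *pos;

	/* tipc_hashfn(addr) == addr & (NODE_HTABLE_SIZE - 1) */
	hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) {
		if (node->addr == addr)
			return node;	/* chains are short, typically one entry */
	}
	return NULL;
}
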
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 018a55332d91..c3c2815ae630 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -2,7 +2,7 @@
2 * net/tipc/node_subscr.c: TIPC "node down" subscription handling 2 * net/tipc/node_subscr.c: TIPC "node down" subscription handling
3 * 3 *
4 * Copyright (c) 1995-2006, Ericsson AB 4 * Copyright (c) 1995-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -76,3 +76,22 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub)
76 list_del_init(&node_sub->nodesub_list); 76 list_del_init(&node_sub->nodesub_list);
77 tipc_node_unlock(node_sub->node); 77 tipc_node_unlock(node_sub->node);
78} 78}
79
80/**
81 * tipc_nodesub_notify - notify subscribers that a node is unreachable
82 *
83 * Note: node is locked by caller
84 */
85
86void tipc_nodesub_notify(struct tipc_node *node)
87{
88 struct tipc_node_subscr *ns;
89
90 list_for_each_entry(ns, &node->nsub, nodesub_list) {
91 if (ns->handle_node_down) {
92 tipc_k_signal((Handler)ns->handle_node_down,
93 (unsigned long)ns->usr_handle);
94 ns->handle_node_down = NULL;
95 }
96 }
97}
diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h
index 006ed739f515..4bc2ca0867a1 100644
--- a/net/tipc/node_subscr.h
+++ b/net/tipc/node_subscr.h
@@ -2,7 +2,7 @@
2 * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling 2 * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling
3 * 3 *
4 * Copyright (c) 1995-2006, Ericsson AB 4 * Copyright (c) 1995-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -59,5 +59,6 @@ struct tipc_node_subscr {
59void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, 59void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
60 void *usr_handle, net_ev_handler handle_down); 60 void *usr_handle, net_ev_handler handle_down);
61void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); 61void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub);
62void tipc_nodesub_notify(struct tipc_node *node);
62 63
63#endif 64#endif
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 067bab2a0b98..c68dc956a423 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -2,7 +2,7 @@
2 * net/tipc/port.c: TIPC port code 2 * net/tipc/port.c: TIPC port code
3 * 3 *
4 * Copyright (c) 1992-2007, Ericsson AB 4 * Copyright (c) 1992-2007, Ericsson AB
5 * Copyright (c) 2004-2008, Wind River Systems 5 * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -54,33 +54,19 @@ static DEFINE_SPINLOCK(queue_lock);
54 54
55static LIST_HEAD(ports); 55static LIST_HEAD(ports);
56static void port_handle_node_down(unsigned long ref); 56static void port_handle_node_down(unsigned long ref);
57static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err); 57static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err);
58static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err); 58static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err);
59static void port_timeout(unsigned long ref); 59static void port_timeout(unsigned long ref);
60 60
61 61
62static u32 port_peernode(struct port *p_ptr) 62static u32 port_peernode(struct tipc_port *p_ptr)
63{ 63{
64 return msg_destnode(&p_ptr->publ.phdr); 64 return msg_destnode(&p_ptr->phdr);
65} 65}
66 66
67static u32 port_peerport(struct port *p_ptr) 67static u32 port_peerport(struct tipc_port *p_ptr)
68{ 68{
69 return msg_destport(&p_ptr->publ.phdr); 69 return msg_destport(&p_ptr->phdr);
70}
71
72static u32 port_out_seqno(struct port *p_ptr)
73{
74 return msg_transp_seqno(&p_ptr->publ.phdr);
75}
76
77static void port_incr_out_seqno(struct port *p_ptr)
78{
79 struct tipc_msg *m = &p_ptr->publ.phdr;
80
81 if (likely(!msg_routed(m)))
82 return;
83 msg_set_transp_seqno(m, (msg_transp_seqno(m) + 1));
84} 70}
85 71
86/** 72/**
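
The port.c changes that follow are largely mechanical: the private struct port, which embedded the user-visible struct tipc_port as a field named publ, is merged into a single struct tipc_port, so every p_ptr->publ.X access collapses to p_ptr->X. A before/after sketch of the layout (hypothetical struct names, only a few representative fields shown):

/* before: two-level layout */
struct port_before {
	struct tipc_port publ;		/* public fields: ref, lock, phdr, ... */
	struct list_head wait_list;	/* private, TIPC-internal bookkeeping */
	/* ... */
};

/* after: one flat structure */
struct tipc_port_after {
	u32 ref;
	spinlock_t lock;
	struct tipc_msg phdr;
	struct list_head wait_list;	/* public and private fields side by side */
	/* ... */
};
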
@@ -88,13 +74,14 @@ static void port_incr_out_seqno(struct port *p_ptr)
88 */ 74 */
89 75
90int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, 76int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
91 u32 num_sect, struct iovec const *msg_sect) 77 u32 num_sect, struct iovec const *msg_sect,
78 unsigned int total_len)
92{ 79{
93 struct tipc_msg *hdr; 80 struct tipc_msg *hdr;
94 struct sk_buff *buf; 81 struct sk_buff *buf;
95 struct sk_buff *ibuf = NULL; 82 struct sk_buff *ibuf = NULL;
96 struct port_list dports = {0, NULL, }; 83 struct port_list dports = {0, NULL, };
97 struct port *oport = tipc_port_deref(ref); 84 struct tipc_port *oport = tipc_port_deref(ref);
98 int ext_targets; 85 int ext_targets;
99 int res; 86 int res;
100 87
@@ -103,13 +90,16 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
103 90
104 /* Create multicast message */ 91 /* Create multicast message */
105 92
106 hdr = &oport->publ.phdr; 93 hdr = &oport->phdr;
107 msg_set_type(hdr, TIPC_MCAST_MSG); 94 msg_set_type(hdr, TIPC_MCAST_MSG);
95 msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
96 msg_set_destport(hdr, 0);
97 msg_set_destnode(hdr, 0);
108 msg_set_nametype(hdr, seq->type); 98 msg_set_nametype(hdr, seq->type);
109 msg_set_namelower(hdr, seq->lower); 99 msg_set_namelower(hdr, seq->lower);
110 msg_set_nameupper(hdr, seq->upper); 100 msg_set_nameupper(hdr, seq->upper);
111 msg_set_hdr_sz(hdr, MCAST_H_SIZE); 101 msg_set_hdr_sz(hdr, MCAST_H_SIZE);
112 res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE, 102 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
113 !oport->user_port, &buf); 103 !oport->user_port, &buf);
114 if (unlikely(!buf)) 104 if (unlikely(!buf))
115 return res; 105 return res;
@@ -175,6 +165,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
175 /* Deliver a copy of message to each destination port */ 165 /* Deliver a copy of message to each destination port */
176 166
177 if (dp->count != 0) { 167 if (dp->count != 0) {
168 msg_set_destnode(msg, tipc_own_addr);
178 if (dp->count == 1) { 169 if (dp->count == 1) {
179 msg_set_destport(msg, dp->ports[0]); 170 msg_set_destport(msg, dp->ports[0]);
180 tipc_port_recv_msg(buf); 171 tipc_port_recv_msg(buf);
@@ -211,7 +202,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
211 void (*wakeup)(struct tipc_port *), 202 void (*wakeup)(struct tipc_port *),
212 const u32 importance) 203 const u32 importance)
213{ 204{
214 struct port *p_ptr; 205 struct tipc_port *p_ptr;
215 struct tipc_msg *msg; 206 struct tipc_msg *msg;
216 u32 ref; 207 u32 ref;
217 208
@@ -220,21 +211,19 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
220 warn("Port creation failed, no memory\n"); 211 warn("Port creation failed, no memory\n");
221 return NULL; 212 return NULL;
222 } 213 }
223 ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); 214 ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
224 if (!ref) { 215 if (!ref) {
225 warn("Port creation failed, reference table exhausted\n"); 216 warn("Port creation failed, reference table exhausted\n");
226 kfree(p_ptr); 217 kfree(p_ptr);
227 return NULL; 218 return NULL;
228 } 219 }
229 220
230 p_ptr->publ.usr_handle = usr_handle; 221 p_ptr->usr_handle = usr_handle;
231 p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; 222 p_ptr->max_pkt = MAX_PKT_DEFAULT;
232 p_ptr->publ.ref = ref; 223 p_ptr->ref = ref;
233 msg = &p_ptr->publ.phdr; 224 msg = &p_ptr->phdr;
234 tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); 225 tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
235 msg_set_origport(msg, ref); 226 msg_set_origport(msg, ref);
236 p_ptr->last_in_seqno = 41;
237 p_ptr->sent = 1;
238 INIT_LIST_HEAD(&p_ptr->wait_list); 227 INIT_LIST_HEAD(&p_ptr->wait_list);
239 INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); 228 INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
240 p_ptr->dispatcher = dispatcher; 229 p_ptr->dispatcher = dispatcher;
@@ -246,12 +235,12 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
246 INIT_LIST_HEAD(&p_ptr->port_list); 235 INIT_LIST_HEAD(&p_ptr->port_list);
247 list_add_tail(&p_ptr->port_list, &ports); 236 list_add_tail(&p_ptr->port_list, &ports);
248 spin_unlock_bh(&tipc_port_list_lock); 237 spin_unlock_bh(&tipc_port_list_lock);
249 return &(p_ptr->publ); 238 return p_ptr;
250} 239}
251 240
252int tipc_deleteport(u32 ref) 241int tipc_deleteport(u32 ref)
253{ 242{
254 struct port *p_ptr; 243 struct tipc_port *p_ptr;
255 struct sk_buff *buf = NULL; 244 struct sk_buff *buf = NULL;
256 245
257 tipc_withdraw(ref, 0, NULL); 246 tipc_withdraw(ref, 0, NULL);
@@ -263,7 +252,7 @@ int tipc_deleteport(u32 ref)
263 tipc_port_unlock(p_ptr); 252 tipc_port_unlock(p_ptr);
264 253
265 k_cancel_timer(&p_ptr->timer); 254 k_cancel_timer(&p_ptr->timer);
266 if (p_ptr->publ.connected) { 255 if (p_ptr->connected) {
267 buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); 256 buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
268 tipc_nodesub_unsubscribe(&p_ptr->subscription); 257 tipc_nodesub_unsubscribe(&p_ptr->subscription);
269 } 258 }
@@ -279,14 +268,14 @@ int tipc_deleteport(u32 ref)
279 return 0; 268 return 0;
280} 269}
281 270
282static int port_unreliable(struct port *p_ptr) 271static int port_unreliable(struct tipc_port *p_ptr)
283{ 272{
284 return msg_src_droppable(&p_ptr->publ.phdr); 273 return msg_src_droppable(&p_ptr->phdr);
285} 274}
286 275
287int tipc_portunreliable(u32 ref, unsigned int *isunreliable) 276int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
288{ 277{
289 struct port *p_ptr; 278 struct tipc_port *p_ptr;
290 279
291 p_ptr = tipc_port_lock(ref); 280 p_ptr = tipc_port_lock(ref);
292 if (!p_ptr) 281 if (!p_ptr)
@@ -298,24 +287,24 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
298 287
299int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) 288int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
300{ 289{
301 struct port *p_ptr; 290 struct tipc_port *p_ptr;
302 291
303 p_ptr = tipc_port_lock(ref); 292 p_ptr = tipc_port_lock(ref);
304 if (!p_ptr) 293 if (!p_ptr)
305 return -EINVAL; 294 return -EINVAL;
306 msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0)); 295 msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0));
307 tipc_port_unlock(p_ptr); 296 tipc_port_unlock(p_ptr);
308 return 0; 297 return 0;
309} 298}
310 299
311static int port_unreturnable(struct port *p_ptr) 300static int port_unreturnable(struct tipc_port *p_ptr)
312{ 301{
313 return msg_dest_droppable(&p_ptr->publ.phdr); 302 return msg_dest_droppable(&p_ptr->phdr);
314} 303}
315 304
316int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) 305int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
317{ 306{
318 struct port *p_ptr; 307 struct tipc_port *p_ptr;
319 308
320 p_ptr = tipc_port_lock(ref); 309 p_ptr = tipc_port_lock(ref);
321 if (!p_ptr) 310 if (!p_ptr)
@@ -327,12 +316,12 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
327 316
328int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) 317int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
329{ 318{
330 struct port *p_ptr; 319 struct tipc_port *p_ptr;
331 320
332 p_ptr = tipc_port_lock(ref); 321 p_ptr = tipc_port_lock(ref);
333 if (!p_ptr) 322 if (!p_ptr)
334 return -EINVAL; 323 return -EINVAL;
335 msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0)); 324 msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0));
336 tipc_port_unlock(p_ptr); 325 tipc_port_unlock(p_ptr);
337 return 0; 326 return 0;
338} 327}
@@ -345,7 +334,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
345static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, 334static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
346 u32 origport, u32 orignode, 335 u32 origport, u32 orignode,
347 u32 usr, u32 type, u32 err, 336 u32 usr, u32 type, u32 err,
348 u32 seqno, u32 ack) 337 u32 ack)
349{ 338{
350 struct sk_buff *buf; 339 struct sk_buff *buf;
351 struct tipc_msg *msg; 340 struct tipc_msg *msg;
@@ -358,7 +347,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
358 msg_set_destport(msg, destport); 347 msg_set_destport(msg, destport);
359 msg_set_origport(msg, origport); 348 msg_set_origport(msg, origport);
360 msg_set_orignode(msg, orignode); 349 msg_set_orignode(msg, orignode);
361 msg_set_transp_seqno(msg, seqno);
362 msg_set_msgcnt(msg, ack); 350 msg_set_msgcnt(msg, ack);
363 } 351 }
364 return buf; 352 return buf;
@@ -413,10 +401,10 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
413 /* send self-abort message when rejecting on a connected port */ 401 /* send self-abort message when rejecting on a connected port */
414 if (msg_connected(msg)) { 402 if (msg_connected(msg)) {
415 struct sk_buff *abuf = NULL; 403 struct sk_buff *abuf = NULL;
416 struct port *p_ptr = tipc_port_lock(msg_destport(msg)); 404 struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
417 405
418 if (p_ptr) { 406 if (p_ptr) {
419 if (p_ptr->publ.connected) 407 if (p_ptr->connected)
420 abuf = port_build_self_abort_msg(p_ptr, err); 408 abuf = port_build_self_abort_msg(p_ptr, err);
421 tipc_port_unlock(p_ptr); 409 tipc_port_unlock(p_ptr);
422 } 410 }
@@ -429,14 +417,14 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
429 return data_sz; 417 return data_sz;
430} 418}
431 419
432int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, 420int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
433 struct iovec const *msg_sect, u32 num_sect, 421 struct iovec const *msg_sect, u32 num_sect,
434 int err) 422 unsigned int total_len, int err)
435{ 423{
436 struct sk_buff *buf; 424 struct sk_buff *buf;
437 int res; 425 int res;
438 426
439 res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE, 427 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
440 !p_ptr->user_port, &buf); 428 !p_ptr->user_port, &buf);
441 if (!buf) 429 if (!buf)
442 return res; 430 return res;
@@ -446,13 +434,13 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
446 434
447static void port_timeout(unsigned long ref) 435static void port_timeout(unsigned long ref)
448{ 436{
449 struct port *p_ptr = tipc_port_lock(ref); 437 struct tipc_port *p_ptr = tipc_port_lock(ref);
450 struct sk_buff *buf = NULL; 438 struct sk_buff *buf = NULL;
451 439
452 if (!p_ptr) 440 if (!p_ptr)
453 return; 441 return;
454 442
455 if (!p_ptr->publ.connected) { 443 if (!p_ptr->connected) {
456 tipc_port_unlock(p_ptr); 444 tipc_port_unlock(p_ptr);
457 return; 445 return;
458 } 446 }
@@ -463,14 +451,12 @@ static void port_timeout(unsigned long ref)
463 } else { 451 } else {
464 buf = port_build_proto_msg(port_peerport(p_ptr), 452 buf = port_build_proto_msg(port_peerport(p_ptr),
465 port_peernode(p_ptr), 453 port_peernode(p_ptr),
466 p_ptr->publ.ref, 454 p_ptr->ref,
467 tipc_own_addr, 455 tipc_own_addr,
468 CONN_MANAGER, 456 CONN_MANAGER,
469 CONN_PROBE, 457 CONN_PROBE,
470 TIPC_OK, 458 TIPC_OK,
471 port_out_seqno(p_ptr),
472 0); 459 0);
473 port_incr_out_seqno(p_ptr);
474 p_ptr->probing_state = PROBING; 460 p_ptr->probing_state = PROBING;
475 k_start_timer(&p_ptr->timer, p_ptr->probing_interval); 461 k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
476 } 462 }
@@ -481,7 +467,7 @@ static void port_timeout(unsigned long ref)
481 467
482static void port_handle_node_down(unsigned long ref) 468static void port_handle_node_down(unsigned long ref)
483{ 469{
484 struct port *p_ptr = tipc_port_lock(ref); 470 struct tipc_port *p_ptr = tipc_port_lock(ref);
485 struct sk_buff *buf = NULL; 471 struct sk_buff *buf = NULL;
486 472
487 if (!p_ptr) 473 if (!p_ptr)
@@ -492,73 +478,71 @@ static void port_handle_node_down(unsigned long ref)
492} 478}
493 479
494 480
495static struct sk_buff *port_build_self_abort_msg(struct port *p_ptr, u32 err) 481static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err)
496{ 482{
497 u32 imp = msg_importance(&p_ptr->publ.phdr); 483 u32 imp = msg_importance(&p_ptr->phdr);
498 484
499 if (!p_ptr->publ.connected) 485 if (!p_ptr->connected)
500 return NULL; 486 return NULL;
501 if (imp < TIPC_CRITICAL_IMPORTANCE) 487 if (imp < TIPC_CRITICAL_IMPORTANCE)
502 imp++; 488 imp++;
503 return port_build_proto_msg(p_ptr->publ.ref, 489 return port_build_proto_msg(p_ptr->ref,
504 tipc_own_addr, 490 tipc_own_addr,
505 port_peerport(p_ptr), 491 port_peerport(p_ptr),
506 port_peernode(p_ptr), 492 port_peernode(p_ptr),
507 imp, 493 imp,
508 TIPC_CONN_MSG, 494 TIPC_CONN_MSG,
509 err, 495 err,
510 p_ptr->last_in_seqno + 1,
511 0); 496 0);
512} 497}
513 498
514 499
515static struct sk_buff *port_build_peer_abort_msg(struct port *p_ptr, u32 err) 500static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err)
516{ 501{
517 u32 imp = msg_importance(&p_ptr->publ.phdr); 502 u32 imp = msg_importance(&p_ptr->phdr);
518 503
519 if (!p_ptr->publ.connected) 504 if (!p_ptr->connected)
520 return NULL; 505 return NULL;
521 if (imp < TIPC_CRITICAL_IMPORTANCE) 506 if (imp < TIPC_CRITICAL_IMPORTANCE)
522 imp++; 507 imp++;
523 return port_build_proto_msg(port_peerport(p_ptr), 508 return port_build_proto_msg(port_peerport(p_ptr),
524 port_peernode(p_ptr), 509 port_peernode(p_ptr),
525 p_ptr->publ.ref, 510 p_ptr->ref,
526 tipc_own_addr, 511 tipc_own_addr,
527 imp, 512 imp,
528 TIPC_CONN_MSG, 513 TIPC_CONN_MSG,
529 err, 514 err,
530 port_out_seqno(p_ptr),
531 0); 515 0);
532} 516}
533 517
534void tipc_port_recv_proto_msg(struct sk_buff *buf) 518void tipc_port_recv_proto_msg(struct sk_buff *buf)
535{ 519{
536 struct tipc_msg *msg = buf_msg(buf); 520 struct tipc_msg *msg = buf_msg(buf);
537 struct port *p_ptr = tipc_port_lock(msg_destport(msg)); 521 struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
538 u32 err = TIPC_OK; 522 u32 err = TIPC_OK;
539 struct sk_buff *r_buf = NULL; 523 struct sk_buff *r_buf = NULL;
540 struct sk_buff *abort_buf = NULL; 524 struct sk_buff *abort_buf = NULL;
541 525
542 if (!p_ptr) { 526 if (!p_ptr) {
543 err = TIPC_ERR_NO_PORT; 527 err = TIPC_ERR_NO_PORT;
544 } else if (p_ptr->publ.connected) { 528 } else if (p_ptr->connected) {
545 if ((port_peernode(p_ptr) != msg_orignode(msg)) || 529 if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
546 (port_peerport(p_ptr) != msg_origport(msg))) { 530 (port_peerport(p_ptr) != msg_origport(msg))) {
547 err = TIPC_ERR_NO_PORT; 531 err = TIPC_ERR_NO_PORT;
548 } else if (msg_type(msg) == CONN_ACK) { 532 } else if (msg_type(msg) == CONN_ACK) {
549 int wakeup = tipc_port_congested(p_ptr) && 533 int wakeup = tipc_port_congested(p_ptr) &&
550 p_ptr->publ.congested && 534 p_ptr->congested &&
551 p_ptr->wakeup; 535 p_ptr->wakeup;
552 p_ptr->acked += msg_msgcnt(msg); 536 p_ptr->acked += msg_msgcnt(msg);
553 if (tipc_port_congested(p_ptr)) 537 if (tipc_port_congested(p_ptr))
554 goto exit; 538 goto exit;
555 p_ptr->publ.congested = 0; 539 p_ptr->congested = 0;
556 if (!wakeup) 540 if (!wakeup)
557 goto exit; 541 goto exit;
558 p_ptr->wakeup(&p_ptr->publ); 542 p_ptr->wakeup(p_ptr);
559 goto exit; 543 goto exit;
560 } 544 }
561 } else if (p_ptr->publ.published) { 545 } else if (p_ptr->published) {
562 err = TIPC_ERR_NO_PORT; 546 err = TIPC_ERR_NO_PORT;
563 } 547 }
564 if (err) { 548 if (err) {
@@ -569,7 +553,6 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
569 TIPC_HIGH_IMPORTANCE, 553 TIPC_HIGH_IMPORTANCE,
570 TIPC_CONN_MSG, 554 TIPC_CONN_MSG,
571 err, 555 err,
572 0,
573 0); 556 0);
574 goto exit; 557 goto exit;
575 } 558 }
@@ -583,11 +566,9 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
583 CONN_MANAGER, 566 CONN_MANAGER,
584 CONN_PROBE_REPLY, 567 CONN_PROBE_REPLY,
585 TIPC_OK, 568 TIPC_OK,
586 port_out_seqno(p_ptr),
587 0); 569 0);
588 } 570 }
589 p_ptr->probing_state = CONFIRMED; 571 p_ptr->probing_state = CONFIRMED;
590 port_incr_out_seqno(p_ptr);
591exit: 572exit:
592 if (p_ptr) 573 if (p_ptr)
593 tipc_port_unlock(p_ptr); 574 tipc_port_unlock(p_ptr);
@@ -596,29 +577,29 @@ exit:
596 buf_discard(buf); 577 buf_discard(buf);
597} 578}
598 579
599static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id) 580static void port_print(struct tipc_port *p_ptr, struct print_buf *buf, int full_id)
600{ 581{
601 struct publication *publ; 582 struct publication *publ;
602 583
603 if (full_id) 584 if (full_id)
604 tipc_printf(buf, "<%u.%u.%u:%u>:", 585 tipc_printf(buf, "<%u.%u.%u:%u>:",
605 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), 586 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
606 tipc_node(tipc_own_addr), p_ptr->publ.ref); 587 tipc_node(tipc_own_addr), p_ptr->ref);
607 else 588 else
608 tipc_printf(buf, "%-10u:", p_ptr->publ.ref); 589 tipc_printf(buf, "%-10u:", p_ptr->ref);
609 590
610 if (p_ptr->publ.connected) { 591 if (p_ptr->connected) {
611 u32 dport = port_peerport(p_ptr); 592 u32 dport = port_peerport(p_ptr);
612 u32 destnode = port_peernode(p_ptr); 593 u32 destnode = port_peernode(p_ptr);
613 594
614 tipc_printf(buf, " connected to <%u.%u.%u:%u>", 595 tipc_printf(buf, " connected to <%u.%u.%u:%u>",
615 tipc_zone(destnode), tipc_cluster(destnode), 596 tipc_zone(destnode), tipc_cluster(destnode),
616 tipc_node(destnode), dport); 597 tipc_node(destnode), dport);
617 if (p_ptr->publ.conn_type != 0) 598 if (p_ptr->conn_type != 0)
618 tipc_printf(buf, " via {%u,%u}", 599 tipc_printf(buf, " via {%u,%u}",
619 p_ptr->publ.conn_type, 600 p_ptr->conn_type,
620 p_ptr->publ.conn_instance); 601 p_ptr->conn_instance);
621 } else if (p_ptr->publ.published) { 602 } else if (p_ptr->published) {
622 tipc_printf(buf, " bound to"); 603 tipc_printf(buf, " bound to");
623 list_for_each_entry(publ, &p_ptr->publications, pport_list) { 604 list_for_each_entry(publ, &p_ptr->publications, pport_list) {
624 if (publ->lower == publ->upper) 605 if (publ->lower == publ->upper)
@@ -639,7 +620,7 @@ struct sk_buff *tipc_port_get_ports(void)
639 struct sk_buff *buf; 620 struct sk_buff *buf;
640 struct tlv_desc *rep_tlv; 621 struct tlv_desc *rep_tlv;
641 struct print_buf pb; 622 struct print_buf pb;
642 struct port *p_ptr; 623 struct tipc_port *p_ptr;
643 int str_len; 624 int str_len;
644 625
645 buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY)); 626 buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY));
@@ -650,9 +631,9 @@ struct sk_buff *tipc_port_get_ports(void)
650 tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY); 631 tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY);
651 spin_lock_bh(&tipc_port_list_lock); 632 spin_lock_bh(&tipc_port_list_lock);
652 list_for_each_entry(p_ptr, &ports, port_list) { 633 list_for_each_entry(p_ptr, &ports, port_list) {
653 spin_lock_bh(p_ptr->publ.lock); 634 spin_lock_bh(p_ptr->lock);
654 port_print(p_ptr, &pb, 0); 635 port_print(p_ptr, &pb, 0);
655 spin_unlock_bh(p_ptr->publ.lock); 636 spin_unlock_bh(p_ptr->lock);
656 } 637 }
657 spin_unlock_bh(&tipc_port_list_lock); 638 spin_unlock_bh(&tipc_port_list_lock);
658 str_len = tipc_printbuf_validate(&pb); 639 str_len = tipc_printbuf_validate(&pb);
@@ -665,12 +646,12 @@ struct sk_buff *tipc_port_get_ports(void)
665 646
666void tipc_port_reinit(void) 647void tipc_port_reinit(void)
667{ 648{
668 struct port *p_ptr; 649 struct tipc_port *p_ptr;
669 struct tipc_msg *msg; 650 struct tipc_msg *msg;
670 651
671 spin_lock_bh(&tipc_port_list_lock); 652 spin_lock_bh(&tipc_port_list_lock);
672 list_for_each_entry(p_ptr, &ports, port_list) { 653 list_for_each_entry(p_ptr, &ports, port_list) {
673 msg = &p_ptr->publ.phdr; 654 msg = &p_ptr->phdr;
674 if (msg_orignode(msg) == tipc_own_addr) 655 if (msg_orignode(msg) == tipc_own_addr)
675 break; 656 break;
676 msg_set_prevnode(msg, tipc_own_addr); 657 msg_set_prevnode(msg, tipc_own_addr);
@@ -695,7 +676,7 @@ static void port_dispatcher_sigh(void *dummy)
695 spin_unlock_bh(&queue_lock); 676 spin_unlock_bh(&queue_lock);
696 677
697 while (buf) { 678 while (buf) {
698 struct port *p_ptr; 679 struct tipc_port *p_ptr;
699 struct user_port *up_ptr; 680 struct user_port *up_ptr;
700 struct tipc_portid orig; 681 struct tipc_portid orig;
701 struct tipc_name_seq dseq; 682 struct tipc_name_seq dseq;
@@ -720,8 +701,8 @@ static void port_dispatcher_sigh(void *dummy)
720 orig.node = msg_orignode(msg); 701 orig.node = msg_orignode(msg);
721 up_ptr = p_ptr->user_port; 702 up_ptr = p_ptr->user_port;
722 usr_handle = up_ptr->usr_handle; 703 usr_handle = up_ptr->usr_handle;
723 connected = p_ptr->publ.connected; 704 connected = p_ptr->connected;
724 published = p_ptr->publ.published; 705 published = p_ptr->published;
725 706
726 if (unlikely(msg_errcode(msg))) 707 if (unlikely(msg_errcode(msg)))
727 goto err; 708 goto err;
@@ -732,6 +713,7 @@ static void port_dispatcher_sigh(void *dummy)
732 tipc_conn_msg_event cb = up_ptr->conn_msg_cb; 713 tipc_conn_msg_event cb = up_ptr->conn_msg_cb;
733 u32 peer_port = port_peerport(p_ptr); 714 u32 peer_port = port_peerport(p_ptr);
734 u32 peer_node = port_peernode(p_ptr); 715 u32 peer_node = port_peernode(p_ptr);
716 u32 dsz;
735 717
736 tipc_port_unlock(p_ptr); 718 tipc_port_unlock(p_ptr);
737 if (unlikely(!cb)) 719 if (unlikely(!cb))
@@ -742,13 +724,14 @@ static void port_dispatcher_sigh(void *dummy)
742 } else if ((msg_origport(msg) != peer_port) || 724 } else if ((msg_origport(msg) != peer_port) ||
743 (msg_orignode(msg) != peer_node)) 725 (msg_orignode(msg) != peer_node))
744 goto reject; 726 goto reject;
745 if (unlikely(++p_ptr->publ.conn_unacked >= 727 dsz = msg_data_sz(msg);
746 TIPC_FLOW_CONTROL_WIN)) 728 if (unlikely(dsz &&
729 (++p_ptr->conn_unacked >=
730 TIPC_FLOW_CONTROL_WIN)))
747 tipc_acknowledge(dref, 731 tipc_acknowledge(dref,
748 p_ptr->publ.conn_unacked); 732 p_ptr->conn_unacked);
749 skb_pull(buf, msg_hdr_sz(msg)); 733 skb_pull(buf, msg_hdr_sz(msg));
750 cb(usr_handle, dref, &buf, msg_data(msg), 734 cb(usr_handle, dref, &buf, msg_data(msg), dsz);
751 msg_data_sz(msg));
752 break; 735 break;
753 } 736 }
754 case TIPC_DIRECT_MSG:{ 737 case TIPC_DIRECT_MSG:{
@@ -872,7 +855,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf)
872 855
873static void port_wakeup_sh(unsigned long ref) 856static void port_wakeup_sh(unsigned long ref)
874{ 857{
875 struct port *p_ptr; 858 struct tipc_port *p_ptr;
876 struct user_port *up_ptr; 859 struct user_port *up_ptr;
877 tipc_continue_event cb = NULL; 860 tipc_continue_event cb = NULL;
878 void *uh = NULL; 861 void *uh = NULL;
@@ -898,14 +881,14 @@ static void port_wakeup(struct tipc_port *p_ptr)
898 881
899void tipc_acknowledge(u32 ref, u32 ack) 882void tipc_acknowledge(u32 ref, u32 ack)
900{ 883{
901 struct port *p_ptr; 884 struct tipc_port *p_ptr;
902 struct sk_buff *buf = NULL; 885 struct sk_buff *buf = NULL;
903 886
904 p_ptr = tipc_port_lock(ref); 887 p_ptr = tipc_port_lock(ref);
905 if (!p_ptr) 888 if (!p_ptr)
906 return; 889 return;
907 if (p_ptr->publ.connected) { 890 if (p_ptr->connected) {
908 p_ptr->publ.conn_unacked -= ack; 891 p_ptr->conn_unacked -= ack;
909 buf = port_build_proto_msg(port_peerport(p_ptr), 892 buf = port_build_proto_msg(port_peerport(p_ptr),
910 port_peernode(p_ptr), 893 port_peernode(p_ptr),
911 ref, 894 ref,
@@ -913,7 +896,6 @@ void tipc_acknowledge(u32 ref, u32 ack)
913 CONN_MANAGER, 896 CONN_MANAGER,
914 CONN_ACK, 897 CONN_ACK,
915 TIPC_OK, 898 TIPC_OK,
916 port_out_seqno(p_ptr),
917 ack); 899 ack);
918 } 900 }
919 tipc_port_unlock(p_ptr); 901 tipc_port_unlock(p_ptr);
@@ -936,14 +918,14 @@ int tipc_createport(void *usr_handle,
936 u32 *portref) 918 u32 *portref)
937{ 919{
938 struct user_port *up_ptr; 920 struct user_port *up_ptr;
939 struct port *p_ptr; 921 struct tipc_port *p_ptr;
940 922
941 up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); 923 up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
942 if (!up_ptr) { 924 if (!up_ptr) {
943 warn("Port creation failed, no memory\n"); 925 warn("Port creation failed, no memory\n");
944 return -ENOMEM; 926 return -ENOMEM;
945 } 927 }
946 p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher, 928 p_ptr = (struct tipc_port *)tipc_createport_raw(NULL, port_dispatcher,
947 port_wakeup, importance); 929 port_wakeup, importance);
948 if (!p_ptr) { 930 if (!p_ptr) {
949 kfree(up_ptr); 931 kfree(up_ptr);
@@ -952,7 +934,7 @@ int tipc_createport(void *usr_handle,
952 934
953 p_ptr->user_port = up_ptr; 935 p_ptr->user_port = up_ptr;
954 up_ptr->usr_handle = usr_handle; 936 up_ptr->usr_handle = usr_handle;
955 up_ptr->ref = p_ptr->publ.ref; 937 up_ptr->ref = p_ptr->ref;
956 up_ptr->err_cb = error_cb; 938 up_ptr->err_cb = error_cb;
957 up_ptr->named_err_cb = named_error_cb; 939 up_ptr->named_err_cb = named_error_cb;
958 up_ptr->conn_err_cb = conn_error_cb; 940 up_ptr->conn_err_cb = conn_error_cb;
@@ -960,26 +942,26 @@ int tipc_createport(void *usr_handle,
960 up_ptr->named_msg_cb = named_msg_cb; 942 up_ptr->named_msg_cb = named_msg_cb;
961 up_ptr->conn_msg_cb = conn_msg_cb; 943 up_ptr->conn_msg_cb = conn_msg_cb;
962 up_ptr->continue_event_cb = continue_event_cb; 944 up_ptr->continue_event_cb = continue_event_cb;
963 *portref = p_ptr->publ.ref; 945 *portref = p_ptr->ref;
964 tipc_port_unlock(p_ptr); 946 tipc_port_unlock(p_ptr);
965 return 0; 947 return 0;
966} 948}
967 949
968int tipc_portimportance(u32 ref, unsigned int *importance) 950int tipc_portimportance(u32 ref, unsigned int *importance)
969{ 951{
970 struct port *p_ptr; 952 struct tipc_port *p_ptr;
971 953
972 p_ptr = tipc_port_lock(ref); 954 p_ptr = tipc_port_lock(ref);
973 if (!p_ptr) 955 if (!p_ptr)
974 return -EINVAL; 956 return -EINVAL;
975 *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); 957 *importance = (unsigned int)msg_importance(&p_ptr->phdr);
976 tipc_port_unlock(p_ptr); 958 tipc_port_unlock(p_ptr);
977 return 0; 959 return 0;
978} 960}
979 961
980int tipc_set_portimportance(u32 ref, unsigned int imp) 962int tipc_set_portimportance(u32 ref, unsigned int imp)
981{ 963{
982 struct port *p_ptr; 964 struct tipc_port *p_ptr;
983 965
984 if (imp > TIPC_CRITICAL_IMPORTANCE) 966 if (imp > TIPC_CRITICAL_IMPORTANCE)
985 return -EINVAL; 967 return -EINVAL;
@@ -987,7 +969,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
987 p_ptr = tipc_port_lock(ref); 969 p_ptr = tipc_port_lock(ref);
988 if (!p_ptr) 970 if (!p_ptr)
989 return -EINVAL; 971 return -EINVAL;
990 msg_set_importance(&p_ptr->publ.phdr, (u32)imp); 972 msg_set_importance(&p_ptr->phdr, (u32)imp);
991 tipc_port_unlock(p_ptr); 973 tipc_port_unlock(p_ptr);
992 return 0; 974 return 0;
993} 975}
@@ -995,7 +977,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
995 977
996int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) 978int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
997{ 979{
998 struct port *p_ptr; 980 struct tipc_port *p_ptr;
999 struct publication *publ; 981 struct publication *publ;
1000 u32 key; 982 u32 key;
1001 int res = -EINVAL; 983 int res = -EINVAL;
@@ -1004,7 +986,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1004 if (!p_ptr) 986 if (!p_ptr)
1005 return -EINVAL; 987 return -EINVAL;
1006 988
1007 if (p_ptr->publ.connected) 989 if (p_ptr->connected)
1008 goto exit; 990 goto exit;
1009 if (seq->lower > seq->upper) 991 if (seq->lower > seq->upper)
1010 goto exit; 992 goto exit;
@@ -1016,11 +998,11 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1016 goto exit; 998 goto exit;
1017 } 999 }
1018 publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, 1000 publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
1019 scope, p_ptr->publ.ref, key); 1001 scope, p_ptr->ref, key);
1020 if (publ) { 1002 if (publ) {
1021 list_add(&publ->pport_list, &p_ptr->publications); 1003 list_add(&publ->pport_list, &p_ptr->publications);
1022 p_ptr->pub_count++; 1004 p_ptr->pub_count++;
1023 p_ptr->publ.published = 1; 1005 p_ptr->published = 1;
1024 res = 0; 1006 res = 0;
1025 } 1007 }
1026exit: 1008exit:
@@ -1030,7 +1012,7 @@ exit:
1030 1012
1031int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) 1013int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1032{ 1014{
1033 struct port *p_ptr; 1015 struct tipc_port *p_ptr;
1034 struct publication *publ; 1016 struct publication *publ;
1035 struct publication *tpubl; 1017 struct publication *tpubl;
1036 int res = -EINVAL; 1018 int res = -EINVAL;
@@ -1063,37 +1045,37 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1063 } 1045 }
1064 } 1046 }
1065 if (list_empty(&p_ptr->publications)) 1047 if (list_empty(&p_ptr->publications))
1066 p_ptr->publ.published = 0; 1048 p_ptr->published = 0;
1067 tipc_port_unlock(p_ptr); 1049 tipc_port_unlock(p_ptr);
1068 return res; 1050 return res;
1069} 1051}
1070 1052
1071int tipc_connect2port(u32 ref, struct tipc_portid const *peer) 1053int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
1072{ 1054{
1073 struct port *p_ptr; 1055 struct tipc_port *p_ptr;
1074 struct tipc_msg *msg; 1056 struct tipc_msg *msg;
1075 int res = -EINVAL; 1057 int res = -EINVAL;
1076 1058
1077 p_ptr = tipc_port_lock(ref); 1059 p_ptr = tipc_port_lock(ref);
1078 if (!p_ptr) 1060 if (!p_ptr)
1079 return -EINVAL; 1061 return -EINVAL;
1080 if (p_ptr->publ.published || p_ptr->publ.connected) 1062 if (p_ptr->published || p_ptr->connected)
1081 goto exit; 1063 goto exit;
1082 if (!peer->ref) 1064 if (!peer->ref)
1083 goto exit; 1065 goto exit;
1084 1066
1085 msg = &p_ptr->publ.phdr; 1067 msg = &p_ptr->phdr;
1086 msg_set_destnode(msg, peer->node); 1068 msg_set_destnode(msg, peer->node);
1087 msg_set_destport(msg, peer->ref); 1069 msg_set_destport(msg, peer->ref);
1088 msg_set_orignode(msg, tipc_own_addr); 1070 msg_set_orignode(msg, tipc_own_addr);
1089 msg_set_origport(msg, p_ptr->publ.ref); 1071 msg_set_origport(msg, p_ptr->ref);
1090 msg_set_transp_seqno(msg, 42);
1091 msg_set_type(msg, TIPC_CONN_MSG); 1072 msg_set_type(msg, TIPC_CONN_MSG);
1073 msg_set_lookup_scope(msg, 0);
1092 msg_set_hdr_sz(msg, SHORT_H_SIZE); 1074 msg_set_hdr_sz(msg, SHORT_H_SIZE);
1093 1075
1094 p_ptr->probing_interval = PROBING_INTERVAL; 1076 p_ptr->probing_interval = PROBING_INTERVAL;
1095 p_ptr->probing_state = CONFIRMED; 1077 p_ptr->probing_state = CONFIRMED;
1096 p_ptr->publ.connected = 1; 1078 p_ptr->connected = 1;
1097 k_start_timer(&p_ptr->timer, p_ptr->probing_interval); 1079 k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
1098 1080
1099 tipc_nodesub_subscribe(&p_ptr->subscription, peer->node, 1081 tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
@@ -1102,7 +1084,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
1102 res = 0; 1084 res = 0;
1103exit: 1085exit:
1104 tipc_port_unlock(p_ptr); 1086 tipc_port_unlock(p_ptr);
1105 p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); 1087 p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
1106 return res; 1088 return res;
1107} 1089}
1108 1090
@@ -1120,7 +1102,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr)
1120 tp_ptr->connected = 0; 1102 tp_ptr->connected = 0;
1121	/* let timer expire on its own to avoid deadlock! */	1103	/* let timer expire on its own to avoid deadlock! */
1122 tipc_nodesub_unsubscribe( 1104 tipc_nodesub_unsubscribe(
1123 &((struct port *)tp_ptr)->subscription); 1105 &((struct tipc_port *)tp_ptr)->subscription);
1124 res = 0; 1106 res = 0;
1125 } else { 1107 } else {
1126 res = -ENOTCONN; 1108 res = -ENOTCONN;
@@ -1135,7 +1117,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr)
1135 1117
1136int tipc_disconnect(u32 ref) 1118int tipc_disconnect(u32 ref)
1137{ 1119{
1138 struct port *p_ptr; 1120 struct tipc_port *p_ptr;
1139 int res; 1121 int res;
1140 1122
1141 p_ptr = tipc_port_lock(ref); 1123 p_ptr = tipc_port_lock(ref);
@@ -1151,15 +1133,15 @@ int tipc_disconnect(u32 ref)
1151 */ 1133 */
1152int tipc_shutdown(u32 ref) 1134int tipc_shutdown(u32 ref)
1153{ 1135{
1154 struct port *p_ptr; 1136 struct tipc_port *p_ptr;
1155 struct sk_buff *buf = NULL; 1137 struct sk_buff *buf = NULL;
1156 1138
1157 p_ptr = tipc_port_lock(ref); 1139 p_ptr = tipc_port_lock(ref);
1158 if (!p_ptr) 1140 if (!p_ptr)
1159 return -EINVAL; 1141 return -EINVAL;
1160 1142
1161 if (p_ptr->publ.connected) { 1143 if (p_ptr->connected) {
1162 u32 imp = msg_importance(&p_ptr->publ.phdr); 1144 u32 imp = msg_importance(&p_ptr->phdr);
1163 if (imp < TIPC_CRITICAL_IMPORTANCE) 1145 if (imp < TIPC_CRITICAL_IMPORTANCE)
1164 imp++; 1146 imp++;
1165 buf = port_build_proto_msg(port_peerport(p_ptr), 1147 buf = port_build_proto_msg(port_peerport(p_ptr),
@@ -1169,7 +1151,6 @@ int tipc_shutdown(u32 ref)
1169 imp, 1151 imp,
1170 TIPC_CONN_MSG, 1152 TIPC_CONN_MSG,
1171 TIPC_CONN_SHUTDOWN, 1153 TIPC_CONN_SHUTDOWN,
1172 port_out_seqno(p_ptr),
1173 0); 1154 0);
1174 } 1155 }
1175 tipc_port_unlock(p_ptr); 1156 tipc_port_unlock(p_ptr);
@@ -1182,13 +1163,14 @@ int tipc_shutdown(u32 ref)
1182 * message for this node. 1163 * message for this node.
1183 */ 1164 */
1184 1165
1185static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, 1166static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect,
1186 struct iovec const *msg_sect) 1167 struct iovec const *msg_sect,
1168 unsigned int total_len)
1187{ 1169{
1188 struct sk_buff *buf; 1170 struct sk_buff *buf;
1189 int res; 1171 int res;
1190 1172
1191 res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect, 1173 res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len,
1192 MAX_MSG_SIZE, !sender->user_port, &buf); 1174 MAX_MSG_SIZE, !sender->user_port, &buf);
1193 if (likely(buf)) 1175 if (likely(buf))
1194 tipc_port_recv_msg(buf); 1176 tipc_port_recv_msg(buf);
@@ -1199,36 +1181,37 @@ static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
1199 * tipc_send - send message sections on connection 1181 * tipc_send - send message sections on connection
1200 */ 1182 */
1201 1183
1202int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) 1184int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect,
1185 unsigned int total_len)
1203{ 1186{
1204 struct port *p_ptr; 1187 struct tipc_port *p_ptr;
1205 u32 destnode; 1188 u32 destnode;
1206 int res; 1189 int res;
1207 1190
1208 p_ptr = tipc_port_deref(ref); 1191 p_ptr = tipc_port_deref(ref);
1209 if (!p_ptr || !p_ptr->publ.connected) 1192 if (!p_ptr || !p_ptr->connected)
1210 return -EINVAL; 1193 return -EINVAL;
1211 1194
1212 p_ptr->publ.congested = 1; 1195 p_ptr->congested = 1;
1213 if (!tipc_port_congested(p_ptr)) { 1196 if (!tipc_port_congested(p_ptr)) {
1214 destnode = port_peernode(p_ptr); 1197 destnode = port_peernode(p_ptr);
1215 if (likely(destnode != tipc_own_addr)) 1198 if (likely(destnode != tipc_own_addr))
1216 res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, 1199 res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
1217 destnode); 1200 total_len, destnode);
1218 else 1201 else
1219 res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1202 res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect,
1203 total_len);
1220 1204
1221 if (likely(res != -ELINKCONG)) { 1205 if (likely(res != -ELINKCONG)) {
1222 port_incr_out_seqno(p_ptr); 1206 p_ptr->congested = 0;
1223 p_ptr->publ.congested = 0; 1207 if (res > 0)
1224 p_ptr->sent++; 1208 p_ptr->sent++;
1225 return res; 1209 return res;
1226 } 1210 }
1227 } 1211 }
1228 if (port_unreliable(p_ptr)) { 1212 if (port_unreliable(p_ptr)) {
1229 p_ptr->publ.congested = 0; 1213 p_ptr->congested = 0;
1230 /* Just calculate msg length and return */ 1214 return total_len;
1231 return tipc_msg_calc_data_size(msg_sect, num_sect);
1232 } 1215 }
1233 return -ELINKCONG; 1216 return -ELINKCONG;
1234} 1217}
@@ -1238,19 +1221,20 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
1238 */ 1221 */
1239 1222
1240int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, 1223int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
1241 unsigned int num_sect, struct iovec const *msg_sect) 1224 unsigned int num_sect, struct iovec const *msg_sect,
1225 unsigned int total_len)
1242{ 1226{
1243 struct port *p_ptr; 1227 struct tipc_port *p_ptr;
1244 struct tipc_msg *msg; 1228 struct tipc_msg *msg;
1245 u32 destnode = domain; 1229 u32 destnode = domain;
1246 u32 destport; 1230 u32 destport;
1247 int res; 1231 int res;
1248 1232
1249 p_ptr = tipc_port_deref(ref); 1233 p_ptr = tipc_port_deref(ref);
1250 if (!p_ptr || p_ptr->publ.connected) 1234 if (!p_ptr || p_ptr->connected)
1251 return -EINVAL; 1235 return -EINVAL;
1252 1236
1253 msg = &p_ptr->publ.phdr; 1237 msg = &p_ptr->phdr;
1254 msg_set_type(msg, TIPC_NAMED_MSG); 1238 msg_set_type(msg, TIPC_NAMED_MSG);
1255 msg_set_orignode(msg, tipc_own_addr); 1239 msg_set_orignode(msg, tipc_own_addr);
1256 msg_set_origport(msg, ref); 1240 msg_set_origport(msg, ref);
@@ -1263,21 +1247,25 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
1263 msg_set_destport(msg, destport); 1247 msg_set_destport(msg, destport);
1264 1248
1265 if (likely(destport)) { 1249 if (likely(destport)) {
1266 p_ptr->sent++;
1267 if (likely(destnode == tipc_own_addr)) 1250 if (likely(destnode == tipc_own_addr))
1268 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1251 res = tipc_port_recv_sections(p_ptr, num_sect,
1269 res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, 1252 msg_sect, total_len);
1270 destnode); 1253 else
1271 if (likely(res != -ELINKCONG)) 1254 res = tipc_link_send_sections_fast(p_ptr, msg_sect,
1255 num_sect, total_len,
1256 destnode);
1257 if (likely(res != -ELINKCONG)) {
1258 if (res > 0)
1259 p_ptr->sent++;
1272 return res; 1260 return res;
1261 }
1273 if (port_unreliable(p_ptr)) { 1262 if (port_unreliable(p_ptr)) {
1274 /* Just calculate msg length and return */ 1263 return total_len;
1275 return tipc_msg_calc_data_size(msg_sect, num_sect);
1276 } 1264 }
1277 return -ELINKCONG; 1265 return -ELINKCONG;
1278 } 1266 }
1279 return tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect, 1267 return tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect,
1280 TIPC_ERR_NO_NAME); 1268 total_len, TIPC_ERR_NO_NAME);
1281} 1269}
1282 1270
1283/** 1271/**
@@ -1285,32 +1273,39 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
1285 */ 1273 */
1286 1274
1287int tipc_send2port(u32 ref, struct tipc_portid const *dest, 1275int tipc_send2port(u32 ref, struct tipc_portid const *dest,
1288 unsigned int num_sect, struct iovec const *msg_sect) 1276 unsigned int num_sect, struct iovec const *msg_sect,
1277 unsigned int total_len)
1289{ 1278{
1290 struct port *p_ptr; 1279 struct tipc_port *p_ptr;
1291 struct tipc_msg *msg; 1280 struct tipc_msg *msg;
1292 int res; 1281 int res;
1293 1282
1294 p_ptr = tipc_port_deref(ref); 1283 p_ptr = tipc_port_deref(ref);
1295 if (!p_ptr || p_ptr->publ.connected) 1284 if (!p_ptr || p_ptr->connected)
1296 return -EINVAL; 1285 return -EINVAL;
1297 1286
1298 msg = &p_ptr->publ.phdr; 1287 msg = &p_ptr->phdr;
1299 msg_set_type(msg, TIPC_DIRECT_MSG); 1288 msg_set_type(msg, TIPC_DIRECT_MSG);
1289 msg_set_lookup_scope(msg, 0);
1300 msg_set_orignode(msg, tipc_own_addr); 1290 msg_set_orignode(msg, tipc_own_addr);
1301 msg_set_origport(msg, ref); 1291 msg_set_origport(msg, ref);
1302 msg_set_destnode(msg, dest->node); 1292 msg_set_destnode(msg, dest->node);
1303 msg_set_destport(msg, dest->ref); 1293 msg_set_destport(msg, dest->ref);
1304 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); 1294 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
1305 p_ptr->sent++; 1295
1306 if (dest->node == tipc_own_addr) 1296 if (dest->node == tipc_own_addr)
1307 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1297 res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect,
1308 res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, dest->node); 1298 total_len);
1309 if (likely(res != -ELINKCONG)) 1299 else
1300 res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
1301 total_len, dest->node);
1302 if (likely(res != -ELINKCONG)) {
1303 if (res > 0)
1304 p_ptr->sent++;
1310 return res; 1305 return res;
1306 }
1311 if (port_unreliable(p_ptr)) { 1307 if (port_unreliable(p_ptr)) {
1312 /* Just calculate msg length and return */ 1308 return total_len;
1313 return tipc_msg_calc_data_size(msg_sect, num_sect);
1314 } 1309 }
1315 return -ELINKCONG; 1310 return -ELINKCONG;
1316} 1311}
@@ -1322,15 +1317,15 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest,
1322int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, 1317int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
1323 struct sk_buff *buf, unsigned int dsz) 1318 struct sk_buff *buf, unsigned int dsz)
1324{ 1319{
1325 struct port *p_ptr; 1320 struct tipc_port *p_ptr;
1326 struct tipc_msg *msg; 1321 struct tipc_msg *msg;
1327 int res; 1322 int res;
1328 1323
1329 p_ptr = (struct port *)tipc_ref_deref(ref); 1324 p_ptr = (struct tipc_port *)tipc_ref_deref(ref);
1330 if (!p_ptr || p_ptr->publ.connected) 1325 if (!p_ptr || p_ptr->connected)
1331 return -EINVAL; 1326 return -EINVAL;
1332 1327
1333 msg = &p_ptr->publ.phdr; 1328 msg = &p_ptr->phdr;
1334 msg_set_type(msg, TIPC_DIRECT_MSG); 1329 msg_set_type(msg, TIPC_DIRECT_MSG);
1335 msg_set_orignode(msg, tipc_own_addr); 1330 msg_set_orignode(msg, tipc_own_addr);
1336 msg_set_origport(msg, ref); 1331 msg_set_origport(msg, ref);
@@ -1343,12 +1338,16 @@ int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
1343 1338
1344 skb_push(buf, DIR_MSG_H_SIZE); 1339 skb_push(buf, DIR_MSG_H_SIZE);
1345 skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE); 1340 skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
1346 p_ptr->sent++; 1341
1347 if (dest->node == tipc_own_addr) 1342 if (dest->node == tipc_own_addr)
1348 return tipc_port_recv_msg(buf); 1343 res = tipc_port_recv_msg(buf);
1349 res = tipc_send_buf_fast(buf, dest->node); 1344 else
1350 if (likely(res != -ELINKCONG)) 1345 res = tipc_send_buf_fast(buf, dest->node);
1346 if (likely(res != -ELINKCONG)) {
1347 if (res > 0)
1348 p_ptr->sent++;
1351 return res; 1349 return res;
1350 }
1352 if (port_unreliable(p_ptr)) 1351 if (port_unreliable(p_ptr))
1353 return dsz; 1352 return dsz;
1354 return -ELINKCONG; 1353 return -ELINKCONG;
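
The reworked senders above take the payload size as an explicit total_len argument and, on the unreliable-port fallback, simply return it instead of recomputing it with tipc_msg_calc_data_size(). A minimal userspace sketch (not part of the patch; plain POSIX iovec handling) of how a caller derives that value from its scatter/gather list:

	#include <stddef.h>
	#include <sys/uio.h>

	/* Sum the section lengths, as callers of the reworked tipc_send*()
	 * routines are now expected to do before passing total_len down. */
	static size_t iov_total_len(const struct iovec *iov, unsigned int num_sect)
	{
		size_t total = 0;
		unsigned int i;

		for (i = 0; i < num_sect; i++)
			total += iov[i].iov_len;
		return total;
	}
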
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8e84b989949c..b9aa34195aec 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -2,7 +2,7 @@
2 * net/tipc/port.h: Include file for TIPC port code 2 * net/tipc/port.h: Include file for TIPC port code
3 * 3 *
4 * Copyright (c) 1994-2007, Ericsson AB 4 * Copyright (c) 1994-2007, Ericsson AB
5 * Copyright (c) 2004-2007, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,7 @@ struct user_port {
95}; 95};
96 96
97/** 97/**
98 * struct tipc_port - TIPC port info available to socket API 98 * struct tipc_port - TIPC port structure
99 * @usr_handle: pointer to additional user-defined information about port 99 * @usr_handle: pointer to additional user-defined information about port
100 * @lock: pointer to spinlock for controlling access to port 100 * @lock: pointer to spinlock for controlling access to port
101 * @connected: non-zero if port is currently connected to a peer port 101 * @connected: non-zero if port is currently connected to a peer port
@@ -107,43 +107,33 @@ struct user_port {
107 * @max_pkt: maximum packet size "hint" used when building messages sent by port 107 * @max_pkt: maximum packet size "hint" used when building messages sent by port
108 * @ref: unique reference to port in TIPC object registry 108 * @ref: unique reference to port in TIPC object registry
109 * @phdr: preformatted message header used when sending messages 109 * @phdr: preformatted message header used when sending messages
110 */
111struct tipc_port {
112 void *usr_handle;
113 spinlock_t *lock;
114 int connected;
115 u32 conn_type;
116 u32 conn_instance;
117 u32 conn_unacked;
118 int published;
119 u32 congested;
120 u32 max_pkt;
121 u32 ref;
122 struct tipc_msg phdr;
123};
124
125/**
126 * struct port - TIPC port structure
127 * @publ: TIPC port info available to privileged users
128 * @port_list: adjacent ports in TIPC's global list of ports 110 * @port_list: adjacent ports in TIPC's global list of ports
129 * @dispatcher: ptr to routine which handles received messages 111 * @dispatcher: ptr to routine which handles received messages
130 * @wakeup: ptr to routine to call when port is no longer congested 112 * @wakeup: ptr to routine to call when port is no longer congested
131 * @user_port: ptr to user port associated with port (if any) 113 * @user_port: ptr to user port associated with port (if any)
132 * @wait_list: adjacent ports in list of ports waiting on link congestion 114 * @wait_list: adjacent ports in list of ports waiting on link congestion
133 * @waiting_pkts: 115 * @waiting_pkts:
134 * @sent: 116 * @sent: # of non-empty messages sent by port
135 * @acked: 117 * @acked: # of non-empty message acknowledgements from connected port's peer
136 * @publications: list of publications for port 118 * @publications: list of publications for port
137 * @pub_count: total # of publications port has made during its lifetime 119 * @pub_count: total # of publications port has made during its lifetime
138 * @probing_state: 120 * @probing_state:
139 * @probing_interval: 121 * @probing_interval:
140 * @last_in_seqno:
141 * @timer_ref: 122 * @timer_ref:
142 * @subscription: "node down" subscription used to terminate failed connections 123 * @subscription: "node down" subscription used to terminate failed connections
143 */ 124 */
144 125struct tipc_port {
145struct port { 126 void *usr_handle;
146 struct tipc_port publ; 127 spinlock_t *lock;
128 int connected;
129 u32 conn_type;
130 u32 conn_instance;
131 u32 conn_unacked;
132 int published;
133 u32 congested;
134 u32 max_pkt;
135 u32 ref;
136 struct tipc_msg phdr;
147 struct list_head port_list; 137 struct list_head port_list;
148 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); 138 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
149 void (*wakeup)(struct tipc_port *); 139 void (*wakeup)(struct tipc_port *);
@@ -156,7 +146,6 @@ struct port {
156 u32 pub_count; 146 u32 pub_count;
157 u32 probing_state; 147 u32 probing_state;
158 u32 probing_interval; 148 u32 probing_interval;
159 u32 last_in_seqno;
160 struct timer_list timer; 149 struct timer_list timer;
161 struct tipc_node_subscr subscription; 150 struct tipc_node_subscr subscription;
162}; 151};
@@ -216,23 +205,27 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr);
216/* 205/*
217 * TIPC messaging routines 206 * TIPC messaging routines
218 */ 207 */
219int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); 208int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect,
209 unsigned int total_len);
220 210
221int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, 211int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
222 unsigned int num_sect, struct iovec const *msg_sect); 212 unsigned int num_sect, struct iovec const *msg_sect,
213 unsigned int total_len);
223 214
224int tipc_send2port(u32 portref, struct tipc_portid const *dest, 215int tipc_send2port(u32 portref, struct tipc_portid const *dest,
225 unsigned int num_sect, struct iovec const *msg_sect); 216 unsigned int num_sect, struct iovec const *msg_sect,
217 unsigned int total_len);
226 218
227int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, 219int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest,
228 struct sk_buff *buf, unsigned int dsz); 220 struct sk_buff *buf, unsigned int dsz);
229 221
230int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, 222int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
231 unsigned int section_count, struct iovec const *msg); 223 unsigned int section_count, struct iovec const *msg,
224 unsigned int total_len);
232 225
233int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, 226int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
234 struct iovec const *msg_sect, u32 num_sect, 227 struct iovec const *msg_sect, u32 num_sect,
235 int err); 228 unsigned int total_len, int err);
236struct sk_buff *tipc_port_get_ports(void); 229struct sk_buff *tipc_port_get_ports(void);
237void tipc_port_recv_proto_msg(struct sk_buff *buf); 230void tipc_port_recv_proto_msg(struct sk_buff *buf);
238void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp); 231void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp);
@@ -242,9 +235,9 @@ void tipc_port_reinit(void);
242 * tipc_port_lock - lock port instance referred to and return its pointer 235 * tipc_port_lock - lock port instance referred to and return its pointer
243 */ 236 */
244 237
245static inline struct port *tipc_port_lock(u32 ref) 238static inline struct tipc_port *tipc_port_lock(u32 ref)
246{ 239{
247 return (struct port *)tipc_ref_lock(ref); 240 return (struct tipc_port *)tipc_ref_lock(ref);
248} 241}
249 242
250/** 243/**
@@ -253,27 +246,27 @@ static inline struct port *tipc_port_lock(u32 ref)
253 * Can use pointer instead of tipc_ref_unlock() since port is already locked. 246 * Can use pointer instead of tipc_ref_unlock() since port is already locked.
254 */ 247 */
255 248
256static inline void tipc_port_unlock(struct port *p_ptr) 249static inline void tipc_port_unlock(struct tipc_port *p_ptr)
257{ 250{
258 spin_unlock_bh(p_ptr->publ.lock); 251 spin_unlock_bh(p_ptr->lock);
259} 252}
260 253
261static inline struct port *tipc_port_deref(u32 ref) 254static inline struct tipc_port *tipc_port_deref(u32 ref)
262{ 255{
263 return (struct port *)tipc_ref_deref(ref); 256 return (struct tipc_port *)tipc_ref_deref(ref);
264} 257}
265 258
266static inline u32 tipc_peer_port(struct port *p_ptr) 259static inline u32 tipc_peer_port(struct tipc_port *p_ptr)
267{ 260{
268 return msg_destport(&p_ptr->publ.phdr); 261 return msg_destport(&p_ptr->phdr);
269} 262}
270 263
271static inline u32 tipc_peer_node(struct port *p_ptr) 264static inline u32 tipc_peer_node(struct tipc_port *p_ptr)
272{ 265{
273 return msg_destnode(&p_ptr->publ.phdr); 266 return msg_destnode(&p_ptr->phdr);
274} 267}
275 268
276static inline int tipc_port_congested(struct port *p_ptr) 269static inline int tipc_port_congested(struct tipc_port *p_ptr)
277{ 270{
278 return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); 271 return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
279} 272}
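
tipc_port_congested() above flags a connection as congested once two full flow-control windows of sent messages remain unacknowledged. A standalone sketch of the same predicate; the window size used here is an assumption for illustration, not taken from this hunk:

	#include <stdio.h>

	#define TIPC_FLOW_CONTROL_WIN 50	/* assumed value, illustration only */

	/* Mirror of the inline helper above: congested once
	 * (sent - acked) reaches two full windows. */
	static int port_congested(unsigned int sent, unsigned int acked)
	{
		return (sent - acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
	}

	int main(void)
	{
		printf("%d\n", port_congested(120, 30));	/* 90 unacked  -> 0 */
		printf("%d\n", port_congested(130, 30));	/* 100 unacked -> 1 */
		return 0;
	}
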
@@ -284,7 +277,7 @@ static inline int tipc_port_congested(struct port *p_ptr)
284 277
285static inline int tipc_port_recv_msg(struct sk_buff *buf) 278static inline int tipc_port_recv_msg(struct sk_buff *buf)
286{ 279{
287 struct port *p_ptr; 280 struct tipc_port *p_ptr;
288 struct tipc_msg *msg = buf_msg(buf); 281 struct tipc_msg *msg = buf_msg(buf);
289 u32 destport = msg_destport(msg); 282 u32 destport = msg_destport(msg);
290 u32 dsz = msg_data_sz(msg); 283 u32 dsz = msg_data_sz(msg);
@@ -299,7 +292,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf)
299 /* validate destination & pass to port, otherwise reject message */ 292 /* validate destination & pass to port, otherwise reject message */
300 p_ptr = tipc_port_lock(destport); 293 p_ptr = tipc_port_lock(destport);
301 if (likely(p_ptr)) { 294 if (likely(p_ptr)) {
302 if (likely(p_ptr->publ.connected)) { 295 if (likely(p_ptr->connected)) {
303 if ((unlikely(msg_origport(msg) != tipc_peer_port(p_ptr))) || 296 if ((unlikely(msg_origport(msg) != tipc_peer_port(p_ptr))) ||
304 (unlikely(msg_orignode(msg) != tipc_peer_node(p_ptr))) || 297 (unlikely(msg_orignode(msg) != tipc_peer_node(p_ptr))) ||
305 (unlikely(!msg_connected(msg)))) { 298 (unlikely(!msg_connected(msg)))) {
@@ -308,7 +301,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf)
308 goto reject; 301 goto reject;
309 } 302 }
310 } 303 }
311 err = p_ptr->dispatcher(&p_ptr->publ, buf); 304 err = p_ptr->dispatcher(p_ptr, buf);
312 tipc_port_unlock(p_ptr); 305 tipc_port_unlock(p_ptr);
313 if (likely(!err)) 306 if (likely(!err))
314 return dsz; 307 return dsz;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 2b02a3a80313..338837396642 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2,7 +2,7 @@
2 * net/tipc/socket.c: TIPC socket API 2 * net/tipc/socket.c: TIPC socket API
3 * 3 *
4 * Copyright (c) 2001-2007, Ericsson AB 4 * Copyright (c) 2001-2007, Ericsson AB
5 * Copyright (c) 2004-2008, Wind River Systems 5 * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -58,6 +58,9 @@ struct tipc_sock {
58#define tipc_sk(sk) ((struct tipc_sock *)(sk)) 58#define tipc_sk(sk) ((struct tipc_sock *)(sk))
59#define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p)) 59#define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p))
60 60
61#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \
62 (sock->state == SS_DISCONNECTING))
63
61static int backlog_rcv(struct sock *sk, struct sk_buff *skb); 64static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
62static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); 65static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
63static void wakeupdispatch(struct tipc_port *tport); 66static void wakeupdispatch(struct tipc_port *tport);
@@ -241,7 +244,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
241 tipc_set_portunreliable(tp_ptr->ref, 1); 244 tipc_set_portunreliable(tp_ptr->ref, 1);
242 } 245 }
243 246
244 atomic_inc(&tipc_user_count);
245 return 0; 247 return 0;
246} 248}
247 249
@@ -290,7 +292,7 @@ static int release(struct socket *sock)
290 if (buf == NULL) 292 if (buf == NULL)
291 break; 293 break;
292 atomic_dec(&tipc_queue_size); 294 atomic_dec(&tipc_queue_size);
293 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) 295 if (TIPC_SKB_CB(buf)->handle != 0)
294 buf_discard(buf); 296 buf_discard(buf);
295 else { 297 else {
296 if ((sock->state == SS_CONNECTING) || 298 if ((sock->state == SS_CONNECTING) ||
@@ -321,7 +323,6 @@ static int release(struct socket *sock)
321 sock_put(sk); 323 sock_put(sk);
322 sock->sk = NULL; 324 sock->sk = NULL;
323 325
324 atomic_dec(&tipc_user_count);
325 return res; 326 return res;
326} 327}
327 328
@@ -495,6 +496,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
495 if (likely(dest->addr.name.name.type != TIPC_CFG_SRV)) 496 if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
496 return -EACCES; 497 return -EACCES;
497 498
499 if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
500 return -EMSGSIZE;
498 if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) 501 if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
499 return -EFAULT; 502 return -EFAULT;
500 if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) 503 if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
@@ -532,6 +535,9 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
532 if (unlikely((m->msg_namelen < sizeof(*dest)) || 535 if (unlikely((m->msg_namelen < sizeof(*dest)) ||
533 (dest->family != AF_TIPC))) 536 (dest->family != AF_TIPC)))
534 return -EINVAL; 537 return -EINVAL;
538 if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
539 (m->msg_iovlen > (unsigned)INT_MAX))
540 return -EMSGSIZE;
535 541
536 if (iocb) 542 if (iocb)
537 lock_sock(sk); 543 lock_sock(sk);
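
send_msg() now rejects oversized requests up front with -EMSGSIZE rather than letting them fail deeper in the stack. A hedged userspace-style mirror of that guard; the limit value below is an assumption for illustration, not quoted from this patch:

	#include <errno.h>
	#include <limits.h>
	#include <stddef.h>

	#define TIPC_MAX_USER_MSG_SIZE 66000	/* assumed limit, illustration only */

	/* Mirror of the new early check: refuse a send whose total payload
	 * or iovec count is out of range before any buffers are built. */
	static int check_send_size(size_t total_len, size_t iovlen)
	{
		if (total_len > TIPC_MAX_USER_MSG_SIZE || iovlen > (size_t)INT_MAX)
			return -EMSGSIZE;
		return 0;
	}
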
@@ -570,12 +576,14 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
570 &dest->addr.name.name, 576 &dest->addr.name.name,
571 dest->addr.name.domain, 577 dest->addr.name.domain,
572 m->msg_iovlen, 578 m->msg_iovlen,
573 m->msg_iov); 579 m->msg_iov,
580 total_len);
574 } else if (dest->addrtype == TIPC_ADDR_ID) { 581 } else if (dest->addrtype == TIPC_ADDR_ID) {
575 res = tipc_send2port(tport->ref, 582 res = tipc_send2port(tport->ref,
576 &dest->addr.id, 583 &dest->addr.id,
577 m->msg_iovlen, 584 m->msg_iovlen,
578 m->msg_iov); 585 m->msg_iov,
586 total_len);
579 } else if (dest->addrtype == TIPC_ADDR_MCAST) { 587 } else if (dest->addrtype == TIPC_ADDR_MCAST) {
580 if (needs_conn) { 588 if (needs_conn) {
581 res = -EOPNOTSUPP; 589 res = -EOPNOTSUPP;
@@ -587,7 +595,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
587 res = tipc_multicast(tport->ref, 595 res = tipc_multicast(tport->ref,
588 &dest->addr.nameseq, 596 &dest->addr.nameseq,
589 m->msg_iovlen, 597 m->msg_iovlen,
590 m->msg_iov); 598 m->msg_iov,
599 total_len);
591 } 600 }
592 if (likely(res != -ELINKCONG)) { 601 if (likely(res != -ELINKCONG)) {
593 if (needs_conn && (res >= 0)) 602 if (needs_conn && (res >= 0))
@@ -637,6 +646,10 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
637 if (unlikely(dest)) 646 if (unlikely(dest))
638 return send_msg(iocb, sock, m, total_len); 647 return send_msg(iocb, sock, m, total_len);
639 648
649 if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
650 (m->msg_iovlen > (unsigned)INT_MAX))
651 return -EMSGSIZE;
652
640 if (iocb) 653 if (iocb)
641 lock_sock(sk); 654 lock_sock(sk);
642 655
@@ -649,7 +662,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
649 break; 662 break;
650 } 663 }
651 664
652 res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov); 665 res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov,
666 total_len);
653 if (likely(res != -ELINKCONG)) 667 if (likely(res != -ELINKCONG))
654 break; 668 break;
655 if (m->msg_flags & MSG_DONTWAIT) { 669 if (m->msg_flags & MSG_DONTWAIT) {
@@ -720,6 +734,12 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
720 goto exit; 734 goto exit;
721 } 735 }
722 736
737 if ((total_len > (unsigned)INT_MAX) ||
738 (m->msg_iovlen > (unsigned)INT_MAX)) {
739 res = -EMSGSIZE;
740 goto exit;
741 }
742
723 /* 743 /*
724 * Send each iovec entry using one or more messages 744 * Send each iovec entry using one or more messages
725 * 745 *
@@ -750,7 +770,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
750 bytes_to_send = curr_left; 770 bytes_to_send = curr_left;
751 my_iov.iov_base = curr_start; 771 my_iov.iov_base = curr_start;
752 my_iov.iov_len = bytes_to_send; 772 my_iov.iov_len = bytes_to_send;
753 res = send_packet(NULL, sock, &my_msg, 0); 773 res = send_packet(NULL, sock, &my_msg, bytes_to_send);
754 if (res < 0) { 774 if (res < 0) {
755 if (bytes_sent) 775 if (bytes_sent)
756 res = bytes_sent; 776 res = bytes_sent;
@@ -911,15 +931,13 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
911 struct tipc_port *tport = tipc_sk_port(sk); 931 struct tipc_port *tport = tipc_sk_port(sk);
912 struct sk_buff *buf; 932 struct sk_buff *buf;
913 struct tipc_msg *msg; 933 struct tipc_msg *msg;
934 long timeout;
914 unsigned int sz; 935 unsigned int sz;
915 u32 err; 936 u32 err;
916 int res; 937 int res;
917 938
918 /* Catch invalid receive requests */ 939 /* Catch invalid receive requests */
919 940
920 if (m->msg_iovlen != 1)
921 return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */
922
923 if (unlikely(!buf_len)) 941 if (unlikely(!buf_len))
924 return -EINVAL; 942 return -EINVAL;
925 943
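
The hunk above drops the old msg_iovlen != 1 restriction; a later hunk in this file switches the data copy to skb_copy_datagram_iovec(), so scattered receives work. A hypothetical userspace sketch of a receive the previous code refused with -EOPNOTSUPP:

	#include <sys/socket.h>
	#include <sys/uio.h>

	/* Scatter one message across two user buffers; with the iovec-aware
	 * copy in the reworked receive path this is no longer rejected. */
	static ssize_t recv_scattered(int sock, void *hdr, size_t hdr_len,
				      void *body, size_t body_len)
	{
		struct iovec iov[2] = {
			{ .iov_base = hdr,  .iov_len = hdr_len  },
			{ .iov_base = body, .iov_len = body_len },
		};
		struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2 };

		return recvmsg(sock, &msg, 0);
	}
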
@@ -930,6 +948,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
930 goto exit; 948 goto exit;
931 } 949 }
932 950
951 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
933restart: 952restart:
934 953
935 /* Look for a message in receive queue; wait if necessary */ 954 /* Look for a message in receive queue; wait if necessary */
@@ -939,17 +958,15 @@ restart:
939 res = -ENOTCONN; 958 res = -ENOTCONN;
940 goto exit; 959 goto exit;
941 } 960 }
942 if (flags & MSG_DONTWAIT) { 961 if (timeout <= 0L) {
943 res = -EWOULDBLOCK; 962 res = timeout ? timeout : -EWOULDBLOCK;
944 goto exit; 963 goto exit;
945 } 964 }
946 release_sock(sk); 965 release_sock(sk);
947 res = wait_event_interruptible(*sk_sleep(sk), 966 timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
948 (!skb_queue_empty(&sk->sk_receive_queue) || 967 tipc_rx_ready(sock),
949 (sock->state == SS_DISCONNECTING))); 968 timeout);
950 lock_sock(sk); 969 lock_sock(sk);
951 if (res)
952 goto exit;
953 } 970 }
954 971
955 /* Look at first message in receive queue */ 972 /* Look at first message in receive queue */
@@ -991,11 +1008,10 @@ restart:
991 sz = buf_len; 1008 sz = buf_len;
992 m->msg_flags |= MSG_TRUNC; 1009 m->msg_flags |= MSG_TRUNC;
993 } 1010 }
994 if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg), 1011 res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
995 sz))) { 1012 m->msg_iov, sz);
996 res = -EFAULT; 1013 if (res)
997 goto exit; 1014 goto exit;
998 }
999 res = sz; 1015 res = sz;
1000 } else { 1016 } else {
1001 if ((sock->state == SS_READY) || 1017 if ((sock->state == SS_READY) ||
@@ -1038,19 +1054,15 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1038 struct tipc_port *tport = tipc_sk_port(sk); 1054 struct tipc_port *tport = tipc_sk_port(sk);
1039 struct sk_buff *buf; 1055 struct sk_buff *buf;
1040 struct tipc_msg *msg; 1056 struct tipc_msg *msg;
1057 long timeout;
1041 unsigned int sz; 1058 unsigned int sz;
1042 int sz_to_copy, target, needed; 1059 int sz_to_copy, target, needed;
1043 int sz_copied = 0; 1060 int sz_copied = 0;
1044 char __user *crs = m->msg_iov->iov_base;
1045 unsigned char *buf_crs;
1046 u32 err; 1061 u32 err;
1047 int res = 0; 1062 int res = 0;
1048 1063
1049 /* Catch invalid receive attempts */ 1064 /* Catch invalid receive attempts */
1050 1065
1051 if (m->msg_iovlen != 1)
1052 return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */
1053
1054 if (unlikely(!buf_len)) 1066 if (unlikely(!buf_len))
1055 return -EINVAL; 1067 return -EINVAL;
1056 1068
@@ -1063,7 +1075,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1063 } 1075 }
1064 1076
1065 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); 1077 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1066 1078 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1067restart: 1079restart:
1068 1080
1069 /* Look for a message in receive queue; wait if necessary */ 1081 /* Look for a message in receive queue; wait if necessary */
@@ -1073,17 +1085,15 @@ restart:
1073 res = -ENOTCONN; 1085 res = -ENOTCONN;
1074 goto exit; 1086 goto exit;
1075 } 1087 }
1076 if (flags & MSG_DONTWAIT) { 1088 if (timeout <= 0L) {
1077 res = -EWOULDBLOCK; 1089 res = timeout ? timeout : -EWOULDBLOCK;
1078 goto exit; 1090 goto exit;
1079 } 1091 }
1080 release_sock(sk); 1092 release_sock(sk);
1081 res = wait_event_interruptible(*sk_sleep(sk), 1093 timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
1082 (!skb_queue_empty(&sk->sk_receive_queue) || 1094 tipc_rx_ready(sock),
1083 (sock->state == SS_DISCONNECTING))); 1095 timeout);
1084 lock_sock(sk); 1096 lock_sock(sk);
1085 if (res)
1086 goto exit;
1087 } 1097 }
1088 1098
1089 /* Look at first message in receive queue */ 1099 /* Look at first message in receive queue */
@@ -1112,24 +1122,25 @@ restart:
1112 /* Capture message data (if valid) & compute return value (always) */ 1122 /* Capture message data (if valid) & compute return value (always) */
1113 1123
1114 if (!err) { 1124 if (!err) {
1115 buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); 1125 u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
1116 sz = (unsigned char *)msg + msg_size(msg) - buf_crs;
1117 1126
1127 sz -= offset;
1118 needed = (buf_len - sz_copied); 1128 needed = (buf_len - sz_copied);
1119 sz_to_copy = (sz <= needed) ? sz : needed; 1129 sz_to_copy = (sz <= needed) ? sz : needed;
1120 if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) { 1130
1121 res = -EFAULT; 1131 res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
1132 m->msg_iov, sz_to_copy);
1133 if (res)
1122 goto exit; 1134 goto exit;
1123 } 1135
1124 sz_copied += sz_to_copy; 1136 sz_copied += sz_to_copy;
1125 1137
1126 if (sz_to_copy < sz) { 1138 if (sz_to_copy < sz) {
1127 if (!(flags & MSG_PEEK)) 1139 if (!(flags & MSG_PEEK))
1128 TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy; 1140 TIPC_SKB_CB(buf)->handle =
1141 (void *)(unsigned long)(offset + sz_to_copy);
1129 goto exit; 1142 goto exit;
1130 } 1143 }
1131
1132 crs += sz_to_copy;
1133 } else { 1144 } else {
1134 if (sz_copied != 0) 1145 if (sz_copied != 0)
1135 goto exit; /* can't add error msg to valid data */ 1146 goto exit; /* can't add error msg to valid data */
@@ -1256,7 +1267,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1256 1267
1257 /* Enqueue message (finally!) */ 1268 /* Enqueue message (finally!) */
1258 1269
1259 TIPC_SKB_CB(buf)->handle = msg_data(msg); 1270 TIPC_SKB_CB(buf)->handle = 0;
1260 atomic_inc(&tipc_queue_size); 1271 atomic_inc(&tipc_queue_size);
1261 __skb_queue_tail(&sk->sk_receive_queue, buf); 1272 __skb_queue_tail(&sk->sk_receive_queue, buf);
1262 1273
@@ -1608,7 +1619,7 @@ restart:
1608 buf = __skb_dequeue(&sk->sk_receive_queue); 1619 buf = __skb_dequeue(&sk->sk_receive_queue);
1609 if (buf) { 1620 if (buf) {
1610 atomic_dec(&tipc_queue_size); 1621 atomic_dec(&tipc_queue_size);
1611 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) { 1622 if (TIPC_SKB_CB(buf)->handle != 0) {
1612 buf_discard(buf); 1623 buf_discard(buf);
1613 goto restart; 1624 goto restart;
1614 } 1625 }
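
With the receive loops above switched from an open-ended wait_event_interruptible() to wait_event_interruptible_timeout() seeded from sock_rcvtimeo(), a blocking receive now gives up once the socket's receive timeout expires. A minimal sketch (hypothetical application code, error handling trimmed) of setting that timeout from userspace:

	#include <sys/socket.h>
	#include <sys/time.h>

	/* Bound blocking receives to the given number of seconds; after the
	 * rework above the recvmsg paths honour this via sock_rcvtimeo(). */
	static int set_recv_timeout(int sock, long seconds)
	{
		struct timeval tv = { .tv_sec = seconds, .tv_usec = 0 };

		return setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
	}
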
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ca04479c3d42..6cf726863485 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -2,7 +2,7 @@
2 * net/tipc/subscr.c: TIPC network topology service 2 * net/tipc/subscr.c: TIPC network topology service
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2005-2007, Wind River Systems 5 * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -109,7 +109,7 @@ static void subscr_send_event(struct subscription *sub,
109 sub->evt.found_upper = htohl(found_upper, sub->swap); 109 sub->evt.found_upper = htohl(found_upper, sub->swap);
110 sub->evt.port.ref = htohl(port_ref, sub->swap); 110 sub->evt.port.ref = htohl(port_ref, sub->swap);
111 sub->evt.port.node = htohl(node, sub->swap); 111 sub->evt.port.node = htohl(node, sub->swap);
112 tipc_send(sub->server_ref, 1, &msg_sect); 112 tipc_send(sub->server_ref, 1, &msg_sect, msg_sect.iov_len);
113} 113}
114 114
115/** 115/**
@@ -160,7 +160,7 @@ void tipc_subscr_report_overlap(struct subscription *sub,
160 160
161static void subscr_timeout(struct subscription *sub) 161static void subscr_timeout(struct subscription *sub)
162{ 162{
163 struct port *server_port; 163 struct tipc_port *server_port;
164 164
165 /* Validate server port reference (in case subscriber is terminating) */ 165 /* Validate server port reference (in case subscriber is terminating) */
166 166
@@ -472,8 +472,6 @@ static void subscr_named_msg_event(void *usr_handle,
472 struct tipc_portid const *orig, 472 struct tipc_portid const *orig,
473 struct tipc_name_seq const *dest) 473 struct tipc_name_seq const *dest)
474{ 474{
475 static struct iovec msg_sect = {NULL, 0};
476
477 struct subscriber *subscriber; 475 struct subscriber *subscriber;
478 u32 server_port_ref; 476 u32 server_port_ref;
479 477
@@ -508,7 +506,7 @@ static void subscr_named_msg_event(void *usr_handle,
508 506
509 /* Lock server port (& save lock address for future use) */ 507 /* Lock server port (& save lock address for future use) */
510 508
511 subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock; 509 subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock;
512 510
513 /* Add subscriber to topology server's subscriber list */ 511 /* Add subscriber to topology server's subscriber list */
514 512
@@ -523,7 +521,7 @@ static void subscr_named_msg_event(void *usr_handle,
523 521
524 /* Send an ACK- to complete connection handshaking */ 522 /* Send an ACK- to complete connection handshaking */
525 523
526 tipc_send(server_port_ref, 1, &msg_sect); 524 tipc_send(server_port_ref, 0, NULL, 0);
527 525
528 /* Handle optional subscription request */ 526 /* Handle optional subscription request */
529 527
@@ -542,7 +540,6 @@ int tipc_subscr_start(void)
542 spin_lock_init(&topsrv.lock); 540 spin_lock_init(&topsrv.lock);
543 INIT_LIST_HEAD(&topsrv.subscriber_list); 541 INIT_LIST_HEAD(&topsrv.subscriber_list);
544 542
545 spin_lock_bh(&topsrv.lock);
546 res = tipc_createport(NULL, 543 res = tipc_createport(NULL,
547 TIPC_CRITICAL_IMPORTANCE, 544 TIPC_CRITICAL_IMPORTANCE,
548 NULL, 545 NULL,
@@ -563,12 +560,10 @@ int tipc_subscr_start(void)
563 goto failed; 560 goto failed;
564 } 561 }
565 562
566 spin_unlock_bh(&topsrv.lock);
567 return 0; 563 return 0;
568 564
569failed: 565failed:
570 err("Failed to create subscription service\n"); 566 err("Failed to create subscription service\n");
571 spin_unlock_bh(&topsrv.lock);
572 return res; 567 return res;
573} 568}
574 569
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d286204..b1d75beb7e20 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -207,7 +207,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
207 /* 207 /*
208 * This may look like an off by one error but it is a bit more 208 * This may look like an off by one error but it is a bit more
209 * subtle. 108 is the longest valid AF_UNIX path for a binding. 209 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 * sun_path[108] doesnt as such exist. However in kernel space 210 * sun_path[108] doesn't as such exist. However in kernel space
211 * we are guaranteed that it is a valid memory location in our 211 * we are guaranteed that it is a valid memory location in our
212 * kernel address buffer. 212 * kernel address buffer.
213 */ 213 */
@@ -524,6 +524,8 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *,
524 int, int); 524 int, int);
525static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, 525static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
526 struct msghdr *, size_t); 526 struct msghdr *, size_t);
527static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
528 struct msghdr *, size_t, int);
527 529
528static const struct proto_ops unix_stream_ops = { 530static const struct proto_ops unix_stream_ops = {
529 .family = PF_UNIX, 531 .family = PF_UNIX,
@@ -583,7 +585,7 @@ static const struct proto_ops unix_seqpacket_ops = {
583 .setsockopt = sock_no_setsockopt, 585 .setsockopt = sock_no_setsockopt,
584 .getsockopt = sock_no_getsockopt, 586 .getsockopt = sock_no_getsockopt,
585 .sendmsg = unix_seqpacket_sendmsg, 587 .sendmsg = unix_seqpacket_sendmsg,
586 .recvmsg = unix_dgram_recvmsg, 588 .recvmsg = unix_seqpacket_recvmsg,
587 .mmap = sock_no_mmap, 589 .mmap = sock_no_mmap,
588 .sendpage = sock_no_sendpage, 590 .sendpage = sock_no_sendpage,
589}; 591};
@@ -850,7 +852,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
850 * Get the parent directory, calculate the hash for last 852 * Get the parent directory, calculate the hash for last
851 * component. 853 * component.
852 */ 854 */
853 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 855 err = kern_path_parent(sunaddr->sun_path, &nd);
854 if (err) 856 if (err)
855 goto out_mknod_parent; 857 goto out_mknod_parent;
856 858
@@ -1124,7 +1126,7 @@ restart:
1124 1126
1125 /* Latch our state. 1127 /* Latch our state.
1126 1128
1127 It is tricky place. We need to grab write lock and cannot 1129 It is tricky place. We need to grab our state lock and cannot
1128 drop lock on peer. It is dangerous because deadlock is 1130 drop lock on peer. It is dangerous because deadlock is
1129 possible. Connect to self case and simultaneous 1131 possible. Connect to self case and simultaneous
1130 attempt to connect are eliminated by checking socket 1132 attempt to connect are eliminated by checking socket
@@ -1171,7 +1173,7 @@ restart:
1171 newsk->sk_type = sk->sk_type; 1173 newsk->sk_type = sk->sk_type;
1172 init_peercred(newsk); 1174 init_peercred(newsk);
1173 newu = unix_sk(newsk); 1175 newu = unix_sk(newsk);
1174 newsk->sk_wq = &newu->peer_wq; 1176 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1175 otheru = unix_sk(other); 1177 otheru = unix_sk(other);
1176 1178
1177 /* copy address information from listening to new sock*/ 1179 /* copy address information from listening to new sock*/
@@ -1475,6 +1477,12 @@ restart:
1475 goto out_free; 1477 goto out_free;
1476 } 1478 }
1477 1479
1480 if (sk_filter(other, skb) < 0) {
1481 /* Toss the packet but do not return any error to the sender */
1482 err = len;
1483 goto out_free;
1484 }
1485
1478 unix_state_lock(other); 1486 unix_state_lock(other);
1479 err = -EPERM; 1487 err = -EPERM;
1480 if (!unix_may_send(sk, other)) 1488 if (!unix_may_send(sk, other))
@@ -1561,7 +1569,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1561 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1569 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1562 struct sock *sk = sock->sk; 1570 struct sock *sk = sock->sk;
1563 struct sock *other = NULL; 1571 struct sock *other = NULL;
1564 struct sockaddr_un *sunaddr = msg->msg_name;
1565 int err, size; 1572 int err, size;
1566 struct sk_buff *skb; 1573 struct sk_buff *skb;
1567 int sent = 0; 1574 int sent = 0;
@@ -1584,7 +1591,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1584 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 1591 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1585 goto out_err; 1592 goto out_err;
1586 } else { 1593 } else {
1587 sunaddr = NULL;
1588 err = -ENOTCONN; 1594 err = -ENOTCONN;
1589 other = unix_peer(sk); 1595 other = unix_peer(sk);
1590 if (!other) 1596 if (!other)
@@ -1695,6 +1701,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1695 return unix_dgram_sendmsg(kiocb, sock, msg, len); 1701 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1696} 1702}
1697 1703
1704static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1705 struct msghdr *msg, size_t size,
1706 int flags)
1707{
1708 struct sock *sk = sock->sk;
1709
1710 if (sk->sk_state != TCP_ESTABLISHED)
1711 return -ENOTCONN;
1712
1713 return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1714}
1715
1698static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 1716static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1699{ 1717{
1700 struct unix_sock *u = unix_sk(sk); 1718 struct unix_sock *u = unix_sk(sk);
@@ -1724,7 +1742,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1724 1742
1725 msg->msg_namelen = 0; 1743 msg->msg_namelen = 0;
1726 1744
1727 mutex_lock(&u->readlock); 1745 err = mutex_lock_interruptible(&u->readlock);
1746 if (err) {
1747 err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1748 goto out;
1749 }
1728 1750
1729 skb = skb_recv_datagram(sk, flags, noblock, &err); 1751 skb = skb_recv_datagram(sk, flags, noblock, &err);
1730 if (!skb) { 1752 if (!skb) {
@@ -1864,7 +1886,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1864 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1886 memset(&tmp_scm, 0, sizeof(tmp_scm));
1865 } 1887 }
1866 1888
1867 mutex_lock(&u->readlock); 1889 err = mutex_lock_interruptible(&u->readlock);
1890 if (err) {
1891 err = sock_intr_errno(timeo);
1892 goto out;
1893 }
1868 1894
1869 do { 1895 do {
1870 int chunk; 1896 int chunk;
@@ -1895,11 +1921,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1895 1921
1896 timeo = unix_stream_data_wait(sk, timeo); 1922 timeo = unix_stream_data_wait(sk, timeo);
1897 1923
1898 if (signal_pending(current)) { 1924 if (signal_pending(current)
1925 || mutex_lock_interruptible(&u->readlock)) {
1899 err = sock_intr_errno(timeo); 1926 err = sock_intr_errno(timeo);
1900 goto out; 1927 goto out;
1901 } 1928 }
1902 mutex_lock(&u->readlock); 1929
1903 continue; 1930 continue;
1904 unlock: 1931 unlock:
1905 unix_state_unlock(sk); 1932 unix_state_unlock(sk);
@@ -1978,36 +2005,38 @@ static int unix_shutdown(struct socket *sock, int mode)
1978 2005
1979 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); 2006 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1980 2007
1981 if (mode) { 2008 if (!mode)
1982 unix_state_lock(sk); 2009 return 0;
1983 sk->sk_shutdown |= mode; 2010
1984 other = unix_peer(sk); 2011 unix_state_lock(sk);
1985 if (other) 2012 sk->sk_shutdown |= mode;
1986 sock_hold(other); 2013 other = unix_peer(sk);
1987 unix_state_unlock(sk); 2014 if (other)
1988 sk->sk_state_change(sk); 2015 sock_hold(other);
1989 2016 unix_state_unlock(sk);
1990 if (other && 2017 sk->sk_state_change(sk);
1991 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2018
1992 2019 if (other &&
1993 int peer_mode = 0; 2020 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1994 2021
1995 if (mode&RCV_SHUTDOWN) 2022 int peer_mode = 0;
1996 peer_mode |= SEND_SHUTDOWN; 2023
1997 if (mode&SEND_SHUTDOWN) 2024 if (mode&RCV_SHUTDOWN)
1998 peer_mode |= RCV_SHUTDOWN; 2025 peer_mode |= SEND_SHUTDOWN;
1999 unix_state_lock(other); 2026 if (mode&SEND_SHUTDOWN)
2000 other->sk_shutdown |= peer_mode; 2027 peer_mode |= RCV_SHUTDOWN;
2001 unix_state_unlock(other); 2028 unix_state_lock(other);
2002 other->sk_state_change(other); 2029 other->sk_shutdown |= peer_mode;
2003 if (peer_mode == SHUTDOWN_MASK) 2030 unix_state_unlock(other);
2004 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2031 other->sk_state_change(other);
2005 else if (peer_mode & RCV_SHUTDOWN) 2032 if (peer_mode == SHUTDOWN_MASK)
2006 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2033 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2007 } 2034 else if (peer_mode & RCV_SHUTDOWN)
2008 if (other) 2035 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2009 sock_put(other);
2010 } 2036 }
2037 if (other)
2038 sock_put(other);
2039
2011 return 0; 2040 return 0;
2012} 2041}
2013 2042
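
The af_unix hunks above swap the unconditional mutex_lock(&u->readlock) in both receive paths for mutex_lock_interruptible(), so a task that catches a signal while waiting for another reader no longer sleeps uninterruptibly; the failure is mapped through sock_intr_errno(), which yields -ERESTARTSYS when the receive timeout is effectively infinite and -EINTR otherwise. A minimal sketch of the pattern as it sits in a kernel receive path (kernel context assumed throughout, not a standalone example):

	/* take the per-socket read lock, but back out if a signal arrives */
	err = mutex_lock_interruptible(&u->readlock);
	if (err) {
		/* report the interruption the same way an interrupted
		 * receive would be reported */
		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
		goto out;
	}

	/* ... skb_recv_datagram(), copy to the caller's iovec ... */

	mutex_unlock(&u->readlock);
out:
	return err;

The stream variant folds the retry path into one test (signal_pending(current) || mutex_lock_interruptible(&u->readlock)) so that a signal seen either directly or while reacquiring the lock takes the same interrupted-receive exit.
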
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828e..b6f4b994eb35 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
104 /* 104 /*
105 * Socket ? 105 * Socket ?
106 */ 106 */
107 if (S_ISSOCK(inode->i_mode)) { 107 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
108 struct socket *sock = SOCKET_I(inode); 108 struct socket *sock = SOCKET_I(inode);
109 struct sock *s = sock->sk; 109 struct sock *s = sock->sk;
110 110
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 74944a2dd436..788a12c1eb5d 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -59,8 +59,6 @@
59#include <asm/uaccess.h> /* copy_to/from_user */ 59#include <asm/uaccess.h> /* copy_to/from_user */
60#include <linux/init.h> /* __initfunc et al. */ 60#include <linux/init.h> /* __initfunc et al. */
61 61
62#define KMEM_SAFETYZONE 8
63
64#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) 62#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev)))
65 63
66/* 64/*
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 11f25c7a7a05..f346395314ba 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -51,7 +51,7 @@
51 51
52/* 52/*
53 * Structures for interfacing with the /proc filesystem. 53 * Structures for interfacing with the /proc filesystem.
54 * Router creates its own directory /proc/net/router with the folowing 54 * Router creates its own directory /proc/net/router with the following
55 * entries: 55 * entries:
56 * config device configuration 56 * config device configuration
57 * status global device statistics 57 * status global device statistics
diff --git a/net/wireless/core.c b/net/wireless/core.c
index e9a5f8ca4c27..c22ef3492ee6 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -46,6 +46,11 @@ static struct dentry *ieee80211_debugfs_dir;
46/* for the cleanup, scan and event works */ 46/* for the cleanup, scan and event works */
47struct workqueue_struct *cfg80211_wq; 47struct workqueue_struct *cfg80211_wq;
48 48
49static bool cfg80211_disable_40mhz_24ghz;
50module_param(cfg80211_disable_40mhz_24ghz, bool, 0644);
51MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz,
52 "Disable 40MHz support in the 2.4GHz band");
53
49/* requires cfg80211_mutex to be held! */ 54/* requires cfg80211_mutex to be held! */
50struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) 55struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx)
51{ 56{
@@ -365,7 +370,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
365 spin_lock_init(&rdev->bss_lock); 370 spin_lock_init(&rdev->bss_lock);
366 INIT_LIST_HEAD(&rdev->bss_list); 371 INIT_LIST_HEAD(&rdev->bss_list);
367 INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); 372 INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done);
368 373 INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results);
369#ifdef CONFIG_CFG80211_WEXT 374#ifdef CONFIG_CFG80211_WEXT
370 rdev->wiphy.wext = &cfg80211_wext_handler; 375 rdev->wiphy.wext = &cfg80211_wext_handler;
371#endif 376#endif
@@ -411,6 +416,67 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
411} 416}
412EXPORT_SYMBOL(wiphy_new); 417EXPORT_SYMBOL(wiphy_new);
413 418
419static int wiphy_verify_combinations(struct wiphy *wiphy)
420{
421 const struct ieee80211_iface_combination *c;
422 int i, j;
423
424 /* If we have combinations enforce them */
425 if (wiphy->n_iface_combinations)
426 wiphy->flags |= WIPHY_FLAG_ENFORCE_COMBINATIONS;
427
428 for (i = 0; i < wiphy->n_iface_combinations; i++) {
429 u32 cnt = 0;
430 u16 all_iftypes = 0;
431
432 c = &wiphy->iface_combinations[i];
433
434 /* Combinations with just one interface aren't real */
435 if (WARN_ON(c->max_interfaces < 2))
436 return -EINVAL;
437
438 /* Need at least one channel */
439 if (WARN_ON(!c->num_different_channels))
440 return -EINVAL;
441
442 if (WARN_ON(!c->n_limits))
443 return -EINVAL;
444
445 for (j = 0; j < c->n_limits; j++) {
446 u16 types = c->limits[j].types;
447
448 /*
449 * interface types shouldn't overlap, this is
450 * used in cfg80211_can_change_interface()
451 */
452 if (WARN_ON(types & all_iftypes))
453 return -EINVAL;
454 all_iftypes |= types;
455
456 if (WARN_ON(!c->limits[j].max))
457 return -EINVAL;
458
459 /* Shouldn't list software iftypes in combinations! */
460 if (WARN_ON(wiphy->software_iftypes & types))
461 return -EINVAL;
462
463 cnt += c->limits[j].max;
464 /*
465 * Don't advertise an unsupported type
466 * in a combination.
467 */
468 if (WARN_ON((wiphy->interface_modes & types) != types))
469 return -EINVAL;
470 }
471
472 /* You can't even choose that many! */
473 if (WARN_ON(cnt < c->max_interfaces))
474 return -EINVAL;
475 }
476
477 return 0;
478}
479
414int wiphy_register(struct wiphy *wiphy) 480int wiphy_register(struct wiphy *wiphy)
415{ 481{
416 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 482 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
@@ -439,6 +505,10 @@ int wiphy_register(struct wiphy *wiphy)
439 if (WARN_ON(ifmodes != wiphy->interface_modes)) 505 if (WARN_ON(ifmodes != wiphy->interface_modes))
440 wiphy->interface_modes = ifmodes; 506 wiphy->interface_modes = ifmodes;
441 507
508 res = wiphy_verify_combinations(wiphy);
509 if (res)
510 return res;
511
442 /* sanity check supported bands/channels */ 512 /* sanity check supported bands/channels */
443 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 513 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
444 sband = wiphy->bands[band]; 514 sband = wiphy->bands[band];
@@ -451,6 +521,18 @@ int wiphy_register(struct wiphy *wiphy)
451 return -EINVAL; 521 return -EINVAL;
452 522
453 /* 523 /*
524 * Since cfg80211_disable_40mhz_24ghz is global, we can
525 * modify the sband's ht data even if the driver uses a
526 * global structure for that.
527 */
528 if (cfg80211_disable_40mhz_24ghz &&
529 band == IEEE80211_BAND_2GHZ &&
530 sband->ht_cap.ht_supported) {
531 sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
532 sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
533 }
534
535 /*
454 * Since we use a u32 for rate bitmaps in 536 * Since we use a u32 for rate bitmaps in
455 * ieee80211_get_response_rate, we cannot 537 * ieee80211_get_response_rate, we cannot
456 * have more than 32 legacy rates. 538 * have more than 32 legacy rates.
@@ -476,6 +558,13 @@ int wiphy_register(struct wiphy *wiphy)
476 return -EINVAL; 558 return -EINVAL;
477 } 559 }
478 560
561 if (rdev->wiphy.wowlan.n_patterns) {
562 if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len ||
563 rdev->wiphy.wowlan.pattern_min_len >
564 rdev->wiphy.wowlan.pattern_max_len))
565 return -EINVAL;
566 }
567
479 /* check and set up bitrates */ 568 /* check and set up bitrates */
480 ieee80211_set_bitrate_flags(wiphy); 569 ieee80211_set_bitrate_flags(wiphy);
481 570
@@ -614,6 +703,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
614 mutex_destroy(&rdev->devlist_mtx); 703 mutex_destroy(&rdev->devlist_mtx);
615 list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) 704 list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list)
616 cfg80211_put_bss(&scan->pub); 705 cfg80211_put_bss(&scan->pub);
706 cfg80211_rdev_free_wowlan(rdev);
617 kfree(rdev); 707 kfree(rdev);
618} 708}
619 709
@@ -647,6 +737,11 @@ static void wdev_cleanup_work(struct work_struct *work)
647 ___cfg80211_scan_done(rdev, true); 737 ___cfg80211_scan_done(rdev, true);
648 } 738 }
649 739
740 if (WARN_ON(rdev->sched_scan_req &&
741 rdev->sched_scan_req->dev == wdev->netdev)) {
742 __cfg80211_stop_sched_scan(rdev, false);
743 }
744
650 cfg80211_unlock_rdev(rdev); 745 cfg80211_unlock_rdev(rdev);
651 746
652 mutex_lock(&rdev->devlist_mtx); 747 mutex_lock(&rdev->devlist_mtx);
@@ -668,6 +763,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
668 struct net_device *dev = ndev; 763 struct net_device *dev = ndev;
669 struct wireless_dev *wdev = dev->ieee80211_ptr; 764 struct wireless_dev *wdev = dev->ieee80211_ptr;
670 struct cfg80211_registered_device *rdev; 765 struct cfg80211_registered_device *rdev;
766 int ret;
671 767
672 if (!wdev) 768 if (!wdev)
673 return NOTIFY_DONE; 769 return NOTIFY_DONE;
@@ -718,13 +814,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
718 wdev->ps = false; 814 wdev->ps = false;
719 /* allow mac80211 to determine the timeout */ 815 /* allow mac80211 to determine the timeout */
720 wdev->ps_timeout = -1; 816 wdev->ps_timeout = -1;
721 if (rdev->ops->set_power_mgmt)
722 if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
723 wdev->ps,
724 wdev->ps_timeout)) {
725 /* assume this means it's off */
726 wdev->ps = false;
727 }
728 817
729 if (!dev->ethtool_ops) 818 if (!dev->ethtool_ops)
730 dev->ethtool_ops = &cfg80211_ethtool_ops; 819 dev->ethtool_ops = &cfg80211_ethtool_ops;
@@ -741,6 +830,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
741 break; 830 break;
742 case NL80211_IFTYPE_P2P_CLIENT: 831 case NL80211_IFTYPE_P2P_CLIENT:
743 case NL80211_IFTYPE_STATION: 832 case NL80211_IFTYPE_STATION:
833 cfg80211_lock_rdev(rdev);
834 __cfg80211_stop_sched_scan(rdev, false);
835 cfg80211_unlock_rdev(rdev);
836
744 wdev_lock(wdev); 837 wdev_lock(wdev);
745#ifdef CONFIG_CFG80211_WEXT 838#ifdef CONFIG_CFG80211_WEXT
746 kfree(wdev->wext.ie); 839 kfree(wdev->wext.ie);
@@ -759,6 +852,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
759 default: 852 default:
760 break; 853 break;
761 } 854 }
855 wdev->beacon_interval = 0;
762 break; 856 break;
763 case NETDEV_DOWN: 857 case NETDEV_DOWN:
764 dev_hold(dev); 858 dev_hold(dev);
@@ -813,6 +907,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
813 rdev->opencount++; 907 rdev->opencount++;
814 mutex_unlock(&rdev->devlist_mtx); 908 mutex_unlock(&rdev->devlist_mtx);
815 cfg80211_unlock_rdev(rdev); 909 cfg80211_unlock_rdev(rdev);
910
911 /*
912 * Configure power management to the driver here so that its
913 * correctly set also after interface type changes etc.
914 */
915 if (wdev->iftype == NL80211_IFTYPE_STATION &&
916 rdev->ops->set_power_mgmt)
917 if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
918 wdev->ps,
919 wdev->ps_timeout)) {
920 /* assume this means it's off */
921 wdev->ps = false;
922 }
816 break; 923 break;
817 case NETDEV_UNREGISTER: 924 case NETDEV_UNREGISTER:
818 /* 925 /*
@@ -852,6 +959,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
852 return notifier_from_errno(-EOPNOTSUPP); 959 return notifier_from_errno(-EOPNOTSUPP);
853 if (rfkill_blocked(rdev->rfkill)) 960 if (rfkill_blocked(rdev->rfkill))
854 return notifier_from_errno(-ERFKILL); 961 return notifier_from_errno(-ERFKILL);
962 ret = cfg80211_can_add_interface(rdev, wdev->iftype);
963 if (ret)
964 return notifier_from_errno(ret);
855 break; 965 break;
856 } 966 }
857 967
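
wiphy_verify_combinations() above enforces, at registration time, whatever interface-combination table a driver advertises: every combination needs at least two interfaces, at least one channel, non-overlapping interface types across its limits, and per-limit maxima that add up to at least max_interfaces. A hypothetical driver-side declaration, using only the field names the verification and nl80211 dump code above rely on (the counts and interface types here are invented):

static const struct ieee80211_iface_limit my_limits[] = {
	{ .max = 1, .types = BIT(NL80211_IFTYPE_AP) },
	{ .max = 2, .types = BIT(NL80211_IFTYPE_STATION) |
			     BIT(NL80211_IFTYPE_P2P_CLIENT) },
};

static const struct ieee80211_iface_combination my_combinations[] = {
	{
		.limits			= my_limits,
		.n_limits		= ARRAY_SIZE(my_limits),
		.max_interfaces		= 3,
		.num_different_channels	= 1,
		.beacon_int_infra_match	= true,
	},
};

	/* in the driver's wiphy setup, before wiphy_register() */
	wiphy->iface_combinations = my_combinations;
	wiphy->n_iface_combinations = ARRAY_SIZE(my_combinations);

All of the listed types must also be present in wiphy->interface_modes and must not appear in wiphy->software_iftypes, or the WARN_ON checks above reject the registration.
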
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 26a0a084e16b..bf0fb40e3c8b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -60,8 +60,10 @@ struct cfg80211_registered_device {
60 struct rb_root bss_tree; 60 struct rb_root bss_tree;
61 u32 bss_generation; 61 u32 bss_generation;
62 struct cfg80211_scan_request *scan_req; /* protected by RTNL */ 62 struct cfg80211_scan_request *scan_req; /* protected by RTNL */
63 struct cfg80211_sched_scan_request *sched_scan_req;
63 unsigned long suspend_at; 64 unsigned long suspend_at;
64 struct work_struct scan_done_wk; 65 struct work_struct scan_done_wk;
66 struct work_struct sched_scan_results_wk;
65 67
66#ifdef CONFIG_NL80211_TESTMODE 68#ifdef CONFIG_NL80211_TESTMODE
67 struct genl_info *testmode_info; 69 struct genl_info *testmode_info;
@@ -70,6 +72,8 @@ struct cfg80211_registered_device {
70 struct work_struct conn_work; 72 struct work_struct conn_work;
71 struct work_struct event_work; 73 struct work_struct event_work;
72 74
75 struct cfg80211_wowlan *wowlan;
76
73 /* must be last because of the way we do wiphy_priv(), 77 /* must be last because of the way we do wiphy_priv(),
74 * and it should at least be aligned to NETDEV_ALIGN */ 78 * and it should at least be aligned to NETDEV_ALIGN */
75 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); 79 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
@@ -89,6 +93,18 @@ bool wiphy_idx_valid(int wiphy_idx)
89 return wiphy_idx >= 0; 93 return wiphy_idx >= 0;
90} 94}
91 95
96static inline void
97cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev)
98{
99 int i;
100
101 if (!rdev->wowlan)
102 return;
103 for (i = 0; i < rdev->wowlan->n_patterns; i++)
104 kfree(rdev->wowlan->patterns[i].mask);
105 kfree(rdev->wowlan->patterns);
106 kfree(rdev->wowlan);
107}
92 108
93extern struct workqueue_struct *cfg80211_wq; 109extern struct workqueue_struct *cfg80211_wq;
94extern struct mutex cfg80211_mutex; 110extern struct mutex cfg80211_mutex;
@@ -397,12 +413,26 @@ void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len);
397void cfg80211_sme_disassoc(struct net_device *dev, int idx); 413void cfg80211_sme_disassoc(struct net_device *dev, int idx);
398void __cfg80211_scan_done(struct work_struct *wk); 414void __cfg80211_scan_done(struct work_struct *wk);
399void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); 415void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak);
416void __cfg80211_sched_scan_results(struct work_struct *wk);
417int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
418 bool driver_initiated);
400void cfg80211_upload_connect_keys(struct wireless_dev *wdev); 419void cfg80211_upload_connect_keys(struct wireless_dev *wdev);
401int cfg80211_change_iface(struct cfg80211_registered_device *rdev, 420int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
402 struct net_device *dev, enum nl80211_iftype ntype, 421 struct net_device *dev, enum nl80211_iftype ntype,
403 u32 *flags, struct vif_params *params); 422 u32 *flags, struct vif_params *params);
404void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); 423void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
405 424
425int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
426 struct wireless_dev *wdev,
427 enum nl80211_iftype iftype);
428
429static inline int
430cfg80211_can_add_interface(struct cfg80211_registered_device *rdev,
431 enum nl80211_iftype iftype)
432{
433 return cfg80211_can_change_interface(rdev, NULL, iftype);
434}
435
406struct ieee80211_channel * 436struct ieee80211_channel *
407rdev_freq_to_chan(struct cfg80211_registered_device *rdev, 437rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
408 int freq, enum nl80211_channel_type channel_type); 438 int freq, enum nl80211_channel_type channel_type);
@@ -412,6 +442,9 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
412 442
413u16 cfg80211_calculate_bitrate(struct rate_info *rate); 443u16 cfg80211_calculate_bitrate(struct rate_info *rate);
414 444
445int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
446 u32 beacon_int);
447
415#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS 448#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
416#define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) 449#define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond)
417#else 450#else
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index ca4c825be93d..9bde4d1d3e9b 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -1,5 +1,6 @@
1#include <linux/utsname.h> 1#include <linux/utsname.h>
2#include <net/cfg80211.h> 2#include <net/cfg80211.h>
3#include "core.h"
3#include "ethtool.h" 4#include "ethtool.h"
4 5
5static void cfg80211_get_drvinfo(struct net_device *dev, 6static void cfg80211_get_drvinfo(struct net_device *dev,
@@ -37,9 +38,41 @@ static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs,
37 regs->len = 0; 38 regs->len = 0;
38} 39}
39 40
41static void cfg80211_get_ringparam(struct net_device *dev,
42 struct ethtool_ringparam *rp)
43{
44 struct wireless_dev *wdev = dev->ieee80211_ptr;
45 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
46
47 memset(rp, 0, sizeof(*rp));
48
49 if (rdev->ops->get_ringparam)
50 rdev->ops->get_ringparam(wdev->wiphy,
51 &rp->tx_pending, &rp->tx_max_pending,
52 &rp->rx_pending, &rp->rx_max_pending);
53}
54
55static int cfg80211_set_ringparam(struct net_device *dev,
56 struct ethtool_ringparam *rp)
57{
58 struct wireless_dev *wdev = dev->ieee80211_ptr;
59 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
60
61 if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0)
62 return -EINVAL;
63
64 if (rdev->ops->set_ringparam)
65 return rdev->ops->set_ringparam(wdev->wiphy,
66 rp->tx_pending, rp->rx_pending);
67
68 return -ENOTSUPP;
69}
70
40const struct ethtool_ops cfg80211_ethtool_ops = { 71const struct ethtool_ops cfg80211_ethtool_ops = {
41 .get_drvinfo = cfg80211_get_drvinfo, 72 .get_drvinfo = cfg80211_get_drvinfo,
42 .get_regs_len = cfg80211_get_regs_len, 73 .get_regs_len = cfg80211_get_regs_len,
43 .get_regs = cfg80211_get_regs, 74 .get_regs = cfg80211_get_regs,
44 .get_link = ethtool_op_get_link, 75 .get_link = ethtool_op_get_link,
76 .get_ringparam = cfg80211_get_ringparam,
77 .set_ringparam = cfg80211_set_ringparam,
45}; 78};
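
The two new ethtool callbacks simply proxy ring sizes to the driver through optional get_ringparam/set_ringparam cfg80211 ops (four u32 pointers out, two u32 values in, per the calls above). A hedged sketch of the driver side, where my_hw, MY_TXQ_MAX, MY_RXQ_MAX and my_hw_resize_rings() are invented stand-ins for whatever the hardware actually exposes:

static void my_get_ringparam(struct wiphy *wiphy, u32 *tx, u32 *tx_max,
			     u32 *rx, u32 *rx_max)
{
	*tx = my_hw.txq_len;
	*tx_max = MY_TXQ_MAX;
	*rx = my_hw.rxq_len;
	*rx_max = MY_RXQ_MAX;
}

static int my_set_ringparam(struct wiphy *wiphy, u32 tx, u32 rx)
{
	if (tx > MY_TXQ_MAX || rx > MY_RXQ_MAX)
		return -EINVAL;
	return my_hw_resize_rings(tx, rx);	/* invented helper */
}

With ops wired up this way, ordinary ethtool -g wlan0 / ethtool -G wlan0 requests reach the wireless driver the same way they reach an ethernet one.
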
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index e2e88878ba35..2f265e033ae2 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -96,13 +96,12 @@ static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
96 u8 *key, int keylen, void *priv) 96 u8 *key, int keylen, void *priv)
97{ 97{
98 struct lib80211_wep_data *wep = priv; 98 struct lib80211_wep_data *wep = priv;
99 u32 klen, len; 99 u32 klen;
100 u8 *pos; 100 u8 *pos;
101 101
102 if (skb_headroom(skb) < 4 || skb->len < hdr_len) 102 if (skb_headroom(skb) < 4 || skb->len < hdr_len)
103 return -1; 103 return -1;
104 104
105 len = skb->len - hdr_len;
106 pos = skb_push(skb, 4); 105 pos = skb_push(skb, 4);
107 memmove(pos, pos + 4, hdr_len); 106 memmove(pos, pos + 4, hdr_len);
108 pos += hdr_len; 107 pos += hdr_len;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 73e39c171ffb..5c116083eeca 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,5 +1,6 @@
1#include <linux/ieee80211.h> 1#include <linux/ieee80211.h>
2#include <net/cfg80211.h> 2#include <net/cfg80211.h>
3#include "nl80211.h"
3#include "core.h" 4#include "core.h"
4 5
5/* Default values, timeouts in ms */ 6/* Default values, timeouts in ms */
@@ -53,8 +54,9 @@ const struct mesh_config default_mesh_config = {
53const struct mesh_setup default_mesh_setup = { 54const struct mesh_setup default_mesh_setup = {
54 .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, 55 .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP,
55 .path_metric = IEEE80211_PATH_METRIC_AIRTIME, 56 .path_metric = IEEE80211_PATH_METRIC_AIRTIME,
56 .vendor_ie = NULL, 57 .ie = NULL,
57 .vendor_ie_len = 0, 58 .ie_len = 0,
59 .is_secure = false,
58}; 60};
59 61
60int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, 62int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
@@ -72,6 +74,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
72 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) 74 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
73 return -EOPNOTSUPP; 75 return -EOPNOTSUPP;
74 76
77 if (!(rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
78 setup->is_secure)
79 return -EOPNOTSUPP;
80
75 if (wdev->mesh_id_len) 81 if (wdev->mesh_id_len)
76 return -EALREADY; 82 return -EALREADY;
77 83
@@ -105,6 +111,19 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
105 return err; 111 return err;
106} 112}
107 113
114void cfg80211_notify_new_peer_candidate(struct net_device *dev,
115 const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp)
116{
117 struct wireless_dev *wdev = dev->ieee80211_ptr;
118
119 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT))
120 return;
121
122 nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev,
123 macaddr, ie, ie_len, gfp);
124}
125EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
126
108static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, 127static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
109 struct net_device *dev) 128 struct net_device *dev)
110{ 129{
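
cfg80211_notify_new_peer_candidate(), exported above, is the hook a driver uses when secure mesh (the new is_secure/WIPHY_FLAG_MESH_AUTH path) leaves peering decisions to userspace: on seeing a beacon from an unknown candidate peer it forwards the MAC address and IEs to nl80211 so a userspace daemon can decide whether to authenticate. A hedged call-site sketch; everything except the exported function and its argument order (netdev, MAC, IEs, IE length, gfp flags) is an assumed local from some driver's beacon-processing path:

	/* hypothetical driver beacon handler */
	if (peer_is_unknown && userspace_handles_peering)
		cfg80211_notify_new_peer_candidate(dev, peer_addr,
						   peer_ies, peer_ies_len,
						   GFP_ATOMIC);
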
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index aa5df8865ff7..493b939970cd 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -770,6 +770,15 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
770} 770}
771EXPORT_SYMBOL(cfg80211_new_sta); 771EXPORT_SYMBOL(cfg80211_new_sta);
772 772
773void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
774{
775 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
776 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
777
778 nl80211_send_sta_del_event(rdev, dev, mac_addr, gfp);
779}
780EXPORT_SYMBOL(cfg80211_del_sta);
781
773struct cfg80211_mgmt_registration { 782struct cfg80211_mgmt_registration {
774 struct list_head list; 783 struct list_head list;
775 784
@@ -954,6 +963,16 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
954 if (memcmp(mgmt->bssid, dev->dev_addr, ETH_ALEN)) 963 if (memcmp(mgmt->bssid, dev->dev_addr, ETH_ALEN))
955 err = -EINVAL; 964 err = -EINVAL;
956 break; 965 break;
966 case NL80211_IFTYPE_MESH_POINT:
967 if (memcmp(mgmt->sa, mgmt->bssid, ETH_ALEN)) {
968 err = -EINVAL;
969 break;
970 }
971 /*
972 * check for mesh DA must be done by driver as
973 * cfg80211 doesn't track the stations
974 */
975 break;
957 default: 976 default:
958 err = -EOPNOTSUPP; 977 err = -EOPNOTSUPP;
959 break; 978 break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9b62710891a2..2222ce08ee91 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -124,6 +124,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
124 [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, 124 [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 },
125 125
126 [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, 126 [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED },
127 [NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG },
127 128
128 [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, 129 [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
129 .len = NL80211_HT_CAPABILITY_LEN }, 130 .len = NL80211_HT_CAPABILITY_LEN },
@@ -172,6 +173,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
172 [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, 173 [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
173 [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, 174 [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG },
174 [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, 175 [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
176 [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED },
177 [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 },
178 [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 },
175}; 179};
176 180
177/* policy for the key attributes */ 181/* policy for the key attributes */
@@ -193,6 +197,15 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
193 [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, 197 [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
194}; 198};
195 199
200/* policy for WoWLAN attributes */
201static const struct nla_policy
202nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = {
203 [NL80211_WOWLAN_TRIG_ANY] = { .type = NLA_FLAG },
204 [NL80211_WOWLAN_TRIG_DISCONNECT] = { .type = NLA_FLAG },
205 [NL80211_WOWLAN_TRIG_MAGIC_PKT] = { .type = NLA_FLAG },
206 [NL80211_WOWLAN_TRIG_PKT_PATTERN] = { .type = NLA_NESTED },
207};
208
196/* ifidx get helper */ 209/* ifidx get helper */
197static int nl80211_get_ifidx(struct netlink_callback *cb) 210static int nl80211_get_ifidx(struct netlink_callback *cb)
198{ 211{
@@ -533,6 +546,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
533 case NL80211_IFTYPE_AP: 546 case NL80211_IFTYPE_AP:
534 case NL80211_IFTYPE_AP_VLAN: 547 case NL80211_IFTYPE_AP_VLAN:
535 case NL80211_IFTYPE_P2P_GO: 548 case NL80211_IFTYPE_P2P_GO:
549 case NL80211_IFTYPE_MESH_POINT:
536 break; 550 break;
537 case NL80211_IFTYPE_ADHOC: 551 case NL80211_IFTYPE_ADHOC:
538 if (!wdev->current_bss) 552 if (!wdev->current_bss)
@@ -550,6 +564,88 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
550 return 0; 564 return 0;
551} 565}
552 566
567static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes)
568{
569 struct nlattr *nl_modes = nla_nest_start(msg, attr);
570 int i;
571
572 if (!nl_modes)
573 goto nla_put_failure;
574
575 i = 0;
576 while (ifmodes) {
577 if (ifmodes & 1)
578 NLA_PUT_FLAG(msg, i);
579 ifmodes >>= 1;
580 i++;
581 }
582
583 nla_nest_end(msg, nl_modes);
584 return 0;
585
586nla_put_failure:
587 return -ENOBUFS;
588}
589
590static int nl80211_put_iface_combinations(struct wiphy *wiphy,
591 struct sk_buff *msg)
592{
593 struct nlattr *nl_combis;
594 int i, j;
595
596 nl_combis = nla_nest_start(msg,
597 NL80211_ATTR_INTERFACE_COMBINATIONS);
598 if (!nl_combis)
599 goto nla_put_failure;
600
601 for (i = 0; i < wiphy->n_iface_combinations; i++) {
602 const struct ieee80211_iface_combination *c;
603 struct nlattr *nl_combi, *nl_limits;
604
605 c = &wiphy->iface_combinations[i];
606
607 nl_combi = nla_nest_start(msg, i + 1);
608 if (!nl_combi)
609 goto nla_put_failure;
610
611 nl_limits = nla_nest_start(msg, NL80211_IFACE_COMB_LIMITS);
612 if (!nl_limits)
613 goto nla_put_failure;
614
615 for (j = 0; j < c->n_limits; j++) {
616 struct nlattr *nl_limit;
617
618 nl_limit = nla_nest_start(msg, j + 1);
619 if (!nl_limit)
620 goto nla_put_failure;
621 NLA_PUT_U32(msg, NL80211_IFACE_LIMIT_MAX,
622 c->limits[j].max);
623 if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES,
624 c->limits[j].types))
625 goto nla_put_failure;
626 nla_nest_end(msg, nl_limit);
627 }
628
629 nla_nest_end(msg, nl_limits);
630
631 if (c->beacon_int_infra_match)
632 NLA_PUT_FLAG(msg,
633 NL80211_IFACE_COMB_STA_AP_BI_MATCH);
634 NLA_PUT_U32(msg, NL80211_IFACE_COMB_NUM_CHANNELS,
635 c->num_different_channels);
636 NLA_PUT_U32(msg, NL80211_IFACE_COMB_MAXNUM,
637 c->max_interfaces);
638
639 nla_nest_end(msg, nl_combi);
640 }
641
642 nla_nest_end(msg, nl_combis);
643
644 return 0;
645nla_put_failure:
646 return -ENOBUFS;
647}
648
553static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, 649static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
554 struct cfg80211_registered_device *dev) 650 struct cfg80211_registered_device *dev)
555{ 651{
@@ -557,13 +653,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
557 struct nlattr *nl_bands, *nl_band; 653 struct nlattr *nl_bands, *nl_band;
558 struct nlattr *nl_freqs, *nl_freq; 654 struct nlattr *nl_freqs, *nl_freq;
559 struct nlattr *nl_rates, *nl_rate; 655 struct nlattr *nl_rates, *nl_rate;
560 struct nlattr *nl_modes;
561 struct nlattr *nl_cmds; 656 struct nlattr *nl_cmds;
562 enum ieee80211_band band; 657 enum ieee80211_band band;
563 struct ieee80211_channel *chan; 658 struct ieee80211_channel *chan;
564 struct ieee80211_rate *rate; 659 struct ieee80211_rate *rate;
565 int i; 660 int i;
566 u16 ifmodes = dev->wiphy.interface_modes;
567 const struct ieee80211_txrx_stypes *mgmt_stypes = 661 const struct ieee80211_txrx_stypes *mgmt_stypes =
568 dev->wiphy.mgmt_stypes; 662 dev->wiphy.mgmt_stypes;
569 663
@@ -594,6 +688,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
594 688
595 if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) 689 if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)
596 NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN); 690 NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN);
691 if (dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH)
692 NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_MESH_AUTH);
597 693
598 NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, 694 NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES,
599 sizeof(u32) * dev->wiphy.n_cipher_suites, 695 sizeof(u32) * dev->wiphy.n_cipher_suites,
@@ -621,20 +717,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
621 } 717 }
622 } 718 }
623 719
624 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 720 if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
625 if (!nl_modes) 721 dev->wiphy.interface_modes))
626 goto nla_put_failure; 722 goto nla_put_failure;
627 723
628 i = 0;
629 while (ifmodes) {
630 if (ifmodes & 1)
631 NLA_PUT_FLAG(msg, i);
632 ifmodes >>= 1;
633 i++;
634 }
635
636 nla_nest_end(msg, nl_modes);
637
638 nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); 724 nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
639 if (!nl_bands) 725 if (!nl_bands)
640 goto nla_put_failure; 726 goto nla_put_failure;
@@ -746,6 +832,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
746 } 832 }
747 CMD(set_channel, SET_CHANNEL); 833 CMD(set_channel, SET_CHANNEL);
748 CMD(set_wds_peer, SET_WDS_PEER); 834 CMD(set_wds_peer, SET_WDS_PEER);
835 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
836 CMD(sched_scan_start, START_SCHED_SCAN);
749 837
750#undef CMD 838#undef CMD
751 839
@@ -818,6 +906,42 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
818 nla_nest_end(msg, nl_ifs); 906 nla_nest_end(msg, nl_ifs);
819 } 907 }
820 908
909 if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) {
910 struct nlattr *nl_wowlan;
911
912 nl_wowlan = nla_nest_start(msg,
913 NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
914 if (!nl_wowlan)
915 goto nla_put_failure;
916
917 if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY)
918 NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_ANY);
919 if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT)
920 NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_DISCONNECT);
921 if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT)
922 NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT);
923 if (dev->wiphy.wowlan.n_patterns) {
924 struct nl80211_wowlan_pattern_support pat = {
925 .max_patterns = dev->wiphy.wowlan.n_patterns,
926 .min_pattern_len =
927 dev->wiphy.wowlan.pattern_min_len,
928 .max_pattern_len =
929 dev->wiphy.wowlan.pattern_max_len,
930 };
931 NLA_PUT(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
932 sizeof(pat), &pat);
933 }
934
935 nla_nest_end(msg, nl_wowlan);
936 }
937
938 if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
939 dev->wiphy.software_iftypes))
940 goto nla_put_failure;
941
942 if (nl80211_put_iface_combinations(&dev->wiphy, msg))
943 goto nla_put_failure;
944
821 return genlmsg_end(msg, hdr); 945 return genlmsg_end(msg, hdr);
822 946
823 nla_put_failure: 947 nla_put_failure:
@@ -1679,14 +1803,6 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
1679 if (err) 1803 if (err)
1680 goto out; 1804 goto out;
1681 1805
1682 if (!(rdev->wiphy.flags &
1683 WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS)) {
1684 if (!key.def_uni || !key.def_multi) {
1685 err = -EOPNOTSUPP;
1686 goto out;
1687 }
1688 }
1689
1690 err = rdev->ops->set_default_key(&rdev->wiphy, dev, key.idx, 1806 err = rdev->ops->set_default_key(&rdev->wiphy, dev, key.idx,
1691 key.def_uni, key.def_multi); 1807 key.def_uni, key.def_multi);
1692 1808
@@ -1837,8 +1953,9 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1837 struct beacon_parameters *info); 1953 struct beacon_parameters *info);
1838 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 1954 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1839 struct net_device *dev = info->user_ptr[1]; 1955 struct net_device *dev = info->user_ptr[1];
1956 struct wireless_dev *wdev = dev->ieee80211_ptr;
1840 struct beacon_parameters params; 1957 struct beacon_parameters params;
1841 int haveinfo = 0; 1958 int haveinfo = 0, err;
1842 1959
1843 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL])) 1960 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]))
1844 return -EINVAL; 1961 return -EINVAL;
@@ -1847,6 +1964,8 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1847 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) 1964 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
1848 return -EOPNOTSUPP; 1965 return -EOPNOTSUPP;
1849 1966
1967 memset(&params, 0, sizeof(params));
1968
1850 switch (info->genlhdr->cmd) { 1969 switch (info->genlhdr->cmd) {
1851 case NL80211_CMD_NEW_BEACON: 1970 case NL80211_CMD_NEW_BEACON:
1852 /* these are required for NEW_BEACON */ 1971 /* these are required for NEW_BEACON */
@@ -1855,6 +1974,15 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1855 !info->attrs[NL80211_ATTR_BEACON_HEAD]) 1974 !info->attrs[NL80211_ATTR_BEACON_HEAD])
1856 return -EINVAL; 1975 return -EINVAL;
1857 1976
1977 params.interval =
1978 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
1979 params.dtim_period =
1980 nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
1981
1982 err = cfg80211_validate_beacon_int(rdev, params.interval);
1983 if (err)
1984 return err;
1985
1858 call = rdev->ops->add_beacon; 1986 call = rdev->ops->add_beacon;
1859 break; 1987 break;
1860 case NL80211_CMD_SET_BEACON: 1988 case NL80211_CMD_SET_BEACON:
@@ -1868,20 +1996,6 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1868 if (!call) 1996 if (!call)
1869 return -EOPNOTSUPP; 1997 return -EOPNOTSUPP;
1870 1998
1871 memset(&params, 0, sizeof(params));
1872
1873 if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
1874 params.interval =
1875 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
1876 haveinfo = 1;
1877 }
1878
1879 if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
1880 params.dtim_period =
1881 nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
1882 haveinfo = 1;
1883 }
1884
1885 if (info->attrs[NL80211_ATTR_BEACON_HEAD]) { 1999 if (info->attrs[NL80211_ATTR_BEACON_HEAD]) {
1886 params.head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]); 2000 params.head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]);
1887 params.head_len = 2001 params.head_len =
@@ -1899,13 +2013,18 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1899 if (!haveinfo) 2013 if (!haveinfo)
1900 return -EINVAL; 2014 return -EINVAL;
1901 2015
1902 return call(&rdev->wiphy, dev, &params); 2016 err = call(&rdev->wiphy, dev, &params);
2017 if (!err && params.interval)
2018 wdev->beacon_interval = params.interval;
2019 return err;
1903} 2020}
1904 2021
1905static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) 2022static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
1906{ 2023{
1907 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 2024 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1908 struct net_device *dev = info->user_ptr[1]; 2025 struct net_device *dev = info->user_ptr[1];
2026 struct wireless_dev *wdev = dev->ieee80211_ptr;
2027 int err;
1909 2028
1910 if (!rdev->ops->del_beacon) 2029 if (!rdev->ops->del_beacon)
1911 return -EOPNOTSUPP; 2030 return -EOPNOTSUPP;
@@ -1914,7 +2033,10 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
1914 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) 2033 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
1915 return -EOPNOTSUPP; 2034 return -EOPNOTSUPP;
1916 2035
1917 return rdev->ops->del_beacon(&rdev->wiphy, dev); 2036 err = rdev->ops->del_beacon(&rdev->wiphy, dev);
2037 if (!err)
2038 wdev->beacon_interval = 0;
2039 return err;
1918} 2040}
1919 2041
1920static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { 2042static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
@@ -1922,6 +2044,7 @@ static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
1922 [NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG }, 2044 [NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG },
1923 [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG }, 2045 [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG },
1924 [NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG }, 2046 [NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG },
2047 [NL80211_STA_FLAG_AUTHENTICATED] = { .type = NLA_FLAG },
1925}; 2048};
1926 2049
1927static int parse_station_flags(struct genl_info *info, 2050static int parse_station_flags(struct genl_info *info,
@@ -1968,13 +2091,41 @@ static int parse_station_flags(struct genl_info *info,
1968 return 0; 2091 return 0;
1969} 2092}
1970 2093
2094static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
2095 int attr)
2096{
2097 struct nlattr *rate;
2098 u16 bitrate;
2099
2100 rate = nla_nest_start(msg, attr);
2101 if (!rate)
2102 goto nla_put_failure;
2103
2104 /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */
2105 bitrate = cfg80211_calculate_bitrate(info);
2106 if (bitrate > 0)
2107 NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate);
2108
2109 if (info->flags & RATE_INFO_FLAGS_MCS)
2110 NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, info->mcs);
2111 if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH)
2112 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH);
2113 if (info->flags & RATE_INFO_FLAGS_SHORT_GI)
2114 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI);
2115
2116 nla_nest_end(msg, rate);
2117 return true;
2118
2119nla_put_failure:
2120 return false;
2121}
2122
1971static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, 2123static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
1972 int flags, struct net_device *dev, 2124 int flags, struct net_device *dev,
1973 const u8 *mac_addr, struct station_info *sinfo) 2125 const u8 *mac_addr, struct station_info *sinfo)
1974{ 2126{
1975 void *hdr; 2127 void *hdr;
1976 struct nlattr *sinfoattr, *txrate; 2128 struct nlattr *sinfoattr, *bss_param;
1977 u16 bitrate;
1978 2129
1979 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); 2130 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
1980 if (!hdr) 2131 if (!hdr)
@@ -1988,6 +2139,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
1988 sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); 2139 sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO);
1989 if (!sinfoattr) 2140 if (!sinfoattr)
1990 goto nla_put_failure; 2141 goto nla_put_failure;
2142 if (sinfo->filled & STATION_INFO_CONNECTED_TIME)
2143 NLA_PUT_U32(msg, NL80211_STA_INFO_CONNECTED_TIME,
2144 sinfo->connected_time);
1991 if (sinfo->filled & STATION_INFO_INACTIVE_TIME) 2145 if (sinfo->filled & STATION_INFO_INACTIVE_TIME)
1992 NLA_PUT_U32(msg, NL80211_STA_INFO_INACTIVE_TIME, 2146 NLA_PUT_U32(msg, NL80211_STA_INFO_INACTIVE_TIME,
1993 sinfo->inactive_time); 2147 sinfo->inactive_time);
@@ -2013,24 +2167,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
2013 NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, 2167 NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
2014 sinfo->signal_avg); 2168 sinfo->signal_avg);
2015 if (sinfo->filled & STATION_INFO_TX_BITRATE) { 2169 if (sinfo->filled & STATION_INFO_TX_BITRATE) {
2016 txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); 2170 if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
2017 if (!txrate) 2171 NL80211_STA_INFO_TX_BITRATE))
2172 goto nla_put_failure;
2173 }
2174 if (sinfo->filled & STATION_INFO_RX_BITRATE) {
2175 if (!nl80211_put_sta_rate(msg, &sinfo->rxrate,
2176 NL80211_STA_INFO_RX_BITRATE))
2018 goto nla_put_failure; 2177 goto nla_put_failure;
2019
2020 /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */
2021 bitrate = cfg80211_calculate_bitrate(&sinfo->txrate);
2022 if (bitrate > 0)
2023 NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate);
2024
2025 if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS)
2026 NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS,
2027 sinfo->txrate.mcs);
2028 if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH)
2029 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH);
2030 if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI)
2031 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI);
2032
2033 nla_nest_end(msg, txrate);
2034 } 2178 }
2035 if (sinfo->filled & STATION_INFO_RX_PACKETS) 2179 if (sinfo->filled & STATION_INFO_RX_PACKETS)
2036 NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS, 2180 NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS,
@@ -2044,6 +2188,25 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
2044 if (sinfo->filled & STATION_INFO_TX_FAILED) 2188 if (sinfo->filled & STATION_INFO_TX_FAILED)
2045 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED, 2189 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED,
2046 sinfo->tx_failed); 2190 sinfo->tx_failed);
2191 if (sinfo->filled & STATION_INFO_BSS_PARAM) {
2192 bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM);
2193 if (!bss_param)
2194 goto nla_put_failure;
2195
2196 if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_CTS_PROT)
2197 NLA_PUT_FLAG(msg, NL80211_STA_BSS_PARAM_CTS_PROT);
2198 if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_PREAMBLE)
2199 NLA_PUT_FLAG(msg, NL80211_STA_BSS_PARAM_SHORT_PREAMBLE);
2200 if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_SLOT_TIME)
2201 NLA_PUT_FLAG(msg,
2202 NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME);
2203 NLA_PUT_U8(msg, NL80211_STA_BSS_PARAM_DTIM_PERIOD,
2204 sinfo->bss_param.dtim_period);
2205 NLA_PUT_U16(msg, NL80211_STA_BSS_PARAM_BEACON_INTERVAL,
2206 sinfo->bss_param.beacon_interval);
2207
2208 nla_nest_end(msg, bss_param);
2209 }
2047 nla_nest_end(msg, sinfoattr); 2210 nla_nest_end(msg, sinfoattr);
2048 2211
2049 return genlmsg_end(msg, hdr); 2212 return genlmsg_end(msg, hdr);
@@ -2172,6 +2335,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2172 memset(&params, 0, sizeof(params)); 2335 memset(&params, 0, sizeof(params));
2173 2336
2174 params.listen_interval = -1; 2337 params.listen_interval = -1;
2338 params.plink_state = -1;
2175 2339
2176 if (info->attrs[NL80211_ATTR_STA_AID]) 2340 if (info->attrs[NL80211_ATTR_STA_AID])
2177 return -EINVAL; 2341 return -EINVAL;
@@ -2203,6 +2367,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2203 params.plink_action = 2367 params.plink_action =
2204 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); 2368 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
2205 2369
2370 if (info->attrs[NL80211_ATTR_STA_PLINK_STATE])
2371 params.plink_state =
2372 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
2373
2206 err = get_vlan(info, rdev, &params.vlan); 2374 err = get_vlan(info, rdev, &params.vlan);
2207 if (err) 2375 if (err)
2208 goto out; 2376 goto out;
@@ -2242,9 +2410,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2242 err = -EINVAL; 2410 err = -EINVAL;
2243 if (params.listen_interval >= 0) 2411 if (params.listen_interval >= 0)
2244 err = -EINVAL; 2412 err = -EINVAL;
2245 if (params.supported_rates) 2413 if (params.sta_flags_mask &
2246 err = -EINVAL; 2414 ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
2247 if (params.sta_flags_mask) 2415 BIT(NL80211_STA_FLAG_MFP) |
2416 BIT(NL80211_STA_FLAG_AUTHORIZED)))
2248 err = -EINVAL; 2417 err = -EINVAL;
2249 break; 2418 break;
2250 default: 2419 default:
@@ -2306,11 +2475,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2306 params.ht_capa = 2475 params.ht_capa =
2307 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); 2476 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
2308 2477
2478 if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
2479 params.plink_action =
2480 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
2481
2309 if (parse_station_flags(info, &params)) 2482 if (parse_station_flags(info, &params))
2310 return -EINVAL; 2483 return -EINVAL;
2311 2484
2312 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2485 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2313 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && 2486 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2487 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
2314 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) 2488 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
2315 return -EINVAL; 2489 return -EINVAL;
2316 2490
@@ -2718,7 +2892,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
2718 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 2892 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2719 NL80211_CMD_GET_MESH_CONFIG); 2893 NL80211_CMD_GET_MESH_CONFIG);
2720 if (!hdr) 2894 if (!hdr)
2721 goto nla_put_failure; 2895 goto out;
2722 pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); 2896 pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG);
2723 if (!pinfoattr) 2897 if (!pinfoattr)
2724 goto nla_put_failure; 2898 goto nla_put_failure;
@@ -2759,6 +2933,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
2759 2933
2760 nla_put_failure: 2934 nla_put_failure:
2761 genlmsg_cancel(msg, hdr); 2935 genlmsg_cancel(msg, hdr);
2936 out:
2762 nlmsg_free(msg); 2937 nlmsg_free(msg);
2763 return -ENOBUFS; 2938 return -ENOBUFS;
2764} 2939}
@@ -2785,8 +2960,10 @@ static const struct nla_policy
2785 nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { 2960 nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = {
2786 [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, 2961 [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
2787 [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, 2962 [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
2788 [NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE] = { .type = NLA_BINARY, 2963 [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
2964 [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
2789 .len = IEEE80211_MAX_DATA_LEN }, 2965 .len = IEEE80211_MAX_DATA_LEN },
2966 [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
2790}; 2967};
2791 2968
2792static int nl80211_parse_mesh_config(struct genl_info *info, 2969static int nl80211_parse_mesh_config(struct genl_info *info,
@@ -2887,14 +3064,17 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
2887 IEEE80211_PATH_METRIC_VENDOR : 3064 IEEE80211_PATH_METRIC_VENDOR :
2888 IEEE80211_PATH_METRIC_AIRTIME; 3065 IEEE80211_PATH_METRIC_AIRTIME;
2889 3066
2890 if (tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]) { 3067
3068 if (tb[NL80211_MESH_SETUP_IE]) {
2891 struct nlattr *ieattr = 3069 struct nlattr *ieattr =
2892 tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]; 3070 tb[NL80211_MESH_SETUP_IE];
2893 if (!is_valid_ie_attr(ieattr)) 3071 if (!is_valid_ie_attr(ieattr))
2894 return -EINVAL; 3072 return -EINVAL;
2895 setup->vendor_ie = nla_data(ieattr); 3073 setup->ie = nla_data(ieattr);
2896 setup->vendor_ie_len = nla_len(ieattr); 3074 setup->ie_len = nla_len(ieattr);
2897 } 3075 }
3076 setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]);
3077 setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]);
2898 3078
2899 return 0; 3079 return 0;
2900} 3080}
@@ -2954,7 +3134,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
2954 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 3134 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2955 NL80211_CMD_GET_REG); 3135 NL80211_CMD_GET_REG);
2956 if (!hdr) 3136 if (!hdr)
2957 goto nla_put_failure; 3137 goto put_failure;
2958 3138
2959 NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, 3139 NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
2960 cfg80211_regdomain->alpha2); 3140 cfg80211_regdomain->alpha2);
@@ -3001,6 +3181,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
3001 3181
3002nla_put_failure: 3182nla_put_failure:
3003 genlmsg_cancel(msg, hdr); 3183 genlmsg_cancel(msg, hdr);
3184put_failure:
3004 nlmsg_free(msg); 3185 nlmsg_free(msg);
3005 err = -EMSGSIZE; 3186 err = -EMSGSIZE;
3006out: 3187out:
@@ -3262,6 +3443,188 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3262 return err; 3443 return err;
3263} 3444}
3264 3445
3446static int nl80211_start_sched_scan(struct sk_buff *skb,
3447 struct genl_info *info)
3448{
3449 struct cfg80211_sched_scan_request *request;
3450 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3451 struct net_device *dev = info->user_ptr[1];
3452 struct cfg80211_ssid *ssid;
3453 struct ieee80211_channel *channel;
3454 struct nlattr *attr;
3455 struct wiphy *wiphy;
3456 int err, tmp, n_ssids = 0, n_channels, i;
3457 u32 interval;
3458 enum ieee80211_band band;
3459 size_t ie_len;
3460
3461 if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
3462 !rdev->ops->sched_scan_start)
3463 return -EOPNOTSUPP;
3464
3465 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
3466 return -EINVAL;
3467
3468 if (rdev->sched_scan_req)
3469 return -EINPROGRESS;
3470
3471 if (!info->attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
3472 return -EINVAL;
3473
3474 interval = nla_get_u32(info->attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]);
3475 if (interval == 0)
3476 return -EINVAL;
3477
3478 wiphy = &rdev->wiphy;
3479
3480 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
3481 n_channels = validate_scan_freqs(
3482 info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
3483 if (!n_channels)
3484 return -EINVAL;
3485 } else {
3486 n_channels = 0;
3487
3488 for (band = 0; band < IEEE80211_NUM_BANDS; band++)
3489 if (wiphy->bands[band])
3490 n_channels += wiphy->bands[band]->n_channels;
3491 }
3492
3493 if (info->attrs[NL80211_ATTR_SCAN_SSIDS])
3494 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS],
3495 tmp)
3496 n_ssids++;
3497
3498 if (n_ssids > wiphy->max_scan_ssids)
3499 return -EINVAL;
3500
3501 if (info->attrs[NL80211_ATTR_IE])
3502 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
3503 else
3504 ie_len = 0;
3505
3506 if (ie_len > wiphy->max_scan_ie_len)
3507 return -EINVAL;
3508
3509 request = kzalloc(sizeof(*request)
3510 + sizeof(*ssid) * n_ssids
3511 + sizeof(channel) * n_channels
3512 + ie_len, GFP_KERNEL);
3513 if (!request)
3514 return -ENOMEM;
3515
3516 if (n_ssids)
3517 request->ssids = (void *)&request->channels[n_channels];
3518 request->n_ssids = n_ssids;
3519 if (ie_len) {
3520 if (request->ssids)
3521 request->ie = (void *)(request->ssids + n_ssids);
3522 else
3523 request->ie = (void *)(request->channels + n_channels);
3524 }
3525
3526 i = 0;
3527 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
3528 /* user specified, bail out if channel not found */
3529 nla_for_each_nested(attr,
3530 info->attrs[NL80211_ATTR_SCAN_FREQUENCIES],
3531 tmp) {
3532 struct ieee80211_channel *chan;
3533
3534 chan = ieee80211_get_channel(wiphy, nla_get_u32(attr));
3535
3536 if (!chan) {
3537 err = -EINVAL;
3538 goto out_free;
3539 }
3540
3541 /* ignore disabled channels */
3542 if (chan->flags & IEEE80211_CHAN_DISABLED)
3543 continue;
3544
3545 request->channels[i] = chan;
3546 i++;
3547 }
3548 } else {
3549 /* all channels */
3550 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
3551 int j;
3552 if (!wiphy->bands[band])
3553 continue;
3554 for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
3555 struct ieee80211_channel *chan;
3556
3557 chan = &wiphy->bands[band]->channels[j];
3558
3559 if (chan->flags & IEEE80211_CHAN_DISABLED)
3560 continue;
3561
3562 request->channels[i] = chan;
3563 i++;
3564 }
3565 }
3566 }
3567
3568 if (!i) {
3569 err = -EINVAL;
3570 goto out_free;
3571 }
3572
3573 request->n_channels = i;
3574
3575 i = 0;
3576 if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) {
3577 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS],
3578 tmp) {
3579 if (request->ssids[i].ssid_len >
3580 IEEE80211_MAX_SSID_LEN) {
3581 err = -EINVAL;
3582 goto out_free;
3583 }
3584 memcpy(request->ssids[i].ssid, nla_data(attr),
3585 nla_len(attr));
3586 request->ssids[i].ssid_len = nla_len(attr);
3587 i++;
3588 }
3589 }
3590
3591 if (info->attrs[NL80211_ATTR_IE]) {
3592 request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
3593 memcpy((void *)request->ie,
3594 nla_data(info->attrs[NL80211_ATTR_IE]),
3595 request->ie_len);
3596 }
3597
3598 request->dev = dev;
3599 request->wiphy = &rdev->wiphy;
3600 request->interval = interval;
3601
3602 err = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request);
3603 if (!err) {
3604 rdev->sched_scan_req = request;
3605 nl80211_send_sched_scan(rdev, dev,
3606 NL80211_CMD_START_SCHED_SCAN);
3607 goto out;
3608 }
3609
3610out_free:
3611 kfree(request);
3612out:
3613 return err;
3614}
3615
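For orientation, the sched scan request built above lives in a single kzalloc(): the channel pointers occupy the struct's trailing array, the SSID entries are placed right after them, and the IE buffer follows the SSIDs (or the channels, when no SSIDs were given). A small stand-alone C sketch of that layout trick, using made-up type names rather than the cfg80211 structures:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct ssid_entry { unsigned char ssid[32]; size_t ssid_len; };

	struct sched_req {                    /* hypothetical, not cfg80211's struct */
		struct ssid_entry *ssids;     /* points into the same allocation */
		unsigned char *ie;            /* ditto */
		size_t n_channels, n_ssids, ie_len;
		void *channels[];             /* flexible array, stored first */
	};

	static struct sched_req *alloc_req(size_t n_channels, size_t n_ssids,
					   size_t ie_len)
	{
		/* one allocation: header + channels[] + ssids[] + ie[] */
		struct sched_req *r = calloc(1, sizeof(*r) +
					     sizeof(r->channels[0]) * n_channels +
					     sizeof(struct ssid_entry) * n_ssids +
					     ie_len);
		if (!r)
			return NULL;
		r->n_channels = n_channels;
		r->n_ssids = n_ssids;
		r->ie_len = ie_len;
		if (n_ssids)
			r->ssids = (void *)&r->channels[n_channels];
		if (ie_len)
			r->ie = n_ssids ? (unsigned char *)(r->ssids + n_ssids)
					: (unsigned char *)&r->channels[n_channels];
		return r;
	}

	int main(void)
	{
		struct sched_req *r = alloc_req(4, 2, 16);
		if (!r)
			return 1;
		memcpy(r->ssids[0].ssid, "example", 7);
		r->ssids[0].ssid_len = 7;
		printf("channels %p  ssids %p  ie %p\n",
		       (void *)r->channels, (void *)r->ssids, (void *)r->ie);
		free(r);
		return 0;
	}

The same carving order explains why request->ie falls back to the end of the channel array when no SSIDs were supplied.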
3616static int nl80211_stop_sched_scan(struct sk_buff *skb,
3617 struct genl_info *info)
3618{
3619 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3620
3621 if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
3622 !rdev->ops->sched_scan_stop)
3623 return -EOPNOTSUPP;
3624
3625 return __cfg80211_stop_sched_scan(rdev, false);
3626}
3627
3265static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, 3628static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
3266 struct cfg80211_registered_device *rdev, 3629 struct cfg80211_registered_device *rdev,
3267 struct wireless_dev *wdev, 3630 struct wireless_dev *wdev,
@@ -4760,6 +5123,194 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
4760 return cfg80211_leave_mesh(rdev, dev); 5123 return cfg80211_leave_mesh(rdev, dev);
4761} 5124}
4762 5125
5126static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
5127{
5128 struct cfg80211_registered_device *rdev = info->user_ptr[0];
5129 struct sk_buff *msg;
5130 void *hdr;
5131
5132 if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns)
5133 return -EOPNOTSUPP;
5134
5135 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
5136 if (!msg)
5137 return -ENOMEM;
5138
5139 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
5140 NL80211_CMD_GET_WOWLAN);
5141 if (!hdr)
5142 goto nla_put_failure;
5143
5144 if (rdev->wowlan) {
5145 struct nlattr *nl_wowlan;
5146
5147 nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS);
5148 if (!nl_wowlan)
5149 goto nla_put_failure;
5150
5151 if (rdev->wowlan->any)
5152 NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_ANY);
5153 if (rdev->wowlan->disconnect)
5154 NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_DISCONNECT);
5155 if (rdev->wowlan->magic_pkt)
5156 NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT);
5157 if (rdev->wowlan->n_patterns) {
5158 struct nlattr *nl_pats, *nl_pat;
5159 int i, pat_len;
5160
5161 nl_pats = nla_nest_start(msg,
5162 NL80211_WOWLAN_TRIG_PKT_PATTERN);
5163 if (!nl_pats)
5164 goto nla_put_failure;
5165
5166 for (i = 0; i < rdev->wowlan->n_patterns; i++) {
5167 nl_pat = nla_nest_start(msg, i + 1);
5168 if (!nl_pat)
5169 goto nla_put_failure;
5170 pat_len = rdev->wowlan->patterns[i].pattern_len;
5171 NLA_PUT(msg, NL80211_WOWLAN_PKTPAT_MASK,
5172 DIV_ROUND_UP(pat_len, 8),
5173 rdev->wowlan->patterns[i].mask);
5174 NLA_PUT(msg, NL80211_WOWLAN_PKTPAT_PATTERN,
5175 pat_len,
5176 rdev->wowlan->patterns[i].pattern);
5177 nla_nest_end(msg, nl_pat);
5178 }
5179 nla_nest_end(msg, nl_pats);
5180 }
5181
5182 nla_nest_end(msg, nl_wowlan);
5183 }
5184
5185 genlmsg_end(msg, hdr);
5186 return genlmsg_reply(msg, info);
5187
5188nla_put_failure:
5189 nlmsg_free(msg);
5190 return -ENOBUFS;
5191}
5192
5193static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
5194{
5195 struct cfg80211_registered_device *rdev = info->user_ptr[0];
5196 struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG];
5197 struct cfg80211_wowlan no_triggers = {};
5198 struct cfg80211_wowlan new_triggers = {};
5199 struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan;
5200 int err, i;
5201
5202 if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns)
5203 return -EOPNOTSUPP;
5204
5205 if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS])
5206 goto no_triggers;
5207
5208 err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG,
5209 nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]),
5210 nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]),
5211 nl80211_wowlan_policy);
5212 if (err)
5213 return err;
5214
5215 if (tb[NL80211_WOWLAN_TRIG_ANY]) {
5216 if (!(wowlan->flags & WIPHY_WOWLAN_ANY))
5217 return -EINVAL;
5218 new_triggers.any = true;
5219 }
5220
5221 if (tb[NL80211_WOWLAN_TRIG_DISCONNECT]) {
5222 if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT))
5223 return -EINVAL;
5224 new_triggers.disconnect = true;
5225 }
5226
5227 if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) {
5228 if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT))
5229 return -EINVAL;
5230 new_triggers.magic_pkt = true;
5231 }
5232
5233 if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) {
5234 struct nlattr *pat;
5235 int n_patterns = 0;
5236 int rem, pat_len, mask_len;
5237 struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT];
5238
5239 nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
5240 rem)
5241 n_patterns++;
5242 if (n_patterns > wowlan->n_patterns)
5243 return -EINVAL;
5244
5245 new_triggers.patterns = kcalloc(n_patterns,
5246 sizeof(new_triggers.patterns[0]),
5247 GFP_KERNEL);
5248 if (!new_triggers.patterns)
5249 return -ENOMEM;
5250
5251 new_triggers.n_patterns = n_patterns;
5252 i = 0;
5253
5254 nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
5255 rem) {
5256 nla_parse(pat_tb, MAX_NL80211_WOWLAN_PKTPAT,
5257 nla_data(pat), nla_len(pat), NULL);
5258 err = -EINVAL;
5259 if (!pat_tb[NL80211_WOWLAN_PKTPAT_MASK] ||
5260 !pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN])
5261 goto error;
5262 pat_len = nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]);
5263 mask_len = DIV_ROUND_UP(pat_len, 8);
5264 if (nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]) !=
5265 mask_len)
5266 goto error;
5267 if (pat_len > wowlan->pattern_max_len ||
5268 pat_len < wowlan->pattern_min_len)
5269 goto error;
5270
5271 new_triggers.patterns[i].mask =
5272 kmalloc(mask_len + pat_len, GFP_KERNEL);
5273 if (!new_triggers.patterns[i].mask) {
5274 err = -ENOMEM;
5275 goto error;
5276 }
5277 new_triggers.patterns[i].pattern =
5278 new_triggers.patterns[i].mask + mask_len;
5279 memcpy(new_triggers.patterns[i].mask,
5280 nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]),
5281 mask_len);
5282 new_triggers.patterns[i].pattern_len = pat_len;
5283 memcpy(new_triggers.patterns[i].pattern,
5284 nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]),
5285 pat_len);
5286 i++;
5287 }
5288 }
5289
5290 if (memcmp(&new_triggers, &no_triggers, sizeof(new_triggers))) {
5291 struct cfg80211_wowlan *ntrig;
5292 ntrig = kmemdup(&new_triggers, sizeof(new_triggers),
5293 GFP_KERNEL);
5294 if (!ntrig) {
5295 err = -ENOMEM;
5296 goto error;
5297 }
5298 cfg80211_rdev_free_wowlan(rdev);
5299 rdev->wowlan = ntrig;
5300 } else {
5301 no_triggers:
5302 cfg80211_rdev_free_wowlan(rdev);
5303 rdev->wowlan = NULL;
5304 }
5305
5306 return 0;
5307 error:
5308 for (i = 0; i < new_triggers.n_patterns; i++)
5309 kfree(new_triggers.patterns[i].mask);
5310 kfree(new_triggers.patterns);
5311 return err;
5312}
5313
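For reference, each WoWLAN pattern above carries DIV_ROUND_UP(pat_len, 8) mask bytes stored in the same kmalloc() as the pat_len pattern bytes, one mask bit per pattern byte. A stand-alone sketch of that bit layout (illustrative values; the lowest-bit-first ordering is an assumption inferred from the DIV_ROUND_UP math, not spelled out in this hunk):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	/* is pattern byte i significant? one mask bit per pattern byte,
	 * lowest bit first (assumed convention) */
	static int pattern_byte_enabled(const unsigned char *mask, int i)
	{
		return mask[i / 8] & (1u << (i % 8));
	}

	int main(void)
	{
		unsigned char pattern[11] = "example pkt";   /* 11 bytes */
		int pat_len = (int)sizeof(pattern);
		int mask_len = DIV_ROUND_UP(pat_len, 8);     /* -> 2 mask bytes */
		unsigned char mask[2] = { 0xff, 0x01 };      /* bytes 0..8 must match */
		int i;

		printf("pattern %d bytes, mask %d bytes\n", pat_len, mask_len);
		for (i = 0; i < pat_len; i++)
			printf("byte %2d: %s\n", i,
			       pattern_byte_enabled(mask, i) ? "match" : "don't care");
		return 0;
	}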
4763#define NL80211_FLAG_NEED_WIPHY 0x01 5314#define NL80211_FLAG_NEED_WIPHY 0x01
4764#define NL80211_FLAG_NEED_NETDEV 0x02 5315#define NL80211_FLAG_NEED_NETDEV 0x02
4765#define NL80211_FLAG_NEED_RTNL 0x04 5316#define NL80211_FLAG_NEED_RTNL 0x04
@@ -5044,6 +5595,22 @@ static struct genl_ops nl80211_ops[] = {
5044 .dumpit = nl80211_dump_scan, 5595 .dumpit = nl80211_dump_scan,
5045 }, 5596 },
5046 { 5597 {
5598 .cmd = NL80211_CMD_START_SCHED_SCAN,
5599 .doit = nl80211_start_sched_scan,
5600 .policy = nl80211_policy,
5601 .flags = GENL_ADMIN_PERM,
5602 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
5603 NL80211_FLAG_NEED_RTNL,
5604 },
5605 {
5606 .cmd = NL80211_CMD_STOP_SCHED_SCAN,
5607 .doit = nl80211_stop_sched_scan,
5608 .policy = nl80211_policy,
5609 .flags = GENL_ADMIN_PERM,
5610 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
5611 NL80211_FLAG_NEED_RTNL,
5612 },
5613 {
5047 .cmd = NL80211_CMD_AUTHENTICATE, 5614 .cmd = NL80211_CMD_AUTHENTICATE,
5048 .doit = nl80211_authenticate, 5615 .doit = nl80211_authenticate,
5049 .policy = nl80211_policy, 5616 .policy = nl80211_policy,
@@ -5258,6 +5825,22 @@ static struct genl_ops nl80211_ops[] = {
5258 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | 5825 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
5259 NL80211_FLAG_NEED_RTNL, 5826 NL80211_FLAG_NEED_RTNL,
5260 }, 5827 },
5828 {
5829 .cmd = NL80211_CMD_GET_WOWLAN,
5830 .doit = nl80211_get_wowlan,
5831 .policy = nl80211_policy,
5832 /* can be retrieved by unprivileged users */
5833 .internal_flags = NL80211_FLAG_NEED_WIPHY |
5834 NL80211_FLAG_NEED_RTNL,
5835 },
5836 {
5837 .cmd = NL80211_CMD_SET_WOWLAN,
5838 .doit = nl80211_set_wowlan,
5839 .policy = nl80211_policy,
5840 .flags = GENL_ADMIN_PERM,
5841 .internal_flags = NL80211_FLAG_NEED_WIPHY |
5842 NL80211_FLAG_NEED_RTNL,
5843 },
5261}; 5844};
5262 5845
5263static struct genl_multicast_group nl80211_mlme_mcgrp = { 5846static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -5353,6 +5936,28 @@ static int nl80211_send_scan_msg(struct sk_buff *msg,
5353 return -EMSGSIZE; 5936 return -EMSGSIZE;
5354} 5937}
5355 5938
5939static int
5940nl80211_send_sched_scan_msg(struct sk_buff *msg,
5941 struct cfg80211_registered_device *rdev,
5942 struct net_device *netdev,
5943 u32 pid, u32 seq, int flags, u32 cmd)
5944{
5945 void *hdr;
5946
5947 hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
5948 if (!hdr)
5949 return -1;
5950
5951 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
5952 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
5953
5954 return genlmsg_end(msg, hdr);
5955
5956 nla_put_failure:
5957 genlmsg_cancel(msg, hdr);
5958 return -EMSGSIZE;
5959}
5960
5356void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, 5961void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
5357 struct net_device *netdev) 5962 struct net_device *netdev)
5358{ 5963{
@@ -5410,6 +6015,43 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
5410 nl80211_scan_mcgrp.id, GFP_KERNEL); 6015 nl80211_scan_mcgrp.id, GFP_KERNEL);
5411} 6016}
5412 6017
6018void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
6019 struct net_device *netdev)
6020{
6021 struct sk_buff *msg;
6022
6023 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
6024 if (!msg)
6025 return;
6026
6027 if (nl80211_send_sched_scan_msg(msg, rdev, netdev, 0, 0, 0,
6028 NL80211_CMD_SCHED_SCAN_RESULTS) < 0) {
6029 nlmsg_free(msg);
6030 return;
6031 }
6032
6033 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
6034 nl80211_scan_mcgrp.id, GFP_KERNEL);
6035}
6036
6037void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
6038 struct net_device *netdev, u32 cmd)
6039{
6040 struct sk_buff *msg;
6041
6042 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
6043 if (!msg)
6044 return;
6045
6046 if (nl80211_send_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, cmd) < 0) {
6047 nlmsg_free(msg);
6048 return;
6049 }
6050
6051 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
6052 nl80211_scan_mcgrp.id, GFP_KERNEL);
6053}
6054
5413/* 6055/*
5414 * This can happen on global regulatory changes or device specific settings 6056 * This can happen on global regulatory changes or device specific settings
5415 * based on custom world regulatory domains. 6057 * based on custom world regulatory domains.
@@ -5765,6 +6407,44 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
5765 nlmsg_free(msg); 6407 nlmsg_free(msg);
5766} 6408}
5767 6409
6410void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
6411 struct net_device *netdev,
6412 const u8 *macaddr, const u8* ie, u8 ie_len,
6413 gfp_t gfp)
6414{
6415 struct sk_buff *msg;
6416 void *hdr;
6417
6418 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
6419 if (!msg)
6420 return;
6421
6422 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NEW_PEER_CANDIDATE);
6423 if (!hdr) {
6424 nlmsg_free(msg);
6425 return;
6426 }
6427
6428 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
6429 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
6430 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr);
6431 if (ie_len && ie)
6432 NLA_PUT(msg, NL80211_ATTR_IE, ie_len , ie);
6433
6434 if (genlmsg_end(msg, hdr) < 0) {
6435 nlmsg_free(msg);
6436 return;
6437 }
6438
6439 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
6440 nl80211_mlme_mcgrp.id, gfp);
6441 return;
6442
6443 nla_put_failure:
6444 genlmsg_cancel(msg, hdr);
6445 nlmsg_free(msg);
6446}
6447
5768void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, 6448void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
5769 struct net_device *netdev, const u8 *addr, 6449 struct net_device *netdev, const u8 *addr,
5770 enum nl80211_key_type key_type, int key_id, 6450 enum nl80211_key_type key_type, int key_id,
@@ -5946,6 +6626,40 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
5946 nl80211_mlme_mcgrp.id, gfp); 6626 nl80211_mlme_mcgrp.id, gfp);
5947} 6627}
5948 6628
6629void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
6630 struct net_device *dev, const u8 *mac_addr,
6631 gfp_t gfp)
6632{
6633 struct sk_buff *msg;
6634 void *hdr;
6635
6636 msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
6637 if (!msg)
6638 return;
6639
6640 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_STATION);
6641 if (!hdr) {
6642 nlmsg_free(msg);
6643 return;
6644 }
6645
6646 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
6647 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
6648
6649 if (genlmsg_end(msg, hdr) < 0) {
6650 nlmsg_free(msg);
6651 return;
6652 }
6653
6654 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
6655 nl80211_mlme_mcgrp.id, gfp);
6656 return;
6657
6658 nla_put_failure:
6659 genlmsg_cancel(msg, hdr);
6660 nlmsg_free(msg);
6661}
6662
5949int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 6663int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
5950 struct net_device *netdev, u32 nlpid, 6664 struct net_device *netdev, u32 nlpid,
5951 int freq, const u8 *buf, size_t len, gfp_t gfp) 6665 int freq, const u8 *buf, size_t len, gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index e3f7fa886966..2f1bfb87a651 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -12,6 +12,10 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
12 struct net_device *netdev); 12 struct net_device *netdev);
13void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, 13void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
14 struct net_device *netdev); 14 struct net_device *netdev);
15void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
16 struct net_device *netdev, u32 cmd);
17void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
18 struct net_device *netdev);
15void nl80211_send_reg_change_event(struct regulatory_request *request); 19void nl80211_send_reg_change_event(struct regulatory_request *request);
16void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, 20void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
17 struct net_device *netdev, 21 struct net_device *netdev,
@@ -50,6 +54,10 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
50 struct net_device *netdev, u16 reason, 54 struct net_device *netdev, u16 reason,
51 const u8 *ie, size_t ie_len, bool from_ap); 55 const u8 *ie, size_t ie_len, bool from_ap);
52 56
57void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
58 struct net_device *netdev,
59 const u8 *macaddr, const u8* ie, u8 ie_len,
60 gfp_t gfp);
53void 61void
54nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, 62nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
55 struct net_device *netdev, const u8 *addr, 63 struct net_device *netdev, const u8 *addr,
@@ -79,6 +87,9 @@ void nl80211_send_remain_on_channel_cancel(
79void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, 87void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
80 struct net_device *dev, const u8 *mac_addr, 88 struct net_device *dev, const u8 *mac_addr,
81 struct station_info *sinfo, gfp_t gfp); 89 struct station_info *sinfo, gfp_t gfp);
90void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
91 struct net_device *dev, const u8 *mac_addr,
92 gfp_t gfp);
82 93
83int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 94int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
84 struct net_device *netdev, u32 nlpid, int freq, 95 struct net_device *netdev, u32 nlpid, int freq,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 37693b6ef23a..1ad0f39fe091 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -63,6 +63,10 @@ static struct regulatory_request *last_request;
63/* To trigger userspace events */ 63/* To trigger userspace events */
64static struct platform_device *reg_pdev; 64static struct platform_device *reg_pdev;
65 65
66static struct device_type reg_device_type = {
67 .uevent = reg_device_uevent,
68};
69
66/* 70/*
67 * Central wireless core regulatory domains, we only need two, 71 * Central wireless core regulatory domains, we only need two,
68 * the current one and a world regulatory domain in case we have no 72 * the current one and a world regulatory domain in case we have no
@@ -102,6 +106,9 @@ struct reg_beacon {
102static void reg_todo(struct work_struct *work); 106static void reg_todo(struct work_struct *work);
103static DECLARE_WORK(reg_work, reg_todo); 107static DECLARE_WORK(reg_work, reg_todo);
104 108
109static void reg_timeout_work(struct work_struct *work);
110static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work);
111
105/* We keep a static world regulatory domain in case of the absence of CRDA */ 112/* We keep a static world regulatory domain in case of the absence of CRDA */
106static const struct ieee80211_regdomain world_regdom = { 113static const struct ieee80211_regdomain world_regdom = {
107 .n_reg_rules = 5, 114 .n_reg_rules = 5,
@@ -362,16 +369,11 @@ static inline void reg_regdb_query(const char *alpha2) {}
362 369
363/* 370/*
364 * This lets us keep regulatory code which is updated on a regulatory 371 * This lets us keep regulatory code which is updated on a regulatory
365 * basis in userspace. 372 * basis in userspace. Country information is filled in by
373 * reg_device_uevent
366 */ 374 */
367static int call_crda(const char *alpha2) 375static int call_crda(const char *alpha2)
368{ 376{
369 char country_env[9 + 2] = "COUNTRY=";
370 char *envp[] = {
371 country_env,
372 NULL
373 };
374
375 if (!is_world_regdom((char *) alpha2)) 377 if (!is_world_regdom((char *) alpha2))
376 pr_info("Calling CRDA for country: %c%c\n", 378 pr_info("Calling CRDA for country: %c%c\n",
377 alpha2[0], alpha2[1]); 379 alpha2[0], alpha2[1]);
@@ -381,10 +383,7 @@ static int call_crda(const char *alpha2)
381 /* query internal regulatory database (if it exists) */ 383 /* query internal regulatory database (if it exists) */
382 reg_regdb_query(alpha2); 384 reg_regdb_query(alpha2);
383 385
384 country_env[8] = alpha2[0]; 386 return kobject_uevent(&reg_pdev->dev.kobj, KOBJ_CHANGE);
385 country_env[9] = alpha2[1];
386
387 return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, envp);
388} 387}
389 388
390/* Used by nl80211 before kmalloc'ing our regulatory domain */ 389/* Used by nl80211 before kmalloc'ing our regulatory domain */
@@ -673,11 +672,9 @@ static int freq_reg_info_regd(struct wiphy *wiphy,
673 for (i = 0; i < regd->n_reg_rules; i++) { 672 for (i = 0; i < regd->n_reg_rules; i++) {
674 const struct ieee80211_reg_rule *rr; 673 const struct ieee80211_reg_rule *rr;
675 const struct ieee80211_freq_range *fr = NULL; 674 const struct ieee80211_freq_range *fr = NULL;
676 const struct ieee80211_power_rule *pr = NULL;
677 675
678 rr = &regd->reg_rules[i]; 676 rr = &regd->reg_rules[i];
679 fr = &rr->freq_range; 677 fr = &rr->freq_range;
680 pr = &rr->power_rule;
681 678
682 /* 679 /*
683 * We only need to know if one frequency rule was 680 * We only need to know if one frequency rule was
@@ -813,7 +810,7 @@ static void handle_channel(struct wiphy *wiphy,
813 if (r) { 810 if (r) {
814 /* 811 /*
815 * We will disable all channels that do not match our 812 * We will disable all channels that do not match our
816 * recieved regulatory rule unless the hint is coming 813 * received regulatory rule unless the hint is coming
817 * from a Country IE and the Country IE had no information 814 * from a Country IE and the Country IE had no information
818 * about a band. The IEEE 802.11 spec allows for an AP 815 * about a band. The IEEE 802.11 spec allows for an AP
819 * to send only a subset of the regulatory rules allowed, 816 * to send only a subset of the regulatory rules allowed,
@@ -842,7 +839,7 @@ static void handle_channel(struct wiphy *wiphy,
842 request_wiphy && request_wiphy == wiphy && 839 request_wiphy && request_wiphy == wiphy &&
843 request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { 840 request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) {
844 /* 841 /*
845 * This gaurantees the driver's requested regulatory domain 842 * This guarantees the driver's requested regulatory domain
846 * will always be used as a base for further regulatory 843 * will always be used as a base for further regulatory
847 * settings 844 * settings
848 */ 845 */
@@ -1334,6 +1331,9 @@ static void reg_set_request_processed(void)
1334 need_more_processing = true; 1331 need_more_processing = true;
1335 spin_unlock(&reg_requests_lock); 1332 spin_unlock(&reg_requests_lock);
1336 1333
1334 if (last_request->initiator == NL80211_REGDOM_SET_BY_USER)
1335 cancel_delayed_work_sync(&reg_timeout);
1336
1337 if (need_more_processing) 1337 if (need_more_processing)
1338 schedule_work(&reg_work); 1338 schedule_work(&reg_work);
1339} 1339}
@@ -1444,8 +1444,18 @@ static void reg_process_hint(struct regulatory_request *reg_request)
1444 r = __regulatory_hint(wiphy, reg_request); 1444 r = __regulatory_hint(wiphy, reg_request);
1445 /* This is required so that the orig_* parameters are saved */ 1445 /* This is required so that the orig_* parameters are saved */
1446 if (r == -EALREADY && wiphy && 1446 if (r == -EALREADY && wiphy &&
1447 wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) 1447 wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) {
1448 wiphy_update_regulatory(wiphy, initiator); 1448 wiphy_update_regulatory(wiphy, initiator);
1449 return;
1450 }
1451
1452 /*
1453 * We only time out user hints, given that they should be the only
1454 * source of bogus requests.
1455 */
1456 if (r != -EALREADY &&
1457 reg_request->initiator == NL80211_REGDOM_SET_BY_USER)
1458 schedule_delayed_work(&reg_timeout, msecs_to_jiffies(3142));
1449} 1459}
1450 1460
1451/* 1461/*
@@ -1748,6 +1758,8 @@ static void restore_regulatory_settings(bool reset_user)
1748{ 1758{
1749 char alpha2[2]; 1759 char alpha2[2];
1750 struct reg_beacon *reg_beacon, *btmp; 1760 struct reg_beacon *reg_beacon, *btmp;
1761 struct regulatory_request *reg_request, *tmp;
1762 LIST_HEAD(tmp_reg_req_list);
1751 1763
1752 mutex_lock(&cfg80211_mutex); 1764 mutex_lock(&cfg80211_mutex);
1753 mutex_lock(&reg_mutex); 1765 mutex_lock(&reg_mutex);
@@ -1755,6 +1767,25 @@ static void restore_regulatory_settings(bool reset_user)
1755 reset_regdomains(); 1767 reset_regdomains();
1756 restore_alpha2(alpha2, reset_user); 1768 restore_alpha2(alpha2, reset_user);
1757 1769
1770 /*
1771 * If there are any pending requests we simply
1772 * stash them on a temporary pending queue and
1773 * add them back after we've restored regulatory
1774 * settings.
1775 */
1776 spin_lock(&reg_requests_lock);
1777 if (!list_empty(&reg_requests_list)) {
1778 list_for_each_entry_safe(reg_request, tmp,
1779 &reg_requests_list, list) {
1780 if (reg_request->initiator !=
1781 NL80211_REGDOM_SET_BY_USER)
1782 continue;
1783 list_del(&reg_request->list);
1784 list_add_tail(&reg_request->list, &tmp_reg_req_list);
1785 }
1786 }
1787 spin_unlock(&reg_requests_lock);
1788
1758 /* Clear beacon hints */ 1789 /* Clear beacon hints */
1759 spin_lock_bh(&reg_pending_beacons_lock); 1790 spin_lock_bh(&reg_pending_beacons_lock);
1760 if (!list_empty(&reg_pending_beacons)) { 1791 if (!list_empty(&reg_pending_beacons)) {
@@ -1789,8 +1820,31 @@ static void restore_regulatory_settings(bool reset_user)
1789 */ 1820 */
1790 if (is_an_alpha2(alpha2)) 1821 if (is_an_alpha2(alpha2))
1791 regulatory_hint_user(user_alpha2); 1822 regulatory_hint_user(user_alpha2);
1792}
1793 1823
1824 if (list_empty(&tmp_reg_req_list))
1825 return;
1826
1827 mutex_lock(&cfg80211_mutex);
1828 mutex_lock(&reg_mutex);
1829
1830 spin_lock(&reg_requests_lock);
1831 list_for_each_entry_safe(reg_request, tmp, &tmp_reg_req_list, list) {
1832 REG_DBG_PRINT("Adding request for country %c%c back "
1833 "into the queue\n",
1834 reg_request->alpha2[0],
1835 reg_request->alpha2[1]);
1836 list_del(&reg_request->list);
1837 list_add_tail(&reg_request->list, &reg_requests_list);
1838 }
1839 spin_unlock(&reg_requests_lock);
1840
1841 mutex_unlock(&reg_mutex);
1842 mutex_unlock(&cfg80211_mutex);
1843
1844 REG_DBG_PRINT("Kicking the queue\n");
1845
1846 schedule_work(&reg_work);
1847}
1794 1848
1795void regulatory_hint_disconnect(void) 1849void regulatory_hint_disconnect(void)
1796{ 1850{
@@ -1801,9 +1855,9 @@ void regulatory_hint_disconnect(void)
1801 1855
1802static bool freq_is_chan_12_13_14(u16 freq) 1856static bool freq_is_chan_12_13_14(u16 freq)
1803{ 1857{
1804 if (freq == ieee80211_channel_to_frequency(12) || 1858 if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) ||
1805 freq == ieee80211_channel_to_frequency(13) || 1859 freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) ||
1806 freq == ieee80211_channel_to_frequency(14)) 1860 freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ))
1807 return true; 1861 return true;
1808 return false; 1862 return false;
1809} 1863}
@@ -2087,6 +2141,25 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2087 return r; 2141 return r;
2088} 2142}
2089 2143
2144#ifdef CONFIG_HOTPLUG
2145int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
2146{
2147 if (last_request && !last_request->processed) {
2148 if (add_uevent_var(env, "COUNTRY=%c%c",
2149 last_request->alpha2[0],
2150 last_request->alpha2[1]))
2151 return -ENOMEM;
2152 }
2153
2154 return 0;
2155}
2156#else
2157int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
2158{
2159 return -ENODEV;
2160}
2161#endif /* CONFIG_HOTPLUG */
2162
2090/* Caller must hold cfg80211_mutex */ 2163/* Caller must hold cfg80211_mutex */
2091void reg_device_remove(struct wiphy *wiphy) 2164void reg_device_remove(struct wiphy *wiphy)
2092{ 2165{
@@ -2110,6 +2183,13 @@ out:
2110 mutex_unlock(&reg_mutex); 2183 mutex_unlock(&reg_mutex);
2111} 2184}
2112 2185
2186static void reg_timeout_work(struct work_struct *work)
2187{
2188 REG_DBG_PRINT("Timeout while waiting for CRDA to reply, "
2189 "restoring regulatory settings");
2190 restore_regulatory_settings(true);
2191}
2192
2113int __init regulatory_init(void) 2193int __init regulatory_init(void)
2114{ 2194{
2115 int err = 0; 2195 int err = 0;
@@ -2118,6 +2198,8 @@ int __init regulatory_init(void)
2118 if (IS_ERR(reg_pdev)) 2198 if (IS_ERR(reg_pdev))
2119 return PTR_ERR(reg_pdev); 2199 return PTR_ERR(reg_pdev);
2120 2200
2201 reg_pdev->dev.type = &reg_device_type;
2202
2121 spin_lock_init(&reg_requests_lock); 2203 spin_lock_init(&reg_requests_lock);
2122 spin_lock_init(&reg_pending_beacons_lock); 2204 spin_lock_init(&reg_pending_beacons_lock);
2123 2205
@@ -2161,6 +2243,7 @@ void /* __init_or_exit */ regulatory_exit(void)
2161 struct reg_beacon *reg_beacon, *btmp; 2243 struct reg_beacon *reg_beacon, *btmp;
2162 2244
2163 cancel_work_sync(&reg_work); 2245 cancel_work_sync(&reg_work);
2246 cancel_delayed_work_sync(&reg_timeout);
2164 2247
2165 mutex_lock(&cfg80211_mutex); 2248 mutex_lock(&cfg80211_mutex);
2166 mutex_lock(&reg_mutex); 2249 mutex_lock(&reg_mutex);
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index c4695d07af23..b67d1c3a2fb9 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -8,6 +8,7 @@ bool reg_is_valid_request(const char *alpha2);
8 8
9int regulatory_hint_user(const char *alpha2); 9int regulatory_hint_user(const char *alpha2);
10 10
11int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env);
11void reg_device_remove(struct wiphy *wiphy); 12void reg_device_remove(struct wiphy *wiphy);
12 13
13int __init regulatory_init(void); 14int __init regulatory_init(void);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ea427f418f64..73a441d237b5 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -93,6 +93,69 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
93} 93}
94EXPORT_SYMBOL(cfg80211_scan_done); 94EXPORT_SYMBOL(cfg80211_scan_done);
95 95
96void __cfg80211_sched_scan_results(struct work_struct *wk)
97{
98 struct cfg80211_registered_device *rdev;
99
100 rdev = container_of(wk, struct cfg80211_registered_device,
101 sched_scan_results_wk);
102
103 cfg80211_lock_rdev(rdev);
104
105 /* we don't have sched_scan_req anymore if the scan is stopping */
106 if (rdev->sched_scan_req)
107 nl80211_send_sched_scan_results(rdev,
108 rdev->sched_scan_req->dev);
109
110 cfg80211_unlock_rdev(rdev);
111}
112
113void cfg80211_sched_scan_results(struct wiphy *wiphy)
114{
115 /* ignore if we're not scanning */
116 if (wiphy_to_dev(wiphy)->sched_scan_req)
117 queue_work(cfg80211_wq,
118 &wiphy_to_dev(wiphy)->sched_scan_results_wk);
119}
120EXPORT_SYMBOL(cfg80211_sched_scan_results);
121
122void cfg80211_sched_scan_stopped(struct wiphy *wiphy)
123{
124 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
125
126 cfg80211_lock_rdev(rdev);
127 __cfg80211_stop_sched_scan(rdev, true);
128 cfg80211_unlock_rdev(rdev);
129}
130EXPORT_SYMBOL(cfg80211_sched_scan_stopped);
131
132int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
133 bool driver_initiated)
134{
135 int err;
136 struct net_device *dev;
137
138 ASSERT_RDEV_LOCK(rdev);
139
140 if (!rdev->sched_scan_req)
141 return 0;
142
143 dev = rdev->sched_scan_req->dev;
144
145 if (!driver_initiated) {
146 err = rdev->ops->sched_scan_stop(&rdev->wiphy, dev);
147 if (err)
148 return err;
149 }
150
151 nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED);
152
153 kfree(rdev->sched_scan_req);
154 rdev->sched_scan_req = NULL;
155
156 return err;
157}
158
96static void bss_release(struct kref *ref) 159static void bss_release(struct kref *ref)
97{ 160{
98 struct cfg80211_internal_bss *bss; 161 struct cfg80211_internal_bss *bss;
@@ -124,6 +187,15 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev,
124} 187}
125 188
126/* must hold dev->bss_lock! */ 189/* must hold dev->bss_lock! */
190static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev,
191 struct cfg80211_internal_bss *bss)
192{
193 list_del_init(&bss->list);
194 rb_erase(&bss->rbn, &dev->bss_tree);
195 kref_put(&bss->ref, bss_release);
196}
197
198/* must hold dev->bss_lock! */
127void cfg80211_bss_expire(struct cfg80211_registered_device *dev) 199void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
128{ 200{
129 struct cfg80211_internal_bss *bss, *tmp; 201 struct cfg80211_internal_bss *bss, *tmp;
@@ -134,9 +206,7 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
134 continue; 206 continue;
135 if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) 207 if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE))
136 continue; 208 continue;
137 list_del(&bss->list); 209 __cfg80211_unlink_bss(dev, bss);
138 rb_erase(&bss->rbn, &dev->bss_tree);
139 kref_put(&bss->ref, bss_release);
140 expired = true; 210 expired = true;
141 } 211 }
142 212
@@ -203,7 +273,7 @@ static bool is_mesh(struct cfg80211_bss *a,
203{ 273{
204 const u8 *ie; 274 const u8 *ie;
205 275
206 if (!is_zero_ether_addr(a->bssid)) 276 if (!WLAN_CAPABILITY_IS_MBSS(a->capability))
207 return false; 277 return false;
208 278
209 ie = cfg80211_find_ie(WLAN_EID_MESH_ID, 279 ie = cfg80211_find_ie(WLAN_EID_MESH_ID,
@@ -241,11 +311,7 @@ static int cmp_bss(struct cfg80211_bss *a,
241 if (a->channel != b->channel) 311 if (a->channel != b->channel)
242 return b->channel->center_freq - a->channel->center_freq; 312 return b->channel->center_freq - a->channel->center_freq;
243 313
244 r = memcmp(a->bssid, b->bssid, ETH_ALEN); 314 if (WLAN_CAPABILITY_IS_MBSS(a->capability | b->capability)) {
245 if (r)
246 return r;
247
248 if (is_zero_ether_addr(a->bssid)) {
249 r = cmp_ies(WLAN_EID_MESH_ID, 315 r = cmp_ies(WLAN_EID_MESH_ID,
250 a->information_elements, 316 a->information_elements,
251 a->len_information_elements, 317 a->len_information_elements,
@@ -260,6 +326,10 @@ static int cmp_bss(struct cfg80211_bss *a,
260 b->len_information_elements); 326 b->len_information_elements);
261 } 327 }
262 328
329 r = memcmp(a->bssid, b->bssid, ETH_ALEN);
330 if (r)
331 return r;
332
263 return cmp_ies(WLAN_EID_SSID, 333 return cmp_ies(WLAN_EID_SSID,
264 a->information_elements, 334 a->information_elements,
265 a->len_information_elements, 335 a->len_information_elements,
@@ -400,7 +470,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
400 470
401 res->ts = jiffies; 471 res->ts = jiffies;
402 472
403 if (is_zero_ether_addr(res->pub.bssid)) { 473 if (WLAN_CAPABILITY_IS_MBSS(res->pub.capability)) {
404 /* must be mesh, verify */ 474 /* must be mesh, verify */
405 meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, 475 meshid = cfg80211_find_ie(WLAN_EID_MESH_ID,
406 res->pub.information_elements, 476 res->pub.information_elements,
@@ -585,16 +655,23 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
585 struct cfg80211_internal_bss *res; 655 struct cfg80211_internal_bss *res;
586 size_t ielen = len - offsetof(struct ieee80211_mgmt, 656 size_t ielen = len - offsetof(struct ieee80211_mgmt,
587 u.probe_resp.variable); 657 u.probe_resp.variable);
588 size_t privsz = wiphy->bss_priv_size; 658 size_t privsz;
659
660 if (WARN_ON(!mgmt))
661 return NULL;
662
663 if (WARN_ON(!wiphy))
664 return NULL;
589 665
590 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && 666 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
591 (signal < 0 || signal > 100))) 667 (signal < 0 || signal > 100)))
592 return NULL; 668 return NULL;
593 669
594 if (WARN_ON(!mgmt || !wiphy || 670 if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
595 len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
596 return NULL; 671 return NULL;
597 672
673 privsz = wiphy->bss_priv_size;
674
598 res = kzalloc(sizeof(*res) + privsz + ielen, gfp); 675 res = kzalloc(sizeof(*res) + privsz + ielen, gfp);
599 if (!res) 676 if (!res)
600 return NULL; 677 return NULL;
@@ -662,11 +739,8 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
662 739
663 spin_lock_bh(&dev->bss_lock); 740 spin_lock_bh(&dev->bss_lock);
664 if (!list_empty(&bss->list)) { 741 if (!list_empty(&bss->list)) {
665 list_del_init(&bss->list); 742 __cfg80211_unlink_bss(dev, bss);
666 dev->bss_generation++; 743 dev->bss_generation++;
667 rb_erase(&bss->rbn, &dev->bss_tree);
668
669 kref_put(&bss->ref, bss_release);
670 } 744 }
671 spin_unlock_bh(&dev->bss_lock); 745 spin_unlock_bh(&dev->bss_lock);
672} 746}
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 4294fa22bb2d..c6e4ca6a7d2e 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -93,7 +93,7 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
93 93
94 if (rdev->ops->suspend) { 94 if (rdev->ops->suspend) {
95 rtnl_lock(); 95 rtnl_lock();
96 ret = rdev->ops->suspend(&rdev->wiphy); 96 ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
97 rtnl_unlock(); 97 rtnl_unlock();
98 } 98 }
99 99
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7620ae2fcf18..f0536d44d43c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -29,29 +29,37 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
29} 29}
30EXPORT_SYMBOL(ieee80211_get_response_rate); 30EXPORT_SYMBOL(ieee80211_get_response_rate);
31 31
32int ieee80211_channel_to_frequency(int chan) 32int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band)
33{ 33{
34 if (chan < 14) 34 /* see 802.11 17.3.8.3.2 and Annex J
35 return 2407 + chan * 5; 35 * there are overlapping channel numbers in 5GHz and 2GHz bands */
36 36 if (band == IEEE80211_BAND_5GHZ) {
37 if (chan == 14) 37 if (chan >= 182 && chan <= 196)
38 return 2484; 38 return 4000 + chan * 5;
39 39 else
40 /* FIXME: 802.11j 17.3.8.3.2 */ 40 return 5000 + chan * 5;
41 return (chan + 1000) * 5; 41 } else { /* IEEE80211_BAND_2GHZ */
42 if (chan == 14)
43 return 2484;
44 else if (chan < 14)
45 return 2407 + chan * 5;
46 else
47 return 0; /* not supported */
48 }
42} 49}
43EXPORT_SYMBOL(ieee80211_channel_to_frequency); 50EXPORT_SYMBOL(ieee80211_channel_to_frequency);
44 51
45int ieee80211_frequency_to_channel(int freq) 52int ieee80211_frequency_to_channel(int freq)
46{ 53{
54 /* see 802.11 17.3.8.3.2 and Annex J */
47 if (freq == 2484) 55 if (freq == 2484)
48 return 14; 56 return 14;
49 57 else if (freq < 2484)
50 if (freq < 2484)
51 return (freq - 2407) / 5; 58 return (freq - 2407) / 5;
52 59 else if (freq >= 4910 && freq <= 4980)
53 /* FIXME: 802.11j 17.3.8.3.2 */ 60 return (freq - 4000) / 5;
54 return freq/5 - 1000; 61 else
62 return (freq - 5000) / 5;
55} 63}
56EXPORT_SYMBOL(ieee80211_frequency_to_channel); 64EXPORT_SYMBOL(ieee80211_frequency_to_channel);
57 65
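A quick stand-alone check of the channel-to-frequency mapping added above, mirroring the same arithmetic (band names shortened for the example):

	#include <stdio.h>

	enum band { BAND_2GHZ, BAND_5GHZ };

	/* 2.4 GHz: ch 1-13 -> 2407 + 5*ch MHz, ch 14 -> 2484 MHz;
	 * 5 GHz: 5000 + 5*ch MHz, except ch 182-196 -> 4000 + 5*ch MHz */
	static int chan_to_freq(int chan, enum band band)
	{
		if (band == BAND_5GHZ)
			return (chan >= 182 && chan <= 196) ? 4000 + chan * 5
							    : 5000 + chan * 5;
		if (chan == 14)
			return 2484;
		return chan < 14 ? 2407 + chan * 5 : 0;   /* 0: not supported */
	}

	int main(void)
	{
		printf("ch   1 (2.4 GHz) -> %d MHz\n", chan_to_freq(1, BAND_2GHZ));    /* 2412 */
		printf("ch  14 (2.4 GHz) -> %d MHz\n", chan_to_freq(14, BAND_2GHZ));   /* 2484 */
		printf("ch  36 (5 GHz)   -> %d MHz\n", chan_to_freq(36, BAND_5GHZ));   /* 5180 */
		printf("ch 184 (5 GHz)   -> %d MHz\n", chan_to_freq(184, BAND_5GHZ));  /* 4920 */
		return 0;
	}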
@@ -159,12 +167,15 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
159 167
160 /* 168 /*
161 * Disallow pairwise keys with non-zero index unless it's WEP 169 * Disallow pairwise keys with non-zero index unless it's WEP
162 * (because current deployments use pairwise WEP keys with 170 * or a vendor specific cipher (because current deployments use
163 * non-zero indizes but 802.11i clearly specifies to use zero) 171 * pairwise WEP keys with non-zero indices and for vendor specific
172 * ciphers this should be validated in the driver or hardware level
173 * - but 802.11i clearly specifies to use zero)
164 */ 174 */
165 if (pairwise && key_idx && 175 if (pairwise && key_idx &&
166 params->cipher != WLAN_CIPHER_SUITE_WEP40 && 176 ((params->cipher == WLAN_CIPHER_SUITE_TKIP) ||
167 params->cipher != WLAN_CIPHER_SUITE_WEP104) 177 (params->cipher == WLAN_CIPHER_SUITE_CCMP) ||
178 (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC)))
168 return -EINVAL; 179 return -EINVAL;
169 180
170 switch (params->cipher) { 181 switch (params->cipher) {
@@ -533,7 +544,8 @@ EXPORT_SYMBOL(ieee80211_data_from_8023);
533 544
534void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, 545void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
535 const u8 *addr, enum nl80211_iftype iftype, 546 const u8 *addr, enum nl80211_iftype iftype,
536 const unsigned int extra_headroom) 547 const unsigned int extra_headroom,
548 bool has_80211_header)
537{ 549{
538 struct sk_buff *frame = NULL; 550 struct sk_buff *frame = NULL;
539 u16 ethertype; 551 u16 ethertype;
@@ -542,14 +554,18 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
542 int remaining, err; 554 int remaining, err;
543 u8 dst[ETH_ALEN], src[ETH_ALEN]; 555 u8 dst[ETH_ALEN], src[ETH_ALEN];
544 556
545 err = ieee80211_data_to_8023(skb, addr, iftype); 557 if (has_80211_header) {
546 if (err) 558 err = ieee80211_data_to_8023(skb, addr, iftype);
547 goto out; 559 if (err)
560 goto out;
548 561
549 /* skip the wrapping header */ 562 /* skip the wrapping header */
550 eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr)); 563 eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr));
551 if (!eth) 564 if (!eth)
552 goto out; 565 goto out;
566 } else {
567 eth = (struct ethhdr *) skb->data;
568 }
553 569
554 while (skb != frame) { 570 while (skb != frame) {
555 u8 padding; 571 u8 padding;
@@ -792,6 +808,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
792 return -EBUSY; 808 return -EBUSY;
793 809
794 if (ntype != otype) { 810 if (ntype != otype) {
811 err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr,
812 ntype);
813 if (err)
814 return err;
815
795 dev->ieee80211_ptr->use_4addr = false; 816 dev->ieee80211_ptr->use_4addr = false;
796 dev->ieee80211_ptr->mesh_id_up_len = 0; 817 dev->ieee80211_ptr->mesh_id_up_len = 0;
797 818
@@ -885,3 +906,103 @@ u16 cfg80211_calculate_bitrate(struct rate_info *rate)
885 /* do NOT round down here */ 906 /* do NOT round down here */
886 return (bitrate + 50000) / 100000; 907 return (bitrate + 50000) / 100000;
887} 908}
909
910int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
911 u32 beacon_int)
912{
913 struct wireless_dev *wdev;
914 int res = 0;
915
916 if (!beacon_int)
917 return -EINVAL;
918
919 mutex_lock(&rdev->devlist_mtx);
920
921 list_for_each_entry(wdev, &rdev->netdev_list, list) {
922 if (!wdev->beacon_interval)
923 continue;
924 if (wdev->beacon_interval != beacon_int) {
925 res = -EINVAL;
926 break;
927 }
928 }
929
930 mutex_unlock(&rdev->devlist_mtx);
931
932 return res;
933}
934
935int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
936 struct wireless_dev *wdev,
937 enum nl80211_iftype iftype)
938{
939 struct wireless_dev *wdev_iter;
940 int num[NUM_NL80211_IFTYPES];
941 int total = 1;
942 int i, j;
943
944 ASSERT_RTNL();
945
946 /* Always allow software iftypes */
947 if (rdev->wiphy.software_iftypes & BIT(iftype))
948 return 0;
949
950 /*
951 * Drivers will gradually all set this flag, until all
952 * have it we only enforce for those that set it.
953 */
954 if (!(rdev->wiphy.flags & WIPHY_FLAG_ENFORCE_COMBINATIONS))
955 return 0;
956
957 memset(num, 0, sizeof(num));
958
959 num[iftype] = 1;
960
961 mutex_lock(&rdev->devlist_mtx);
962 list_for_each_entry(wdev_iter, &rdev->netdev_list, list) {
963 if (wdev_iter == wdev)
964 continue;
965 if (!netif_running(wdev_iter->netdev))
966 continue;
967
968 if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype))
969 continue;
970
971 num[wdev_iter->iftype]++;
972 total++;
973 }
974 mutex_unlock(&rdev->devlist_mtx);
975
976 for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) {
977 const struct ieee80211_iface_combination *c;
978 struct ieee80211_iface_limit *limits;
979
980 c = &rdev->wiphy.iface_combinations[i];
981
982 limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
983 GFP_KERNEL);
984 if (!limits)
985 return -ENOMEM;
986 if (total > c->max_interfaces)
987 goto cont;
988
989 for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
990 if (rdev->wiphy.software_iftypes & BIT(iftype))
991 continue;
992 for (j = 0; j < c->n_limits; j++) {
993 if (!(limits[j].types & BIT(iftype)))
994 continue;
995 if (limits[j].max < num[iftype])
996 goto cont;
997 limits[j].max -= num[iftype];
998 }
999 }
1000 /* yay, it fits */
1001 kfree(limits);
1002 return 0;
1003 cont:
1004 kfree(limits);
1005 }
1006
1007 return -EBUSY;
1008}
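The combination check above boils down to: work on a scratch copy of each limit, subtract the running per-type interface counts from every limit covering that type, and reject if any limit underflows or the total exceeds max_interfaces. A compact stand-alone sketch of that idea with hypothetical interface types and limits (not the cfg80211 tables):

	#include <stdio.h>
	#include <string.h>

	#define BIT(n) (1u << (n))

	enum iftype { IF_STATION, IF_AP, IF_P2P_GO, NUM_IFTYPES };

	struct iface_limit { unsigned int types; int max; };   /* bitmask of types */
	struct iface_combination {
		const struct iface_limit *limits;
		int n_limits;
		int max_interfaces;
	};

	/* 0 if the per-type counts fit this combination, -1 otherwise */
	static int fits(const struct iface_combination *c,
			const int num[NUM_IFTYPES], int total)
	{
		struct iface_limit limits[8];
		int i, j;

		if (c->n_limits > 8 || total > c->max_interfaces)
			return -1;
		/* scratch copy so the advertised table stays untouched */
		memcpy(limits, c->limits, sizeof(limits[0]) * c->n_limits);

		for (i = 0; i < NUM_IFTYPES; i++)
			for (j = 0; j < c->n_limits; j++) {
				if (!(limits[j].types & BIT(i)))
					continue;
				if (limits[j].max < num[i])
					return -1;
				limits[j].max -= num[i];
			}
		return 0;
	}

	int main(void)
	{
		static const struct iface_limit lim[] = {
			{ BIT(IF_STATION), 1 },
			{ BIT(IF_AP) | BIT(IF_P2P_GO), 1 },
		};
		const struct iface_combination c = { lim, 2, 2 };
		int num[NUM_IFTYPES] = { 1, 1, 0 };     /* one station + one AP */

		printf("station+AP fits:    %s\n", fits(&c, num, 2) ? "no" : "yes");
		num[IF_P2P_GO] = 1;                     /* add a P2P GO as well */
		printf("station+AP+GO fits: %s\n", fits(&c, num, 3) ? "no" : "yes");
		return 0;
	}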
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index d112f038edf0..0bf169bb770e 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -267,9 +267,12 @@ int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq)
267 * -EINVAL for impossible things. 267 * -EINVAL for impossible things.
268 */ 268 */
269 if (freq->e == 0) { 269 if (freq->e == 0) {
270 enum ieee80211_band band = IEEE80211_BAND_2GHZ;
270 if (freq->m < 0) 271 if (freq->m < 0)
271 return 0; 272 return 0;
272 return ieee80211_channel_to_frequency(freq->m); 273 if (freq->m > 14)
274 band = IEEE80211_BAND_5GHZ;
275 return ieee80211_channel_to_frequency(freq->m, band);
273 } else { 276 } else {
274 int i, div = 1000000; 277 int i, div = 1000000;
275 for (i = 0; i < freq->e; i++) 278 for (i = 0; i < freq->e; i++)
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 2196e55e4f61..e6759c9660bb 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -5,7 +5,6 @@
5config X25 5config X25
6 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)" 6 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
7 depends on EXPERIMENTAL 7 depends on EXPERIMENTAL
8 depends on BKL # should be fixable
9 ---help--- 8 ---help---
10 X.25 is a set of standardized network protocols, similar in scope to 9 X.25 is a set of standardized network protocols, similar in scope to
11 frame relay; the one physical line from your box to the X.25 network 10 frame relay; the one physical line from your box to the X.25 network
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ad96ee90fe27..4680b1e4c79c 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -40,7 +40,6 @@
40#include <linux/errno.h> 40#include <linux/errno.h>
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/sched.h> 42#include <linux/sched.h>
43#include <linux/smp_lock.h>
44#include <linux/timer.h> 43#include <linux/timer.h>
45#include <linux/string.h> 44#include <linux/string.h>
46#include <linux/net.h> 45#include <linux/net.h>
@@ -432,15 +431,6 @@ void x25_destroy_socket_from_timer(struct sock *sk)
432 sock_put(sk); 431 sock_put(sk);
433} 432}
434 433
435static void x25_destroy_socket(struct sock *sk)
436{
437 sock_hold(sk);
438 lock_sock(sk);
439 __x25_destroy_socket(sk);
440 release_sock(sk);
441 sock_put(sk);
442}
443
444/* 434/*
445 * Handling for system calls applied via the various interfaces to a 435 * Handling for system calls applied via the various interfaces to a
446 * X.25 socket object. 436 * X.25 socket object.
@@ -647,18 +637,19 @@ static int x25_release(struct socket *sock)
647 struct sock *sk = sock->sk; 637 struct sock *sk = sock->sk;
648 struct x25_sock *x25; 638 struct x25_sock *x25;
649 639
650 lock_kernel();
651 if (!sk) 640 if (!sk)
652 goto out; 641 return 0;
653 642
654 x25 = x25_sk(sk); 643 x25 = x25_sk(sk);
655 644
645 sock_hold(sk);
646 lock_sock(sk);
656 switch (x25->state) { 647 switch (x25->state) {
657 648
658 case X25_STATE_0: 649 case X25_STATE_0:
659 case X25_STATE_2: 650 case X25_STATE_2:
660 x25_disconnect(sk, 0, 0, 0); 651 x25_disconnect(sk, 0, 0, 0);
661 x25_destroy_socket(sk); 652 __x25_destroy_socket(sk);
662 goto out; 653 goto out;
663 654
664 case X25_STATE_1: 655 case X25_STATE_1:
@@ -678,7 +669,8 @@ static int x25_release(struct socket *sock)
678 669
679 sock_orphan(sk); 670 sock_orphan(sk);
680out: 671out:
681 unlock_kernel(); 672 release_sock(sk);
673 sock_put(sk);
682 return 0; 674 return 0;
683} 675}
684 676
@@ -1085,7 +1077,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1085 size_t size; 1077 size_t size;
1086 int qbit = 0, rc = -EINVAL; 1078 int qbit = 0, rc = -EINVAL;
1087 1079
1088 lock_kernel(); 1080 lock_sock(sk);
1089 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT)) 1081 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT))
1090 goto out; 1082 goto out;
1091 1083
@@ -1148,7 +1140,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1148 1140
1149 size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN; 1141 size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN;
1150 1142
1143 release_sock(sk);
1151 skb = sock_alloc_send_skb(sk, size, noblock, &rc); 1144 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
1145 lock_sock(sk);
1152 if (!skb) 1146 if (!skb)
1153 goto out; 1147 goto out;
1154 X25_SKB_CB(skb)->flags = msg->msg_flags; 1148 X25_SKB_CB(skb)->flags = msg->msg_flags;
@@ -1231,26 +1225,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1231 len++; 1225 len++;
1232 } 1226 }
1233 1227
1234 /*
1235 * lock_sock() is currently only used to serialize this x25_kick()
1236 * against input-driven x25_kick() calls. It currently only blocks
1237 * incoming packets for this socket and does not protect against
1238 * any other socket state changes and is not called from anywhere
1239 * else. As x25_kick() cannot block and as long as all socket
1240 * operations are BKL-wrapped, we don't need take to care about
1241 * purging the backlog queue in x25_release().
1242 *
1243 * Using lock_sock() to protect all socket operations entirely
1244 * (and making the whole x25 stack SMP aware) unfortunately would
1245 * require major changes to {send,recv}msg and skb allocation methods.
1246 * -> 2.5 ;)
1247 */
1248 lock_sock(sk);
1249 x25_kick(sk); 1228 x25_kick(sk);
1250 release_sock(sk);
1251 rc = len; 1229 rc = len;
1252out: 1230out:
1253 unlock_kernel(); 1231 release_sock(sk);
1254 return rc; 1232 return rc;
1255out_kfree_skb: 1233out_kfree_skb:
1256 kfree_skb(skb); 1234 kfree_skb(skb);
@@ -1271,7 +1249,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1271 unsigned char *asmptr; 1249 unsigned char *asmptr;
1272 int rc = -ENOTCONN; 1250 int rc = -ENOTCONN;
1273 1251
1274 lock_kernel(); 1252 lock_sock(sk);
1275 /* 1253 /*
1276 * This works for seqpacket too. The receiver has ordered the queue for 1254 * This works for seqpacket too. The receiver has ordered the queue for
1277 * us! We do one quick check first though 1255 * us! We do one quick check first though
@@ -1300,8 +1278,10 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1300 msg->msg_flags |= MSG_OOB; 1278 msg->msg_flags |= MSG_OOB;
1301 } else { 1279 } else {
1302 /* Now we can treat all alike */ 1280 /* Now we can treat all alike */
1281 release_sock(sk);
1303 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 1282 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1304 flags & MSG_DONTWAIT, &rc); 1283 flags & MSG_DONTWAIT, &rc);
1284 lock_sock(sk);
1305 if (!skb) 1285 if (!skb)
1306 goto out; 1286 goto out;
1307 1287
@@ -1338,14 +1318,12 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1338 1318
1339 msg->msg_namelen = sizeof(struct sockaddr_x25); 1319 msg->msg_namelen = sizeof(struct sockaddr_x25);
1340 1320
1341 lock_sock(sk);
1342 x25_check_rbuf(sk); 1321 x25_check_rbuf(sk);
1343 release_sock(sk);
1344 rc = copied; 1322 rc = copied;
1345out_free_dgram: 1323out_free_dgram:
1346 skb_free_datagram(sk, skb); 1324 skb_free_datagram(sk, skb);
1347out: 1325out:
1348 unlock_kernel(); 1326 release_sock(sk);
1349 return rc; 1327 return rc;
1350} 1328}
1351 1329
@@ -1581,18 +1559,18 @@ out_cud_release:
1581 1559
1582 case SIOCX25CALLACCPTAPPRV: { 1560 case SIOCX25CALLACCPTAPPRV: {
1583 rc = -EINVAL; 1561 rc = -EINVAL;
1584 lock_kernel(); 1562 lock_sock(sk);
1585 if (sk->sk_state != TCP_CLOSE) 1563 if (sk->sk_state != TCP_CLOSE)
1586 break; 1564 break;
1587 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); 1565 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
1588 unlock_kernel(); 1566 release_sock(sk);
1589 rc = 0; 1567 rc = 0;
1590 break; 1568 break;
1591 } 1569 }
1592 1570
1593 case SIOCX25SENDCALLACCPT: { 1571 case SIOCX25SENDCALLACCPT: {
1594 rc = -EINVAL; 1572 rc = -EINVAL;
1595 lock_kernel(); 1573 lock_sock(sk);
1596 if (sk->sk_state != TCP_ESTABLISHED) 1574 if (sk->sk_state != TCP_ESTABLISHED)
1597 break; 1575 break;
1598 /* must call accptapprv above */ 1576 /* must call accptapprv above */
@@ -1600,7 +1578,7 @@ out_cud_release:
1600 break; 1578 break;
1601 x25_write_internal(sk, X25_CALL_ACCEPTED); 1579 x25_write_internal(sk, X25_CALL_ACCEPTED);
1602 x25->state = X25_STATE_3; 1580 x25->state = X25_STATE_3;
1603 unlock_kernel(); 1581 release_sock(sk);
1604 rc = 0; 1582 rc = 0;
1605 break; 1583 break;
1606 } 1584 }
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 406207515b5e..f77e4e75f914 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -31,7 +31,7 @@
31 * x25_parse_facilities - Parse facilities from skb into the facilities structs 31 * x25_parse_facilities - Parse facilities from skb into the facilities structs
32 * 32 *
33 * @skb: sk_buff to parse 33 * @skb: sk_buff to parse
34 * @facilities: Regular facilites, updated as facilities are found 34 * @facilities: Regular facilities, updated as facilities are found
35 * @dte_facs: ITU DTE facilities, updated as DTE facilities are found 35 * @dte_facs: ITU DTE facilities, updated as DTE facilities are found
36 * @vc_fac_mask: mask is updated with all facilities found 36 * @vc_fac_mask: mask is updated with all facilities found
37 * 37 *
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index 25a810793968..c541b622ae16 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -31,7 +31,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
31 goto out_no_route; 31 goto out_no_route;
32 32
33 if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) { 33 if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) {
34 /* This shouldnt happen, if it occurs somehow 34 /* This shouldn't happen, if it occurs somehow
35 * do something sensible 35 * do something sensible
36 */ 36 */
37 goto out_put_route; 37 goto out_put_route;
@@ -45,7 +45,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
45 } 45 }
46 46
47 /* Remote end sending a call request on an already 47 /* Remote end sending a call request on an already
48 * established LCI? It shouldnt happen, just in case.. 48 * established LCI? It shouldn't happen, just in case..
49 */ 49 */
50 read_lock_bh(&x25_forward_list_lock); 50 read_lock_bh(&x25_forward_list_lock);
51 list_for_each(entry, &x25_forward_list) { 51 list_for_each(entry, &x25_forward_list) {
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index d00649fb251d..0144271d2184 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -68,8 +68,11 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
68 frontlen = skb_headroom(skb); 68 frontlen = skb_headroom(skb);
69 69
70 while (skb->len > 0) { 70 while (skb->len > 0) {
71 if ((skbn = sock_alloc_send_skb(sk, frontlen + max_len, 71 release_sock(sk);
72 noblock, &err)) == NULL){ 72 skbn = sock_alloc_send_skb(sk, frontlen + max_len,
73 noblock, &err);
74 lock_sock(sk);
75 if (!skbn) {
73 if (err == -EWOULDBLOCK && noblock){ 76 if (err == -EWOULDBLOCK && noblock){
74 kfree_skb(skb); 77 kfree_skb(skb);
75 return sent; 78 return sent;
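The x25 hunks above repeat one pattern: release the socket lock around calls that may sleep (sock_alloc_send_skb(), skb_recv_datagram()), then re-take it and re-validate socket state. A generic user-space sketch of the same pattern with a pthread mutex (stand-in names, not the socket API):

	#include <pthread.h>
	#include <stdlib.h>

	struct obj {
		pthread_mutex_t lock;
		int state;
	};

	static void *blocking_alloc(size_t n)
	{
		return malloc(n);       /* stand-in for a call that may sleep */
	}

	static int do_send(struct obj *o, size_t n)
	{
		void *buf;

		pthread_mutex_lock(&o->lock);
		/* ...validate o->state under the lock... */

		pthread_mutex_unlock(&o->lock);   /* don't block while holding it */
		buf = blocking_alloc(n);
		pthread_mutex_lock(&o->lock);

		if (!buf) {
			pthread_mutex_unlock(&o->lock);
			return -1;
		}
		/* o->state may have changed while unlocked: re-check before use */
		free(buf);
		pthread_mutex_unlock(&o->lock);
		return 0;
	}

	int main(void)
	{
		struct obj o = { PTHREAD_MUTEX_INITIALIZER, 0 };
		return do_send(&o, 128);
	}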
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index c631047e1b27..aa429eefe919 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ 5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
6 xfrm_input.o xfrm_output.o xfrm_algo.o \ 6 xfrm_input.o xfrm_output.o xfrm_algo.o \
7 xfrm_sysctl.o 7 xfrm_sysctl.o xfrm_replay.o
8obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o 8obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
9obj-$(CONFIG_XFRM_USER) += xfrm_user.o 9obj-$(CONFIG_XFRM_USER) += xfrm_user.o
10obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o 10obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 8b4d6e3246e5..58064d9e565d 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -618,21 +618,21 @@ static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry,
618 (entry->compat && !strcmp(name, entry->compat))); 618 (entry->compat && !strcmp(name, entry->compat)));
619} 619}
620 620
621struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) 621struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe)
622{ 622{
623 return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, 623 return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name,
624 probe); 624 probe);
625} 625}
626EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); 626EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
627 627
628struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) 628struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe)
629{ 629{
630 return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, 630 return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name,
631 probe); 631 probe);
632} 632}
633EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); 633EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
634 634
635struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) 635struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe)
636{ 636{
637 return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, 637 return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name,
638 probe); 638 probe);
@@ -654,7 +654,7 @@ static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry,
654 !strcmp(name, entry->name); 654 !strcmp(name, entry->name);
655} 655}
656 656
657struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe) 657struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe)
658{ 658{
659 struct xfrm_aead_name data = { 659 struct xfrm_aead_name data = {
660 .name = name, 660 .name = name,
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 8e69533d2313..7199d78b2aa1 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -4,29 +4,32 @@
4#include <linux/xfrm.h> 4#include <linux/xfrm.h>
5#include <linux/socket.h> 5#include <linux/socket.h>
6 6
7static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) 7static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr)
8{ 8{
9 return ntohl(addr->a4); 9 return ntohl(addr->a4);
10} 10}
11 11
12static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) 12static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr)
13{ 13{
14 return ntohl(addr->a6[2] ^ addr->a6[3]); 14 return ntohl(addr->a6[2] ^ addr->a6[3]);
15} 15}
16 16
17static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) 17static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr,
18 const xfrm_address_t *saddr)
18{ 19{
19 u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; 20 u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4;
20 return ntohl((__force __be32)sum); 21 return ntohl((__force __be32)sum);
21} 22}
22 23
23static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) 24static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr,
25 const xfrm_address_t *saddr)
24{ 26{
25 return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ 27 return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
26 saddr->a6[2] ^ saddr->a6[3]); 28 saddr->a6[2] ^ saddr->a6[3]);
27} 29}
28 30
29static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, 31static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr,
32 const xfrm_address_t *saddr,
30 u32 reqid, unsigned short family, 33 u32 reqid, unsigned short family,
31 unsigned int hmask) 34 unsigned int hmask)
32{ 35{
@@ -42,8 +45,8 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t
42 return (h ^ (h >> 16)) & hmask; 45 return (h ^ (h >> 16)) & hmask;
43} 46}
44 47
45static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, 48static inline unsigned __xfrm_src_hash(const xfrm_address_t *daddr,
46 xfrm_address_t *saddr, 49 const xfrm_address_t *saddr,
47 unsigned short family, 50 unsigned short family,
48 unsigned int hmask) 51 unsigned int hmask)
49{ 52{
@@ -60,8 +63,8 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
60} 63}
61 64
62static inline unsigned int 65static inline unsigned int
63__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, 66__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto,
64 unsigned int hmask) 67 unsigned short family, unsigned int hmask)
65{ 68{
66 unsigned int h = (__force u32)spi ^ proto; 69 unsigned int h = (__force u32)spi ^ proto;
67 switch (family) { 70 switch (family) {
@@ -80,10 +83,11 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
80 return (index ^ (index >> 8)) & hmask; 83 return (index ^ (index >> 8)) & hmask;
81} 84}
82 85
83static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) 86static inline unsigned int __sel_hash(const struct xfrm_selector *sel,
87 unsigned short family, unsigned int hmask)
84{ 88{
85 xfrm_address_t *daddr = &sel->daddr; 89 const xfrm_address_t *daddr = &sel->daddr;
86 xfrm_address_t *saddr = &sel->saddr; 90 const xfrm_address_t *saddr = &sel->saddr;
87 unsigned int h = 0; 91 unsigned int h = 0;
88 92
89 switch (family) { 93 switch (family) {
@@ -107,7 +111,9 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short
107 return h & hmask; 111 return h & hmask;
108} 112}
109 113
110static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) 114static inline unsigned int __addr_hash(const xfrm_address_t *daddr,
115 const xfrm_address_t *saddr,
116 unsigned short family, unsigned int hmask)
111{ 117{
112 unsigned int h = 0; 118 unsigned int h = 0;
113 119
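
For reference, the IPv4 fold used by the constified helpers above simply adds the two big-endian addresses and converts the result to host order before the callers mask it with hmask. A small user-space sketch of the same arithmetic, not part of the patch; the function name and sample addresses are illustrative:

/* Sketch only: the daddr/saddr fold of __xfrm4_daddr_saddr_hash(),
 * with the kernel types and the final hmask masking simplified.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>   /* htonl(), ntohl() */

static unsigned int xfrm4_daddr_saddr_fold(uint32_t daddr_be, uint32_t saddr_be)
{
        uint32_t sum = daddr_be + saddr_be;     /* big-endian words added as raw 32-bit values */

        return ntohl(sum);                      /* host order, so the most variable bytes land in the low bits */
}

int main(void)
{
        unsigned int h = xfrm4_daddr_saddr_fold(htonl(0xc0a80001),   /* 192.168.0.1 */
                                                htonl(0x0a000001));  /* 10.0.0.1 */

        printf("fold = %#x, bucket (hmask=255) = %u\n", h, h & 255);
        return 0;
}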
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 45f1c98d4fce..a026b0ef2443 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -107,6 +107,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
107 struct net *net = dev_net(skb->dev); 107 struct net *net = dev_net(skb->dev);
108 int err; 108 int err;
109 __be32 seq; 109 __be32 seq;
110 __be32 seq_hi;
110 struct xfrm_state *x; 111 struct xfrm_state *x;
111 xfrm_address_t *daddr; 112 xfrm_address_t *daddr;
112 struct xfrm_mode *inner_mode; 113 struct xfrm_mode *inner_mode;
@@ -118,7 +119,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
118 if (encap_type < 0) { 119 if (encap_type < 0) {
119 async = 1; 120 async = 1;
120 x = xfrm_input_state(skb); 121 x = xfrm_input_state(skb);
121 seq = XFRM_SKB_CB(skb)->seq.input; 122 seq = XFRM_SKB_CB(skb)->seq.input.low;
122 goto resume; 123 goto resume;
123 } 124 }
124 125
@@ -172,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
172 goto drop_unlock; 173 goto drop_unlock;
173 } 174 }
174 175
175 if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { 176 if (x->repl->check(x, skb, seq)) {
176 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); 177 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
177 goto drop_unlock; 178 goto drop_unlock;
178 } 179 }
@@ -184,7 +185,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
184 185
185 spin_unlock(&x->lock); 186 spin_unlock(&x->lock);
186 187
187 XFRM_SKB_CB(skb)->seq.input = seq; 188 seq_hi = htonl(xfrm_replay_seqhi(x, seq));
189
190 XFRM_SKB_CB(skb)->seq.input.low = seq;
191 XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
192
193 skb_dst_force(skb);
188 194
189 nexthdr = x->type->input(x, skb); 195 nexthdr = x->type->input(x, skb);
190 196
@@ -206,8 +212,7 @@ resume:
206 /* only the first xfrm gets the encap type */ 212 /* only the first xfrm gets the encap type */
207 encap_type = 0; 213 encap_type = 0;
208 214
209 if (x->props.replay_window) 215 x->repl->advance(x, seq);
210 xfrm_replay_advance(x, seq);
211 216
212 x->curlft.bytes += skb->len; 217 x->curlft.bytes += skb->len;
213 x->curlft.packets++; 218 x->curlft.packets++;
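
The receive path above no longer branches on x->props.replay_window; it calls through a per-state operations table (x->repl) whose implementations are registered in the new net/xfrm/xfrm_replay.c further down. A user-space sketch of that dispatch shape, not part of the patch, with hypothetical names and an ops struct assumed to carry at least .check and .advance as used here:

/* Sketch only: per-state replay ops, selected once at init time and
 * called unconditionally on the hot path.
 */
#include <stdint.h>
#include <stdio.h>

struct state;

struct replay_ops {
        int  (*check)(struct state *x, uint32_t seq);
        void (*advance)(struct state *x, uint32_t seq);
};

struct state {
        const struct replay_ops *repl;  /* like x->repl in the patch */
        uint32_t last_seq;
};

static int check_accept_all(struct state *x, uint32_t seq) { (void)x; (void)seq; return 0; }
static int check_in_order(struct state *x, uint32_t seq) { return seq > x->last_seq ? 0 : -1; }
static void advance_record(struct state *x, uint32_t seq) { x->last_seq = seq; }

static const struct replay_ops replay_off      = { check_accept_all, advance_record };
static const struct replay_ops replay_in_order = { check_in_order,   advance_record };

/* Crude analogue of xfrm_init_replay(): pick the implementation per state. */
static void state_init_replay(struct state *x, int window_enabled)
{
        x->repl = window_enabled ? &replay_in_order : &replay_off;
}

int main(void)
{
        struct state x = { .last_seq = 5 };
        uint32_t seq = 6;

        state_init_replay(&x, 1);

        /* Receive path, shaped like xfrm_input(): check first, advance on success. */
        if (x.repl->check(&x, seq) == 0)
                x.repl->advance(&x, seq);

        printf("last_seq = %u\n", x.last_seq);
        return 0;
}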
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 64f2ae1fdc15..47bacd8c0250 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -67,17 +67,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
67 goto error; 67 goto error;
68 } 68 }
69 69
70 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { 70 err = x->repl->overflow(x, skb);
71 XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; 71 if (err) {
72 if (unlikely(x->replay.oseq == 0)) { 72 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
73 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); 73 goto error;
74 x->replay.oseq--;
75 xfrm_audit_state_replay_overflow(x, skb);
76 err = -EOVERFLOW;
77 goto error;
78 }
79 if (xfrm_aevent_is_on(net))
80 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
81 } 74 }
82 75
83 x->curlft.bytes += skb->len; 76 x->curlft.bytes += skb->len;
@@ -85,6 +78,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
85 78
86 spin_unlock_bh(&x->lock); 79 spin_unlock_bh(&x->lock);
87 80
81 skb_dst_force(skb);
82
88 err = x->type->output(x, skb); 83 err = x->type->output(x, skb);
89 if (err == -EINPROGRESS) 84 if (err == -EINPROGRESS)
90 goto out_exit; 85 goto out_exit;
@@ -101,7 +96,7 @@ resume:
101 err = -EHOSTUNREACH; 96 err = -EHOSTUNREACH;
102 goto error_nolock; 97 goto error_nolock;
103 } 98 }
104 skb_dst_set(skb, dst_clone(dst)); 99 skb_dst_set(skb, dst);
105 x = dst->xfrm; 100 x = dst->xfrm;
106 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); 101 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
107 102
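
The output path now asks x->repl->overflow() to assign the outgoing sequence number and to report -EOVERFLOW when the counter space is exhausted. A user-space sketch of the two behaviours involved (legacy 32-bit counter versus the ESN low/high pair), not part of the patch and with simplified types, mirroring the ->overflow() hooks defined in xfrm_replay.c below:

/* Sketch only: output sequence assignment with overflow detection. */
#include <stdint.h>
#include <stdio.h>
#include <errno.h>

struct out_state {
        uint32_t oseq;      /* low 32 bits, as in x->replay.oseq */
        uint32_t oseq_hi;   /* high 32 bits, only used by the ESN variant */
};

/* Legacy behaviour: a wrap of the 32-bit counter is a hard error. */
static int overflow_legacy(struct out_state *s, uint32_t *seq_low)
{
        *seq_low = ++s->oseq;
        if (s->oseq == 0) {
                s->oseq--;                      /* roll back; state keeps its last good value */
                return -EOVERFLOW;
        }
        return 0;
}

/* ESN behaviour: a low-word wrap just carries into the high word. */
static int overflow_esn(struct out_state *s, uint32_t *seq_low, uint32_t *seq_hi)
{
        *seq_low = ++s->oseq;
        *seq_hi = s->oseq_hi;
        if (s->oseq == 0) {
                *seq_hi = ++s->oseq_hi;
                if (s->oseq_hi == 0) {
                        s->oseq--;
                        s->oseq_hi--;
                        return -EOVERFLOW;      /* full 64-bit space exhausted */
                }
        }
        return 0;
}

int main(void)
{
        struct out_state esn = { .oseq = 0xffffffffu, .oseq_hi = 0 };
        struct out_state legacy = { .oseq = 0xffffffffu };
        uint32_t lo, hi;
        int rc;

        rc = overflow_esn(&esn, &lo, &hi);
        printf("esn:    rc=%d lo=%u hi=%u\n", rc, (unsigned)lo, (unsigned)hi);

        rc = overflow_legacy(&legacy, &lo);
        printf("legacy: rc=%d lo=%u\n", rc, (unsigned)lo);
        return 0;
}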
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6459588befc3..9bec2e8a838c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -50,37 +50,40 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); 50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
51static void xfrm_init_pmtu(struct dst_entry *dst); 51static void xfrm_init_pmtu(struct dst_entry *dst);
52static int stale_bundle(struct dst_entry *dst); 52static int stale_bundle(struct dst_entry *dst);
53static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, 53static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family);
54 struct flowi *fl, int family, int strict);
55 54
56 55
57static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 56static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
58 int dir); 57 int dir);
59 58
60static inline int 59static inline int
61__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) 60__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
62{ 61{
63 return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && 62 const struct flowi4 *fl4 = &fl->u.ip4;
64 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && 63
65 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && 64 return addr_match(&fl4->daddr, &sel->daddr, sel->prefixlen_d) &&
66 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && 65 addr_match(&fl4->saddr, &sel->saddr, sel->prefixlen_s) &&
67 (fl->proto == sel->proto || !sel->proto) && 66 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
68 (fl->oif == sel->ifindex || !sel->ifindex); 67 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
68 (fl4->flowi4_proto == sel->proto || !sel->proto) &&
69 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
69} 70}
70 71
71static inline int 72static inline int
72__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) 73__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
73{ 74{
74 return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && 75 const struct flowi6 *fl6 = &fl->u.ip6;
75 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && 76
76 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && 77 return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
77 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && 78 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
78 (fl->proto == sel->proto || !sel->proto) && 79 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
79 (fl->oif == sel->ifindex || !sel->ifindex); 80 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
81 (fl6->flowi6_proto == sel->proto || !sel->proto) &&
82 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
80} 83}
81 84
82int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, 85int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
83 unsigned short family) 86 unsigned short family)
84{ 87{
85 switch (family) { 88 switch (family) {
86 case AF_INET: 89 case AF_INET:
@@ -92,8 +95,8 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
92} 95}
93 96
94static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, 97static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
95 xfrm_address_t *saddr, 98 const xfrm_address_t *saddr,
96 xfrm_address_t *daddr, 99 const xfrm_address_t *daddr,
97 int family) 100 int family)
98{ 101{
99 struct xfrm_policy_afinfo *afinfo; 102 struct xfrm_policy_afinfo *afinfo;
@@ -311,7 +314,9 @@ static inline unsigned int idx_hash(struct net *net, u32 index)
311 return __idx_hash(index, net->xfrm.policy_idx_hmask); 314 return __idx_hash(index, net->xfrm.policy_idx_hmask);
312} 315}
313 316
314static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) 317static struct hlist_head *policy_hash_bysel(struct net *net,
318 const struct xfrm_selector *sel,
319 unsigned short family, int dir)
315{ 320{
316 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 321 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
317 unsigned int hash = __sel_hash(sel, family, hmask); 322 unsigned int hash = __sel_hash(sel, family, hmask);
@@ -321,7 +326,10 @@ static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selecto
321 net->xfrm.policy_bydst[dir].table + hash); 326 net->xfrm.policy_bydst[dir].table + hash);
322} 327}
323 328
324static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) 329static struct hlist_head *policy_hash_direct(struct net *net,
330 const xfrm_address_t *daddr,
331 const xfrm_address_t *saddr,
332 unsigned short family, int dir)
325{ 333{
326 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 334 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
327 unsigned int hash = __addr_hash(daddr, saddr, family, hmask); 335 unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
@@ -864,32 +872,33 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
864 * 872 *
865 * Returns 0 if policy found, else an -errno. 873 * Returns 0 if policy found, else an -errno.
866 */ 874 */
867static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, 875static int xfrm_policy_match(const struct xfrm_policy *pol,
876 const struct flowi *fl,
868 u8 type, u16 family, int dir) 877 u8 type, u16 family, int dir)
869{ 878{
870 struct xfrm_selector *sel = &pol->selector; 879 const struct xfrm_selector *sel = &pol->selector;
871 int match, ret = -ESRCH; 880 int match, ret = -ESRCH;
872 881
873 if (pol->family != family || 882 if (pol->family != family ||
874 (fl->mark & pol->mark.m) != pol->mark.v || 883 (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
875 pol->type != type) 884 pol->type != type)
876 return ret; 885 return ret;
877 886
878 match = xfrm_selector_match(sel, fl, family); 887 match = xfrm_selector_match(sel, fl, family);
879 if (match) 888 if (match)
880 ret = security_xfrm_policy_lookup(pol->security, fl->secid, 889 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
881 dir); 890 dir);
882 891
883 return ret; 892 return ret;
884} 893}
885 894
886static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, 895static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
887 struct flowi *fl, 896 const struct flowi *fl,
888 u16 family, u8 dir) 897 u16 family, u8 dir)
889{ 898{
890 int err; 899 int err;
891 struct xfrm_policy *pol, *ret; 900 struct xfrm_policy *pol, *ret;
892 xfrm_address_t *daddr, *saddr; 901 const xfrm_address_t *daddr, *saddr;
893 struct hlist_node *entry; 902 struct hlist_node *entry;
894 struct hlist_head *chain; 903 struct hlist_head *chain;
895 u32 priority = ~0U; 904 u32 priority = ~0U;
@@ -941,7 +950,7 @@ fail:
941} 950}
942 951
943static struct xfrm_policy * 952static struct xfrm_policy *
944__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) 953__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
945{ 954{
946#ifdef CONFIG_XFRM_SUB_POLICY 955#ifdef CONFIG_XFRM_SUB_POLICY
947 struct xfrm_policy *pol; 956 struct xfrm_policy *pol;
@@ -954,7 +963,7 @@ __xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
954} 963}
955 964
956static struct flow_cache_object * 965static struct flow_cache_object *
957xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, 966xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
958 u8 dir, struct flow_cache_object *old_obj, void *ctx) 967 u8 dir, struct flow_cache_object *old_obj, void *ctx)
959{ 968{
960 struct xfrm_policy *pol; 969 struct xfrm_policy *pol;
@@ -990,7 +999,8 @@ static inline int policy_to_flow_dir(int dir)
990 } 999 }
991} 1000}
992 1001
993static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) 1002static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
1003 const struct flowi *fl)
994{ 1004{
995 struct xfrm_policy *pol; 1005 struct xfrm_policy *pol;
996 1006
@@ -1006,7 +1016,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
1006 goto out; 1016 goto out;
1007 } 1017 }
1008 err = security_xfrm_policy_lookup(pol->security, 1018 err = security_xfrm_policy_lookup(pol->security,
1009 fl->secid, 1019 fl->flowi_secid,
1010 policy_to_flow_dir(dir)); 1020 policy_to_flow_dir(dir));
1011 if (!err) 1021 if (!err)
1012 xfrm_pol_hold(pol); 1022 xfrm_pol_hold(pol);
@@ -1098,7 +1108,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1098 return 0; 1108 return 0;
1099} 1109}
1100 1110
1101static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) 1111static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1102{ 1112{
1103 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); 1113 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1104 1114
@@ -1157,9 +1167,8 @@ xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1157/* Resolve list of templates for the flow, given policy. */ 1167/* Resolve list of templates for the flow, given policy. */
1158 1168
1159static int 1169static int
1160xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, 1170xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1161 struct xfrm_state **xfrm, 1171 struct xfrm_state **xfrm, unsigned short family)
1162 unsigned short family)
1163{ 1172{
1164 struct net *net = xp_net(policy); 1173 struct net *net = xp_net(policy);
1165 int nx; 1174 int nx;
@@ -1214,9 +1223,8 @@ fail:
1214} 1223}
1215 1224
1216static int 1225static int
1217xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, 1226xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
1218 struct xfrm_state **xfrm, 1227 struct xfrm_state **xfrm, unsigned short family)
1219 unsigned short family)
1220{ 1228{
1221 struct xfrm_state *tp[XFRM_MAX_DEPTH]; 1229 struct xfrm_state *tp[XFRM_MAX_DEPTH];
1222 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; 1230 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
@@ -1256,7 +1264,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1256 * still valid. 1264 * still valid.
1257 */ 1265 */
1258 1266
1259static inline int xfrm_get_tos(struct flowi *fl, int family) 1267static inline int xfrm_get_tos(const struct flowi *fl, int family)
1260{ 1268{
1261 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1269 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1262 int tos; 1270 int tos;
@@ -1340,7 +1348,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1340 default: 1348 default:
1341 BUG(); 1349 BUG();
1342 } 1350 }
1343 xdst = dst_alloc(dst_ops); 1351 xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
1352 memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry));
1344 xfrm_policy_put_afinfo(afinfo); 1353 xfrm_policy_put_afinfo(afinfo);
1345 1354
1346 if (likely(xdst)) 1355 if (likely(xdst))
@@ -1369,7 +1378,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1369} 1378}
1370 1379
1371static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 1380static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1372 struct flowi *fl) 1381 const struct flowi *fl)
1373{ 1382{
1374 struct xfrm_policy_afinfo *afinfo = 1383 struct xfrm_policy_afinfo *afinfo =
1375 xfrm_policy_get_afinfo(xdst->u.dst.ops->family); 1384 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
@@ -1392,12 +1401,13 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1392 1401
1393static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, 1402static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1394 struct xfrm_state **xfrm, int nx, 1403 struct xfrm_state **xfrm, int nx,
1395 struct flowi *fl, 1404 const struct flowi *fl,
1396 struct dst_entry *dst) 1405 struct dst_entry *dst)
1397{ 1406{
1398 struct net *net = xp_net(policy); 1407 struct net *net = xp_net(policy);
1399 unsigned long now = jiffies; 1408 unsigned long now = jiffies;
1400 struct net_device *dev; 1409 struct net_device *dev;
1410 struct xfrm_mode *inner_mode;
1401 struct dst_entry *dst_prev = NULL; 1411 struct dst_entry *dst_prev = NULL;
1402 struct dst_entry *dst0 = NULL; 1412 struct dst_entry *dst0 = NULL;
1403 int i = 0; 1413 int i = 0;
@@ -1428,6 +1438,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1428 goto put_states; 1438 goto put_states;
1429 } 1439 }
1430 1440
1441 if (xfrm[i]->sel.family == AF_UNSPEC) {
1442 inner_mode = xfrm_ip2inner_mode(xfrm[i],
1443 xfrm_af2proto(family));
1444 if (!inner_mode) {
1445 err = -EAFNOSUPPORT;
1446 dst_release(dst);
1447 goto put_states;
1448 }
1449 } else
1450 inner_mode = xfrm[i]->inner_mode;
1451
1431 if (!dst_prev) 1452 if (!dst_prev)
1432 dst0 = dst1; 1453 dst0 = dst1;
1433 else { 1454 else {
@@ -1456,7 +1477,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1456 dst1->lastuse = now; 1477 dst1->lastuse = now;
1457 1478
1458 dst1->input = dst_discard; 1479 dst1->input = dst_discard;
1459 dst1->output = xfrm[i]->outer_mode->afinfo->output; 1480 dst1->output = inner_mode->afinfo->output;
1460 1481
1461 dst1->next = dst_prev; 1482 dst1->next = dst_prev;
1462 dst_prev = dst1; 1483 dst_prev = dst1;
@@ -1508,7 +1529,7 @@ free_dst:
1508} 1529}
1509 1530
1510static int inline 1531static int inline
1511xfrm_dst_alloc_copy(void **target, void *src, int size) 1532xfrm_dst_alloc_copy(void **target, const void *src, int size)
1512{ 1533{
1513 if (!*target) { 1534 if (!*target) {
1514 *target = kmalloc(size, GFP_ATOMIC); 1535 *target = kmalloc(size, GFP_ATOMIC);
@@ -1520,7 +1541,7 @@ xfrm_dst_alloc_copy(void **target, void *src, int size)
1520} 1541}
1521 1542
1522static int inline 1543static int inline
1523xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) 1544xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel)
1524{ 1545{
1525#ifdef CONFIG_XFRM_SUB_POLICY 1546#ifdef CONFIG_XFRM_SUB_POLICY
1526 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1547 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1532,7 +1553,7 @@ xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
1532} 1553}
1533 1554
1534static int inline 1555static int inline
1535xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) 1556xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl)
1536{ 1557{
1537#ifdef CONFIG_XFRM_SUB_POLICY 1558#ifdef CONFIG_XFRM_SUB_POLICY
1538 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1559 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1542,7 +1563,7 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
1542#endif 1563#endif
1543} 1564}
1544 1565
1545static int xfrm_expand_policies(struct flowi *fl, u16 family, 1566static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1546 struct xfrm_policy **pols, 1567 struct xfrm_policy **pols,
1547 int *num_pols, int *num_xfrms) 1568 int *num_pols, int *num_xfrms)
1548{ 1569{
@@ -1588,7 +1609,7 @@ static int xfrm_expand_policies(struct flowi *fl, u16 family,
1588 1609
1589static struct xfrm_dst * 1610static struct xfrm_dst *
1590xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, 1611xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1591 struct flowi *fl, u16 family, 1612 const struct flowi *fl, u16 family,
1592 struct dst_entry *dst_orig) 1613 struct dst_entry *dst_orig)
1593{ 1614{
1594 struct net *net = xp_net(pols[0]); 1615 struct net *net = xp_net(pols[0]);
@@ -1631,7 +1652,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1631} 1652}
1632 1653
1633static struct flow_cache_object * 1654static struct flow_cache_object *
1634xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, 1655xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
1635 struct flow_cache_object *oldflo, void *ctx) 1656 struct flow_cache_object *oldflo, void *ctx)
1636{ 1657{
1637 struct dst_entry *dst_orig = (struct dst_entry *)ctx; 1658 struct dst_entry *dst_orig = (struct dst_entry *)ctx;
@@ -1730,18 +1751,36 @@ error:
1730 return ERR_PTR(err); 1751 return ERR_PTR(err);
1731} 1752}
1732 1753
1754static struct dst_entry *make_blackhole(struct net *net, u16 family,
1755 struct dst_entry *dst_orig)
1756{
1757 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1758 struct dst_entry *ret;
1759
1760 if (!afinfo) {
1761 dst_release(dst_orig);
1762 ret = ERR_PTR(-EINVAL);
1763 } else {
1764 ret = afinfo->blackhole_route(net, dst_orig);
1765 }
1766 xfrm_policy_put_afinfo(afinfo);
1767
1768 return ret;
1769}
1770
1733/* Main function: finds/creates a bundle for given flow. 1771/* Main function: finds/creates a bundle for given flow.
1734 * 1772 *
1735 * At the moment we eat a raw IP route. Mostly to speed up lookups 1773 * At the moment we eat a raw IP route. Mostly to speed up lookups
1736 * on interfaces with disabled IPsec. 1774 * on interfaces with disabled IPsec.
1737 */ 1775 */
1738int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, 1776struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
1739 struct sock *sk, int flags) 1777 const struct flowi *fl,
1778 struct sock *sk, int flags)
1740{ 1779{
1741 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 1780 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1742 struct flow_cache_object *flo; 1781 struct flow_cache_object *flo;
1743 struct xfrm_dst *xdst; 1782 struct xfrm_dst *xdst;
1744 struct dst_entry *dst, *dst_orig = *dst_p, *route; 1783 struct dst_entry *dst, *route;
1745 u16 family = dst_orig->ops->family; 1784 u16 family = dst_orig->ops->family;
1746 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); 1785 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1747 int i, err, num_pols, num_xfrms = 0, drop_pols = 0; 1786 int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
@@ -1778,6 +1817,8 @@ restart:
1778 goto no_transform; 1817 goto no_transform;
1779 } 1818 }
1780 1819
1820 dst_hold(&xdst->u.dst);
1821
1781 spin_lock_bh(&xfrm_policy_sk_bundle_lock); 1822 spin_lock_bh(&xfrm_policy_sk_bundle_lock);
1782 xdst->u.dst.next = xfrm_policy_sk_bundles; 1823 xdst->u.dst.next = xfrm_policy_sk_bundles;
1783 xfrm_policy_sk_bundles = &xdst->u.dst; 1824 xfrm_policy_sk_bundles = &xdst->u.dst;
@@ -1823,9 +1864,10 @@ restart:
1823 dst_release(dst); 1864 dst_release(dst);
1824 xfrm_pols_put(pols, drop_pols); 1865 xfrm_pols_put(pols, drop_pols);
1825 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 1866 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1826 return -EREMOTE; 1867
1868 return make_blackhole(net, family, dst_orig);
1827 } 1869 }
1828 if (flags & XFRM_LOOKUP_WAIT) { 1870 if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) {
1829 DECLARE_WAITQUEUE(wait, current); 1871 DECLARE_WAITQUEUE(wait, current);
1830 1872
1831 add_wait_queue(&net->xfrm.km_waitq, &wait); 1873 add_wait_queue(&net->xfrm.km_waitq, &wait);
@@ -1867,47 +1909,33 @@ no_transform:
1867 goto error; 1909 goto error;
1868 } else if (num_xfrms > 0) { 1910 } else if (num_xfrms > 0) {
1869 /* Flow transformed */ 1911 /* Flow transformed */
1870 *dst_p = dst;
1871 dst_release(dst_orig); 1912 dst_release(dst_orig);
1872 } else { 1913 } else {
1873 /* Flow passes untransformed */ 1914 /* Flow passes untransformed */
1874 dst_release(dst); 1915 dst_release(dst);
1916 dst = dst_orig;
1875 } 1917 }
1876ok: 1918ok:
1877 xfrm_pols_put(pols, drop_pols); 1919 xfrm_pols_put(pols, drop_pols);
1878 return 0; 1920 return dst;
1879 1921
1880nopol: 1922nopol:
1881 if (!(flags & XFRM_LOOKUP_ICMP)) 1923 if (!(flags & XFRM_LOOKUP_ICMP)) {
1924 dst = dst_orig;
1882 goto ok; 1925 goto ok;
1926 }
1883 err = -ENOENT; 1927 err = -ENOENT;
1884error: 1928error:
1885 dst_release(dst); 1929 dst_release(dst);
1886dropdst: 1930dropdst:
1887 dst_release(dst_orig); 1931 dst_release(dst_orig);
1888 *dst_p = NULL;
1889 xfrm_pols_put(pols, drop_pols); 1932 xfrm_pols_put(pols, drop_pols);
1890 return err; 1933 return ERR_PTR(err);
1891}
1892EXPORT_SYMBOL(__xfrm_lookup);
1893
1894int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
1895 struct sock *sk, int flags)
1896{
1897 int err = __xfrm_lookup(net, dst_p, fl, sk, flags);
1898
1899 if (err == -EREMOTE) {
1900 dst_release(*dst_p);
1901 *dst_p = NULL;
1902 err = -EAGAIN;
1903 }
1904
1905 return err;
1906} 1934}
1907EXPORT_SYMBOL(xfrm_lookup); 1935EXPORT_SYMBOL(xfrm_lookup);
1908 1936
1909static inline int 1937static inline int
1910xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) 1938xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
1911{ 1939{
1912 struct xfrm_state *x; 1940 struct xfrm_state *x;
1913 1941
@@ -1926,7 +1954,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1926 */ 1954 */
1927 1955
1928static inline int 1956static inline int
1929xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 1957xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
1930 unsigned short family) 1958 unsigned short family)
1931{ 1959{
1932 if (xfrm_state_kern(x)) 1960 if (xfrm_state_kern(x))
@@ -1949,7 +1977,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1949 * Otherwise "-2 - errored_index" is returned. 1977 * Otherwise "-2 - errored_index" is returned.
1950 */ 1978 */
1951static inline int 1979static inline int
1952xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, 1980xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
1953 unsigned short family) 1981 unsigned short family)
1954{ 1982{
1955 int idx = start; 1983 int idx = start;
@@ -1981,13 +2009,13 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1981 return -EAFNOSUPPORT; 2009 return -EAFNOSUPPORT;
1982 2010
1983 afinfo->decode_session(skb, fl, reverse); 2011 afinfo->decode_session(skb, fl, reverse);
1984 err = security_xfrm_decode_session(skb, &fl->secid); 2012 err = security_xfrm_decode_session(skb, &fl->flowi_secid);
1985 xfrm_policy_put_afinfo(afinfo); 2013 xfrm_policy_put_afinfo(afinfo);
1986 return err; 2014 return err;
1987} 2015}
1988EXPORT_SYMBOL(__xfrm_decode_session); 2016EXPORT_SYMBOL(__xfrm_decode_session);
1989 2017
1990static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) 2018static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
1991{ 2019{
1992 for (; k < sp->len; k++) { 2020 for (; k < sp->len; k++) {
1993 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { 2021 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
@@ -2162,7 +2190,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2162 struct net *net = dev_net(skb->dev); 2190 struct net *net = dev_net(skb->dev);
2163 struct flowi fl; 2191 struct flowi fl;
2164 struct dst_entry *dst; 2192 struct dst_entry *dst;
2165 int res; 2193 int res = 1;
2166 2194
2167 if (xfrm_decode_session(skb, &fl, family) < 0) { 2195 if (xfrm_decode_session(skb, &fl, family) < 0) {
2168 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); 2196 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
@@ -2170,9 +2198,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2170 } 2198 }
2171 2199
2172 skb_dst_force(skb); 2200 skb_dst_force(skb);
2173 dst = skb_dst(skb);
2174 2201
2175 res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; 2202 dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
2203 if (IS_ERR(dst)) {
2204 res = 0;
2205 dst = NULL;
2206 }
2176 skb_dst_set(skb, dst); 2207 skb_dst_set(skb, dst);
2177 return res; 2208 return res;
2178} 2209}
@@ -2210,7 +2241,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2210 2241
2211static int stale_bundle(struct dst_entry *dst) 2242static int stale_bundle(struct dst_entry *dst)
2212{ 2243{
2213 return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); 2244 return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC);
2214} 2245}
2215 2246
2216void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2247void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
@@ -2282,8 +2313,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
2282 * still valid. 2313 * still valid.
2283 */ 2314 */
2284 2315
2285static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, 2316static int xfrm_bundle_ok(struct xfrm_dst *first, int family)
2286 struct flowi *fl, int family, int strict)
2287{ 2317{
2288 struct dst_entry *dst = &first->u.dst; 2318 struct dst_entry *dst = &first->u.dst;
2289 struct xfrm_dst *last; 2319 struct xfrm_dst *last;
@@ -2292,26 +2322,12 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2292 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || 2322 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2293 (dst->dev && !netif_running(dst->dev))) 2323 (dst->dev && !netif_running(dst->dev)))
2294 return 0; 2324 return 0;
2295#ifdef CONFIG_XFRM_SUB_POLICY
2296 if (fl) {
2297 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2298 return 0;
2299 if (first->partner &&
2300 !xfrm_selector_match(first->partner, fl, family))
2301 return 0;
2302 }
2303#endif
2304 2325
2305 last = NULL; 2326 last = NULL;
2306 2327
2307 do { 2328 do {
2308 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2329 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2309 2330
2310 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2311 return 0;
2312 if (fl && pol &&
2313 !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2314 return 0;
2315 if (dst->xfrm->km.state != XFRM_STATE_VALID) 2331 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2316 return 0; 2332 return 0;
2317 if (xdst->xfrm_genid != dst->xfrm->genid) 2333 if (xdst->xfrm_genid != dst->xfrm->genid)
@@ -2320,11 +2336,6 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2320 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) 2336 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2321 return 0; 2337 return 0;
2322 2338
2323 if (strict && fl &&
2324 !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2325 !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2326 return 0;
2327
2328 mtu = dst_mtu(dst->child); 2339 mtu = dst_mtu(dst->child);
2329 if (xdst->child_mtu_cached != mtu) { 2340 if (xdst->child_mtu_cached != mtu) {
2330 last = xdst; 2341 last = xdst;
@@ -2735,8 +2746,8 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2735#endif 2746#endif
2736 2747
2737#ifdef CONFIG_XFRM_MIGRATE 2748#ifdef CONFIG_XFRM_MIGRATE
2738static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, 2749static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
2739 struct xfrm_selector *sel_tgt) 2750 const struct xfrm_selector *sel_tgt)
2740{ 2751{
2741 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { 2752 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2742 if (sel_tgt->family == sel_cmp->family && 2753 if (sel_tgt->family == sel_cmp->family &&
@@ -2756,7 +2767,7 @@ static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2756 return 0; 2767 return 0;
2757} 2768}
2758 2769
2759static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, 2770static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel,
2760 u8 dir, u8 type) 2771 u8 dir, u8 type)
2761{ 2772{
2762 struct xfrm_policy *pol, *ret = NULL; 2773 struct xfrm_policy *pol, *ret = NULL;
@@ -2792,7 +2803,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2792 return ret; 2803 return ret;
2793} 2804}
2794 2805
2795static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) 2806static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
2796{ 2807{
2797 int match = 0; 2808 int match = 0;
2798 2809
@@ -2862,7 +2873,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
2862 return 0; 2873 return 0;
2863} 2874}
2864 2875
2865static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) 2876static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
2866{ 2877{
2867 int i, j; 2878 int i, j;
2868 2879
@@ -2896,7 +2907,7 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2896 return 0; 2907 return 0;
2897} 2908}
2898 2909
2899int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 2910int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2900 struct xfrm_migrate *m, int num_migrate, 2911 struct xfrm_migrate *m, int num_migrate,
2901 struct xfrm_kmaddress *k) 2912 struct xfrm_kmaddress *k)
2902{ 2913{
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
new file mode 100644
index 000000000000..47f1b8638df9
--- /dev/null
+++ b/net/xfrm/xfrm_replay.c
@@ -0,0 +1,550 @@
1/*
2 * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c.
3 *
4 * Copyright (C) 2010 secunet Security Networks AG
5 * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21#include <net/xfrm.h>
22
23u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
24{
25 u32 seq, seq_hi, bottom;
26 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
27
28 if (!(x->props.flags & XFRM_STATE_ESN))
29 return 0;
30
31 seq = ntohl(net_seq);
32 seq_hi = replay_esn->seq_hi;
33 bottom = replay_esn->seq - replay_esn->replay_window + 1;
34
35 if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) {
36 /* A. same subspace */
37 if (unlikely(seq < bottom))
38 seq_hi++;
39 } else {
40 /* B. window spans two subspaces */
41 if (unlikely(seq >= bottom))
42 seq_hi--;
43 }
44
45 return seq_hi;
46}
47
48static void xfrm_replay_notify(struct xfrm_state *x, int event)
49{
50 struct km_event c;
51 /* we send notify messages in case
 52 * 1. we updated one of the sequence numbers, and the seqno difference
 52 * 1. we updated one of the sequence numbers, and the seqno difference
53 * is at least x->replay_maxdiff, in this case we also update the
54 * timeout of our timer function
55 * 2. if x->replay_maxage has elapsed since last update,
56 * and there were changes
57 *
58 * The state structure must be locked!
59 */
60
61 switch (event) {
62 case XFRM_REPLAY_UPDATE:
63 if (x->replay_maxdiff &&
64 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
65 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
66 if (x->xflags & XFRM_TIME_DEFER)
67 event = XFRM_REPLAY_TIMEOUT;
68 else
69 return;
70 }
71
72 break;
73
74 case XFRM_REPLAY_TIMEOUT:
75 if (memcmp(&x->replay, &x->preplay,
76 sizeof(struct xfrm_replay_state)) == 0) {
77 x->xflags |= XFRM_TIME_DEFER;
78 return;
79 }
80
81 break;
82 }
83
84 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
85 c.event = XFRM_MSG_NEWAE;
86 c.data.aevent = event;
87 km_state_notify(x, &c);
88
89 if (x->replay_maxage &&
90 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
91 x->xflags &= ~XFRM_TIME_DEFER;
92}
93
94static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
95{
96 int err = 0;
97 struct net *net = xs_net(x);
98
99 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
100 XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq;
101 if (unlikely(x->replay.oseq == 0)) {
102 x->replay.oseq--;
103 xfrm_audit_state_replay_overflow(x, skb);
104 err = -EOVERFLOW;
105
106 return err;
107 }
108 if (xfrm_aevent_is_on(net))
109 x->repl->notify(x, XFRM_REPLAY_UPDATE);
110 }
111
112 return err;
113}
114
115static int xfrm_replay_check(struct xfrm_state *x,
116 struct sk_buff *skb, __be32 net_seq)
117{
118 u32 diff;
119 u32 seq = ntohl(net_seq);
120
121 if (!x->props.replay_window)
122 return 0;
123
124 if (unlikely(seq == 0))
125 goto err;
126
127 if (likely(seq > x->replay.seq))
128 return 0;
129
130 diff = x->replay.seq - seq;
131 if (diff >= min_t(unsigned int, x->props.replay_window,
132 sizeof(x->replay.bitmap) * 8)) {
133 x->stats.replay_window++;
134 goto err;
135 }
136
137 if (x->replay.bitmap & (1U << diff)) {
138 x->stats.replay++;
139 goto err;
140 }
141 return 0;
142
143err:
144 xfrm_audit_state_replay(x, skb, net_seq);
145 return -EINVAL;
146}
147
148static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
149{
150 u32 diff;
151 u32 seq = ntohl(net_seq);
152
153 if (!x->props.replay_window)
154 return;
155
156 if (seq > x->replay.seq) {
157 diff = seq - x->replay.seq;
158 if (diff < x->props.replay_window)
159 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
160 else
161 x->replay.bitmap = 1;
162 x->replay.seq = seq;
163 } else {
164 diff = x->replay.seq - seq;
165 x->replay.bitmap |= (1U << diff);
166 }
167
168 if (xfrm_aevent_is_on(xs_net(x)))
169 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
170}
171
172static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
173{
174 int err = 0;
175 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
176 struct net *net = xs_net(x);
177
178 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
179 XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq;
180 if (unlikely(replay_esn->oseq == 0)) {
181 replay_esn->oseq--;
182 xfrm_audit_state_replay_overflow(x, skb);
183 err = -EOVERFLOW;
184
185 return err;
186 }
187 if (xfrm_aevent_is_on(net))
188 x->repl->notify(x, XFRM_REPLAY_UPDATE);
189 }
190
191 return err;
192}
193
194static int xfrm_replay_check_bmp(struct xfrm_state *x,
195 struct sk_buff *skb, __be32 net_seq)
196{
197 unsigned int bitnr, nr;
198 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
199 u32 pos;
200 u32 seq = ntohl(net_seq);
201 u32 diff = replay_esn->seq - seq;
202
203 if (!replay_esn->replay_window)
204 return 0;
205
206 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
207
208 if (unlikely(seq == 0))
209 goto err;
210
211 if (likely(seq > replay_esn->seq))
212 return 0;
213
214 if (diff >= replay_esn->replay_window) {
215 x->stats.replay_window++;
216 goto err;
217 }
218
219 if (pos >= diff) {
220 bitnr = (pos - diff) % replay_esn->replay_window;
221 nr = bitnr >> 5;
222 bitnr = bitnr & 0x1F;
223 if (replay_esn->bmp[nr] & (1U << bitnr))
224 goto err_replay;
225 } else {
226 bitnr = replay_esn->replay_window - (diff - pos);
227 nr = bitnr >> 5;
228 bitnr = bitnr & 0x1F;
229 if (replay_esn->bmp[nr] & (1U << bitnr))
230 goto err_replay;
231 }
232 return 0;
233
234err_replay:
235 x->stats.replay++;
236err:
237 xfrm_audit_state_replay(x, skb, net_seq);
238 return -EINVAL;
239}
240
241static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
242{
243 unsigned int bitnr, nr, i;
244 u32 diff;
245 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
246 u32 seq = ntohl(net_seq);
247 u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
248
249 if (!replay_esn->replay_window)
250 return;
251
252 if (seq > replay_esn->seq) {
253 diff = seq - replay_esn->seq;
254
255 if (diff < replay_esn->replay_window) {
256 for (i = 1; i < diff; i++) {
257 bitnr = (pos + i) % replay_esn->replay_window;
258 nr = bitnr >> 5;
259 bitnr = bitnr & 0x1F;
260 replay_esn->bmp[nr] &= ~(1U << bitnr);
261 }
262
263 bitnr = (pos + diff) % replay_esn->replay_window;
264 nr = bitnr >> 5;
265 bitnr = bitnr & 0x1F;
266 replay_esn->bmp[nr] |= (1U << bitnr);
267 } else {
268 nr = replay_esn->replay_window >> 5;
269 for (i = 0; i <= nr; i++)
270 replay_esn->bmp[i] = 0;
271
272 bitnr = (pos + diff) % replay_esn->replay_window;
273 nr = bitnr >> 5;
274 bitnr = bitnr & 0x1F;
275 replay_esn->bmp[nr] |= (1U << bitnr);
276 }
277
278 replay_esn->seq = seq;
279 } else {
280 diff = replay_esn->seq - seq;
281
282 if (pos >= diff) {
283 bitnr = (pos - diff) % replay_esn->replay_window;
284 nr = bitnr >> 5;
285 bitnr = bitnr & 0x1F;
286 replay_esn->bmp[nr] |= (1U << bitnr);
287 } else {
288 bitnr = replay_esn->replay_window - (diff - pos);
289 nr = bitnr >> 5;
290 bitnr = bitnr & 0x1F;
291 replay_esn->bmp[nr] |= (1U << bitnr);
292 }
293 }
294
295 if (xfrm_aevent_is_on(xs_net(x)))
296 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
297}
298
299static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
300{
301 struct km_event c;
302 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
303 struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn;
304
305 /* we send notify messages in case
 306 * 1. we updated one of the sequence numbers, and the seqno difference
307 * is at least x->replay_maxdiff, in this case we also update the
308 * timeout of our timer function
309 * 2. if x->replay_maxage has elapsed since last update,
310 * and there were changes
311 *
312 * The state structure must be locked!
313 */
314
315 switch (event) {
316 case XFRM_REPLAY_UPDATE:
317 if (x->replay_maxdiff &&
318 (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
319 (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) {
320 if (x->xflags & XFRM_TIME_DEFER)
321 event = XFRM_REPLAY_TIMEOUT;
322 else
323 return;
324 }
325
326 break;
327
328 case XFRM_REPLAY_TIMEOUT:
329 if (memcmp(x->replay_esn, x->preplay_esn,
330 xfrm_replay_state_esn_len(replay_esn)) == 0) {
331 x->xflags |= XFRM_TIME_DEFER;
332 return;
333 }
334
335 break;
336 }
337
338 memcpy(x->preplay_esn, x->replay_esn,
339 xfrm_replay_state_esn_len(replay_esn));
340 c.event = XFRM_MSG_NEWAE;
341 c.data.aevent = event;
342 km_state_notify(x, &c);
343
344 if (x->replay_maxage &&
345 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
346 x->xflags &= ~XFRM_TIME_DEFER;
347}
348
349static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb)
350{
351 int err = 0;
352 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
353 struct net *net = xs_net(x);
354
355 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
356 XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq;
357 XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi;
358
359 if (unlikely(replay_esn->oseq == 0)) {
360 XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi;
361
362 if (replay_esn->oseq_hi == 0) {
363 replay_esn->oseq--;
364 replay_esn->oseq_hi--;
365 xfrm_audit_state_replay_overflow(x, skb);
366 err = -EOVERFLOW;
367
368 return err;
369 }
370 }
371 if (xfrm_aevent_is_on(net))
372 x->repl->notify(x, XFRM_REPLAY_UPDATE);
373 }
374
375 return err;
376}
377
378static int xfrm_replay_check_esn(struct xfrm_state *x,
379 struct sk_buff *skb, __be32 net_seq)
380{
381 unsigned int bitnr, nr;
382 u32 diff;
383 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
384 u32 pos;
385 u32 seq = ntohl(net_seq);
386 u32 wsize = replay_esn->replay_window;
387 u32 top = replay_esn->seq;
388 u32 bottom = top - wsize + 1;
389
390 if (!wsize)
391 return 0;
392
393 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
394
395 if (unlikely(seq == 0 && replay_esn->seq_hi == 0 &&
396 (replay_esn->seq < replay_esn->replay_window - 1)))
397 goto err;
398
399 diff = top - seq;
400
401 if (likely(top >= wsize - 1)) {
402 /* A. same subspace */
403 if (likely(seq > top) || seq < bottom)
404 return 0;
405 } else {
406 /* B. window spans two subspaces */
407 if (likely(seq > top && seq < bottom))
408 return 0;
409 if (seq >= bottom)
410 diff = ~seq + top + 1;
411 }
412
413 if (diff >= replay_esn->replay_window) {
414 x->stats.replay_window++;
415 goto err;
416 }
417
418 if (pos >= diff) {
419 bitnr = (pos - diff) % replay_esn->replay_window;
420 nr = bitnr >> 5;
421 bitnr = bitnr & 0x1F;
422 if (replay_esn->bmp[nr] & (1U << bitnr))
423 goto err_replay;
424 } else {
425 bitnr = replay_esn->replay_window - (diff - pos);
426 nr = bitnr >> 5;
427 bitnr = bitnr & 0x1F;
428 if (replay_esn->bmp[nr] & (1U << bitnr))
429 goto err_replay;
430 }
431 return 0;
432
433err_replay:
434 x->stats.replay++;
435err:
436 xfrm_audit_state_replay(x, skb, net_seq);
437 return -EINVAL;
438}
439
440static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
441{
442 unsigned int bitnr, nr, i;
443 int wrap;
444 u32 diff, pos, seq, seq_hi;
445 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
446
447 if (!replay_esn->replay_window)
448 return;
449
450 seq = ntohl(net_seq);
451 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
452 seq_hi = xfrm_replay_seqhi(x, net_seq);
453 wrap = seq_hi - replay_esn->seq_hi;
454
455 if ((!wrap && seq > replay_esn->seq) || wrap > 0) {
456 if (likely(!wrap))
457 diff = seq - replay_esn->seq;
458 else
459 diff = ~replay_esn->seq + seq + 1;
460
461 if (diff < replay_esn->replay_window) {
462 for (i = 1; i < diff; i++) {
463 bitnr = (pos + i) % replay_esn->replay_window;
464 nr = bitnr >> 5;
465 bitnr = bitnr & 0x1F;
466 replay_esn->bmp[nr] &= ~(1U << bitnr);
467 }
468
469 bitnr = (pos + diff) % replay_esn->replay_window;
470 nr = bitnr >> 5;
471 bitnr = bitnr & 0x1F;
472 replay_esn->bmp[nr] |= (1U << bitnr);
473 } else {
474 nr = replay_esn->replay_window >> 5;
475 for (i = 0; i <= nr; i++)
476 replay_esn->bmp[i] = 0;
477
478 bitnr = (pos + diff) % replay_esn->replay_window;
479 nr = bitnr >> 5;
480 bitnr = bitnr & 0x1F;
481 replay_esn->bmp[nr] |= (1U << bitnr);
482 }
483
484 replay_esn->seq = seq;
485
486 if (unlikely(wrap > 0))
487 replay_esn->seq_hi++;
488 } else {
489 diff = replay_esn->seq - seq;
490
491 if (pos >= diff) {
492 bitnr = (pos - diff) % replay_esn->replay_window;
493 nr = bitnr >> 5;
494 bitnr = bitnr & 0x1F;
495 replay_esn->bmp[nr] |= (1U << bitnr);
496 } else {
497 bitnr = replay_esn->replay_window - (diff - pos);
498 nr = bitnr >> 5;
499 bitnr = bitnr & 0x1F;
500 replay_esn->bmp[nr] |= (1U << bitnr);
501 }
502 }
503
504 if (xfrm_aevent_is_on(xs_net(x)))
505 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
506}
507
508static struct xfrm_replay xfrm_replay_legacy = {
509 .advance = xfrm_replay_advance,
510 .check = xfrm_replay_check,
511 .notify = xfrm_replay_notify,
512 .overflow = xfrm_replay_overflow,
513};
514
515static struct xfrm_replay xfrm_replay_bmp = {
516 .advance = xfrm_replay_advance_bmp,
517 .check = xfrm_replay_check_bmp,
518 .notify = xfrm_replay_notify_bmp,
519 .overflow = xfrm_replay_overflow_bmp,
520};
521
522static struct xfrm_replay xfrm_replay_esn = {
523 .advance = xfrm_replay_advance_esn,
524 .check = xfrm_replay_check_esn,
525 .notify = xfrm_replay_notify_bmp,
526 .overflow = xfrm_replay_overflow_esn,
527};
528
529int xfrm_init_replay(struct xfrm_state *x)
530{
531 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
532
533 if (replay_esn) {
534 if (replay_esn->replay_window >
535 replay_esn->bmp_len * sizeof(__u32) * 8)
536 return -EINVAL;
537
538 if ((x->props.flags & XFRM_STATE_ESN) && replay_esn->replay_window == 0)
539 return -EINVAL;
540
541 if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn)
542 x->repl = &xfrm_replay_esn;
543 else
544 x->repl = &xfrm_replay_bmp;
545 } else
546 x->repl = &xfrm_replay_legacy;
547
548 return 0;
549}
550EXPORT_SYMBOL(xfrm_init_replay);
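
The legacy check/advance pair above is a classic sliding-window anti-replay scheme over a single 32-bit bitmap; the _bmp and _esn variants extend it to a multi-word bitmap and, for ESN, to a 64-bit sequence space tracked as seq/seq_hi (see xfrm_replay_seqhi()). A self-contained user-space sketch of the single-word window, not part of the patch, with the kernel bookkeeping (stats, audit, notify) stripped out:

/* Sketch only: 32-bit sliding-window anti-replay, mirroring
 * xfrm_replay_check() and xfrm_replay_advance() above.
 */
#include <stdint.h>
#include <stdio.h>

struct replay_window {
        uint32_t seq;       /* highest sequence number accepted so far */
        uint32_t bitmap;    /* bit i set => (seq - i) already received */
        uint32_t window;    /* window size, at most 32 here */
};

static int replay_check(const struct replay_window *w, uint32_t seq)
{
        uint32_t diff;

        if (seq == 0)
                return -1;                      /* zero is never valid */
        if (seq > w->seq)
                return 0;                       /* ahead of the window: accept */

        diff = w->seq - seq;
        if (diff >= w->window)
                return -1;                      /* too old: fell off the window */
        if (w->bitmap & (1U << diff))
                return -1;                      /* inside the window but already seen */
        return 0;
}

static void replay_advance(struct replay_window *w, uint32_t seq)
{
        if (seq > w->seq) {
                uint32_t diff = seq - w->seq;

                /* Slide the window forward and mark the new top. */
                w->bitmap = diff < w->window ? (w->bitmap << diff) | 1 : 1;
                w->seq = seq;
        } else {
                w->bitmap |= 1U << (w->seq - seq);  /* mark an in-window arrival */
        }
}

int main(void)
{
        struct replay_window w = { .seq = 0, .bitmap = 0, .window = 32 };
        uint32_t pkts[] = { 1, 3, 2, 3, 40, 9 };

        for (unsigned i = 0; i < sizeof(pkts) / sizeof(pkts[0]); i++) {
                int ok = replay_check(&w, pkts[i]) == 0;

                printf("seq %2u: %s\n", pkts[i], ok ? "accept" : "drop");
                if (ok)
                        replay_advance(&w, pkts[i]);
        }
        return 0;
}

With the sample input, sequence 3 is dropped the second time it arrives, and the jump to 40 resets the bitmap because the gap exceeds the window, after which the old-but-still-in-window sequence 9 is accepted.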
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 220ebc05c7af..d70f85eb7864 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -42,16 +42,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
42static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); 42static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
43static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); 43static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
44 44
45#ifdef CONFIG_AUDITSYSCALL
46static void xfrm_audit_state_replay(struct xfrm_state *x,
47 struct sk_buff *skb, __be32 net_seq);
48#else
49#define xfrm_audit_state_replay(x, s, sq) do { ; } while (0)
50#endif /* CONFIG_AUDITSYSCALL */
51
52static inline unsigned int xfrm_dst_hash(struct net *net, 45static inline unsigned int xfrm_dst_hash(struct net *net,
53 xfrm_address_t *daddr, 46 const xfrm_address_t *daddr,
54 xfrm_address_t *saddr, 47 const xfrm_address_t *saddr,
55 u32 reqid, 48 u32 reqid,
56 unsigned short family) 49 unsigned short family)
57{ 50{
@@ -59,15 +52,16 @@ static inline unsigned int xfrm_dst_hash(struct net *net,
59} 52}
60 53
61static inline unsigned int xfrm_src_hash(struct net *net, 54static inline unsigned int xfrm_src_hash(struct net *net,
62 xfrm_address_t *daddr, 55 const xfrm_address_t *daddr,
63 xfrm_address_t *saddr, 56 const xfrm_address_t *saddr,
64 unsigned short family) 57 unsigned short family)
65{ 58{
66 return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); 59 return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
67} 60}
68 61
69static inline unsigned int 62static inline unsigned int
70xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) 63xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
64 __be32 spi, u8 proto, unsigned short family)
71{ 65{
72 return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); 66 return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
73} 67}
@@ -362,6 +356,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
362 kfree(x->calg); 356 kfree(x->calg);
363 kfree(x->encap); 357 kfree(x->encap);
364 kfree(x->coaddr); 358 kfree(x->coaddr);
359 kfree(x->replay_esn);
360 kfree(x->preplay_esn);
365 if (x->inner_mode) 361 if (x->inner_mode)
366 xfrm_put_mode(x->inner_mode); 362 xfrm_put_mode(x->inner_mode);
367 if (x->inner_mode_iaf) 363 if (x->inner_mode_iaf)
@@ -656,9 +652,9 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
656EXPORT_SYMBOL(xfrm_sad_getinfo); 652EXPORT_SYMBOL(xfrm_sad_getinfo);
657 653
658static int 654static int
659xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, 655xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
660 struct xfrm_tmpl *tmpl, 656 const struct xfrm_tmpl *tmpl,
661 xfrm_address_t *daddr, xfrm_address_t *saddr, 657 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
662 unsigned short family) 658 unsigned short family)
663{ 659{
664 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); 660 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
@@ -677,7 +673,10 @@ xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,
677 return 0; 673 return 0;
678} 674}
679 675
680static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) 676static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
677 const xfrm_address_t *daddr,
678 __be32 spi, u8 proto,
679 unsigned short family)
681{ 680{
682 unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); 681 unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
683 struct xfrm_state *x; 682 struct xfrm_state *x;
@@ -699,7 +698,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad
699 return NULL; 698 return NULL;
700} 699}
701 700
702static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) 701static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
702 const xfrm_address_t *daddr,
703 const xfrm_address_t *saddr,
704 u8 proto, unsigned short family)
703{ 705{
704 unsigned int h = xfrm_src_hash(net, daddr, saddr, family); 706 unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
705 struct xfrm_state *x; 707 struct xfrm_state *x;
@@ -746,8 +748,7 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
 }
 
 static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
-			       struct flowi *fl, unsigned short family,
-			       xfrm_address_t *daddr, xfrm_address_t *saddr,
+			       const struct flowi *fl, unsigned short family,
 			       struct xfrm_state **best, int *acq_in_progress,
 			       int *error)
 {
@@ -784,8 +785,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 }
 
 struct xfrm_state *
-xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
-		struct flowi *fl, struct xfrm_tmpl *tmpl,
+xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
+		const struct flowi *fl, struct xfrm_tmpl *tmpl,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
@@ -813,7 +814,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family,
 					   &best, &acquire_in_progress, &error);
 	}
 	if (best)
@@ -829,7 +830,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family,
 					   &best, &acquire_in_progress, &error);
 	}
 
@@ -853,7 +854,7 @@ found:
 		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
 
-		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
+		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
 		if (error) {
 			x->km.state = XFRM_STATE_DEAD;
 			to_put = x;
@@ -991,7 +992,11 @@ void xfrm_state_insert(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_insert);
 
 /* xfrm_state_lock is held */
-static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
+static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
+					  unsigned short family, u8 mode,
+					  u32 reqid, u8 proto,
+					  const xfrm_address_t *daddr,
+					  const xfrm_address_t *saddr, int create)
 {
 	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
 	struct hlist_node *entry;
@@ -1031,15 +1036,15 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
 
 	case AF_INET6:
 		ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
-			       (struct in6_addr *)daddr);
+			       (const struct in6_addr *)daddr);
 		ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
-			       (struct in6_addr *)saddr);
+			       (const struct in6_addr *)saddr);
 		x->sel.prefixlen_d = 128;
 		x->sel.prefixlen_s = 128;
 		ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
-			       (struct in6_addr *)saddr);
+			       (const struct in6_addr *)saddr);
 		ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
-			       (struct in6_addr *)daddr);
+			       (const struct in6_addr *)daddr);
 		break;
 	}
 
@@ -1176,6 +1181,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 			goto error;
 	}
 
+	if (orig->replay_esn) {
+		err = xfrm_replay_clone(x, orig);
+		if (err)
+			goto error;
+	}
+
 	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
 
 	err = xfrm_init_state(x);
@@ -1369,7 +1380,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_check_expire);
 
 struct xfrm_state *
-xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi,
+xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
 		  u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1383,7 +1394,7 @@ EXPORT_SYMBOL(xfrm_state_lookup);
 
 struct xfrm_state *
 xfrm_state_lookup_byaddr(struct net *net, u32 mark,
-			 xfrm_address_t *daddr, xfrm_address_t *saddr,
+			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			 u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1397,7 +1408,7 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto,
-	      xfrm_address_t *daddr, xfrm_address_t *saddr,
+	      const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	      int create, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1609,54 +1620,6 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk)
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);
 
-
-void xfrm_replay_notify(struct xfrm_state *x, int event)
-{
-	struct km_event c;
-	/* we send notify messages in case
-	 *  1. we updated on of the sequence numbers, and the seqno difference
-	 *     is at least x->replay_maxdiff, in this case we also update the
-	 *     timeout of our timer function
-	 *  2. if x->replay_maxage has elapsed since last update,
-	 *     and there were changes
-	 *
-	 *  The state structure must be locked!
-	 */
-
-	switch (event) {
-	case XFRM_REPLAY_UPDATE:
-		if (x->replay_maxdiff &&
-		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
-		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
-			if (x->xflags & XFRM_TIME_DEFER)
-				event = XFRM_REPLAY_TIMEOUT;
-			else
-				return;
-		}
-
-		break;
-
-	case XFRM_REPLAY_TIMEOUT:
-		if ((x->replay.seq == x->preplay.seq) &&
-		    (x->replay.bitmap == x->preplay.bitmap) &&
-		    (x->replay.oseq == x->preplay.oseq)) {
-			x->xflags |= XFRM_TIME_DEFER;
-			return;
-		}
-
-		break;
-	}
-
-	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
-	c.event = XFRM_MSG_NEWAE;
-	c.data.aevent = event;
-	km_state_notify(x, &c);
-
-	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
-		x->xflags &= ~XFRM_TIME_DEFER;
-}
-
 static void xfrm_replay_timer_handler(unsigned long data)
 {
 	struct xfrm_state *x = (struct xfrm_state*)data;
@@ -1665,7 +1628,7 @@ static void xfrm_replay_timer_handler(unsigned long data)
 
 	if (x->km.state == XFRM_STATE_VALID) {
 		if (xfrm_aevent_is_on(xs_net(x)))
-			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
+			x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
 		else
 			x->xflags |= XFRM_TIME_DEFER;
 	}
@@ -1673,61 +1636,10 @@ static void xfrm_replay_timer_handler(unsigned long data)
 	spin_unlock(&x->lock);
 }
 
-int xfrm_replay_check(struct xfrm_state *x,
-		      struct sk_buff *skb, __be32 net_seq)
-{
-	u32 diff;
-	u32 seq = ntohl(net_seq);
-
-	if (unlikely(seq == 0))
-		goto err;
-
-	if (likely(seq > x->replay.seq))
-		return 0;
-
-	diff = x->replay.seq - seq;
-	if (diff >= min_t(unsigned int, x->props.replay_window,
-			  sizeof(x->replay.bitmap) * 8)) {
-		x->stats.replay_window++;
-		goto err;
-	}
-
-	if (x->replay.bitmap & (1U << diff)) {
-		x->stats.replay++;
-		goto err;
-	}
-	return 0;
-
-err:
-	xfrm_audit_state_replay(x, skb, net_seq);
-	return -EINVAL;
-}
-
-void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
-{
-	u32 diff;
-	u32 seq = ntohl(net_seq);
-
-	if (seq > x->replay.seq) {
-		diff = seq - x->replay.seq;
-		if (diff < x->props.replay_window)
-			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
-		else
-			x->replay.bitmap = 1;
-		x->replay.seq = seq;
-	} else {
-		diff = x->replay.seq - seq;
-		x->replay.bitmap |= (1U << diff);
-	}
-
-	if (xfrm_aevent_is_on(xs_net(x)))
-		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
-}
-
 static LIST_HEAD(xfrm_km_list);
 static DEFINE_RWLOCK(xfrm_km_lock);
 
-void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 
@@ -1738,7 +1650,7 @@ void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
 	read_unlock(&xfrm_km_lock);
 }
 
-void km_state_notify(struct xfrm_state *x, struct km_event *c)
+void km_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 	read_lock(&xfrm_km_lock);
@@ -1819,9 +1731,9 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 EXPORT_SYMBOL(km_policy_expired);
 
 #ifdef CONFIG_XFRM_MIGRATE
-int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-	       struct xfrm_migrate *m, int num_migrate,
-	       struct xfrm_kmaddress *k)
+int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+	       const struct xfrm_migrate *m, int num_migrate,
+	       const struct xfrm_kmaddress *k)
 {
 	int err = -EINVAL;
 	int ret;
@@ -2001,7 +1913,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	return res;
 }
 
-int xfrm_init_state(struct xfrm_state *x)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 {
 	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_mode *inner_mode;
@@ -2074,12 +1986,25 @@ int xfrm_init_state(struct xfrm_state *x)
 	if (x->outer_mode == NULL)
 		goto error;
 
+	if (init_replay) {
+		err = xfrm_init_replay(x);
+		if (err)
+			goto error;
+	}
+
 	x->km.state = XFRM_STATE_VALID;
 
 error:
 	return err;
 }
 
+EXPORT_SYMBOL(__xfrm_init_state);
+
+int xfrm_init_state(struct xfrm_state *x)
+{
+	return __xfrm_init_state(x, true);
+}
+
 EXPORT_SYMBOL(xfrm_init_state);
 
 int __net_init xfrm_state_init(struct net *net)
@@ -2167,8 +2092,8 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
 static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
 				      struct audit_buffer *audit_buf)
 {
-	struct iphdr *iph4;
-	struct ipv6hdr *iph6;
+	const struct iphdr *iph4;
+	const struct ipv6hdr *iph6;
 
 	switch (family) {
 	case AF_INET:
@@ -2236,7 +2161,7 @@ void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
 
-static void xfrm_audit_state_replay(struct xfrm_state *x,
+void xfrm_audit_state_replay(struct xfrm_state *x,
 			     struct sk_buff *skb, __be32 net_seq)
 {
 	struct audit_buffer *audit_buf;
@@ -2251,6 +2176,7 @@ static void xfrm_audit_state_replay(struct xfrm_state *x,
 			 spi, spi, ntohl(net_seq));
 	audit_log_end(audit_buf);
 }
+EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
 
 void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
 {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 61291965c5f6..c658cb3bc7c3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -119,6 +119,25 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs)
 	return 0;
 }
 
+static inline int verify_replay(struct xfrm_usersa_info *p,
+				struct nlattr **attrs)
+{
+	struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
+
+	if ((p->flags & XFRM_STATE_ESN) && !rt)
+		return -EINVAL;
+
+	if (!rt)
+		return 0;
+
+	if (p->id.proto != IPPROTO_ESP)
+		return -EINVAL;
+
+	if (p->replay_window != 0)
+		return -EINVAL;
+
+	return 0;
+}
 
 static int verify_newsa_info(struct xfrm_usersa_info *p,
 			     struct nlattr **attrs)
@@ -214,6 +233,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		goto out;
 	if ((err = verify_sec_ctx_len(attrs)))
 		goto out;
+	if ((err = verify_replay(p, attrs)))
+		goto out;
 
 	err = -EINVAL;
 	switch (p->mode) {
@@ -234,7 +255,7 @@ out:
 }
 
 static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
-			   struct xfrm_algo_desc *(*get_byname)(char *, int),
+			   struct xfrm_algo_desc *(*get_byname)(const char *, int),
 			   struct nlattr *rta)
 {
 	struct xfrm_algo *p, *ualg;
@@ -345,6 +366,50 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
 	return 0;
 }
 
+static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn,
+					 struct nlattr *rp)
+{
+	struct xfrm_replay_state_esn *up;
+
+	if (!replay_esn || !rp)
+		return 0;
+
+	up = nla_data(rp);
+
+	if (xfrm_replay_state_esn_len(replay_esn) !=
+	    xfrm_replay_state_esn_len(up))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn,
+				       struct xfrm_replay_state_esn **preplay_esn,
+				       struct nlattr *rta)
+{
+	struct xfrm_replay_state_esn *p, *pp, *up;
+
+	if (!rta)
+		return 0;
+
+	up = nla_data(rta);
+
+	p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+	if (!pp) {
+		kfree(p);
+		return -ENOMEM;
+	}
+
+	*replay_esn = p;
+	*preplay_esn = pp;
+
+	return 0;
+}
+
 static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
 {
 	int len = 0;
@@ -380,10 +445,20 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
 static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs)
 {
 	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
+	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
 	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
 	struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
 	struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
 
+	if (re) {
+		struct xfrm_replay_state_esn *replay_esn;
+		replay_esn = nla_data(re);
+		memcpy(x->replay_esn, replay_esn,
+		       xfrm_replay_state_esn_len(replay_esn));
+		memcpy(x->preplay_esn, replay_esn,
+		       xfrm_replay_state_esn_len(replay_esn));
+	}
+
 	if (rp) {
 		struct xfrm_replay_state *replay;
 		replay = nla_data(rp);
@@ -459,7 +534,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	err = xfrm_init_state(x);
+	err = __xfrm_init_state(x, false);
 	if (err)
 		goto error;
 
@@ -467,16 +542,19 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	    security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))
 		goto error;
 
+	if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
+					       attrs[XFRMA_REPLAY_ESN_VAL])))
+		goto error;
+
 	x->km.seq = p->seq;
 	x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth;
 	/* sysctl_xfrm_aevent_etime is in 100ms units */
 	x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M;
-	x->preplay.bitmap = 0;
-	x->preplay.seq = x->replay.seq+x->replay_maxdiff;
-	x->preplay.oseq = x->replay.oseq +x->replay_maxdiff;
 
-	/* override default values from above */
+	if ((err = xfrm_init_replay(x)))
+		goto error;
 
+	/* override default values from above */
 	xfrm_update_ae_params(x, attrs);
 
 	return x;
@@ -497,9 +575,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_state *x;
 	int err;
 	struct km_event c;
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	err = verify_newsa_info(p, attrs);
 	if (err)
@@ -515,6 +593,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	else
 		err = xfrm_state_update(x);
 
+	security_task_getsecid(current, &sid);
 	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
 
 	if (err < 0) {
@@ -575,9 +654,9 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = nlmsg_data(nlh);
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	x = xfrm_user_state_lookup(net, p, attrs, &err);
 	if (x == NULL)
@@ -602,6 +681,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_notify(x, &c);
 
 out:
+	security_task_getsecid(current, &sid);
 	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
 	xfrm_state_put(x);
 	return err;
@@ -705,6 +785,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 	if (xfrm_mark_put(skb, &x->mark))
 		goto nla_put_failure;
 
+	if (x->replay_esn)
+		NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL,
+			xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn);
+
 	if (x->security && copy_sec_ctx(x->security, skb) < 0)
 		goto nla_put_failure;
 
@@ -813,7 +897,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
 	u32 *f;
 
 	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
-	if (nlh == NULL) /* shouldnt really happen ... */
+	if (nlh == NULL) /* shouldn't really happen ... */
 		return -EMSGSIZE;
 
 	f = nlmsg_data(nlh);
@@ -873,7 +957,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,
 	u32 *f;
 
 	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
-	if (nlh == NULL) /* shouldnt really happen ... */
+	if (nlh == NULL) /* shouldn't really happen ... */
 		return -EMSGSIZE;
 
 	f = nlmsg_data(nlh);
@@ -1265,9 +1349,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct km_event c;
 	int err;
 	int excl;
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	err = verify_newpolicy_info(p);
 	if (err)
@@ -1280,12 +1364,13 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!xp)
 		return err;
 
-	/* shouldnt excl be based on nlh flags??
+	/* shouldn't excl be based on nlh flags??
 	 * Aha! this is anti-netlink really i.e more pfkey derived
 	 * in netlink excl is a flag and you wouldnt need
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
+	security_task_getsecid(current, &sid);
 	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
 
 	if (err) {
@@ -1522,10 +1607,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 					    NETLINK_CB(skb).pid);
 		}
 	} else {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		u32 sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
 
+		security_task_getsecid(current, &sid);
 		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
 					 sid);
 
@@ -1553,9 +1639,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_audit audit_info;
 	int err;
 
-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_state_flush(net, p->proto, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1572,17 +1658,21 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }
 
-static inline size_t xfrm_aevent_msgsize(void)
+static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)
 {
+	size_t replay_size = x->replay_esn ?
+			      xfrm_replay_state_esn_len(x->replay_esn) :
+			      sizeof(struct xfrm_replay_state);
+
 	return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id))
-	       + nla_total_size(sizeof(struct xfrm_replay_state))
+	       + nla_total_size(replay_size)
 	       + nla_total_size(sizeof(struct xfrm_lifetime_cur))
 	       + nla_total_size(sizeof(struct xfrm_mark))
 	       + nla_total_size(4) /* XFRM_AE_RTHR */
 	       + nla_total_size(4); /* XFRM_AE_ETHR */
 }
 
-static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_aevent_id *id;
 	struct nlmsghdr *nlh;
@@ -1600,7 +1690,13 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 	id->reqid = x->props.reqid;
 	id->flags = c->data.aevent;
 
-	NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
+	if (x->replay_esn)
+		NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL,
+			xfrm_replay_state_esn_len(x->replay_esn),
+			x->replay_esn);
+	else
+		NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
+
 	NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft);
 
 	if (id->flags & XFRM_AE_RTHR)
@@ -1633,16 +1729,16 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_aevent_id *p = nlmsg_data(nlh);
 	struct xfrm_usersa_id *id = &p->sa_id;
 
-	r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC);
-	if (r_skb == NULL)
-		return -ENOMEM;
-
 	mark = xfrm_mark_get(attrs, &m);
 
 	x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family);
-	if (x == NULL) {
-		kfree_skb(r_skb);
+	if (x == NULL)
 		return -ESRCH;
+
+	r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
+	if (r_skb == NULL) {
+		xfrm_state_put(x);
+		return -ENOMEM;
 	}
 
 	/*
@@ -1674,9 +1770,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_mark m;
 	struct xfrm_aevent_id *p = nlmsg_data(nlh);
 	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
+	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
 	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
 
-	if (!lt && !rp)
+	if (!lt && !rp && !re)
 		return err;
 
 	/* pedantic mode - thou shalt sayeth replaceth */
@@ -1692,6 +1789,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (x->km.state != XFRM_STATE_VALID)
 		goto out;
 
+	err = xfrm_replay_verify_len(x->replay_esn, rp);
+	if (err)
+		goto out;
+
 	spin_lock_bh(&x->lock);
 	xfrm_update_ae_params(x, attrs);
 	spin_unlock_bh(&x->lock);
@@ -1720,9 +1821,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_policy_flush(net, type, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1789,9 +1890,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	err = 0;
 	if (up->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
 		xfrm_policy_delete(xp, p->dir);
 		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
 
@@ -1830,9 +1933,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_expired(x, ue->hard, current->pid);
 
 	if (ue->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
 		__xfrm_state_delete(x);
 		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
 	}
@@ -1986,7 +2091,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
 #endif
 
 #ifdef CONFIG_XFRM_MIGRATE
-static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
+static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb)
 {
 	struct xfrm_user_migrate um;
 
@@ -2004,7 +2109,7 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
 	return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um);
 }
 
-static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb)
+static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb)
 {
 	struct xfrm_user_kmaddress uk;
 
@@ -2025,11 +2130,11 @@ static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma)
 	       + userpolicy_type_attrsize();
 }
 
-static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
-			 int num_migrate, struct xfrm_kmaddress *k,
-			 struct xfrm_selector *sel, u8 dir, u8 type)
+static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m,
+			 int num_migrate, const struct xfrm_kmaddress *k,
+			 const struct xfrm_selector *sel, u8 dir, u8 type)
 {
-	struct xfrm_migrate *mp;
+	const struct xfrm_migrate *mp;
 	struct xfrm_userpolicy_id *pol_id;
 	struct nlmsghdr *nlh;
 	int i;
@@ -2061,9 +2166,9 @@ nlmsg_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
 {
 	struct net *net = &init_net;
 	struct sk_buff *skb;
@@ -2079,9 +2184,9 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
 }
 #else
-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
 {
 	return -ENOPROTOOPT;
 }
@@ -2137,6 +2242,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_KMADDRESS]	= { .len = sizeof(struct xfrm_user_kmaddress) },
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
+	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
 };
 
 static struct xfrm_link {
@@ -2220,7 +2326,7 @@ static inline size_t xfrm_expire_msgsize(void)
 	       + nla_total_size(sizeof(struct xfrm_mark));
 }
 
-static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_user_expire *ue;
 	struct nlmsghdr *nlh;
@@ -2242,7 +2348,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
@@ -2259,12 +2365,12 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
-static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
 
-	skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC);
+	skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
 
@@ -2274,7 +2380,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
 }
 
-static int xfrm_notify_sa_flush(struct km_event *c)
+static int xfrm_notify_sa_flush(const struct km_event *c)
 {
 	struct net *net = c->net;
 	struct xfrm_usersa_flush *p;
@@ -2318,6 +2424,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(*x->encap));
 	if (x->tfcpad)
 		l += nla_total_size(sizeof(x->tfcpad));
+	if (x->replay_esn)
+		l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn));
 	if (x->security)
 		l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +
 				    x->security->ctx_len);
@@ -2330,7 +2438,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 	return l;
 }
 
-static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
+static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct xfrm_usersa_info *p;
@@ -2387,7 +2495,7 @@ nla_put_failure:
 	return -1;
 }
 
-static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 
 	switch (c->event) {
@@ -2546,7 +2654,7 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
 }
 
 static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
-			   int dir, struct km_event *c)
+			   int dir, const struct km_event *c)
 {
 	struct xfrm_user_polexpire *upe;
 	struct nlmsghdr *nlh;
@@ -2576,7 +2684,7 @@ nlmsg_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct net *net = xp_net(xp);
 	struct sk_buff *skb;
@@ -2591,7 +2699,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
-static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct net *net = xp_net(xp);
 	struct xfrm_userpolicy_info *p;
@@ -2656,7 +2764,7 @@ nlmsg_failure:
 	return -1;
 }
 
-static int xfrm_notify_policy_flush(struct km_event *c)
+static int xfrm_notify_policy_flush(const struct km_event *c)
 {
 	struct net *net = c->net;
 	struct nlmsghdr *nlh;
@@ -2681,7 +2789,7 @@ nlmsg_failure:
 	return -1;
 }
 
-static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 
 	switch (c->event) {