Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlan.c | 2
-rw-r--r-- net/8021q/vlan_dev.c | 15
-rw-r--r-- net/9p/Makefile | 1
-rw-r--r-- net/9p/client.c | 168
-rw-r--r-- net/9p/protocol.c | 50
-rw-r--r-- net/9p/trans_common.c | 97
-rw-r--r-- net/9p/trans_common.h | 32
-rw-r--r-- net/9p/trans_fd.c | 54
-rw-r--r-- net/9p/trans_rdma.c | 7
-rw-r--r-- net/9p/trans_virtio.c | 185
-rw-r--r-- net/9p/util.c | 2
-rw-r--r-- net/Kconfig | 6
-rw-r--r-- net/Makefile | 4
-rw-r--r-- net/appletalk/ddp.c | 43
-rw-r--r-- net/atm/clip.c | 8
-rw-r--r-- net/batman-adv/Makefile | 2
-rw-r--r-- net/batman-adv/aggregation.c | 10
-rw-r--r-- net/batman-adv/aggregation.h | 6
-rw-r--r-- net/batman-adv/bat_debugfs.c | 6
-rw-r--r-- net/batman-adv/bat_debugfs.h | 2
-rw-r--r-- net/batman-adv/bat_sysfs.c | 53
-rw-r--r-- net/batman-adv/bat_sysfs.h | 2
-rw-r--r-- net/batman-adv/bitarray.c | 2
-rw-r--r-- net/batman-adv/bitarray.h | 2
-rw-r--r-- net/batman-adv/gateway_client.c | 142
-rw-r--r-- net/batman-adv/gateway_client.h | 2
-rw-r--r-- net/batman-adv/gateway_common.c | 2
-rw-r--r-- net/batman-adv/gateway_common.h | 2
-rw-r--r-- net/batman-adv/hard-interface.c | 420
-rw-r--r-- net/batman-adv/hard-interface.h | 21
-rw-r--r-- net/batman-adv/hash.c | 28
-rw-r--r-- net/batman-adv/hash.h | 119
-rw-r--r-- net/batman-adv/icmp_socket.c | 43
-rw-r--r-- net/batman-adv/icmp_socket.h | 4
-rw-r--r-- net/batman-adv/main.c | 16
-rw-r--r-- net/batman-adv/main.h | 29
-rw-r--r-- net/batman-adv/originator.c | 254
-rw-r--r-- net/batman-adv/originator.h | 52
-rw-r--r-- net/batman-adv/packet.h | 3
-rw-r--r-- net/batman-adv/ring_buffer.c | 2
-rw-r--r-- net/batman-adv/ring_buffer.h | 2
-rw-r--r-- net/batman-adv/routing.c | 1000
-rw-r--r-- net/batman-adv/routing.h | 30
-rw-r--r-- net/batman-adv/send.c | 110
-rw-r--r-- net/batman-adv/send.h | 12
-rw-r--r-- net/batman-adv/soft-interface.c | 77
-rw-r--r-- net/batman-adv/soft-interface.h | 5
-rw-r--r-- net/batman-adv/translation-table.c | 208
-rw-r--r-- net/batman-adv/translation-table.h | 4
-rw-r--r-- net/batman-adv/types.h | 50
-rw-r--r-- net/batman-adv/unicast.c | 121
-rw-r--r-- net/batman-adv/unicast.h | 27
-rw-r--r-- net/batman-adv/vis.c | 194
-rw-r--r-- net/batman-adv/vis.h | 2
-rw-r--r-- net/bluetooth/Kconfig | 20
-rw-r--r-- net/bluetooth/Makefile | 4
-rw-r--r-- net/bluetooth/af_bluetooth.c | 51
-rw-r--r-- net/bluetooth/bnep/core.c | 2
-rw-r--r-- net/bluetooth/bnep/sock.c | 1
-rw-r--r-- net/bluetooth/cmtp/capi.c | 3
-rw-r--r-- net/bluetooth/cmtp/core.c | 11
-rw-r--r-- net/bluetooth/hci_conn.c | 80
-rw-r--r-- net/bluetooth/hci_core.c | 345
-rw-r--r-- net/bluetooth/hci_event.c | 691
-rw-r--r-- net/bluetooth/hci_sock.c | 8
-rw-r--r-- net/bluetooth/hci_sysfs.c | 58
-rw-r--r-- net/bluetooth/hidp/core.c | 208
-rw-r--r-- net/bluetooth/hidp/hidp.h | 15
-rw-r--r-- net/bluetooth/l2cap_core.c (renamed from net/bluetooth/l2cap.c) | 1521
-rw-r--r-- net/bluetooth/l2cap_sock.c | 1156
-rw-r--r-- net/bluetooth/mgmt.c | 1531
-rw-r--r-- net/bluetooth/rfcomm/core.c | 2
-rw-r--r-- net/bluetooth/rfcomm/tty.c | 6
-rw-r--r-- net/bluetooth/sco.c | 24
-rw-r--r-- net/bridge/Kconfig | 1
-rw-r--r-- net/bridge/br_device.c | 21
-rw-r--r-- net/bridge/br_if.c | 15
-rw-r--r-- net/bridge/br_input.c | 25
-rw-r--r-- net/bridge/br_netfilter.c | 17
-rw-r--r-- net/bridge/br_private.h | 4
-rw-r--r-- net/bridge/br_stp.c | 39
-rw-r--r-- net/bridge/br_stp_timer.c | 1
-rw-r--r-- net/bridge/netfilter/ebt_ip6.c | 46
-rw-r--r-- net/bridge/netfilter/ebtables.c | 3
-rw-r--r-- net/caif/cfcnfg.c | 2
-rw-r--r-- net/caif/cfdgml.c | 1
-rw-r--r-- net/caif/cfserl.c | 1
-rw-r--r-- net/caif/cfutill.c | 2
-rw-r--r-- net/caif/cfveil.c | 2
-rw-r--r-- net/ceph/armor.c | 4
-rw-r--r-- net/ceph/ceph_common.c | 1
-rw-r--r-- net/ceph/osd_client.c | 624
-rw-r--r-- net/core/dev.c | 475
-rw-r--r-- net/core/dev_addr_lists.c | 4
-rw-r--r-- net/core/drop_monitor.c | 2
-rw-r--r-- net/core/dst.c | 43
-rw-r--r-- net/core/ethtool.c | 607
-rw-r--r-- net/core/fib_rules.c | 6
-rw-r--r-- net/core/filter.c | 6
-rw-r--r-- net/core/flow.c | 14
-rw-r--r-- net/core/neighbour.c | 13
-rw-r--r-- net/core/net-sysfs.c | 17
-rw-r--r-- net/core/netpoll.c | 13
-rw-r--r-- net/core/pktgen.c | 240
-rw-r--r-- net/core/rtnetlink.c | 86
-rw-r--r-- net/core/scm.c | 2
-rw-r--r-- net/core/skbuff.c | 8
-rw-r--r-- net/dcb/dcbnl.c | 148
-rw-r--r-- net/dccp/ccids/ccid2.c | 9
-rw-r--r-- net/dccp/ipv4.c | 50
-rw-r--r-- net/dccp/ipv6.c | 188
-rw-r--r-- net/decnet/af_decnet.c | 16
-rw-r--r-- net/decnet/dn_fib.c | 23
-rw-r--r-- net/decnet/dn_nsp_out.c | 16
-rw-r--r-- net/decnet/dn_route.c | 300
-rw-r--r-- net/decnet/dn_rules.c | 17
-rw-r--r-- net/decnet/dn_table.c | 7
-rw-r--r-- net/dsa/mv88e6060.c | 7
-rw-r--r-- net/econet/af_econet.c | 2
-rw-r--r-- net/ipv4/Kconfig | 42
-rw-r--r-- net/ipv4/Makefile | 4
-rw-r--r-- net/ipv4/af_inet.c | 46
-rw-r--r-- net/ipv4/ah4.c | 27
-rw-r--r-- net/ipv4/arp.c | 25
-rw-r--r-- net/ipv4/datagram.c | 11
-rw-r--r-- net/ipv4/devinet.c | 84
-rw-r--r-- net/ipv4/esp4.c | 104
-rw-r--r-- net/ipv4/fib_frontend.c | 105
-rw-r--r-- net/ipv4/fib_hash.c | 1133
-rw-r--r-- net/ipv4/fib_lookup.h | 10
-rw-r--r-- net/ipv4/fib_rules.c | 25
-rw-r--r-- net/ipv4/fib_semantics.c | 257
-rw-r--r-- net/ipv4/fib_trie.c | 272
-rw-r--r-- net/ipv4/icmp.c | 240
-rw-r--r-- net/ipv4/igmp.c | 45
-rw-r--r-- net/ipv4/inet_connection_sock.c | 27
-rw-r--r-- net/ipv4/inetpeer.c | 148
-rw-r--r-- net/ipv4/ip_gre.c | 58
-rw-r--r-- net/ipv4/ip_input.c | 2
-rw-r--r-- net/ipv4/ip_output.c | 345
-rw-r--r-- net/ipv4/ipip.c | 41
-rw-r--r-- net/ipv4/ipmr.c | 79
-rw-r--r-- net/ipv4/netfilter.c | 36
-rw-r--r-- net/ipv4/netfilter/Kconfig | 13
-rw-r--r-- net/ipv4/netfilter/Makefile | 1
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 5
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 9
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 12
-rw-r--r-- net/ipv4/netfilter/ipt_LOG.c | 3
-rw-r--r-- net/ipv4/netfilter/ipt_addrtype.c | 134
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 17
-rw-r--r-- net/ipv4/netfilter/nf_nat_amanda.c | 8
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c | 33
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c | 9
-rw-r--r-- net/ipv4/netfilter/nf_nat_standalone.c | 9
-rw-r--r-- net/ipv4/raw.c | 39
-rw-r--r-- net/ipv4/route.c | 1181
-rw-r--r-- net/ipv4/syncookies.c | 25
-rw-r--r-- net/ipv4/tcp.c | 20
-rw-r--r-- net/ipv4/tcp_bic.c | 2
-rw-r--r-- net/ipv4/tcp_cubic.c | 47
-rw-r--r-- net/ipv4/tcp_highspeed.c | 2
-rw-r--r-- net/ipv4/tcp_htcp.c | 2
-rw-r--r-- net/ipv4/tcp_hybla.c | 2
-rw-r--r-- net/ipv4/tcp_illinois.c | 2
-rw-r--r-- net/ipv4/tcp_input.c | 4
-rw-r--r-- net/ipv4/tcp_ipv4.c | 37
-rw-r--r-- net/ipv4/tcp_lp.c | 2
-rw-r--r-- net/ipv4/tcp_scalable.c | 2
-rw-r--r-- net/ipv4/tcp_timer.c | 3
-rw-r--r-- net/ipv4/tcp_vegas.c | 2
-rw-r--r-- net/ipv4/tcp_veno.c | 2
-rw-r--r-- net/ipv4/tcp_westwood.c | 2
-rw-r--r-- net/ipv4/tcp_yeah.c | 2
-rw-r--r-- net/ipv4/udp.c | 139
-rw-r--r-- net/ipv4/xfrm4_policy.c | 74
-rw-r--r-- net/ipv4/xfrm4_state.c | 20
-rw-r--r-- net/ipv6/addrconf.c | 3
-rw-r--r-- net/ipv6/af_inet6.c | 49
-rw-r--r-- net/ipv6/ah6.c | 2
-rw-r--r-- net/ipv6/datagram.c | 88
-rw-r--r-- net/ipv6/esp6.c | 109
-rw-r--r-- net/ipv6/exthdrs.c | 12
-rw-r--r-- net/ipv6/fib6_rules.c | 19
-rw-r--r-- net/ipv6/icmp.c | 226
-rw-r--r-- net/ipv6/inet6_connection_sock.c | 81
-rw-r--r-- net/ipv6/inet6_hashtables.c | 2
-rw-r--r-- net/ipv6/ip6_fib.c | 4
-rw-r--r-- net/ipv6/ip6_flowlabel.c | 6
-rw-r--r-- net/ipv6/ip6_output.c | 156
-rw-r--r-- net/ipv6/ip6_tunnel.c | 83
-rw-r--r-- net/ipv6/ip6mr.c | 56
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 10
-rw-r--r-- net/ipv6/mcast.c | 27
-rw-r--r-- net/ipv6/mip6.c | 16
-rw-r--r-- net/ipv6/ndisc.c | 22
-rw-r--r-- net/ipv6/netfilter.c | 19
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 9
-rw-r--r-- net/ipv6/netfilter/ip6t_LOG.c | 3
-rw-r--r-- net/ipv6/netfilter/ip6t_REJECT.c | 21
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c | 3
-rw-r--r-- net/ipv6/raw.c | 106
-rw-r--r-- net/ipv6/route.c | 188
-rw-r--r-- net/ipv6/sit.c | 58
-rw-r--r-- net/ipv6/syncookies.c | 31
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c | 18
-rw-r--r-- net/ipv6/tcp_ipv6.c | 169
-rw-r--r-- net/ipv6/udp.c | 91
-rw-r--r-- net/ipv6/xfrm6_policy.c | 49
-rw-r--r-- net/ipv6/xfrm6_state.c | 20
-rw-r--r-- net/ipx/Kconfig | 1
-rw-r--r-- net/ipx/af_ipx.c | 54
-rw-r--r-- net/irda/ircomm/ircomm_tty_ioctl.c | 12
-rw-r--r-- net/key/af_key.c | 243
-rw-r--r-- net/l2tp/l2tp_eth.c | 2
-rw-r--r-- net/l2tp/l2tp_ip.c | 36
-rw-r--r-- net/llc/llc_input.c | 25
-rw-r--r-- net/mac80211/Kconfig | 4
-rw-r--r-- net/mac80211/agg-rx.c | 7
-rw-r--r-- net/mac80211/agg-tx.c | 23
-rw-r--r-- net/mac80211/cfg.c | 116
-rw-r--r-- net/mac80211/chan.c | 3
-rw-r--r-- net/mac80211/debugfs.c | 6
-rw-r--r-- net/mac80211/debugfs_netdev.c | 122
-rw-r--r-- net/mac80211/driver-ops.h | 67
-rw-r--r-- net/mac80211/driver-trace.h | 274
-rw-r--r-- net/mac80211/ht.c | 5
-rw-r--r-- net/mac80211/ibss.c | 21
-rw-r--r-- net/mac80211/ieee80211_i.h | 17
-rw-r--r-- net/mac80211/iface.c | 9
-rw-r--r-- net/mac80211/key.h | 1
-rw-r--r-- net/mac80211/main.c | 88
-rw-r--r-- net/mac80211/mesh.c | 4
-rw-r--r-- net/mac80211/mlme.c | 144
-rw-r--r-- net/mac80211/offchannel.c | 68
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.c | 60
-rw-r--r-- net/mac80211/rc80211_pid.h | 3
-rw-r--r-- net/mac80211/rx.c | 130
-rw-r--r-- net/mac80211/scan.c | 138
-rw-r--r-- net/mac80211/sta_info.c | 3
-rw-r--r-- net/mac80211/sta_info.h | 6
-rw-r--r-- net/mac80211/status.c | 10
-rw-r--r-- net/mac80211/tx.c | 196
-rw-r--r-- net/mac80211/util.c | 6
-rw-r--r-- net/mac80211/work.c | 122
-rw-r--r-- net/mac80211/wpa.c | 39
-rw-r--r-- net/netfilter/Kconfig | 77
-rw-r--r-- net/netfilter/Makefile | 10
-rw-r--r-- net/netfilter/core.c | 20
-rw-r--r-- net/netfilter/ipset/Kconfig | 122
-rw-r--r-- net/netfilter/ipset/Makefile | 24
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ip.c | 587
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 652
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_port.c | 515
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 1683
-rw-r--r-- net/netfilter/ipset/ip_set_getport.c | 141
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ip.c | 464
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipport.c | 530
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportip.c | 548
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 614
-rw-r--r-- net/netfilter/ipset/ip_set_hash_net.c | 458
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netport.c | 564
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c | 584
-rw-r--r-- net/netfilter/ipset/pfxlen.c | 291
-rw-r--r-- net/netfilter/ipvs/ip_vs_app.c | 75
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 248
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 456
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 1002
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c | 171
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c | 61
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c | 99
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c | 114
-rw-r--r-- net/netfilter/ipvs/ip_vs_lc.c | 20
-rw-r--r-- net/netfilter/ipvs/ip_vs_nfct.c | 6
-rw-r--r-- net/netfilter/ipvs/ip_vs_nq.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe.c | 17
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c | 12
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c | 129
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 45
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 153
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 142
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c | 110
-rw-r--r-- net/netfilter/ipvs/ip_vs_rr.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_sched.c | 25
-rw-r--r-- net/netfilter/ipvs/ip_vs_sed.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 1238
-rw-r--r-- net/netfilter/ipvs/ip_vs_wlc.c | 22
-rw-r--r-- net/netfilter/ipvs/ip_vs_wrr.c | 14
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 117
-rw-r--r-- net/netfilter/nf_conntrack_broadcast.c | 82
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 58
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 34
-rw-r--r-- net/netfilter/nf_conntrack_extend.c | 11
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c | 32
-rw-r--r-- net/netfilter/nf_conntrack_helper.c | 20
-rw-r--r-- net/netfilter/nf_conntrack_netbios_ns.c | 74
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 49
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 24
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c | 3
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 1
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 18
-rw-r--r-- net/netfilter/nf_conntrack_snmp.c | 77
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 45
-rw-r--r-- net/netfilter/nf_conntrack_timestamp.c | 120
-rw-r--r-- net/netfilter/nf_log.c | 6
-rw-r--r-- net/netfilter/nf_queue.c | 82
-rw-r--r-- net/netfilter/nfnetlink_log.c | 9
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 22
-rw-r--r-- net/netfilter/x_tables.c | 124
-rw-r--r-- net/netfilter/xt_AUDIT.c | 222
-rw-r--r-- net/netfilter/xt_CLASSIFY.c | 36
-rw-r--r-- net/netfilter/xt_IDLETIMER.c | 2
-rw-r--r-- net/netfilter/xt_LED.c | 2
-rw-r--r-- net/netfilter/xt_NFQUEUE.c | 34
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 15
-rw-r--r-- net/netfilter/xt_TEE.c | 27
-rw-r--r-- net/netfilter/xt_addrtype.c | 229
-rw-r--r-- net/netfilter/xt_connlimit.c | 99
-rw-r--r-- net/netfilter/xt_conntrack.c | 80
-rw-r--r-- net/netfilter/xt_cpu.c | 2
-rw-r--r-- net/netfilter/xt_devgroup.c | 82
-rw-r--r-- net/netfilter/xt_iprange.c | 18
-rw-r--r-- net/netfilter/xt_ipvs.c | 2
-rw-r--r-- net/netfilter/xt_set.c | 359
-rw-r--r-- net/netlabel/netlabel_user.h | 6
-rw-r--r-- net/netlink/af_netlink.c | 9
-rw-r--r-- net/packet/af_packet.c | 41
-rw-r--r-- net/phonet/Kconfig | 12
-rw-r--r-- net/phonet/af_phonet.c | 32
-rw-r--r-- net/phonet/pep.c | 834
-rw-r--r-- net/phonet/socket.c | 126
-rw-r--r-- net/rds/ib.c | 9
-rw-r--r-- net/rds/ib.h | 2
-rw-r--r-- net/rds/ib_rdma.c | 27
-rw-r--r-- net/rds/ib_send.c | 5
-rw-r--r-- net/rds/loop.c | 11
-rw-r--r-- net/rds/rds.h | 1
-rw-r--r-- net/rose/af_rose.c | 7
-rw-r--r-- net/rose/rose_route.c | 28
-rw-r--r-- net/rxrpc/ar-key.c | 19
-rw-r--r-- net/rxrpc/ar-peer.c | 28
-rw-r--r-- net/sched/Kconfig | 39
-rw-r--r-- net/sched/Makefile | 4
-rw-r--r-- net/sched/act_api.c | 46
-rw-r--r-- net/sched/act_csum.c | 2
-rw-r--r-- net/sched/act_gact.c | 8
-rw-r--r-- net/sched/act_ipt.c | 16
-rw-r--r-- net/sched/act_mirred.c | 4
-rw-r--r-- net/sched/act_nat.c | 2
-rw-r--r-- net/sched/act_pedit.c | 10
-rw-r--r-- net/sched/act_police.c | 9
-rw-r--r-- net/sched/act_simple.c | 10
-rw-r--r-- net/sched/act_skbedit.c | 8
-rw-r--r-- net/sched/cls_api.c | 33
-rw-r--r-- net/sched/cls_basic.c | 17
-rw-r--r-- net/sched/cls_cgroup.c | 8
-rw-r--r-- net/sched/cls_flow.c | 6
-rw-r--r-- net/sched/cls_fw.c | 38
-rw-r--r-- net/sched/cls_route.c | 126
-rw-r--r-- net/sched/cls_rsvp.h | 95
-rw-r--r-- net/sched/cls_tcindex.c | 2
-rw-r--r-- net/sched/cls_u32.c | 89
-rw-r--r-- net/sched/em_cmp.c | 47
-rw-r--r-- net/sched/em_meta.c | 48
-rw-r--r-- net/sched/em_nbyte.c | 3
-rw-r--r-- net/sched/em_text.c | 3
-rw-r--r-- net/sched/em_u32.c | 2
-rw-r--r-- net/sched/ematch.c | 37
-rw-r--r-- net/sched/sch_api.c | 173
-rw-r--r-- net/sched/sch_atm.c | 16
-rw-r--r-- net/sched/sch_cbq.c | 362
-rw-r--r-- net/sched/sch_choke.c | 688
-rw-r--r-- net/sched/sch_dsmark.c | 21
-rw-r--r-- net/sched/sch_fifo.c | 50
-rw-r--r-- net/sched/sch_generic.c | 58
-rw-r--r-- net/sched/sch_gred.c | 85
-rw-r--r-- net/sched/sch_hfsc.c | 37
-rw-r--r-- net/sched/sch_htb.c | 106
-rw-r--r-- net/sched/sch_mq.c | 1
-rw-r--r-- net/sched/sch_mqprio.c | 418
-rw-r--r-- net/sched/sch_multiq.c | 8
-rw-r--r-- net/sched/sch_netem.c | 411
-rw-r--r-- net/sched/sch_prio.c | 34
-rw-r--r-- net/sched/sch_red.c | 61
-rw-r--r-- net/sched/sch_sfb.c | 709
-rw-r--r-- net/sched/sch_sfq.c | 67
-rw-r--r-- net/sched/sch_tbf.c | 39
-rw-r--r-- net/sched/sch_teql.c | 36
-rw-r--r-- net/sctp/associola.c | 2
-rw-r--r-- net/sctp/input.c | 3
-rw-r--r-- net/sctp/ipv6.c | 42
-rw-r--r-- net/sctp/outqueue.c | 2
-rw-r--r-- net/sctp/protocol.c | 33
-rw-r--r-- net/sctp/sm_make_chunk.c | 3
-rw-r--r-- net/sctp/socket.c | 11
-rw-r--r-- net/sctp/tsnmap.c | 2
-rw-r--r-- net/sctp/ulpqueue.c | 7
-rw-r--r-- net/socket.c | 145
-rw-r--r-- net/sunrpc/auth_gss/gss_mech_switch.c | 38
-rw-r--r-- net/sunrpc/sched.c | 2
-rw-r--r-- net/sunrpc/svcsock.c | 32
-rw-r--r-- net/tipc/Kconfig | 12
-rw-r--r-- net/tipc/addr.c | 15
-rw-r--r-- net/tipc/addr.h | 17
-rw-r--r-- net/tipc/bcast.c | 47
-rw-r--r-- net/tipc/bcast.h | 3
-rw-r--r-- net/tipc/bearer.c | 116
-rw-r--r-- net/tipc/bearer.h | 73
-rw-r--r-- net/tipc/config.c | 31
-rw-r--r-- net/tipc/core.c | 9
-rw-r--r-- net/tipc/core.h | 4
-rw-r--r-- net/tipc/discover.c | 140
-rw-r--r-- net/tipc/discover.h | 9
-rw-r--r-- net/tipc/link.c | 130
-rw-r--r-- net/tipc/link.h | 29
-rw-r--r-- net/tipc/msg.c | 41
-rw-r--r-- net/tipc/msg.h | 64
-rw-r--r-- net/tipc/name_distr.c | 18
-rw-r--r-- net/tipc/net.c | 32
-rw-r--r-- net/tipc/net.h | 19
-rw-r--r-- net/tipc/node.c | 125
-rw-r--r-- net/tipc/node.h | 36
-rw-r--r-- net/tipc/node_subscr.c | 21
-rw-r--r-- net/tipc/node_subscr.h | 3
-rw-r--r-- net/tipc/port.c | 306
-rw-r--r-- net/tipc/port.h | 73
-rw-r--r-- net/tipc/socket.c | 76
-rw-r--r-- net/tipc/subscr.c | 13
-rw-r--r-- net/unix/af_unix.c | 91
-rw-r--r-- net/unix/garbage.c | 2
-rw-r--r-- net/wanrouter/wanmain.c | 2
-rw-r--r-- net/wireless/core.c | 20
-rw-r--r-- net/wireless/ethtool.c | 33
-rw-r--r-- net/wireless/nl80211.c | 62
-rw-r--r-- net/wireless/reg.c | 45
-rw-r--r-- net/wireless/reg.h | 1
-rw-r--r-- net/wireless/util.c | 47
-rw-r--r-- net/wireless/wext-compat.c | 5
-rw-r--r-- net/x25/Kconfig | 1
-rw-r--r-- net/x25/af_x25.c | 58
-rw-r--r-- net/x25/x25_out.c | 7
-rw-r--r-- net/xfrm/Makefile | 2
-rw-r--r-- net/xfrm/xfrm_algo.c | 8
-rw-r--r-- net/xfrm/xfrm_hash.h | 32
-rw-r--r-- net/xfrm/xfrm_input.c | 13
-rw-r--r-- net/xfrm/xfrm_output.c | 15
-rw-r--r-- net/xfrm/xfrm_policy.c | 218
-rw-r--r-- net/xfrm/xfrm_replay.c | 534
-rw-r--r-- net/xfrm/xfrm_state.c | 190
-rw-r--r-- net/xfrm/xfrm_user.c | 211
452 files changed, 31091 insertions(+), 13305 deletions(-)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6e64f7c6a2e..7850412f52b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -327,7 +327,7 @@ static void vlan_sync_address(struct net_device *dev,
 static void vlan_transfer_features(struct net_device *dev,
 				   struct net_device *vlandev)
 {
-	unsigned long old_features = vlandev->features;
+	u32 old_features = vlandev->features;
 
 	vlandev->features &= ~dev->vlan_features;
 	vlandev->features |= dev->features & dev->vlan_features;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index be737539f34..e34ea9e5e28 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -625,6 +625,19 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 	rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
 	return rc;
 }
+
+static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
+				    struct scatterlist *sgl, unsigned int sgc)
+{
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	const struct net_device_ops *ops = real_dev->netdev_ops;
+	int rc = 0;
+
+	if (ops->ndo_fcoe_ddp_target)
+		rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc);
+
+	return rc;
+}
 #endif
 
 static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
@@ -707,6 +720,7 @@ static int vlan_dev_init(struct net_device *dev)
 	dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
 #endif
 
+	dev->needed_headroom = real_dev->needed_headroom;
 	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
@@ -858,6 +872,7 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
 	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
 	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
+	.ndo_fcoe_ddp_target	= vlan_dev_fcoe_ddp_target,
 #endif
 };
 
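The new ndo_fcoe_ddp_target hook above follows the same delegation idiom as the existing VLAN FCoE callbacks. A minimal sketch of that idiom, using a hypothetical ndo_example_op hook (not a real netdev operation), for readers unfamiliar with how a VLAN device proxies work to its lower device:

	static int vlan_dev_example_op(struct net_device *dev, int arg)
	{
		/* Resolve the real (lower) device behind this VLAN interface. */
		struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
		const struct net_device_ops *ops = real_dev->netdev_ops;
		int rc = 0;

		/* Forward only if the underlying driver implements the hook;
		 * otherwise return 0 as a harmless no-op. */
		if (ops->ndo_example_op)
			rc = ops->ndo_example_op(real_dev, arg);
		return rc;
	}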
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 198a640d53a..a0874cc1f71 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
 	util.o \
 	protocol.o \
 	trans_fd.o \
+	trans_common.o \
 
 9pnet_virtio-objs := \
 	trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a848bca9fbf..2ccbf04d37d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -223,16 +223,29 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 
 	req = &c->reqs[row][col];
 	if (!req->tc) {
-		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
 		if (!req->wq) {
 			printk(KERN_ERR "Couldn't grow tag array\n");
 			return ERR_PTR(-ENOMEM);
 		}
 		init_waitqueue_head(req->wq);
-		req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-				GFP_KERNEL);
-		req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-				GFP_KERNEL);
+		if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+				P9_TRANS_PREF_PAYLOAD_SEP) {
+			int alloc_msize = min(c->msize, 4096);
+			req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+					GFP_NOFS);
+			req->tc->capacity = alloc_msize;
+			req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+					GFP_NOFS);
+			req->rc->capacity = alloc_msize;
+		} else {
+			req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+					GFP_NOFS);
+			req->tc->capacity = c->msize;
+			req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+					GFP_NOFS);
+			req->rc->capacity = c->msize;
+		}
 		if ((!req->tc) || (!req->rc)) {
 			printk(KERN_ERR "Couldn't grow tag array\n");
 			kfree(req->tc);
@@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 			return ERR_PTR(-ENOMEM);
 		}
 		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
-		req->tc->capacity = c->msize;
 		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
-		req->rc->capacity = c->msize;
 	}
 
 	p9pdu_reset(req->tc);
@@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 {
 	int8_t type;
 	int err;
+	int ecode;
 
 	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
 	if (err) {
@@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 		return err;
 	}
 
-	if (type == P9_RERROR || type == P9_RLERROR) {
-		int ecode;
-
-		if (!p9_is_proto_dotl(c)) {
-			char *ename;
+	if (type != P9_RERROR && type != P9_RLERROR)
+		return 0;
 
-			err = p9pdu_readf(req->rc, c->proto_version, "s?d",
-					&ename, &ecode);
-			if (err)
-				goto out_err;
+	if (!p9_is_proto_dotl(c)) {
+		char *ename;
+
+		if (req->tc->pbuf_size) {
+			/* Handle user buffers */
+			size_t len = req->rc->size - req->rc->offset;
+			if (req->tc->pubuf) {
+				/* User Buffer */
+				err = copy_from_user(
+					&req->rc->sdata[req->rc->offset],
+					req->tc->pubuf, len);
+				if (err) {
+					err = -EFAULT;
+					goto out_err;
+				}
+			} else {
+				/* Kernel Buffer */
+				memmove(&req->rc->sdata[req->rc->offset],
+					req->tc->pkbuf, len);
+			}
+		}
+		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+				&ename, &ecode);
+		if (err)
+			goto out_err;
 
 		if (p9_is_proto_dotu(c))
 			err = -ecode;
 
 		if (!err || !IS_ERR_VALUE(err)) {
 			err = p9_errstr2errno(ename, strlen(ename));
 
-			P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+			P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
+				   ename);
 
 			kfree(ename);
-		}
-	} else {
-		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
-		err = -ecode;
-
-		P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
 		}
+	} else {
+		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+		err = -ecode;
+
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+	}
 
-	} else
-		err = 0;
 
 	return err;
 
@@ -1191,6 +1220,27 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fsync);
 
+int p9_client_sync_fs(struct p9_fid *fid)
+{
+	int err = 0;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
+
+	clnt = fid->clnt;
+	req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_sync_fs);
+
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
@@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 	if (count < rsize)
 		rsize = count;
 
-	req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+	/* Don't bother zerocopy form small IO (< 1024) */
+	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+			P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+		req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
+				    rsize, data, udata);
+	} else {
+		req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
+				    rsize);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-	if (data) {
-		memmove(data, dataptr, count);
-	} else {
-		err = copy_to_user(udata, dataptr, count);
-		if (err) {
-			err = -EFAULT;
-			goto free_and_error;
+	if (!req->tc->pbuf_size) {
+		if (data) {
+			memmove(data, dataptr, count);
+		} else {
+			err = copy_to_user(udata, dataptr, count);
+			if (err) {
+				err = -EFAULT;
+				goto free_and_error;
+			}
 		}
 	}
 	p9_free_req(clnt, req);
@@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 
 	if (count < rsize)
 		rsize = count;
-	if (data)
-		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
-				    rsize, data);
-	else
-		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
-				    rsize, udata);
+
+	/* Don't bother zerocopy form small IO (< 1024) */
+	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+			P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
+				    rsize, data, udata);
+	} else {
+
+		if (data)
+			req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
+					    offset, rsize, data);
+		else
+			req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
+					    offset, rsize, udata);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 	if (count < rsize)
 		rsize = count;
 
-	req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize);
+	if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+			P9_TRANS_PREF_PAYLOAD_SEP) {
+		req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
+				    offset, rsize, data);
+	} else {
+		req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
+				    offset, rsize);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
 
-	if (data)
+	if (!req->tc->pbuf_size && data)
 		memmove(data, dataptr, count);
 
 	p9_free_req(clnt, req);
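For reference, the read, write and readdir paths above all gate zerocopy on the same two conditions: the transport must advertise P9_TRANS_PREF_PAYLOAD_SEP and the transfer must exceed 1024 bytes. A sketch of that predicate, factored into a helper purely for illustration (the patch itself open-codes the test at each call site):

	static inline int p9_payload_sep(struct p9_client *clnt, int rsize)
	{
		/* Zerocopy only pays off when the transport can carry the
		 * payload in separate buffers and the IO is large enough
		 * that pinning pages beats a memcpy. */
		return ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
				P9_TRANS_PREF_PAYLOAD_SEP) && rsize > 1024;
	}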
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 1e308f21092..8a4084fa8b5 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 	return size - len;
 }
 
+static size_t
+pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
+	      size_t size)
+{
+	BUG_ON(pdu->size > P9_IOHDRSZ);
+	pdu->pubuf = (char __user *)udata;
+	pdu->pkbuf = (char *)kdata;
+	pdu->pbuf_size = size;
+	return 0;
+}
+
+static size_t
+pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
+{
+	BUG_ON(pdu->size > P9_READDIRHDRSZ);
+	pdu->pkbuf = (char *)kdata;
+	pdu->pbuf_size = size;
+	return 0;
+}
+
 /*
 	b - int8_t
 	w - int16_t
@@ -185,7 +205,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			if (errcode)
 				break;
 
-			*sptr = kmalloc(len + 1, GFP_KERNEL);
+			*sptr = kmalloc(len + 1, GFP_NOFS);
 			if (*sptr == NULL) {
 				errcode = -EFAULT;
 				break;
@@ -253,7 +273,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				if (!errcode) {
 					*wnames =
 					    kmalloc(sizeof(char *) * *nwname,
-						    GFP_KERNEL);
+						    GFP_NOFS);
 					if (!*wnames)
 						errcode = -ENOMEM;
 				}
@@ -297,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					*wqids =
 					    kmalloc(*nwqid *
 						    sizeof(struct p9_qid),
-						    GFP_KERNEL);
+						    GFP_NOFS);
 					if (*wqids == NULL)
 						errcode = -ENOMEM;
 				}
@@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					errcode = -EFAULT;
 			}
 			break;
+		case 'E':{
+				int32_t cnt = va_arg(ap, int32_t);
+				const char *k = va_arg(ap, const void *);
+				const char *u = va_arg(ap, const void *);
+				errcode = p9pdu_writef(pdu, proto_version, "d",
+						cnt);
+				if (!errcode && pdu_write_urw(pdu, k, u, cnt))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'F':{
+				int32_t cnt = va_arg(ap, int32_t);
+				const char *k = va_arg(ap, const void *);
+				errcode = p9pdu_writef(pdu, proto_version, "d",
+						cnt);
+				if (!errcode && pdu_write_readdir(pdu, k, cnt))
+					errcode = -EFAULT;
+			}
+			break;
 		case 'U':{
 				int32_t count = va_arg(ap, int32_t);
 				const char __user *udata =
@@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read);
 
 int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
 {
+	pdu->id = type;
 	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
 }
 
@@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
 {
 	pdu->offset = 0;
 	pdu->size = 0;
+	pdu->private = NULL;
+	pdu->pubuf = NULL;
+	pdu->pkbuf = NULL;
+	pdu->pbuf_size = 0;
 }
 
 int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
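The two new format characters extend the mini-language interpreted by p9pdu_vwritef(): 'E' consumes a count plus a kernel and a user buffer pointer (read/write payloads), while 'F' consumes a count plus a kernel buffer only (readdir payloads). Illustrative call sites, mirroring the client.c hunks above:

	/* 'E': count, kernel buffer, user buffer - one of the two is NULL */
	req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
			    rsize, data, udata);

	/* 'F': count, kernel buffer only */
	req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, offset,
			    rsize, data);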
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
new file mode 100644
index 00000000000..9172ab78fcb
--- /dev/null
+++ b/net/9p/trans_common.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/scatterlist.h>
+#include "trans_common.h"
+
+/**
+ * p9_release_req_pages - Release pages after the transaction.
+ * @*private: PDU's private page of struct trans_rpage_info
+ */
+void
+p9_release_req_pages(struct trans_rpage_info *rpinfo)
+{
+	int i = 0;
+
+	while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
+		put_page(rpinfo->rp_data[i]);
+		i++;
+	}
+}
+EXPORT_SYMBOL(p9_release_req_pages);
+
+/**
+ * p9_nr_pages - Return number of pages needed to accomodate the payload.
+ */
+int
+p9_nr_pages(struct p9_req_t *req)
+{
+	unsigned long start_page, end_page;
+	start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT;
+	end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size +
+			PAGE_SIZE - 1) >> PAGE_SHIFT;
+	return end_page - start_page;
+}
+EXPORT_SYMBOL(p9_nr_pages);
+
+/**
+ * payload_gup - Translates user buffer into kernel pages and
+ * pins them either for read/write through get_user_pages_fast().
+ * @req: Request to be sent to server.
+ * @pdata_off: data offset into the first page after translation (gup).
+ * @pdata_len: Total length of the IO. gup may not return requested # of pages.
+ * @nr_pages: number of pages to accomodate the payload
+ * @rw: Indicates if the pages are for read or write.
+ */
+int
+p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
+		int nr_pages, u8 rw)
+{
+	uint32_t first_page_bytes = 0;
+	uint32_t pdata_mapped_pages;
+	struct trans_rpage_info *rpinfo;
+
+	*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
+
+	if (*pdata_off)
+		first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
+				req->tc->pbuf_size);
+
+	rpinfo = req->tc->private;
+	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
+			nr_pages, rw, &rpinfo->rp_data[0]);
+
+	if (pdata_mapped_pages < 0) {
+		printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p"
+				"nr_pages:%d\n", pdata_mapped_pages,
+				req->tc->pubuf, nr_pages);
+		pdata_mapped_pages = 0;
+		return -EIO;
+	}
+	rpinfo->rp_nr_pages = pdata_mapped_pages;
+	if (*pdata_off) {
+		*pdata_len = first_page_bytes;
+		*pdata_len += min((req->tc->pbuf_size - *pdata_len),
+				((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
+	} else {
+		*pdata_len = min(req->tc->pbuf_size,
+				(size_t)pdata_mapped_pages << PAGE_SHIFT);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(p9_payload_gup);
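The page-count arithmetic in p9_nr_pages() rounds the buffer's end address up to a page boundary and subtracts the page index of its start. A standalone worked example of the same computation (userspace C, assuming a 4 KiB page size; values invented for illustration):

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	int main(void)
	{
		/* A 9000-byte payload starting 100 bytes into its page. */
		unsigned long ubuf = (5UL << PAGE_SHIFT) + 100;
		unsigned long len = 9000;

		unsigned long start_page = ubuf >> PAGE_SHIFT;
		unsigned long end_page = (ubuf + len + PAGE_SIZE - 1)
						>> PAGE_SHIFT;

		/* 100 + 9000 = 9100 bytes spans pages 5..7: prints 3. */
		printf("pages to pin: %lu\n", end_page - start_page);
		return 0;
	}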
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
new file mode 100644
index 00000000000..76309223bb0
--- /dev/null
+++ b/net/9p/trans_common.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/* TRUE if it is user context */
+#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
+
+/**
+ * struct trans_rpage_info - To store mapped page information in PDU.
+ * @rp_alloc: Set if this structure is allocd, not a reuse unused space in pdu.
+ * @rp_nr_pages: Number of mapped pages
+ * @rp_data: Array of page pointers
+ */
+struct trans_rpage_info {
+	u8 rp_alloc;
+	int rp_nr_pages;
+	struct page *rp_data[0];
+};
+
+void p9_release_req_pages(struct trans_rpage_info *);
+int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
+int p9_nr_pages(struct p9_req_t *);
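rp_data[0] is a zero-length trailing array, so a trans_rpage_info always carries its page-pointer table inline. A sketch of the sizing, matching how trans_virtio.c (below) allocates the structure when the request's spare sdata space is too small to hold it:

	int rpinfo_size = sizeof(struct trans_rpage_info) +
			  sizeof(struct page *) * nr_pages;
	struct trans_rpage_info *rpinfo = kmalloc(rpinfo_size, GFP_NOFS);

	if (rpinfo)
		rpinfo->rp_alloc = 1;	/* must be kfree()d after the RPC */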
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 078eb162d9b..aa5672b15ea 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -153,10 +153,11 @@ struct p9_conn {
 	unsigned long wsched;
 };
 
+static void p9_poll_workfn(struct work_struct *work);
+
 static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
-static struct workqueue_struct *p9_mux_wq;
-static struct task_struct *p9_poll_task;
+static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
 
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
@@ -349,7 +350,7 @@ static void p9_read_work(struct work_struct *work)
 
 	if (m->req->rc == NULL) {
 		m->req->rc = kmalloc(sizeof(struct p9_fcall) +
-						m->client->msize, GFP_KERNEL);
+						m->client->msize, GFP_NOFS);
 		if (!m->req->rc) {
 			m->req = NULL;
 			err = -ENOMEM;
@@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work)
 
 		if (n & POLLIN) {
 			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
+			schedule_work(&m->rq);
 		} else
 			clear_bit(Rworksched, &m->wsched);
 	} else
@@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work)
 
 		if (n & POLLOUT) {
 			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
-			queue_work(p9_mux_wq, &m->wq);
+			schedule_work(&m->wq);
 		} else
 			clear_bit(Wworksched, &m->wsched);
 	} else
@@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 		container_of(wait, struct p9_poll_wait, wait);
 	struct p9_conn *m = pwait->conn;
 	unsigned long flags;
-	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
 
 	spin_lock_irqsave(&p9_poll_lock, flags);
 	if (list_empty(&m->poll_pending_link))
 		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-	/* perform the default wake up operation */
-	return default_wake_function(&dummy_wait, mode, sync, key);
+	schedule_work(&p9_poll_work);
+	return 1;
 }
 
 /**
@@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m)
 		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		if (!test_and_set_bit(Rworksched, &m->wsched)) {
 			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
+			schedule_work(&m->rq);
 		}
 	}
 
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
 	if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
 	    !test_and_set_bit(Wworksched, &m->wsched)) {
 		P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
-		queue_work(p9_mux_wq, &m->wq);
+		schedule_work(&m->wq);
 		}
 	}
 }
@@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 	n = p9_fd_poll(m->client, NULL);
 
 	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
-		queue_work(p9_mux_wq, &m->wq);
+		schedule_work(&m->wq);
 
 	return 0;
 }
@@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = {
  *
  */
 
-static int p9_poll_proc(void *a)
+static void p9_poll_workfn(struct work_struct *work)
 {
 	unsigned long flags;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
- repeat:
+
 	spin_lock_irqsave(&p9_poll_lock, flags);
 	while (!list_empty(&p9_poll_pending_list)) {
 		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
@@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a)
 	}
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (list_empty(&p9_poll_pending_list)) {
-		P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
-		schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-
-	if (!kthread_should_stop())
-		goto repeat;
-
 	P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
-	return 0;
 }
 
 int p9_trans_fd_init(void)
 {
-	p9_mux_wq = create_workqueue("v9fs");
-	if (!p9_mux_wq) {
-		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
-		return -ENOMEM;
-	}
-
-	p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
-	if (IS_ERR(p9_poll_task)) {
-		destroy_workqueue(p9_mux_wq);
-		printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
-		return PTR_ERR(p9_poll_task);
-	}
-
 	v9fs_register_trans(&p9_tcp_trans);
 	v9fs_register_trans(&p9_unix_trans);
 	v9fs_register_trans(&p9_fd_trans);
@@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void)
 
 void p9_trans_fd_exit(void)
 {
-	kthread_stop(p9_poll_task);
+	flush_work_sync(&p9_poll_work);
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
-
-	destroy_workqueue(p9_mux_wq);
 }
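The net effect of this file's hunks is that the private "v9fs" workqueue and the "v9fs-poll" kthread disappear in favour of a statically declared work item on the shared system workqueue. A bare-bones sketch of the pattern the patch adopts (illustrative names, not from the patch):

	static void example_workfn(struct work_struct *work)
	{
		/* Runs later in process context on the system workqueue. */
	}
	static DECLARE_WORK(example_work, example_workfn);

	static void example_event(void)
	{
		schedule_work(&example_work);	/* cheap, safe from wakeups */
	}

	static void example_exit(void)
	{
		flush_work_sync(&example_work);	/* wait out queued instances */
	}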
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 17c5ba7551a..150e0c4bbf4 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -59,7 +59,6 @@
  *				safely advertise a maxsize
  *				of 64k */
 
-#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
 /**
  * struct p9_trans_rdma - RDMA transport instance
  *
@@ -425,7 +424,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *rpl_context = NULL;
 
 	/* Allocate an fcall for the reply */
-	rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
+	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
 		goto err_close;
@@ -438,7 +437,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 */
 	if (!req->rc) {
 		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_KERNEL);
+				  GFP_NOFS);
 		if (req->rc) {
 			req->rc->sdata = (char *) req->rc +
 						sizeof(struct p9_fcall);
@@ -469,7 +468,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	req->rc = NULL;
 
 	/* Post the request */
-	c = kmalloc(sizeof *c, GFP_KERNEL);
+	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
 		goto err_free1;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c8f3f72ab20..e8f046b0718 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,13 +43,17 @@
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
+#include <linux/swap.h>
 #include <linux/virtio.h>
 #include <linux/virtio_9p.h>
+#include "trans_common.h"
 
 #define VIRTQUEUE_NUM	128
 
 /* a single mutex to manage channel initialization and attachment */
 static DEFINE_MUTEX(virtio_9p_lock);
+static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
+static atomic_t vp_pinned = ATOMIC_INIT(0);
 
 /**
  * struct virtio_chan - per-instance transport information
@@ -77,7 +81,10 @@ struct virtio_chan {
 	struct virtqueue *vq;
 	int ring_bufs_avail;
 	wait_queue_head_t *vc_wq;
-
+	/* This is global limit. Since we don't have a global structure,
+	 * will be placing it in each channel.
+	 */
+	int p9_max_pages;
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
 
@@ -140,26 +147,36 @@ static void req_done(struct virtqueue *vq)
 
 	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
 
-	do {
+	while (1) {
 		spin_lock_irqsave(&chan->lock, flags);
 		rc = virtqueue_get_buf(chan->vq, &len);
 
-		if (rc != NULL) {
-			if (!chan->ring_bufs_avail) {
-				chan->ring_bufs_avail = 1;
-				wake_up(chan->vc_wq);
-			}
-			spin_unlock_irqrestore(&chan->lock, flags);
-			P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
-			P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
-					rc->tag);
-			req = p9_tag_lookup(chan->client, rc->tag);
-			req->status = REQ_STATUS_RCVD;
-			p9_client_cb(chan->client, req);
-		} else {
+		if (rc == NULL) {
 			spin_unlock_irqrestore(&chan->lock, flags);
+			break;
+		}
+
+		chan->ring_bufs_avail = 1;
+		spin_unlock_irqrestore(&chan->lock, flags);
+		/* Wakeup if anyone waiting for VirtIO ring space. */
+		wake_up(chan->vc_wq);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
+		req = p9_tag_lookup(chan->client, rc->tag);
+		if (req->tc->private) {
+			struct trans_rpage_info *rp = req->tc->private;
+			int p = rp->rp_nr_pages;
+			/*Release pages */
+			p9_release_req_pages(rp);
+			atomic_sub(p, &vp_pinned);
+			wake_up(&vp_wq);
+			if (rp->rp_alloc)
+				kfree(rp);
+			req->tc->private = NULL;
 		}
-	} while (rc != NULL);
+		req->status = REQ_STATUS_RCVD;
+		p9_client_cb(chan->client, req);
+	}
 }
 
 /**
@@ -203,6 +220,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
 }
 
 /**
+ * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
+ * this takes a list of pages.
+ * @sg: scatter/gather list to pack into
+ * @start: which segment of the sg_list to start at
+ * @pdata_off: Offset into the first page
+ * @**pdata: a list of pages to add into sg.
+ * @count: amount of data to pack into the scatter/gather list
+ */
+static int
+pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
+		struct page **pdata, int count)
+{
+	int s;
+	int i = 0;
+	int index = start;
+
+	if (pdata_off) {
+		s = min((int)(PAGE_SIZE - pdata_off), count);
+		sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
+		count -= s;
+	}
+
+	while (count) {
+		BUG_ON(index > limit);
+		s = min((int)PAGE_SIZE, count);
+		sg_set_page(&sg[index++], pdata[i++], s, 0);
+		count -= s;
+	}
+	return index-start;
+}
+
+/**
  * p9_virtio_request - issue a request
  * @client: client instance issuing the request
  * @req: request to be issued
@@ -212,22 +261,107 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
 static int
 p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
-	int in, out;
+	int in, out, inp, outp;
 	struct virtio_chan *chan = client->trans;
 	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 	unsigned long flags;
-	int err;
+	size_t pdata_off = 0;
+	struct trans_rpage_info *rpinfo = NULL;
+	int err, pdata_len = 0;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
-req_retry:
 	req->status = REQ_STATUS_SENT;
 
+	if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
+		int nr_pages = p9_nr_pages(req);
+		int rpinfo_size = sizeof(struct trans_rpage_info) +
+			sizeof(struct page *) * nr_pages;
+
+		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+			err = wait_event_interruptible(vp_wq,
+				atomic_read(&vp_pinned) < chan->p9_max_pages);
+			if (err == -ERESTARTSYS)
+				return err;
+			P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
+		}
+
+		if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
+			/* We can use sdata */
+			req->tc->private = req->tc->sdata + req->tc->size;
+			rpinfo = (struct trans_rpage_info *)req->tc->private;
+			rpinfo->rp_alloc = 0;
+		} else {
+			req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
+			if (!req->tc->private) {
+				P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
+					"private kmalloc returned NULL");
+				return -ENOMEM;
+			}
+			rpinfo = (struct trans_rpage_info *)req->tc->private;
+			rpinfo->rp_alloc = 1;
+		}
+
+		err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
+				req->tc->id == P9_TREAD ? 1 : 0);
+		if (err < 0) {
+			if (rpinfo->rp_alloc)
+				kfree(rpinfo);
+			return err;
+		} else {
+			atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
+		}
+	}
+
+req_retry_pinned:
 	spin_lock_irqsave(&chan->lock, flags);
+
+	/* Handle out VirtIO ring buffers */
 	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
 			req->tc->size);
-	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
-			client->msize);
+
+	if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
+		/* We have additional write payload buffer to take care */
+		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+			outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+					pdata_off, rpinfo->rp_data, pdata_len);
+		} else {
+			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+							req->tc->pkbuf;
+			outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
+					req->tc->pbuf_size);
+		}
+		out += outp;
+	}
+
+	/* Handle in VirtIO ring buffers */
+	if (req->tc->pbuf_size &&
+		((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
+		/*
+		 * Take care of additional Read payload.
+		 * 11 is the read/write header = PDU Header(7) + IO Size (4).
+		 * Arrange in such a way that server places header in the
+		 * alloced memory and payload onto the user buffer.
+		 */
+		inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+		/*
+		 * Running executables in the filesystem may result in
+		 * a read request with kernel buffer as opposed to user buffer.
+		 */
+		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+			in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
+					pdata_off, rpinfo->rp_data, pdata_len);
+		} else {
+			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+							req->tc->pkbuf;
+			in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
+					pbuf, req->tc->pbuf_size);
+		}
+		in += inp;
+	} else {
+		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
+				client->msize);
+	}
 
 	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
 	if (err < 0) {
@@ -240,12 +374,14 @@ req_retry:
 			return err;
 
 		P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
-		goto req_retry;
+		goto req_retry_pinned;
 	} else {
 		spin_unlock_irqrestore(&chan->lock, flags);
 		P9_DPRINTK(P9_DEBUG_TRANS,
 				"9p debug: "
 				"virtio rpc add_buf returned failure");
+		if (rpinfo && rpinfo->rp_alloc)
+			kfree(rpinfo);
 		return -EIO;
 	}
 }
@@ -335,6 +471,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	}
 	init_waitqueue_head(chan->vc_wq);
 	chan->ring_bufs_avail = 1;
+	/* Ceiling limit to avoid denial of service attacks */
+	chan->p9_max_pages = nr_free_buffer_pages()/4;
 
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
@@ -448,6 +586,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.request = p9_virtio_request,
 	.cancel = p9_virtio_cancel,
 	.maxsize = PAGE_SIZE*16,
+	.pref = P9_TRANS_PREF_PAYLOAD_SEP,
 	.def = 0,
 	.owner = THIS_MODULE,
 };
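To make the scatter/gather layout concrete: with a 4 KiB page size, a 9000-byte pinned payload whose first byte sits 100 bytes into its page would be packed by pack_sg_list_p() as three segments. A hypothetical call, with the offset and length invented for illustration:

	/* rpinfo->rp_data[] holds the 3 pages pinned by p9_payload_gup() */
	int segs = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
				  100, rpinfo->rp_data, 9000);
	/*
	 * segs == 3:
	 *   sg[out+0]: page 0, offset 100, length 3996
	 *   sg[out+1]: page 1, offset 0,   length 4096
	 *   sg[out+2]: page 2, offset 0,   length  908
	 */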
diff --git a/net/9p/util.c b/net/9p/util.c
index e048701a72d..b84619b5ba2 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -92,7 +92,7 @@ int p9_idpool_get(struct p9_idpool *p)
 	unsigned long flags;
 
 retry:
-	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
+	if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
 		return 0;
 
 	spin_lock_irqsave(&p->lock, flags);
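Taken together with the client.c, protocol.c, trans_fd.c and trans_rdma.c hunks above, this completes the switch from GFP_KERNEL to GFP_NOFS for allocations made while a 9p request is in flight: GFP_NOFS forbids direct reclaim from re-entering filesystem code, which avoids deadlocking v9fs when memory pressure strikes in the middle of an RPC.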
diff --git a/net/Kconfig b/net/Kconfig
index 72840626284..79cabf1ee68 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -221,6 +221,12 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config RFS_ACCEL
+	boolean
+	depends on RPS && GENERIC_HARDIRQS
+	select CPU_RMAP
+	default y
+
 config XPS
 	boolean
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c5..a51d9465e62 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
 obj-$(CONFIG_INET)		+= ipv4/
 obj-$(CONFIG_XFRM)		+= xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
-ifneq ($(CONFIG_IPV6),)
-obj-y				+= ipv6/
-endif
+obj-$(CONFIG_NET)		+= ipv6/
 obj-$(CONFIG_PACKET)		+= packet/
 obj-$(CONFIG_NET_KEY)		+= key/
 obj-$(CONFIG_BRIDGE)		+= bridge/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c410b93fda2..206e771e82d 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -54,7 +54,6 @@
54#include <linux/capability.h> 54#include <linux/capability.h>
55#include <linux/module.h> 55#include <linux/module.h>
56#include <linux/if_arp.h> 56#include <linux/if_arp.h>
57#include <linux/smp_lock.h>
58#include <linux/termios.h> /* For TIOCOUTQ/INQ */ 57#include <linux/termios.h> /* For TIOCOUTQ/INQ */
59#include <linux/compat.h> 58#include <linux/compat.h>
60#include <linux/slab.h> 59#include <linux/slab.h>
@@ -1052,13 +1051,16 @@ static int atalk_release(struct socket *sock)
1052{ 1051{
1053 struct sock *sk = sock->sk; 1052 struct sock *sk = sock->sk;
1054 1053
1055 lock_kernel(); 1054 sock_hold(sk);
1055 lock_sock(sk);
1056 if (sk) { 1056 if (sk) {
1057 sock_orphan(sk); 1057 sock_orphan(sk);
1058 sock->sk = NULL; 1058 sock->sk = NULL;
1059 atalk_destroy_socket(sk); 1059 atalk_destroy_socket(sk);
1060 } 1060 }
1061 unlock_kernel(); 1061 release_sock(sk);
1062 sock_put(sk);
1063
1062 return 0; 1064 return 0;
1063} 1065}
1064 1066
@@ -1143,7 +1145,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1143 if (addr->sat_family != AF_APPLETALK) 1145 if (addr->sat_family != AF_APPLETALK)
1144 return -EAFNOSUPPORT; 1146 return -EAFNOSUPPORT;
1145 1147
1146 lock_kernel(); 1148 lock_sock(sk);
1147 if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) { 1149 if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) {
1148 struct atalk_addr *ap = atalk_find_primary(); 1150 struct atalk_addr *ap = atalk_find_primary();
1149 1151
@@ -1179,7 +1181,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1179 sock_reset_flag(sk, SOCK_ZAPPED); 1181 sock_reset_flag(sk, SOCK_ZAPPED);
1180 err = 0; 1182 err = 0;
1181out: 1183out:
1182 unlock_kernel(); 1184 release_sock(sk);
1183 return err; 1185 return err;
1184} 1186}
1185 1187
@@ -1215,7 +1217,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
1215#endif 1217#endif
1216 } 1218 }
1217 1219
1218 lock_kernel(); 1220 lock_sock(sk);
1219 err = -EBUSY; 1221 err = -EBUSY;
1220 if (sock_flag(sk, SOCK_ZAPPED)) 1222 if (sock_flag(sk, SOCK_ZAPPED))
1221 if (atalk_autobind(sk) < 0) 1223 if (atalk_autobind(sk) < 0)
@@ -1233,7 +1235,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
1233 sk->sk_state = TCP_ESTABLISHED; 1235 sk->sk_state = TCP_ESTABLISHED;
1234 err = 0; 1236 err = 0;
1235out: 1237out:
1236 unlock_kernel(); 1238 release_sock(sk);
1237 return err; 1239 return err;
1238} 1240}
1239 1241
@@ -1249,7 +1251,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
1249 struct atalk_sock *at = at_sk(sk); 1251 struct atalk_sock *at = at_sk(sk);
1250 int err; 1252 int err;
1251 1253
1252 lock_kernel(); 1254 lock_sock(sk);
1253 err = -ENOBUFS; 1255 err = -ENOBUFS;
1254 if (sock_flag(sk, SOCK_ZAPPED)) 1256 if (sock_flag(sk, SOCK_ZAPPED))
1255 if (atalk_autobind(sk) < 0) 1257 if (atalk_autobind(sk) < 0)
@@ -1277,17 +1279,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
1277 memcpy(uaddr, &sat, sizeof(sat)); 1279 memcpy(uaddr, &sat, sizeof(sat));
1278 1280
1279out: 1281out:
1280 unlock_kernel(); 1282 release_sock(sk);
1281 return err;
1282}
1283
1284static unsigned int atalk_poll(struct file *file, struct socket *sock,
1285 poll_table *wait)
1286{
1287 int err;
1288 lock_kernel();
1289 err = datagram_poll(file, sock, wait);
1290 unlock_kernel();
1291 return err; 1283 return err;
1292} 1284}
1293 1285
@@ -1596,7 +1588,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1596 if (len > DDP_MAXSZ) 1588 if (len > DDP_MAXSZ)
1597 return -EMSGSIZE; 1589 return -EMSGSIZE;
1598 1590
1599 lock_kernel(); 1591 lock_sock(sk);
1600 if (usat) { 1592 if (usat) {
1601 err = -EBUSY; 1593 err = -EBUSY;
1602 if (sock_flag(sk, SOCK_ZAPPED)) 1594 if (sock_flag(sk, SOCK_ZAPPED))
@@ -1651,7 +1643,9 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1651 sk, size, dev->name); 1643 sk, size, dev->name);
1652 1644
1653 size += dev->hard_header_len; 1645 size += dev->hard_header_len;
1646 release_sock(sk);
1654 skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); 1647 skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
1648 lock_sock(sk);
1655 if (!skb) 1649 if (!skb)
1656 goto out; 1650 goto out;
1657 1651
@@ -1738,7 +1732,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1738 SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len); 1732 SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len);
1739 1733
1740out: 1734out:
1741 unlock_kernel(); 1735 release_sock(sk);
1742 return err ? : len; 1736 return err ? : len;
1743} 1737}
1744 1738
@@ -1753,9 +1747,10 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1753 int err = 0; 1747 int err = 0;
1754 struct sk_buff *skb; 1748 struct sk_buff *skb;
1755 1749
1756 lock_kernel();
1757 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 1750 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1758 flags & MSG_DONTWAIT, &err); 1751 flags & MSG_DONTWAIT, &err);
1752 lock_sock(sk);
1753
1759 if (!skb) 1754 if (!skb)
1760 goto out; 1755 goto out;
1761 1756
@@ -1787,7 +1782,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1787 skb_free_datagram(sk, skb); /* Free the datagram. */ 1782 skb_free_datagram(sk, skb); /* Free the datagram. */
1788 1783
1789out: 1784out:
1790 unlock_kernel(); 1785 release_sock(sk);
1791 return err ? : copied; 1786 return err ? : copied;
1792} 1787}
1793 1788
@@ -1887,7 +1882,7 @@ static const struct proto_ops atalk_dgram_ops = {
1887 .socketpair = sock_no_socketpair, 1882 .socketpair = sock_no_socketpair,
1888 .accept = sock_no_accept, 1883 .accept = sock_no_accept,
1889 .getname = atalk_getname, 1884 .getname = atalk_getname,
1890 .poll = atalk_poll, 1885 .poll = datagram_poll,
1891 .ioctl = atalk_ioctl, 1886 .ioctl = atalk_ioctl,
1892#ifdef CONFIG_COMPAT 1887#ifdef CONFIG_COMPAT
1893 .compat_ioctl = atalk_compat_ioctl, 1888 .compat_ioctl = atalk_compat_ioctl,
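
The appletalk conversion replaces the big kernel lock with the per-socket lock and shows the standard discipline for sleeping calls: drop the lock around sock_alloc_send_skb() and skb_recv_datagram(), then retake it before touching socket state again. datagram_poll() needs no serialization of its own, so the atalk_poll() wrapper disappears and the ops table points at it directly. The sendmsg-side pattern, reduced to a sketch:

lock_sock(sk);
/* ... validate addresses and flags under the socket lock ... */

release_sock(sk);			/* may sleep for memory */
skb = sock_alloc_send_skb(sk, size, flags & MSG_DONTWAIT, &err);
lock_sock(sk);				/* state may have changed; re-check */

if (!skb)
	goto out;
/* ... build and transmit the frame ... */
out:
	release_sock(sk);
	return err ? : len;
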
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d257da50fcf..1d4be60e139 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -502,8 +502,6 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
502 struct atmarp_entry *entry; 502 struct atmarp_entry *entry;
503 int error; 503 int error;
504 struct clip_vcc *clip_vcc; 504 struct clip_vcc *clip_vcc;
505 struct flowi fl = { .fl4_dst = ip,
506 .fl4_tos = 1 };
507 struct rtable *rt; 505 struct rtable *rt;
508 506
509 if (vcc->push != clip_push) { 507 if (vcc->push != clip_push) {
@@ -520,9 +518,9 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
520 unlink_clip_vcc(clip_vcc); 518 unlink_clip_vcc(clip_vcc);
521 return 0; 519 return 0;
522 } 520 }
523 error = ip_route_output_key(&init_net, &rt, &fl); 521 rt = ip_route_output(&init_net, ip, 0, 1, 0);
524 if (error) 522 if (IS_ERR(rt))
525 return error; 523 return PTR_ERR(rt);
526 neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); 524 neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
527 ip_rt_put(rt); 525 ip_rt_put(rt);
528 if (!neigh) 526 if (!neigh)
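
clip_setentry() moves from filling a struct flowi and calling ip_route_output_key() to the new ip_route_output() helper, which takes (net, daddr, saddr, tos, oif) and returns the route itself, with errors encoded as an ERR_PTR instead of through a separate status. The calling convention, as a sketch:

struct rtable *rt;

rt = ip_route_output(&init_net, ip, 0 /* saddr */, 1 /* tos */, 0 /* oif */);
if (IS_ERR(rt))
	return PTR_ERR(rt);		/* no route, or routing error */

/* ... use rt->dst.dev while holding the reference ... */
ip_rt_put(rt);				/* drop the route reference */
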
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index d936aeccd19..2de93d00631 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
1# 1#
2# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2# Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3# 3#
4# Marek Lindner, Simon Wunderlich 4# Marek Lindner, Simon Wunderlich
5# 5#
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
index 3850a3ecf94..af45d6b2031 100644
--- a/net/batman-adv/aggregation.c
+++ b/net/batman-adv/aggregation.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -35,7 +35,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet,
35 int packet_len, 35 int packet_len,
36 unsigned long send_time, 36 unsigned long send_time,
37 bool directlink, 37 bool directlink,
38 struct batman_if *if_incoming, 38 struct hard_iface *if_incoming,
39 struct forw_packet *forw_packet) 39 struct forw_packet *forw_packet)
40{ 40{
41 struct batman_packet *batman_packet = 41 struct batman_packet *batman_packet =
@@ -99,7 +99,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet,
99/* create a new aggregated packet and add this packet to it */ 99/* create a new aggregated packet and add this packet to it */
100static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, 100static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
101 unsigned long send_time, bool direct_link, 101 unsigned long send_time, bool direct_link,
102 struct batman_if *if_incoming, 102 struct hard_iface *if_incoming,
103 int own_packet) 103 int own_packet)
104{ 104{
105 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 105 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
@@ -188,7 +188,7 @@ static void aggregate(struct forw_packet *forw_packet_aggr,
188 188
189void add_bat_packet_to_list(struct bat_priv *bat_priv, 189void add_bat_packet_to_list(struct bat_priv *bat_priv,
190 unsigned char *packet_buff, int packet_len, 190 unsigned char *packet_buff, int packet_len,
191 struct batman_if *if_incoming, char own_packet, 191 struct hard_iface *if_incoming, char own_packet,
192 unsigned long send_time) 192 unsigned long send_time)
193{ 193{
194 /** 194 /**
@@ -247,7 +247,7 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv,
247 247
248/* unpack the aggregated packets and process them one by one */ 248/* unpack the aggregated packets and process them one by one */
249void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, 249void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
250 int packet_len, struct batman_if *if_incoming) 250 int packet_len, struct hard_iface *if_incoming)
251{ 251{
252 struct batman_packet *batman_packet; 252 struct batman_packet *batman_packet;
253 int buff_pos = 0; 253 int buff_pos = 0;
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
index 71a91b3da91..062204289d1 100644
--- a/net/batman-adv/aggregation.h
+++ b/net/batman-adv/aggregation.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -35,9 +35,9 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna)
35 35
36void add_bat_packet_to_list(struct bat_priv *bat_priv, 36void add_bat_packet_to_list(struct bat_priv *bat_priv,
37 unsigned char *packet_buff, int packet_len, 37 unsigned char *packet_buff, int packet_len,
38 struct batman_if *if_incoming, char own_packet, 38 struct hard_iface *if_incoming, char own_packet,
39 unsigned long send_time); 39 unsigned long send_time);
40void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, 40void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
41 int packet_len, struct batman_if *if_incoming); 41 int packet_len, struct hard_iface *if_incoming);
42 42
43#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ 43#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c
index 0ae81d07f10..0e9d4350993 100644
--- a/net/batman-adv/bat_debugfs.c
+++ b/net/batman-adv/bat_debugfs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -52,7 +52,6 @@ static void emit_log_char(struct debug_log *debug_log, char c)
52 52
53static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) 53static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
54{ 54{
55 int printed_len;
56 va_list args; 55 va_list args;
57 static char debug_log_buf[256]; 56 static char debug_log_buf[256];
58 char *p; 57 char *p;
@@ -62,8 +61,7 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
62 61
63 spin_lock_bh(&debug_log->lock); 62 spin_lock_bh(&debug_log->lock);
64 va_start(args, fmt); 63 va_start(args, fmt);
65 printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf), 64 vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
66 fmt, args);
67 va_end(args); 65 va_end(args);
68 66
69 for (p = debug_log_buf; *p != 0; p++) 67 for (p = debug_log_buf; *p != 0; p++)
diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h
index 72df532b7d5..bc9cda3f01e 100644
--- a/net/batman-adv/bat_debugfs.h
+++ b/net/batman-adv/bat_debugfs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
index cd7bb51825f..e449bf6353e 100644
--- a/net/batman-adv/bat_sysfs.c
+++ b/net/batman-adv/bat_sysfs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -441,16 +441,16 @@ static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr,
441 char *buff) 441 char *buff)
442{ 442{
443 struct net_device *net_dev = kobj_to_netdev(kobj); 443 struct net_device *net_dev = kobj_to_netdev(kobj);
444 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 444 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
445 ssize_t length; 445 ssize_t length;
446 446
447 if (!batman_if) 447 if (!hard_iface)
448 return 0; 448 return 0;
449 449
450 length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ? 450 length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ?
451 "none" : batman_if->soft_iface->name); 451 "none" : hard_iface->soft_iface->name);
452 452
453 kref_put(&batman_if->refcount, hardif_free_ref); 453 hardif_free_ref(hard_iface);
454 454
455 return length; 455 return length;
456} 456}
@@ -459,11 +459,11 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
459 char *buff, size_t count) 459 char *buff, size_t count)
460{ 460{
461 struct net_device *net_dev = kobj_to_netdev(kobj); 461 struct net_device *net_dev = kobj_to_netdev(kobj);
462 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 462 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
463 int status_tmp = -1; 463 int status_tmp = -1;
464 int ret; 464 int ret = count;
465 465
466 if (!batman_if) 466 if (!hard_iface)
467 return count; 467 return count;
468 468
469 if (buff[count - 1] == '\n') 469 if (buff[count - 1] == '\n')
@@ -472,7 +472,7 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
472 if (strlen(buff) >= IFNAMSIZ) { 472 if (strlen(buff) >= IFNAMSIZ) {
473 pr_err("Invalid parameter for 'mesh_iface' setting received: " 473 pr_err("Invalid parameter for 'mesh_iface' setting received: "
474 "interface name too long '%s'\n", buff); 474 "interface name too long '%s'\n", buff);
475 kref_put(&batman_if->refcount, hardif_free_ref); 475 hardif_free_ref(hard_iface);
476 return -EINVAL; 476 return -EINVAL;
477 } 477 }
478 478
@@ -481,30 +481,31 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
481 else 481 else
482 status_tmp = IF_I_WANT_YOU; 482 status_tmp = IF_I_WANT_YOU;
483 483
484 if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) && 484 if (hard_iface->if_status == status_tmp)
485 (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) { 485 goto out;
486 kref_put(&batman_if->refcount, hardif_free_ref); 486
487 return count; 487 if ((hard_iface->soft_iface) &&
488 } 488 (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
489 goto out;
489 490
490 if (status_tmp == IF_NOT_IN_USE) { 491 if (status_tmp == IF_NOT_IN_USE) {
491 rtnl_lock(); 492 rtnl_lock();
492 hardif_disable_interface(batman_if); 493 hardif_disable_interface(hard_iface);
493 rtnl_unlock(); 494 rtnl_unlock();
494 kref_put(&batman_if->refcount, hardif_free_ref); 495 goto out;
495 return count;
496 } 496 }
497 497
498 /* if the interface already is in use */ 498 /* if the interface already is in use */
499 if (batman_if->if_status != IF_NOT_IN_USE) { 499 if (hard_iface->if_status != IF_NOT_IN_USE) {
500 rtnl_lock(); 500 rtnl_lock();
501 hardif_disable_interface(batman_if); 501 hardif_disable_interface(hard_iface);
502 rtnl_unlock(); 502 rtnl_unlock();
503 } 503 }
504 504
505 ret = hardif_enable_interface(batman_if, buff); 505 ret = hardif_enable_interface(hard_iface, buff);
506 kref_put(&batman_if->refcount, hardif_free_ref);
507 506
507out:
508 hardif_free_ref(hard_iface);
508 return ret; 509 return ret;
509} 510}
510 511
@@ -512,13 +513,13 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
512 char *buff) 513 char *buff)
513{ 514{
514 struct net_device *net_dev = kobj_to_netdev(kobj); 515 struct net_device *net_dev = kobj_to_netdev(kobj);
515 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 516 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
516 ssize_t length; 517 ssize_t length;
517 518
518 if (!batman_if) 519 if (!hard_iface)
519 return 0; 520 return 0;
520 521
521 switch (batman_if->if_status) { 522 switch (hard_iface->if_status) {
522 case IF_TO_BE_REMOVED: 523 case IF_TO_BE_REMOVED:
523 length = sprintf(buff, "disabling\n"); 524 length = sprintf(buff, "disabling\n");
524 break; 525 break;
@@ -537,7 +538,7 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
537 break; 538 break;
538 } 539 }
539 540
540 kref_put(&batman_if->refcount, hardif_free_ref); 541 hardif_free_ref(hard_iface);
541 542
542 return length; 543 return length;
543} 544}
diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h
index 7f186c007b4..02f1fa7aadf 100644
--- a/net/batman-adv/bat_sysfs.h
+++ b/net/batman-adv/bat_sysfs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bbcd8f744cd..ad2ca925b3e 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ac54017601b..769c246d1fc 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 0065ffb8d96..3cc43558cf9 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -28,58 +28,75 @@
28#include <linux/udp.h> 28#include <linux/udp.h>
29#include <linux/if_vlan.h> 29#include <linux/if_vlan.h>
30 30
31static void gw_node_free_ref(struct kref *refcount) 31static void gw_node_free_rcu(struct rcu_head *rcu)
32{ 32{
33 struct gw_node *gw_node; 33 struct gw_node *gw_node;
34 34
35 gw_node = container_of(refcount, struct gw_node, refcount); 35 gw_node = container_of(rcu, struct gw_node, rcu);
36 kfree(gw_node); 36 kfree(gw_node);
37} 37}
38 38
39static void gw_node_free_rcu(struct rcu_head *rcu) 39static void gw_node_free_ref(struct gw_node *gw_node)
40{ 40{
41 struct gw_node *gw_node; 41 if (atomic_dec_and_test(&gw_node->refcount))
42 42 call_rcu(&gw_node->rcu, gw_node_free_rcu);
43 gw_node = container_of(rcu, struct gw_node, rcu);
44 kref_put(&gw_node->refcount, gw_node_free_ref);
45} 43}
46 44
47void *gw_get_selected(struct bat_priv *bat_priv) 45void *gw_get_selected(struct bat_priv *bat_priv)
48{ 46{
49 struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; 47 struct gw_node *curr_gateway_tmp;
48 struct orig_node *orig_node = NULL;
50 49
50 rcu_read_lock();
51 curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw);
51 if (!curr_gateway_tmp) 52 if (!curr_gateway_tmp)
52 return NULL; 53 goto out;
54
55 orig_node = curr_gateway_tmp->orig_node;
56 if (!orig_node)
57 goto out;
53 58
54 return curr_gateway_tmp->orig_node; 59 if (!atomic_inc_not_zero(&orig_node->refcount))
60 orig_node = NULL;
61
62out:
63 rcu_read_unlock();
64 return orig_node;
55} 65}
56 66
57void gw_deselect(struct bat_priv *bat_priv) 67void gw_deselect(struct bat_priv *bat_priv)
58{ 68{
59 struct gw_node *gw_node = bat_priv->curr_gw; 69 struct gw_node *gw_node;
60 70
61 bat_priv->curr_gw = NULL; 71 spin_lock_bh(&bat_priv->gw_list_lock);
72 gw_node = rcu_dereference(bat_priv->curr_gw);
73 rcu_assign_pointer(bat_priv->curr_gw, NULL);
74 spin_unlock_bh(&bat_priv->gw_list_lock);
62 75
63 if (gw_node) 76 if (gw_node)
64 kref_put(&gw_node->refcount, gw_node_free_ref); 77 gw_node_free_ref(gw_node);
65} 78}
66 79
67static struct gw_node *gw_select(struct bat_priv *bat_priv, 80static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node)
68 struct gw_node *new_gw_node)
69{ 81{
70 struct gw_node *curr_gw_node = bat_priv->curr_gw; 82 struct gw_node *curr_gw_node;
71 83
72 if (new_gw_node) 84 if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount))
73 kref_get(&new_gw_node->refcount); 85 new_gw_node = NULL;
86
87 spin_lock_bh(&bat_priv->gw_list_lock);
88 curr_gw_node = rcu_dereference(bat_priv->curr_gw);
89 rcu_assign_pointer(bat_priv->curr_gw, new_gw_node);
90 spin_unlock_bh(&bat_priv->gw_list_lock);
74 91
75 bat_priv->curr_gw = new_gw_node; 92 if (curr_gw_node)
76 return curr_gw_node; 93 gw_node_free_ref(curr_gw_node);
77} 94}
78 95
79void gw_election(struct bat_priv *bat_priv) 96void gw_election(struct bat_priv *bat_priv)
80{ 97{
81 struct hlist_node *node; 98 struct hlist_node *node;
82 struct gw_node *gw_node, *curr_gw_tmp = NULL, *old_gw_node = NULL; 99 struct gw_node *gw_node, *curr_gw, *curr_gw_tmp = NULL;
83 uint8_t max_tq = 0; 100 uint8_t max_tq = 0;
84 uint32_t max_gw_factor = 0, tmp_gw_factor = 0; 101 uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
85 int down, up; 102 int down, up;
@@ -93,19 +110,23 @@ void gw_election(struct bat_priv *bat_priv)
93 if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) 110 if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT)
94 return; 111 return;
95 112
96 if (bat_priv->curr_gw) 113 rcu_read_lock();
114 curr_gw = rcu_dereference(bat_priv->curr_gw);
115 if (curr_gw) {
116 rcu_read_unlock();
97 return; 117 return;
118 }
98 119
99 rcu_read_lock();
100 if (hlist_empty(&bat_priv->gw_list)) { 120 if (hlist_empty(&bat_priv->gw_list)) {
101 rcu_read_unlock();
102 121
103 if (bat_priv->curr_gw) { 122 if (curr_gw) {
123 rcu_read_unlock();
104 bat_dbg(DBG_BATMAN, bat_priv, 124 bat_dbg(DBG_BATMAN, bat_priv,
105 "Removing selected gateway - " 125 "Removing selected gateway - "
106 "no gateway in range\n"); 126 "no gateway in range\n");
107 gw_deselect(bat_priv); 127 gw_deselect(bat_priv);
108 } 128 } else
129 rcu_read_unlock();
109 130
110 return; 131 return;
111 } 132 }
@@ -154,12 +175,12 @@ void gw_election(struct bat_priv *bat_priv)
154 max_gw_factor = tmp_gw_factor; 175 max_gw_factor = tmp_gw_factor;
155 } 176 }
156 177
157 if (bat_priv->curr_gw != curr_gw_tmp) { 178 if (curr_gw != curr_gw_tmp) {
158 if ((bat_priv->curr_gw) && (!curr_gw_tmp)) 179 if ((curr_gw) && (!curr_gw_tmp))
159 bat_dbg(DBG_BATMAN, bat_priv, 180 bat_dbg(DBG_BATMAN, bat_priv,
160 "Removing selected gateway - " 181 "Removing selected gateway - "
161 "no gateway in range\n"); 182 "no gateway in range\n");
162 else if ((!bat_priv->curr_gw) && (curr_gw_tmp)) 183 else if ((!curr_gw) && (curr_gw_tmp))
163 bat_dbg(DBG_BATMAN, bat_priv, 184 bat_dbg(DBG_BATMAN, bat_priv,
164 "Adding route to gateway %pM " 185 "Adding route to gateway %pM "
165 "(gw_flags: %i, tq: %i)\n", 186 "(gw_flags: %i, tq: %i)\n",
@@ -174,43 +195,43 @@ void gw_election(struct bat_priv *bat_priv)
174 curr_gw_tmp->orig_node->gw_flags, 195 curr_gw_tmp->orig_node->gw_flags,
175 curr_gw_tmp->orig_node->router->tq_avg); 196 curr_gw_tmp->orig_node->router->tq_avg);
176 197
177 old_gw_node = gw_select(bat_priv, curr_gw_tmp); 198 gw_select(bat_priv, curr_gw_tmp);
178 } 199 }
179 200
180 rcu_read_unlock(); 201 rcu_read_unlock();
181
182 /* the kfree() has to be outside of the rcu lock */
183 if (old_gw_node)
184 kref_put(&old_gw_node->refcount, gw_node_free_ref);
185} 202}
186 203
187void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) 204void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
188{ 205{
189 struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; 206 struct gw_node *curr_gateway_tmp;
190 uint8_t gw_tq_avg, orig_tq_avg; 207 uint8_t gw_tq_avg, orig_tq_avg;
191 208
209 rcu_read_lock();
210 curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw);
192 if (!curr_gateway_tmp) 211 if (!curr_gateway_tmp)
193 return; 212 goto out_rcu;
194 213
195 if (!curr_gateway_tmp->orig_node) 214 if (!curr_gateway_tmp->orig_node)
196 goto deselect; 215 goto deselect_rcu;
197 216
198 if (!curr_gateway_tmp->orig_node->router) 217 if (!curr_gateway_tmp->orig_node->router)
199 goto deselect; 218 goto deselect_rcu;
200 219
201 /* this node already is the gateway */ 220 /* this node already is the gateway */
202 if (curr_gateway_tmp->orig_node == orig_node) 221 if (curr_gateway_tmp->orig_node == orig_node)
203 return; 222 goto out_rcu;
204 223
205 if (!orig_node->router) 224 if (!orig_node->router)
206 return; 225 goto out_rcu;
207 226
208 gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg; 227 gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg;
228 rcu_read_unlock();
229
209 orig_tq_avg = orig_node->router->tq_avg; 230 orig_tq_avg = orig_node->router->tq_avg;
210 231
211 /* the TQ value has to be better */ 232 /* the TQ value has to be better */
212 if (orig_tq_avg < gw_tq_avg) 233 if (orig_tq_avg < gw_tq_avg)
213 return; 234 goto out;
214 235
215 /** 236 /**
216 * if the routing class is greater than 3 the value tells us how much 237 * if the routing class is greater than 3 the value tells us how much
@@ -218,15 +239,23 @@ void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
218 **/ 239 **/
219 if ((atomic_read(&bat_priv->gw_sel_class) > 3) && 240 if ((atomic_read(&bat_priv->gw_sel_class) > 3) &&
220 (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class))) 241 (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class)))
221 return; 242 goto out;
222 243
223 bat_dbg(DBG_BATMAN, bat_priv, 244 bat_dbg(DBG_BATMAN, bat_priv,
224 "Restarting gateway selection: better gateway found (tq curr: " 245 "Restarting gateway selection: better gateway found (tq curr: "
225 "%i, tq new: %i)\n", 246 "%i, tq new: %i)\n",
226 gw_tq_avg, orig_tq_avg); 247 gw_tq_avg, orig_tq_avg);
248 goto deselect;
227 249
250out_rcu:
251 rcu_read_unlock();
252 goto out;
253deselect_rcu:
254 rcu_read_unlock();
228deselect: 255deselect:
229 gw_deselect(bat_priv); 256 gw_deselect(bat_priv);
257out:
258 return;
230} 259}
231 260
232static void gw_node_add(struct bat_priv *bat_priv, 261static void gw_node_add(struct bat_priv *bat_priv,
@@ -242,7 +271,7 @@ static void gw_node_add(struct bat_priv *bat_priv,
242 memset(gw_node, 0, sizeof(struct gw_node)); 271 memset(gw_node, 0, sizeof(struct gw_node));
243 INIT_HLIST_NODE(&gw_node->list); 272 INIT_HLIST_NODE(&gw_node->list);
244 gw_node->orig_node = orig_node; 273 gw_node->orig_node = orig_node;
245 kref_init(&gw_node->refcount); 274 atomic_set(&gw_node->refcount, 1);
246 275
247 spin_lock_bh(&bat_priv->gw_list_lock); 276 spin_lock_bh(&bat_priv->gw_list_lock);
248 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); 277 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list);
@@ -283,7 +312,7 @@ void gw_node_update(struct bat_priv *bat_priv,
283 "Gateway %pM removed from gateway list\n", 312 "Gateway %pM removed from gateway list\n",
284 orig_node->orig); 313 orig_node->orig);
285 314
286 if (gw_node == bat_priv->curr_gw) { 315 if (gw_node == rcu_dereference(bat_priv->curr_gw)) {
287 rcu_read_unlock(); 316 rcu_read_unlock();
288 gw_deselect(bat_priv); 317 gw_deselect(bat_priv);
289 return; 318 return;
@@ -321,11 +350,11 @@ void gw_node_purge(struct bat_priv *bat_priv)
321 atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) 350 atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE)
322 continue; 351 continue;
323 352
324 if (bat_priv->curr_gw == gw_node) 353 if (rcu_dereference(bat_priv->curr_gw) == gw_node)
325 gw_deselect(bat_priv); 354 gw_deselect(bat_priv);
326 355
327 hlist_del_rcu(&gw_node->list); 356 hlist_del_rcu(&gw_node->list);
328 call_rcu(&gw_node->rcu, gw_node_free_rcu); 357 gw_node_free_ref(gw_node);
329 } 358 }
330 359
331 360
@@ -335,12 +364,16 @@ void gw_node_purge(struct bat_priv *bat_priv)
335static int _write_buffer_text(struct bat_priv *bat_priv, 364static int _write_buffer_text(struct bat_priv *bat_priv,
336 struct seq_file *seq, struct gw_node *gw_node) 365 struct seq_file *seq, struct gw_node *gw_node)
337{ 366{
338 int down, up; 367 struct gw_node *curr_gw;
368 int down, up, ret;
339 369
340 gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up); 370 gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up);
341 371
342 return seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", 372 rcu_read_lock();
343 (bat_priv->curr_gw == gw_node ? "=>" : " "), 373 curr_gw = rcu_dereference(bat_priv->curr_gw);
374
375 ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n",
376 (curr_gw == gw_node ? "=>" : " "),
344 gw_node->orig_node->orig, 377 gw_node->orig_node->orig,
345 gw_node->orig_node->router->tq_avg, 378 gw_node->orig_node->router->tq_avg,
346 gw_node->orig_node->router->addr, 379 gw_node->orig_node->router->addr,
@@ -350,6 +383,9 @@ static int _write_buffer_text(struct bat_priv *bat_priv,
350 (down > 2048 ? "MBit" : "KBit"), 383 (down > 2048 ? "MBit" : "KBit"),
351 (up > 2048 ? up / 1024 : up), 384 (up > 2048 ? up / 1024 : up),
352 (up > 2048 ? "MBit" : "KBit")); 385 (up > 2048 ? "MBit" : "KBit"));
386
387 rcu_read_unlock();
388 return ret;
353} 389}
354 390
355int gw_client_seq_print_text(struct seq_file *seq, void *offset) 391int gw_client_seq_print_text(struct seq_file *seq, void *offset)
@@ -470,8 +506,12 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb)
470 if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER) 506 if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)
471 return -1; 507 return -1;
472 508
473 if (!bat_priv->curr_gw) 509 rcu_read_lock();
510 if (!rcu_dereference(bat_priv->curr_gw)) {
511 rcu_read_unlock();
474 return 0; 512 return 0;
513 }
514 rcu_read_unlock();
475 515
476 return 1; 516 return 1;
477} 517}
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 4585e654984..2aa439124ee 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b962982f017..50d3a59a3d7 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 5e728d0b795..55e527a489f 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 4f95777ce08..b3058e46ee6 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -31,36 +31,40 @@
31 31
32#include <linux/if_arp.h> 32#include <linux/if_arp.h>
33 33
34/* protect update critical side of if_list - but not the content */ 34/* protect update critical side of hardif_list - but not the content */
35static DEFINE_SPINLOCK(if_list_lock); 35static DEFINE_SPINLOCK(hardif_list_lock);
36 36
37static void hardif_free_rcu(struct rcu_head *rcu) 37
38static int batman_skb_recv(struct sk_buff *skb,
39 struct net_device *dev,
40 struct packet_type *ptype,
41 struct net_device *orig_dev);
42
43void hardif_free_rcu(struct rcu_head *rcu)
38{ 44{
39 struct batman_if *batman_if; 45 struct hard_iface *hard_iface;
40 46
41 batman_if = container_of(rcu, struct batman_if, rcu); 47 hard_iface = container_of(rcu, struct hard_iface, rcu);
42 dev_put(batman_if->net_dev); 48 dev_put(hard_iface->net_dev);
43 kref_put(&batman_if->refcount, hardif_free_ref); 49 kfree(hard_iface);
44} 50}
45 51
46struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) 52struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev)
47{ 53{
48 struct batman_if *batman_if; 54 struct hard_iface *hard_iface;
49 55
50 rcu_read_lock(); 56 rcu_read_lock();
51 list_for_each_entry_rcu(batman_if, &if_list, list) { 57 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
52 if (batman_if->net_dev == net_dev) 58 if (hard_iface->net_dev == net_dev &&
59 atomic_inc_not_zero(&hard_iface->refcount))
53 goto out; 60 goto out;
54 } 61 }
55 62
56 batman_if = NULL; 63 hard_iface = NULL;
57 64
58out: 65out:
59 if (batman_if)
60 kref_get(&batman_if->refcount);
61
62 rcu_read_unlock(); 66 rcu_read_unlock();
63 return batman_if; 67 return hard_iface;
64} 68}
65 69
66static int is_valid_iface(struct net_device *net_dev) 70static int is_valid_iface(struct net_device *net_dev)
@@ -75,13 +79,8 @@ static int is_valid_iface(struct net_device *net_dev)
75 return 0; 79 return 0;
76 80
77 /* no batman over batman */ 81 /* no batman over batman */
78#ifdef HAVE_NET_DEVICE_OPS 82 if (softif_is_valid(net_dev))
79 if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
80 return 0;
81#else
82 if (net_dev->hard_start_xmit == interface_tx)
83 return 0; 83 return 0;
84#endif
85 84
86 /* Device is being bridged */ 85 /* Device is being bridged */
87 /* if (net_dev->priv_flags & IFF_BRIDGE_PORT) 86 /* if (net_dev->priv_flags & IFF_BRIDGE_PORT)
@@ -90,27 +89,25 @@ static int is_valid_iface(struct net_device *net_dev)
90 return 1; 89 return 1;
91} 90}
92 91
93static struct batman_if *get_active_batman_if(struct net_device *soft_iface) 92static struct hard_iface *hardif_get_active(struct net_device *soft_iface)
94{ 93{
95 struct batman_if *batman_if; 94 struct hard_iface *hard_iface;
96 95
97 rcu_read_lock(); 96 rcu_read_lock();
98 list_for_each_entry_rcu(batman_if, &if_list, list) { 97 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
99 if (batman_if->soft_iface != soft_iface) 98 if (hard_iface->soft_iface != soft_iface)
100 continue; 99 continue;
101 100
102 if (batman_if->if_status == IF_ACTIVE) 101 if (hard_iface->if_status == IF_ACTIVE &&
102 atomic_inc_not_zero(&hard_iface->refcount))
103 goto out; 103 goto out;
104 } 104 }
105 105
106 batman_if = NULL; 106 hard_iface = NULL;
107 107
108out: 108out:
109 if (batman_if)
110 kref_get(&batman_if->refcount);
111
112 rcu_read_unlock(); 109 rcu_read_unlock();
113 return batman_if; 110 return hard_iface;
114} 111}
115 112
116static void update_primary_addr(struct bat_priv *bat_priv) 113static void update_primary_addr(struct bat_priv *bat_priv)
@@ -126,24 +123,24 @@ static void update_primary_addr(struct bat_priv *bat_priv)
126} 123}
127 124
128static void set_primary_if(struct bat_priv *bat_priv, 125static void set_primary_if(struct bat_priv *bat_priv,
129 struct batman_if *batman_if) 126 struct hard_iface *hard_iface)
130{ 127{
131 struct batman_packet *batman_packet; 128 struct batman_packet *batman_packet;
132 struct batman_if *old_if; 129 struct hard_iface *old_if;
133 130
134 if (batman_if) 131 if (hard_iface && !atomic_inc_not_zero(&hard_iface->refcount))
135 kref_get(&batman_if->refcount); 132 hard_iface = NULL;
136 133
137 old_if = bat_priv->primary_if; 134 old_if = bat_priv->primary_if;
138 bat_priv->primary_if = batman_if; 135 bat_priv->primary_if = hard_iface;
139 136
140 if (old_if) 137 if (old_if)
141 kref_put(&old_if->refcount, hardif_free_ref); 138 hardif_free_ref(old_if);
142 139
143 if (!bat_priv->primary_if) 140 if (!bat_priv->primary_if)
144 return; 141 return;
145 142
146 batman_packet = (struct batman_packet *)(batman_if->packet_buff); 143 batman_packet = (struct batman_packet *)(hard_iface->packet_buff);
147 batman_packet->flags = PRIMARIES_FIRST_HOP; 144 batman_packet->flags = PRIMARIES_FIRST_HOP;
148 batman_packet->ttl = TTL; 145 batman_packet->ttl = TTL;
149 146
@@ -156,42 +153,42 @@ static void set_primary_if(struct bat_priv *bat_priv,
156 atomic_set(&bat_priv->hna_local_changed, 1); 153 atomic_set(&bat_priv->hna_local_changed, 1);
157} 154}
158 155
159static bool hardif_is_iface_up(struct batman_if *batman_if) 156static bool hardif_is_iface_up(struct hard_iface *hard_iface)
160{ 157{
161 if (batman_if->net_dev->flags & IFF_UP) 158 if (hard_iface->net_dev->flags & IFF_UP)
162 return true; 159 return true;
163 160
164 return false; 161 return false;
165} 162}
166 163
167static void update_mac_addresses(struct batman_if *batman_if) 164static void update_mac_addresses(struct hard_iface *hard_iface)
168{ 165{
169 memcpy(((struct batman_packet *)(batman_if->packet_buff))->orig, 166 memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig,
170 batman_if->net_dev->dev_addr, ETH_ALEN); 167 hard_iface->net_dev->dev_addr, ETH_ALEN);
171 memcpy(((struct batman_packet *)(batman_if->packet_buff))->prev_sender, 168 memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender,
172 batman_if->net_dev->dev_addr, ETH_ALEN); 169 hard_iface->net_dev->dev_addr, ETH_ALEN);
173} 170}
174 171
175static void check_known_mac_addr(struct net_device *net_dev) 172static void check_known_mac_addr(struct net_device *net_dev)
176{ 173{
177 struct batman_if *batman_if; 174 struct hard_iface *hard_iface;
178 175
179 rcu_read_lock(); 176 rcu_read_lock();
180 list_for_each_entry_rcu(batman_if, &if_list, list) { 177 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
181 if ((batman_if->if_status != IF_ACTIVE) && 178 if ((hard_iface->if_status != IF_ACTIVE) &&
182 (batman_if->if_status != IF_TO_BE_ACTIVATED)) 179 (hard_iface->if_status != IF_TO_BE_ACTIVATED))
183 continue; 180 continue;
184 181
185 if (batman_if->net_dev == net_dev) 182 if (hard_iface->net_dev == net_dev)
186 continue; 183 continue;
187 184
188 if (!compare_orig(batman_if->net_dev->dev_addr, 185 if (!compare_eth(hard_iface->net_dev->dev_addr,
189 net_dev->dev_addr)) 186 net_dev->dev_addr))
190 continue; 187 continue;
191 188
192 pr_warning("The newly added mac address (%pM) already exists " 189 pr_warning("The newly added mac address (%pM) already exists "
193 "on: %s\n", net_dev->dev_addr, 190 "on: %s\n", net_dev->dev_addr,
194 batman_if->net_dev->name); 191 hard_iface->net_dev->name);
195 pr_warning("It is strongly recommended to keep mac addresses " 192 pr_warning("It is strongly recommended to keep mac addresses "
196 "unique to avoid problems!\n"); 193 "unique to avoid problems!\n");
197 } 194 }
@@ -201,7 +198,7 @@ static void check_known_mac_addr(struct net_device *net_dev)
201int hardif_min_mtu(struct net_device *soft_iface) 198int hardif_min_mtu(struct net_device *soft_iface)
202{ 199{
203 struct bat_priv *bat_priv = netdev_priv(soft_iface); 200 struct bat_priv *bat_priv = netdev_priv(soft_iface);
204 struct batman_if *batman_if; 201 struct hard_iface *hard_iface;
205 /* allow big frames if all devices are capable to do so 202 /* allow big frames if all devices are capable to do so
206 * (have MTU > 1500 + BAT_HEADER_LEN) */ 203 * (have MTU > 1500 + BAT_HEADER_LEN) */
207 int min_mtu = ETH_DATA_LEN; 204 int min_mtu = ETH_DATA_LEN;
@@ -210,15 +207,15 @@ int hardif_min_mtu(struct net_device *soft_iface)
210 goto out; 207 goto out;
211 208
212 rcu_read_lock(); 209 rcu_read_lock();
213 list_for_each_entry_rcu(batman_if, &if_list, list) { 210 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
214 if ((batman_if->if_status != IF_ACTIVE) && 211 if ((hard_iface->if_status != IF_ACTIVE) &&
215 (batman_if->if_status != IF_TO_BE_ACTIVATED)) 212 (hard_iface->if_status != IF_TO_BE_ACTIVATED))
216 continue; 213 continue;
217 214
218 if (batman_if->soft_iface != soft_iface) 215 if (hard_iface->soft_iface != soft_iface)
219 continue; 216 continue;
220 217
221 min_mtu = min_t(int, batman_if->net_dev->mtu - BAT_HEADER_LEN, 218 min_mtu = min_t(int, hard_iface->net_dev->mtu - BAT_HEADER_LEN,
222 min_mtu); 219 min_mtu);
223 } 220 }
224 rcu_read_unlock(); 221 rcu_read_unlock();
@@ -236,77 +233,95 @@ void update_min_mtu(struct net_device *soft_iface)
236 soft_iface->mtu = min_mtu; 233 soft_iface->mtu = min_mtu;
237} 234}
238 235
239static void hardif_activate_interface(struct batman_if *batman_if) 236static void hardif_activate_interface(struct hard_iface *hard_iface)
240{ 237{
241 struct bat_priv *bat_priv; 238 struct bat_priv *bat_priv;
242 239
243 if (batman_if->if_status != IF_INACTIVE) 240 if (hard_iface->if_status != IF_INACTIVE)
244 return; 241 return;
245 242
246 bat_priv = netdev_priv(batman_if->soft_iface); 243 bat_priv = netdev_priv(hard_iface->soft_iface);
247 244
248 update_mac_addresses(batman_if); 245 update_mac_addresses(hard_iface);
249 batman_if->if_status = IF_TO_BE_ACTIVATED; 246 hard_iface->if_status = IF_TO_BE_ACTIVATED;
250 247
251 /** 248 /**
252 * the first active interface becomes our primary interface or 249 * the first active interface becomes our primary interface or
253 * the next active interface after the old primary interface was removed 250 * the next active interface after the old primary interface was removed
254 */ 251 */
255 if (!bat_priv->primary_if) 252 if (!bat_priv->primary_if)
256 set_primary_if(bat_priv, batman_if); 253 set_primary_if(bat_priv, hard_iface);
257 254
258 bat_info(batman_if->soft_iface, "Interface activated: %s\n", 255 bat_info(hard_iface->soft_iface, "Interface activated: %s\n",
259 batman_if->net_dev->name); 256 hard_iface->net_dev->name);
260 257
261 update_min_mtu(batman_if->soft_iface); 258 update_min_mtu(hard_iface->soft_iface);
262 return; 259 return;
263} 260}
264 261
265static void hardif_deactivate_interface(struct batman_if *batman_if) 262static void hardif_deactivate_interface(struct hard_iface *hard_iface)
266{ 263{
267 if ((batman_if->if_status != IF_ACTIVE) && 264 if ((hard_iface->if_status != IF_ACTIVE) &&
268 (batman_if->if_status != IF_TO_BE_ACTIVATED)) 265 (hard_iface->if_status != IF_TO_BE_ACTIVATED))
269 return; 266 return;
270 267
271 batman_if->if_status = IF_INACTIVE; 268 hard_iface->if_status = IF_INACTIVE;
272 269
273 bat_info(batman_if->soft_iface, "Interface deactivated: %s\n", 270 bat_info(hard_iface->soft_iface, "Interface deactivated: %s\n",
274 batman_if->net_dev->name); 271 hard_iface->net_dev->name);
275 272
276 update_min_mtu(batman_if->soft_iface); 273 update_min_mtu(hard_iface->soft_iface);
277} 274}
278 275
279int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) 276int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name)
280{ 277{
281 struct bat_priv *bat_priv; 278 struct bat_priv *bat_priv;
282 struct batman_packet *batman_packet; 279 struct batman_packet *batman_packet;
280 struct net_device *soft_iface;
281 int ret;
283 282
284 if (batman_if->if_status != IF_NOT_IN_USE) 283 if (hard_iface->if_status != IF_NOT_IN_USE)
285 goto out; 284 goto out;
286 285
287 batman_if->soft_iface = dev_get_by_name(&init_net, iface_name); 286 if (!atomic_inc_not_zero(&hard_iface->refcount))
287 goto out;
288 288
289 if (!batman_if->soft_iface) { 289 soft_iface = dev_get_by_name(&init_net, iface_name);
290 batman_if->soft_iface = softif_create(iface_name);
291 290
292 if (!batman_if->soft_iface) 291 if (!soft_iface) {
292 soft_iface = softif_create(iface_name);
293
294 if (!soft_iface) {
295 ret = -ENOMEM;
293 goto err; 296 goto err;
297 }
294 298
295 /* dev_get_by_name() increases the reference counter for us */ 299 /* dev_get_by_name() increases the reference counter for us */
296 dev_hold(batman_if->soft_iface); 300 dev_hold(soft_iface);
301 }
302
303 if (!softif_is_valid(soft_iface)) {
304 pr_err("Can't create batman mesh interface %s: "
305 "already exists as regular interface\n",
306 soft_iface->name);
307 dev_put(soft_iface);
308 ret = -EINVAL;
309 goto err;
297 } 310 }
298 311
299 bat_priv = netdev_priv(batman_if->soft_iface); 312 hard_iface->soft_iface = soft_iface;
300 batman_if->packet_len = BAT_PACKET_LEN; 313 bat_priv = netdev_priv(hard_iface->soft_iface);
301 batman_if->packet_buff = kmalloc(batman_if->packet_len, GFP_ATOMIC); 314 hard_iface->packet_len = BAT_PACKET_LEN;
315 hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC);
302 316
303 if (!batman_if->packet_buff) { 317 if (!hard_iface->packet_buff) {
304 bat_err(batman_if->soft_iface, "Can't add interface packet " 318 bat_err(hard_iface->soft_iface, "Can't add interface packet "
305 "(%s): out of memory\n", batman_if->net_dev->name); 319 "(%s): out of memory\n", hard_iface->net_dev->name);
320 ret = -ENOMEM;
306 goto err; 321 goto err;
307 } 322 }
308 323
309 batman_packet = (struct batman_packet *)(batman_if->packet_buff); 324 batman_packet = (struct batman_packet *)(hard_iface->packet_buff);
310 batman_packet->packet_type = BAT_PACKET; 325 batman_packet->packet_type = BAT_PACKET;
311 batman_packet->version = COMPAT_VERSION; 326 batman_packet->version = COMPAT_VERSION;
312 batman_packet->flags = 0; 327 batman_packet->flags = 0;
@@ -314,107 +329,107 @@ int hardif_enable_interface(struct batman_if *batman_if, char *iface_name)
314 batman_packet->tq = TQ_MAX_VALUE; 329 batman_packet->tq = TQ_MAX_VALUE;
315 batman_packet->num_hna = 0; 330 batman_packet->num_hna = 0;
316 331
317 batman_if->if_num = bat_priv->num_ifaces; 332 hard_iface->if_num = bat_priv->num_ifaces;
318 bat_priv->num_ifaces++; 333 bat_priv->num_ifaces++;
319 batman_if->if_status = IF_INACTIVE; 334 hard_iface->if_status = IF_INACTIVE;
320 orig_hash_add_if(batman_if, bat_priv->num_ifaces); 335 orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
321 336
322 batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); 337 hard_iface->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN);
323 batman_if->batman_adv_ptype.func = batman_skb_recv; 338 hard_iface->batman_adv_ptype.func = batman_skb_recv;
324 batman_if->batman_adv_ptype.dev = batman_if->net_dev; 339 hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
325 kref_get(&batman_if->refcount); 340 dev_add_pack(&hard_iface->batman_adv_ptype);
326 dev_add_pack(&batman_if->batman_adv_ptype);
327 341
328 atomic_set(&batman_if->seqno, 1); 342 atomic_set(&hard_iface->seqno, 1);
329 atomic_set(&batman_if->frag_seqno, 1); 343 atomic_set(&hard_iface->frag_seqno, 1);
330 bat_info(batman_if->soft_iface, "Adding interface: %s\n", 344 bat_info(hard_iface->soft_iface, "Adding interface: %s\n",
331 batman_if->net_dev->name); 345 hard_iface->net_dev->name);
332 346
333 if (atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < 347 if (atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu <
334 ETH_DATA_LEN + BAT_HEADER_LEN) 348 ETH_DATA_LEN + BAT_HEADER_LEN)
335 bat_info(batman_if->soft_iface, 349 bat_info(hard_iface->soft_iface,
336 "The MTU of interface %s is too small (%i) to handle " 350 "The MTU of interface %s is too small (%i) to handle "
337 "the transport of batman-adv packets. Packets going " 351 "the transport of batman-adv packets. Packets going "
338 "over this interface will be fragmented on layer2 " 352 "over this interface will be fragmented on layer2 "
339 "which could impact the performance. Setting the MTU " 353 "which could impact the performance. Setting the MTU "
340 "to %zi would solve the problem.\n", 354 "to %zi would solve the problem.\n",
341 batman_if->net_dev->name, batman_if->net_dev->mtu, 355 hard_iface->net_dev->name, hard_iface->net_dev->mtu,
342 ETH_DATA_LEN + BAT_HEADER_LEN); 356 ETH_DATA_LEN + BAT_HEADER_LEN);
343 357
344 if (!atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < 358 if (!atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu <
345 ETH_DATA_LEN + BAT_HEADER_LEN) 359 ETH_DATA_LEN + BAT_HEADER_LEN)
346 bat_info(batman_if->soft_iface, 360 bat_info(hard_iface->soft_iface,
347 "The MTU of interface %s is too small (%i) to handle " 361 "The MTU of interface %s is too small (%i) to handle "
348 "the transport of batman-adv packets. If you experience" 362 "the transport of batman-adv packets. If you experience"
349 " problems getting traffic through try increasing the " 363 " problems getting traffic through try increasing the "
350 "MTU to %zi.\n", 364 "MTU to %zi.\n",
351 batman_if->net_dev->name, batman_if->net_dev->mtu, 365 hard_iface->net_dev->name, hard_iface->net_dev->mtu,
352 ETH_DATA_LEN + BAT_HEADER_LEN); 366 ETH_DATA_LEN + BAT_HEADER_LEN);
353 367
354 if (hardif_is_iface_up(batman_if)) 368 if (hardif_is_iface_up(hard_iface))
355 hardif_activate_interface(batman_if); 369 hardif_activate_interface(hard_iface);
356 else 370 else
357 bat_err(batman_if->soft_iface, "Not using interface %s " 371 bat_err(hard_iface->soft_iface, "Not using interface %s "
358 "(retrying later): interface not active\n", 372 "(retrying later): interface not active\n",
359 batman_if->net_dev->name); 373 hard_iface->net_dev->name);
360 374
361 /* begin scheduling originator messages on that interface */ 375 /* begin scheduling originator messages on that interface */
362 schedule_own_packet(batman_if); 376 schedule_own_packet(hard_iface);
363 377
364out: 378out:
365 return 0; 379 return 0;
366 380
367err: 381err:
368 return -ENOMEM; 382 hardif_free_ref(hard_iface);
383 return ret;
369} 384}
370 385
371void hardif_disable_interface(struct batman_if *batman_if) 386void hardif_disable_interface(struct hard_iface *hard_iface)
372{ 387{
373 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 388 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
374 389
375 if (batman_if->if_status == IF_ACTIVE) 390 if (hard_iface->if_status == IF_ACTIVE)
376 hardif_deactivate_interface(batman_if); 391 hardif_deactivate_interface(hard_iface);
377 392
378 if (batman_if->if_status != IF_INACTIVE) 393 if (hard_iface->if_status != IF_INACTIVE)
379 return; 394 return;
380 395
381 bat_info(batman_if->soft_iface, "Removing interface: %s\n", 396 bat_info(hard_iface->soft_iface, "Removing interface: %s\n",
382 batman_if->net_dev->name); 397 hard_iface->net_dev->name);
383 dev_remove_pack(&batman_if->batman_adv_ptype); 398 dev_remove_pack(&hard_iface->batman_adv_ptype);
384 kref_put(&batman_if->refcount, hardif_free_ref);
385 399
386 bat_priv->num_ifaces--; 400 bat_priv->num_ifaces--;
387 orig_hash_del_if(batman_if, bat_priv->num_ifaces); 401 orig_hash_del_if(hard_iface, bat_priv->num_ifaces);
388 402
389 if (batman_if == bat_priv->primary_if) { 403 if (hard_iface == bat_priv->primary_if) {
390 struct batman_if *new_if; 404 struct hard_iface *new_if;
391 405
392 new_if = get_active_batman_if(batman_if->soft_iface); 406 new_if = hardif_get_active(hard_iface->soft_iface);
393 set_primary_if(bat_priv, new_if); 407 set_primary_if(bat_priv, new_if);
394 408
395 if (new_if) 409 if (new_if)
396 kref_put(&new_if->refcount, hardif_free_ref); 410 hardif_free_ref(new_if);
397 } 411 }
398 412
399 kfree(batman_if->packet_buff); 413 kfree(hard_iface->packet_buff);
400 batman_if->packet_buff = NULL; 414 hard_iface->packet_buff = NULL;
401 batman_if->if_status = IF_NOT_IN_USE; 415 hard_iface->if_status = IF_NOT_IN_USE;
402 416
403 /* delete all references to this batman_if */ 417 /* delete all references to this hard_iface */
404 purge_orig_ref(bat_priv); 418 purge_orig_ref(bat_priv);
405 purge_outstanding_packets(bat_priv, batman_if); 419 purge_outstanding_packets(bat_priv, hard_iface);
406 dev_put(batman_if->soft_iface); 420 dev_put(hard_iface->soft_iface);
407 421
408 /* nobody uses this interface anymore */ 422 /* nobody uses this interface anymore */
409 if (!bat_priv->num_ifaces) 423 if (!bat_priv->num_ifaces)
410 softif_destroy(batman_if->soft_iface); 424 softif_destroy(hard_iface->soft_iface);
411 425
412 batman_if->soft_iface = NULL; 426 hard_iface->soft_iface = NULL;
427 hardif_free_ref(hard_iface);
413} 428}
414 429
415static struct batman_if *hardif_add_interface(struct net_device *net_dev) 430static struct hard_iface *hardif_add_interface(struct net_device *net_dev)
416{ 431{
417 struct batman_if *batman_if; 432 struct hard_iface *hard_iface;
418 int ret; 433 int ret;
419 434
420 ret = is_valid_iface(net_dev); 435 ret = is_valid_iface(net_dev);
@@ -423,73 +438,73 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev)
423 438
424 dev_hold(net_dev); 439 dev_hold(net_dev);
425 440
426 batman_if = kmalloc(sizeof(struct batman_if), GFP_ATOMIC); 441 hard_iface = kmalloc(sizeof(struct hard_iface), GFP_ATOMIC);
427 if (!batman_if) { 442 if (!hard_iface) {
428 pr_err("Can't add interface (%s): out of memory\n", 443 pr_err("Can't add interface (%s): out of memory\n",
429 net_dev->name); 444 net_dev->name);
430 goto release_dev; 445 goto release_dev;
431 } 446 }
432 447
433 ret = sysfs_add_hardif(&batman_if->hardif_obj, net_dev); 448 ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev);
434 if (ret) 449 if (ret)
435 goto free_if; 450 goto free_if;
436 451
437 batman_if->if_num = -1; 452 hard_iface->if_num = -1;
438 batman_if->net_dev = net_dev; 453 hard_iface->net_dev = net_dev;
439 batman_if->soft_iface = NULL; 454 hard_iface->soft_iface = NULL;
440 batman_if->if_status = IF_NOT_IN_USE; 455 hard_iface->if_status = IF_NOT_IN_USE;
441 INIT_LIST_HEAD(&batman_if->list); 456 INIT_LIST_HEAD(&hard_iface->list);
442 kref_init(&batman_if->refcount); 457 /* extra reference for return */
458 atomic_set(&hard_iface->refcount, 2);
443 459
444 check_known_mac_addr(batman_if->net_dev); 460 check_known_mac_addr(hard_iface->net_dev);
445 461
446 spin_lock(&if_list_lock); 462 spin_lock(&hardif_list_lock);
447 list_add_tail_rcu(&batman_if->list, &if_list); 463 list_add_tail_rcu(&hard_iface->list, &hardif_list);
448 spin_unlock(&if_list_lock); 464 spin_unlock(&hardif_list_lock);
449 465
450 /* extra reference for return */ 466 return hard_iface;
451 kref_get(&batman_if->refcount);
452 return batman_if;
453 467
454free_if: 468free_if:
455 kfree(batman_if); 469 kfree(hard_iface);
456release_dev: 470release_dev:
457 dev_put(net_dev); 471 dev_put(net_dev);
458out: 472out:
459 return NULL; 473 return NULL;
460} 474}
461 475
462static void hardif_remove_interface(struct batman_if *batman_if) 476static void hardif_remove_interface(struct hard_iface *hard_iface)
463{ 477{
464 /* first deactivate interface */ 478 /* first deactivate interface */
465 if (batman_if->if_status != IF_NOT_IN_USE) 479 if (hard_iface->if_status != IF_NOT_IN_USE)
466 hardif_disable_interface(batman_if); 480 hardif_disable_interface(hard_iface);
467 481
468 if (batman_if->if_status != IF_NOT_IN_USE) 482 if (hard_iface->if_status != IF_NOT_IN_USE)
469 return; 483 return;
470 484
471 batman_if->if_status = IF_TO_BE_REMOVED; 485 hard_iface->if_status = IF_TO_BE_REMOVED;
472 sysfs_del_hardif(&batman_if->hardif_obj); 486 sysfs_del_hardif(&hard_iface->hardif_obj);
473 call_rcu(&batman_if->rcu, hardif_free_rcu); 487 hardif_free_ref(hard_iface);
474} 488}
475 489
476void hardif_remove_interfaces(void) 490void hardif_remove_interfaces(void)
477{ 491{
478 struct batman_if *batman_if, *batman_if_tmp; 492 struct hard_iface *hard_iface, *hard_iface_tmp;
479 struct list_head if_queue; 493 struct list_head if_queue;
480 494
481 INIT_LIST_HEAD(&if_queue); 495 INIT_LIST_HEAD(&if_queue);
482 496
483 spin_lock(&if_list_lock); 497 spin_lock(&hardif_list_lock);
484 list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) { 498 list_for_each_entry_safe(hard_iface, hard_iface_tmp,
485 list_del_rcu(&batman_if->list); 499 &hardif_list, list) {
486 list_add_tail(&batman_if->list, &if_queue); 500 list_del_rcu(&hard_iface->list);
501 list_add_tail(&hard_iface->list, &if_queue);
487 } 502 }
488 spin_unlock(&if_list_lock); 503 spin_unlock(&hardif_list_lock);
489 504
490 rtnl_lock(); 505 rtnl_lock();
491 list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) { 506 list_for_each_entry_safe(hard_iface, hard_iface_tmp, &if_queue, list) {
492 hardif_remove_interface(batman_if); 507 hardif_remove_interface(hard_iface);
493 } 508 }
494 rtnl_unlock(); 509 rtnl_unlock();
495} 510}
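
hardif_remove_interfaces() uses a two-phase teardown: entries are unlinked under the short-lived spinlock and parked on a private queue, so the heavyweight removal can then run under rtnl_lock() without holding hardif_list_lock. The shape of the pattern, with hypothetical names:

    LIST_HEAD(queue);

    spin_lock(&list_lock);                  /* short critical section */
    list_for_each_entry_safe(obj, tmp, &global_list, list) {
            list_del_rcu(&obj->list);       /* readers may still see obj */
            list_add_tail(&obj->list, &queue);
    }
    spin_unlock(&list_lock);

    rtnl_lock();                            /* slow work, no list lock held */
    list_for_each_entry_safe(obj, tmp, &queue, list)
            teardown(obj);
    rtnl_unlock();
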
@@ -498,43 +513,43 @@ static int hard_if_event(struct notifier_block *this,
498 unsigned long event, void *ptr) 513 unsigned long event, void *ptr)
499{ 514{
500 struct net_device *net_dev = (struct net_device *)ptr; 515 struct net_device *net_dev = (struct net_device *)ptr;
501 struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); 516 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
502 struct bat_priv *bat_priv; 517 struct bat_priv *bat_priv;
503 518
504 if (!batman_if && event == NETDEV_REGISTER) 519 if (!hard_iface && event == NETDEV_REGISTER)
505 batman_if = hardif_add_interface(net_dev); 520 hard_iface = hardif_add_interface(net_dev);
506 521
507 if (!batman_if) 522 if (!hard_iface)
508 goto out; 523 goto out;
509 524
510 switch (event) { 525 switch (event) {
511 case NETDEV_UP: 526 case NETDEV_UP:
512 hardif_activate_interface(batman_if); 527 hardif_activate_interface(hard_iface);
513 break; 528 break;
514 case NETDEV_GOING_DOWN: 529 case NETDEV_GOING_DOWN:
515 case NETDEV_DOWN: 530 case NETDEV_DOWN:
516 hardif_deactivate_interface(batman_if); 531 hardif_deactivate_interface(hard_iface);
517 break; 532 break;
518 case NETDEV_UNREGISTER: 533 case NETDEV_UNREGISTER:
519 spin_lock(&if_list_lock); 534 spin_lock(&hardif_list_lock);
520 list_del_rcu(&batman_if->list); 535 list_del_rcu(&hard_iface->list);
521 spin_unlock(&if_list_lock); 536 spin_unlock(&hardif_list_lock);
522 537
523 hardif_remove_interface(batman_if); 538 hardif_remove_interface(hard_iface);
524 break; 539 break;
525 case NETDEV_CHANGEMTU: 540 case NETDEV_CHANGEMTU:
526 if (batman_if->soft_iface) 541 if (hard_iface->soft_iface)
527 update_min_mtu(batman_if->soft_iface); 542 update_min_mtu(hard_iface->soft_iface);
528 break; 543 break;
529 case NETDEV_CHANGEADDR: 544 case NETDEV_CHANGEADDR:
530 if (batman_if->if_status == IF_NOT_IN_USE) 545 if (hard_iface->if_status == IF_NOT_IN_USE)
531 goto hardif_put; 546 goto hardif_put;
532 547
533 check_known_mac_addr(batman_if->net_dev); 548 check_known_mac_addr(hard_iface->net_dev);
534 update_mac_addresses(batman_if); 549 update_mac_addresses(hard_iface);
535 550
536 bat_priv = netdev_priv(batman_if->soft_iface); 551 bat_priv = netdev_priv(hard_iface->soft_iface);
537 if (batman_if == bat_priv->primary_if) 552 if (hard_iface == bat_priv->primary_if)
538 update_primary_addr(bat_priv); 553 update_primary_addr(bat_priv);
539 break; 554 break;
540 default: 555 default:
@@ -542,22 +557,23 @@ static int hard_if_event(struct notifier_block *this,
542 }; 557 };
543 558
544hardif_put: 559hardif_put:
545 kref_put(&batman_if->refcount, hardif_free_ref); 560 hardif_free_ref(hard_iface);
546out: 561out:
547 return NOTIFY_DONE; 562 return NOTIFY_DONE;
548} 563}
549 564
550/* receive a packet with the batman ethertype coming on a hard 565/* receive a packet with the batman ethertype coming on a hard
551 * interface */ 566 * interface */
552int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, 567static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
553 struct packet_type *ptype, struct net_device *orig_dev) 568 struct packet_type *ptype,
569 struct net_device *orig_dev)
554{ 570{
555 struct bat_priv *bat_priv; 571 struct bat_priv *bat_priv;
556 struct batman_packet *batman_packet; 572 struct batman_packet *batman_packet;
557 struct batman_if *batman_if; 573 struct hard_iface *hard_iface;
558 int ret; 574 int ret;
559 575
560 batman_if = container_of(ptype, struct batman_if, batman_adv_ptype); 576 hard_iface = container_of(ptype, struct hard_iface, batman_adv_ptype);
561 skb = skb_share_check(skb, GFP_ATOMIC); 577 skb = skb_share_check(skb, GFP_ATOMIC);
562 578
563 /* skb was released by skb_share_check() */ 579 /* skb was released by skb_share_check() */
@@ -573,16 +589,16 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
573 || !skb_mac_header(skb))) 589 || !skb_mac_header(skb)))
574 goto err_free; 590 goto err_free;
575 591
576 if (!batman_if->soft_iface) 592 if (!hard_iface->soft_iface)
577 goto err_free; 593 goto err_free;
578 594
579 bat_priv = netdev_priv(batman_if->soft_iface); 595 bat_priv = netdev_priv(hard_iface->soft_iface);
580 596
581 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) 597 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
582 goto err_free; 598 goto err_free;
583 599
584 /* discard frames on not active interfaces */ 600 /* discard frames on not active interfaces */
585 if (batman_if->if_status != IF_ACTIVE) 601 if (hard_iface->if_status != IF_ACTIVE)
586 goto err_free; 602 goto err_free;
587 603
588 batman_packet = (struct batman_packet *)skb->data; 604 batman_packet = (struct batman_packet *)skb->data;
@@ -600,32 +616,32 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
600 switch (batman_packet->packet_type) { 616 switch (batman_packet->packet_type) {
601 /* batman originator packet */ 617 /* batman originator packet */
602 case BAT_PACKET: 618 case BAT_PACKET:
603 ret = recv_bat_packet(skb, batman_if); 619 ret = recv_bat_packet(skb, hard_iface);
604 break; 620 break;
605 621
606 /* batman icmp packet */ 622 /* batman icmp packet */
607 case BAT_ICMP: 623 case BAT_ICMP:
608 ret = recv_icmp_packet(skb, batman_if); 624 ret = recv_icmp_packet(skb, hard_iface);
609 break; 625 break;
610 626
611 /* unicast packet */ 627 /* unicast packet */
612 case BAT_UNICAST: 628 case BAT_UNICAST:
613 ret = recv_unicast_packet(skb, batman_if); 629 ret = recv_unicast_packet(skb, hard_iface);
614 break; 630 break;
615 631
616 /* fragmented unicast packet */ 632 /* fragmented unicast packet */
617 case BAT_UNICAST_FRAG: 633 case BAT_UNICAST_FRAG:
618 ret = recv_ucast_frag_packet(skb, batman_if); 634 ret = recv_ucast_frag_packet(skb, hard_iface);
619 break; 635 break;
620 636
621 /* broadcast packet */ 637 /* broadcast packet */
622 case BAT_BCAST: 638 case BAT_BCAST:
623 ret = recv_bcast_packet(skb, batman_if); 639 ret = recv_bcast_packet(skb, hard_iface);
624 break; 640 break;
625 641
626 /* vis packet */ 642 /* vis packet */
627 case BAT_VIS: 643 case BAT_VIS:
628 ret = recv_vis_packet(skb, batman_if); 644 ret = recv_vis_packet(skb, hard_iface);
629 break; 645 break;
630 default: 646 default:
631 ret = NET_RX_DROP; 647 ret = NET_RX_DROP;
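
batman_skb_recv() can become static because nothing refers to it by name outside this file anymore; packets reach it only through the per-interface packet_type hook. A sketch of that wiring, assuming it is set up when the interface is enabled and that 0x4305 is the batman ethertype:

    hard_iface->batman_adv_ptype.type = __constant_htons(0x4305);
    hard_iface->batman_adv_ptype.func = batman_skb_recv;
    hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
    dev_add_pack(&hard_iface->batman_adv_ptype);    /* start receiving */
    /* teardown: dev_remove_pack(&hard_iface->batman_adv_ptype); */
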
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 30ec3b8db45..a9ddf36e51c 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -31,23 +31,18 @@
31 31
32extern struct notifier_block hard_if_notifier; 32extern struct notifier_block hard_if_notifier;
33 33
34struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev); 34struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev);
35int hardif_enable_interface(struct batman_if *batman_if, char *iface_name); 35int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name);
36void hardif_disable_interface(struct batman_if *batman_if); 36void hardif_disable_interface(struct hard_iface *hard_iface);
37void hardif_remove_interfaces(void); 37void hardif_remove_interfaces(void);
38int batman_skb_recv(struct sk_buff *skb,
39 struct net_device *dev,
40 struct packet_type *ptype,
41 struct net_device *orig_dev);
42int hardif_min_mtu(struct net_device *soft_iface); 38int hardif_min_mtu(struct net_device *soft_iface);
43void update_min_mtu(struct net_device *soft_iface); 39void update_min_mtu(struct net_device *soft_iface);
40void hardif_free_rcu(struct rcu_head *rcu);
44 41
45static inline void hardif_free_ref(struct kref *refcount) 42static inline void hardif_free_ref(struct hard_iface *hard_iface)
46{ 43{
47 struct batman_if *batman_if; 44 if (atomic_dec_and_test(&hard_iface->refcount))
48 45 call_rcu(&hard_iface->rcu, hardif_free_rcu);
49 batman_if = container_of(refcount, struct batman_if, refcount);
50 kfree(batman_if);
51} 46}
52 47
53#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ 48#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */
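
The new hardif_free_ref() pairs with atomic_inc_not_zero() on the lookup side: a reference may only be taken while the count is still non-zero, otherwise the object is already queued for RCU freeing. A hypothetical counterpart helper:

    static inline struct hard_iface *hardif_get_ref(struct hard_iface *hard_iface)
    {
            if (!atomic_inc_not_zero(&hard_iface->refcount))
                    return NULL;    /* count hit zero, object is dying */
            return hard_iface;
    }
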
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 26e623eb9de..c5213d8f2cc 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
@@ -27,13 +27,16 @@ static void hash_init(struct hashtable_t *hash)
27{ 27{
28 int i; 28 int i;
29 29
30 for (i = 0 ; i < hash->size; i++) 30 for (i = 0 ; i < hash->size; i++) {
31 INIT_HLIST_HEAD(&hash->table[i]); 31 INIT_HLIST_HEAD(&hash->table[i]);
32 spin_lock_init(&hash->list_locks[i]);
33 }
32} 34}
33 35
34/* free only the hashtable and the hash itself. */ 36/* free only the hashtable and the hash itself. */
35void hash_destroy(struct hashtable_t *hash) 37void hash_destroy(struct hashtable_t *hash)
36{ 38{
39 kfree(hash->list_locks);
37 kfree(hash->table); 40 kfree(hash->table);
38 kfree(hash); 41 kfree(hash);
39} 42}
@@ -43,20 +46,25 @@ struct hashtable_t *hash_new(int size)
43{ 46{
44 struct hashtable_t *hash; 47 struct hashtable_t *hash;
45 48
46 hash = kmalloc(sizeof(struct hashtable_t) , GFP_ATOMIC); 49 hash = kmalloc(sizeof(struct hashtable_t), GFP_ATOMIC);
47
48 if (!hash) 50 if (!hash)
49 return NULL; 51 return NULL;
50 52
51 hash->size = size;
52 hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC); 53 hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC);
54 if (!hash->table)
55 goto free_hash;
53 56
54 if (!hash->table) { 57 hash->list_locks = kmalloc(sizeof(spinlock_t) * size, GFP_ATOMIC);
55 kfree(hash); 58 if (!hash->list_locks)
56 return NULL; 59 goto free_table;
57 }
58 60
61 hash->size = size;
59 hash_init(hash); 62 hash_init(hash);
60
61 return hash; 63 return hash;
64
65free_table:
66 kfree(hash->table);
67free_hash:
68 kfree(hash);
69 return NULL;
62} 70}
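
Usage keeps the familiar alloc/teardown pairing; note that hash_destroy() must now free the lock array as well. A short sketch:

    struct hashtable_t *hash = hash_new(1024);
    if (!hash)
            return -ENOMEM;
    /* ... hash_add()/hash_remove() against the table ... */
    hash_destroy(hash);     /* frees list_locks, table and the hash itself */
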
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 09216ade16f..434822b2747 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Simon Wunderlich, Marek Lindner 4 * Simon Wunderlich, Marek Lindner
5 * 5 *
@@ -28,32 +28,23 @@
28 * compare 2 element datas for their keys, 28 * compare 2 element datas for their keys,
29 * return 0 if same and not 0 if not 29 * return 0 if same and not 0 if not
30 * same */ 30 * same */
31typedef int (*hashdata_compare_cb)(void *, void *); 31typedef int (*hashdata_compare_cb)(struct hlist_node *, void *);
32 32
33/* the hashfunction, should return an index 33/* the hashfunction, should return an index
34 * based on the key in the data of the first 34 * based on the key in the data of the first
35 * argument and the size the second */ 35 * argument and the size the second */
36typedef int (*hashdata_choose_cb)(void *, int); 36typedef int (*hashdata_choose_cb)(void *, int);
37typedef void (*hashdata_free_cb)(void *, void *); 37typedef void (*hashdata_free_cb)(struct hlist_node *, void *);
38
39struct element_t {
40 void *data; /* pointer to the data */
41 struct hlist_node hlist; /* bucket list pointer */
42};
43 38
44struct hashtable_t { 39struct hashtable_t {
45 struct hlist_head *table; /* the hashtable itself, with the buckets */ 40 struct hlist_head *table; /* the hashtable itself with the buckets */
41 spinlock_t *list_locks; /* spinlock for each hash list entry */
46 int size; /* size of hashtable */ 42 int size; /* size of hashtable */
47}; 43};
48 44
49/* allocates and clears the hash */ 45/* allocates and clears the hash */
50struct hashtable_t *hash_new(int size); 46struct hashtable_t *hash_new(int size);
51 47
52/* remove element if you already found the element you want to delete and don't
53 * need the overhead to find it again with hash_remove(). But usually, you
54 * don't want to use this function, as it fiddles with hash-internals. */
55void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem);
56
57/* free only the hashtable and the hash itself. */ 48/* free only the hashtable and the hash itself. */
58void hash_destroy(struct hashtable_t *hash); 49void hash_destroy(struct hashtable_t *hash);
59 50
@@ -64,21 +55,22 @@ static inline void hash_delete(struct hashtable_t *hash,
64 hashdata_free_cb free_cb, void *arg) 55 hashdata_free_cb free_cb, void *arg)
65{ 56{
66 struct hlist_head *head; 57 struct hlist_head *head;
67 struct hlist_node *walk, *safe; 58 struct hlist_node *node, *node_tmp;
68 struct element_t *bucket; 59 spinlock_t *list_lock; /* spinlock to protect write access */
69 int i; 60 int i;
70 61
71 for (i = 0; i < hash->size; i++) { 62 for (i = 0; i < hash->size; i++) {
72 head = &hash->table[i]; 63 head = &hash->table[i];
64 list_lock = &hash->list_locks[i];
73 65
74 hlist_for_each_safe(walk, safe, head) { 66 spin_lock_bh(list_lock);
75 bucket = hlist_entry(walk, struct element_t, hlist); 67 hlist_for_each_safe(node, node_tmp, head) {
76 if (free_cb) 68 hlist_del_rcu(node);
77 free_cb(bucket->data, arg);
78 69
79 hlist_del(walk); 70 if (free_cb)
80 kfree(bucket); 71 free_cb(node, arg);
81 } 72 }
73 spin_unlock_bh(list_lock);
82 } 74 }
83 75
84 hash_destroy(hash); 76 hash_destroy(hash);
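
With struct element_t gone, the hlist_node is embedded in the element itself and callbacks recover the element via container_of(). An illustrative free_cb (struct my_elem is hypothetical):

    struct my_elem {
            struct hlist_node hash_entry;   /* embedded bucket linkage */
            uint8_t key[ETH_ALEN];
    };

    static void my_free_cb(struct hlist_node *node, void *arg)
    {
            struct my_elem *elem = container_of(node, struct my_elem,
                                                hash_entry);
            kfree(elem);
    }
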
@@ -87,35 +79,41 @@ static inline void hash_delete(struct hashtable_t *hash,
87/* adds data to the hashtable. returns 0 on success, -1 on error */ 79/* adds data to the hashtable. returns 0 on success, -1 on error */
88static inline int hash_add(struct hashtable_t *hash, 80static inline int hash_add(struct hashtable_t *hash,
89 hashdata_compare_cb compare, 81 hashdata_compare_cb compare,
90 hashdata_choose_cb choose, void *data) 82 hashdata_choose_cb choose,
83 void *data, struct hlist_node *data_node)
91{ 84{
92 int index; 85 int index;
93 struct hlist_head *head; 86 struct hlist_head *head;
94 struct hlist_node *walk, *safe; 87 struct hlist_node *node;
95 struct element_t *bucket; 88 spinlock_t *list_lock; /* spinlock to protect write access */
96 89
97 if (!hash) 90 if (!hash)
98 return -1; 91 goto err;
99 92
100 index = choose(data, hash->size); 93 index = choose(data, hash->size);
101 head = &hash->table[index]; 94 head = &hash->table[index];
95 list_lock = &hash->list_locks[index];
96
97 rcu_read_lock();
98 __hlist_for_each_rcu(node, head) {
99 if (!compare(node, data))
100 continue;
102 101
103 hlist_for_each_safe(walk, safe, head) { 102 goto err_unlock;
104 bucket = hlist_entry(walk, struct element_t, hlist);
105 if (compare(bucket->data, data))
106 return -1;
107 } 103 }
104 rcu_read_unlock();
108 105
109 /* no duplicate found in list, add new element */ 106 /* no duplicate found in list, add new element */
110 bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC); 107 spin_lock_bh(list_lock);
111 108 hlist_add_head_rcu(data_node, head);
112 if (!bucket) 109 spin_unlock_bh(list_lock);
113 return -1;
114
115 bucket->data = data;
116 hlist_add_head(&bucket->hlist, head);
117 110
118 return 0; 111 return 0;
112
113err_unlock:
114 rcu_read_unlock();
115err:
116 return -1;
119} 117}
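
Callers now pass both the key data and the embedded node: duplicates are checked under rcu_read_lock(), while the insertion itself takes only the per-bucket lock, so writers on different buckets no longer serialize against each other. Usage sketch (my_* names hypothetical):

    if (hash_add(hash, my_compare, my_choose,
                 elem, &elem->hash_entry) < 0)
            kfree(elem);    /* duplicate found or no table */
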
120 118
121/* removes data from hash, if found. returns pointer do data on success, so you 119/* removes data from hash, if found. returns pointer do data on success, so you
@@ -127,50 +125,25 @@ static inline void *hash_remove(struct hashtable_t *hash,
127 hashdata_choose_cb choose, void *data) 125 hashdata_choose_cb choose, void *data)
128{ 126{
129 size_t index; 127 size_t index;
130 struct hlist_node *walk; 128 struct hlist_node *node;
131 struct element_t *bucket;
132 struct hlist_head *head; 129 struct hlist_head *head;
133 void *data_save; 130 void *data_save = NULL;
134 131
135 index = choose(data, hash->size); 132 index = choose(data, hash->size);
136 head = &hash->table[index]; 133 head = &hash->table[index];
137 134
138 hlist_for_each_entry(bucket, walk, head, hlist) { 135 spin_lock_bh(&hash->list_locks[index]);
139 if (compare(bucket->data, data)) { 136 hlist_for_each(node, head) {
140 data_save = bucket->data; 137 if (!compare(node, data))
141 hlist_del(walk); 138 continue;
142 kfree(bucket);
143 return data_save;
144 }
145 }
146
147 return NULL;
148}
149
150/* finds data, based on the key in keydata. returns the found data on success,
151 * or NULL on error */
152static inline void *hash_find(struct hashtable_t *hash,
153 hashdata_compare_cb compare,
154 hashdata_choose_cb choose, void *keydata)
155{
156 int index;
157 struct hlist_head *head;
158 struct hlist_node *walk;
159 struct element_t *bucket;
160
161 if (!hash)
162 return NULL;
163
164 index = choose(keydata , hash->size);
165 head = &hash->table[index];
166 139
167 hlist_for_each(walk, head) { 140 data_save = node;
168 bucket = hlist_entry(walk, struct element_t, hlist); 141 hlist_del_rcu(node);
169 if (compare(bucket->data, keydata)) 142 break;
170 return bucket->data;
171 } 143 }
144 spin_unlock_bh(&hash->list_locks[index]);
172 145
173 return NULL; 146 return data_save;
174} 147}
175 148
176#endif /* _NET_BATMAN_ADV_HASH_H_ */ 149#endif /* _NET_BATMAN_ADV_HASH_H_ */
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index ecf6d7ffab2..34ce56c358e 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -24,7 +24,6 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include "icmp_socket.h" 25#include "icmp_socket.h"
26#include "send.h" 26#include "send.h"
27#include "types.h"
28#include "hash.h" 27#include "hash.h"
29#include "originator.h" 28#include "originator.h"
30#include "hard-interface.h" 29#include "hard-interface.h"
@@ -157,10 +156,9 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
157 struct sk_buff *skb; 156 struct sk_buff *skb;
158 struct icmp_packet_rr *icmp_packet; 157 struct icmp_packet_rr *icmp_packet;
159 158
160 struct orig_node *orig_node; 159 struct orig_node *orig_node = NULL;
161 struct batman_if *batman_if; 160 struct neigh_node *neigh_node = NULL;
162 size_t packet_len = sizeof(struct icmp_packet); 161 size_t packet_len = sizeof(struct icmp_packet);
163 uint8_t dstaddr[ETH_ALEN];
164 162
165 if (len < sizeof(struct icmp_packet)) { 163 if (len < sizeof(struct icmp_packet)) {
166 bat_dbg(DBG_BATMAN, bat_priv, 164 bat_dbg(DBG_BATMAN, bat_priv,
@@ -220,47 +218,52 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
220 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) 218 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
221 goto dst_unreach; 219 goto dst_unreach;
222 220
223 spin_lock_bh(&bat_priv->orig_hash_lock); 221 rcu_read_lock();
224 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, 222 orig_node = orig_hash_find(bat_priv, icmp_packet->dst);
225 compare_orig, choose_orig,
226 icmp_packet->dst));
227 223
228 if (!orig_node) 224 if (!orig_node)
229 goto unlock; 225 goto unlock;
230 226
231 if (!orig_node->router) 227 neigh_node = orig_node->router;
228
229 if (!neigh_node)
232 goto unlock; 230 goto unlock;
233 231
234 batman_if = orig_node->router->if_incoming; 232 if (!atomic_inc_not_zero(&neigh_node->refcount)) {
235 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); 233 neigh_node = NULL;
234 goto unlock;
235 }
236 236
237 spin_unlock_bh(&bat_priv->orig_hash_lock); 237 rcu_read_unlock();
238 238
239 if (!batman_if) 239 if (!neigh_node->if_incoming)
240 goto dst_unreach; 240 goto dst_unreach;
241 241
242 if (batman_if->if_status != IF_ACTIVE) 242 if (neigh_node->if_incoming->if_status != IF_ACTIVE)
243 goto dst_unreach; 243 goto dst_unreach;
244 244
245 memcpy(icmp_packet->orig, 245 memcpy(icmp_packet->orig,
246 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); 246 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
247 247
248 if (packet_len == sizeof(struct icmp_packet_rr)) 248 if (packet_len == sizeof(struct icmp_packet_rr))
249 memcpy(icmp_packet->rr, batman_if->net_dev->dev_addr, ETH_ALEN); 249 memcpy(icmp_packet->rr,
250 250 neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN);
251
252 send_skb_packet(skb, batman_if, dstaddr);
253 251
252 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
254 goto out; 253 goto out;
255 254
256unlock: 255unlock:
257 spin_unlock_bh(&bat_priv->orig_hash_lock); 256 rcu_read_unlock();
258dst_unreach: 257dst_unreach:
259 icmp_packet->msg_type = DESTINATION_UNREACHABLE; 258 icmp_packet->msg_type = DESTINATION_UNREACHABLE;
260 bat_socket_add_packet(socket_client, icmp_packet, packet_len); 259 bat_socket_add_packet(socket_client, icmp_packet, packet_len);
261free_skb: 260free_skb:
262 kfree_skb(skb); 261 kfree_skb(skb);
263out: 262out:
263 if (neigh_node)
264 neigh_node_free_ref(neigh_node);
265 if (orig_node)
266 orig_node_free_ref(orig_node);
264 return len; 267 return len;
265} 268}
266 269
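
The rewritten lookup is the acquire/release pattern used throughout this series: find the object under rcu_read_lock(), pin it with atomic_inc_not_zero() before leaving the RCU section, and drop every acquired reference on the way out. In isolation:

    rcu_read_lock();
    orig_node = orig_hash_find(bat_priv, dst);      /* takes a reference */
    neigh_node = orig_node ? orig_node->router : NULL;
    if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount))
            neigh_node = NULL;                      /* router being freed */
    rcu_read_unlock();

    /* ... neigh_node stays valid outside the RCU section ... */

    if (neigh_node)
            neigh_node_free_ref(neigh_node);
    if (orig_node)
            orig_node_free_ref(orig_node);
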
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index bf9b348cde2..462b190fa10 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -22,8 +22,6 @@
22#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ 22#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_
23#define _NET_BATMAN_ADV_ICMP_SOCKET_H_ 23#define _NET_BATMAN_ADV_ICMP_SOCKET_H_
24 24
25#include "types.h"
26
27#define ICMP_SOCKET "socket" 25#define ICMP_SOCKET "socket"
28 26
29void bat_socket_init(void); 27void bat_socket_init(void);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index b827f6a158c..709b33bbdf4 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -30,11 +30,10 @@
30#include "translation-table.h" 30#include "translation-table.h"
31#include "hard-interface.h" 31#include "hard-interface.h"
32#include "gateway_client.h" 32#include "gateway_client.h"
33#include "types.h"
34#include "vis.h" 33#include "vis.h"
35#include "hash.h" 34#include "hash.h"
36 35
37struct list_head if_list; 36struct list_head hardif_list;
38 37
39unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 38unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
40 39
@@ -42,7 +41,7 @@ struct workqueue_struct *bat_event_workqueue;
42 41
43static int __init batman_init(void) 42static int __init batman_init(void)
44{ 43{
45 INIT_LIST_HEAD(&if_list); 44 INIT_LIST_HEAD(&hardif_list);
46 45
47 /* the name should not be longer than 10 chars - see 46 /* the name should not be longer than 10 chars - see
48 * http://lwn.net/Articles/23634/ */ 47 * http://lwn.net/Articles/23634/ */
@@ -80,7 +79,6 @@ int mesh_init(struct net_device *soft_iface)
80{ 79{
81 struct bat_priv *bat_priv = netdev_priv(soft_iface); 80 struct bat_priv *bat_priv = netdev_priv(soft_iface);
82 81
83 spin_lock_init(&bat_priv->orig_hash_lock);
84 spin_lock_init(&bat_priv->forw_bat_list_lock); 82 spin_lock_init(&bat_priv->forw_bat_list_lock);
85 spin_lock_init(&bat_priv->forw_bcast_list_lock); 83 spin_lock_init(&bat_priv->forw_bcast_list_lock);
86 spin_lock_init(&bat_priv->hna_lhash_lock); 84 spin_lock_init(&bat_priv->hna_lhash_lock);
@@ -155,14 +153,14 @@ void dec_module_count(void)
155 153
156int is_my_mac(uint8_t *addr) 154int is_my_mac(uint8_t *addr)
157{ 155{
158 struct batman_if *batman_if; 156 struct hard_iface *hard_iface;
159 157
160 rcu_read_lock(); 158 rcu_read_lock();
161 list_for_each_entry_rcu(batman_if, &if_list, list) { 159 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
162 if (batman_if->if_status != IF_ACTIVE) 160 if (hard_iface->if_status != IF_ACTIVE)
163 continue; 161 continue;
164 162
165 if (compare_orig(batman_if->net_dev->dev_addr, addr)) { 163 if (compare_eth(hard_iface->net_dev->dev_addr, addr)) {
166 rcu_read_unlock(); 164 rcu_read_unlock();
167 return 1; 165 return 1;
168 } 166 }
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 65106fb61b8..dc248697de7 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,9 +22,6 @@
22#ifndef _NET_BATMAN_ADV_MAIN_H_ 22#ifndef _NET_BATMAN_ADV_MAIN_H_
23#define _NET_BATMAN_ADV_MAIN_H_ 23#define _NET_BATMAN_ADV_MAIN_H_
24 24
25/* Kernel Programming */
26#define LINUX
27
28#define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \ 25#define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \
29 "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>" 26 "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>"
30#define DRIVER_DESC "B.A.T.M.A.N. advanced" 27#define DRIVER_DESC "B.A.T.M.A.N. advanced"
@@ -54,7 +51,6 @@
54 51
55#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) 52#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE)
56 53
57#define PACKBUFF_SIZE 2000
58#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ 54#define LOG_BUF_LEN 8192 /* has to be a power of 2 */
59 55
60#define VIS_INTERVAL 5000 /* 5 seconds */ 56#define VIS_INTERVAL 5000 /* 5 seconds */
@@ -96,15 +92,11 @@
96#define DBG_ROUTES 2 /* route or hna added / changed / deleted */ 92#define DBG_ROUTES 2 /* route or hna added / changed / deleted */
97#define DBG_ALL 3 93#define DBG_ALL 3
98 94
99#define LOG_BUF_LEN 8192 /* has to be a power of 2 */
100
101 95
102/* 96/*
103 * Vis 97 * Vis
104 */ 98 */
105 99
106/* #define VIS_SUBCLUSTERS_DISABLED */
107
108/* 100/*
109 * Kernel headers 101 * Kernel headers
110 */ 102 */
@@ -130,7 +122,7 @@
130#define REVISION_VERSION_STR " "REVISION_VERSION 122#define REVISION_VERSION_STR " "REVISION_VERSION
131#endif 123#endif
132 124
133extern struct list_head if_list; 125extern struct list_head hardif_list;
134 126
135extern unsigned char broadcast_addr[]; 127extern unsigned char broadcast_addr[];
136extern struct workqueue_struct *bat_event_workqueue; 128extern struct workqueue_struct *bat_event_workqueue;
@@ -158,13 +150,6 @@ static inline void bat_dbg(char type __always_unused,
158} 150}
159#endif 151#endif
160 152
161#define bat_warning(net_dev, fmt, arg...) \
162 do { \
163 struct net_device *_netdev = (net_dev); \
164 struct bat_priv *_batpriv = netdev_priv(_netdev); \
165 bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \
166 pr_warning("%s: " fmt, _netdev->name, ## arg); \
167 } while (0)
168#define bat_info(net_dev, fmt, arg...) \ 153#define bat_info(net_dev, fmt, arg...) \
169 do { \ 154 do { \
170 struct net_device *_netdev = (net_dev); \ 155 struct net_device *_netdev = (net_dev); \
@@ -180,4 +165,14 @@ static inline void bat_dbg(char type __always_unused,
180 pr_err("%s: " fmt, _netdev->name, ## arg); \ 165 pr_err("%s: " fmt, _netdev->name, ## arg); \
181 } while (0) 166 } while (0)
182 167
168/**
169 * returns 1 if they are the same ethernet addr
170 *
171 * note: can't use compare_ether_addr() as it requires aligned memory
172 */
173static inline int compare_eth(void *data1, void *data2)
174{
175 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
176}
177
183#endif /* _NET_BATMAN_ADV_MAIN_H_ */ 178#endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 6b7fb6b7e6f..0b9133022d2 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -44,24 +44,36 @@ int originator_init(struct bat_priv *bat_priv)
44 if (bat_priv->orig_hash) 44 if (bat_priv->orig_hash)
45 return 1; 45 return 1;
46 46
47 spin_lock_bh(&bat_priv->orig_hash_lock);
48 bat_priv->orig_hash = hash_new(1024); 47 bat_priv->orig_hash = hash_new(1024);
49 48
50 if (!bat_priv->orig_hash) 49 if (!bat_priv->orig_hash)
51 goto err; 50 goto err;
52 51
53 spin_unlock_bh(&bat_priv->orig_hash_lock);
54 start_purge_timer(bat_priv); 52 start_purge_timer(bat_priv);
55 return 1; 53 return 1;
56 54
57err: 55err:
58 spin_unlock_bh(&bat_priv->orig_hash_lock);
59 return 0; 56 return 0;
60} 57}
61 58
62struct neigh_node * 59static void neigh_node_free_rcu(struct rcu_head *rcu)
63create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, 60{
64 uint8_t *neigh, struct batman_if *if_incoming) 61 struct neigh_node *neigh_node;
62
63 neigh_node = container_of(rcu, struct neigh_node, rcu);
64 kfree(neigh_node);
65}
66
67void neigh_node_free_ref(struct neigh_node *neigh_node)
68{
69 if (atomic_dec_and_test(&neigh_node->refcount))
70 call_rcu(&neigh_node->rcu, neigh_node_free_rcu);
71}
72
73struct neigh_node *create_neighbor(struct orig_node *orig_node,
74 struct orig_node *orig_neigh_node,
75 uint8_t *neigh,
76 struct hard_iface *if_incoming)
65{ 77{
66 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 78 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
67 struct neigh_node *neigh_node; 79 struct neigh_node *neigh_node;
@@ -73,50 +85,94 @@ create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node,
73 if (!neigh_node) 85 if (!neigh_node)
74 return NULL; 86 return NULL;
75 87
76 INIT_LIST_HEAD(&neigh_node->list); 88 INIT_HLIST_NODE(&neigh_node->list);
89 INIT_LIST_HEAD(&neigh_node->bonding_list);
77 90
78 memcpy(neigh_node->addr, neigh, ETH_ALEN); 91 memcpy(neigh_node->addr, neigh, ETH_ALEN);
79 neigh_node->orig_node = orig_neigh_node; 92 neigh_node->orig_node = orig_neigh_node;
80 neigh_node->if_incoming = if_incoming; 93 neigh_node->if_incoming = if_incoming;
81 94
82 list_add_tail(&neigh_node->list, &orig_node->neigh_list); 95 /* extra reference for return */
96 atomic_set(&neigh_node->refcount, 2);
97
98 spin_lock_bh(&orig_node->neigh_list_lock);
99 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
100 spin_unlock_bh(&orig_node->neigh_list_lock);
83 return neigh_node; 101 return neigh_node;
84} 102}
85 103
86static void free_orig_node(void *data, void *arg) 104static void orig_node_free_rcu(struct rcu_head *rcu)
87{ 105{
88 struct list_head *list_pos, *list_pos_tmp; 106 struct hlist_node *node, *node_tmp;
89 struct neigh_node *neigh_node; 107 struct neigh_node *neigh_node, *tmp_neigh_node;
90 struct orig_node *orig_node = (struct orig_node *)data; 108 struct orig_node *orig_node;
91 struct bat_priv *bat_priv = (struct bat_priv *)arg;
92 109
93 /* for all neighbors towards this originator ... */ 110 orig_node = container_of(rcu, struct orig_node, rcu);
94 list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { 111
95 neigh_node = list_entry(list_pos, struct neigh_node, list); 112 spin_lock_bh(&orig_node->neigh_list_lock);
113
114 /* for all bonding members ... */
115 list_for_each_entry_safe(neigh_node, tmp_neigh_node,
116 &orig_node->bond_list, bonding_list) {
117 list_del_rcu(&neigh_node->bonding_list);
118 neigh_node_free_ref(neigh_node);
119 }
96 120
97 list_del(list_pos); 121 /* for all neighbors towards this originator ... */
98 kfree(neigh_node); 122 hlist_for_each_entry_safe(neigh_node, node, node_tmp,
123 &orig_node->neigh_list, list) {
124 hlist_del_rcu(&neigh_node->list);
125 neigh_node_free_ref(neigh_node);
99 } 126 }
100 127
128 spin_unlock_bh(&orig_node->neigh_list_lock);
129
101 frag_list_free(&orig_node->frag_list); 130 frag_list_free(&orig_node->frag_list);
102 hna_global_del_orig(bat_priv, orig_node, "originator timed out"); 131 hna_global_del_orig(orig_node->bat_priv, orig_node,
132 "originator timed out");
103 133
104 kfree(orig_node->bcast_own); 134 kfree(orig_node->bcast_own);
105 kfree(orig_node->bcast_own_sum); 135 kfree(orig_node->bcast_own_sum);
106 kfree(orig_node); 136 kfree(orig_node);
107} 137}
108 138
139void orig_node_free_ref(struct orig_node *orig_node)
140{
141 if (atomic_dec_and_test(&orig_node->refcount))
142 call_rcu(&orig_node->rcu, orig_node_free_rcu);
143}
144
109void originator_free(struct bat_priv *bat_priv) 145void originator_free(struct bat_priv *bat_priv)
110{ 146{
111 if (!bat_priv->orig_hash) 147 struct hashtable_t *hash = bat_priv->orig_hash;
148 struct hlist_node *node, *node_tmp;
149 struct hlist_head *head;
150 spinlock_t *list_lock; /* spinlock to protect write access */
151 struct orig_node *orig_node;
152 int i;
153
154 if (!hash)
112 return; 155 return;
113 156
114 cancel_delayed_work_sync(&bat_priv->orig_work); 157 cancel_delayed_work_sync(&bat_priv->orig_work);
115 158
116 spin_lock_bh(&bat_priv->orig_hash_lock);
117 hash_delete(bat_priv->orig_hash, free_orig_node, bat_priv);
118 bat_priv->orig_hash = NULL; 159 bat_priv->orig_hash = NULL;
119 spin_unlock_bh(&bat_priv->orig_hash_lock); 160
161 for (i = 0; i < hash->size; i++) {
162 head = &hash->table[i];
163 list_lock = &hash->list_locks[i];
164
165 spin_lock_bh(list_lock);
166 hlist_for_each_entry_safe(orig_node, node, node_tmp,
167 head, hash_entry) {
168
169 hlist_del_rcu(node);
170 orig_node_free_ref(orig_node);
171 }
172 spin_unlock_bh(list_lock);
173 }
174
175 hash_destroy(hash);
120} 176}
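
orig_node_free_rcu() runs from the RCU callback with no caller context, which is why get_orig_node() below stores a bat_priv back-pointer in each node: everything the destructor touches must live in the object itself. The callback recovers the object from its rcu_head (sketch):

    static void example_free_rcu(struct rcu_head *rcu)
    {
            struct orig_node *orig_node =
                    container_of(rcu, struct orig_node, rcu);

            /* only object-local state is available here,
             * e.g. orig_node->bat_priv */
            kfree(orig_node);
    }
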
121 177
122/* this function finds or creates an originator entry for the given 178/* this function finds or creates an originator entry for the given
@@ -127,10 +183,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
127 int size; 183 int size;
128 int hash_added; 184 int hash_added;
129 185
130 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, 186 orig_node = orig_hash_find(bat_priv, addr);
131 compare_orig, choose_orig,
132 addr));
133
134 if (orig_node) 187 if (orig_node)
135 return orig_node; 188 return orig_node;
136 189
@@ -141,8 +194,16 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
141 if (!orig_node) 194 if (!orig_node)
142 return NULL; 195 return NULL;
143 196
144 INIT_LIST_HEAD(&orig_node->neigh_list); 197 INIT_HLIST_HEAD(&orig_node->neigh_list);
198 INIT_LIST_HEAD(&orig_node->bond_list);
199 spin_lock_init(&orig_node->ogm_cnt_lock);
200 spin_lock_init(&orig_node->bcast_seqno_lock);
201 spin_lock_init(&orig_node->neigh_list_lock);
202
203 /* extra reference for return */
204 atomic_set(&orig_node->refcount, 2);
145 205
206 orig_node->bat_priv = bat_priv;
146 memcpy(orig_node->orig, addr, ETH_ALEN); 207 memcpy(orig_node->orig, addr, ETH_ALEN);
147 orig_node->router = NULL; 208 orig_node->router = NULL;
148 orig_node->hna_buff = NULL; 209 orig_node->hna_buff = NULL;
@@ -151,6 +212,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
151 orig_node->batman_seqno_reset = jiffies - 1 212 orig_node->batman_seqno_reset = jiffies - 1
152 - msecs_to_jiffies(RESET_PROTECTION_MS); 213 - msecs_to_jiffies(RESET_PROTECTION_MS);
153 214
215 atomic_set(&orig_node->bond_candidates, 0);
216
154 size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS; 217 size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS;
155 218
156 orig_node->bcast_own = kzalloc(size, GFP_ATOMIC); 219 orig_node->bcast_own = kzalloc(size, GFP_ATOMIC);
@@ -166,8 +229,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
166 if (!orig_node->bcast_own_sum) 229 if (!orig_node->bcast_own_sum)
167 goto free_bcast_own; 230 goto free_bcast_own;
168 231
169 hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, 232 hash_added = hash_add(bat_priv->orig_hash, compare_orig,
170 orig_node); 233 choose_orig, orig_node, &orig_node->hash_entry);
171 if (hash_added < 0) 234 if (hash_added < 0)
172 goto free_bcast_own_sum; 235 goto free_bcast_own_sum;
173 236
@@ -185,23 +248,30 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv,
185 struct orig_node *orig_node, 248 struct orig_node *orig_node,
186 struct neigh_node **best_neigh_node) 249 struct neigh_node **best_neigh_node)
187{ 250{
188 struct list_head *list_pos, *list_pos_tmp; 251 struct hlist_node *node, *node_tmp;
189 struct neigh_node *neigh_node; 252 struct neigh_node *neigh_node;
190 bool neigh_purged = false; 253 bool neigh_purged = false;
191 254
192 *best_neigh_node = NULL; 255 *best_neigh_node = NULL;
193 256
257 spin_lock_bh(&orig_node->neigh_list_lock);
258
194 /* for all neighbors towards this originator ... */ 259 /* for all neighbors towards this originator ... */
195 list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { 260 hlist_for_each_entry_safe(neigh_node, node, node_tmp,
196 neigh_node = list_entry(list_pos, struct neigh_node, list); 261 &orig_node->neigh_list, list) {
197 262
198 if ((time_after(jiffies, 263 if ((time_after(jiffies,
199 neigh_node->last_valid + PURGE_TIMEOUT * HZ)) || 264 neigh_node->last_valid + PURGE_TIMEOUT * HZ)) ||
200 (neigh_node->if_incoming->if_status == IF_INACTIVE) || 265 (neigh_node->if_incoming->if_status == IF_INACTIVE) ||
266 (neigh_node->if_incoming->if_status == IF_NOT_IN_USE) ||
201 (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) { 267 (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) {
202 268
203 if (neigh_node->if_incoming->if_status == 269 if ((neigh_node->if_incoming->if_status ==
204 IF_TO_BE_REMOVED) 270 IF_INACTIVE) ||
271 (neigh_node->if_incoming->if_status ==
272 IF_NOT_IN_USE) ||
273 (neigh_node->if_incoming->if_status ==
274 IF_TO_BE_REMOVED))
205 bat_dbg(DBG_BATMAN, bat_priv, 275 bat_dbg(DBG_BATMAN, bat_priv,
206 "neighbor purge: originator %pM, " 276 "neighbor purge: originator %pM, "
207 "neighbor: %pM, iface: %s\n", 277 "neighbor: %pM, iface: %s\n",
@@ -215,14 +285,18 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv,
215 (neigh_node->last_valid / HZ)); 285 (neigh_node->last_valid / HZ));
216 286
217 neigh_purged = true; 287 neigh_purged = true;
218 list_del(list_pos); 288
219 kfree(neigh_node); 289 hlist_del_rcu(&neigh_node->list);
290 bonding_candidate_del(orig_node, neigh_node);
291 neigh_node_free_ref(neigh_node);
220 } else { 292 } else {
221 if ((!*best_neigh_node) || 293 if ((!*best_neigh_node) ||
222 (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) 294 (neigh_node->tq_avg > (*best_neigh_node)->tq_avg))
223 *best_neigh_node = neigh_node; 295 *best_neigh_node = neigh_node;
224 } 296 }
225 } 297 }
298
299 spin_unlock_bh(&orig_node->neigh_list_lock);
226 return neigh_purged; 300 return neigh_purged;
227} 301}
228 302
@@ -245,9 +319,6 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
245 best_neigh_node, 319 best_neigh_node,
246 orig_node->hna_buff, 320 orig_node->hna_buff,
247 orig_node->hna_buff_len); 321 orig_node->hna_buff_len);
248 /* update bonding candidates, we could have lost
249 * some candidates. */
250 update_bonding_candidates(bat_priv, orig_node);
251 } 322 }
252 } 323 }
253 324
@@ -257,40 +328,38 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
257static void _purge_orig(struct bat_priv *bat_priv) 328static void _purge_orig(struct bat_priv *bat_priv)
258{ 329{
259 struct hashtable_t *hash = bat_priv->orig_hash; 330 struct hashtable_t *hash = bat_priv->orig_hash;
260 struct hlist_node *walk, *safe; 331 struct hlist_node *node, *node_tmp;
261 struct hlist_head *head; 332 struct hlist_head *head;
262 struct element_t *bucket; 333 spinlock_t *list_lock; /* spinlock to protect write access */
263 struct orig_node *orig_node; 334 struct orig_node *orig_node;
264 int i; 335 int i;
265 336
266 if (!hash) 337 if (!hash)
267 return; 338 return;
268 339
269 spin_lock_bh(&bat_priv->orig_hash_lock);
270
271 /* for all origins... */ 340 /* for all origins... */
272 for (i = 0; i < hash->size; i++) { 341 for (i = 0; i < hash->size; i++) {
273 head = &hash->table[i]; 342 head = &hash->table[i];
343 list_lock = &hash->list_locks[i];
274 344
275 hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { 345 spin_lock_bh(list_lock);
276 orig_node = bucket->data; 346 hlist_for_each_entry_safe(orig_node, node, node_tmp,
277 347 head, hash_entry) {
278 if (purge_orig_node(bat_priv, orig_node)) { 348 if (purge_orig_node(bat_priv, orig_node)) {
279 if (orig_node->gw_flags) 349 if (orig_node->gw_flags)
280 gw_node_delete(bat_priv, orig_node); 350 gw_node_delete(bat_priv, orig_node);
281 hlist_del(walk); 351 hlist_del_rcu(node);
282 kfree(bucket); 352 orig_node_free_ref(orig_node);
283 free_orig_node(orig_node, bat_priv); 353 continue;
284 } 354 }
285 355
286 if (time_after(jiffies, orig_node->last_frag_packet + 356 if (time_after(jiffies, orig_node->last_frag_packet +
287 msecs_to_jiffies(FRAG_TIMEOUT))) 357 msecs_to_jiffies(FRAG_TIMEOUT)))
288 frag_list_free(&orig_node->frag_list); 358 frag_list_free(&orig_node->frag_list);
289 } 359 }
360 spin_unlock_bh(list_lock);
290 } 361 }
291 362
292 spin_unlock_bh(&bat_priv->orig_hash_lock);
293
294 gw_node_purge(bat_priv); 363 gw_node_purge(bat_priv);
295 gw_election(bat_priv); 364 gw_election(bat_priv);
296 365
@@ -318,9 +387,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
318 struct net_device *net_dev = (struct net_device *)seq->private; 387 struct net_device *net_dev = (struct net_device *)seq->private;
319 struct bat_priv *bat_priv = netdev_priv(net_dev); 388 struct bat_priv *bat_priv = netdev_priv(net_dev);
320 struct hashtable_t *hash = bat_priv->orig_hash; 389 struct hashtable_t *hash = bat_priv->orig_hash;
321 struct hlist_node *walk; 390 struct hlist_node *node, *node_tmp;
322 struct hlist_head *head; 391 struct hlist_head *head;
323 struct element_t *bucket;
324 struct orig_node *orig_node; 392 struct orig_node *orig_node;
325 struct neigh_node *neigh_node; 393 struct neigh_node *neigh_node;
326 int batman_count = 0; 394 int batman_count = 0;
@@ -348,14 +416,11 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
348 "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop", 416 "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop",
349 "outgoingIF", "Potential nexthops"); 417 "outgoingIF", "Potential nexthops");
350 418
351 spin_lock_bh(&bat_priv->orig_hash_lock);
352
353 for (i = 0; i < hash->size; i++) { 419 for (i = 0; i < hash->size; i++) {
354 head = &hash->table[i]; 420 head = &hash->table[i];
355 421
356 hlist_for_each_entry(bucket, walk, head, hlist) { 422 rcu_read_lock();
357 orig_node = bucket->data; 423 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
358
359 if (!orig_node->router) 424 if (!orig_node->router)
360 continue; 425 continue;
361 426
@@ -374,8 +439,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
374 neigh_node->addr, 439 neigh_node->addr,
375 neigh_node->if_incoming->net_dev->name); 440 neigh_node->if_incoming->net_dev->name);
376 441
377 list_for_each_entry(neigh_node, &orig_node->neigh_list, 442 hlist_for_each_entry_rcu(neigh_node, node_tmp,
378 list) { 443 &orig_node->neigh_list, list) {
379 seq_printf(seq, " %pM (%3i)", neigh_node->addr, 444 seq_printf(seq, " %pM (%3i)", neigh_node->addr,
380 neigh_node->tq_avg); 445 neigh_node->tq_avg);
381 } 446 }
@@ -383,10 +448,9 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
383 seq_printf(seq, "\n"); 448 seq_printf(seq, "\n");
384 batman_count++; 449 batman_count++;
385 } 450 }
451 rcu_read_unlock();
386 } 452 }
387 453
388 spin_unlock_bh(&bat_priv->orig_hash_lock);
389
390 if ((batman_count == 0)) 454 if ((batman_count == 0))
391 seq_printf(seq, "No batman nodes in range ...\n"); 455 seq_printf(seq, "No batman nodes in range ...\n");
392 456
@@ -423,36 +487,36 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
423 return 0; 487 return 0;
424} 488}
425 489
426int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) 490int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num)
427{ 491{
428 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 492 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
429 struct hashtable_t *hash = bat_priv->orig_hash; 493 struct hashtable_t *hash = bat_priv->orig_hash;
430 struct hlist_node *walk; 494 struct hlist_node *node;
431 struct hlist_head *head; 495 struct hlist_head *head;
432 struct element_t *bucket;
433 struct orig_node *orig_node; 496 struct orig_node *orig_node;
434 int i; 497 int i, ret;
435 498
436 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on 499 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on
437 * if_num */ 500 * if_num */
438 spin_lock_bh(&bat_priv->orig_hash_lock);
439
440 for (i = 0; i < hash->size; i++) { 501 for (i = 0; i < hash->size; i++) {
441 head = &hash->table[i]; 502 head = &hash->table[i];
442 503
443 hlist_for_each_entry(bucket, walk, head, hlist) { 504 rcu_read_lock();
444 orig_node = bucket->data; 505 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
506 spin_lock_bh(&orig_node->ogm_cnt_lock);
507 ret = orig_node_add_if(orig_node, max_if_num);
508 spin_unlock_bh(&orig_node->ogm_cnt_lock);
445 509
446 if (orig_node_add_if(orig_node, max_if_num) == -1) 510 if (ret == -1)
447 goto err; 511 goto err;
448 } 512 }
513 rcu_read_unlock();
449 } 514 }
450 515
451 spin_unlock_bh(&bat_priv->orig_hash_lock);
452 return 0; 516 return 0;
453 517
454err: 518err:
455 spin_unlock_bh(&bat_priv->orig_hash_lock); 519 rcu_read_unlock();
456 return -ENOMEM; 520 return -ENOMEM;
457} 521}
458 522
@@ -508,57 +572,55 @@ free_own_sum:
508 return 0; 572 return 0;
509} 573}
510 574
511int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) 575int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num)
512{ 576{
513 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 577 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
514 struct hashtable_t *hash = bat_priv->orig_hash; 578 struct hashtable_t *hash = bat_priv->orig_hash;
515 struct hlist_node *walk; 579 struct hlist_node *node;
516 struct hlist_head *head; 580 struct hlist_head *head;
517 struct element_t *bucket; 581 struct hard_iface *hard_iface_tmp;
518 struct batman_if *batman_if_tmp;
519 struct orig_node *orig_node; 582 struct orig_node *orig_node;
520 int i, ret; 583 int i, ret;
521 584
522 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on 585 /* resize all orig nodes because orig_node->bcast_own(_sum) depend on
523 * if_num */ 586 * if_num */
524 spin_lock_bh(&bat_priv->orig_hash_lock);
525
526 for (i = 0; i < hash->size; i++) { 587 for (i = 0; i < hash->size; i++) {
527 head = &hash->table[i]; 588 head = &hash->table[i];
528 589
529 hlist_for_each_entry(bucket, walk, head, hlist) { 590 rcu_read_lock();
530 orig_node = bucket->data; 591 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
531 592 spin_lock_bh(&orig_node->ogm_cnt_lock);
532 ret = orig_node_del_if(orig_node, max_if_num, 593 ret = orig_node_del_if(orig_node, max_if_num,
533 batman_if->if_num); 594 hard_iface->if_num);
595 spin_unlock_bh(&orig_node->ogm_cnt_lock);
534 596
535 if (ret == -1) 597 if (ret == -1)
536 goto err; 598 goto err;
537 } 599 }
600 rcu_read_unlock();
538 } 601 }
539 602
540 /* renumber remaining batman interfaces _inside_ of orig_hash_lock */ 603 /* renumber remaining batman interfaces _inside_ of orig_hash_lock */
541 rcu_read_lock(); 604 rcu_read_lock();
542 list_for_each_entry_rcu(batman_if_tmp, &if_list, list) { 605 list_for_each_entry_rcu(hard_iface_tmp, &hardif_list, list) {
543 if (batman_if_tmp->if_status == IF_NOT_IN_USE) 606 if (hard_iface_tmp->if_status == IF_NOT_IN_USE)
544 continue; 607 continue;
545 608
546 if (batman_if == batman_if_tmp) 609 if (hard_iface == hard_iface_tmp)
547 continue; 610 continue;
548 611
549 if (batman_if->soft_iface != batman_if_tmp->soft_iface) 612 if (hard_iface->soft_iface != hard_iface_tmp->soft_iface)
550 continue; 613 continue;
551 614
552 if (batman_if_tmp->if_num > batman_if->if_num) 615 if (hard_iface_tmp->if_num > hard_iface->if_num)
553 batman_if_tmp->if_num--; 616 hard_iface_tmp->if_num--;
554 } 617 }
555 rcu_read_unlock(); 618 rcu_read_unlock();
556 619
557 batman_if->if_num = -1; 620 hard_iface->if_num = -1;
558 spin_unlock_bh(&bat_priv->orig_hash_lock);
559 return 0; 621 return 0;
560 622
561err: 623err:
562 spin_unlock_bh(&bat_priv->orig_hash_lock); 624 rcu_read_unlock();
563 return -ENOMEM; 625 return -ENOMEM;
564} 626}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index d474ceb2a4e..5cc011057da 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,21 +22,28 @@
22#ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ 22#ifndef _NET_BATMAN_ADV_ORIGINATOR_H_
23#define _NET_BATMAN_ADV_ORIGINATOR_H_ 23#define _NET_BATMAN_ADV_ORIGINATOR_H_
24 24
25#include "hash.h"
26
25int originator_init(struct bat_priv *bat_priv); 27int originator_init(struct bat_priv *bat_priv);
26void originator_free(struct bat_priv *bat_priv); 28void originator_free(struct bat_priv *bat_priv);
27void purge_orig_ref(struct bat_priv *bat_priv); 29void purge_orig_ref(struct bat_priv *bat_priv);
30void orig_node_free_ref(struct orig_node *orig_node);
28struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); 31struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr);
29struct neigh_node * 32struct neigh_node *create_neighbor(struct orig_node *orig_node,
30create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, 33 struct orig_node *orig_neigh_node,
31 uint8_t *neigh, struct batman_if *if_incoming); 34 uint8_t *neigh,
35 struct hard_iface *if_incoming);
36void neigh_node_free_ref(struct neigh_node *neigh_node);
32int orig_seq_print_text(struct seq_file *seq, void *offset); 37int orig_seq_print_text(struct seq_file *seq, void *offset);
33int orig_hash_add_if(struct batman_if *batman_if, int max_if_num); 38int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num);
34int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); 39int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num);
35 40
36 41
37/* returns 1 if they are the same originator */ 42/* returns 1 if they are the same originator */
38static inline int compare_orig(void *data1, void *data2) 43static inline int compare_orig(struct hlist_node *node, void *data2)
39{ 44{
45 void *data1 = container_of(node, struct orig_node, hash_entry);
46
40 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); 47 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
41} 48}
42 49
@@ -61,4 +68,35 @@ static inline int choose_orig(void *data, int32_t size)
61 return hash % size; 68 return hash % size;
62} 69}
63 70
71static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv,
72 void *data)
73{
74 struct hashtable_t *hash = bat_priv->orig_hash;
75 struct hlist_head *head;
76 struct hlist_node *node;
77 struct orig_node *orig_node, *orig_node_tmp = NULL;
78 int index;
79
80 if (!hash)
81 return NULL;
82
83 index = choose_orig(data, hash->size);
84 head = &hash->table[index];
85
86 rcu_read_lock();
87 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
88 if (!compare_eth(orig_node, data))
89 continue;
90
91 if (!atomic_inc_not_zero(&orig_node->refcount))
92 continue;
93
94 orig_node_tmp = orig_node;
95 break;
96 }
97 rcu_read_unlock();
98
99 return orig_node_tmp;
100}
101
64#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ 102#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
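
orig_hash_find() hands back a pinned node, so every caller owns a reference it must release. Usage sketch:

    struct orig_node *orig_node = orig_hash_find(bat_priv, addr);

    if (orig_node) {
            /* ... use orig_node, safely even outside RCU ... */
            orig_node_free_ref(orig_node);  /* drop the lookup reference */
    }
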
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 2284e8129cb..e7571879af3 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -50,6 +50,7 @@
50 50
51/* fragmentation defines */ 51/* fragmentation defines */
52#define UNI_FRAG_HEAD 0x01 52#define UNI_FRAG_HEAD 0x01
53#define UNI_FRAG_LARGETAIL 0x02
53 54
54struct batman_packet { 55struct batman_packet {
55 uint8_t packet_type; 56 uint8_t packet_type;
diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c
index defd37c9be1..5bb6a619afe 100644
--- a/net/batman-adv/ring_buffer.c
+++ b/net/batman-adv/ring_buffer.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h
index 6b0cb9aaeba..0395b274186 100644
--- a/net/batman-adv/ring_buffer.h
+++ b/net/batman-adv/ring_buffer.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 8828eddd3f7..c172f5d0e05 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -28,7 +28,6 @@
28#include "icmp_socket.h" 28#include "icmp_socket.h"
29#include "translation-table.h" 29#include "translation-table.h"
30#include "originator.h" 30#include "originator.h"
31#include "types.h"
32#include "ring_buffer.h" 31#include "ring_buffer.h"
33#include "vis.h" 32#include "vis.h"
34#include "aggregation.h" 33#include "aggregation.h"
@@ -36,35 +35,33 @@
36#include "gateway_client.h" 35#include "gateway_client.h"
37#include "unicast.h" 36#include "unicast.h"
38 37
39void slide_own_bcast_window(struct batman_if *batman_if) 38void slide_own_bcast_window(struct hard_iface *hard_iface)
40{ 39{
41 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 40 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
42 struct hashtable_t *hash = bat_priv->orig_hash; 41 struct hashtable_t *hash = bat_priv->orig_hash;
43 struct hlist_node *walk; 42 struct hlist_node *node;
44 struct hlist_head *head; 43 struct hlist_head *head;
45 struct element_t *bucket;
46 struct orig_node *orig_node; 44 struct orig_node *orig_node;
47 unsigned long *word; 45 unsigned long *word;
48 int i; 46 int i;
49 size_t word_index; 47 size_t word_index;
50 48
51 spin_lock_bh(&bat_priv->orig_hash_lock);
52
53 for (i = 0; i < hash->size; i++) { 49 for (i = 0; i < hash->size; i++) {
54 head = &hash->table[i]; 50 head = &hash->table[i];
55 51
56 hlist_for_each_entry(bucket, walk, head, hlist) { 52 rcu_read_lock();
57 orig_node = bucket->data; 53 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
58 word_index = batman_if->if_num * NUM_WORDS; 54 spin_lock_bh(&orig_node->ogm_cnt_lock);
55 word_index = hard_iface->if_num * NUM_WORDS;
59 word = &(orig_node->bcast_own[word_index]); 56 word = &(orig_node->bcast_own[word_index]);
60 57
61 bit_get_packet(bat_priv, word, 1, 0); 58 bit_get_packet(bat_priv, word, 1, 0);
62 orig_node->bcast_own_sum[batman_if->if_num] = 59 orig_node->bcast_own_sum[hard_iface->if_num] =
63 bit_packet_count(word); 60 bit_packet_count(word);
61 spin_unlock_bh(&orig_node->ogm_cnt_lock);
64 } 62 }
63 rcu_read_unlock();
65 } 64 }
66
67 spin_unlock_bh(&bat_priv->orig_hash_lock);
68} 65}
69 66
70static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node, 67static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node,
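
The hunk above drops the coarse orig_hash_lock held across the whole table walk: readers now traverse each bucket under rcu_read_lock() and take the new per-originator ogm_cnt_lock only around the counter update. A stripped-down sketch of that pattern, with stand-in types rather than the batman-adv structures:

```c
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct item {
	struct hlist_node hash_entry;
	spinlock_t cnt_lock;		/* protects counter only */
	unsigned long counter;
};

static void walk_buckets(struct hlist_head *table, int size)
{
	struct item *item;
	struct hlist_node *node;
	int i;

	for (i = 0; i < size; i++) {
		rcu_read_lock();
		hlist_for_each_entry_rcu(item, node, &table[i], hash_entry) {
			/* short per-entry critical section instead of one
			 * global lock held across the entire walk */
			spin_lock_bh(&item->cnt_lock);
			item->counter++;
			spin_unlock_bh(&item->cnt_lock);
		}
		rcu_read_unlock();
	}
}
```
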
@@ -90,6 +87,8 @@ static void update_route(struct bat_priv *bat_priv,
90 struct neigh_node *neigh_node, 87 struct neigh_node *neigh_node,
91 unsigned char *hna_buff, int hna_buff_len) 88 unsigned char *hna_buff, int hna_buff_len)
92{ 89{
90 struct neigh_node *neigh_node_tmp;
91
93 /* route deleted */ 92 /* route deleted */
94 if ((orig_node->router) && (!neigh_node)) { 93 if ((orig_node->router) && (!neigh_node)) {
95 94
@@ -116,7 +115,12 @@ static void update_route(struct bat_priv *bat_priv,
116 orig_node->router->addr); 115 orig_node->router->addr);
117 } 116 }
118 117
118 if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount))
119 neigh_node = NULL;
120 neigh_node_tmp = orig_node->router;
119 orig_node->router = neigh_node; 121 orig_node->router = neigh_node;
122 if (neigh_node_tmp)
123 neigh_node_free_ref(neigh_node_tmp);
120} 124}
121 125
122 126
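
update_route() now follows a take-then-publish-then-release order: grab a reference on the incoming neighbour (atomic_inc_not_zero() refuses an object whose refcount already hit zero), publish it, and only then drop the reference the router pointer held on its predecessor. The same logic in isolation, as a sketch around the real neigh_node_free_ref() helper:

```c
static void set_router(struct orig_node *orig_node,
		       struct neigh_node *neigh_node)
{
	struct neigh_node *old_router;

	/* refuse a neighbour that is already being freed */
	if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount))
		neigh_node = NULL;

	old_router = orig_node->router;
	orig_node->router = neigh_node;		/* publish the new router */

	if (old_router)
		neigh_node_free_ref(old_router);	/* may free it */
}
```
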
@@ -139,73 +143,93 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
139static int is_bidirectional_neigh(struct orig_node *orig_node, 143static int is_bidirectional_neigh(struct orig_node *orig_node,
140 struct orig_node *orig_neigh_node, 144 struct orig_node *orig_neigh_node,
141 struct batman_packet *batman_packet, 145 struct batman_packet *batman_packet,
142 struct batman_if *if_incoming) 146 struct hard_iface *if_incoming)
143{ 147{
144 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 148 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
145 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; 149 struct neigh_node *neigh_node = NULL, *tmp_neigh_node;
150 struct hlist_node *node;
146 unsigned char total_count; 151 unsigned char total_count;
152 uint8_t orig_eq_count, neigh_rq_count, tq_own;
153 int tq_asym_penalty, ret = 0;
147 154
148 if (orig_node == orig_neigh_node) { 155 if (orig_node == orig_neigh_node) {
149 list_for_each_entry(tmp_neigh_node, 156 rcu_read_lock();
150 &orig_node->neigh_list, 157 hlist_for_each_entry_rcu(tmp_neigh_node, node,
151 list) { 158 &orig_node->neigh_list, list) {
152 159
153 if (compare_orig(tmp_neigh_node->addr, 160 if (!compare_eth(tmp_neigh_node->addr,
154 orig_neigh_node->orig) && 161 orig_neigh_node->orig))
155 (tmp_neigh_node->if_incoming == if_incoming)) 162 continue;
156 neigh_node = tmp_neigh_node; 163
164 if (tmp_neigh_node->if_incoming != if_incoming)
165 continue;
166
167 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
168 continue;
169
170 neigh_node = tmp_neigh_node;
157 } 171 }
172 rcu_read_unlock();
158 173
159 if (!neigh_node) 174 if (!neigh_node)
160 neigh_node = create_neighbor(orig_node, 175 neigh_node = create_neighbor(orig_node,
161 orig_neigh_node, 176 orig_neigh_node,
162 orig_neigh_node->orig, 177 orig_neigh_node->orig,
163 if_incoming); 178 if_incoming);
164 /* create_neighbor failed, return 0 */
165 if (!neigh_node) 179 if (!neigh_node)
166 return 0; 180 goto out;
167 181
168 neigh_node->last_valid = jiffies; 182 neigh_node->last_valid = jiffies;
169 } else { 183 } else {
170 /* find packet count of corresponding one hop neighbor */ 184 /* find packet count of corresponding one hop neighbor */
171 list_for_each_entry(tmp_neigh_node, 185 rcu_read_lock();
172 &orig_neigh_node->neigh_list, list) { 186 hlist_for_each_entry_rcu(tmp_neigh_node, node,
187 &orig_neigh_node->neigh_list, list) {
173 188
174 if (compare_orig(tmp_neigh_node->addr, 189 if (!compare_eth(tmp_neigh_node->addr,
175 orig_neigh_node->orig) && 190 orig_neigh_node->orig))
176 (tmp_neigh_node->if_incoming == if_incoming)) 191 continue;
177 neigh_node = tmp_neigh_node; 192
193 if (tmp_neigh_node->if_incoming != if_incoming)
194 continue;
195
196 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
197 continue;
198
199 neigh_node = tmp_neigh_node;
178 } 200 }
201 rcu_read_unlock();
179 202
180 if (!neigh_node) 203 if (!neigh_node)
181 neigh_node = create_neighbor(orig_neigh_node, 204 neigh_node = create_neighbor(orig_neigh_node,
182 orig_neigh_node, 205 orig_neigh_node,
183 orig_neigh_node->orig, 206 orig_neigh_node->orig,
184 if_incoming); 207 if_incoming);
185 /* create_neighbor failed, return 0 */
186 if (!neigh_node) 208 if (!neigh_node)
187 return 0; 209 goto out;
188 } 210 }
189 211
190 orig_node->last_valid = jiffies; 212 orig_node->last_valid = jiffies;
191 213
214 spin_lock_bh(&orig_node->ogm_cnt_lock);
215 orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
216 neigh_rq_count = neigh_node->real_packet_count;
217 spin_unlock_bh(&orig_node->ogm_cnt_lock);
218
192 /* pay attention to not get a value bigger than 100 % */ 219 /* pay attention to not get a value bigger than 100 % */
193 total_count = (orig_neigh_node->bcast_own_sum[if_incoming->if_num] > 220 total_count = (orig_eq_count > neigh_rq_count ?
194 neigh_node->real_packet_count ? 221 neigh_rq_count : orig_eq_count);
195 neigh_node->real_packet_count :
196 orig_neigh_node->bcast_own_sum[if_incoming->if_num]);
197 222
198 /* if we have too few packets (too little data) we set tq_own to zero */ 223 /* if we have too few packets (too little data) we set tq_own to zero */
199 /* if we receive too few packets it is not considered bidirectional */ 224 /* if we receive too few packets it is not considered bidirectional */
200 if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || 225 if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
201 (neigh_node->real_packet_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) 226 (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
202 orig_neigh_node->tq_own = 0; 227 tq_own = 0;
203 else 228 else
204 /* neigh_node->real_packet_count is never zero as we 229 /* neigh_node->real_packet_count is never zero as we
205 * only purge old information when getting new 230 * only purge old information when getting new
206 * information */ 231 * information */
207 orig_neigh_node->tq_own = (TQ_MAX_VALUE * total_count) / 232 tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;
208 neigh_node->real_packet_count;
209 233
210 /* 234 /*
211 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does 235 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
@@ -213,20 +237,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node,
213 * punishes asymmetric links more. This will give a value 237 * punishes asymmetric links more. This will give a value
214 * between 0 and TQ_MAX_VALUE 238 * between 0 and TQ_MAX_VALUE
215 */ 239 */
216 orig_neigh_node->tq_asym_penalty = 240 tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
217 TQ_MAX_VALUE - 241 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
218 (TQ_MAX_VALUE * 242 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
219 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * 243 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
220 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * 244 (TQ_LOCAL_WINDOW_SIZE *
221 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count)) / 245 TQ_LOCAL_WINDOW_SIZE *
222 (TQ_LOCAL_WINDOW_SIZE * 246 TQ_LOCAL_WINDOW_SIZE);
223 TQ_LOCAL_WINDOW_SIZE * 247
224 TQ_LOCAL_WINDOW_SIZE); 248 batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) /
225 249 (TQ_MAX_VALUE * TQ_MAX_VALUE));
226 batman_packet->tq = ((batman_packet->tq *
227 orig_neigh_node->tq_own *
228 orig_neigh_node->tq_asym_penalty) /
229 (TQ_MAX_VALUE * TQ_MAX_VALUE));
230 250
231 bat_dbg(DBG_BATMAN, bat_priv, 251 bat_dbg(DBG_BATMAN, bat_priv,
232 "bidirectional: " 252 "bidirectional: "
@@ -234,34 +254,141 @@ static int is_bidirectional_neigh(struct orig_node *orig_node,
234 "real recv = %2i, local tq: %3i, asym_penalty: %3i, " 254 "real recv = %2i, local tq: %3i, asym_penalty: %3i, "
235 "total tq: %3i\n", 255 "total tq: %3i\n",
236 orig_node->orig, orig_neigh_node->orig, total_count, 256 orig_node->orig, orig_neigh_node->orig, total_count,
237 neigh_node->real_packet_count, orig_neigh_node->tq_own, 257 neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq);
238 orig_neigh_node->tq_asym_penalty, batman_packet->tq);
239 258
240 /* if link has the minimum required transmission quality 259 /* if link has the minimum required transmission quality
241 * consider it bidirectional */ 260 * consider it bidirectional */
242 if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) 261 if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
243 return 1; 262 ret = 1;
244 263
245 return 0; 264out:
265 if (neigh_node)
266 neigh_node_free_ref(neigh_node);
267 return ret;
268}
269
270/* caller must hold the neigh_list_lock */
271void bonding_candidate_del(struct orig_node *orig_node,
272 struct neigh_node *neigh_node)
273{
274 /* this neighbor is not part of our candidate list */
275 if (list_empty(&neigh_node->bonding_list))
276 goto out;
277
278 list_del_rcu(&neigh_node->bonding_list);
279 INIT_LIST_HEAD(&neigh_node->bonding_list);
280 neigh_node_free_ref(neigh_node);
281 atomic_dec(&orig_node->bond_candidates);
282
283out:
284 return;
285}
286
287static void bonding_candidate_add(struct orig_node *orig_node,
288 struct neigh_node *neigh_node)
289{
290 struct hlist_node *node;
291 struct neigh_node *tmp_neigh_node;
292 uint8_t best_tq, interference_candidate = 0;
293
294 spin_lock_bh(&orig_node->neigh_list_lock);
295
296 /* only consider if it has the same primary address ... */
297 if (!compare_eth(orig_node->orig,
298 neigh_node->orig_node->primary_addr))
299 goto candidate_del;
300
301 if (!orig_node->router)
302 goto candidate_del;
303
304 best_tq = orig_node->router->tq_avg;
305
306 /* ... and is good enough to be considered */
307 if (neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD)
308 goto candidate_del;
309
310 /**
311 * check if we have another candidate with the same mac address or
312 * interface. If we do, we won't select this candidate because of
313 * possible interference.
314 */
315 hlist_for_each_entry_rcu(tmp_neigh_node, node,
316 &orig_node->neigh_list, list) {
317
318 if (tmp_neigh_node == neigh_node)
319 continue;
320
321 /* we only care if the other candidate is even
322 * considered as candidate. */
323 if (list_empty(&tmp_neigh_node->bonding_list))
324 continue;
325
326 if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) ||
327 (compare_eth(neigh_node->addr, tmp_neigh_node->addr))) {
328 interference_candidate = 1;
329 break;
330 }
331 }
332
333 /* don't care further if it is an interference candidate */
334 if (interference_candidate)
335 goto candidate_del;
336
337 /* this neighbor already is part of our candidate list */
338 if (!list_empty(&neigh_node->bonding_list))
339 goto out;
340
341 if (!atomic_inc_not_zero(&neigh_node->refcount))
342 goto out;
343
344 list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list);
345 atomic_inc(&orig_node->bond_candidates);
346 goto out;
347
348candidate_del:
349 bonding_candidate_del(orig_node, neigh_node);
350
351out:
352 spin_unlock_bh(&orig_node->neigh_list_lock);
353 return;
354}
355
356/* copy primary address for bonding */
357static void bonding_save_primary(struct orig_node *orig_node,
358 struct orig_node *orig_neigh_node,
359 struct batman_packet *batman_packet)
360{
361 if (!(batman_packet->flags & PRIMARIES_FIRST_HOP))
362 return;
363
364 memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN);
246} 365}
247 366
248static void update_orig(struct bat_priv *bat_priv, 367static void update_orig(struct bat_priv *bat_priv,
249 struct orig_node *orig_node, 368 struct orig_node *orig_node,
250 struct ethhdr *ethhdr, 369 struct ethhdr *ethhdr,
251 struct batman_packet *batman_packet, 370 struct batman_packet *batman_packet,
252 struct batman_if *if_incoming, 371 struct hard_iface *if_incoming,
253 unsigned char *hna_buff, int hna_buff_len, 372 unsigned char *hna_buff, int hna_buff_len,
254 char is_duplicate) 373 char is_duplicate)
255{ 374{
256 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; 375 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
376 struct orig_node *orig_node_tmp;
377 struct hlist_node *node;
257 int tmp_hna_buff_len; 378 int tmp_hna_buff_len;
379 uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
258 380
259 bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " 381 bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
260 "Searching and updating originator entry of received packet\n"); 382 "Searching and updating originator entry of received packet\n");
261 383
262 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { 384 rcu_read_lock();
263 if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && 385 hlist_for_each_entry_rcu(tmp_neigh_node, node,
264 (tmp_neigh_node->if_incoming == if_incoming)) { 386 &orig_node->neigh_list, list) {
387 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
388 (tmp_neigh_node->if_incoming == if_incoming) &&
389 atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
390 if (neigh_node)
391 neigh_node_free_ref(neigh_node);
265 neigh_node = tmp_neigh_node; 392 neigh_node = tmp_neigh_node;
266 continue; 393 continue;
267 } 394 }
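
bonding_candidate_add()/_del() above rely on a common kernel idiom: an element's own list_head doubles as an "am I enlisted?" flag via list_empty(), and membership on the list holds exactly one refcount. A minimal sketch with stand-in types; cand_free_ref() is a hypothetical release helper standing in for neigh_node_free_ref():

```c
#include <linux/list.h>
#include <linux/rculist.h>
#include <asm/atomic.h>

struct cand {
	struct list_head bonding_list;
	atomic_t refcount;
};

static void cand_free_ref(struct cand *c);	/* hypothetical */

static void cand_add(struct list_head *bond_list, struct cand *c)
{
	if (!list_empty(&c->bonding_list))	/* already enlisted */
		return;

	if (!atomic_inc_not_zero(&c->refcount))	/* entry is dying */
		return;

	list_add_rcu(&c->bonding_list, bond_list);
}

/* caller must hold the list lock, as with bonding_candidate_del() */
static void cand_del(struct cand *c)
{
	if (list_empty(&c->bonding_list))	/* not on the list */
		return;

	list_del_rcu(&c->bonding_list);
	INIT_LIST_HEAD(&c->bonding_list);	/* re-arm the empty test */
	cand_free_ref(c);			/* drop the list's reference */
}
```
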
@@ -280,16 +407,20 @@ static void update_orig(struct bat_priv *bat_priv,
280 407
281 orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); 408 orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
282 if (!orig_tmp) 409 if (!orig_tmp)
283 return; 410 goto unlock;
284 411
285 neigh_node = create_neighbor(orig_node, orig_tmp, 412 neigh_node = create_neighbor(orig_node, orig_tmp,
286 ethhdr->h_source, if_incoming); 413 ethhdr->h_source, if_incoming);
414
415 orig_node_free_ref(orig_tmp);
287 if (!neigh_node) 416 if (!neigh_node)
288 return; 417 goto unlock;
289 } else 418 } else
290 bat_dbg(DBG_BATMAN, bat_priv, 419 bat_dbg(DBG_BATMAN, bat_priv,
291 "Updating existing last-hop neighbor of originator\n"); 420 "Updating existing last-hop neighbor of originator\n");
292 421
422 rcu_read_unlock();
423
293 orig_node->flags = batman_packet->flags; 424 orig_node->flags = batman_packet->flags;
294 neigh_node->last_valid = jiffies; 425 neigh_node->last_valid = jiffies;
295 426
@@ -303,6 +434,8 @@ static void update_orig(struct bat_priv *bat_priv,
303 neigh_node->last_ttl = batman_packet->ttl; 434 neigh_node->last_ttl = batman_packet->ttl;
304 } 435 }
305 436
437 bonding_candidate_add(orig_node, neigh_node);
438
306 tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ? 439 tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ?
307 batman_packet->num_hna * ETH_ALEN : hna_buff_len); 440 batman_packet->num_hna * ETH_ALEN : hna_buff_len);
308 441
@@ -319,10 +452,22 @@ static void update_orig(struct bat_priv *bat_priv,
319 /* if the TQ is the same and the link not more symmetric we 452 /* if the TQ is the same and the link not more symmetric we
320 * won't consider it either */ 453 * won't consider it either */
321 if ((orig_node->router) && 454 if ((orig_node->router) &&
322 ((neigh_node->tq_avg == orig_node->router->tq_avg) && 455 (neigh_node->tq_avg == orig_node->router->tq_avg)) {
323 (orig_node->router->orig_node->bcast_own_sum[if_incoming->if_num] 456 orig_node_tmp = orig_node->router->orig_node;
324 >= neigh_node->orig_node->bcast_own_sum[if_incoming->if_num]))) 457 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
325 goto update_hna; 458 bcast_own_sum_orig =
459 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
460 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
461
462 orig_node_tmp = neigh_node->orig_node;
463 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
464 bcast_own_sum_neigh =
465 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
466 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
467
468 if (bcast_own_sum_orig >= bcast_own_sum_neigh)
469 goto update_hna;
470 }
326 471
327 update_routes(bat_priv, orig_node, neigh_node, 472 update_routes(bat_priv, orig_node, neigh_node,
328 hna_buff, tmp_hna_buff_len); 473 hna_buff, tmp_hna_buff_len);
@@ -343,6 +488,14 @@ update_gw:
343 (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && 488 (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
344 (atomic_read(&bat_priv->gw_sel_class) > 2)) 489 (atomic_read(&bat_priv->gw_sel_class) > 2))
345 gw_check_election(bat_priv, orig_node); 490 gw_check_election(bat_priv, orig_node);
491
492 goto out;
493
494unlock:
495 rcu_read_unlock();
496out:
497 if (neigh_node)
498 neigh_node_free_ref(neigh_node);
346} 499}
347 500
348/* checks whether the host restarted and is in the protection time. 501/* checks whether the host restarted and is in the protection time.
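
The goto rewrites in update_orig() follow the exit-path convention this series applies throughout: a dedicated "unlock" label for error paths that still hold rcu_read_lock(), falling through to "out" where every held reference is dropped once. The skeleton, with all names as stand-ins:

```c
#include <linux/rcupdate.h>
#include <linux/netdevice.h>

struct obj;
struct obj *lookup_a(void);		/* returns a referenced object */
struct obj *lookup_b(struct obj *a);
void put_a(struct obj *o);
void put_b(struct obj *o);

static int handler_skeleton(void)
{
	struct obj *a = NULL, *b = NULL;
	int ret = NET_RX_DROP;

	rcu_read_lock();
	a = lookup_a();
	if (!a)
		goto unlock;

	b = lookup_b(a);
	if (!b)
		goto unlock;
	rcu_read_unlock();

	/* work that must not run under rcu_read_lock() goes here */
	ret = NET_RX_SUCCESS;
	goto out;

unlock:
	rcu_read_unlock();
out:
	if (b)
		put_b(b);
	if (a)
		put_a(a);
	return ret;
}
```
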
@@ -380,34 +533,38 @@ static int window_protected(struct bat_priv *bat_priv,
380 */ 533 */
381static char count_real_packets(struct ethhdr *ethhdr, 534static char count_real_packets(struct ethhdr *ethhdr,
382 struct batman_packet *batman_packet, 535 struct batman_packet *batman_packet,
383 struct batman_if *if_incoming) 536 struct hard_iface *if_incoming)
384{ 537{
385 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 538 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
386 struct orig_node *orig_node; 539 struct orig_node *orig_node;
387 struct neigh_node *tmp_neigh_node; 540 struct neigh_node *tmp_neigh_node;
541 struct hlist_node *node;
388 char is_duplicate = 0; 542 char is_duplicate = 0;
389 int32_t seq_diff; 543 int32_t seq_diff;
390 int need_update = 0; 544 int need_update = 0;
391 int set_mark; 545 int set_mark, ret = -1;
392 546
393 orig_node = get_orig_node(bat_priv, batman_packet->orig); 547 orig_node = get_orig_node(bat_priv, batman_packet->orig);
394 if (!orig_node) 548 if (!orig_node)
395 return 0; 549 return 0;
396 550
551 spin_lock_bh(&orig_node->ogm_cnt_lock);
397 seq_diff = batman_packet->seqno - orig_node->last_real_seqno; 552 seq_diff = batman_packet->seqno - orig_node->last_real_seqno;
398 553
399 /* signalize caller that the packet is to be dropped. */ 554 /* signalize caller that the packet is to be dropped. */
400 if (window_protected(bat_priv, seq_diff, 555 if (window_protected(bat_priv, seq_diff,
401 &orig_node->batman_seqno_reset)) 556 &orig_node->batman_seqno_reset))
402 return -1; 557 goto out;
403 558
404 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { 559 rcu_read_lock();
560 hlist_for_each_entry_rcu(tmp_neigh_node, node,
561 &orig_node->neigh_list, list) {
405 562
406 is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, 563 is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
407 orig_node->last_real_seqno, 564 orig_node->last_real_seqno,
408 batman_packet->seqno); 565 batman_packet->seqno);
409 566
410 if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && 567 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
411 (tmp_neigh_node->if_incoming == if_incoming)) 568 (tmp_neigh_node->if_incoming == if_incoming))
412 set_mark = 1; 569 set_mark = 1;
413 else 570 else
@@ -421,6 +578,7 @@ static char count_real_packets(struct ethhdr *ethhdr,
421 tmp_neigh_node->real_packet_count = 578 tmp_neigh_node->real_packet_count =
422 bit_packet_count(tmp_neigh_node->real_bits); 579 bit_packet_count(tmp_neigh_node->real_bits);
423 } 580 }
581 rcu_read_unlock();
424 582
425 if (need_update) { 583 if (need_update) {
426 bat_dbg(DBG_BATMAN, bat_priv, 584 bat_dbg(DBG_BATMAN, bat_priv,
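
count_real_packets() keeps, per neighbour, a bitfield covering the last TQ_LOCAL_WINDOW_SIZE sequence numbers; a packet counts as a duplicate if its bit relative to the newest seqno is already set. A sketch of the assumed semantics of get_bit_status(), not the actual implementation:

```c
#include <linux/types.h>
#include <linux/bitops.h>

#define WINDOW_SIZE 64		/* stand-in for TQ_LOCAL_WINDOW_SIZE */

static int seen_before(const unsigned long *bits,
		       uint32_t last_seqno, uint32_t seqno)
{
	int32_t diff = last_seqno - seqno;

	if (diff < 0 || diff >= WINDOW_SIZE)
		return 0;		/* outside the window: not tracked */

	return test_bit(diff, bits);	/* bit set => already counted */
}
```
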
@@ -429,123 +587,21 @@ static char count_real_packets(struct ethhdr *ethhdr,
429 orig_node->last_real_seqno = batman_packet->seqno; 587 orig_node->last_real_seqno = batman_packet->seqno;
430 } 588 }
431 589
432 return is_duplicate; 590 ret = is_duplicate;
433}
434
435/* copy primary address for bonding */
436static void mark_bonding_address(struct bat_priv *bat_priv,
437 struct orig_node *orig_node,
438 struct orig_node *orig_neigh_node,
439 struct batman_packet *batman_packet)
440 591
441{ 592out:
442 if (batman_packet->flags & PRIMARIES_FIRST_HOP) 593 spin_unlock_bh(&orig_node->ogm_cnt_lock);
443 memcpy(orig_neigh_node->primary_addr, 594 orig_node_free_ref(orig_node);
444 orig_node->orig, ETH_ALEN); 595 return ret;
445
446 return;
447}
448
449/* mark possible bond.candidates in the neighbor list */
450void update_bonding_candidates(struct bat_priv *bat_priv,
451 struct orig_node *orig_node)
452{
453 int candidates;
454 int interference_candidate;
455 int best_tq;
456 struct neigh_node *tmp_neigh_node, *tmp_neigh_node2;
457 struct neigh_node *first_candidate, *last_candidate;
458
459 /* update the candidates for this originator */
460 if (!orig_node->router) {
461 orig_node->bond.candidates = 0;
462 return;
463 }
464
465 best_tq = orig_node->router->tq_avg;
466
467 /* update bond.candidates */
468
469 candidates = 0;
470
471 /* mark other nodes which also received "PRIMARIES FIRST HOP" packets
472 * as "bonding partner" */
473
474 /* first, zero the list */
475 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
476 tmp_neigh_node->next_bond_candidate = NULL;
477 }
478
479 first_candidate = NULL;
480 last_candidate = NULL;
481 list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
482
483 /* only consider if it has the same primary address ... */
484 if (memcmp(orig_node->orig,
485 tmp_neigh_node->orig_node->primary_addr,
486 ETH_ALEN) != 0)
487 continue;
488
489 /* ... and is good enough to be considered */
490 if (tmp_neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD)
491 continue;
492
493 /* check if we have another candidate with the same
494 * mac address or interface. If we do, we won't
495 * select this candidate because of possible interference. */
496
497 interference_candidate = 0;
498 list_for_each_entry(tmp_neigh_node2,
499 &orig_node->neigh_list, list) {
500
501 if (tmp_neigh_node2 == tmp_neigh_node)
502 continue;
503
504 /* we only care if the other candidate is even
505 * considered as candidate. */
506 if (!tmp_neigh_node2->next_bond_candidate)
507 continue;
508
509
510 if ((tmp_neigh_node->if_incoming ==
511 tmp_neigh_node2->if_incoming)
512 || (memcmp(tmp_neigh_node->addr,
513 tmp_neigh_node2->addr, ETH_ALEN) == 0)) {
514
515 interference_candidate = 1;
516 break;
517 }
518 }
519 /* don't care further if it is an interference candidate */
520 if (interference_candidate)
521 continue;
522
523 if (!first_candidate) {
524 first_candidate = tmp_neigh_node;
525 tmp_neigh_node->next_bond_candidate = first_candidate;
526 } else
527 tmp_neigh_node->next_bond_candidate = last_candidate;
528
529 last_candidate = tmp_neigh_node;
530
531 candidates++;
532 }
533
534 if (candidates > 0) {
535 first_candidate->next_bond_candidate = last_candidate;
536 orig_node->bond.selected = first_candidate;
537 }
538
539 orig_node->bond.candidates = candidates;
540} 596}
541 597
542void receive_bat_packet(struct ethhdr *ethhdr, 598void receive_bat_packet(struct ethhdr *ethhdr,
543 struct batman_packet *batman_packet, 599 struct batman_packet *batman_packet,
544 unsigned char *hna_buff, int hna_buff_len, 600 unsigned char *hna_buff, int hna_buff_len,
545 struct batman_if *if_incoming) 601 struct hard_iface *if_incoming)
546{ 602{
547 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 603 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
548 struct batman_if *batman_if; 604 struct hard_iface *hard_iface;
549 struct orig_node *orig_neigh_node, *orig_node; 605 struct orig_node *orig_neigh_node, *orig_node;
550 char has_directlink_flag; 606 char has_directlink_flag;
551 char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; 607 char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
@@ -573,8 +629,8 @@ void receive_bat_packet(struct ethhdr *ethhdr,
573 629
574 has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0); 630 has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0);
575 631
576 is_single_hop_neigh = (compare_orig(ethhdr->h_source, 632 is_single_hop_neigh = (compare_eth(ethhdr->h_source,
577 batman_packet->orig) ? 1 : 0); 633 batman_packet->orig) ? 1 : 0);
578 634
579 bat_dbg(DBG_BATMAN, bat_priv, 635 bat_dbg(DBG_BATMAN, bat_priv,
580 "Received BATMAN packet via NB: %pM, IF: %s [%pM] " 636 "Received BATMAN packet via NB: %pM, IF: %s [%pM] "
@@ -587,26 +643,26 @@ void receive_bat_packet(struct ethhdr *ethhdr,
587 has_directlink_flag); 643 has_directlink_flag);
588 644
589 rcu_read_lock(); 645 rcu_read_lock();
590 list_for_each_entry_rcu(batman_if, &if_list, list) { 646 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
591 if (batman_if->if_status != IF_ACTIVE) 647 if (hard_iface->if_status != IF_ACTIVE)
592 continue; 648 continue;
593 649
594 if (batman_if->soft_iface != if_incoming->soft_iface) 650 if (hard_iface->soft_iface != if_incoming->soft_iface)
595 continue; 651 continue;
596 652
597 if (compare_orig(ethhdr->h_source, 653 if (compare_eth(ethhdr->h_source,
598 batman_if->net_dev->dev_addr)) 654 hard_iface->net_dev->dev_addr))
599 is_my_addr = 1; 655 is_my_addr = 1;
600 656
601 if (compare_orig(batman_packet->orig, 657 if (compare_eth(batman_packet->orig,
602 batman_if->net_dev->dev_addr)) 658 hard_iface->net_dev->dev_addr))
603 is_my_orig = 1; 659 is_my_orig = 1;
604 660
605 if (compare_orig(batman_packet->prev_sender, 661 if (compare_eth(batman_packet->prev_sender,
606 batman_if->net_dev->dev_addr)) 662 hard_iface->net_dev->dev_addr))
607 is_my_oldorig = 1; 663 is_my_oldorig = 1;
608 664
609 if (compare_orig(ethhdr->h_source, broadcast_addr)) 665 if (compare_eth(ethhdr->h_source, broadcast_addr))
610 is_broadcast = 1; 666 is_broadcast = 1;
611 } 667 }
612 rcu_read_unlock(); 668 rcu_read_unlock();
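
compare_orig() is replaced throughout by compare_eth(); judging by its call sites it is a boolean Ethernet-address equality helper, roughly equivalent to the following (exact signature assumed):

```c
#include <linux/string.h>
#include <linux/if_ether.h>

static inline int compare_eth(const void *data1, const void *data2)
{
	return memcmp(data1, data2, ETH_ALEN) == 0;
}
```
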
@@ -638,7 +694,6 @@ void receive_bat_packet(struct ethhdr *ethhdr,
638 int offset; 694 int offset;
639 695
640 orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); 696 orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
641
642 if (!orig_neigh_node) 697 if (!orig_neigh_node)
643 return; 698 return;
644 699
@@ -647,18 +702,22 @@ void receive_bat_packet(struct ethhdr *ethhdr,
647 /* if received seqno equals last send seqno save new 702 /* if received seqno equals last send seqno save new
648 * seqno for bidirectional check */ 703 * seqno for bidirectional check */
649 if (has_directlink_flag && 704 if (has_directlink_flag &&
650 compare_orig(if_incoming->net_dev->dev_addr, 705 compare_eth(if_incoming->net_dev->dev_addr,
651 batman_packet->orig) && 706 batman_packet->orig) &&
652 (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { 707 (batman_packet->seqno - if_incoming_seqno + 2 == 0)) {
653 offset = if_incoming->if_num * NUM_WORDS; 708 offset = if_incoming->if_num * NUM_WORDS;
709
710 spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
654 word = &(orig_neigh_node->bcast_own[offset]); 711 word = &(orig_neigh_node->bcast_own[offset]);
655 bit_mark(word, 0); 712 bit_mark(word, 0);
656 orig_neigh_node->bcast_own_sum[if_incoming->if_num] = 713 orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
657 bit_packet_count(word); 714 bit_packet_count(word);
715 spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
658 } 716 }
659 717
660 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " 718 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
661 "originator packet from myself (via neighbor)\n"); 719 "originator packet from myself (via neighbor)\n");
720 orig_node_free_ref(orig_neigh_node);
662 return; 721 return;
663 } 722 }
664 723
@@ -679,27 +738,27 @@ void receive_bat_packet(struct ethhdr *ethhdr,
679 bat_dbg(DBG_BATMAN, bat_priv, 738 bat_dbg(DBG_BATMAN, bat_priv,
680 "Drop packet: packet within seqno protection time " 739 "Drop packet: packet within seqno protection time "
681 "(sender: %pM)\n", ethhdr->h_source); 740 "(sender: %pM)\n", ethhdr->h_source);
682 return; 741 goto out;
683 } 742 }
684 743
685 if (batman_packet->tq == 0) { 744 if (batman_packet->tq == 0) {
686 bat_dbg(DBG_BATMAN, bat_priv, 745 bat_dbg(DBG_BATMAN, bat_priv,
687 "Drop packet: originator packet with tq equal 0\n"); 746 "Drop packet: originator packet with tq equal 0\n");
688 return; 747 goto out;
689 } 748 }
690 749
691 /* avoid temporary routing loops */ 750 /* avoid temporary routing loops */
692 if ((orig_node->router) && 751 if ((orig_node->router) &&
693 (orig_node->router->orig_node->router) && 752 (orig_node->router->orig_node->router) &&
694 (compare_orig(orig_node->router->addr, 753 (compare_eth(orig_node->router->addr,
695 batman_packet->prev_sender)) && 754 batman_packet->prev_sender)) &&
696 !(compare_orig(batman_packet->orig, batman_packet->prev_sender)) && 755 !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) &&
697 (compare_orig(orig_node->router->addr, 756 (compare_eth(orig_node->router->addr,
698 orig_node->router->orig_node->router->addr))) { 757 orig_node->router->orig_node->router->addr))) {
699 bat_dbg(DBG_BATMAN, bat_priv, 758 bat_dbg(DBG_BATMAN, bat_priv,
700 "Drop packet: ignoring all rebroadcast packets that " 759 "Drop packet: ignoring all rebroadcast packets that "
701 "may make me loop (sender: %pM)\n", ethhdr->h_source); 760 "may make me loop (sender: %pM)\n", ethhdr->h_source);
702 return; 761 goto out;
703 } 762 }
704 763
705 /* if sender is a direct neighbor the sender mac equals 764 /* if sender is a direct neighbor the sender mac equals
@@ -708,19 +767,21 @@ void receive_bat_packet(struct ethhdr *ethhdr,
708 orig_node : 767 orig_node :
709 get_orig_node(bat_priv, ethhdr->h_source)); 768 get_orig_node(bat_priv, ethhdr->h_source));
710 if (!orig_neigh_node) 769 if (!orig_neigh_node)
711 return; 770 goto out;
712 771
713 /* drop packet if sender is not a direct neighbor and if we 772 /* drop packet if sender is not a direct neighbor and if we
714 * don't route towards it */ 773 * don't route towards it */
715 if (!is_single_hop_neigh && (!orig_neigh_node->router)) { 774 if (!is_single_hop_neigh && (!orig_neigh_node->router)) {
716 bat_dbg(DBG_BATMAN, bat_priv, 775 bat_dbg(DBG_BATMAN, bat_priv,
717 "Drop packet: OGM via unknown neighbor!\n"); 776 "Drop packet: OGM via unknown neighbor!\n");
718 return; 777 goto out_neigh;
719 } 778 }
720 779
721 is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, 780 is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node,
722 batman_packet, if_incoming); 781 batman_packet, if_incoming);
723 782
783 bonding_save_primary(orig_node, orig_neigh_node, batman_packet);
784
724 /* update ranking if it is not a duplicate or has the same 785 /* update ranking if it is not a duplicate or has the same
725 * seqno and similar ttl as the non-duplicate */ 786 * seqno and similar ttl as the non-duplicate */
726 if (is_bidirectional && 787 if (is_bidirectional &&
@@ -730,10 +791,6 @@ void receive_bat_packet(struct ethhdr *ethhdr,
730 update_orig(bat_priv, orig_node, ethhdr, batman_packet, 791 update_orig(bat_priv, orig_node, ethhdr, batman_packet,
731 if_incoming, hna_buff, hna_buff_len, is_duplicate); 792 if_incoming, hna_buff, hna_buff_len, is_duplicate);
732 793
733 mark_bonding_address(bat_priv, orig_node,
734 orig_neigh_node, batman_packet);
735 update_bonding_candidates(bat_priv, orig_node);
736
737 /* is single hop (direct) neighbor */ 794 /* is single hop (direct) neighbor */
738 if (is_single_hop_neigh) { 795 if (is_single_hop_neigh) {
739 796
@@ -743,31 +800,36 @@ void receive_bat_packet(struct ethhdr *ethhdr,
743 800
744 bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " 801 bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
745 "rebroadcast neighbor packet with direct link flag\n"); 802 "rebroadcast neighbor packet with direct link flag\n");
746 return; 803 goto out_neigh;
747 } 804 }
748 805
749 /* multihop originator */ 806 /* multihop originator */
750 if (!is_bidirectional) { 807 if (!is_bidirectional) {
751 bat_dbg(DBG_BATMAN, bat_priv, 808 bat_dbg(DBG_BATMAN, bat_priv,
752 "Drop packet: not received via bidirectional link\n"); 809 "Drop packet: not received via bidirectional link\n");
753 return; 810 goto out_neigh;
754 } 811 }
755 812
756 if (is_duplicate) { 813 if (is_duplicate) {
757 bat_dbg(DBG_BATMAN, bat_priv, 814 bat_dbg(DBG_BATMAN, bat_priv,
758 "Drop packet: duplicate packet received\n"); 815 "Drop packet: duplicate packet received\n");
759 return; 816 goto out_neigh;
760 } 817 }
761 818
762 bat_dbg(DBG_BATMAN, bat_priv, 819 bat_dbg(DBG_BATMAN, bat_priv,
763 "Forwarding packet: rebroadcast originator packet\n"); 820 "Forwarding packet: rebroadcast originator packet\n");
764 schedule_forward_packet(orig_node, ethhdr, batman_packet, 821 schedule_forward_packet(orig_node, ethhdr, batman_packet,
765 0, hna_buff_len, if_incoming); 822 0, hna_buff_len, if_incoming);
823
824out_neigh:
825 if ((orig_neigh_node) && (!is_single_hop_neigh))
826 orig_node_free_ref(orig_neigh_node);
827out:
828 orig_node_free_ref(orig_node);
766} 829}
767 830
768int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) 831int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
769{ 832{
770 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
771 struct ethhdr *ethhdr; 833 struct ethhdr *ethhdr;
772 834
773 /* drop packet if it has not necessary minimum size */ 835 /* drop packet if it has not necessary minimum size */
@@ -794,12 +856,10 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
794 856
795 ethhdr = (struct ethhdr *)skb_mac_header(skb); 857 ethhdr = (struct ethhdr *)skb_mac_header(skb);
796 858
797 spin_lock_bh(&bat_priv->orig_hash_lock);
798 receive_aggr_bat_packet(ethhdr, 859 receive_aggr_bat_packet(ethhdr,
799 skb->data, 860 skb->data,
800 skb_headlen(skb), 861 skb_headlen(skb),
801 batman_if); 862 hard_iface);
802 spin_unlock_bh(&bat_priv->orig_hash_lock);
803 863
804 kfree_skb(skb); 864 kfree_skb(skb);
805 return NET_RX_SUCCESS; 865 return NET_RX_SUCCESS;
@@ -808,135 +868,144 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
808static int recv_my_icmp_packet(struct bat_priv *bat_priv, 868static int recv_my_icmp_packet(struct bat_priv *bat_priv,
809 struct sk_buff *skb, size_t icmp_len) 869 struct sk_buff *skb, size_t icmp_len)
810{ 870{
811 struct orig_node *orig_node; 871 struct orig_node *orig_node = NULL;
872 struct neigh_node *neigh_node = NULL;
812 struct icmp_packet_rr *icmp_packet; 873 struct icmp_packet_rr *icmp_packet;
813 struct ethhdr *ethhdr; 874 int ret = NET_RX_DROP;
814 struct batman_if *batman_if;
815 int ret;
816 uint8_t dstaddr[ETH_ALEN];
817 875
818 icmp_packet = (struct icmp_packet_rr *)skb->data; 876 icmp_packet = (struct icmp_packet_rr *)skb->data;
819 ethhdr = (struct ethhdr *)skb_mac_header(skb);
820 877
821 /* add data to device queue */ 878 /* add data to device queue */
822 if (icmp_packet->msg_type != ECHO_REQUEST) { 879 if (icmp_packet->msg_type != ECHO_REQUEST) {
823 bat_socket_receive_packet(icmp_packet, icmp_len); 880 bat_socket_receive_packet(icmp_packet, icmp_len);
824 return NET_RX_DROP; 881 goto out;
825 } 882 }
826 883
827 if (!bat_priv->primary_if) 884 if (!bat_priv->primary_if)
828 return NET_RX_DROP; 885 goto out;
829 886
830 /* answer echo request (ping) */ 887 /* answer echo request (ping) */
831 /* get routing information */ 888 /* get routing information */
832 spin_lock_bh(&bat_priv->orig_hash_lock); 889 rcu_read_lock();
833 orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, 890 orig_node = orig_hash_find(bat_priv, icmp_packet->orig);
834 compare_orig, choose_orig,
835 icmp_packet->orig));
836 ret = NET_RX_DROP;
837
838 if ((orig_node) && (orig_node->router)) {
839
840 /* don't lock while sending the packets ... we therefore
841 * copy the required data before sending */
842 batman_if = orig_node->router->if_incoming;
843 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
844 spin_unlock_bh(&bat_priv->orig_hash_lock);
845
846 /* create a copy of the skb, if needed, to modify it. */
847 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
848 return NET_RX_DROP;
849 891
850 icmp_packet = (struct icmp_packet_rr *)skb->data; 892 if (!orig_node)
851 ethhdr = (struct ethhdr *)skb_mac_header(skb); 893 goto unlock;
852 894
853 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); 895 neigh_node = orig_node->router;
854 memcpy(icmp_packet->orig,
855 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
856 icmp_packet->msg_type = ECHO_REPLY;
857 icmp_packet->ttl = TTL;
858 896
859 send_skb_packet(skb, batman_if, dstaddr); 897 if (!neigh_node)
860 ret = NET_RX_SUCCESS; 898 goto unlock;
861 899
862 } else 900 if (!atomic_inc_not_zero(&neigh_node->refcount)) {
863 spin_unlock_bh(&bat_priv->orig_hash_lock); 901 neigh_node = NULL;
902 goto unlock;
903 }
904
905 rcu_read_unlock();
906
907 /* create a copy of the skb, if needed, to modify it. */
908 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
909 goto out;
910
911 icmp_packet = (struct icmp_packet_rr *)skb->data;
912
913 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
914 memcpy(icmp_packet->orig,
915 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
916 icmp_packet->msg_type = ECHO_REPLY;
917 icmp_packet->ttl = TTL;
864 918
919 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
920 ret = NET_RX_SUCCESS;
921 goto out;
922
923unlock:
924 rcu_read_unlock();
925out:
926 if (neigh_node)
927 neigh_node_free_ref(neigh_node);
928 if (orig_node)
929 orig_node_free_ref(orig_node);
865 return ret; 930 return ret;
866} 931}
867 932
868static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, 933static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv,
869 struct sk_buff *skb, size_t icmp_len) 934 struct sk_buff *skb)
870{ 935{
871 struct orig_node *orig_node; 936 struct orig_node *orig_node = NULL;
937 struct neigh_node *neigh_node = NULL;
872 struct icmp_packet *icmp_packet; 938 struct icmp_packet *icmp_packet;
873 struct ethhdr *ethhdr; 939 int ret = NET_RX_DROP;
874 struct batman_if *batman_if;
875 int ret;
876 uint8_t dstaddr[ETH_ALEN];
877 940
878 icmp_packet = (struct icmp_packet *)skb->data; 941 icmp_packet = (struct icmp_packet *)skb->data;
879 ethhdr = (struct ethhdr *)skb_mac_header(skb);
880 942
881 /* send TTL exceeded if packet is an echo request (traceroute) */ 943 /* send TTL exceeded if packet is an echo request (traceroute) */
882 if (icmp_packet->msg_type != ECHO_REQUEST) { 944 if (icmp_packet->msg_type != ECHO_REQUEST) {
883 pr_debug("Warning - can't forward icmp packet from %pM to " 945 pr_debug("Warning - can't forward icmp packet from %pM to "
884 "%pM: ttl exceeded\n", icmp_packet->orig, 946 "%pM: ttl exceeded\n", icmp_packet->orig,
885 icmp_packet->dst); 947 icmp_packet->dst);
886 return NET_RX_DROP; 948 goto out;
887 } 949 }
888 950
889 if (!bat_priv->primary_if) 951 if (!bat_priv->primary_if)
890 return NET_RX_DROP; 952 goto out;
891 953
892 /* get routing information */ 954 /* get routing information */
893 spin_lock_bh(&bat_priv->orig_hash_lock); 955 rcu_read_lock();
894 orig_node = ((struct orig_node *) 956 orig_node = orig_hash_find(bat_priv, icmp_packet->orig);
895 hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
896 icmp_packet->orig));
897 ret = NET_RX_DROP;
898
899 if ((orig_node) && (orig_node->router)) {
900
901 /* don't lock while sending the packets ... we therefore
902 * copy the required data before sending */
903 batman_if = orig_node->router->if_incoming;
904 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
905 spin_unlock_bh(&bat_priv->orig_hash_lock);
906
907 /* create a copy of the skb, if needed, to modify it. */
908 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
909 return NET_RX_DROP;
910 957
911 icmp_packet = (struct icmp_packet *) skb->data; 958 if (!orig_node)
912 ethhdr = (struct ethhdr *)skb_mac_header(skb); 959 goto unlock;
913 960
914 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); 961 neigh_node = orig_node->router;
915 memcpy(icmp_packet->orig,
916 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
917 icmp_packet->msg_type = TTL_EXCEEDED;
918 icmp_packet->ttl = TTL;
919 962
920 send_skb_packet(skb, batman_if, dstaddr); 963 if (!neigh_node)
921 ret = NET_RX_SUCCESS; 964 goto unlock;
922 965
923 } else 966 if (!atomic_inc_not_zero(&neigh_node->refcount)) {
924 spin_unlock_bh(&bat_priv->orig_hash_lock); 967 neigh_node = NULL;
968 goto unlock;
969 }
925 970
971 rcu_read_unlock();
972
973 /* create a copy of the skb, if needed, to modify it. */
974 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
975 goto out;
976
977 icmp_packet = (struct icmp_packet *)skb->data;
978
979 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
980 memcpy(icmp_packet->orig,
981 bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
982 icmp_packet->msg_type = TTL_EXCEEDED;
983 icmp_packet->ttl = TTL;
984
985 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
986 ret = NET_RX_SUCCESS;
987 goto out;
988
989unlock:
990 rcu_read_unlock();
991out:
992 if (neigh_node)
993 neigh_node_free_ref(neigh_node);
994 if (orig_node)
995 orig_node_free_ref(orig_node);
926 return ret; 996 return ret;
927} 997}
928 998
929 999
930int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) 1000int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if)
931{ 1001{
932 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 1002 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
933 struct icmp_packet_rr *icmp_packet; 1003 struct icmp_packet_rr *icmp_packet;
934 struct ethhdr *ethhdr; 1004 struct ethhdr *ethhdr;
935 struct orig_node *orig_node; 1005 struct orig_node *orig_node = NULL;
936 struct batman_if *batman_if; 1006 struct neigh_node *neigh_node = NULL;
937 int hdr_size = sizeof(struct icmp_packet); 1007 int hdr_size = sizeof(struct icmp_packet);
938 int ret; 1008 int ret = NET_RX_DROP;
939 uint8_t dstaddr[ETH_ALEN];
940 1009
941 /** 1010 /**
942 * we truncate all incoming icmp packets if they don't match our size 1011 * we truncate all incoming icmp packets if they don't match our size
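
Both ICMP handlers above now share one lookup idiom in place of hash_find() under the big spinlock: find the originator under RCU, then try to take a reference on its router before leaving the read-side section, so the neighbour can be used (and the skb sent) without any lock held. Condensed into one helper as a sketch, with orig_hash_find()'s signature assumed:

```c
static struct neigh_node *get_router_for(struct bat_priv *bat_priv,
					 const uint8_t *addr)
{
	struct orig_node *orig_node;
	struct neigh_node *router = NULL;

	rcu_read_lock();
	orig_node = orig_hash_find(bat_priv, addr);	/* takes a ref */
	if (!orig_node)
		goto unlock;

	router = orig_node->router;
	if (router && !atomic_inc_not_zero(&router->refcount))
		router = NULL;		/* router was being freed */

	orig_node_free_ref(orig_node);
unlock:
	rcu_read_unlock();
	return router;
}
```
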
@@ -946,21 +1015,21 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
946 1015
947 /* drop packet if it has not necessary minimum size */ 1016 /* drop packet if it has not necessary minimum size */
948 if (unlikely(!pskb_may_pull(skb, hdr_size))) 1017 if (unlikely(!pskb_may_pull(skb, hdr_size)))
949 return NET_RX_DROP; 1018 goto out;
950 1019
951 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1020 ethhdr = (struct ethhdr *)skb_mac_header(skb);
952 1021
953 /* packet with unicast indication but broadcast recipient */ 1022 /* packet with unicast indication but broadcast recipient */
954 if (is_broadcast_ether_addr(ethhdr->h_dest)) 1023 if (is_broadcast_ether_addr(ethhdr->h_dest))
955 return NET_RX_DROP; 1024 goto out;
956 1025
957 /* packet with broadcast sender address */ 1026 /* packet with broadcast sender address */
958 if (is_broadcast_ether_addr(ethhdr->h_source)) 1027 if (is_broadcast_ether_addr(ethhdr->h_source))
959 return NET_RX_DROP; 1028 goto out;
960 1029
961 /* not for me */ 1030 /* not for me */
962 if (!is_my_mac(ethhdr->h_dest)) 1031 if (!is_my_mac(ethhdr->h_dest))
963 return NET_RX_DROP; 1032 goto out;
964 1033
965 icmp_packet = (struct icmp_packet_rr *)skb->data; 1034 icmp_packet = (struct icmp_packet_rr *)skb->data;
966 1035
@@ -978,53 +1047,61 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
978 1047
979 /* TTL exceeded */ 1048 /* TTL exceeded */
980 if (icmp_packet->ttl < 2) 1049 if (icmp_packet->ttl < 2)
981 return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size); 1050 return recv_icmp_ttl_exceeded(bat_priv, skb);
982
983 ret = NET_RX_DROP;
984 1051
985 /* get routing information */ 1052 /* get routing information */
986 spin_lock_bh(&bat_priv->orig_hash_lock); 1053 rcu_read_lock();
987 orig_node = ((struct orig_node *) 1054 orig_node = orig_hash_find(bat_priv, icmp_packet->dst);
988 hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
989 icmp_packet->dst));
990 1055
991 if ((orig_node) && (orig_node->router)) { 1056 if (!orig_node)
1057 goto unlock;
992 1058
993 /* don't lock while sending the packets ... we therefore 1059 neigh_node = orig_node->router;
994 * copy the required data before sending */
995 batman_if = orig_node->router->if_incoming;
996 memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
997 spin_unlock_bh(&bat_priv->orig_hash_lock);
998 1060
999 /* create a copy of the skb, if needed, to modify it. */ 1061 if (!neigh_node)
1000 if (skb_cow(skb, sizeof(struct ethhdr)) < 0) 1062 goto unlock;
1001 return NET_RX_DROP;
1002 1063
1003 icmp_packet = (struct icmp_packet_rr *)skb->data; 1064 if (!atomic_inc_not_zero(&neigh_node->refcount)) {
1004 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1065 neigh_node = NULL;
1066 goto unlock;
1067 }
1005 1068
1006 /* decrement ttl */ 1069 rcu_read_unlock();
1007 icmp_packet->ttl--;
1008 1070
1009 /* route it */ 1071 /* create a copy of the skb, if needed, to modify it. */
1010 send_skb_packet(skb, batman_if, dstaddr); 1072 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
1011 ret = NET_RX_SUCCESS; 1073 goto out;
1012 1074
1013 } else 1075 icmp_packet = (struct icmp_packet_rr *)skb->data;
1014 spin_unlock_bh(&bat_priv->orig_hash_lock); 1076
1077 /* decrement ttl */
1078 icmp_packet->ttl--;
1015 1079
1080 /* route it */
1081 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
1082 ret = NET_RX_SUCCESS;
1083 goto out;
1084
1085unlock:
1086 rcu_read_unlock();
1087out:
1088 if (neigh_node)
1089 neigh_node_free_ref(neigh_node);
1090 if (orig_node)
1091 orig_node_free_ref(orig_node);
1016 return ret; 1092 return ret;
1017} 1093}
1018 1094
1019/* find a suitable router for this originator, and use 1095/* find a suitable router for this originator, and use
1020 * bonding if possible. */ 1096 * bonding if possible. increases the found neighbors
1097 * refcount.*/
1021struct neigh_node *find_router(struct bat_priv *bat_priv, 1098struct neigh_node *find_router(struct bat_priv *bat_priv,
1022 struct orig_node *orig_node, 1099 struct orig_node *orig_node,
1023 struct batman_if *recv_if) 1100 struct hard_iface *recv_if)
1024{ 1101{
1025 struct orig_node *primary_orig_node; 1102 struct orig_node *primary_orig_node;
1026 struct orig_node *router_orig; 1103 struct orig_node *router_orig;
1027 struct neigh_node *router, *first_candidate, *best_router; 1104 struct neigh_node *router, *first_candidate, *tmp_neigh_node;
1028 static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; 1105 static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
1029 int bonding_enabled; 1106 int bonding_enabled;
1030 1107
@@ -1036,78 +1113,128 @@ struct neigh_node *find_router(struct bat_priv *bat_priv,
1036 1113
1037 /* without bonding, the first node should 1114 /* without bonding, the first node should
1038 * always choose the default router. */ 1115 * always choose the default router. */
1039
1040 bonding_enabled = atomic_read(&bat_priv->bonding); 1116 bonding_enabled = atomic_read(&bat_priv->bonding);
1041 1117
1042 if ((!recv_if) && (!bonding_enabled)) 1118 rcu_read_lock();
1043 return orig_node->router; 1119 /* select default router to output */
1044 1120 router = orig_node->router;
1045 router_orig = orig_node->router->orig_node; 1121 router_orig = orig_node->router->orig_node;
1122 if (!router_orig || !atomic_inc_not_zero(&router->refcount)) {
1123 rcu_read_unlock();
1124 return NULL;
1125 }
1126
1127 if ((!recv_if) && (!bonding_enabled))
1128 goto return_router;
1046 1129
1047 /* if we have something in the primary_addr, we can search 1130 /* if we have something in the primary_addr, we can search
1048 * for a potential bonding candidate. */ 1131 * for a potential bonding candidate. */
1049 if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0) 1132 if (compare_eth(router_orig->primary_addr, zero_mac))
1050 return orig_node->router; 1133 goto return_router;
1051 1134
1052 /* find the orig_node which has the primary interface. might 1135 /* find the orig_node which has the primary interface. might
1053 * even be the same as our router_orig in many cases */ 1136 * even be the same as our router_orig in many cases */
1054 1137
1055 if (memcmp(router_orig->primary_addr, 1138 if (compare_eth(router_orig->primary_addr, router_orig->orig)) {
1056 router_orig->orig, ETH_ALEN) == 0) {
1057 primary_orig_node = router_orig; 1139 primary_orig_node = router_orig;
1058 } else { 1140 } else {
1059 primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, 1141 primary_orig_node = orig_hash_find(bat_priv,
1060 choose_orig, 1142 router_orig->primary_addr);
1061 router_orig->primary_addr);
1062
1063 if (!primary_orig_node) 1143 if (!primary_orig_node)
1064 return orig_node->router; 1144 goto return_router;
1145
1146 orig_node_free_ref(primary_orig_node);
1065 } 1147 }
1066 1148
1067 /* with less than 2 candidates, we can't do any 1149 /* with less than 2 candidates, we can't do any
1068 * bonding and prefer the original router. */ 1150 * bonding and prefer the original router. */
1069 1151 if (atomic_read(&primary_orig_node->bond_candidates) < 2)
1070 if (primary_orig_node->bond.candidates < 2) 1152 goto return_router;
1071 return orig_node->router;
1072 1153
1073 1154
1074 /* all nodes between should choose a candidate which 1155 /* all nodes between should choose a candidate which
1075 * is not on the interface where the packet came 1156 * is not on the interface where the packet came
1076 * in. */ 1157 * in. */
1077 first_candidate = primary_orig_node->bond.selected; 1158
1078 router = first_candidate; 1159 neigh_node_free_ref(router);
1160 first_candidate = NULL;
1161 router = NULL;
1079 1162
1080 if (bonding_enabled) { 1163 if (bonding_enabled) {
1081 /* in the bonding case, send the packets in a round 1164 /* in the bonding case, send the packets in a round
1082 * robin fashion over the remaining interfaces. */ 1165 * robin fashion over the remaining interfaces. */
1083 do { 1166
1167 list_for_each_entry_rcu(tmp_neigh_node,
1168 &primary_orig_node->bond_list, bonding_list) {
1169 if (!first_candidate)
1170 first_candidate = tmp_neigh_node;
1084 /* recv_if == NULL on the first node. */ 1171 /* recv_if == NULL on the first node. */
1085 if (router->if_incoming != recv_if) 1172 if (tmp_neigh_node->if_incoming != recv_if &&
1173 atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
1174 router = tmp_neigh_node;
1086 break; 1175 break;
1176 }
1177 }
1178
1179 /* use the first candidate if nothing was found. */
1180 if (!router && first_candidate &&
1181 atomic_inc_not_zero(&first_candidate->refcount))
1182 router = first_candidate;
1087 1183
1088 router = router->next_bond_candidate; 1184 if (!router) {
1089 } while (router != first_candidate); 1185 rcu_read_unlock();
1186 return NULL;
1187 }
1090 1188
1091 primary_orig_node->bond.selected = router->next_bond_candidate; 1189 /* selected should point to the next element
1190 * after the current router */
1191 spin_lock_bh(&primary_orig_node->neigh_list_lock);
1192 /* this is a list_move(), which unfortunately
1193 * does not exist as rcu version */
1194 list_del_rcu(&primary_orig_node->bond_list);
1195 list_add_rcu(&primary_orig_node->bond_list,
1196 &router->bonding_list);
1197 spin_unlock_bh(&primary_orig_node->neigh_list_lock);
1092 1198
1093 } else { 1199 } else {
1094 /* if bonding is disabled, use the best of the 1200 /* if bonding is disabled, use the best of the
1095 * remaining candidates which are not using 1201 * remaining candidates which are not using
1096 * this interface. */ 1202 * this interface. */
1097 best_router = first_candidate; 1203 list_for_each_entry_rcu(tmp_neigh_node,
1204 &primary_orig_node->bond_list, bonding_list) {
1205 if (!first_candidate)
1206 first_candidate = tmp_neigh_node;
1098 1207
1099 do {
1100 /* recv_if == NULL on the first node. */ 1208 /* recv_if == NULL on the first node. */
1101 if ((router->if_incoming != recv_if) && 1209 if (tmp_neigh_node->if_incoming == recv_if)
1102 (router->tq_avg > best_router->tq_avg)) 1210 continue;
1103 best_router = router;
1104 1211
1105 router = router->next_bond_candidate; 1212 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
1106 } while (router != first_candidate); 1213 continue;
1107 1214
1108 router = best_router; 1215 /* if we don't have a router yet
1109 } 1216 * or this one is better, choose it. */
1217 if ((!router) ||
1218 (tmp_neigh_node->tq_avg > router->tq_avg)) {
1219 /* decrement refcount of
1220 * previously selected router */
1221 if (router)
1222 neigh_node_free_ref(router);
1223
1224 router = tmp_neigh_node;
1225 atomic_inc_not_zero(&router->refcount);
1226 }
1227
1228 neigh_node_free_ref(tmp_neigh_node);
1229 }
1110 1230
1231 /* use the first candidate if nothing was found. */
1232 if (!router && first_candidate &&
1233 atomic_inc_not_zero(&first_candidate->refcount))
1234 router = first_candidate;
1235 }
1236return_router:
1237 rcu_read_unlock();
1111 return router; 1238 return router;
1112} 1239}
1113 1240
@@ -1136,17 +1263,14 @@ static int check_unicast_packet(struct sk_buff *skb, int hdr_size)
1136 return 0; 1263 return 0;
1137} 1264}
1138 1265
1139int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, 1266int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1140 int hdr_size)
1141{ 1267{
1142 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 1268 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1143 struct orig_node *orig_node; 1269 struct orig_node *orig_node = NULL;
1144 struct neigh_node *router; 1270 struct neigh_node *neigh_node = NULL;
1145 struct batman_if *batman_if;
1146 uint8_t dstaddr[ETH_ALEN];
1147 struct unicast_packet *unicast_packet; 1271 struct unicast_packet *unicast_packet;
1148 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); 1272 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
1149 int ret; 1273 int ret = NET_RX_DROP;
1150 struct sk_buff *new_skb; 1274 struct sk_buff *new_skb;
1151 1275
1152 unicast_packet = (struct unicast_packet *)skb->data; 1276 unicast_packet = (struct unicast_packet *)skb->data;
@@ -1156,53 +1280,51 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
1156 pr_debug("Warning - can't forward unicast packet from %pM to " 1280 pr_debug("Warning - can't forward unicast packet from %pM to "
1157 "%pM: ttl exceeded\n", ethhdr->h_source, 1281 "%pM: ttl exceeded\n", ethhdr->h_source,
1158 unicast_packet->dest); 1282 unicast_packet->dest);
1159 return NET_RX_DROP; 1283 goto out;
1160 } 1284 }
1161 1285
1162 /* get routing information */ 1286 /* get routing information */
1163 spin_lock_bh(&bat_priv->orig_hash_lock); 1287 rcu_read_lock();
1164 orig_node = ((struct orig_node *) 1288 orig_node = orig_hash_find(bat_priv, unicast_packet->dest);
1165 hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
1166 unicast_packet->dest));
1167
1168 router = find_router(bat_priv, orig_node, recv_if);
1169 1289
1170 if (!router) { 1290 if (!orig_node)
1171 spin_unlock_bh(&bat_priv->orig_hash_lock); 1291 goto unlock;
1172 return NET_RX_DROP;
1173 }
1174 1292
1175 /* don't lock while sending the packets ... we therefore 1293 rcu_read_unlock();
1176 * copy the required data before sending */
1177 1294
1178 batman_if = router->if_incoming; 1295 /* find_router() increases neigh_nodes refcount if found. */
1179 memcpy(dstaddr, router->addr, ETH_ALEN); 1296 neigh_node = find_router(bat_priv, orig_node, recv_if);
1180 1297
1181 spin_unlock_bh(&bat_priv->orig_hash_lock); 1298 if (!neigh_node)
1299 goto out;
1182 1300
1183 /* create a copy of the skb, if needed, to modify it. */ 1301 /* create a copy of the skb, if needed, to modify it. */
1184 if (skb_cow(skb, sizeof(struct ethhdr)) < 0) 1302 if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
1185 return NET_RX_DROP; 1303 goto out;
1186 1304
1187 unicast_packet = (struct unicast_packet *)skb->data; 1305 unicast_packet = (struct unicast_packet *)skb->data;
1188 1306
1189 if (unicast_packet->packet_type == BAT_UNICAST && 1307 if (unicast_packet->packet_type == BAT_UNICAST &&
1190 atomic_read(&bat_priv->fragmentation) && 1308 atomic_read(&bat_priv->fragmentation) &&
1191 skb->len > batman_if->net_dev->mtu) 1309 skb->len > neigh_node->if_incoming->net_dev->mtu) {
1192 return frag_send_skb(skb, bat_priv, batman_if, 1310 ret = frag_send_skb(skb, bat_priv,
1193 dstaddr); 1311 neigh_node->if_incoming, neigh_node->addr);
1312 goto out;
1313 }
1194 1314
1195 if (unicast_packet->packet_type == BAT_UNICAST_FRAG && 1315 if (unicast_packet->packet_type == BAT_UNICAST_FRAG &&
1196 2 * skb->len - hdr_size <= batman_if->net_dev->mtu) { 1316 frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) {
1197 1317
1198 ret = frag_reassemble_skb(skb, bat_priv, &new_skb); 1318 ret = frag_reassemble_skb(skb, bat_priv, &new_skb);
1199 1319
1200 if (ret == NET_RX_DROP) 1320 if (ret == NET_RX_DROP)
1201 return NET_RX_DROP; 1321 goto out;
1202 1322
1203 /* packet was buffered for late merge */ 1323 /* packet was buffered for late merge */
1204 if (!new_skb) 1324 if (!new_skb) {
1205 return NET_RX_SUCCESS; 1325 ret = NET_RX_SUCCESS;
1326 goto out;
1327 }
1206 1328
1207 skb = new_skb; 1329 skb = new_skb;
1208 unicast_packet = (struct unicast_packet *)skb->data; 1330 unicast_packet = (struct unicast_packet *)skb->data;
@@ -1212,12 +1334,21 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
1212 unicast_packet->ttl--; 1334 unicast_packet->ttl--;
1213 1335
1214 /* route it */ 1336 /* route it */
1215 send_skb_packet(skb, batman_if, dstaddr); 1337 send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
1338 ret = NET_RX_SUCCESS;
1339 goto out;
1216 1340
1217 return NET_RX_SUCCESS; 1341unlock:
1342 rcu_read_unlock();
1343out:
1344 if (neigh_node)
1345 neigh_node_free_ref(neigh_node);
1346 if (orig_node)
1347 orig_node_free_ref(orig_node);
1348 return ret;
1218} 1349}
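The rewrite above replaces the mesh-wide orig_hash_lock with RCU plus per-node reference counting. The orig_hash_find() helper it now leans on is introduced elsewhere in this series; a minimal sketch of that lookup pattern, modelled on the hna_*_hash_find() helpers added further down in this diff (the body here is an assumption, not part of the hunk):

    static struct orig_node *orig_hash_find(struct bat_priv *bat_priv,
                                            void *data)
    {
            struct hashtable_t *hash = bat_priv->orig_hash;
            struct hlist_head *head;
            struct hlist_node *node;
            struct orig_node *orig_node, *orig_node_tmp = NULL;
            int index;

            if (!hash)
                    return NULL;

            /* pick the bucket, then walk it under RCU */
            index = choose_orig(data, hash->size);
            head = &hash->table[index];

            rcu_read_lock();
            hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
                    if (!compare_eth(orig_node->orig, data))
                            continue;

                    /* a node whose refcount already dropped to zero is
                     * about to be RCU-freed - skip it, don't revive it */
                    if (!atomic_inc_not_zero(&orig_node->refcount))
                            continue;

                    orig_node_tmp = orig_node;
                    break;
            }
            rcu_read_unlock();

            return orig_node_tmp;
    }

The caller thus owns a reference and has to release it with orig_node_free_ref(), which is exactly what the new out: label above does.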
1219 1350
1220int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) 1351int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1221{ 1352{
1222 struct unicast_packet *unicast_packet; 1353 struct unicast_packet *unicast_packet;
1223 int hdr_size = sizeof(struct unicast_packet); 1354 int hdr_size = sizeof(struct unicast_packet);
@@ -1233,10 +1364,10 @@ int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if)
1233 return NET_RX_SUCCESS; 1364 return NET_RX_SUCCESS;
1234 } 1365 }
1235 1366
1236 return route_unicast_packet(skb, recv_if, hdr_size); 1367 return route_unicast_packet(skb, recv_if);
1237} 1368}
1238 1369
1239int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if) 1370int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1240{ 1371{
1241 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 1372 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1242 struct unicast_frag_packet *unicast_packet; 1373 struct unicast_frag_packet *unicast_packet;
@@ -1266,89 +1397,96 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if)
1266 return NET_RX_SUCCESS; 1397 return NET_RX_SUCCESS;
1267 } 1398 }
1268 1399
1269 return route_unicast_packet(skb, recv_if, hdr_size); 1400 return route_unicast_packet(skb, recv_if);
1270} 1401}
1271 1402
1272 1403
1273int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) 1404int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1274{ 1405{
1275 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); 1406 struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1276 struct orig_node *orig_node; 1407 struct orig_node *orig_node = NULL;
1277 struct bcast_packet *bcast_packet; 1408 struct bcast_packet *bcast_packet;
1278 struct ethhdr *ethhdr; 1409 struct ethhdr *ethhdr;
1279 int hdr_size = sizeof(struct bcast_packet); 1410 int hdr_size = sizeof(struct bcast_packet);
1411 int ret = NET_RX_DROP;
1280 int32_t seq_diff; 1412 int32_t seq_diff;
1281 1413
1282 /* drop packet if it doesn't have the necessary minimum size */ 1414 /* drop packet if it doesn't have the necessary minimum size */
1283 if (unlikely(!pskb_may_pull(skb, hdr_size))) 1415 if (unlikely(!pskb_may_pull(skb, hdr_size)))
1284 return NET_RX_DROP; 1416 goto out;
1285 1417
1286 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1418 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1287 1419
1288 /* packet with broadcast indication but unicast recipient */ 1420 /* packet with broadcast indication but unicast recipient */
1289 if (!is_broadcast_ether_addr(ethhdr->h_dest)) 1421 if (!is_broadcast_ether_addr(ethhdr->h_dest))
1290 return NET_RX_DROP; 1422 goto out;
1291 1423
1292 /* packet with broadcast sender address */ 1424 /* packet with broadcast sender address */
1293 if (is_broadcast_ether_addr(ethhdr->h_source)) 1425 if (is_broadcast_ether_addr(ethhdr->h_source))
1294 return NET_RX_DROP; 1426 goto out;
1295 1427
1296 /* ignore broadcasts sent by myself */ 1428 /* ignore broadcasts sent by myself */
1297 if (is_my_mac(ethhdr->h_source)) 1429 if (is_my_mac(ethhdr->h_source))
1298 return NET_RX_DROP; 1430 goto out;
1299 1431
1300 bcast_packet = (struct bcast_packet *)skb->data; 1432 bcast_packet = (struct bcast_packet *)skb->data;
1301 1433
1302 /* ignore broadcasts originated by myself */ 1434 /* ignore broadcasts originated by myself */
1303 if (is_my_mac(bcast_packet->orig)) 1435 if (is_my_mac(bcast_packet->orig))
1304 return NET_RX_DROP; 1436 goto out;
1305 1437
1306 if (bcast_packet->ttl < 2) 1438 if (bcast_packet->ttl < 2)
1307 return NET_RX_DROP; 1439 goto out;
1308 1440
1309 spin_lock_bh(&bat_priv->orig_hash_lock); 1441 rcu_read_lock();
1310 orig_node = ((struct orig_node *) 1442 orig_node = orig_hash_find(bat_priv, bcast_packet->orig);
1311 hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
1312 bcast_packet->orig));
1313 1443
1314 if (!orig_node) { 1444 if (!orig_node)
1315 spin_unlock_bh(&bat_priv->orig_hash_lock); 1445 goto rcu_unlock;
1316 return NET_RX_DROP; 1446
1317 } 1447 rcu_read_unlock();
1448
1449 spin_lock_bh(&orig_node->bcast_seqno_lock);
1318 1450
1319 /* check whether the packet is a duplicate */ 1451 /* check whether the packet is a duplicate */
1320 if (get_bit_status(orig_node->bcast_bits, 1452 if (get_bit_status(orig_node->bcast_bits, orig_node->last_bcast_seqno,
1321 orig_node->last_bcast_seqno, 1453 ntohl(bcast_packet->seqno)))
1322 ntohl(bcast_packet->seqno))) { 1454 goto spin_unlock;
1323 spin_unlock_bh(&bat_priv->orig_hash_lock);
1324 return NET_RX_DROP;
1325 }
1326 1455
1327 seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno; 1456 seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno;
1328 1457
1329 /* check whether the packet is old and the host just restarted. */ 1458 /* check whether the packet is old and the host just restarted. */
1330 if (window_protected(bat_priv, seq_diff, 1459 if (window_protected(bat_priv, seq_diff,
1331 &orig_node->bcast_seqno_reset)) { 1460 &orig_node->bcast_seqno_reset))
1332 spin_unlock_bh(&bat_priv->orig_hash_lock); 1461 goto spin_unlock;
1333 return NET_RX_DROP;
1334 }
1335 1462
1336 /* mark broadcast in flood history, update window position 1463 /* mark broadcast in flood history, update window position
1337 * if required. */ 1464 * if required. */
1338 if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1)) 1465 if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1))
1339 orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); 1466 orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno);
1340 1467
1341 spin_unlock_bh(&bat_priv->orig_hash_lock); 1468 spin_unlock_bh(&orig_node->bcast_seqno_lock);
1469
1342 /* rebroadcast packet */ 1470 /* rebroadcast packet */
1343 add_bcast_packet_to_list(bat_priv, skb); 1471 add_bcast_packet_to_list(bat_priv, skb);
1344 1472
1345 /* broadcast for me */ 1473 /* broadcast for me */
1346 interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); 1474 interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size);
1475 ret = NET_RX_SUCCESS;
1476 goto out;
1347 1477
1348 return NET_RX_SUCCESS; 1478rcu_unlock:
1479 rcu_read_unlock();
1480 goto out;
1481spin_unlock:
1482 spin_unlock_bh(&orig_node->bcast_seqno_lock);
1483out:
1484 if (orig_node)
1485 orig_node_free_ref(orig_node);
1486 return ret;
1349} 1487}
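The locking here narrows from the mesh-wide orig_hash_lock to a lock embedded in the originator itself, held only around the duplicate-window update. A sketch of the per-node state that bcast_seqno_lock serializes (field names as used above; the real definition lives in types.h):

    struct orig_node {
            /* ... */
            unsigned long bcast_bits[NUM_WORDS];  /* sliding flood-history window */
            uint32_t last_bcast_seqno;            /* newest broadcast seqno seen */
            spinlock_t bcast_seqno_lock;          /* protects the two fields above */
            atomic_t refcount;                    /* node is RCU-freed once it hits 0 */
            /* ... */
    };

Broadcasts from different originators can now run their duplicate checks concurrently instead of contending on one global lock.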
1350 1488
1351int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if) 1489int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if)
1352{ 1490{
1353 struct vis_packet *vis_packet; 1491 struct vis_packet *vis_packet;
1354 struct ethhdr *ethhdr; 1492 struct ethhdr *ethhdr;
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index f108f230bfd..b5a064c88a4 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,27 +22,25 @@
22#ifndef _NET_BATMAN_ADV_ROUTING_H_ 22#ifndef _NET_BATMAN_ADV_ROUTING_H_
23#define _NET_BATMAN_ADV_ROUTING_H_ 23#define _NET_BATMAN_ADV_ROUTING_H_
24 24
25#include "types.h" 25void slide_own_bcast_window(struct hard_iface *hard_iface);
26
27void slide_own_bcast_window(struct batman_if *batman_if);
28void receive_bat_packet(struct ethhdr *ethhdr, 26void receive_bat_packet(struct ethhdr *ethhdr,
29 struct batman_packet *batman_packet, 27 struct batman_packet *batman_packet,
30 unsigned char *hna_buff, int hna_buff_len, 28 unsigned char *hna_buff, int hna_buff_len,
31 struct batman_if *if_incoming); 29 struct hard_iface *if_incoming);
32void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, 30void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
33 struct neigh_node *neigh_node, unsigned char *hna_buff, 31 struct neigh_node *neigh_node, unsigned char *hna_buff,
34 int hna_buff_len); 32 int hna_buff_len);
35int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, 33int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
36 int hdr_size); 34int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if);
37int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if); 35int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
38int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if); 36int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if);
39int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if); 37int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
40int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if); 38int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if);
41int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); 39int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if);
42int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if);
43struct neigh_node *find_router(struct bat_priv *bat_priv, 40struct neigh_node *find_router(struct bat_priv *bat_priv,
44 struct orig_node *orig_node, struct batman_if *recv_if); 41 struct orig_node *orig_node,
45void update_bonding_candidates(struct bat_priv *bat_priv, 42 struct hard_iface *recv_if);
46 struct orig_node *orig_node); 43void bonding_candidate_del(struct orig_node *orig_node,
44 struct neigh_node *neigh_node);
47 45
48#endif /* _NET_BATMAN_ADV_ROUTING_H_ */ 46#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index b89b9f7709a..d49e54d932a 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -25,7 +25,6 @@
25#include "translation-table.h" 25#include "translation-table.h"
26#include "soft-interface.h" 26#include "soft-interface.h"
27#include "hard-interface.h" 27#include "hard-interface.h"
28#include "types.h"
29#include "vis.h" 28#include "vis.h"
30#include "aggregation.h" 29#include "aggregation.h"
31#include "gateway_common.h" 30#include "gateway_common.h"
@@ -49,7 +48,7 @@ static unsigned long own_send_time(struct bat_priv *bat_priv)
49} 48}
50 49
51/* when do we schedule a forwarded packet to be sent */ 50/* when do we schedule a forwarded packet to be sent */
52static unsigned long forward_send_time(struct bat_priv *bat_priv) 51static unsigned long forward_send_time(void)
53{ 52{
54 return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); 53 return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
55} 54}
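forward_send_time() can drop its bat_priv argument because a rebroadcast is delayed by pure random jitter, independent of any per-mesh setting. With JITTER as defined in main.h (20 at the time of this series), the call boils down to:

    /* delay a forwarded OGM by 0..9 ms, decorrelating the rebroadcasts
     * of neighbouring nodes that heard the same packet */
    send_time = jiffies + msecs_to_jiffies(random32() % (JITTER/2));

Only the node's own OGMs are paced by the configured originator interval, which is why own_send_time() keeps its bat_priv parameter.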
@@ -57,20 +56,20 @@ static unsigned long forward_send_time(struct bat_priv *bat_priv)
57/* send out an already prepared packet to the given address via the 56/* send out an already prepared packet to the given address via the
58 * specified batman interface */ 57 * specified batman interface */
59int send_skb_packet(struct sk_buff *skb, 58int send_skb_packet(struct sk_buff *skb,
60 struct batman_if *batman_if, 59 struct hard_iface *hard_iface,
61 uint8_t *dst_addr) 60 uint8_t *dst_addr)
62{ 61{
63 struct ethhdr *ethhdr; 62 struct ethhdr *ethhdr;
64 63
65 if (batman_if->if_status != IF_ACTIVE) 64 if (hard_iface->if_status != IF_ACTIVE)
66 goto send_skb_err; 65 goto send_skb_err;
67 66
68 if (unlikely(!batman_if->net_dev)) 67 if (unlikely(!hard_iface->net_dev))
69 goto send_skb_err; 68 goto send_skb_err;
70 69
71 if (!(batman_if->net_dev->flags & IFF_UP)) { 70 if (!(hard_iface->net_dev->flags & IFF_UP)) {
72 pr_warning("Interface %s is not up - can't send packet via " 71 pr_warning("Interface %s is not up - can't send packet via "
73 "that interface!\n", batman_if->net_dev->name); 72 "that interface!\n", hard_iface->net_dev->name);
74 goto send_skb_err; 73 goto send_skb_err;
75 } 74 }
76 75
@@ -81,7 +80,7 @@ int send_skb_packet(struct sk_buff *skb,
81 skb_reset_mac_header(skb); 80 skb_reset_mac_header(skb);
82 81
83 ethhdr = (struct ethhdr *) skb_mac_header(skb); 82 ethhdr = (struct ethhdr *) skb_mac_header(skb);
84 memcpy(ethhdr->h_source, batman_if->net_dev->dev_addr, ETH_ALEN); 83 memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
85 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); 84 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
86 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); 85 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
87 86
@@ -89,7 +88,7 @@ int send_skb_packet(struct sk_buff *skb,
89 skb->priority = TC_PRIO_CONTROL; 88 skb->priority = TC_PRIO_CONTROL;
90 skb->protocol = __constant_htons(ETH_P_BATMAN); 89 skb->protocol = __constant_htons(ETH_P_BATMAN);
91 90
92 skb->dev = batman_if->net_dev; 91 skb->dev = hard_iface->net_dev;
93 92
94 /* dev_queue_xmit() returns a negative result on error. However on 93 /* dev_queue_xmit() returns a negative result on error. However on
95 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP 94 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
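The hunk boundary cuts this comment short; the code it documents sits immediately below (the send_skb_err: label is visible in the next hunk header). Sketched from the surrounding source - an assumption, since those lines are not part of this diff - it treats only negative results as failure and frees the skb itself on the error path:

            /* ... (which is > 0). This is not treated as an error. */
            return dev_queue_xmit(skb);
    send_skb_err:
            kfree_skb(skb);
            return NET_XMIT_DROP;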
@@ -103,16 +102,16 @@ send_skb_err:
103 102
104/* Send a packet to a given interface */ 103/* Send a packet to a given interface */
105static void send_packet_to_if(struct forw_packet *forw_packet, 104static void send_packet_to_if(struct forw_packet *forw_packet,
106 struct batman_if *batman_if) 105 struct hard_iface *hard_iface)
107{ 106{
108 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 107 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
109 char *fwd_str; 108 char *fwd_str;
110 uint8_t packet_num; 109 uint8_t packet_num;
111 int16_t buff_pos; 110 int16_t buff_pos;
112 struct batman_packet *batman_packet; 111 struct batman_packet *batman_packet;
113 struct sk_buff *skb; 112 struct sk_buff *skb;
114 113
115 if (batman_if->if_status != IF_ACTIVE) 114 if (hard_iface->if_status != IF_ACTIVE)
116 return; 115 return;
117 116
118 packet_num = 0; 117 packet_num = 0;
@@ -127,7 +126,7 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
127 /* we might have aggregated direct link packets with an 126 /* we might have aggregated direct link packets with an
128 * ordinary base packet */ 127 * ordinary base packet */
129 if ((forw_packet->direct_link_flags & (1 << packet_num)) && 128 if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
130 (forw_packet->if_incoming == batman_if)) 129 (forw_packet->if_incoming == hard_iface))
131 batman_packet->flags |= DIRECTLINK; 130 batman_packet->flags |= DIRECTLINK;
132 else 131 else
133 batman_packet->flags &= ~DIRECTLINK; 132 batman_packet->flags &= ~DIRECTLINK;
@@ -143,7 +142,8 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
143 batman_packet->tq, batman_packet->ttl, 142 batman_packet->tq, batman_packet->ttl,
144 (batman_packet->flags & DIRECTLINK ? 143 (batman_packet->flags & DIRECTLINK ?
145 "on" : "off"), 144 "on" : "off"),
146 batman_if->net_dev->name, batman_if->net_dev->dev_addr); 145 hard_iface->net_dev->name,
146 hard_iface->net_dev->dev_addr);
147 147
148 buff_pos += sizeof(struct batman_packet) + 148 buff_pos += sizeof(struct batman_packet) +
149 (batman_packet->num_hna * ETH_ALEN); 149 (batman_packet->num_hna * ETH_ALEN);
@@ -155,13 +155,13 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
155 /* create clone because function is called more than once */ 155 /* create clone because function is called more than once */
156 skb = skb_clone(forw_packet->skb, GFP_ATOMIC); 156 skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
157 if (skb) 157 if (skb)
158 send_skb_packet(skb, batman_if, broadcast_addr); 158 send_skb_packet(skb, hard_iface, broadcast_addr);
159} 159}
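The clone above is not an optimization detail: send_skb_packet() hands the buffer to dev_queue_xmit(), which consumes it, so every transmission needs its own sk_buff. skb_clone() copies only the metadata; the packet data itself is shared and reference-counted:

    /* one clone per transmission - the original forw_packet->skb stays
     * owned by the forwarding list and is freed when the entry is purged */
    skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
    if (skb)
            send_skb_packet(skb, hard_iface, broadcast_addr); /* consumes skb */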
160 160
161/* send a batman packet */ 161/* send a batman packet */
162static void send_packet(struct forw_packet *forw_packet) 162static void send_packet(struct forw_packet *forw_packet)
163{ 163{
164 struct batman_if *batman_if; 164 struct hard_iface *hard_iface;
165 struct net_device *soft_iface; 165 struct net_device *soft_iface;
166 struct bat_priv *bat_priv; 166 struct bat_priv *bat_priv;
167 struct batman_packet *batman_packet = 167 struct batman_packet *batman_packet =
@@ -205,17 +205,17 @@ static void send_packet(struct forw_packet *forw_packet)
205 205
206 /* broadcast on every interface */ 206 /* broadcast on every interface */
207 rcu_read_lock(); 207 rcu_read_lock();
208 list_for_each_entry_rcu(batman_if, &if_list, list) { 208 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
209 if (batman_if->soft_iface != soft_iface) 209 if (hard_iface->soft_iface != soft_iface)
210 continue; 210 continue;
211 211
212 send_packet_to_if(forw_packet, batman_if); 212 send_packet_to_if(forw_packet, hard_iface);
213 } 213 }
214 rcu_read_unlock(); 214 rcu_read_unlock();
215} 215}
216 216
217static void rebuild_batman_packet(struct bat_priv *bat_priv, 217static void rebuild_batman_packet(struct bat_priv *bat_priv,
218 struct batman_if *batman_if) 218 struct hard_iface *hard_iface)
219{ 219{
220 int new_len; 220 int new_len;
221 unsigned char *new_buff; 221 unsigned char *new_buff;
@@ -227,7 +227,7 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv,
227 227
228 /* keep old buffer if kmalloc should fail */ 228 /* keep old buffer if kmalloc should fail */
229 if (new_buff) { 229 if (new_buff) {
230 memcpy(new_buff, batman_if->packet_buff, 230 memcpy(new_buff, hard_iface->packet_buff,
231 sizeof(struct batman_packet)); 231 sizeof(struct batman_packet));
232 batman_packet = (struct batman_packet *)new_buff; 232 batman_packet = (struct batman_packet *)new_buff;
233 233
@@ -235,21 +235,21 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv,
235 new_buff + sizeof(struct batman_packet), 235 new_buff + sizeof(struct batman_packet),
236 new_len - sizeof(struct batman_packet)); 236 new_len - sizeof(struct batman_packet));
237 237
238 kfree(batman_if->packet_buff); 238 kfree(hard_iface->packet_buff);
239 batman_if->packet_buff = new_buff; 239 hard_iface->packet_buff = new_buff;
240 batman_if->packet_len = new_len; 240 hard_iface->packet_len = new_len;
241 } 241 }
242} 242}
243 243
244void schedule_own_packet(struct batman_if *batman_if) 244void schedule_own_packet(struct hard_iface *hard_iface)
245{ 245{
246 struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); 246 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
247 unsigned long send_time; 247 unsigned long send_time;
248 struct batman_packet *batman_packet; 248 struct batman_packet *batman_packet;
249 int vis_server; 249 int vis_server;
250 250
251 if ((batman_if->if_status == IF_NOT_IN_USE) || 251 if ((hard_iface->if_status == IF_NOT_IN_USE) ||
252 (batman_if->if_status == IF_TO_BE_REMOVED)) 252 (hard_iface->if_status == IF_TO_BE_REMOVED))
253 return; 253 return;
254 254
255 vis_server = atomic_read(&bat_priv->vis_mode); 255 vis_server = atomic_read(&bat_priv->vis_mode);
@@ -261,51 +261,51 @@ void schedule_own_packet(struct batman_if *batman_if)
261 * outdated packets (especially uninitialized mac addresses) in the 261 * outdated packets (especially uninitialized mac addresses) in the
262 * packet queue 262 * packet queue
263 */ 263 */
264 if (batman_if->if_status == IF_TO_BE_ACTIVATED) 264 if (hard_iface->if_status == IF_TO_BE_ACTIVATED)
265 batman_if->if_status = IF_ACTIVE; 265 hard_iface->if_status = IF_ACTIVE;
266 266
267 /* if local hna has changed and interface is a primary interface */ 267 /* if local hna has changed and interface is a primary interface */
268 if ((atomic_read(&bat_priv->hna_local_changed)) && 268 if ((atomic_read(&bat_priv->hna_local_changed)) &&
269 (batman_if == bat_priv->primary_if)) 269 (hard_iface == bat_priv->primary_if))
270 rebuild_batman_packet(bat_priv, batman_if); 270 rebuild_batman_packet(bat_priv, hard_iface);
271 271
272 /** 272 /**
273 * NOTE: packet_buff might just have been re-allocated in 273 * NOTE: packet_buff might just have been re-allocated in
274 * rebuild_batman_packet() 274 * rebuild_batman_packet()
275 */ 275 */
276 batman_packet = (struct batman_packet *)batman_if->packet_buff; 276 batman_packet = (struct batman_packet *)hard_iface->packet_buff;
277 277
278 /* change sequence number to network order */ 278 /* change sequence number to network order */
279 batman_packet->seqno = 279 batman_packet->seqno =
280 htonl((uint32_t)atomic_read(&batman_if->seqno)); 280 htonl((uint32_t)atomic_read(&hard_iface->seqno));
281 281
282 if (vis_server == VIS_TYPE_SERVER_SYNC) 282 if (vis_server == VIS_TYPE_SERVER_SYNC)
283 batman_packet->flags |= VIS_SERVER; 283 batman_packet->flags |= VIS_SERVER;
284 else 284 else
285 batman_packet->flags &= ~VIS_SERVER; 285 batman_packet->flags &= ~VIS_SERVER;
286 286
287 if ((batman_if == bat_priv->primary_if) && 287 if ((hard_iface == bat_priv->primary_if) &&
288 (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) 288 (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
289 batman_packet->gw_flags = 289 batman_packet->gw_flags =
290 (uint8_t)atomic_read(&bat_priv->gw_bandwidth); 290 (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
291 else 291 else
292 batman_packet->gw_flags = 0; 292 batman_packet->gw_flags = 0;
293 293
294 atomic_inc(&batman_if->seqno); 294 atomic_inc(&hard_iface->seqno);
295 295
296 slide_own_bcast_window(batman_if); 296 slide_own_bcast_window(hard_iface);
297 send_time = own_send_time(bat_priv); 297 send_time = own_send_time(bat_priv);
298 add_bat_packet_to_list(bat_priv, 298 add_bat_packet_to_list(bat_priv,
299 batman_if->packet_buff, 299 hard_iface->packet_buff,
300 batman_if->packet_len, 300 hard_iface->packet_len,
301 batman_if, 1, send_time); 301 hard_iface, 1, send_time);
302} 302}
303 303
304void schedule_forward_packet(struct orig_node *orig_node, 304void schedule_forward_packet(struct orig_node *orig_node,
305 struct ethhdr *ethhdr, 305 struct ethhdr *ethhdr,
306 struct batman_packet *batman_packet, 306 struct batman_packet *batman_packet,
307 uint8_t directlink, int hna_buff_len, 307 uint8_t directlink, int hna_buff_len,
308 struct batman_if *if_incoming) 308 struct hard_iface *if_incoming)
309{ 309{
310 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 310 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
311 unsigned char in_tq, in_ttl, tq_avg = 0; 311 unsigned char in_tq, in_ttl, tq_avg = 0;
@@ -327,7 +327,7 @@ void schedule_forward_packet(struct orig_node *orig_node,
327 if ((orig_node->router) && (orig_node->router->tq_avg != 0)) { 327 if ((orig_node->router) && (orig_node->router->tq_avg != 0)) {
328 328
329 /* rebroadcast ogm of best ranking neighbor as is */ 329 /* rebroadcast ogm of best ranking neighbor as is */
330 if (!compare_orig(orig_node->router->addr, ethhdr->h_source)) { 330 if (!compare_eth(orig_node->router->addr, ethhdr->h_source)) {
331 batman_packet->tq = orig_node->router->tq_avg; 331 batman_packet->tq = orig_node->router->tq_avg;
332 332
333 if (orig_node->router->last_ttl) 333 if (orig_node->router->last_ttl)
@@ -356,7 +356,7 @@ void schedule_forward_packet(struct orig_node *orig_node,
356 else 356 else
357 batman_packet->flags &= ~DIRECTLINK; 357 batman_packet->flags &= ~DIRECTLINK;
358 358
359 send_time = forward_send_time(bat_priv); 359 send_time = forward_send_time();
360 add_bat_packet_to_list(bat_priv, 360 add_bat_packet_to_list(bat_priv,
361 (unsigned char *)batman_packet, 361 (unsigned char *)batman_packet,
362 sizeof(struct batman_packet) + hna_buff_len, 362 sizeof(struct batman_packet) + hna_buff_len,
@@ -444,7 +444,7 @@ out:
444 444
445static void send_outstanding_bcast_packet(struct work_struct *work) 445static void send_outstanding_bcast_packet(struct work_struct *work)
446{ 446{
447 struct batman_if *batman_if; 447 struct hard_iface *hard_iface;
448 struct delayed_work *delayed_work = 448 struct delayed_work *delayed_work =
449 container_of(work, struct delayed_work, work); 449 container_of(work, struct delayed_work, work);
450 struct forw_packet *forw_packet = 450 struct forw_packet *forw_packet =
@@ -462,14 +462,14 @@ static void send_outstanding_bcast_packet(struct work_struct *work)
462 462
463 /* rebroadcast packet */ 463 /* rebroadcast packet */
464 rcu_read_lock(); 464 rcu_read_lock();
465 list_for_each_entry_rcu(batman_if, &if_list, list) { 465 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
466 if (batman_if->soft_iface != soft_iface) 466 if (hard_iface->soft_iface != soft_iface)
467 continue; 467 continue;
468 468
469 /* send a copy of the saved skb */ 469 /* send a copy of the saved skb */
470 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); 470 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
471 if (skb1) 471 if (skb1)
472 send_skb_packet(skb1, batman_if, broadcast_addr); 472 send_skb_packet(skb1, hard_iface, broadcast_addr);
473 } 473 }
474 rcu_read_unlock(); 474 rcu_read_unlock();
475 475
@@ -522,15 +522,15 @@ out:
522} 522}
523 523
524void purge_outstanding_packets(struct bat_priv *bat_priv, 524void purge_outstanding_packets(struct bat_priv *bat_priv,
525 struct batman_if *batman_if) 525 struct hard_iface *hard_iface)
526{ 526{
527 struct forw_packet *forw_packet; 527 struct forw_packet *forw_packet;
528 struct hlist_node *tmp_node, *safe_tmp_node; 528 struct hlist_node *tmp_node, *safe_tmp_node;
529 529
530 if (batman_if) 530 if (hard_iface)
531 bat_dbg(DBG_BATMAN, bat_priv, 531 bat_dbg(DBG_BATMAN, bat_priv,
532 "purge_outstanding_packets(): %s\n", 532 "purge_outstanding_packets(): %s\n",
533 batman_if->net_dev->name); 533 hard_iface->net_dev->name);
534 else 534 else
535 bat_dbg(DBG_BATMAN, bat_priv, 535 bat_dbg(DBG_BATMAN, bat_priv,
536 "purge_outstanding_packets()\n"); 536 "purge_outstanding_packets()\n");
@@ -544,8 +544,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
544 * if purge_outstanding_packets() was called with an argument 544 * if purge_outstanding_packets() was called with an argument
545 * we delete only packets belonging to the given interface 545 * we delete only packets belonging to the given interface
546 */ 546 */
547 if ((batman_if) && 547 if ((hard_iface) &&
548 (forw_packet->if_incoming != batman_if)) 548 (forw_packet->if_incoming != hard_iface))
549 continue; 549 continue;
550 550
551 spin_unlock_bh(&bat_priv->forw_bcast_list_lock); 551 spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
@@ -568,8 +568,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
568 * if purge_outstanding_packets() was called with an argument 568 * if purge_outstanding_packets() was called with an argument
569 * we delete only packets belonging to the given interface 569 * we delete only packets belonging to the given interface
570 */ 570 */
571 if ((batman_if) && 571 if ((hard_iface) &&
572 (forw_packet->if_incoming != batman_if)) 572 (forw_packet->if_incoming != hard_iface))
573 continue; 573 continue;
574 574
575 spin_unlock_bh(&bat_priv->forw_bat_list_lock); 575 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index c4cefa8e4f8..7b2ff19c05e 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,20 +22,18 @@
22#ifndef _NET_BATMAN_ADV_SEND_H_ 22#ifndef _NET_BATMAN_ADV_SEND_H_
23#define _NET_BATMAN_ADV_SEND_H_ 23#define _NET_BATMAN_ADV_SEND_H_
24 24
25#include "types.h"
26
27int send_skb_packet(struct sk_buff *skb, 25int send_skb_packet(struct sk_buff *skb,
28 struct batman_if *batman_if, 26 struct hard_iface *hard_iface,
29 uint8_t *dst_addr); 27 uint8_t *dst_addr);
30void schedule_own_packet(struct batman_if *batman_if); 28void schedule_own_packet(struct hard_iface *hard_iface);
31void schedule_forward_packet(struct orig_node *orig_node, 29void schedule_forward_packet(struct orig_node *orig_node,
32 struct ethhdr *ethhdr, 30 struct ethhdr *ethhdr,
33 struct batman_packet *batman_packet, 31 struct batman_packet *batman_packet,
34 uint8_t directlink, int hna_buff_len, 32 uint8_t directlink, int hna_buff_len,
35 struct batman_if *if_outgoing); 33 struct hard_iface *if_outgoing);
36int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb); 34int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb);
37void send_outstanding_bat_packet(struct work_struct *work); 35void send_outstanding_bat_packet(struct work_struct *work);
38void purge_outstanding_packets(struct bat_priv *bat_priv, 36void purge_outstanding_packets(struct bat_priv *bat_priv,
39 struct batman_if *batman_if); 37 struct hard_iface *hard_iface);
40 38
41#endif /* _NET_BATMAN_ADV_SEND_H_ */ 39#endif /* _NET_BATMAN_ADV_SEND_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e89ede192ed..9ed26140a26 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -26,18 +26,15 @@
26#include "send.h" 26#include "send.h"
27#include "bat_debugfs.h" 27#include "bat_debugfs.h"
28#include "translation-table.h" 28#include "translation-table.h"
29#include "types.h"
30#include "hash.h" 29#include "hash.h"
31#include "gateway_common.h" 30#include "gateway_common.h"
32#include "gateway_client.h" 31#include "gateway_client.h"
33#include "send.h"
34#include "bat_sysfs.h" 32#include "bat_sysfs.h"
35#include <linux/slab.h> 33#include <linux/slab.h>
36#include <linux/ethtool.h> 34#include <linux/ethtool.h>
37#include <linux/etherdevice.h> 35#include <linux/etherdevice.h>
38#include <linux/if_vlan.h> 36#include <linux/if_vlan.h>
39#include "unicast.h" 37#include "unicast.h"
40#include "routing.h"
41 38
42 39
43static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); 40static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
@@ -79,20 +76,18 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len)
79 return 0; 76 return 0;
80} 77}
81 78
82static void softif_neigh_free_ref(struct kref *refcount) 79static void softif_neigh_free_rcu(struct rcu_head *rcu)
83{ 80{
84 struct softif_neigh *softif_neigh; 81 struct softif_neigh *softif_neigh;
85 82
86 softif_neigh = container_of(refcount, struct softif_neigh, refcount); 83 softif_neigh = container_of(rcu, struct softif_neigh, rcu);
87 kfree(softif_neigh); 84 kfree(softif_neigh);
88} 85}
89 86
90static void softif_neigh_free_rcu(struct rcu_head *rcu) 87static void softif_neigh_free_ref(struct softif_neigh *softif_neigh)
91{ 88{
92 struct softif_neigh *softif_neigh; 89 if (atomic_dec_and_test(&softif_neigh->refcount))
93 90 call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu);
94 softif_neigh = container_of(rcu, struct softif_neigh, rcu);
95 kref_put(&softif_neigh->refcount, softif_neigh_free_ref);
96} 91}
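The two helpers effectively trade places: with krefs, the RCU callback performed the final kref_put(); now the last reference put schedules the RCU callback. That is the standard recipe for RCU-protected, refcounted kernel objects - a lifecycle sketch using this file's names (the unlink itself happens in softif_neigh_purge(), partly outside this hunk):

    /* writer side, under softif_neigh_lock */
    hlist_del_rcu(&softif_neigh->list);
    softif_neigh_free_ref(softif_neigh);   /* drop the list's reference */

    /* a reader racing with the unlink stays safe: it entered
     * rcu_read_lock() before hlist_del_rcu() took effect, and the
     * memory is not kfree()d until a grace period has passed after
     * the final put triggered call_rcu() */

Readers that want to keep the object beyond rcu_read_unlock() must first pin it with atomic_inc_not_zero(), which is exactly what softif_neigh_get() does below.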
97 92
98void softif_neigh_purge(struct bat_priv *bat_priv) 93void softif_neigh_purge(struct bat_priv *bat_priv)
@@ -119,11 +114,10 @@ void softif_neigh_purge(struct bat_priv *bat_priv)
119 softif_neigh->addr, softif_neigh->vid); 114 softif_neigh->addr, softif_neigh->vid);
120 softif_neigh_tmp = bat_priv->softif_neigh; 115 softif_neigh_tmp = bat_priv->softif_neigh;
121 bat_priv->softif_neigh = NULL; 116 bat_priv->softif_neigh = NULL;
122 kref_put(&softif_neigh_tmp->refcount, 117 softif_neigh_free_ref(softif_neigh_tmp);
123 softif_neigh_free_ref);
124 } 118 }
125 119
126 call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); 120 softif_neigh_free_ref(softif_neigh);
127 } 121 }
128 122
129 spin_unlock_bh(&bat_priv->softif_neigh_lock); 123 spin_unlock_bh(&bat_priv->softif_neigh_lock);
@@ -138,14 +132,17 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv,
138 rcu_read_lock(); 132 rcu_read_lock();
139 hlist_for_each_entry_rcu(softif_neigh, node, 133 hlist_for_each_entry_rcu(softif_neigh, node,
140 &bat_priv->softif_neigh_list, list) { 134 &bat_priv->softif_neigh_list, list) {
141 if (memcmp(softif_neigh->addr, addr, ETH_ALEN) != 0) 135 if (!compare_eth(softif_neigh->addr, addr))
142 continue; 136 continue;
143 137
144 if (softif_neigh->vid != vid) 138 if (softif_neigh->vid != vid)
145 continue; 139 continue;
146 140
141 if (!atomic_inc_not_zero(&softif_neigh->refcount))
142 continue;
143
147 softif_neigh->last_seen = jiffies; 144 softif_neigh->last_seen = jiffies;
148 goto found; 145 goto out;
149 } 146 }
150 147
151 softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC); 148 softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC);
@@ -155,15 +152,14 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv,
155 memcpy(softif_neigh->addr, addr, ETH_ALEN); 152 memcpy(softif_neigh->addr, addr, ETH_ALEN);
156 softif_neigh->vid = vid; 153 softif_neigh->vid = vid;
157 softif_neigh->last_seen = jiffies; 154 softif_neigh->last_seen = jiffies;
158 kref_init(&softif_neigh->refcount); 155 /* initialize with 2 - caller decrements counter by one */
156 atomic_set(&softif_neigh->refcount, 2);
159 157
160 INIT_HLIST_NODE(&softif_neigh->list); 158 INIT_HLIST_NODE(&softif_neigh->list);
161 spin_lock_bh(&bat_priv->softif_neigh_lock); 159 spin_lock_bh(&bat_priv->softif_neigh_lock);
162 hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list); 160 hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list);
163 spin_unlock_bh(&bat_priv->softif_neigh_lock); 161 spin_unlock_bh(&bat_priv->softif_neigh_lock);
164 162
165found:
166 kref_get(&softif_neigh->refcount);
167out: 163out:
168 rcu_read_unlock(); 164 rcu_read_unlock();
169 return softif_neigh; 165 return softif_neigh;
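Starting the refcount at 2 encodes two owners at once: the hash list (released by the purge path) and the caller of softif_neigh_get(). The calling convention this implies, as softif_batman_recv() further down follows (illustrative):

    softif_neigh = softif_neigh_get(bat_priv, ethhdr->h_source, vid);
    if (!softif_neigh)
            goto err;

    /* ... inspect or record the neighbour ... */

    softif_neigh_free_ref(softif_neigh);  /* drops the caller's reference;
                                           * the list still holds its own */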
@@ -175,8 +171,6 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset)
175 struct bat_priv *bat_priv = netdev_priv(net_dev); 171 struct bat_priv *bat_priv = netdev_priv(net_dev);
176 struct softif_neigh *softif_neigh; 172 struct softif_neigh *softif_neigh;
177 struct hlist_node *node; 173 struct hlist_node *node;
178 size_t buf_size, pos;
179 char *buff;
180 174
181 if (!bat_priv->primary_if) { 175 if (!bat_priv->primary_if) {
182 return seq_printf(seq, "BATMAN mesh %s disabled - " 176 return seq_printf(seq, "BATMAN mesh %s disabled - "
@@ -186,33 +180,15 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset)
186 180
187 seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name); 181 seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name);
188 182
189 buf_size = 1;
190 /* Estimate length for: " xx:xx:xx:xx:xx:xx\n" */
191 rcu_read_lock(); 183 rcu_read_lock();
192 hlist_for_each_entry_rcu(softif_neigh, node, 184 hlist_for_each_entry_rcu(softif_neigh, node,
193 &bat_priv->softif_neigh_list, list) 185 &bat_priv->softif_neigh_list, list)
194 buf_size += 30; 186 seq_printf(seq, "%s %pM (vid: %d)\n",
195 rcu_read_unlock();
196
197 buff = kmalloc(buf_size, GFP_ATOMIC);
198 if (!buff)
199 return -ENOMEM;
200
201 buff[0] = '\0';
202 pos = 0;
203
204 rcu_read_lock();
205 hlist_for_each_entry_rcu(softif_neigh, node,
206 &bat_priv->softif_neigh_list, list) {
207 pos += snprintf(buff + pos, 31, "%s %pM (vid: %d)\n",
208 bat_priv->softif_neigh == softif_neigh 187 bat_priv->softif_neigh == softif_neigh
209 ? "=>" : " ", softif_neigh->addr, 188 ? "=>" : " ", softif_neigh->addr,
210 softif_neigh->vid); 189 softif_neigh->vid);
211 }
212 rcu_read_unlock(); 190 rcu_read_unlock();
213 191
214 seq_printf(seq, "%s", buff);
215 kfree(buff);
216 return 0; 192 return 0;
217} 193}
218 194
@@ -267,7 +243,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
267 softif_neigh->addr, softif_neigh->vid); 243 softif_neigh->addr, softif_neigh->vid);
268 softif_neigh_tmp = bat_priv->softif_neigh; 244 softif_neigh_tmp = bat_priv->softif_neigh;
269 bat_priv->softif_neigh = softif_neigh; 245 bat_priv->softif_neigh = softif_neigh;
270 kref_put(&softif_neigh_tmp->refcount, softif_neigh_free_ref); 246 softif_neigh_free_ref(softif_neigh_tmp);
271 /* we need to hold the additional reference */ 247 /* we need to hold the additional reference */
272 goto err; 248 goto err;
273 } 249 }
@@ -285,7 +261,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
285 } 261 }
286 262
287out: 263out:
288 kref_put(&softif_neigh->refcount, softif_neigh_free_ref); 264 softif_neigh_free_ref(softif_neigh);
289err: 265err:
290 kfree_skb(skb); 266 kfree_skb(skb);
291 return; 267 return;
@@ -438,7 +414,7 @@ end:
438} 414}
439 415
440void interface_rx(struct net_device *soft_iface, 416void interface_rx(struct net_device *soft_iface,
441 struct sk_buff *skb, struct batman_if *recv_if, 417 struct sk_buff *skb, struct hard_iface *recv_if,
442 int hdr_size) 418 int hdr_size)
443{ 419{
444 struct bat_priv *bat_priv = netdev_priv(soft_iface); 420 struct bat_priv *bat_priv = netdev_priv(soft_iface);
@@ -486,7 +462,7 @@ void interface_rx(struct net_device *soft_iface,
486 462
487 memcpy(unicast_packet->dest, 463 memcpy(unicast_packet->dest,
488 bat_priv->softif_neigh->addr, ETH_ALEN); 464 bat_priv->softif_neigh->addr, ETH_ALEN);
489 ret = route_unicast_packet(skb, recv_if, hdr_size); 465 ret = route_unicast_packet(skb, recv_if);
490 if (ret == NET_RX_DROP) 466 if (ret == NET_RX_DROP)
491 goto dropped; 467 goto dropped;
492 468
@@ -646,6 +622,19 @@ void softif_destroy(struct net_device *soft_iface)
646 unregister_netdevice(soft_iface); 622 unregister_netdevice(soft_iface);
647} 623}
648 624
625int softif_is_valid(struct net_device *net_dev)
626{
627#ifdef HAVE_NET_DEVICE_OPS
628 if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
629 return 1;
630#else
631 if (net_dev->hard_start_xmit == interface_tx)
632 return 1;
633#endif
634
635 return 0;
636}
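softif_is_valid() identifies a batman-adv mesh device by its transmit hook. The presumed consumer - an assumption, since the caller is not part of this hunk - is the hard-interface code, refusing to stack a mesh interface on top of itself ("batman over batman"):

    /* hypothetical caller sketch in hard-interface.c */
    if (softif_is_valid(net_dev)) {
            pr_err("batman-adv: %s is a batman-adv soft interface and "
                   "can't be enslaved as a hard interface\n",
                   net_dev->name);
            return -EINVAL;
    }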
637
649/* ethtool */ 638/* ethtool */
650static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 639static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
651{ 640{
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 02b77334d10..4789b6f2a0b 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner 4 * Marek Lindner
5 * 5 *
@@ -27,9 +27,10 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset);
27void softif_neigh_purge(struct bat_priv *bat_priv); 27void softif_neigh_purge(struct bat_priv *bat_priv);
28int interface_tx(struct sk_buff *skb, struct net_device *soft_iface); 28int interface_tx(struct sk_buff *skb, struct net_device *soft_iface);
29void interface_rx(struct net_device *soft_iface, 29void interface_rx(struct net_device *soft_iface,
30 struct sk_buff *skb, struct batman_if *recv_if, 30 struct sk_buff *skb, struct hard_iface *recv_if,
31 int hdr_size); 31 int hdr_size);
32struct net_device *softif_create(char *name); 32struct net_device *softif_create(char *name);
33void softif_destroy(struct net_device *soft_iface); 33void softif_destroy(struct net_device *soft_iface);
34int softif_is_valid(struct net_device *net_dev);
34 35
35#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ 36#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a633b5a435e..8d15b48d169 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: 2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 * 3 *
4 * Marek Lindner, Simon Wunderlich 4 * Marek Lindner, Simon Wunderlich
5 * 5 *
@@ -22,7 +22,6 @@
22#include "main.h" 22#include "main.h"
23#include "translation-table.h" 23#include "translation-table.h"
24#include "soft-interface.h" 24#include "soft-interface.h"
25#include "types.h"
26#include "hash.h" 25#include "hash.h"
27#include "originator.h" 26#include "originator.h"
28 27
@@ -31,12 +30,85 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv,
31 struct hna_global_entry *hna_global_entry, 30 struct hna_global_entry *hna_global_entry,
32 char *message); 31 char *message);
33 32
33/* returns 1 if they are the same mac addr */
34static int compare_lhna(struct hlist_node *node, void *data2)
35{
36 void *data1 = container_of(node, struct hna_local_entry, hash_entry);
37
38 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
39}
40
41/* returns 1 if they are the same mac addr */
42static int compare_ghna(struct hlist_node *node, void *data2)
43{
44 void *data1 = container_of(node, struct hna_global_entry, hash_entry);
45
46 return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
47}
48
34static void hna_local_start_timer(struct bat_priv *bat_priv) 49static void hna_local_start_timer(struct bat_priv *bat_priv)
35{ 50{
36 INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge); 51 INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge);
37 queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ); 52 queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ);
38} 53}
39 54
55static struct hna_local_entry *hna_local_hash_find(struct bat_priv *bat_priv,
56 void *data)
57{
58 struct hashtable_t *hash = bat_priv->hna_local_hash;
59 struct hlist_head *head;
60 struct hlist_node *node;
61 struct hna_local_entry *hna_local_entry, *hna_local_entry_tmp = NULL;
62 int index;
63
64 if (!hash)
65 return NULL;
66
67 index = choose_orig(data, hash->size);
68 head = &hash->table[index];
69
70 rcu_read_lock();
71 hlist_for_each_entry_rcu(hna_local_entry, node, head, hash_entry) {
72 if (!compare_eth(hna_local_entry, data))
73 continue;
74
75 hna_local_entry_tmp = hna_local_entry;
76 break;
77 }
78 rcu_read_unlock();
79
80 return hna_local_entry_tmp;
81}
82
83static struct hna_global_entry *hna_global_hash_find(struct bat_priv *bat_priv,
84 void *data)
85{
86 struct hashtable_t *hash = bat_priv->hna_global_hash;
87 struct hlist_head *head;
88 struct hlist_node *node;
89 struct hna_global_entry *hna_global_entry;
90 struct hna_global_entry *hna_global_entry_tmp = NULL;
91 int index;
92
93 if (!hash)
94 return NULL;
95
96 index = choose_orig(data, hash->size);
97 head = &hash->table[index];
98
99 rcu_read_lock();
100 hlist_for_each_entry_rcu(hna_global_entry, node, head, hash_entry) {
101 if (!compare_eth(hna_global_entry, data))
102 continue;
103
104 hna_global_entry_tmp = hna_global_entry;
105 break;
106 }
107 rcu_read_unlock();
108
109 return hna_global_entry_tmp;
110}
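Both finders hand the entry pointer itself to compare_eth(), which only works because the MAC address is the first member of each entry struct - the struct pointer doubles as a pointer to the key. The layout this implies (a sketch; the real definitions are in types.h):

    struct hna_local_entry {
            uint8_t addr[ETH_ALEN];       /* hash key - must stay the first member */
            unsigned long last_seen;
            char never_purge;
            struct hlist_node hash_entry; /* bucket linkage; compare_lhna() above
                                           * recovers the entry via container_of() */
    };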
111
40int hna_local_init(struct bat_priv *bat_priv) 112int hna_local_init(struct bat_priv *bat_priv)
41{ 113{
42 if (bat_priv->hna_local_hash) 114 if (bat_priv->hna_local_hash)
@@ -61,10 +133,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr)
61 int required_bytes; 133 int required_bytes;
62 134
63 spin_lock_bh(&bat_priv->hna_lhash_lock); 135 spin_lock_bh(&bat_priv->hna_lhash_lock);
64 hna_local_entry = 136 hna_local_entry = hna_local_hash_find(bat_priv, addr);
65 ((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash,
66 compare_orig, choose_orig,
67 addr));
68 spin_unlock_bh(&bat_priv->hna_lhash_lock); 137 spin_unlock_bh(&bat_priv->hna_lhash_lock);
69 138
70 if (hna_local_entry) { 139 if (hna_local_entry) {
@@ -100,15 +169,15 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr)
100 hna_local_entry->last_seen = jiffies; 169 hna_local_entry->last_seen = jiffies;
101 170
102 /* the batman interface mac address should never be purged */ 171 /* the batman interface mac address should never be purged */
103 if (compare_orig(addr, soft_iface->dev_addr)) 172 if (compare_eth(addr, soft_iface->dev_addr))
104 hna_local_entry->never_purge = 1; 173 hna_local_entry->never_purge = 1;
105 else 174 else
106 hna_local_entry->never_purge = 0; 175 hna_local_entry->never_purge = 0;
107 176
108 spin_lock_bh(&bat_priv->hna_lhash_lock); 177 spin_lock_bh(&bat_priv->hna_lhash_lock);
109 178
110 hash_add(bat_priv->hna_local_hash, compare_orig, choose_orig, 179 hash_add(bat_priv->hna_local_hash, compare_lhna, choose_orig,
111 hna_local_entry); 180 hna_local_entry, &hna_local_entry->hash_entry);
112 bat_priv->num_local_hna++; 181 bat_priv->num_local_hna++;
113 atomic_set(&bat_priv->hna_local_changed, 1); 182 atomic_set(&bat_priv->hna_local_changed, 1);
114 183
@@ -117,9 +186,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr)
117 /* remove address from global hash if present */ 186 /* remove address from global hash if present */
118 spin_lock_bh(&bat_priv->hna_ghash_lock); 187 spin_lock_bh(&bat_priv->hna_ghash_lock);
119 188
120 hna_global_entry = ((struct hna_global_entry *) 189 hna_global_entry = hna_global_hash_find(bat_priv, addr);
121 hash_find(bat_priv->hna_global_hash,
122 compare_orig, choose_orig, addr));
123 190
124 if (hna_global_entry) 191 if (hna_global_entry)
125 _hna_global_del_orig(bat_priv, hna_global_entry, 192 _hna_global_del_orig(bat_priv, hna_global_entry,
@@ -133,28 +200,27 @@ int hna_local_fill_buffer(struct bat_priv *bat_priv,
133{ 200{
134 struct hashtable_t *hash = bat_priv->hna_local_hash; 201 struct hashtable_t *hash = bat_priv->hna_local_hash;
135 struct hna_local_entry *hna_local_entry; 202 struct hna_local_entry *hna_local_entry;
136 struct element_t *bucket; 203 struct hlist_node *node;
137 int i;
138 struct hlist_node *walk;
139 struct hlist_head *head; 204 struct hlist_head *head;
140 int count = 0; 205 int i, count = 0;
141 206
142 spin_lock_bh(&bat_priv->hna_lhash_lock); 207 spin_lock_bh(&bat_priv->hna_lhash_lock);
143 208
144 for (i = 0; i < hash->size; i++) { 209 for (i = 0; i < hash->size; i++) {
145 head = &hash->table[i]; 210 head = &hash->table[i];
146 211
147 hlist_for_each_entry(bucket, walk, head, hlist) { 212 rcu_read_lock();
148 213 hlist_for_each_entry_rcu(hna_local_entry, node,
214 head, hash_entry) {
149 if (buff_len < (count + 1) * ETH_ALEN) 215 if (buff_len < (count + 1) * ETH_ALEN)
150 break; 216 break;
151 217
152 hna_local_entry = bucket->data;
153 memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr, 218 memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr,
154 ETH_ALEN); 219 ETH_ALEN);
155 220
156 count++; 221 count++;
157 } 222 }
223 rcu_read_unlock();
158 } 224 }
159 225
160 /* if we did not get all new local hnas see you next time ;-) */ 226 /* if we did not get all new local hnas see you next time ;-) */
@@ -171,12 +237,11 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
171 struct bat_priv *bat_priv = netdev_priv(net_dev); 237 struct bat_priv *bat_priv = netdev_priv(net_dev);
172 struct hashtable_t *hash = bat_priv->hna_local_hash; 238 struct hashtable_t *hash = bat_priv->hna_local_hash;
173 struct hna_local_entry *hna_local_entry; 239 struct hna_local_entry *hna_local_entry;
174 int i; 240 struct hlist_node *node;
175 struct hlist_node *walk;
176 struct hlist_head *head; 241 struct hlist_head *head;
177 struct element_t *bucket;
178 size_t buf_size, pos; 242 size_t buf_size, pos;
179 char *buff; 243 char *buff;
244 int i;
180 245
181 if (!bat_priv->primary_if) { 246 if (!bat_priv->primary_if) {
182 return seq_printf(seq, "BATMAN mesh %s disabled - " 247 return seq_printf(seq, "BATMAN mesh %s disabled - "
@@ -195,8 +260,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
195 for (i = 0; i < hash->size; i++) { 260 for (i = 0; i < hash->size; i++) {
196 head = &hash->table[i]; 261 head = &hash->table[i];
197 262
198 hlist_for_each(walk, head) 263 rcu_read_lock();
264 __hlist_for_each_rcu(node, head)
199 buf_size += 21; 265 buf_size += 21;
266 rcu_read_unlock();
200 } 267 }
201 268
202 buff = kmalloc(buf_size, GFP_ATOMIC); 269 buff = kmalloc(buf_size, GFP_ATOMIC);
@@ -204,18 +271,20 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
204 spin_unlock_bh(&bat_priv->hna_lhash_lock); 271 spin_unlock_bh(&bat_priv->hna_lhash_lock);
205 return -ENOMEM; 272 return -ENOMEM;
206 } 273 }
274
207 buff[0] = '\0'; 275 buff[0] = '\0';
208 pos = 0; 276 pos = 0;
209 277
210 for (i = 0; i < hash->size; i++) { 278 for (i = 0; i < hash->size; i++) {
211 head = &hash->table[i]; 279 head = &hash->table[i];
212 280
213 hlist_for_each_entry(bucket, walk, head, hlist) { 281 rcu_read_lock();
214 hna_local_entry = bucket->data; 282 hlist_for_each_entry_rcu(hna_local_entry, node,
215 283 head, hash_entry) {
216 pos += snprintf(buff + pos, 22, " * %pM\n", 284 pos += snprintf(buff + pos, 22, " * %pM\n",
217 hna_local_entry->addr); 285 hna_local_entry->addr);
218 } 286 }
287 rcu_read_unlock();
219 } 288 }
220 289
221 spin_unlock_bh(&bat_priv->hna_lhash_lock); 290 spin_unlock_bh(&bat_priv->hna_lhash_lock);
@@ -225,9 +294,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
225 return 0; 294 return 0;
226} 295}
227 296
228static void _hna_local_del(void *data, void *arg) 297static void _hna_local_del(struct hlist_node *node, void *arg)
229{ 298{
230 struct bat_priv *bat_priv = (struct bat_priv *)arg; 299 struct bat_priv *bat_priv = (struct bat_priv *)arg;
300 void *data = container_of(node, struct hna_local_entry, hash_entry);
231 301
232 kfree(data); 302 kfree(data);
233 bat_priv->num_local_hna--; 303 bat_priv->num_local_hna--;
@@ -241,9 +311,9 @@ static void hna_local_del(struct bat_priv *bat_priv,
241 bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n", 311 bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n",
242 hna_local_entry->addr, message); 312 hna_local_entry->addr, message);
243 313
244 hash_remove(bat_priv->hna_local_hash, compare_orig, choose_orig, 314 hash_remove(bat_priv->hna_local_hash, compare_lhna, choose_orig,
245 hna_local_entry->addr); 315 hna_local_entry->addr);
246 _hna_local_del(hna_local_entry, bat_priv); 316 _hna_local_del(&hna_local_entry->hash_entry, bat_priv);
247} 317}
248 318
249void hna_local_remove(struct bat_priv *bat_priv, 319void hna_local_remove(struct bat_priv *bat_priv,
@@ -253,9 +323,7 @@ void hna_local_remove(struct bat_priv *bat_priv,
253 323
254 spin_lock_bh(&bat_priv->hna_lhash_lock); 324 spin_lock_bh(&bat_priv->hna_lhash_lock);
255 325
256 hna_local_entry = (struct hna_local_entry *) 326 hna_local_entry = hna_local_hash_find(bat_priv, addr);
257 hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig,
258 addr);
259 327
260 if (hna_local_entry) 328 if (hna_local_entry)
261 hna_local_del(bat_priv, hna_local_entry, message); 329 hna_local_del(bat_priv, hna_local_entry, message);
@@ -271,27 +339,29 @@ static void hna_local_purge(struct work_struct *work)
271 container_of(delayed_work, struct bat_priv, hna_work); 339 container_of(delayed_work, struct bat_priv, hna_work);
272 struct hashtable_t *hash = bat_priv->hna_local_hash; 340 struct hashtable_t *hash = bat_priv->hna_local_hash;
273 struct hna_local_entry *hna_local_entry; 341 struct hna_local_entry *hna_local_entry;
274 int i; 342 struct hlist_node *node, *node_tmp;
275 struct hlist_node *walk, *safe;
276 struct hlist_head *head; 343 struct hlist_head *head;
277 struct element_t *bucket;
278 unsigned long timeout; 344 unsigned long timeout;
345 int i;
279 346
280 spin_lock_bh(&bat_priv->hna_lhash_lock); 347 spin_lock_bh(&bat_priv->hna_lhash_lock);
281 348
282 for (i = 0; i < hash->size; i++) { 349 for (i = 0; i < hash->size; i++) {
283 head = &hash->table[i]; 350 head = &hash->table[i];
284 351
285 hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { 352 hlist_for_each_entry_safe(hna_local_entry, node, node_tmp,
286 hna_local_entry = bucket->data; 353 head, hash_entry) {
354 if (hna_local_entry->never_purge)
355 continue;
287 356
288 timeout = hna_local_entry->last_seen; 357 timeout = hna_local_entry->last_seen;
289 timeout += LOCAL_HNA_TIMEOUT * HZ; 358 timeout += LOCAL_HNA_TIMEOUT * HZ;
290 359
291 if ((!hna_local_entry->never_purge) && 360 if (time_before(jiffies, timeout))
292 time_after(jiffies, timeout)) 361 continue;
293 hna_local_del(bat_priv, hna_local_entry, 362
294 "address timed out"); 363 hna_local_del(bat_priv, hna_local_entry,
364 "address timed out");
295 } 365 }
296 } 366 }
297 367
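The rewritten loop also switches to time_before(), the wraparound-safe way to compare jiffies; an open-coded `jiffies < timeout` would misfire once the counter overflows. Expanded, the check reads:

    /* time_before(a, b) is ((long)((a) - (b)) < 0), so the comparison
     * stays correct across a jiffies wraparound */
    timeout = hna_local_entry->last_seen + LOCAL_HNA_TIMEOUT * HZ;
    if (time_before(jiffies, timeout))
            continue;       /* entry still fresh - keep it */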
@@ -335,9 +405,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv,
 		spin_lock_bh(&bat_priv->hna_ghash_lock);
 
 		hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN);
-		hna_global_entry = (struct hna_global_entry *)
-			hash_find(bat_priv->hna_global_hash, compare_orig,
-				  choose_orig, hna_ptr);
+		hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr);
 
 		if (!hna_global_entry) {
 			spin_unlock_bh(&bat_priv->hna_ghash_lock);
@@ -357,8 +425,9 @@ void hna_global_add_orig(struct bat_priv *bat_priv,
 			hna_global_entry->addr, orig_node->orig);
 
 		spin_lock_bh(&bat_priv->hna_ghash_lock);
-		hash_add(bat_priv->hna_global_hash, compare_orig,
-			 choose_orig, hna_global_entry);
+		hash_add(bat_priv->hna_global_hash, compare_ghna,
+			 choose_orig, hna_global_entry,
+			 &hna_global_entry->hash_entry);
 
 	}
 
@@ -369,9 +438,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv,
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
 
 	hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN);
-	hna_local_entry = (struct hna_local_entry *)
-		hash_find(bat_priv->hna_local_hash, compare_orig,
-			  choose_orig, hna_ptr);
+	hna_local_entry = hna_local_hash_find(bat_priv, hna_ptr);
 
 	if (hna_local_entry)
 		hna_local_del(bat_priv, hna_local_entry,
@@ -401,12 +468,11 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset)
 	struct bat_priv *bat_priv = netdev_priv(net_dev);
 	struct hashtable_t *hash = bat_priv->hna_global_hash;
 	struct hna_global_entry *hna_global_entry;
-	int i;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	size_t buf_size, pos;
 	char *buff;
+	int i;
 
 	if (!bat_priv->primary_if) {
 		return seq_printf(seq, "BATMAN mesh %s disabled - "
@@ -424,8 +490,10 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each(walk, head)
+		rcu_read_lock();
+		__hlist_for_each_rcu(node, head)
 			buf_size += 43;
+		rcu_read_unlock();
 	}
 
 	buff = kmalloc(buf_size, GFP_ATOMIC);
@@ -439,14 +507,15 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			hna_global_entry = bucket->data;
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(hna_global_entry, node,
+					 head, hash_entry) {
 			pos += snprintf(buff + pos, 44,
 					" * %pM via %pM\n",
 					hna_global_entry->addr,
 					hna_global_entry->orig_node->orig);
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_bh(&bat_priv->hna_ghash_lock);
@@ -465,7 +534,7 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv,
 		hna_global_entry->addr, hna_global_entry->orig_node->orig,
 		message);
 
-	hash_remove(bat_priv->hna_global_hash, compare_orig, choose_orig,
+	hash_remove(bat_priv->hna_global_hash, compare_ghna, choose_orig,
 		    hna_global_entry->addr);
 	kfree(hna_global_entry);
 }
@@ -484,9 +553,7 @@ void hna_global_del_orig(struct bat_priv *bat_priv,
 
 	while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) {
 		hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN);
-		hna_global_entry = (struct hna_global_entry *)
-			hash_find(bat_priv->hna_global_hash, compare_orig,
-				  choose_orig, hna_ptr);
+		hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr);
 
 		if ((hna_global_entry) &&
 		    (hna_global_entry->orig_node == orig_node))
@@ -503,8 +570,10 @@ void hna_global_del_orig(struct bat_priv *bat_priv,
 	orig_node->hna_buff = NULL;
 }
 
-static void hna_global_del(void *data, void *arg)
+static void hna_global_del(struct hlist_node *node, void *arg)
 {
+	void *data = container_of(node, struct hna_global_entry, hash_entry);
+
 	kfree(data);
 }
 
@@ -520,15 +589,20 @@ void hna_global_free(struct bat_priv *bat_priv)
 struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr)
 {
 	struct hna_global_entry *hna_global_entry;
+	struct orig_node *orig_node = NULL;
 
 	spin_lock_bh(&bat_priv->hna_ghash_lock);
-	hna_global_entry = (struct hna_global_entry *)
-		hash_find(bat_priv->hna_global_hash,
-			  compare_orig, choose_orig, addr);
-	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+	hna_global_entry = hna_global_hash_find(bat_priv, addr);
 
 	if (!hna_global_entry)
-		return NULL;
+		goto out;
 
-	return hna_global_entry->orig_node;
+	if (!atomic_inc_not_zero(&hna_global_entry->orig_node->refcount))
+		goto out;
+
+	orig_node = hna_global_entry->orig_node;
+
+out:
+	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+	return orig_node;
 }
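The reworked transtable_search() above only hands back an orig_node after atomic_inc_not_zero() succeeds, so a lookup can never resurrect an entry whose last reference is already gone. A minimal userspace sketch of that acquire-only-if-alive primitive, using C11 atomics (struct entry and entry_get() are illustrative names, not from the kernel tree):

#include <stdatomic.h>
#include <stdbool.h>

struct entry {
	atomic_int refcount; /* 0 means the entry is already being torn down */
};

/* Take a reference only if the count is still nonzero, like
 * atomic_inc_not_zero() in transtable_search() above. */
static bool entry_get(struct entry *e)
{
	int old = atomic_load(&e->refcount);

	while (old != 0) {
		/* on failure the CAS reloads 'old' and we retry */
		if (atomic_compare_exchange_weak(&e->refcount, &old, old + 1))
			return true;
	}
	return false;
}

int main(void)
{
	struct entry e;

	atomic_init(&e.refcount, 2);
	return entry_get(&e) ? 0 : 1; /* succeeds: count was nonzero */
}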
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 10c4c5c319b..f19931ca145 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -22,8 +22,6 @@
 #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
 #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
 
-#include "types.h"
-
 int hna_local_init(struct bat_priv *bat_priv);
 void hna_local_add(struct net_device *soft_iface, uint8_t *addr);
 void hna_local_remove(struct bat_priv *bat_priv,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index bf3f6f5a12c..83445cf0cc9 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -33,7 +33,7 @@
 	  sizeof(struct bcast_packet))))
 
 
-struct batman_if {
+struct hard_iface {
 	struct list_head list;
 	int16_t if_num;
 	char if_status;
@@ -43,7 +43,7 @@ struct batman_if {
 	unsigned char *packet_buff;
 	int packet_len;
 	struct kobject *hardif_obj;
-	struct kref refcount;
+	atomic_t refcount;
 	struct packet_type batman_adv_ptype;
 	struct net_device *soft_iface;
 	struct rcu_head rcu;
@@ -70,8 +70,6 @@ struct orig_node {
 	struct neigh_node *router;
 	unsigned long *bcast_own;
 	uint8_t *bcast_own_sum;
-	uint8_t tq_own;
-	int tq_asym_penalty;
 	unsigned long last_valid;
 	unsigned long bcast_seqno_reset;
 	unsigned long batman_seqno_reset;
@@ -83,20 +81,28 @@ struct orig_node {
 	uint8_t last_ttl;
 	unsigned long bcast_bits[NUM_WORDS];
 	uint32_t last_bcast_seqno;
-	struct list_head neigh_list;
+	struct hlist_head neigh_list;
 	struct list_head frag_list;
+	spinlock_t neigh_list_lock; /* protects neighbor list */
+	atomic_t refcount;
+	struct rcu_head rcu;
+	struct hlist_node hash_entry;
+	struct bat_priv *bat_priv;
 	unsigned long last_frag_packet;
-	struct {
-		uint8_t candidates;
-		struct neigh_node *selected;
-	} bond;
+	spinlock_t ogm_cnt_lock; /* protects: bcast_own, bcast_own_sum,
+				  * neigh_node->real_bits,
+				  * neigh_node->real_packet_count */
+	spinlock_t bcast_seqno_lock; /* protects bcast_bits,
+				      * last_bcast_seqno */
+	atomic_t bond_candidates;
+	struct list_head bond_list;
 };
 
 struct gw_node {
 	struct hlist_node list;
 	struct orig_node *orig_node;
 	unsigned long deleted;
-	struct kref refcount;
+	atomic_t refcount;
 	struct rcu_head rcu;
 };
 
@@ -105,18 +111,20 @@ struct gw_node {
  * @last_valid: when last packet via this neighbor was received
 */
 struct neigh_node {
-	struct list_head list;
+	struct hlist_node list;
 	uint8_t addr[ETH_ALEN];
 	uint8_t real_packet_count;
 	uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE];
 	uint8_t tq_index;
 	uint8_t tq_avg;
 	uint8_t last_ttl;
-	struct neigh_node *next_bond_candidate;
+	struct list_head bonding_list;
 	unsigned long last_valid;
 	unsigned long real_bits[NUM_WORDS];
+	atomic_t refcount;
+	struct rcu_head rcu;
 	struct orig_node *orig_node;
-	struct batman_if *if_incoming;
+	struct hard_iface *if_incoming;
 };
 
 
@@ -140,7 +148,7 @@ struct bat_priv {
 	struct hlist_head softif_neigh_list;
 	struct softif_neigh *softif_neigh;
 	struct debug_log *debug_log;
-	struct batman_if *primary_if;
+	struct hard_iface *primary_if;
 	struct kobject *mesh_obj;
 	struct dentry *debug_dir;
 	struct hlist_head forw_bat_list;
@@ -151,12 +159,11 @@ struct bat_priv {
 	struct hashtable_t *hna_local_hash;
 	struct hashtable_t *hna_global_hash;
 	struct hashtable_t *vis_hash;
-	spinlock_t orig_hash_lock; /* protects orig_hash */
 	spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
 	spinlock_t forw_bcast_list_lock; /* protects */
 	spinlock_t hna_lhash_lock; /* protects hna_local_hash */
 	spinlock_t hna_ghash_lock; /* protects hna_global_hash */
-	spinlock_t gw_list_lock; /* protects gw_list */
+	spinlock_t gw_list_lock; /* protects gw_list and curr_gw */
 	spinlock_t vis_hash_lock; /* protects vis_hash */
 	spinlock_t vis_list_lock; /* protects vis_info::recv_list */
 	spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */
@@ -165,7 +172,7 @@ struct bat_priv {
 	struct delayed_work hna_work;
 	struct delayed_work orig_work;
 	struct delayed_work vis_work;
-	struct gw_node *curr_gw;
+	struct gw_node __rcu *curr_gw; /* rcu protected pointer */
 	struct vis_info *my_vis_info;
 };
 
@@ -188,11 +195,13 @@ struct hna_local_entry {
 	uint8_t addr[ETH_ALEN];
 	unsigned long last_seen;
 	char never_purge;
+	struct hlist_node hash_entry;
 };
 
 struct hna_global_entry {
 	uint8_t addr[ETH_ALEN];
 	struct orig_node *orig_node;
+	struct hlist_node hash_entry;
 };
 
 /**
@@ -208,7 +217,7 @@ struct forw_packet {
 	uint32_t direct_link_flags;
 	uint8_t num_packets;
 	struct delayed_work delayed_work;
-	struct batman_if *if_incoming;
+	struct hard_iface *if_incoming;
 };
 
 /* While scanning for vis-entries of a particular vis-originator
@@ -242,6 +251,7 @@ struct vis_info {
 	 * from. we should not reply to them. */
 	struct list_head send_list;
 	struct kref refcount;
+	struct hlist_node hash_entry;
 	struct bat_priv *bat_priv;
 	/* this packet might be part of the vis send queue. */
 	struct sk_buff *skb_packet;
@@ -264,7 +274,7 @@ struct softif_neigh {
 	uint8_t addr[ETH_ALEN];
 	unsigned long last_seen;
 	short vid;
-	struct kref refcount;
+	atomic_t refcount;
 	struct rcu_head rcu;
 };
 
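The hash_entry nodes embedded in hna_local_entry, hna_global_entry, orig_node and vis_info above are what let the reworked hash callbacks recover the enclosing object from a bare hlist_node. A self-contained userspace sketch of that container_of() arithmetic (the struct layout is an illustrative stand-in):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct hlist_node { struct hlist_node *next, **pprev; };

struct demo_entry {
	unsigned char addr[6];
	struct hlist_node hash_entry; /* embedded list linkage */
};

int main(void)
{
	struct demo_entry e = { { 1, 2, 3, 4, 5, 6 }, { 0, 0 } };
	struct hlist_node *node = &e.hash_entry;

	/* step back from the embedded node to the enclosing entry */
	struct demo_entry *back =
		container_of(node, struct demo_entry, hash_entry);

	printf("%u\n", back->addr[0]); /* prints 1 */
	return 0;
}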
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index d1a61132254..19f84bd443a 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Andreas Langer
  *
@@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head,
 		(struct unicast_frag_packet *)skb->data;
 	struct sk_buff *tmp_skb;
 	struct unicast_packet *unicast_packet;
-	int hdr_len = sizeof(struct unicast_packet),
-	    uni_diff = sizeof(struct unicast_frag_packet) - hdr_len;
+	int hdr_len = sizeof(struct unicast_packet);
+	int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len;
 
 	/* set skb to the first part and tmp_skb to the second part */
 	if (up->flags & UNI_FRAG_HEAD) {
@@ -183,15 +183,10 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 		(struct unicast_frag_packet *)skb->data;
 
 	*new_skb = NULL;
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)
-		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
-			       unicast_packet->orig));
 
-	if (!orig_node) {
-		pr_debug("couldn't find originator in orig_hash\n");
+	orig_node = orig_hash_find(bat_priv, unicast_packet->orig);
+	if (!orig_node)
 		goto out;
-	}
 
 	orig_node->last_frag_packet = jiffies;
 
@@ -215,21 +210,24 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 	/* if not, merge failed */
 	if (*new_skb)
 		ret = NET_RX_SUCCESS;
-out:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 
+out:
+	if (orig_node)
+		orig_node_free_ref(orig_node);
 	return ret;
 }
 
 int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
-		  struct batman_if *batman_if, uint8_t dstaddr[])
+		  struct hard_iface *hard_iface, uint8_t dstaddr[])
 {
 	struct unicast_packet tmp_uc, *unicast_packet;
 	struct sk_buff *frag_skb;
 	struct unicast_frag_packet *frag1, *frag2;
 	int uc_hdr_len = sizeof(struct unicast_packet);
 	int ucf_hdr_len = sizeof(struct unicast_frag_packet);
-	int data_len = skb->len;
+	int data_len = skb->len - uc_hdr_len;
+	int large_tail = 0;
+	uint16_t seqno;
 
 	if (!bat_priv->primary_if)
 		goto dropped;
@@ -237,10 +235,11 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 	frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
 	if (!frag_skb)
 		goto dropped;
+	skb_reserve(frag_skb, ucf_hdr_len);
 
 	unicast_packet = (struct unicast_packet *) skb->data;
 	memcpy(&tmp_uc, unicast_packet, uc_hdr_len);
-	skb_split(skb, frag_skb, data_len / 2);
+	skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len);
 
 	if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 ||
 	    my_skb_head_push(frag_skb, ucf_hdr_len) < 0)
@@ -258,16 +257,18 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 	memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
 	memcpy(frag2, frag1, sizeof(struct unicast_frag_packet));
 
-	frag1->flags |= UNI_FRAG_HEAD;
-	frag2->flags &= ~UNI_FRAG_HEAD;
+	if (data_len & 1)
+		large_tail = UNI_FRAG_LARGETAIL;
+
+	frag1->flags = UNI_FRAG_HEAD | large_tail;
+	frag2->flags = large_tail;
 
-	frag1->seqno = htons((uint16_t)atomic_inc_return(
-						&batman_if->frag_seqno));
-	frag2->seqno = htons((uint16_t)atomic_inc_return(
-						&batman_if->frag_seqno));
+	seqno = atomic_add_return(2, &hard_iface->frag_seqno);
+	frag1->seqno = htons(seqno - 1);
+	frag2->seqno = htons(seqno);
 
-	send_skb_packet(skb, batman_if, dstaddr);
-	send_skb_packet(frag_skb, batman_if, dstaddr);
+	send_skb_packet(skb, hard_iface, dstaddr);
+	send_skb_packet(frag_skb, hard_iface, dstaddr);
 	return NET_RX_SUCCESS;
 
 drop_frag:
@@ -282,44 +283,36 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
 	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
 	struct unicast_packet *unicast_packet;
 	struct orig_node *orig_node;
-	struct batman_if *batman_if;
-	struct neigh_node *router;
+	struct neigh_node *neigh_node;
 	int data_len = skb->len;
-	uint8_t dstaddr[6];
-
-	spin_lock_bh(&bat_priv->orig_hash_lock);
+	int ret = 1;
 
 	/* get routing information */
-	if (is_multicast_ether_addr(ethhdr->h_dest))
+	if (is_multicast_ether_addr(ethhdr->h_dest)) {
 		orig_node = (struct orig_node *)gw_get_selected(bat_priv);
-	else
-		orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
-							   compare_orig,
-							   choose_orig,
-							   ethhdr->h_dest));
-
-	/* check for hna host */
-	if (!orig_node)
-		orig_node = transtable_search(bat_priv, ethhdr->h_dest);
-
-	router = find_router(bat_priv, orig_node, NULL);
-
-	if (!router)
-		goto unlock;
+		if (orig_node)
+			goto find_router;
+	}
 
-	/* don't lock while sending the packets ... we therefore
-	 * copy the required data before sending */
+	/* check for hna host - increases orig_node refcount */
+	orig_node = transtable_search(bat_priv, ethhdr->h_dest);
 
-	batman_if = router->if_incoming;
-	memcpy(dstaddr, router->addr, ETH_ALEN);
+find_router:
+	/**
+	 * find_router():
+	 *  - if orig_node is NULL it returns NULL
+	 *  - increases neigh_nodes refcount if found.
+	 */
+	neigh_node = find_router(bat_priv, orig_node, NULL);
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (!neigh_node)
+		goto out;
 
-	if (batman_if->if_status != IF_ACTIVE)
-		goto dropped;
+	if (neigh_node->if_incoming->if_status != IF_ACTIVE)
+		goto out;
 
 	if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0)
-		goto dropped;
+		goto out;
 
 	unicast_packet = (struct unicast_packet *)skb->data;
 
@@ -333,18 +326,24 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
 
 	if (atomic_read(&bat_priv->fragmentation) &&
 	    data_len + sizeof(struct unicast_packet) >
-				batman_if->net_dev->mtu) {
+				neigh_node->if_incoming->net_dev->mtu) {
 		/* send frag skb decreases ttl */
 		unicast_packet->ttl++;
-		return frag_send_skb(skb, bat_priv, batman_if,
-				     dstaddr);
+		ret = frag_send_skb(skb, bat_priv,
+				    neigh_node->if_incoming, neigh_node->addr);
+		goto out;
 	}
-	send_skb_packet(skb, batman_if, dstaddr);
-	return 0;
 
-unlock:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
-dropped:
-	kfree_skb(skb);
-	return 1;
+	send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+	ret = 0;
+	goto out;
+
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
+	if (ret == 1)
+		kfree_skb(skb);
+	return ret;
 }
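Two details of the new frag_send_skb() are easy to miss: the tail fragment carries the extra byte of an odd-length payload (hence UNI_FRAG_LARGETAIL), and atomic_add_return(2, ...) reserves both fragment sequence numbers in one step so they stay consecutive under concurrency. A rough userspace model of both calculations (sizes and names are illustrative):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint frag_seqno;

int main(void)
{
	int data_len = 1399;        /* payload without the unicast header */
	int head = data_len / 2;    /* head fragment payload */
	int tail = data_len - head; /* tail gets the extra byte when odd */

	/* reserve two consecutive numbers in one atomic step; fetch_add
	 * returns the old value, so old + 2 mirrors atomic_add_return(2) */
	uint16_t seqno = (uint16_t)(atomic_fetch_add(&frag_seqno, 2) + 2);

	printf("head=%d tail=%d large_tail=%d seqnos=%u,%u\n",
	       head, tail, data_len & 1,
	       (unsigned)(uint16_t)(seqno - 1), (unsigned)seqno);
	return 0;
}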
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index e32b7867a9a..16ad7a9242b 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Andreas Langer
  *
@@ -22,6 +22,8 @@
 #ifndef _NET_BATMAN_ADV_UNICAST_H_
 #define _NET_BATMAN_ADV_UNICAST_H_
 
+#include "packet.h"
+
 #define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */
 #define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */
 
@@ -30,6 +32,27 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 void frag_list_free(struct list_head *head);
 int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv);
 int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
-		  struct batman_if *batman_if, uint8_t dstaddr[]);
+		  struct hard_iface *hard_iface, uint8_t dstaddr[]);
+
+static inline int frag_can_reassemble(struct sk_buff *skb, int mtu)
+{
+	struct unicast_frag_packet *unicast_packet;
+	int uneven_correction = 0;
+	unsigned int merged_size;
+
+	unicast_packet = (struct unicast_frag_packet *)skb->data;
+
+	if (unicast_packet->flags & UNI_FRAG_LARGETAIL) {
+		if (unicast_packet->flags & UNI_FRAG_HEAD)
+			uneven_correction = 1;
+		else
+			uneven_correction = -1;
+	}
+
+	merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2;
+	merged_size += sizeof(struct unicast_packet) + uneven_correction;
+
+	return merged_size <= mtu;
+}
 
 #endif /* _NET_BATMAN_ADV_UNICAST_H_ */
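Worked example for frag_can_reassemble() above: doubling one fragment's payload only reproduces the original length exactly when both halves were equal, so the +1/-1 correction undoes the odd-byte skew that UNI_FRAG_LARGETAIL records. A standalone check with made-up header sizes (the real ones come from packet.h):

#include <stdio.h>

#define UNICAST_HDR 10 /* illustrative stand-in sizes */
#define FRAG_HDR    20

static int can_reassemble(int skb_len, int large_tail, int head, int mtu)
{
	int uneven_correction = 0;
	unsigned int merged_size;

	if (large_tail)
		uneven_correction = head ? 1 : -1;

	merged_size = (skb_len - FRAG_HDR) * 2;
	merged_size += UNICAST_HDR + uneven_correction;
	return merged_size <= (unsigned int)mtu;
}

int main(void)
{
	/* head fragment of an odd-length packet: 2*750 + 10 + 1 = 1511,
	 * which exceeds an MTU of 1500, so reassembly must be refused */
	printf("%d\n", can_reassemble(770, 1, 1, 1500));
	return 0;
}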
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index de1022cacaf..f90212f4208 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
@@ -68,15 +68,16 @@ static void free_info(struct kref *ref)
 }
 
 /* Compare two vis packets, used by the hashing algorithm */
-static int vis_info_cmp(void *data1, void *data2)
+static int vis_info_cmp(struct hlist_node *node, void *data2)
 {
 	struct vis_info *d1, *d2;
 	struct vis_packet *p1, *p2;
-	d1 = data1;
+
+	d1 = container_of(node, struct vis_info, hash_entry);
 	d2 = data2;
 	p1 = (struct vis_packet *)d1->skb_packet->data;
 	p2 = (struct vis_packet *)d2->skb_packet->data;
-	return compare_orig(p1->vis_orig, p2->vis_orig);
+	return compare_eth(p1->vis_orig, p2->vis_orig);
 }
 
 /* hash function to choose an entry in a hash table of given size */
@@ -104,6 +105,34 @@ static int vis_info_choose(void *data, int size)
 	return hash % size;
 }
 
+static struct vis_info *vis_hash_find(struct bat_priv *bat_priv,
+				      void *data)
+{
+	struct hashtable_t *hash = bat_priv->vis_hash;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct vis_info *vis_info, *vis_info_tmp = NULL;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	index = vis_info_choose(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) {
+		if (!vis_info_cmp(node, data))
+			continue;
+
+		vis_info_tmp = vis_info;
+		break;
+	}
+	rcu_read_unlock();
+
+	return vis_info_tmp;
+}
+
 /* insert interface to the list of interfaces of one originator, if it
  * does not already exist in the list */
 static void vis_data_insert_interface(const uint8_t *interface,
@@ -114,7 +143,7 @@ static void vis_data_insert_interface(const uint8_t *interface,
 	struct hlist_node *pos;
 
 	hlist_for_each_entry(entry, pos, if_list, list) {
-		if (compare_orig(entry->addr, (void *)interface))
+		if (compare_eth(entry->addr, (void *)interface))
 			return;
 	}
 
@@ -166,7 +195,7 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry,
 	/* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */
 	if (primary && entry->quality == 0)
 		return sprintf(buff, "HNA %pM, ", entry->dest);
-	else if (compare_orig(entry->src, src))
+	else if (compare_eth(entry->src, src))
 		return sprintf(buff, "TQ %pM %d, ", entry->dest,
 			       entry->quality);
 
@@ -175,9 +204,8 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry,
 
 int vis_seq_print_text(struct seq_file *seq, void *offset)
 {
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct vis_info *info;
 	struct vis_packet *packet;
 	struct vis_info_entry *entries;
@@ -203,8 +231,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			info = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(info, node, head, hash_entry) {
 			packet = (struct vis_packet *)info->skb_packet->data;
 			entries = (struct vis_info_entry *)
 				((char *)packet + sizeof(struct vis_packet));
@@ -213,7 +241,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				if (entries[j].quality == 0)
 					continue;
 				compare =
-				 compare_orig(entries[j].src, packet->vis_orig);
+				 compare_eth(entries[j].src, packet->vis_orig);
 				vis_data_insert_interface(entries[j].src,
 							  &vis_if_list,
 							  compare);
@@ -223,7 +251,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 			buf_size += 18 + 26 * packet->entries;
 
 			/* add primary/secondary records */
-			if (compare_orig(entry->addr, packet->vis_orig))
+			if (compare_eth(entry->addr, packet->vis_orig))
 				buf_size +=
 					vis_data_count_prim_sec(&vis_if_list);
 
@@ -236,6 +264,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				kfree(entry);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	buff = kmalloc(buf_size, GFP_ATOMIC);
@@ -249,8 +278,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			info = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(info, node, head, hash_entry) {
 			packet = (struct vis_packet *)info->skb_packet->data;
 			entries = (struct vis_info_entry *)
 				((char *)packet + sizeof(struct vis_packet));
@@ -259,7 +288,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				if (entries[j].quality == 0)
 					continue;
 				compare =
-				 compare_orig(entries[j].src, packet->vis_orig);
+				 compare_eth(entries[j].src, packet->vis_orig);
 				vis_data_insert_interface(entries[j].src,
 							  &vis_if_list,
 							  compare);
@@ -277,7 +306,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 						entry->primary);
 
 			/* add primary/secondary records */
-			if (compare_orig(entry->addr, packet->vis_orig))
+			if (compare_eth(entry->addr, packet->vis_orig))
 				buff_pos +=
 					vis_data_read_prim_sec(buff + buff_pos,
 							       &vis_if_list);
@@ -291,6 +320,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				kfree(entry);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_bh(&bat_priv->vis_hash_lock);
@@ -345,7 +375,7 @@ static int recv_list_is_in(struct bat_priv *bat_priv,
 
 	spin_lock_bh(&bat_priv->vis_list_lock);
 	list_for_each_entry(entry, recv_list, list) {
-		if (memcmp(entry->mac, mac, ETH_ALEN) == 0) {
+		if (compare_eth(entry->mac, mac)) {
 			spin_unlock_bh(&bat_priv->vis_list_lock);
 			return 1;
 		}
@@ -381,8 +411,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
 			 sizeof(struct vis_packet));
 
 	memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN);
-	old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
-			     &search_elem);
+	old_info = vis_hash_find(bat_priv, &search_elem);
 	kfree_skb(search_elem.skb_packet);
 
 	if (old_info) {
@@ -442,7 +471,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
 
 	/* try to add it */
 	hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
-			      info);
+			      info, &info->hash_entry);
 	if (hash_added < 0) {
 		/* did not work (for some reason) */
 		kref_put(&info->refcount, free_info);
@@ -529,9 +558,8 @@ static int find_best_vis_server(struct bat_priv *bat_priv,
 				   struct vis_info *info)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	struct vis_packet *packet;
 	int best_tq = -1, i;
@@ -541,16 +569,17 @@ static int find_best_vis_server(struct bat_priv *bat_priv,
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			if ((orig_node) && (orig_node->router) &&
 			    (orig_node->flags & VIS_SERVER) &&
 			    (orig_node->router->tq_avg > best_tq)) {
 				best_tq = orig_node->router->tq_avg;
 				memcpy(packet->target_orig, orig_node->orig,
 				       ETH_ALEN);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	return best_tq;
@@ -573,9 +602,8 @@ static bool vis_packet_full(struct vis_info *info)
 static int generate_vis_packet(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	struct neigh_node *neigh_node;
 	struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info;
@@ -587,7 +615,6 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 	info->first_seen = jiffies;
 	packet->vis_type = atomic_read(&bat_priv->vis_mode);
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	memcpy(packet->target_orig, broadcast_addr, ETH_ALEN);
 	packet->ttl = TTL;
 	packet->seqno = htonl(ntohl(packet->seqno) + 1);
@@ -597,23 +624,21 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 	if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) {
 		best_tq = find_best_vis_server(bat_priv, info);
 
-		if (best_tq < 0) {
-			spin_unlock_bh(&bat_priv->orig_hash_lock);
+		if (best_tq < 0)
 			return -1;
-		}
 	}
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			neigh_node = orig_node->router;
 
 			if (!neigh_node)
 				continue;
 
-			if (!compare_orig(neigh_node->addr, orig_node->orig))
+			if (!compare_eth(neigh_node->addr, orig_node->orig))
 				continue;
 
 			if (neigh_node->if_incoming->if_status != IF_ACTIVE)
@@ -632,23 +657,19 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 			entry->quality = neigh_node->tq_avg;
 			packet->entries++;
 
-			if (vis_packet_full(info)) {
-				spin_unlock_bh(&bat_priv->orig_hash_lock);
-				return 0;
-			}
+			if (vis_packet_full(info))
+				goto unlock;
 		}
+		rcu_read_unlock();
 	}
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
-
 	hash = bat_priv->hna_local_hash;
 
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			hna_local_entry = bucket->data;
+		hlist_for_each_entry(hna_local_entry, node, head, hash_entry) {
 			entry = (struct vis_info_entry *)
 				skb_put(info->skb_packet,
 					sizeof(*entry));
@@ -666,6 +687,10 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 
 	spin_unlock_bh(&bat_priv->hna_lhash_lock);
 	return 0;
+
+unlock:
+	rcu_read_unlock();
+	return 0;
 }
 
 /* free old vis packets. Must be called with this vis_hash_lock
@@ -674,25 +699,22 @@ static void purge_vis_packets(struct bat_priv *bat_priv)
 {
 	int i;
 	struct hashtable_t *hash = bat_priv->vis_hash;
-	struct hlist_node *walk, *safe;
+	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct vis_info *info;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) {
-			info = bucket->data;
-
+		hlist_for_each_entry_safe(info, node, node_tmp,
+					  head, hash_entry) {
 			/* never purge own data. */
 			if (info == bat_priv->my_vis_info)
 				continue;
 
 			if (time_after(jiffies,
 				       info->first_seen + VIS_TIMEOUT * HZ)) {
-				hlist_del(walk);
-				kfree(bucket);
+				hlist_del(node);
 				send_list_del(info);
 				kref_put(&info->refcount, free_info);
 			}
@@ -704,27 +726,24 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv,
 				 struct vis_info *info)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	struct vis_packet *packet;
 	struct sk_buff *skb;
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	uint8_t dstaddr[ETH_ALEN];
 	int i;
 
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	packet = (struct vis_packet *)info->skb_packet->data;
 
 	/* send to all routers in range. */
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			/* if it's a vis server and reachable, send it. */
 			if ((!orig_node) || (!orig_node->router))
 				continue;
@@ -737,54 +756,61 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv,
 				continue;
 
 			memcpy(packet->target_orig, orig_node->orig, ETH_ALEN);
-			batman_if = orig_node->router->if_incoming;
+			hard_iface = orig_node->router->if_incoming;
 			memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
-			spin_unlock_bh(&bat_priv->orig_hash_lock);
 
 			skb = skb_clone(info->skb_packet, GFP_ATOMIC);
 			if (skb)
-				send_skb_packet(skb, batman_if, dstaddr);
+				send_skb_packet(skb, hard_iface, dstaddr);
 
-			spin_lock_bh(&bat_priv->orig_hash_lock);
 		}
-
+		rcu_read_unlock();
 	}
-
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 }
 
 static void unicast_vis_packet(struct bat_priv *bat_priv,
 			       struct vis_info *info)
 {
 	struct orig_node *orig_node;
+	struct neigh_node *neigh_node = NULL;
 	struct sk_buff *skb;
 	struct vis_packet *packet;
-	struct batman_if *batman_if;
-	uint8_t dstaddr[ETH_ALEN];
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	packet = (struct vis_packet *)info->skb_packet->data;
-	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
-						   compare_orig, choose_orig,
-						   packet->target_orig));
 
-	if ((!orig_node) || (!orig_node->router))
-		goto out;
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, packet->target_orig);
 
-	/* don't lock while sending the packets ... we therefore
-	 * copy the required data before sending */
-	batman_if = orig_node->router->if_incoming;
-	memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (!orig_node)
+		goto unlock;
+
+	neigh_node = orig_node->router;
+
+	if (!neigh_node)
+		goto unlock;
+
+	if (!atomic_inc_not_zero(&neigh_node->refcount)) {
+		neigh_node = NULL;
+		goto unlock;
+	}
+
+	rcu_read_unlock();
 
 	skb = skb_clone(info->skb_packet, GFP_ATOMIC);
 	if (skb)
-		send_skb_packet(skb, batman_if, dstaddr);
+		send_skb_packet(skb, neigh_node->if_incoming,
+				neigh_node->addr);
 
-	return;
+	goto out;
 
+unlock:
+	rcu_read_unlock();
 out:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
+	return;
 }
 
 /* only send one vis packet. called from send_vis_packets() */
@@ -896,7 +922,8 @@ int vis_init(struct bat_priv *bat_priv)
 	INIT_LIST_HEAD(&bat_priv->vis_send_list);
 
 	hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
-			      bat_priv->my_vis_info);
+			      bat_priv->my_vis_info,
+			      &bat_priv->my_vis_info->hash_entry);
 	if (hash_added < 0) {
 		pr_err("Can't add own vis packet into hash\n");
 		/* not in hash, need to remove it manually. */
@@ -918,10 +945,11 @@ err:
 }
 
 /* Decrease the reference count on a hash item info */
-static void free_info_ref(void *data, void *arg)
+static void free_info_ref(struct hlist_node *node, void *arg)
 {
-	struct vis_info *info = data;
+	struct vis_info *info;
 
+	info = container_of(node, struct vis_info, hash_entry);
 	send_list_del(info);
 	kref_put(&info->refcount, free_info);
 }
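vis_hash_find() has the same shape as the other typed lookup helpers introduced by this series: hash the key once with the table's choose function, then compare only within that bucket. A rough userspace analogue without the RCU machinery (names, the 6-byte key and the mixing constants are illustrative; the shifts follow the one-at-a-time pattern the batman-adv choose functions use):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct entry {
	unsigned char key[6]; /* an ethernet-style address */
	struct entry *next;
};

struct table {
	struct entry **buckets;
	int size;
};

/* one-at-a-time style mixing over the key bytes */
static int choose(const unsigned char *key, int size)
{
	unsigned int hash = 0;
	int i;

	for (i = 0; i < 6; i++) {
		hash += key[i];
		hash += (hash << 10);
		hash ^= (hash >> 6);
	}
	hash += (hash << 3);
	hash ^= (hash >> 11);
	hash += (hash << 15);
	return hash % size;
}

/* typed lookup: hash the key once, then scan only that bucket */
static struct entry *table_find(struct table *t, const unsigned char *key)
{
	struct entry *e;

	for (e = t->buckets[choose(key, t->size)]; e; e = e->next)
		if (memcmp(e->key, key, 6) == 0)
			return e;
	return NULL;
}

int main(void)
{
	struct entry a = { { 0xfe, 0, 0, 0, 0, 1 }, NULL };
	struct entry *buckets[8] = { NULL };
	struct table t = { buckets, 8 };

	buckets[choose(a.key, t.size)] = &a;
	printf("%s\n", table_find(&t, a.key) ? "found" : "missing");
	return 0;
}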
diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h
index 2c3b33089a9..31b820d07f2 100644
--- a/net/batman-adv/vis.h
+++ b/net/batman-adv/vis.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index ed371684c13..6ae5ec50858 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -27,31 +27,27 @@ menuconfig BT
 	  compile it as module (bluetooth).
 
 	  To use Linux Bluetooth subsystem, you will need several user-space
-	  utilities like hciconfig and hcid. These utilities and updates to
-	  Bluetooth kernel modules are provided in the BlueZ packages.
-	  For more information, see <http://www.bluez.org/>.
+	  utilities like hciconfig and bluetoothd. These utilities and updates
+	  to Bluetooth kernel modules are provided in the BlueZ packages. For
+	  more information, see <http://www.bluez.org/>.
+
+if BT != n
 
 config BT_L2CAP
-	tristate "L2CAP protocol support"
-	depends on BT
+	bool "L2CAP protocol support"
 	select CRC16
 	help
 	  L2CAP (Logical Link Control and Adaptation Protocol) provides
 	  connection oriented and connection-less data transport. L2CAP
 	  support is required for most Bluetooth applications.
 
-	  Say Y here to compile L2CAP support into the kernel or say M to
-	  compile it as module (l2cap).
-
 config BT_SCO
-	tristate "SCO links support"
-	depends on BT
+	bool "SCO links support"
 	help
 	  SCO link provides voice transport over Bluetooth. SCO support is
 	  required for voice applications like Headset and Audio.
 
-	  Say Y here to compile SCO support into the kernel or say M to
-	  compile it as module (sco).
+endif
 
 source "net/bluetooth/rfcomm/Kconfig"
 
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 250f954f021..f04fe9a9d63 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -3,11 +3,11 @@
 #
 
 obj-$(CONFIG_BT) += bluetooth.o
-obj-$(CONFIG_BT_L2CAP) += l2cap.o
-obj-$(CONFIG_BT_SCO) += sco.o
 obj-$(CONFIG_BT_RFCOMM) += rfcomm/
 obj-$(CONFIG_BT_BNEP) += bnep/
 obj-$(CONFIG_BT_CMTP) += cmtp/
 obj-$(CONFIG_BT_HIDP) += hidp/
 
 bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o
+bluetooth-$(CONFIG_BT_L2CAP) += l2cap_core.o l2cap_sock.o
+bluetooth-$(CONFIG_BT_SCO) += sco.o
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c4cf3f59500..8add9b49991 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -40,7 +40,7 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-#define VERSION "2.15"
+#define VERSION "2.16"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
@@ -199,14 +199,15 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 
 	BT_DBG("parent %p", parent);
 
+	local_bh_disable();
 	list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
 		sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
 
-		lock_sock(sk);
+		bh_lock_sock(sk);
 
 		/* FIXME: Is this check still needed */
 		if (sk->sk_state == BT_CLOSED) {
-			release_sock(sk);
+			bh_unlock_sock(sk);
 			bt_accept_unlink(sk);
 			continue;
 		}
@@ -216,12 +217,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 			bt_accept_unlink(sk);
 			if (newsock)
 				sock_graft(sk, newsock);
-			release_sock(sk);
+
+			bh_unlock_sock(sk);
+			local_bh_enable();
 			return sk;
 		}
 
-		release_sock(sk);
+		bh_unlock_sock(sk);
 	}
+	local_bh_enable();
+
 	return NULL;
 }
 EXPORT_SYMBOL(bt_accept_dequeue);
@@ -240,7 +245,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (flags & (MSG_OOB))
 		return -EOPNOTSUPP;
 
-	if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) {
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb) {
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
 			return 0;
 		return err;
@@ -323,7 +329,8 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		if (copied >= target)
 			break;
 
-		if ((err = sock_error(sk)) != 0)
+		err = sock_error(sk);
+		if (err)
 			break;
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
 			break;
@@ -390,7 +397,7 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
 	return 0;
 }
 
-unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait)
+unsigned int bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask = 0;
@@ -538,13 +545,39 @@ static int __init bt_init(void)
 
 	BT_INFO("HCI device and connection manager initialized");
 
-	hci_sock_init();
+	err = hci_sock_init();
+	if (err < 0)
+		goto error;
+
+	err = l2cap_init();
+	if (err < 0)
+		goto sock_err;
+
+	err = sco_init();
+	if (err < 0) {
+		l2cap_exit();
+		goto sock_err;
+	}
 
 	return 0;
+
+sock_err:
+	hci_sock_cleanup();
+
+error:
+	sock_unregister(PF_BLUETOOTH);
+	bt_sysfs_cleanup();
+
+	return err;
 }
 
 static void __exit bt_exit(void)
 {
+
+	sco_exit();
+
+	l2cap_exit();
+
 	hci_sock_cleanup();
 
 	sock_unregister(PF_BLUETOOTH);
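bt_init() now unwinds in reverse order on failure, the usual staged-bring-up idiom, and bt_exit() tears the same stages down in the opposite order. A generic userspace sketch of the same goto ladder (all stage names are hypothetical):

#include <stdio.h>

static int step_a(void) { return 0; }
static int step_b(void) { return 0; }
static int step_c(void) { return -1; } /* pretend the last stage fails */
static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

/* staged bring-up with unwinding, in the style of bt_init() above */
static int init_all(void)
{
	int err;

	err = step_a();
	if (err < 0)
		goto error;

	err = step_b();
	if (err < 0)
		goto a_err;

	err = step_c();
	if (err < 0)
		goto b_err;

	return 0;

b_err:
	undo_b();
a_err:
	undo_a();
error:
	return err;
}

int main(void)
{
	return init_all() ? 1 : 0;
}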
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 5868597534e..03d4d1245d5 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -708,8 +708,6 @@ static int __init bnep_init(void)
 {
 	char flt[50] = "";
 
-	l2cap_load();
-
 #ifdef CONFIG_BT_BNEP_PROTO_FILTER
 	strcat(flt, "protocol ");
 #endif
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 2862f53b66b..d935da71ab3 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -88,6 +88,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 		sockfd_put(nsock);
 		return -EBADFD;
 	}
+	ca.device[sizeof(ca.device)-1] = 0;
 
 	err = bnep_add_connection(&ca, nsock);
 	if (!err) {
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 3487cfe74ae..67cff810c77 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -155,7 +155,8 @@ static void cmtp_send_interopmsg(struct cmtp_session *session,
 
 	BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum);
 
-	if (!(skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC))) {
+	skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC);
+	if (!skb) {
 		BT_ERR("Can't allocate memory for interoperability packet");
 		return;
 	}
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 8e5f292529a..964ea9126f9 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -115,7 +115,8 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
 
 	size = (skb) ? skb->len + count : count;
 
-	if (!(nskb = alloc_skb(size, GFP_ATOMIC))) {
+	nskb = alloc_skb(size, GFP_ATOMIC);
+	if (!nskb) {
 		BT_ERR("Can't allocate memory for CAPI message");
 		return;
 	}
@@ -216,7 +217,8 @@ static void cmtp_process_transmit(struct cmtp_session *session)
 
 	BT_DBG("session %p", session);
 
-	if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) {
+	nskb = alloc_skb(session->mtu, GFP_ATOMIC);
+	if (!nskb) {
 		BT_ERR("Can't allocate memory for new frame");
 		return;
 	}
@@ -224,7 +226,8 @@ static void cmtp_process_transmit(struct cmtp_session *session)
 	while ((skb = skb_dequeue(&session->transmit))) {
 		struct cmtp_scb *scb = (void *) skb->cb;
 
-		if ((tail = (session->mtu - nskb->len)) < 5) {
+		tail = session->mtu - nskb->len;
+		if (tail < 5) {
 			cmtp_send_frame(session, nskb->data, nskb->len);
 			skb_trim(nskb, 0);
 			tail = session->mtu;
@@ -466,8 +469,6 @@ int cmtp_get_conninfo(struct cmtp_conninfo *ci)
 
 static int __init cmtp_init(void)
 {
-	l2cap_load();
-
 	BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
 
 	cmtp_init_sockets();
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 99cd8d9d891..7a6f56b2f49 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -45,6 +45,33 @@
45#include <net/bluetooth/bluetooth.h> 45#include <net/bluetooth/bluetooth.h>
46#include <net/bluetooth/hci_core.h> 46#include <net/bluetooth/hci_core.h>
47 47
48static void hci_le_connect(struct hci_conn *conn)
49{
50 struct hci_dev *hdev = conn->hdev;
51 struct hci_cp_le_create_conn cp;
52
53 conn->state = BT_CONNECT;
54 conn->out = 1;
55 conn->link_mode |= HCI_LM_MASTER;
56
57 memset(&cp, 0, sizeof(cp));
58 cp.scan_interval = cpu_to_le16(0x0004);
59 cp.scan_window = cpu_to_le16(0x0004);
60 bacpy(&cp.peer_addr, &conn->dst);
61 cp.conn_interval_min = cpu_to_le16(0x0008);
62 cp.conn_interval_max = cpu_to_le16(0x0100);
63 cp.supervision_timeout = cpu_to_le16(0x0064);
64 cp.min_ce_len = cpu_to_le16(0x0001);
65 cp.max_ce_len = cpu_to_le16(0x0001);
66
67 hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
68}
69
70static void hci_le_connect_cancel(struct hci_conn *conn)
71{
72 hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
73}
74
48void hci_acl_connect(struct hci_conn *conn) 75void hci_acl_connect(struct hci_conn *conn)
49{ 76{
50 struct hci_dev *hdev = conn->hdev; 77 struct hci_dev *hdev = conn->hdev;
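
The hard-coded LE Create Connection parameters above are in the spec's units: scan interval and window in 0.625 ms steps, connection interval in 1.25 ms steps, supervision timeout in 10 ms steps, and CE length again in 0.625 ms steps. A standalone sketch to verify what the constants work out to in milliseconds:

#include <stdio.h>

int main(void)
{
	printf("scan interval/window: %.3f ms\n", 0x0004 * 0.625);
	printf("conn interval: %.2f .. %.2f ms\n",
					0x0008 * 1.25, 0x0100 * 1.25);
	printf("supervision timeout: %d ms\n", 0x0064 * 10);
	printf("CE length: %.3f ms\n", 0x0001 * 0.625);
	return 0;
}
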
@@ -156,6 +183,26 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
156 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); 183 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp);
157} 184}
158 185
186void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
187 u16 latency, u16 to_multiplier)
188{
189 struct hci_cp_le_conn_update cp;
190 struct hci_dev *hdev = conn->hdev;
191
192 memset(&cp, 0, sizeof(cp));
193
194 cp.handle = cpu_to_le16(conn->handle);
195 cp.conn_interval_min = cpu_to_le16(min);
196 cp.conn_interval_max = cpu_to_le16(max);
197 cp.conn_latency = cpu_to_le16(latency);
198 cp.supervision_timeout = cpu_to_le16(to_multiplier);
199 cp.min_ce_len = cpu_to_le16(0x0001);
200 cp.max_ce_len = cpu_to_le16(0x0001);
201
202 hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp);
203}
204EXPORT_SYMBOL(hci_le_conn_update);
205
159/* Device _must_ be locked */ 206/* Device _must_ be locked */
160void hci_sco_setup(struct hci_conn *conn, __u8 status) 207void hci_sco_setup(struct hci_conn *conn, __u8 status)
161{ 208{
@@ -193,8 +240,12 @@ static void hci_conn_timeout(unsigned long arg)
193 switch (conn->state) { 240 switch (conn->state) {
194 case BT_CONNECT: 241 case BT_CONNECT:
195 case BT_CONNECT2: 242 case BT_CONNECT2:
196 if (conn->type == ACL_LINK && conn->out) 243 if (conn->out) {
197 hci_acl_connect_cancel(conn); 244 if (conn->type == ACL_LINK)
245 hci_acl_connect_cancel(conn);
246 else if (conn->type == LE_LINK)
247 hci_le_connect_cancel(conn);
248 }
198 break; 249 break;
199 case BT_CONFIG: 250 case BT_CONFIG:
200 case BT_CONNECTED: 251 case BT_CONNECTED:
@@ -234,6 +285,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
234 conn->mode = HCI_CM_ACTIVE; 285 conn->mode = HCI_CM_ACTIVE;
235 conn->state = BT_OPEN; 286 conn->state = BT_OPEN;
236 conn->auth_type = HCI_AT_GENERAL_BONDING; 287 conn->auth_type = HCI_AT_GENERAL_BONDING;
288 conn->io_capability = hdev->io_capability;
289 conn->remote_auth = 0xff;
237 290
238 conn->power_save = 1; 291 conn->power_save = 1;
239 conn->disc_timeout = HCI_DISCONN_TIMEOUT; 292 conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -295,6 +348,11 @@ int hci_conn_del(struct hci_conn *conn)
295 348
296 /* Unacked frames */ 349 /* Unacked frames */
297 hdev->acl_cnt += conn->sent; 350 hdev->acl_cnt += conn->sent;
351 } else if (conn->type == LE_LINK) {
352 if (hdev->le_pkts)
353 hdev->le_cnt += conn->sent;
354 else
355 hdev->acl_cnt += conn->sent;
298 } else { 356 } else {
299 struct hci_conn *acl = conn->link; 357 struct hci_conn *acl = conn->link;
300 if (acl) { 358 if (acl) {
@@ -360,15 +418,31 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
360} 418}
361EXPORT_SYMBOL(hci_get_route); 419EXPORT_SYMBOL(hci_get_route);
362 420
363/* Create SCO or ACL connection. 421/* Create SCO, ACL or LE connection.
364 * Device _must_ be locked */ 422 * Device _must_ be locked */
365struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type) 423struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type)
366{ 424{
367 struct hci_conn *acl; 425 struct hci_conn *acl;
368 struct hci_conn *sco; 426 struct hci_conn *sco;
427 struct hci_conn *le;
369 428
370 BT_DBG("%s dst %s", hdev->name, batostr(dst)); 429 BT_DBG("%s dst %s", hdev->name, batostr(dst));
371 430
431 if (type == LE_LINK) {
432 le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
433 if (le)
434 return ERR_PTR(-EBUSY);
435 le = hci_conn_add(hdev, LE_LINK, dst);
436 if (!le)
437 return ERR_PTR(-ENOMEM);
438 if (le->state == BT_OPEN)
439 hci_le_connect(le);
440
441 hci_conn_hold(le);
442
443 return le;
444 }
445
372 acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); 446 acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
373 if (!acl) { 447 if (!acl) {
374 acl = hci_conn_add(hdev, ACL_LINK, dst); 448 acl = hci_conn_add(hdev, ACL_LINK, dst);
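
Note the asymmetry this hunk introduces: the LE path reports failure through ERR_PTR(-EBUSY) or ERR_PTR(-ENOMEM), while the ACL path below still returns NULL. A hypothetical LE caller, assuming the usual <linux/err.h> helpers and the bluetooth headers; this is a sketch, not code from the patch:

#include <linux/err.h>

/* hci_connect() takes a reference on the LE connection; drop it when done */
static int le_connect_sketch(struct hci_dev *hdev, bdaddr_t *dst)
{
	struct hci_conn *conn;

	conn = hci_connect(hdev, LE_LINK, dst, BT_SECURITY_LOW,
							HCI_AT_NO_BONDING);
	if (IS_ERR(conn))
		return PTR_ERR(conn);	/* -EBUSY or -ENOMEM from above */

	/* ... wait for BT_CONNECTED, transfer data ... */

	hci_conn_put(conn);
	return 0;
}
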
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 9c4541bc488..b372fb8bcdc 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -41,6 +41,7 @@
41#include <linux/interrupt.h> 41#include <linux/interrupt.h>
42#include <linux/notifier.h> 42#include <linux/notifier.h>
43#include <linux/rfkill.h> 43#include <linux/rfkill.h>
44#include <linux/timer.h>
44#include <net/sock.h> 45#include <net/sock.h>
45 46
46#include <asm/system.h> 47#include <asm/system.h>
@@ -50,6 +51,8 @@
50#include <net/bluetooth/bluetooth.h> 51#include <net/bluetooth/bluetooth.h>
51#include <net/bluetooth/hci_core.h> 52#include <net/bluetooth/hci_core.h>
52 53
54#define AUTO_OFF_TIMEOUT 2000
55
53static void hci_cmd_task(unsigned long arg); 56static void hci_cmd_task(unsigned long arg);
54static void hci_rx_task(unsigned long arg); 57static void hci_rx_task(unsigned long arg);
55static void hci_tx_task(unsigned long arg); 58static void hci_tx_task(unsigned long arg);
@@ -95,11 +98,10 @@ void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result)
95{ 98{
96 BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result); 99 BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result);
97 100
98 /* If the request has set req_last_cmd (typical for multi-HCI 101 /* If this is the init phase check if the completed command matches
99 * command requests) check if the completed command matches 102 * the last init command, and if not just return.
100 * this, and if not just return. Single HCI command requests 103 */
101 * typically leave req_last_cmd as 0 */ 104 if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd)
102 if (hdev->req_last_cmd && cmd != hdev->req_last_cmd)
103 return; 105 return;
104 106
105 if (hdev->req_status == HCI_REQ_PEND) { 107 if (hdev->req_status == HCI_REQ_PEND) {
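
The new gate relies on hci_send_cmd() recording every opcode sent while HCI_INIT is set (see the hci_send_cmd() hunk further down), so only the completion of the last queued init command wakes the waiter in __hci_request(). A toy model of that ordering, with the opcodes from the init sequence above:

#include <stdio.h>

static unsigned int init_last_cmd;

static void send_cmd(unsigned int opcode)
{
	init_last_cmd = opcode;	/* what hci_send_cmd() does under HCI_INIT */
}

static void req_complete(unsigned int opcode)
{
	if (opcode != init_last_cmd) {
		printf("0x%04x ignored (intermediate init command)\n", opcode);
		return;
	}
	printf("0x%04x completes the request, waiter woken\n", opcode);
}

int main(void)
{
	send_cmd(0x0c16);	/* Write CA Timeout */
	send_cmd(0x0c12);	/* Delete Stored Link Key, sent last */
	req_complete(0x0c16);
	req_complete(0x0c12);
	return 0;
}
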
@@ -122,7 +124,7 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
122 124
123/* Execute request and wait for completion. */ 125/* Execute request and wait for completion. */
124static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), 126static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt),
125 unsigned long opt, __u32 timeout) 127 unsigned long opt, __u32 timeout)
126{ 128{
127 DECLARE_WAITQUEUE(wait, current); 129 DECLARE_WAITQUEUE(wait, current);
128 int err = 0; 130 int err = 0;
@@ -156,7 +158,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
156 break; 158 break;
157 } 159 }
158 160
159 hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0; 161 hdev->req_status = hdev->req_result = 0;
160 162
161 BT_DBG("%s end: err %d", hdev->name, err); 163 BT_DBG("%s end: err %d", hdev->name, err);
162 164
@@ -164,7 +166,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
164} 166}
165 167
166static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), 168static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt),
167 unsigned long opt, __u32 timeout) 169 unsigned long opt, __u32 timeout)
168{ 170{
169 int ret; 171 int ret;
170 172
@@ -189,6 +191,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
189 191
190static void hci_init_req(struct hci_dev *hdev, unsigned long opt) 192static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
191{ 193{
194 struct hci_cp_delete_stored_link_key cp;
192 struct sk_buff *skb; 195 struct sk_buff *skb;
193 __le16 param; 196 __le16 param;
194 __u8 flt_type; 197 __u8 flt_type;
@@ -252,15 +255,21 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
252 flt_type = HCI_FLT_CLEAR_ALL; 255 flt_type = HCI_FLT_CLEAR_ALL;
253 hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); 256 hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
254 257
255 /* Page timeout ~20 secs */
256 param = cpu_to_le16(0x8000);
257 hci_send_cmd(hdev, HCI_OP_WRITE_PG_TIMEOUT, 2, &param);
258
259 /* Connection accept timeout ~20 secs */ 258 /* Connection accept timeout ~20 secs */
260 param = cpu_to_le16(0x7d00); 259 param = cpu_to_le16(0x7d00);
261 hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); 260 hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
262 261
263 hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT; 262 bacpy(&cp.bdaddr, BDADDR_ANY);
263 cp.delete_all = 1;
264 hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
265}
266
267static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt)
268{
269 BT_DBG("%s", hdev->name);
270
271 /* Read LE buffer size */
272 hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
264} 273}
265 274
266static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) 275static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
@@ -429,7 +438,8 @@ int hci_inquiry(void __user *arg)
429 if (copy_from_user(&ir, ptr, sizeof(ir))) 438 if (copy_from_user(&ir, ptr, sizeof(ir)))
430 return -EFAULT; 439 return -EFAULT;
431 440
432 if (!(hdev = hci_dev_get(ir.dev_id))) 441 hdev = hci_dev_get(ir.dev_id);
442 if (!hdev)
433 return -ENODEV; 443 return -ENODEV;
434 444
435 hci_dev_lock_bh(hdev); 445 hci_dev_lock_bh(hdev);
@@ -455,7 +465,7 @@ int hci_inquiry(void __user *arg)
455 /* cache_dump can't sleep. Therefore we allocate temp buffer and then 465 /* cache_dump can't sleep. Therefore we allocate temp buffer and then
456 * copy it to the user space. 466 * copy it to the user space.
457 */ 467 */
458 buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL); 468 buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL);
459 if (!buf) { 469 if (!buf) {
460 err = -ENOMEM; 470 err = -ENOMEM;
461 goto done; 471 goto done;
@@ -489,7 +499,8 @@ int hci_dev_open(__u16 dev)
489 struct hci_dev *hdev; 499 struct hci_dev *hdev;
490 int ret = 0; 500 int ret = 0;
491 501
492 if (!(hdev = hci_dev_get(dev))) 502 hdev = hci_dev_get(dev);
503 if (!hdev)
493 return -ENODEV; 504 return -ENODEV;
494 505
495 BT_DBG("%s %p", hdev->name, hdev); 506 BT_DBG("%s %p", hdev->name, hdev);
@@ -521,11 +532,15 @@ int hci_dev_open(__u16 dev)
521 if (!test_bit(HCI_RAW, &hdev->flags)) { 532 if (!test_bit(HCI_RAW, &hdev->flags)) {
522 atomic_set(&hdev->cmd_cnt, 1); 533 atomic_set(&hdev->cmd_cnt, 1);
523 set_bit(HCI_INIT, &hdev->flags); 534 set_bit(HCI_INIT, &hdev->flags);
535 hdev->init_last_cmd = 0;
524 536
525 //__hci_request(hdev, hci_reset_req, 0, HZ);
526 ret = __hci_request(hdev, hci_init_req, 0, 537 ret = __hci_request(hdev, hci_init_req, 0,
527 msecs_to_jiffies(HCI_INIT_TIMEOUT)); 538 msecs_to_jiffies(HCI_INIT_TIMEOUT));
528 539
540 if (lmp_le_capable(hdev))
541 ret = __hci_request(hdev, hci_le_init_req, 0,
542 msecs_to_jiffies(HCI_INIT_TIMEOUT));
543
529 clear_bit(HCI_INIT, &hdev->flags); 544 clear_bit(HCI_INIT, &hdev->flags);
530 } 545 }
531 546
@@ -533,6 +548,8 @@ int hci_dev_open(__u16 dev)
533 hci_dev_hold(hdev); 548 hci_dev_hold(hdev);
534 set_bit(HCI_UP, &hdev->flags); 549 set_bit(HCI_UP, &hdev->flags);
535 hci_notify(hdev, HCI_DEV_UP); 550 hci_notify(hdev, HCI_DEV_UP);
551 if (!test_bit(HCI_SETUP, &hdev->flags))
552 mgmt_powered(hdev->id, 1);
536 } else { 553 } else {
537 /* Init failed, cleanup */ 554 /* Init failed, cleanup */
538 tasklet_kill(&hdev->rx_task); 555 tasklet_kill(&hdev->rx_task);
@@ -606,6 +623,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
606 623
607 /* Drop last sent command */ 624 /* Drop last sent command */
608 if (hdev->sent_cmd) { 625 if (hdev->sent_cmd) {
626 del_timer_sync(&hdev->cmd_timer);
609 kfree_skb(hdev->sent_cmd); 627 kfree_skb(hdev->sent_cmd);
610 hdev->sent_cmd = NULL; 628 hdev->sent_cmd = NULL;
611 } 629 }
@@ -614,6 +632,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
614 * and no tasks are scheduled. */ 632 * and no tasks are scheduled. */
615 hdev->close(hdev); 633 hdev->close(hdev);
616 634
635 mgmt_powered(hdev->id, 0);
636
617 /* Clear flags */ 637 /* Clear flags */
618 hdev->flags = 0; 638 hdev->flags = 0;
619 639
@@ -664,7 +684,7 @@ int hci_dev_reset(__u16 dev)
664 hdev->flush(hdev); 684 hdev->flush(hdev);
665 685
666 atomic_set(&hdev->cmd_cnt, 1); 686 atomic_set(&hdev->cmd_cnt, 1);
667 hdev->acl_cnt = 0; hdev->sco_cnt = 0; 687 hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
668 688
669 if (!test_bit(HCI_RAW, &hdev->flags)) 689 if (!test_bit(HCI_RAW, &hdev->flags))
670 ret = __hci_request(hdev, hci_reset_req, 0, 690 ret = __hci_request(hdev, hci_reset_req, 0,
@@ -793,9 +813,17 @@ int hci_get_dev_list(void __user *arg)
793 read_lock_bh(&hci_dev_list_lock); 813 read_lock_bh(&hci_dev_list_lock);
794 list_for_each(p, &hci_dev_list) { 814 list_for_each(p, &hci_dev_list) {
795 struct hci_dev *hdev; 815 struct hci_dev *hdev;
816
796 hdev = list_entry(p, struct hci_dev, list); 817 hdev = list_entry(p, struct hci_dev, list);
818
819 hci_del_off_timer(hdev);
820
821 if (!test_bit(HCI_MGMT, &hdev->flags))
822 set_bit(HCI_PAIRABLE, &hdev->flags);
823
797 (dr + n)->dev_id = hdev->id; 824 (dr + n)->dev_id = hdev->id;
798 (dr + n)->dev_opt = hdev->flags; 825 (dr + n)->dev_opt = hdev->flags;
826
799 if (++n >= dev_num) 827 if (++n >= dev_num)
800 break; 828 break;
801 } 829 }
@@ -823,6 +851,11 @@ int hci_get_dev_info(void __user *arg)
823 if (!hdev) 851 if (!hdev)
824 return -ENODEV; 852 return -ENODEV;
825 853
854 hci_del_off_timer(hdev);
855
856 if (!test_bit(HCI_MGMT, &hdev->flags))
857 set_bit(HCI_PAIRABLE, &hdev->flags);
858
826 strcpy(di.name, hdev->name); 859 strcpy(di.name, hdev->name);
827 di.bdaddr = hdev->bdaddr; 860 di.bdaddr = hdev->bdaddr;
828 di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); 861 di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4);
@@ -891,6 +924,159 @@ void hci_free_dev(struct hci_dev *hdev)
891} 924}
892EXPORT_SYMBOL(hci_free_dev); 925EXPORT_SYMBOL(hci_free_dev);
893 926
927static void hci_power_on(struct work_struct *work)
928{
929 struct hci_dev *hdev = container_of(work, struct hci_dev, power_on);
930
931 BT_DBG("%s", hdev->name);
932
933 if (hci_dev_open(hdev->id) < 0)
934 return;
935
936 if (test_bit(HCI_AUTO_OFF, &hdev->flags))
937 mod_timer(&hdev->off_timer,
938 jiffies + msecs_to_jiffies(AUTO_OFF_TIMEOUT));
939
940 if (test_and_clear_bit(HCI_SETUP, &hdev->flags))
941 mgmt_index_added(hdev->id);
942}
943
944static void hci_power_off(struct work_struct *work)
945{
946 struct hci_dev *hdev = container_of(work, struct hci_dev, power_off);
947
948 BT_DBG("%s", hdev->name);
949
950 hci_dev_close(hdev->id);
951}
952
953static void hci_auto_off(unsigned long data)
954{
955 struct hci_dev *hdev = (struct hci_dev *) data;
956
957 BT_DBG("%s", hdev->name);
958
959 clear_bit(HCI_AUTO_OFF, &hdev->flags);
960
961 queue_work(hdev->workqueue, &hdev->power_off);
962}
963
964void hci_del_off_timer(struct hci_dev *hdev)
965{
966 BT_DBG("%s", hdev->name);
967
968 clear_bit(HCI_AUTO_OFF, &hdev->flags);
969 del_timer(&hdev->off_timer);
970}
971
972int hci_uuids_clear(struct hci_dev *hdev)
973{
974 struct list_head *p, *n;
975
976 list_for_each_safe(p, n, &hdev->uuids) {
977 struct bt_uuid *uuid;
978
979 uuid = list_entry(p, struct bt_uuid, list);
980
981 list_del(p);
982 kfree(uuid);
983 }
984
985 return 0;
986}
987
988int hci_link_keys_clear(struct hci_dev *hdev)
989{
990 struct list_head *p, *n;
991
992 list_for_each_safe(p, n, &hdev->link_keys) {
993 struct link_key *key;
994
995 key = list_entry(p, struct link_key, list);
996
997 list_del(p);
998 kfree(key);
999 }
1000
1001 return 0;
1002}
1003
1004struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
1005{
1006 struct list_head *p;
1007
1008 list_for_each(p, &hdev->link_keys) {
1009 struct link_key *k;
1010
1011 k = list_entry(p, struct link_key, list);
1012
1013 if (bacmp(bdaddr, &k->bdaddr) == 0)
1014 return k;
1015 }
1016
1017 return NULL;
1018}
1019
1020int hci_add_link_key(struct hci_dev *hdev, int new_key, bdaddr_t *bdaddr,
1021 u8 *val, u8 type, u8 pin_len)
1022{
1023 struct link_key *key, *old_key;
1024 u8 old_key_type;
1025
1026 old_key = hci_find_link_key(hdev, bdaddr);
1027 if (old_key) {
1028 old_key_type = old_key->type;
1029 key = old_key;
1030 } else {
1031 old_key_type = 0xff;
1032 key = kzalloc(sizeof(*key), GFP_ATOMIC);
1033 if (!key)
1034 return -ENOMEM;
1035 list_add(&key->list, &hdev->link_keys);
1036 }
1037
1038 BT_DBG("%s key for %s type %u", hdev->name, batostr(bdaddr), type);
1039
1040 bacpy(&key->bdaddr, bdaddr);
1041 memcpy(key->val, val, 16);
1042 key->type = type;
1043 key->pin_len = pin_len;
1044
1045 if (new_key)
1046 mgmt_new_key(hdev->id, key, old_key_type);
1047
1048 if (type == 0x06)
1049 key->type = old_key_type;
1050
1051 return 0;
1052}
1053
1054int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
1055{
1056 struct link_key *key;
1057
1058 key = hci_find_link_key(hdev, bdaddr);
1059 if (!key)
1060 return -ENOENT;
1061
1062 BT_DBG("%s removing %s", hdev->name, batostr(bdaddr));
1063
1064 list_del(&key->list);
1065 kfree(key);
1066
1067 return 0;
1068}
1069
1070/* HCI command timer function */
1071static void hci_cmd_timer(unsigned long arg)
1072{
1073 struct hci_dev *hdev = (void *) arg;
1074
1075 BT_ERR("%s command tx timeout", hdev->name);
1076 atomic_set(&hdev->cmd_cnt, 1);
1077 tasklet_schedule(&hdev->cmd_task);
1078}
1079
894/* Register HCI device */ 1080/* Register HCI device */
895int hci_register_dev(struct hci_dev *hdev) 1081int hci_register_dev(struct hci_dev *hdev)
896{ 1082{
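
The new link-key store is a plain linked list keyed by bdaddr: hci_add_link_key() reuses an existing entry, remembers the old type (0xff when there was none) for the mgmt event, and for a changed combination key (type 0x06) keeps the previous type, since the controller only reports that the key value changed. A userspace sketch of the same replace-or-insert logic, with hypothetical names and the mgmt signalling omitted:

#include <stdlib.h>
#include <string.h>

struct key {
	struct key *next;
	unsigned char bdaddr[6];
	unsigned char val[16];
	unsigned char type;
};

static struct key *keys;

static struct key *find_key(const unsigned char *bdaddr)
{
	struct key *k;

	for (k = keys; k; k = k->next)
		if (!memcmp(k->bdaddr, bdaddr, 6))
			return k;
	return NULL;
}

static int add_key(const unsigned char *bdaddr,
			const unsigned char val[16], unsigned char type)
{
	struct key *k = find_key(bdaddr);
	unsigned char old_type = k ? k->type : 0xff;

	if (!k) {
		k = calloc(1, sizeof(*k));
		if (!k)
			return -1;
		memcpy(k->bdaddr, bdaddr, 6);
		k->next = keys;
		keys = k;
	}

	memcpy(k->val, val, 16);
	/* 0x06 = changed combination key: keep whatever type we had */
	k->type = (type == 0x06) ? old_type : type;

	return 0;
}
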
@@ -923,6 +1109,7 @@ int hci_register_dev(struct hci_dev *hdev)
923 hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); 1109 hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1);
924 hdev->esco_type = (ESCO_HV1); 1110 hdev->esco_type = (ESCO_HV1);
925 hdev->link_mode = (HCI_LM_ACCEPT); 1111 hdev->link_mode = (HCI_LM_ACCEPT);
1112 hdev->io_capability = 0x03; /* No Input No Output */
926 1113
927 hdev->idle_timeout = 0; 1114 hdev->idle_timeout = 0;
928 hdev->sniff_max_interval = 800; 1115 hdev->sniff_max_interval = 800;
@@ -936,6 +1123,8 @@ int hci_register_dev(struct hci_dev *hdev)
936 skb_queue_head_init(&hdev->cmd_q); 1123 skb_queue_head_init(&hdev->cmd_q);
937 skb_queue_head_init(&hdev->raw_q); 1124 skb_queue_head_init(&hdev->raw_q);
938 1125
1126 setup_timer(&hdev->cmd_timer, hci_cmd_timer, (unsigned long) hdev);
1127
939 for (i = 0; i < NUM_REASSEMBLY; i++) 1128 for (i = 0; i < NUM_REASSEMBLY; i++)
940 hdev->reassembly[i] = NULL; 1129 hdev->reassembly[i] = NULL;
941 1130
@@ -948,6 +1137,14 @@ int hci_register_dev(struct hci_dev *hdev)
948 1137
949 INIT_LIST_HEAD(&hdev->blacklist); 1138 INIT_LIST_HEAD(&hdev->blacklist);
950 1139
1140 INIT_LIST_HEAD(&hdev->uuids);
1141
1142 INIT_LIST_HEAD(&hdev->link_keys);
1143
1144 INIT_WORK(&hdev->power_on, hci_power_on);
1145 INIT_WORK(&hdev->power_off, hci_power_off);
1146 setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev);
1147
951 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); 1148 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
952 1149
953 atomic_set(&hdev->promisc, 0); 1150 atomic_set(&hdev->promisc, 0);
@@ -969,7 +1166,10 @@ int hci_register_dev(struct hci_dev *hdev)
969 } 1166 }
970 } 1167 }
971 1168
972 mgmt_index_added(hdev->id); 1169 set_bit(HCI_AUTO_OFF, &hdev->flags);
1170 set_bit(HCI_SETUP, &hdev->flags);
1171 queue_work(hdev->workqueue, &hdev->power_on);
1172
973 hci_notify(hdev, HCI_DEV_REG); 1173 hci_notify(hdev, HCI_DEV_REG);
974 1174
975 return id; 1175 return id;
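
Registration now defers the mgmt index_added signal to the power_on work: the adapter comes up with HCI_AUTO_OFF set, and unless something clears that flag within AUTO_OFF_TIMEOUT (2 s; hci_get_dev_list() and hci_get_dev_info() above do so on behalf of legacy userspace), hci_auto_off() powers it back down. Because timer callbacks run in softirq context while hci_dev_close() can sleep, the timer only queues work. A kernel-style sketch of that handoff pattern, not the patch's own code:

#include <linux/timer.h>
#include <linux/workqueue.h>

struct adapter_sketch {
	struct timer_list off_timer;
	struct work_struct power_off;
	struct workqueue_struct *wq;
};

/* timer callback: atomic context, so defer the sleepy close to a work item */
static void auto_off_sketch(unsigned long data)
{
	struct adapter_sketch *a = (struct adapter_sketch *) data;

	queue_work(a->wq, &a->power_off);
}
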
@@ -999,7 +1199,10 @@ int hci_unregister_dev(struct hci_dev *hdev)
999 for (i = 0; i < NUM_REASSEMBLY; i++) 1199 for (i = 0; i < NUM_REASSEMBLY; i++)
1000 kfree_skb(hdev->reassembly[i]); 1200 kfree_skb(hdev->reassembly[i]);
1001 1201
1002 mgmt_index_removed(hdev->id); 1202 if (!test_bit(HCI_INIT, &hdev->flags) &&
1203 !test_bit(HCI_SETUP, &hdev->flags))
1204 mgmt_index_removed(hdev->id);
1205
1003 hci_notify(hdev, HCI_DEV_UNREG); 1206 hci_notify(hdev, HCI_DEV_UNREG);
1004 1207
1005 if (hdev->rfkill) { 1208 if (hdev->rfkill) {
@@ -1009,10 +1212,14 @@ int hci_unregister_dev(struct hci_dev *hdev)
1009 1212
1010 hci_unregister_sysfs(hdev); 1213 hci_unregister_sysfs(hdev);
1011 1214
1215 hci_del_off_timer(hdev);
1216
1012 destroy_workqueue(hdev->workqueue); 1217 destroy_workqueue(hdev->workqueue);
1013 1218
1014 hci_dev_lock_bh(hdev); 1219 hci_dev_lock_bh(hdev);
1015 hci_blacklist_clear(hdev); 1220 hci_blacklist_clear(hdev);
1221 hci_uuids_clear(hdev);
1222 hci_link_keys_clear(hdev);
1016 hci_dev_unlock_bh(hdev); 1223 hci_dev_unlock_bh(hdev);
1017 1224
1018 __hci_dev_put(hdev); 1225 __hci_dev_put(hdev);
@@ -1313,7 +1520,7 @@ static int hci_send_frame(struct sk_buff *skb)
1313 /* Time stamp */ 1520 /* Time stamp */
1314 __net_timestamp(skb); 1521 __net_timestamp(skb);
1315 1522
1316 hci_send_to_sock(hdev, skb); 1523 hci_send_to_sock(hdev, skb, NULL);
1317 } 1524 }
1318 1525
1319 /* Get rid of skb owner, prior to sending to the driver. */ 1526 /* Get rid of skb owner, prior to sending to the driver. */
@@ -1349,6 +1556,9 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
1349 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; 1556 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
1350 skb->dev = (void *) hdev; 1557 skb->dev = (void *) hdev;
1351 1558
1559 if (test_bit(HCI_INIT, &hdev->flags))
1560 hdev->init_last_cmd = opcode;
1561
1352 skb_queue_tail(&hdev->cmd_q, skb); 1562 skb_queue_tail(&hdev->cmd_q, skb);
1353 tasklet_schedule(&hdev->cmd_task); 1563 tasklet_schedule(&hdev->cmd_task);
1354 1564
@@ -1395,7 +1605,7 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1395 1605
1396 skb->dev = (void *) hdev; 1606 skb->dev = (void *) hdev;
1397 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; 1607 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
1398 hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); 1608 hci_add_acl_hdr(skb, conn->handle, flags);
1399 1609
1400 list = skb_shinfo(skb)->frag_list; 1610 list = skb_shinfo(skb)->frag_list;
1401 if (!list) { 1611 if (!list) {
@@ -1413,12 +1623,15 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1413 spin_lock_bh(&conn->data_q.lock); 1623 spin_lock_bh(&conn->data_q.lock);
1414 1624
1415 __skb_queue_tail(&conn->data_q, skb); 1625 __skb_queue_tail(&conn->data_q, skb);
1626
1627 flags &= ~ACL_START;
1628 flags |= ACL_CONT;
1416 do { 1629 do {
1417 skb = list; list = list->next; 1630 skb = list; list = list->next;
1418 1631
1419 skb->dev = (void *) hdev; 1632 skb->dev = (void *) hdev;
1420 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; 1633 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
1421 hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); 1634 hci_add_acl_hdr(skb, conn->handle, flags);
1422 1635
1423 BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); 1636 BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
1424 1637
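
With this change the caller's packet-boundary flag reaches the first fragment untouched, and every continuation is rewritten to ACL_CONT. The flag travels in the top bits of the handle word of the ACL header; a standalone sketch of the packing done by hci_add_acl_hdr() via hci_handle_pack() (12-bit handle, flags shifted into bits 12-15):

#include <stdio.h>

#define ACL_CONT	0x01
#define ACL_START	0x02

static unsigned short handle_pack(unsigned short handle, unsigned short flags)
{
	return (handle & 0x0fff) | (flags << 12);	/* then cpu_to_le16() */
}

int main(void)
{
	unsigned short handle = 0x002a;	/* arbitrary example handle */

	printf("first fragment 0x%04x, continuation 0x%04x\n",
			handle_pack(handle, ACL_START),
			handle_pack(handle, ACL_CONT));
	return 0;
}
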
@@ -1486,8 +1699,25 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int
1486 } 1699 }
1487 1700
1488 if (conn) { 1701 if (conn) {
1489 int cnt = (type == ACL_LINK ? hdev->acl_cnt : hdev->sco_cnt); 1702 int cnt, q;
1490 int q = cnt / num; 1703
1704 switch (conn->type) {
1705 case ACL_LINK:
1706 cnt = hdev->acl_cnt;
1707 break;
1708 case SCO_LINK:
1709 case ESCO_LINK:
1710 cnt = hdev->sco_cnt;
1711 break;
1712 case LE_LINK:
1713 cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
1714 break;
1715 default:
1716 cnt = 0;
1717 BT_ERR("Unknown link type");
1718 }
1719
1720 q = cnt / num;
1491 *quote = q ? q : 1; 1721 *quote = q ? q : 1;
1492 } else 1722 } else
1493 *quote = 0; 1723 *quote = 0;
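
The quota logic above shares the controller's free buffer credits evenly over the connections that have data queued, but never hands out less than one frame per round, so a single busy link cannot starve the others. The rule in isolation:

static int tx_quote(int cnt, int num_ready)
{
	int q = num_ready ? cnt / num_ready : 0;

	return q ? q : 1;	/* at least one frame per scheduling round */
}
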
@@ -1496,19 +1726,19 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int
1496 return conn; 1726 return conn;
1497} 1727}
1498 1728
1499static inline void hci_acl_tx_to(struct hci_dev *hdev) 1729static inline void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
1500{ 1730{
1501 struct hci_conn_hash *h = &hdev->conn_hash; 1731 struct hci_conn_hash *h = &hdev->conn_hash;
1502 struct list_head *p; 1732 struct list_head *p;
1503 struct hci_conn *c; 1733 struct hci_conn *c;
1504 1734
1505 BT_ERR("%s ACL tx timeout", hdev->name); 1735 BT_ERR("%s link tx timeout", hdev->name);
1506 1736
1507 /* Kill stalled connections */ 1737 /* Kill stalled connections */
1508 list_for_each(p, &h->list) { 1738 list_for_each(p, &h->list) {
1509 c = list_entry(p, struct hci_conn, list); 1739 c = list_entry(p, struct hci_conn, list);
1510 if (c->type == ACL_LINK && c->sent) { 1740 if (c->type == type && c->sent) {
1511 BT_ERR("%s killing stalled ACL connection %s", 1741 BT_ERR("%s killing stalled connection %s",
1512 hdev->name, batostr(&c->dst)); 1742 hdev->name, batostr(&c->dst));
1513 hci_acl_disconn(c, 0x13); 1743 hci_acl_disconn(c, 0x13);
1514 } 1744 }
@@ -1527,7 +1757,7 @@ static inline void hci_sched_acl(struct hci_dev *hdev)
1527 /* ACL tx timeout must be longer than maximum 1757 /* ACL tx timeout must be longer than maximum
1528 * link supervision timeout (40.9 seconds) */ 1758 * link supervision timeout (40.9 seconds) */
1529 if (!hdev->acl_cnt && time_after(jiffies, hdev->acl_last_tx + HZ * 45)) 1759 if (!hdev->acl_cnt && time_after(jiffies, hdev->acl_last_tx + HZ * 45))
1530 hci_acl_tx_to(hdev); 1760 hci_link_tx_to(hdev, ACL_LINK);
1531 } 1761 }
1532 1762
1533 while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) { 1763 while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) {
@@ -1586,6 +1816,40 @@ static inline void hci_sched_esco(struct hci_dev *hdev)
1586 } 1816 }
1587} 1817}
1588 1818
1819static inline void hci_sched_le(struct hci_dev *hdev)
1820{
1821 struct hci_conn *conn;
1822 struct sk_buff *skb;
1823 int quote, cnt;
1824
1825 BT_DBG("%s", hdev->name);
1826
1827 if (!test_bit(HCI_RAW, &hdev->flags)) {
1828 /* LE tx timeout must be longer than maximum
1829 * link supervision timeout (40.9 seconds) */
1830 if (!hdev->le_cnt && hdev->le_pkts &&
1831 time_after(jiffies, hdev->le_last_tx + HZ * 45))
1832 hci_link_tx_to(hdev, LE_LINK);
1833 }
1834
1835 cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
1836 while (cnt && (conn = hci_low_sent(hdev, LE_LINK, &quote))) {
1837 while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
1838 BT_DBG("skb %p len %d", skb, skb->len);
1839
1840 hci_send_frame(skb);
1841 hdev->le_last_tx = jiffies;
1842
1843 cnt--;
1844 conn->sent++;
1845 }
1846 }
1847 if (hdev->le_pkts)
1848 hdev->le_cnt = cnt;
1849 else
1850 hdev->acl_cnt = cnt;
1851}
1852
1589static void hci_tx_task(unsigned long arg) 1853static void hci_tx_task(unsigned long arg)
1590{ 1854{
1591 struct hci_dev *hdev = (struct hci_dev *) arg; 1855 struct hci_dev *hdev = (struct hci_dev *) arg;
@@ -1593,7 +1857,8 @@ static void hci_tx_task(unsigned long arg)
1593 1857
1594 read_lock(&hci_task_lock); 1858 read_lock(&hci_task_lock);
1595 1859
1596 BT_DBG("%s acl %d sco %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt); 1860 BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt,
1861 hdev->sco_cnt, hdev->le_cnt);
1597 1862
1598 /* Schedule queues and send stuff to HCI driver */ 1863 /* Schedule queues and send stuff to HCI driver */
1599 1864
@@ -1603,6 +1868,8 @@ static void hci_tx_task(unsigned long arg)
1603 1868
1604 hci_sched_esco(hdev); 1869 hci_sched_esco(hdev);
1605 1870
1871 hci_sched_le(hdev);
1872
1606 /* Send next queued raw (unknown type) packet */ 1873 /* Send next queued raw (unknown type) packet */
1607 while ((skb = skb_dequeue(&hdev->raw_q))) 1874 while ((skb = skb_dequeue(&hdev->raw_q)))
1608 hci_send_frame(skb); 1875 hci_send_frame(skb);
@@ -1700,7 +1967,7 @@ static void hci_rx_task(unsigned long arg)
1700 while ((skb = skb_dequeue(&hdev->rx_q))) { 1967 while ((skb = skb_dequeue(&hdev->rx_q))) {
1701 if (atomic_read(&hdev->promisc)) { 1968 if (atomic_read(&hdev->promisc)) {
1702 /* Send copy to the sockets */ 1969 /* Send copy to the sockets */
1703 hci_send_to_sock(hdev, skb); 1970 hci_send_to_sock(hdev, skb, NULL);
1704 } 1971 }
1705 1972
1706 if (test_bit(HCI_RAW, &hdev->flags)) { 1973 if (test_bit(HCI_RAW, &hdev->flags)) {
@@ -1750,20 +2017,20 @@ static void hci_cmd_task(unsigned long arg)
1750 2017
1751 BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt)); 2018 BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt));
1752 2019
1753 if (!atomic_read(&hdev->cmd_cnt) && time_after(jiffies, hdev->cmd_last_tx + HZ)) {
1754 BT_ERR("%s command tx timeout", hdev->name);
1755 atomic_set(&hdev->cmd_cnt, 1);
1756 }
1757
1758 /* Send queued commands */ 2020 /* Send queued commands */
1759 if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { 2021 if (atomic_read(&hdev->cmd_cnt)) {
2022 skb = skb_dequeue(&hdev->cmd_q);
2023 if (!skb)
2024 return;
2025
1760 kfree_skb(hdev->sent_cmd); 2026 kfree_skb(hdev->sent_cmd);
1761 2027
1762 hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC); 2028 hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC);
1763 if (hdev->sent_cmd) { 2029 if (hdev->sent_cmd) {
1764 atomic_dec(&hdev->cmd_cnt); 2030 atomic_dec(&hdev->cmd_cnt);
1765 hci_send_frame(skb); 2031 hci_send_frame(skb);
1766 hdev->cmd_last_tx = jiffies; 2032 mod_timer(&hdev->cmd_timer,
2033 jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT));
1767 } else { 2034 } else {
1768 skb_queue_head(&hdev->cmd_q, skb); 2035 skb_queue_head(&hdev->cmd_q, skb);
1769 tasklet_schedule(&hdev->cmd_task); 2036 tasklet_schedule(&hdev->cmd_task);
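
Command flow control now has two independent pieces: cmd_cnt is a credit refreshed from the Num_HCI_Command_Packets field of Command Complete/Status events (capped at one command in flight), and the old jiffies comparison is replaced by cmd_timer, armed on every send and deleted on any non-NOP completion in hci_event.c below. A toy model of the credit side only:

#include <stdio.h>

static int cmd_cnt = 1;			/* one credit granted at startup */

static void send_cmd(void)
{
	if (!cmd_cnt) {
		printf("no credit: command stays queued\n");
		return;
	}
	cmd_cnt--;			/* mod_timer(&cmd_timer, ...) here */
	printf("command sent\n");
}

static void cmd_complete(int ncmd)
{
	if (ncmd)			/* kernel caps in-flight cmds at 1 */
		cmd_cnt = 1;		/* del_timer(&cmd_timer) happened */
	printf("credit now %d\n", cmd_cnt);
}

int main(void)
{
	send_cmd();		/* sent */
	send_cmd();		/* queued */
	cmd_complete(1);	/* event restores the credit */
	send_cmd();		/* sent */
	return 0;
}
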
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a290854fdaa..3fbfa50c2bf 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -274,15 +274,24 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
274 274
275 if (!status) { 275 if (!status) {
276 __u8 param = *((__u8 *) sent); 276 __u8 param = *((__u8 *) sent);
277 int old_pscan, old_iscan;
277 278
278 clear_bit(HCI_PSCAN, &hdev->flags); 279 old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags);
279 clear_bit(HCI_ISCAN, &hdev->flags); 280 old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags);
280 281
281 if (param & SCAN_INQUIRY) 282 if (param & SCAN_INQUIRY) {
282 set_bit(HCI_ISCAN, &hdev->flags); 283 set_bit(HCI_ISCAN, &hdev->flags);
284 if (!old_iscan)
285 mgmt_discoverable(hdev->id, 1);
286 } else if (old_iscan)
287 mgmt_discoverable(hdev->id, 0);
283 288
284 if (param & SCAN_PAGE) 289 if (param & SCAN_PAGE) {
285 set_bit(HCI_PSCAN, &hdev->flags); 290 set_bit(HCI_PSCAN, &hdev->flags);
291 if (!old_pscan)
292 mgmt_connectable(hdev->id, 1);
293 } else if (old_pscan)
294 mgmt_connectable(hdev->id, 0);
286 } 295 }
287 296
288 hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); 297 hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
@@ -415,6 +424,115 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
415 hdev->ssp_mode = *((__u8 *) sent); 424 hdev->ssp_mode = *((__u8 *) sent);
416} 425}
417 426
427static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
428{
429 if (hdev->features[6] & LMP_EXT_INQ)
430 return 2;
431
432 if (hdev->features[3] & LMP_RSSI_INQ)
433 return 1;
434
435 if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
436 hdev->lmp_subver == 0x0757)
437 return 1;
438
439 if (hdev->manufacturer == 15) {
440 if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
441 return 1;
442 if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
443 return 1;
444 if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
445 return 1;
446 }
447
448 if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
449 hdev->lmp_subver == 0x1805)
450 return 1;
451
452 return 0;
453}
454
455static void hci_setup_inquiry_mode(struct hci_dev *hdev)
456{
457 u8 mode;
458
459 mode = hci_get_inquiry_mode(hdev);
460
461 hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
462}
463
464static void hci_setup_event_mask(struct hci_dev *hdev)
465{
466 /* The second byte is 0xff instead of 0x9f (two reserved bits
467 * disabled) since a Broadcom 1.2 dongle doesn't respond to the
468 * command otherwise */
469 u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
470
471 /* Events for 1.2 and newer controllers */
472 if (hdev->lmp_ver > 1) {
473 events[4] |= 0x01; /* Flow Specification Complete */
474 events[4] |= 0x02; /* Inquiry Result with RSSI */
475 events[4] |= 0x04; /* Read Remote Extended Features Complete */
476 events[5] |= 0x08; /* Synchronous Connection Complete */
477 events[5] |= 0x10; /* Synchronous Connection Changed */
478 }
479
480 if (hdev->features[3] & LMP_RSSI_INQ)
481 events[4] |= 0x04; /* Inquiry Result with RSSI */
482
483 if (hdev->features[5] & LMP_SNIFF_SUBR)
484 events[5] |= 0x20; /* Sniff Subrating */
485
486 if (hdev->features[5] & LMP_PAUSE_ENC)
487 events[5] |= 0x80; /* Encryption Key Refresh Complete */
488
489 if (hdev->features[6] & LMP_EXT_INQ)
490 events[5] |= 0x40; /* Extended Inquiry Result */
491
492 if (hdev->features[6] & LMP_NO_FLUSH)
493 events[7] |= 0x01; /* Enhanced Flush Complete */
494
495 if (hdev->features[7] & LMP_LSTO)
496 events[6] |= 0x80; /* Link Supervision Timeout Changed */
497
498 if (hdev->features[6] & LMP_SIMPLE_PAIR) {
499 events[6] |= 0x01; /* IO Capability Request */
500 events[6] |= 0x02; /* IO Capability Response */
501 events[6] |= 0x04; /* User Confirmation Request */
502 events[6] |= 0x08; /* User Passkey Request */
503 events[6] |= 0x10; /* Remote OOB Data Request */
504 events[6] |= 0x20; /* Simple Pairing Complete */
505 events[7] |= 0x04; /* User Passkey Notification */
506 events[7] |= 0x08; /* Keypress Notification */
507 events[7] |= 0x10; /* Remote Host Supported
508 * Features Notification */
509 }
510
511 if (hdev->features[4] & LMP_LE)
512 events[7] |= 0x20; /* LE Meta-Event */
513
514 hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
515}
516
517static void hci_setup(struct hci_dev *hdev)
518{
519 hci_setup_event_mask(hdev);
520
521 if (hdev->lmp_ver > 1)
522 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
523
524 if (hdev->features[6] & LMP_SIMPLE_PAIR) {
525 u8 mode = 0x01;
526 hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode);
527 }
528
529 if (hdev->features[3] & LMP_RSSI_INQ)
530 hci_setup_inquiry_mode(hdev);
531
532 if (hdev->features[7] & LMP_INQ_TX_PWR)
533 hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
534}
535
418static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) 536static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
419{ 537{
420 struct hci_rp_read_local_version *rp = (void *) skb->data; 538 struct hci_rp_read_local_version *rp = (void *) skb->data;
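
hci_setup_event_mask() above assembles the 8-byte event mask bit by bit from the controller's LMP version and feature bits. A standalone sketch reproducing just the baseline plus the 1.2-era events, printing the mask it would send:

#include <stdio.h>

int main(void)
{
	/* byte 1 stays 0xff rather than 0x9f for the Broadcom quirk above */
	unsigned char events[8] = { 0xff, 0xff, 0xfb, 0xff,
					0x00, 0x00, 0x00, 0x00 };
	int lmp_ver = 2;		/* assume a 1.2+ controller */
	int i;

	if (lmp_ver > 1) {
		events[4] |= 0x01;	/* Flow Specification Complete */
		events[4] |= 0x02;	/* Inquiry Result with RSSI */
		events[4] |= 0x04;	/* Read Remote Ext Features Complete */
		events[5] |= 0x08;	/* Synchronous Connection Complete */
		events[5] |= 0x10;	/* Synchronous Connection Changed */
	}

	for (i = 0; i < 8; i++)
		printf("%02x ", events[i]);	/* ff ff fb ff 07 18 00 00 */
	printf("\n");
	return 0;
}
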
@@ -426,11 +544,34 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
426 544
427 hdev->hci_ver = rp->hci_ver; 545 hdev->hci_ver = rp->hci_ver;
428 hdev->hci_rev = __le16_to_cpu(rp->hci_rev); 546 hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
547 hdev->lmp_ver = rp->lmp_ver;
429 hdev->manufacturer = __le16_to_cpu(rp->manufacturer); 548 hdev->manufacturer = __le16_to_cpu(rp->manufacturer);
549 hdev->lmp_subver = __le16_to_cpu(rp->lmp_subver);
430 550
431 BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, 551 BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name,
432 hdev->manufacturer, 552 hdev->manufacturer,
433 hdev->hci_ver, hdev->hci_rev); 553 hdev->hci_ver, hdev->hci_rev);
554
555 if (test_bit(HCI_INIT, &hdev->flags))
556 hci_setup(hdev);
557}
558
559static void hci_setup_link_policy(struct hci_dev *hdev)
560{
561 u16 link_policy = 0;
562
563 if (hdev->features[0] & LMP_RSWITCH)
564 link_policy |= HCI_LP_RSWITCH;
565 if (hdev->features[0] & LMP_HOLD)
566 link_policy |= HCI_LP_HOLD;
567 if (hdev->features[0] & LMP_SNIFF)
568 link_policy |= HCI_LP_SNIFF;
569 if (hdev->features[1] & LMP_PARK)
570 link_policy |= HCI_LP_PARK;
571
572 link_policy = cpu_to_le16(link_policy);
573 hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
574 sizeof(link_policy), &link_policy);
434} 575}
435 576
436static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb) 577static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb)
@@ -440,9 +581,15 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb
440 BT_DBG("%s status 0x%x", hdev->name, rp->status); 581 BT_DBG("%s status 0x%x", hdev->name, rp->status);
441 582
442 if (rp->status) 583 if (rp->status)
443 return; 584 goto done;
444 585
445 memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); 586 memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
587
588 if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10))
589 hci_setup_link_policy(hdev);
590
591done:
592 hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status);
446} 593}
447 594
448static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb) 595static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb)
@@ -548,6 +695,130 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
548 hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); 695 hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
549} 696}
550 697
698static void hci_cc_delete_stored_link_key(struct hci_dev *hdev,
699 struct sk_buff *skb)
700{
701 __u8 status = *((__u8 *) skb->data);
702
703 BT_DBG("%s status 0x%x", hdev->name, status);
704
705 hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status);
706}
707
708static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
709{
710 __u8 status = *((__u8 *) skb->data);
711
712 BT_DBG("%s status 0x%x", hdev->name, status);
713
714 hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status);
715}
716
717static void hci_cc_write_inquiry_mode(struct hci_dev *hdev,
718 struct sk_buff *skb)
719{
720 __u8 status = *((__u8 *) skb->data);
721
722 BT_DBG("%s status 0x%x", hdev->name, status);
723
724 hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status);
725}
726
727static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
728 struct sk_buff *skb)
729{
730 __u8 status = *((__u8 *) skb->data);
731
732 BT_DBG("%s status 0x%x", hdev->name, status);
733
734 hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, status);
735}
736
737static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb)
738{
739 __u8 status = *((__u8 *) skb->data);
740
741 BT_DBG("%s status 0x%x", hdev->name, status);
742
743 hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);
744}
745
746static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
747{
748 struct hci_rp_pin_code_reply *rp = (void *) skb->data;
749 struct hci_cp_pin_code_reply *cp;
750 struct hci_conn *conn;
751
752 BT_DBG("%s status 0x%x", hdev->name, rp->status);
753
754 if (test_bit(HCI_MGMT, &hdev->flags))
755 mgmt_pin_code_reply_complete(hdev->id, &rp->bdaddr, rp->status);
756
757 if (rp->status != 0)
758 return;
759
760 cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY);
761 if (!cp)
762 return;
763
764 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
765 if (conn)
766 conn->pin_length = cp->pin_len;
767}
768
769static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
770{
771 struct hci_rp_pin_code_neg_reply *rp = (void *) skb->data;
772
773 BT_DBG("%s status 0x%x", hdev->name, rp->status);
774
775 if (test_bit(HCI_MGMT, &hdev->flags))
776 mgmt_pin_code_neg_reply_complete(hdev->id, &rp->bdaddr,
777 rp->status);
778}
779static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,
780 struct sk_buff *skb)
781{
782 struct hci_rp_le_read_buffer_size *rp = (void *) skb->data;
783
784 BT_DBG("%s status 0x%x", hdev->name, rp->status);
785
786 if (rp->status)
787 return;
788
789 hdev->le_mtu = __le16_to_cpu(rp->le_mtu);
790 hdev->le_pkts = rp->le_max_pkt;
791
792 hdev->le_cnt = hdev->le_pkts;
793
794 BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
795
796 hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);
797}
798
799static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
800{
801 struct hci_rp_user_confirm_reply *rp = (void *) skb->data;
802
803 BT_DBG("%s status 0x%x", hdev->name, rp->status);
804
805 if (test_bit(HCI_MGMT, &hdev->flags))
806 mgmt_user_confirm_reply_complete(hdev->id, &rp->bdaddr,
807 rp->status);
808}
809
810static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev,
811 struct sk_buff *skb)
812{
813 struct hci_rp_user_confirm_reply *rp = (void *) skb->data;
814
815 BT_DBG("%s status 0x%x", hdev->name, rp->status);
816
817 if (test_bit(HCI_MGMT, &hdev->flags))
818 mgmt_user_confirm_neg_reply_complete(hdev->id, &rp->bdaddr,
819 rp->status);
820}
821
551static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) 822static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
552{ 823{
553 BT_DBG("%s status 0x%x", hdev->name, status); 824 BT_DBG("%s status 0x%x", hdev->name, status);
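
Per the spec, the LE_Read_Buffer_Size reply is a status byte, a 2-byte little-endian data packet length, and a 1-byte packet count; a length of zero means the controller has no dedicated LE buffers and the host must fall back to the ACL pool, which is exactly the hdev->le_pkts test the scheduler and the completed-packets handler use. A standalone parsing sketch:

#include <stdint.h>
#include <stdio.h>

static void parse_le_buf_rsp(const uint8_t *d)
{
	uint16_t le_mtu = d[1] | (d[2] << 8);	/* i.e. __le16_to_cpu() */
	uint8_t le_pkts = d[3];

	if (d[0])			/* status != 0: command failed */
		return;

	if (!le_mtu)
		printf("no LE buffers, sharing the ACL pool\n");
	else
		printf("le mtu %u, %u packets\n", le_mtu, le_pkts);
}

int main(void)
{
	const uint8_t rsp[] = { 0x00, 0x1b, 0x00, 0x0f };  /* 27 B, 15 pkts */

	parse_le_buf_rsp(rsp);
	return 0;
}
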
@@ -622,11 +893,14 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status)
622 hci_dev_lock(hdev); 893 hci_dev_lock(hdev);
623 894
624 acl = hci_conn_hash_lookup_handle(hdev, handle); 895 acl = hci_conn_hash_lookup_handle(hdev, handle);
625 if (acl && (sco = acl->link)) { 896 if (acl) {
626 sco->state = BT_CLOSED; 897 sco = acl->link;
898 if (sco) {
899 sco->state = BT_CLOSED;
627 900
628 hci_proto_connect_cfm(sco, status); 901 hci_proto_connect_cfm(sco, status);
629 hci_conn_del(sco); 902 hci_conn_del(sco);
903 }
630 } 904 }
631 905
632 hci_dev_unlock(hdev); 906 hci_dev_unlock(hdev);
@@ -687,7 +961,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
687} 961}
688 962
689static int hci_outgoing_auth_needed(struct hci_dev *hdev, 963static int hci_outgoing_auth_needed(struct hci_dev *hdev,
690 struct hci_conn *conn) 964 struct hci_conn *conn)
691{ 965{
692 if (conn->state != BT_CONFIG || !conn->out) 966 if (conn->state != BT_CONFIG || !conn->out)
693 return 0; 967 return 0;
@@ -808,11 +1082,14 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status)
808 hci_dev_lock(hdev); 1082 hci_dev_lock(hdev);
809 1083
810 acl = hci_conn_hash_lookup_handle(hdev, handle); 1084 acl = hci_conn_hash_lookup_handle(hdev, handle);
811 if (acl && (sco = acl->link)) { 1085 if (acl) {
812 sco->state = BT_CLOSED; 1086 sco = acl->link;
1087 if (sco) {
1088 sco->state = BT_CLOSED;
813 1089
814 hci_proto_connect_cfm(sco, status); 1090 hci_proto_connect_cfm(sco, status);
815 hci_conn_del(sco); 1091 hci_conn_del(sco);
1092 }
816 } 1093 }
817 1094
818 hci_dev_unlock(hdev); 1095 hci_dev_unlock(hdev);
@@ -872,6 +1149,43 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status)
872 hci_dev_unlock(hdev); 1149 hci_dev_unlock(hdev);
873} 1150}
874 1151
1152static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
1153{
1154 struct hci_cp_le_create_conn *cp;
1155 struct hci_conn *conn;
1156
1157 BT_DBG("%s status 0x%x", hdev->name, status);
1158
1159 cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN);
1160 if (!cp)
1161 return;
1162
1163 hci_dev_lock(hdev);
1164
1165 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
1166
1167 BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr),
1168 conn);
1169
1170 if (status) {
1171 if (conn && conn->state == BT_CONNECT) {
1172 conn->state = BT_CLOSED;
1173 hci_proto_connect_cfm(conn, status);
1174 hci_conn_del(conn);
1175 }
1176 } else {
1177 if (!conn) {
1178 conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr);
1179 if (conn)
1180 conn->out = 1;
1181 else
1182 BT_ERR("No memory for new connection");
1183 }
1184 }
1185
1186 hci_dev_unlock(hdev);
1187}
1188
875static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) 1189static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
876{ 1190{
877 __u8 status = *((__u8 *) skb->data); 1191 __u8 status = *((__u8 *) skb->data);
@@ -942,6 +1256,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
942 conn->state = BT_CONFIG; 1256 conn->state = BT_CONFIG;
943 hci_conn_hold(conn); 1257 hci_conn_hold(conn);
944 conn->disc_timeout = HCI_DISCONN_TIMEOUT; 1258 conn->disc_timeout = HCI_DISCONN_TIMEOUT;
1259 mgmt_connected(hdev->id, &ev->bdaddr);
945 } else 1260 } else
946 conn->state = BT_CONNECTED; 1261 conn->state = BT_CONNECTED;
947 1262
@@ -970,8 +1285,11 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
970 hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, 1285 hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE,
971 sizeof(cp), &cp); 1286 sizeof(cp), &cp);
972 } 1287 }
973 } else 1288 } else {
974 conn->state = BT_CLOSED; 1289 conn->state = BT_CLOSED;
1290 if (conn->type == ACL_LINK)
1291 mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status);
1292 }
975 1293
976 if (conn->type == ACL_LINK) 1294 if (conn->type == ACL_LINK)
977 hci_sco_setup(conn, ev->status); 1295 hci_sco_setup(conn, ev->status);
@@ -998,7 +1316,8 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
998 1316
999 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); 1317 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
1000 1318
1001 if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) { 1319 if ((mask & HCI_LM_ACCEPT) &&
1320 !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
1002 /* Connection accepted */ 1321 /* Connection accepted */
1003 struct inquiry_entry *ie; 1322 struct inquiry_entry *ie;
1004 struct hci_conn *conn; 1323 struct hci_conn *conn;
@@ -1068,19 +1387,26 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
1068 1387
1069 BT_DBG("%s status %d", hdev->name, ev->status); 1388 BT_DBG("%s status %d", hdev->name, ev->status);
1070 1389
1071 if (ev->status) 1390 if (ev->status) {
1391 mgmt_disconnect_failed(hdev->id);
1072 return; 1392 return;
1393 }
1073 1394
1074 hci_dev_lock(hdev); 1395 hci_dev_lock(hdev);
1075 1396
1076 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); 1397 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
1077 if (conn) { 1398 if (!conn)
1078 conn->state = BT_CLOSED; 1399 goto unlock;
1079 1400
1080 hci_proto_disconn_cfm(conn, ev->reason); 1401 conn->state = BT_CLOSED;
1081 hci_conn_del(conn); 1402
1082 } 1403 if (conn->type == ACL_LINK)
1404 mgmt_disconnected(hdev->id, &conn->dst);
1083 1405
1406 hci_proto_disconn_cfm(conn, ev->reason);
1407 hci_conn_del(conn);
1408
1409unlock:
1084 hci_dev_unlock(hdev); 1410 hci_dev_unlock(hdev);
1085} 1411}
1086 1412
@@ -1098,8 +1424,10 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
1098 if (!ev->status) { 1424 if (!ev->status) {
1099 conn->link_mode |= HCI_LM_AUTH; 1425 conn->link_mode |= HCI_LM_AUTH;
1100 conn->sec_level = conn->pending_sec_level; 1426 conn->sec_level = conn->pending_sec_level;
1101 } else 1427 } else {
1428 mgmt_auth_failed(hdev->id, &conn->dst, ev->status);
1102 conn->sec_level = BT_SECURITY_LOW; 1429 conn->sec_level = BT_SECURITY_LOW;
1430 }
1103 1431
1104 clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); 1432 clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
1105 1433
@@ -1393,11 +1721,54 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk
1393 hci_cc_write_ca_timeout(hdev, skb); 1721 hci_cc_write_ca_timeout(hdev, skb);
1394 break; 1722 break;
1395 1723
1724 case HCI_OP_DELETE_STORED_LINK_KEY:
1725 hci_cc_delete_stored_link_key(hdev, skb);
1726 break;
1727
1728 case HCI_OP_SET_EVENT_MASK:
1729 hci_cc_set_event_mask(hdev, skb);
1730 break;
1731
1732 case HCI_OP_WRITE_INQUIRY_MODE:
1733 hci_cc_write_inquiry_mode(hdev, skb);
1734 break;
1735
1736 case HCI_OP_READ_INQ_RSP_TX_POWER:
1737 hci_cc_read_inq_rsp_tx_power(hdev, skb);
1738 break;
1739
1740 case HCI_OP_SET_EVENT_FLT:
1741 hci_cc_set_event_flt(hdev, skb);
1742 break;
1743
1744 case HCI_OP_PIN_CODE_REPLY:
1745 hci_cc_pin_code_reply(hdev, skb);
1746 break;
1747
1748 case HCI_OP_PIN_CODE_NEG_REPLY:
1749 hci_cc_pin_code_neg_reply(hdev, skb);
1750 break;
1751
1752 case HCI_OP_LE_READ_BUFFER_SIZE:
1753 hci_cc_le_read_buffer_size(hdev, skb);
1754 break;
1755
1756 case HCI_OP_USER_CONFIRM_REPLY:
1757 hci_cc_user_confirm_reply(hdev, skb);
1758 break;
1759
1760 case HCI_OP_USER_CONFIRM_NEG_REPLY:
1761 hci_cc_user_confirm_neg_reply(hdev, skb);
1762 break;
1763
1396 default: 1764 default:
1397 BT_DBG("%s opcode 0x%x", hdev->name, opcode); 1765 BT_DBG("%s opcode 0x%x", hdev->name, opcode);
1398 break; 1766 break;
1399 } 1767 }
1400 1768
1769 if (ev->opcode != HCI_OP_NOP)
1770 del_timer(&hdev->cmd_timer);
1771
1401 if (ev->ncmd) { 1772 if (ev->ncmd) {
1402 atomic_set(&hdev->cmd_cnt, 1); 1773 atomic_set(&hdev->cmd_cnt, 1);
1403 if (!skb_queue_empty(&hdev->cmd_q)) 1774 if (!skb_queue_empty(&hdev->cmd_q))
@@ -1459,11 +1830,23 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
1459 hci_cs_exit_sniff_mode(hdev, ev->status); 1830 hci_cs_exit_sniff_mode(hdev, ev->status);
1460 break; 1831 break;
1461 1832
1833 case HCI_OP_DISCONNECT:
1834 if (ev->status != 0)
1835 mgmt_disconnect_failed(hdev->id);
1836 break;
1837
1838 case HCI_OP_LE_CREATE_CONN:
1839 hci_cs_le_create_conn(hdev, ev->status);
1840 break;
1841
1462 default: 1842 default:
1463 BT_DBG("%s opcode 0x%x", hdev->name, opcode); 1843 BT_DBG("%s opcode 0x%x", hdev->name, opcode);
1464 break; 1844 break;
1465 } 1845 }
1466 1846
1847 if (ev->opcode != HCI_OP_NOP)
1848 del_timer(&hdev->cmd_timer);
1849
1467 if (ev->ncmd) { 1850 if (ev->ncmd) {
1468 atomic_set(&hdev->cmd_cnt, 1); 1851 atomic_set(&hdev->cmd_cnt, 1);
1469 if (!skb_queue_empty(&hdev->cmd_q)) 1852 if (!skb_queue_empty(&hdev->cmd_q))
@@ -1529,6 +1912,16 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s
1529 hdev->acl_cnt += count; 1912 hdev->acl_cnt += count;
1530 if (hdev->acl_cnt > hdev->acl_pkts) 1913 if (hdev->acl_cnt > hdev->acl_pkts)
1531 hdev->acl_cnt = hdev->acl_pkts; 1914 hdev->acl_cnt = hdev->acl_pkts;
1915 } else if (conn->type == LE_LINK) {
1916 if (hdev->le_pkts) {
1917 hdev->le_cnt += count;
1918 if (hdev->le_cnt > hdev->le_pkts)
1919 hdev->le_cnt = hdev->le_pkts;
1920 } else {
1921 hdev->acl_cnt += count;
1922 if (hdev->acl_cnt > hdev->acl_pkts)
1923 hdev->acl_cnt = hdev->acl_pkts;
1924 }
1532 } else { 1925 } else {
1533 hdev->sco_cnt += count; 1926 hdev->sco_cnt += count;
1534 if (hdev->sco_cnt > hdev->sco_pkts) 1927 if (hdev->sco_cnt > hdev->sco_pkts)
@@ -1586,18 +1979,72 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff
1586 hci_conn_put(conn); 1979 hci_conn_put(conn);
1587 } 1980 }
1588 1981
1982 if (!test_bit(HCI_PAIRABLE, &hdev->flags))
1983 hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY,
1984 sizeof(ev->bdaddr), &ev->bdaddr);
1985
1986 if (test_bit(HCI_MGMT, &hdev->flags))
1987 mgmt_pin_code_request(hdev->id, &ev->bdaddr);
1988
1589 hci_dev_unlock(hdev); 1989 hci_dev_unlock(hdev);
1590} 1990}
1591 1991
1592static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) 1992static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
1593{ 1993{
1994 struct hci_ev_link_key_req *ev = (void *) skb->data;
1995 struct hci_cp_link_key_reply cp;
1996 struct hci_conn *conn;
1997 struct link_key *key;
1998
1594 BT_DBG("%s", hdev->name); 1999 BT_DBG("%s", hdev->name);
2000
2001 if (!test_bit(HCI_LINK_KEYS, &hdev->flags))
2002 return;
2003
2004 hci_dev_lock(hdev);
2005
2006 key = hci_find_link_key(hdev, &ev->bdaddr);
2007 if (!key) {
2008 BT_DBG("%s link key not found for %s", hdev->name,
2009 batostr(&ev->bdaddr));
2010 goto not_found;
2011 }
2012
2013 BT_DBG("%s found key type %u for %s", hdev->name, key->type,
2014 batostr(&ev->bdaddr));
2015
2016 if (!test_bit(HCI_DEBUG_KEYS, &hdev->flags) && key->type == 0x03) {
2017 BT_DBG("%s ignoring debug key", hdev->name);
2018 goto not_found;
2019 }
2020
2021 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
2022
2023 if (key->type == 0x04 && conn && conn->auth_type != 0xff &&
2024 (conn->auth_type & 0x01)) {
2025 BT_DBG("%s ignoring unauthenticated key", hdev->name);
2026 goto not_found;
2027 }
2028
2029 bacpy(&cp.bdaddr, &ev->bdaddr);
2030 memcpy(cp.link_key, key->val, 16);
2031
2032 hci_send_cmd(hdev, HCI_OP_LINK_KEY_REPLY, sizeof(cp), &cp);
2033
2034 hci_dev_unlock(hdev);
2035
2036 return;
2037
2038not_found:
2039 hci_send_cmd(hdev, HCI_OP_LINK_KEY_NEG_REPLY, 6, &ev->bdaddr);
2040 hci_dev_unlock(hdev);
1595} 2041}
1596 2042
1597static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) 2043static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
1598{ 2044{
1599 struct hci_ev_link_key_notify *ev = (void *) skb->data; 2045 struct hci_ev_link_key_notify *ev = (void *) skb->data;
1600 struct hci_conn *conn; 2046 struct hci_conn *conn;
2047 u8 pin_len = 0;
1601 2048
1602 BT_DBG("%s", hdev->name); 2049 BT_DBG("%s", hdev->name);
1603 2050
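
The raw constants in the lookup above are Link Key types from the spec; named, the two rejection rules read naturally: debug combination keys (0x03) are only usable with HCI_DEBUG_KEYS set, and an unauthenticated key (0x04) is refused when the pending auth_type has its MITM bit (0x01) set. Hypothetical names for reference, the patch itself uses the raw values:

/* Link Key types per the Bluetooth spec */
enum link_key_type_sketch {
	KEY_COMBINATION		= 0x00,
	KEY_DEBUG_COMBINATION	= 0x03,	/* needs HCI_DEBUG_KEYS */
	KEY_UNAUTHENTICATED	= 0x04,	/* refused when MITM is required */
	KEY_AUTHENTICATED	= 0x05,
	KEY_CHANGED_COMBINATION	= 0x06,	/* hci_add_link_key() keeps old type */
};
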
@@ -1607,9 +2054,14 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff
1607 if (conn) { 2054 if (conn) {
1608 hci_conn_hold(conn); 2055 hci_conn_hold(conn);
1609 conn->disc_timeout = HCI_DISCONN_TIMEOUT; 2056 conn->disc_timeout = HCI_DISCONN_TIMEOUT;
2057 pin_len = conn->pin_length;
1610 hci_conn_put(conn); 2058 hci_conn_put(conn);
1611 } 2059 }
1612 2060
2061 if (test_bit(HCI_LINK_KEYS, &hdev->flags))
2062 hci_add_link_key(hdev, 1, &ev->bdaddr, ev->link_key,
2063 ev->key_type, pin_len);
2064
1613 hci_dev_unlock(hdev); 2065 hci_dev_unlock(hdev);
1614} 2066}
1615 2067
@@ -1683,7 +2135,8 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
1683 hci_dev_lock(hdev); 2135 hci_dev_lock(hdev);
1684 2136
1685 if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { 2137 if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) {
1686 struct inquiry_info_with_rssi_and_pscan_mode *info = (void *) (skb->data + 1); 2138 struct inquiry_info_with_rssi_and_pscan_mode *info;
2139 info = (void *) (skb->data + 1);
1687 2140
1688 for (; num_rsp; num_rsp--) { 2141 for (; num_rsp; num_rsp--) {
1689 bacpy(&data.bdaddr, &info->bdaddr); 2142 bacpy(&data.bdaddr, &info->bdaddr);
@@ -1824,17 +2277,8 @@ static inline void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buf
1824static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) 2277static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
1825{ 2278{
1826 struct hci_ev_sniff_subrate *ev = (void *) skb->data; 2279 struct hci_ev_sniff_subrate *ev = (void *) skb->data;
1827 struct hci_conn *conn;
1828 2280
1829 BT_DBG("%s status %d", hdev->name, ev->status); 2281 BT_DBG("%s status %d", hdev->name, ev->status);
1830
1831 hci_dev_lock(hdev);
1832
1833 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
1834 if (conn) {
1835 }
1836
1837 hci_dev_unlock(hdev);
1838} 2282}
1839 2283
1840static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) 2284static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1852,12 +2296,12 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct
1852 2296
1853 for (; num_rsp; num_rsp--) { 2297 for (; num_rsp; num_rsp--) {
1854 bacpy(&data.bdaddr, &info->bdaddr); 2298 bacpy(&data.bdaddr, &info->bdaddr);
1855 data.pscan_rep_mode = info->pscan_rep_mode; 2299 data.pscan_rep_mode = info->pscan_rep_mode;
1856 data.pscan_period_mode = info->pscan_period_mode; 2300 data.pscan_period_mode = info->pscan_period_mode;
1857 data.pscan_mode = 0x00; 2301 data.pscan_mode = 0x00;
1858 memcpy(data.dev_class, info->dev_class, 3); 2302 memcpy(data.dev_class, info->dev_class, 3);
1859 data.clock_offset = info->clock_offset; 2303 data.clock_offset = info->clock_offset;
1860 data.rssi = info->rssi; 2304 data.rssi = info->rssi;
1861 data.ssp_mode = 0x01; 2305 data.ssp_mode = 0x01;
1862 info++; 2306 info++;
1863 hci_inquiry_cache_update(hdev, &data); 2307 hci_inquiry_cache_update(hdev, &data);
@@ -1866,6 +2310,25 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct
1866 hci_dev_unlock(hdev); 2310 hci_dev_unlock(hdev);
1867} 2311}
1868 2312
2313static inline u8 hci_get_auth_req(struct hci_conn *conn)
2314{
2315 /* If remote requests dedicated bonding follow that lead */
2316 if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) {
2317 /* If both remote and local IO capabilities allow MITM
2318 * protection then require it, otherwise don't */
2319 if (conn->remote_cap == 0x03 || conn->io_capability == 0x03)
2320 return 0x02;
2321 else
2322 return 0x03;
2323 }
2324
 2325	/* If remote requests no-bonding, follow that lead */
2326 if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01)
2327 return 0x00;
2328
2329 return conn->auth_type;
2330}
2331
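The helper above collapses the SSP authentication-requirement negotiation into a small decision table. A minimal, self-contained sketch of the same logic follows; the constant names use the Bluetooth SSP numbering and are assumptions for illustration, not identifiers from this patch.

/* Hypothetical stand-alone model of hci_get_auth_req() above. */
#include <stdio.h>

enum { IO_DISPLAY_ONLY = 0x00, IO_DISPLAY_YESNO = 0x01,
       IO_KEYBOARD_ONLY = 0x02, IO_NO_INPUT_OUTPUT = 0x03 };

enum { AUTH_NO_BONDING = 0x00, AUTH_NO_BONDING_MITM = 0x01,
       AUTH_DEDICATED = 0x02, AUTH_DEDICATED_MITM = 0x03 };

static unsigned char pick_auth_req(unsigned char remote_auth,
				   unsigned char remote_cap,
				   unsigned char local_cap,
				   unsigned char local_auth)
{
	if (remote_auth == AUTH_DEDICATED || remote_auth == AUTH_DEDICATED_MITM) {
		/* MITM protection needs usable IO on both sides */
		if (remote_cap == IO_NO_INPUT_OUTPUT ||
		    local_cap == IO_NO_INPUT_OUTPUT)
			return AUTH_DEDICATED;
		return AUTH_DEDICATED_MITM;
	}
	if (remote_auth == AUTH_NO_BONDING || remote_auth == AUTH_NO_BONDING_MITM)
		return AUTH_NO_BONDING;
	return local_auth;	/* keep whatever we already wanted */
}

int main(void)
{
	printf("%#x\n", pick_auth_req(AUTH_DEDICATED_MITM, IO_NO_INPUT_OUTPUT,
				      IO_DISPLAY_YESNO, AUTH_NO_BONDING));
	return 0;	/* prints 0x2: dedicated bonding without MITM */
}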
1869static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) 2332static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
1870{ 2333{
1871 struct hci_ev_io_capa_request *ev = (void *) skb->data; 2334 struct hci_ev_io_capa_request *ev = (void *) skb->data;
@@ -1876,8 +2339,73 @@ static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff
1876 hci_dev_lock(hdev); 2339 hci_dev_lock(hdev);
1877 2340
1878 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); 2341 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
1879 if (conn) 2342 if (!conn)
1880 hci_conn_hold(conn); 2343 goto unlock;
2344
2345 hci_conn_hold(conn);
2346
2347 if (!test_bit(HCI_MGMT, &hdev->flags))
2348 goto unlock;
2349
2350 if (test_bit(HCI_PAIRABLE, &hdev->flags) ||
2351 (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) {
2352 struct hci_cp_io_capability_reply cp;
2353
2354 bacpy(&cp.bdaddr, &ev->bdaddr);
2355 cp.capability = conn->io_capability;
2356 cp.oob_data = 0;
2357 cp.authentication = hci_get_auth_req(conn);
2358
2359 hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY,
2360 sizeof(cp), &cp);
2361 } else {
2362 struct hci_cp_io_capability_neg_reply cp;
2363
2364 bacpy(&cp.bdaddr, &ev->bdaddr);
2365 cp.reason = 0x16; /* Pairing not allowed */
2366
2367 hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_NEG_REPLY,
2368 sizeof(cp), &cp);
2369 }
2370
2371unlock:
2372 hci_dev_unlock(hdev);
2373}
2374
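The reply/negative-reply split above reduces to one predicate: answer with our IO capability if the adapter is pairable or the remote only asks for no-bonding, otherwise refuse with reason 0x16 ("pairing not allowed"). A hypothetical stand-alone model:

#include <stdio.h>

#define AT_NO_BONDING			0x00
#define REASON_PAIRING_NOT_ALLOWED	0x16

/* Returns 0 for a positive reply, or the negative-reply reason code. */
static int decide_reply(int pairable, unsigned char remote_auth)
{
	/* masking off bit 0 folds the MITM variant onto its base value */
	if (pairable || (remote_auth & ~0x01) == AT_NO_BONDING)
		return 0;
	return REASON_PAIRING_NOT_ALLOWED;
}

int main(void)
{
	/* remote wants dedicated bonding but we are not pairable */
	printf("%#x\n", decide_reply(0, 0x03));	/* prints 0x16 */
	return 0;
}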
2375static inline void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb)
2376{
2377 struct hci_ev_io_capa_reply *ev = (void *) skb->data;
2378 struct hci_conn *conn;
2379
2380 BT_DBG("%s", hdev->name);
2381
2382 hci_dev_lock(hdev);
2383
2384 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
2385 if (!conn)
2386 goto unlock;
2387
2388 hci_conn_hold(conn);
2389
2390 conn->remote_cap = ev->capability;
2391 conn->remote_oob = ev->oob_data;
2392 conn->remote_auth = ev->authentication;
2393
2394unlock:
2395 hci_dev_unlock(hdev);
2396}
2397
2398static inline void hci_user_confirm_request_evt(struct hci_dev *hdev,
2399 struct sk_buff *skb)
2400{
2401 struct hci_ev_user_confirm_req *ev = (void *) skb->data;
2402
2403 BT_DBG("%s", hdev->name);
2404
2405 hci_dev_lock(hdev);
2406
2407 if (test_bit(HCI_MGMT, &hdev->flags))
2408 mgmt_user_confirm_request(hdev->id, &ev->bdaddr, ev->passkey);
1881 2409
1882 hci_dev_unlock(hdev); 2410 hci_dev_unlock(hdev);
1883} 2411}
@@ -1892,9 +2420,20 @@ static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_
1892 hci_dev_lock(hdev); 2420 hci_dev_lock(hdev);
1893 2421
1894 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); 2422 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
1895 if (conn) 2423 if (!conn)
1896 hci_conn_put(conn); 2424 goto unlock;
2425
 2426	/* To avoid duplicate auth_failed events to user space we check
 2427	 * the HCI_CONN_AUTH_PEND flag, which is set if we
 2428	 * initiated the authentication. As initiator we always get a
 2429	 * traditional auth_complete event, which is also mapped to
 2430	 * the mgmt_auth_failed event */
2431 if (!test_bit(HCI_CONN_AUTH_PEND, &conn->pend) && ev->status != 0)
2432 mgmt_auth_failed(hdev->id, &conn->dst, ev->status);
2433
2434 hci_conn_put(conn);
1897 2435
2436unlock:
1898 hci_dev_unlock(hdev); 2437 hci_dev_unlock(hdev);
1899} 2438}
1900 2439
@@ -1914,6 +2453,60 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_
1914 hci_dev_unlock(hdev); 2453 hci_dev_unlock(hdev);
1915} 2454}
1916 2455
2456static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2457{
2458 struct hci_ev_le_conn_complete *ev = (void *) skb->data;
2459 struct hci_conn *conn;
2460
2461 BT_DBG("%s status %d", hdev->name, ev->status);
2462
2463 hci_dev_lock(hdev);
2464
2465 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr);
2466 if (!conn) {
2467 conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
2468 if (!conn) {
2469 BT_ERR("No memory for new connection");
2470 hci_dev_unlock(hdev);
2471 return;
2472 }
2473 }
2474
2475 if (ev->status) {
2476 hci_proto_connect_cfm(conn, ev->status);
2477 conn->state = BT_CLOSED;
2478 hci_conn_del(conn);
2479 goto unlock;
2480 }
2481
2482 conn->handle = __le16_to_cpu(ev->handle);
2483 conn->state = BT_CONNECTED;
2484
2485 hci_conn_hold_device(conn);
2486 hci_conn_add_sysfs(conn);
2487
2488 hci_proto_connect_cfm(conn, ev->status);
2489
2490unlock:
2491 hci_dev_unlock(hdev);
2492}
2493
2494static inline void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
2495{
2496 struct hci_ev_le_meta *le_ev = (void *) skb->data;
2497
2498 skb_pull(skb, sizeof(*le_ev));
2499
2500 switch (le_ev->subevent) {
2501 case HCI_EV_LE_CONN_COMPLETE:
2502 hci_le_conn_complete_evt(hdev, skb);
2503 break;
2504
2505 default:
2506 break;
2507 }
2508}
2509
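hci_le_meta_evt() strips the meta header and dispatches on the subevent code carried in the first remaining byte. A userspace sketch of that demultiplexing over a raw byte buffer; the struct layout and length checks here are simplified assumptions:

#include <stdint.h>
#include <stdio.h>

#define LE_SUBEV_CONN_COMPLETE	0x01

static void handle_le_meta(const uint8_t *data, size_t len)
{
	if (len < 1)
		return;
	switch (data[0]) {		/* subevent code */
	case LE_SUBEV_CONN_COMPLETE:
		if (len < 4)
			return;
		/* status byte, then a little-endian 16-bit handle */
		printf("LE conn complete: status %u handle %u\n",
		       data[1], (unsigned)(data[2] | (data[3] << 8)));
		break;
	default:
		break;			/* unknown subevents are ignored */
	}
}

int main(void)
{
	const uint8_t pkt[] = { LE_SUBEV_CONN_COMPLETE, 0x00, 0x40, 0x00 };
	handle_le_meta(pkt, sizeof(pkt));
	return 0;
}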
1917void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) 2510void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
1918{ 2511{
1919 struct hci_event_hdr *hdr = (void *) skb->data; 2512 struct hci_event_hdr *hdr = (void *) skb->data;
@@ -2042,6 +2635,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
2042 hci_io_capa_request_evt(hdev, skb); 2635 hci_io_capa_request_evt(hdev, skb);
2043 break; 2636 break;
2044 2637
2638 case HCI_EV_IO_CAPA_REPLY:
2639 hci_io_capa_reply_evt(hdev, skb);
2640 break;
2641
2642 case HCI_EV_USER_CONFIRM_REQUEST:
2643 hci_user_confirm_request_evt(hdev, skb);
2644 break;
2645
2045 case HCI_EV_SIMPLE_PAIR_COMPLETE: 2646 case HCI_EV_SIMPLE_PAIR_COMPLETE:
2046 hci_simple_pair_complete_evt(hdev, skb); 2647 hci_simple_pair_complete_evt(hdev, skb);
2047 break; 2648 break;
@@ -2050,6 +2651,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
2050 hci_remote_host_features_evt(hdev, skb); 2651 hci_remote_host_features_evt(hdev, skb);
2051 break; 2652 break;
2052 2653
2654 case HCI_EV_LE_META:
2655 hci_le_meta_evt(hdev, skb);
2656 break;
2657
2053 default: 2658 default:
2054 BT_DBG("%s event 0x%x", hdev->name, event); 2659 BT_DBG("%s event 0x%x", hdev->name, event);
2055 break; 2660 break;
@@ -2083,6 +2688,6 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
2083 2688
2084 bt_cb(skb)->pkt_type = HCI_EVENT_PKT; 2689 bt_cb(skb)->pkt_type = HCI_EVENT_PKT;
2085 skb->dev = (void *) hdev; 2690 skb->dev = (void *) hdev;
2086 hci_send_to_sock(hdev, skb); 2691 hci_send_to_sock(hdev, skb, NULL);
2087 kfree_skb(skb); 2692 kfree_skb(skb);
2088} 2693}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 29827c77f6c..295e4a88fff 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -85,7 +85,8 @@ static struct bt_sock_list hci_sk_list = {
85}; 85};
86 86
87/* Send frame to RAW socket */ 87/* Send frame to RAW socket */
88void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) 88void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb,
89 struct sock *skip_sk)
89{ 90{
90 struct sock *sk; 91 struct sock *sk;
91 struct hlist_node *node; 92 struct hlist_node *node;
@@ -97,6 +98,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
97 struct hci_filter *flt; 98 struct hci_filter *flt;
98 struct sk_buff *nskb; 99 struct sk_buff *nskb;
99 100
101 if (sk == skip_sk)
102 continue;
103
100 if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) 104 if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev)
101 continue; 105 continue;
102 106
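The new skip_sk parameter turns hci_send_to_sock() into a broadcast with exclusion, so a frame injected by one socket is not echoed back to it. The same contract as a toy loop:

#include <stdio.h>

struct sock { int id; };

static void send_to_all(struct sock **list, int n, struct sock *skip_sk)
{
	int i;

	for (i = 0; i < n; i++) {
		if (list[i] == skip_sk)
			continue;	/* don't echo back to the sender */
		printf("deliver to sock %d\n", list[i]->id);
	}
}

int main(void)
{
	struct sock a = { 1 }, b = { 2 }, c = { 3 };
	struct sock *all[] = { &a, &b, &c };

	send_to_all(all, 3, &b);	/* delivers to 1 and 3 only */
	return 0;
}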
@@ -857,7 +861,7 @@ error:
857 return err; 861 return err;
858} 862}
859 863
860void __exit hci_sock_cleanup(void) 864void hci_sock_cleanup(void)
861{ 865{
862 if (bt_sock_unregister(BTPROTO_HCI) < 0) 866 if (bt_sock_unregister(BTPROTO_HCI) < 0)
863 BT_ERR("HCI socket unregistration failed"); 867 BT_ERR("HCI socket unregistration failed");
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 5fce3d6d07b..3c838a65a75 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -11,7 +11,7 @@
11 11
12static struct class *bt_class; 12static struct class *bt_class;
13 13
14struct dentry *bt_debugfs = NULL; 14struct dentry *bt_debugfs;
15EXPORT_SYMBOL_GPL(bt_debugfs); 15EXPORT_SYMBOL_GPL(bt_debugfs);
16 16
17static inline char *link_typetostr(int type) 17static inline char *link_typetostr(int type)
@@ -51,8 +51,8 @@ static ssize_t show_link_features(struct device *dev, struct device_attribute *a
51 conn->features[6], conn->features[7]); 51 conn->features[6], conn->features[7]);
52} 52}
53 53
54#define LINK_ATTR(_name,_mode,_show,_store) \ 54#define LINK_ATTR(_name, _mode, _show, _store) \
55struct device_attribute link_attr_##_name = __ATTR(_name,_mode,_show,_store) 55struct device_attribute link_attr_##_name = __ATTR(_name, _mode, _show, _store)
56 56
57static LINK_ATTR(type, S_IRUGO, show_link_type, NULL); 57static LINK_ATTR(type, S_IRUGO, show_link_type, NULL);
58static LINK_ATTR(address, S_IRUGO, show_link_address, NULL); 58static LINK_ATTR(address, S_IRUGO, show_link_address, NULL);
@@ -461,6 +461,56 @@ static const struct file_operations blacklist_fops = {
461 .llseek = seq_lseek, 461 .llseek = seq_lseek,
462 .release = single_release, 462 .release = single_release,
463}; 463};
464
465static void print_bt_uuid(struct seq_file *f, u8 *uuid)
466{
467 u32 data0, data4;
468 u16 data1, data2, data3, data5;
469
470 memcpy(&data0, &uuid[0], 4);
471 memcpy(&data1, &uuid[4], 2);
472 memcpy(&data2, &uuid[6], 2);
473 memcpy(&data3, &uuid[8], 2);
474 memcpy(&data4, &uuid[10], 4);
475 memcpy(&data5, &uuid[14], 2);
476
477 seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n",
478 ntohl(data0), ntohs(data1), ntohs(data2),
479 ntohs(data3), ntohl(data4), ntohs(data5));
480}
481
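print_bt_uuid() treats the 16 stored bytes as big-endian and prints them in the canonical 8-4-4-4-12 form. The same routine as a compilable userspace program, exercised with the well-known Serial Port Profile UUID:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void print_uuid128(const uint8_t *uuid)
{
	uint32_t d0, d4;
	uint16_t d1, d2, d3, d5;

	memcpy(&d0, &uuid[0], 4);
	memcpy(&d1, &uuid[4], 2);
	memcpy(&d2, &uuid[6], 2);
	memcpy(&d3, &uuid[8], 2);
	memcpy(&d4, &uuid[10], 4);
	memcpy(&d5, &uuid[14], 2);

	printf("%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n",
	       ntohl(d0), ntohs(d1), ntohs(d2),
	       ntohs(d3), ntohl(d4), ntohs(d5));
}

int main(void)
{
	/* 00001101-0000-1000-8000-00805f9b34fb: the SPP base UUID */
	const uint8_t spp[16] = { 0x00, 0x00, 0x11, 0x01, 0x00, 0x00,
				  0x10, 0x00, 0x80, 0x00, 0x00, 0x80,
				  0x5f, 0x9b, 0x34, 0xfb };
	print_uuid128(spp);
	return 0;
}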
482static int uuids_show(struct seq_file *f, void *p)
483{
484 struct hci_dev *hdev = f->private;
485 struct list_head *l;
486
487 hci_dev_lock_bh(hdev);
488
489 list_for_each(l, &hdev->uuids) {
490 struct bt_uuid *uuid;
491
492 uuid = list_entry(l, struct bt_uuid, list);
493
494 print_bt_uuid(f, uuid->uuid);
495 }
496
497 hci_dev_unlock_bh(hdev);
498
499 return 0;
500}
501
502static int uuids_open(struct inode *inode, struct file *file)
503{
504 return single_open(file, uuids_show, inode->i_private);
505}
506
507static const struct file_operations uuids_fops = {
508 .open = uuids_open,
509 .read = seq_read,
510 .llseek = seq_lseek,
511 .release = single_release,
512};
513
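The uuids file follows the usual debugfs single_open()/seq_file idiom. For reference, the bare pattern as a minimal module sketch; the "example" names are placeholders, not part of this patch:

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *example_dir;

static int example_show(struct seq_file *f, void *p)
{
	seq_printf(f, "hello from debugfs\n");
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	/* single_open() stashes inode->i_private as seq_file->private */
	return single_open(file, example_show, inode->i_private);
}

static const struct file_operations example_fops = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init example_init(void)
{
	example_dir = debugfs_create_dir("example", NULL);
	debugfs_create_file("state", 0444, example_dir, NULL, &example_fops);
	return 0;
}

static void __exit example_exit(void)
{
	debugfs_remove_recursive(example_dir);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");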
464int hci_register_sysfs(struct hci_dev *hdev) 514int hci_register_sysfs(struct hci_dev *hdev)
465{ 515{
466 struct device *dev = &hdev->dev; 516 struct device *dev = &hdev->dev;
@@ -493,6 +543,8 @@ int hci_register_sysfs(struct hci_dev *hdev)
493 debugfs_create_file("blacklist", 0444, hdev->debugfs, 543 debugfs_create_file("blacklist", 0444, hdev->debugfs,
494 hdev, &blacklist_fops); 544 hdev, &blacklist_fops);
495 545
546 debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
547
496 return 0; 548 return 0;
497} 549}
498 550
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 29544c21f4b..5ec12971af6 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -36,6 +36,7 @@
36#include <linux/file.h> 36#include <linux/file.h>
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/wait.h> 38#include <linux/wait.h>
39#include <linux/mutex.h>
39#include <net/sock.h> 40#include <net/sock.h>
40 41
41#include <linux/input.h> 42#include <linux/input.h>
@@ -157,7 +158,8 @@ static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev,
157 158
158 session->leds = newleds; 159 session->leds = newleds;
159 160
160 if (!(skb = alloc_skb(3, GFP_ATOMIC))) { 161 skb = alloc_skb(3, GFP_ATOMIC);
162 if (!skb) {
161 BT_ERR("Can't allocate memory for new frame"); 163 BT_ERR("Can't allocate memory for new frame");
162 return -ENOMEM; 164 return -ENOMEM;
163 } 165 }
@@ -250,7 +252,8 @@ static int __hidp_send_ctrl_message(struct hidp_session *session,
250 252
251 BT_DBG("session %p data %p size %d", session, data, size); 253 BT_DBG("session %p data %p size %d", session, data, size);
252 254
253 if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { 255 skb = alloc_skb(size + 1, GFP_ATOMIC);
256 if (!skb) {
254 BT_ERR("Can't allocate memory for new frame"); 257 BT_ERR("Can't allocate memory for new frame");
255 return -ENOMEM; 258 return -ENOMEM;
256 } 259 }
@@ -283,7 +286,8 @@ static int hidp_queue_report(struct hidp_session *session,
283 286
284 BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size); 287 BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size);
285 288
286 if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { 289 skb = alloc_skb(size + 1, GFP_ATOMIC);
290 if (!skb) {
287 BT_ERR("Can't allocate memory for new frame"); 291 BT_ERR("Can't allocate memory for new frame");
288 return -ENOMEM; 292 return -ENOMEM;
289 } 293 }
@@ -313,24 +317,144 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep
313 return hidp_queue_report(session, buf, rsize); 317 return hidp_queue_report(session, buf, rsize);
314} 318}
315 319
320static int hidp_get_raw_report(struct hid_device *hid,
321 unsigned char report_number,
322 unsigned char *data, size_t count,
323 unsigned char report_type)
324{
325 struct hidp_session *session = hid->driver_data;
326 struct sk_buff *skb;
327 size_t len;
328 int numbered_reports = hid->report_enum[report_type].numbered;
329
330 switch (report_type) {
331 case HID_FEATURE_REPORT:
332 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE;
333 break;
334 case HID_INPUT_REPORT:
335 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_INPUT;
336 break;
337 case HID_OUTPUT_REPORT:
338 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_OUPUT;
339 break;
340 default:
341 return -EINVAL;
342 }
343
344 if (mutex_lock_interruptible(&session->report_mutex))
345 return -ERESTARTSYS;
346
347 /* Set up our wait, and send the report request to the device. */
348 session->waiting_report_type = report_type & HIDP_DATA_RTYPE_MASK;
349 session->waiting_report_number = numbered_reports ? report_number : -1;
350 set_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
351 data[0] = report_number;
352 if (hidp_send_ctrl_message(hid->driver_data, report_type, data, 1))
353 goto err_eio;
354
355 /* Wait for the return of the report. The returned report
 356	   is placed in session->report_return. */
357 while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) {
358 int res;
359
360 res = wait_event_interruptible_timeout(session->report_queue,
361 !test_bit(HIDP_WAITING_FOR_RETURN, &session->flags),
362 5*HZ);
363 if (res == 0) {
364 /* timeout */
365 goto err_eio;
366 }
367 if (res < 0) {
368 /* signal */
369 goto err_restartsys;
370 }
371 }
372
373 skb = session->report_return;
374 if (skb) {
375 len = skb->len < count ? skb->len : count;
376 memcpy(data, skb->data, len);
377
378 kfree_skb(skb);
379 session->report_return = NULL;
380 } else {
381 /* Device returned a HANDSHAKE, indicating protocol error. */
382 len = -EIO;
383 }
384
385 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
386 mutex_unlock(&session->report_mutex);
387
388 return len;
389
390err_restartsys:
391 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
392 mutex_unlock(&session->report_mutex);
393 return -ERESTARTSYS;
394err_eio:
395 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
396 mutex_unlock(&session->report_mutex);
397 return -EIO;
398}
399
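hidp_get_raw_report() serializes requests with a mutex, marks itself as waiting, and sleeps until the receive path stores the reply and wakes it, or five seconds pass. A pthread model of that handshake (illustrative only; none of this is kernel API):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int waiting_for_return;
static char report[32];

static void *receive_path(void *arg)
{
	pthread_mutex_lock(&lock);
	snprintf(report, sizeof(report), "report-data");
	waiting_for_return = 0;		/* clear_bit(HIDP_WAITING_FOR_RETURN) */
	pthread_cond_signal(&cond);	/* wake_up_interruptible() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t rx;
	struct timespec deadline;
	int err = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 5;		/* mirrors the 5*HZ timeout */

	pthread_mutex_lock(&lock);
	waiting_for_return = 1;		/* set_bit(HIDP_WAITING_FOR_RETURN) */
	pthread_create(&rx, NULL, receive_path, NULL);

	while (waiting_for_return && err == 0)
		err = pthread_cond_timedwait(&cond, &lock, &deadline);

	if (err == 0)
		printf("got: %s\n", report);
	else
		printf("timed out\n");	/* the kernel path returns -EIO */
	pthread_mutex_unlock(&lock);

	pthread_join(rx, NULL);
	return 0;
}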
316static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, 400static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count,
317 unsigned char report_type) 401 unsigned char report_type)
318{ 402{
403 struct hidp_session *session = hid->driver_data;
404 int ret;
405
319 switch (report_type) { 406 switch (report_type) {
320 case HID_FEATURE_REPORT: 407 case HID_FEATURE_REPORT:
321 report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; 408 report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
322 break; 409 break;
323 case HID_OUTPUT_REPORT: 410 case HID_OUTPUT_REPORT:
324 report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; 411 report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT;
325 break; 412 break;
326 default: 413 default:
327 return -EINVAL; 414 return -EINVAL;
328 } 415 }
329 416
417 if (mutex_lock_interruptible(&session->report_mutex))
418 return -ERESTARTSYS;
419
420 /* Set up our wait, and send the report request to the device. */
421 set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
330 if (hidp_send_ctrl_message(hid->driver_data, report_type, 422 if (hidp_send_ctrl_message(hid->driver_data, report_type,
331 data, count)) 423 data, count)) {
332 return -ENOMEM; 424 ret = -ENOMEM;
333 return count; 425 goto err;
426 }
427
428 /* Wait for the ACK from the device. */
429 while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) {
430 int res;
431
432 res = wait_event_interruptible_timeout(session->report_queue,
433 !test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags),
434 10*HZ);
435 if (res == 0) {
436 /* timeout */
437 ret = -EIO;
438 goto err;
439 }
440 if (res < 0) {
441 /* signal */
442 ret = -ERESTARTSYS;
443 goto err;
444 }
445 }
446
447 if (!session->output_report_success) {
448 ret = -EIO;
449 goto err;
450 }
451
452 ret = count;
453
454err:
455 clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
456 mutex_unlock(&session->report_mutex);
457 return ret;
334} 458}
335 459
336static void hidp_idle_timeout(unsigned long arg) 460static void hidp_idle_timeout(unsigned long arg)
@@ -357,16 +481,22 @@ static void hidp_process_handshake(struct hidp_session *session,
357 unsigned char param) 481 unsigned char param)
358{ 482{
359 BT_DBG("session %p param 0x%02x", session, param); 483 BT_DBG("session %p param 0x%02x", session, param);
484 session->output_report_success = 0; /* default condition */
360 485
361 switch (param) { 486 switch (param) {
362 case HIDP_HSHK_SUCCESSFUL: 487 case HIDP_HSHK_SUCCESSFUL:
363 /* FIXME: Call into SET_ GET_ handlers here */ 488 /* FIXME: Call into SET_ GET_ handlers here */
489 session->output_report_success = 1;
364 break; 490 break;
365 491
366 case HIDP_HSHK_NOT_READY: 492 case HIDP_HSHK_NOT_READY:
367 case HIDP_HSHK_ERR_INVALID_REPORT_ID: 493 case HIDP_HSHK_ERR_INVALID_REPORT_ID:
368 case HIDP_HSHK_ERR_UNSUPPORTED_REQUEST: 494 case HIDP_HSHK_ERR_UNSUPPORTED_REQUEST:
369 case HIDP_HSHK_ERR_INVALID_PARAMETER: 495 case HIDP_HSHK_ERR_INVALID_PARAMETER:
496 if (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) {
497 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
498 wake_up_interruptible(&session->report_queue);
499 }
370 /* FIXME: Call into SET_ GET_ handlers here */ 500 /* FIXME: Call into SET_ GET_ handlers here */
371 break; 501 break;
372 502
@@ -385,6 +515,12 @@ static void hidp_process_handshake(struct hidp_session *session,
385 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); 515 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
386 break; 516 break;
387 } 517 }
518
519 /* Wake up the waiting thread. */
520 if (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) {
521 clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
522 wake_up_interruptible(&session->report_queue);
523 }
388} 524}
389 525
390static void hidp_process_hid_control(struct hidp_session *session, 526static void hidp_process_hid_control(struct hidp_session *session,
@@ -403,9 +539,11 @@ static void hidp_process_hid_control(struct hidp_session *session,
403 } 539 }
404} 540}
405 541
406static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, 542/* Returns true if the passed-in skb should be freed by the caller. */
543static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
407 unsigned char param) 544 unsigned char param)
408{ 545{
546 int done_with_skb = 1;
409 BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param); 547 BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param);
410 548
411 switch (param) { 549 switch (param) {
@@ -417,7 +555,6 @@ static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
417 555
418 if (session->hid) 556 if (session->hid)
419 hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0); 557 hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0);
420
421 break; 558 break;
422 559
423 case HIDP_DATA_RTYPE_OTHER: 560 case HIDP_DATA_RTYPE_OTHER:
@@ -429,12 +566,27 @@ static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
429 __hidp_send_ctrl_message(session, 566 __hidp_send_ctrl_message(session,
430 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); 567 HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
431 } 568 }
569
570 if (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) &&
571 param == session->waiting_report_type) {
572 if (session->waiting_report_number < 0 ||
573 session->waiting_report_number == skb->data[0]) {
574 /* hidp_get_raw_report() is waiting on this report. */
575 session->report_return = skb;
576 done_with_skb = 0;
577 clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
578 wake_up_interruptible(&session->report_queue);
579 }
580 }
581
582 return done_with_skb;
432} 583}
433 584
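The done_with_skb return value encodes buffer ownership: when the receive path hands the skb to the waiting reader, the caller must not free it. The same contract in miniature with malloc/free:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *stashed_report;	/* plays the role of session->report_return */

/* Returns 1 if the caller still owns (and must free) buf. */
static int process_data(char *buf, int reader_waiting)
{
	if (reader_waiting) {
		stashed_report = buf;	/* ownership moves to the reader */
		return 0;
	}
	return 1;
}

int main(void)
{
	char *buf = strdup("input report");

	if (process_data(buf, 1))
		free(buf);		/* only when ownership was kept */

	puts(stashed_report);
	free(stashed_report);		/* the reader frees it eventually */
	return 0;
}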
434static void hidp_recv_ctrl_frame(struct hidp_session *session, 585static void hidp_recv_ctrl_frame(struct hidp_session *session,
435 struct sk_buff *skb) 586 struct sk_buff *skb)
436{ 587{
437 unsigned char hdr, type, param; 588 unsigned char hdr, type, param;
589 int free_skb = 1;
438 590
439 BT_DBG("session %p skb %p len %d", session, skb, skb->len); 591 BT_DBG("session %p skb %p len %d", session, skb, skb->len);
440 592
@@ -454,7 +606,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
454 break; 606 break;
455 607
456 case HIDP_TRANS_DATA: 608 case HIDP_TRANS_DATA:
457 hidp_process_data(session, skb, param); 609 free_skb = hidp_process_data(session, skb, param);
458 break; 610 break;
459 611
460 default: 612 default:
@@ -463,7 +615,8 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
463 break; 615 break;
464 } 616 }
465 617
466 kfree_skb(skb); 618 if (free_skb)
619 kfree_skb(skb);
467} 620}
468 621
469static void hidp_recv_intr_frame(struct hidp_session *session, 622static void hidp_recv_intr_frame(struct hidp_session *session,
@@ -563,6 +716,8 @@ static int hidp_session(void *arg)
563 init_waitqueue_entry(&intr_wait, current); 716 init_waitqueue_entry(&intr_wait, current);
564 add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); 717 add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
565 add_wait_queue(sk_sleep(intr_sk), &intr_wait); 718 add_wait_queue(sk_sleep(intr_sk), &intr_wait);
719 session->waiting_for_startup = 0;
720 wake_up_interruptible(&session->startup_queue);
566 while (!atomic_read(&session->terminate)) { 721 while (!atomic_read(&session->terminate)) {
567 set_current_state(TASK_INTERRUPTIBLE); 722 set_current_state(TASK_INTERRUPTIBLE);
568 723
@@ -754,6 +909,8 @@ static struct hid_ll_driver hidp_hid_driver = {
754 .hidinput_input_event = hidp_hidinput_event, 909 .hidinput_input_event = hidp_hidinput_event,
755}; 910};
756 911
912/* This function sets up the hid device. It does not add it
913 to the HID system. That is done in hidp_add_connection(). */
757static int hidp_setup_hid(struct hidp_session *session, 914static int hidp_setup_hid(struct hidp_session *session,
758 struct hidp_connadd_req *req) 915 struct hidp_connadd_req *req)
759{ 916{
@@ -793,18 +950,11 @@ static int hidp_setup_hid(struct hidp_session *session,
793 hid->dev.parent = hidp_get_device(session); 950 hid->dev.parent = hidp_get_device(session);
794 hid->ll_driver = &hidp_hid_driver; 951 hid->ll_driver = &hidp_hid_driver;
795 952
953 hid->hid_get_raw_report = hidp_get_raw_report;
796 hid->hid_output_raw_report = hidp_output_raw_report; 954 hid->hid_output_raw_report = hidp_output_raw_report;
797 955
798 err = hid_add_device(hid);
799 if (err < 0)
800 goto failed;
801
802 return 0; 956 return 0;
803 957
804failed:
805 hid_destroy_device(hid);
806 session->hid = NULL;
807
808fault: 958fault:
809 kfree(session->rd_data); 959 kfree(session->rd_data);
810 session->rd_data = NULL; 960 session->rd_data = NULL;
@@ -853,6 +1003,10 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
853 skb_queue_head_init(&session->ctrl_transmit); 1003 skb_queue_head_init(&session->ctrl_transmit);
854 skb_queue_head_init(&session->intr_transmit); 1004 skb_queue_head_init(&session->intr_transmit);
855 1005
1006 mutex_init(&session->report_mutex);
1007 init_waitqueue_head(&session->report_queue);
1008 init_waitqueue_head(&session->startup_queue);
1009 session->waiting_for_startup = 1;
856 session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); 1010 session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
857 session->idle_to = req->idle_to; 1011 session->idle_to = req->idle_to;
858 1012
@@ -875,6 +1029,14 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
875 err = kernel_thread(hidp_session, session, CLONE_KERNEL); 1029 err = kernel_thread(hidp_session, session, CLONE_KERNEL);
876 if (err < 0) 1030 if (err < 0)
877 goto unlink; 1031 goto unlink;
1032 while (session->waiting_for_startup) {
1033 wait_event_interruptible(session->startup_queue,
1034 !session->waiting_for_startup);
1035 }
1036
1037 err = hid_add_device(session->hid);
1038 if (err < 0)
1039 goto err_add_device;
878 1040
879 if (session->input) { 1041 if (session->input) {
880 hidp_send_ctrl_message(session, 1042 hidp_send_ctrl_message(session,
@@ -888,6 +1050,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
888 up_write(&hidp_session_sem); 1050 up_write(&hidp_session_sem);
889 return 0; 1051 return 0;
890 1052
1053err_add_device:
1054 hid_destroy_device(session->hid);
1055 session->hid = NULL;
1056 atomic_inc(&session->terminate);
1057 hidp_schedule(session);
1058
891unlink: 1059unlink:
892 hidp_del_timer(session); 1060 hidp_del_timer(session);
893 1061
@@ -1016,8 +1184,6 @@ static int __init hidp_init(void)
1016{ 1184{
1017 int ret; 1185 int ret;
1018 1186
1019 l2cap_load();
1020
1021 BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); 1187 BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
1022 1188
1023 ret = hid_register_driver(&hidp_driver); 1189 ret = hid_register_driver(&hidp_driver);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 8d934a19da0..13de5fa0348 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -80,6 +80,8 @@
80#define HIDP_VIRTUAL_CABLE_UNPLUG 0 80#define HIDP_VIRTUAL_CABLE_UNPLUG 0
81#define HIDP_BOOT_PROTOCOL_MODE 1 81#define HIDP_BOOT_PROTOCOL_MODE 1
82#define HIDP_BLUETOOTH_VENDOR_ID 9 82#define HIDP_BLUETOOTH_VENDOR_ID 9
83#define HIDP_WAITING_FOR_RETURN 10
84#define HIDP_WAITING_FOR_SEND_ACK 11
83 85
84struct hidp_connadd_req { 86struct hidp_connadd_req {
85 int ctrl_sock; // Connected control socket 87 int ctrl_sock; // Connected control socket
@@ -154,9 +156,22 @@ struct hidp_session {
154 struct sk_buff_head ctrl_transmit; 156 struct sk_buff_head ctrl_transmit;
155 struct sk_buff_head intr_transmit; 157 struct sk_buff_head intr_transmit;
156 158
159 /* Used in hidp_get_raw_report() */
160 int waiting_report_type; /* HIDP_DATA_RTYPE_* */
161 int waiting_report_number; /* -1 for not numbered */
162 struct mutex report_mutex;
163 struct sk_buff *report_return;
164 wait_queue_head_t report_queue;
165
166 /* Used in hidp_output_raw_report() */
167 int output_report_success; /* boolean */
168
157 /* Report descriptor */ 169 /* Report descriptor */
158 __u8 *rd_data; 170 __u8 *rd_data;
159 uint rd_size; 171 uint rd_size;
172
173 wait_queue_head_t startup_queue;
174 int waiting_for_startup;
160}; 175};
161 176
162static inline void hidp_schedule(struct hidp_session *session) 177static inline void hidp_schedule(struct hidp_session *session)
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap_core.c
index 675614e38e1..c9f9cecca52 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap_core.c
@@ -24,7 +24,7 @@
24 SOFTWARE IS DISCLAIMED. 24 SOFTWARE IS DISCLAIMED.
25*/ 25*/
26 26
27/* Bluetooth L2CAP core and sockets. */ 27/* Bluetooth L2CAP core. */
28 28
29#include <linux/module.h> 29#include <linux/module.h>
30 30
@@ -55,79 +55,24 @@
55#include <net/bluetooth/hci_core.h> 55#include <net/bluetooth/hci_core.h>
56#include <net/bluetooth/l2cap.h> 56#include <net/bluetooth/l2cap.h>
57 57
58#define VERSION "2.15" 58int disable_ertm;
59
60static int disable_ertm;
61 59
62static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; 60static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
63static u8 l2cap_fixed_chan[8] = { 0x02, }; 61static u8 l2cap_fixed_chan[8] = { 0x02, };
64 62
65static const struct proto_ops l2cap_sock_ops;
66
67static struct workqueue_struct *_busy_wq; 63static struct workqueue_struct *_busy_wq;
68 64
69static struct bt_sock_list l2cap_sk_list = { 65struct bt_sock_list l2cap_sk_list = {
70 .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) 66 .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
71}; 67};
72 68
73static void l2cap_busy_work(struct work_struct *work); 69static void l2cap_busy_work(struct work_struct *work);
74 70
75static void __l2cap_sock_close(struct sock *sk, int reason);
76static void l2cap_sock_close(struct sock *sk);
77static void l2cap_sock_kill(struct sock *sk);
78
79static int l2cap_build_conf_req(struct sock *sk, void *data);
80static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, 71static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
81 u8 code, u8 ident, u16 dlen, void *data); 72 u8 code, u8 ident, u16 dlen, void *data);
82 73
83static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); 74static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
84 75
85/* ---- L2CAP timers ---- */
86static void l2cap_sock_set_timer(struct sock *sk, long timeout)
87{
88 BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
89 sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
90}
91
92static void l2cap_sock_clear_timer(struct sock *sk)
93{
94 BT_DBG("sock %p state %d", sk, sk->sk_state);
95 sk_stop_timer(sk, &sk->sk_timer);
96}
97
98static void l2cap_sock_timeout(unsigned long arg)
99{
100 struct sock *sk = (struct sock *) arg;
101 int reason;
102
103 BT_DBG("sock %p state %d", sk, sk->sk_state);
104
105 bh_lock_sock(sk);
106
107 if (sock_owned_by_user(sk)) {
108 /* sk is owned by user. Try again later */
109 l2cap_sock_set_timer(sk, HZ / 5);
110 bh_unlock_sock(sk);
111 sock_put(sk);
112 return;
113 }
114
115 if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
116 reason = ECONNREFUSED;
117 else if (sk->sk_state == BT_CONNECT &&
118 l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
119 reason = ECONNREFUSED;
120 else
121 reason = ETIMEDOUT;
122
123 __l2cap_sock_close(sk, reason);
124
125 bh_unlock_sock(sk);
126
127 l2cap_sock_kill(sk);
128 sock_put(sk);
129}
130
131/* ---- L2CAP channels ---- */ 76/* ---- L2CAP channels ---- */
132static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) 77static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
133{ 78{
@@ -236,8 +181,16 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
236 l2cap_pi(sk)->conn = conn; 181 l2cap_pi(sk)->conn = conn;
237 182
238 if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) { 183 if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) {
239 /* Alloc CID for connection-oriented socket */ 184 if (conn->hcon->type == LE_LINK) {
240 l2cap_pi(sk)->scid = l2cap_alloc_cid(l); 185 /* LE connection */
186 l2cap_pi(sk)->omtu = L2CAP_LE_DEFAULT_MTU;
187 l2cap_pi(sk)->scid = L2CAP_CID_LE_DATA;
188 l2cap_pi(sk)->dcid = L2CAP_CID_LE_DATA;
189 } else {
190 /* Alloc CID for connection-oriented socket */
191 l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
192 l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
193 }
241 } else if (sk->sk_type == SOCK_DGRAM) { 194 } else if (sk->sk_type == SOCK_DGRAM) {
242 /* Connectionless socket */ 195 /* Connectionless socket */
243 l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS; 196 l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS;
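On LE links the connection-oriented channel gets the fixed CID L2CAP_CID_LE_DATA and the LE default MTU instead of a dynamically allocated CID. Reduced to a selector; the 0x0004 and 0x0040 values come from the L2CAP spec, and the allocator is a placeholder:

#include <stdio.h>

#define CID_LE_DATA	0x0004
#define CID_DYN_START	0x0040

static unsigned short next_dyn_cid = CID_DYN_START;

static unsigned short assign_scid(int is_le_link)
{
	if (is_le_link)
		return CID_LE_DATA;	/* fixed channel, no negotiation */
	return next_dyn_cid++;		/* stand-in for l2cap_alloc_cid() */
}

int main(void)
{
	printf("LE: 0x%04x classic: 0x%04x\n", assign_scid(1), assign_scid(0));
	return 0;
}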
@@ -258,7 +211,7 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
258 211
259/* Delete channel. 212/* Delete channel.
260 * Must be called on the locked socket. */ 213 * Must be called on the locked socket. */
261static void l2cap_chan_del(struct sock *sk, int err) 214void l2cap_chan_del(struct sock *sk, int err)
262{ 215{
263 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 216 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
264 struct sock *parent = bt_sk(sk)->parent; 217 struct sock *parent = bt_sk(sk)->parent;
@@ -348,7 +301,7 @@ static inline int l2cap_check_security(struct sock *sk)
348 auth_type); 301 auth_type);
349} 302}
350 303
351static inline u8 l2cap_get_ident(struct l2cap_conn *conn) 304u8 l2cap_get_ident(struct l2cap_conn *conn)
352{ 305{
353 u8 id; 306 u8 id;
354 307
@@ -370,16 +323,22 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
370 return id; 323 return id;
371} 324}
372 325
373static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) 326void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
374{ 327{
375 struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); 328 struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data);
329 u8 flags;
376 330
377 BT_DBG("code 0x%2.2x", code); 331 BT_DBG("code 0x%2.2x", code);
378 332
379 if (!skb) 333 if (!skb)
380 return; 334 return;
381 335
382 hci_send_acl(conn->hcon, skb, 0); 336 if (lmp_no_flush_capable(conn->hcon->hdev))
337 flags = ACL_START_NO_FLUSH;
338 else
339 flags = ACL_START;
340
341 hci_send_acl(conn->hcon, skb, flags);
383} 342}
384 343
385static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) 344static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
@@ -389,6 +348,7 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
389 struct l2cap_conn *conn = pi->conn; 348 struct l2cap_conn *conn = pi->conn;
390 struct sock *sk = (struct sock *)pi; 349 struct sock *sk = (struct sock *)pi;
391 int count, hlen = L2CAP_HDR_SIZE + 2; 350 int count, hlen = L2CAP_HDR_SIZE + 2;
351 u8 flags;
392 352
393 if (sk->sk_state != BT_CONNECTED) 353 if (sk->sk_state != BT_CONNECTED)
394 return; 354 return;
@@ -425,7 +385,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
425 put_unaligned_le16(fcs, skb_put(skb, 2)); 385 put_unaligned_le16(fcs, skb_put(skb, 2));
426 } 386 }
427 387
428 hci_send_acl(pi->conn->hcon, skb, 0); 388 if (lmp_no_flush_capable(conn->hcon->hdev))
389 flags = ACL_START_NO_FLUSH;
390 else
391 flags = ACL_START;
392
393 hci_send_acl(pi->conn->hcon, skb, flags);
429} 394}
430 395
431static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) 396static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
@@ -496,7 +461,7 @@ static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
496 } 461 }
497} 462}
498 463
499static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) 464void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
500{ 465{
501 struct l2cap_disconn_req req; 466 struct l2cap_disconn_req req;
502 467
@@ -624,6 +589,82 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
624 } 589 }
625} 590}
626 591
592/* Find socket with cid and source bdaddr.
593 * Returns closest match, locked.
594 */
595static struct sock *l2cap_get_sock_by_scid(int state, __le16 cid, bdaddr_t *src)
596{
597 struct sock *s, *sk = NULL, *sk1 = NULL;
598 struct hlist_node *node;
599
600 read_lock(&l2cap_sk_list.lock);
601
602 sk_for_each(sk, node, &l2cap_sk_list.head) {
603 if (state && sk->sk_state != state)
604 continue;
605
606 if (l2cap_pi(sk)->scid == cid) {
607 /* Exact match. */
608 if (!bacmp(&bt_sk(sk)->src, src))
609 break;
610
611 /* Closest match */
612 if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
613 sk1 = sk;
614 }
615 }
616 s = node ? sk : sk1;
617 if (s)
618 bh_lock_sock(s);
619 read_unlock(&l2cap_sk_list.lock);
620
621 return s;
622}
623
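"Closest match" here means an exact source-address hit wins, while a listener bound to BDADDR_ANY is remembered as a fallback. A compact model over an array:

#include <stdio.h>
#include <string.h>

struct entry { const char *src; int cid; };

static const struct entry table[] = {
	{ "00:00:00:00:00:00", 4 },	/* wildcard listener */
	{ "AA:BB:CC:DD:EE:FF", 4 },	/* bound to one adapter */
};

static const struct entry *lookup(int cid, const char *src)
{
	const struct entry *fallback = NULL;
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		if (table[i].cid != cid)
			continue;
		if (strcmp(table[i].src, src) == 0)
			return &table[i];	/* exact match */
		if (strcmp(table[i].src, "00:00:00:00:00:00") == 0)
			fallback = &table[i];	/* closest match */
	}
	return fallback;
}

int main(void)
{
	const struct entry *e = lookup(4, "AA:BB:CC:DD:EE:FF");

	printf("matched %s\n", e ? e->src : "none");
	return 0;
}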
624static void l2cap_le_conn_ready(struct l2cap_conn *conn)
625{
626 struct l2cap_chan_list *list = &conn->chan_list;
627 struct sock *parent, *uninitialized_var(sk);
628
629 BT_DBG("");
630
631 /* Check if we have socket listening on cid */
632 parent = l2cap_get_sock_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA,
633 conn->src);
634 if (!parent)
635 return;
636
637 /* Check for backlog size */
638 if (sk_acceptq_is_full(parent)) {
639 BT_DBG("backlog full %d", parent->sk_ack_backlog);
640 goto clean;
641 }
642
643 sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC);
644 if (!sk)
645 goto clean;
646
647 write_lock_bh(&list->lock);
648
649 hci_conn_hold(conn->hcon);
650
651 l2cap_sock_init(sk, parent);
652 bacpy(&bt_sk(sk)->src, conn->src);
653 bacpy(&bt_sk(sk)->dst, conn->dst);
654
655 __l2cap_chan_add(conn, sk, parent);
656
657 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
658
659 sk->sk_state = BT_CONNECTED;
660 parent->sk_data_ready(parent, 0);
661
662 write_unlock_bh(&list->lock);
663
664clean:
665 bh_unlock_sock(parent);
666}
667
627static void l2cap_conn_ready(struct l2cap_conn *conn) 668static void l2cap_conn_ready(struct l2cap_conn *conn)
628{ 669{
629 struct l2cap_chan_list *l = &conn->chan_list; 670 struct l2cap_chan_list *l = &conn->chan_list;
@@ -631,11 +672,20 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
631 672
632 BT_DBG("conn %p", conn); 673 BT_DBG("conn %p", conn);
633 674
675 if (!conn->hcon->out && conn->hcon->type == LE_LINK)
676 l2cap_le_conn_ready(conn);
677
634 read_lock(&l->lock); 678 read_lock(&l->lock);
635 679
636 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 680 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
637 bh_lock_sock(sk); 681 bh_lock_sock(sk);
638 682
683 if (conn->hcon->type == LE_LINK) {
684 l2cap_sock_clear_timer(sk);
685 sk->sk_state = BT_CONNECTED;
686 sk->sk_state_change(sk);
687 }
688
639 if (sk->sk_type != SOCK_SEQPACKET && 689 if (sk->sk_type != SOCK_SEQPACKET &&
640 sk->sk_type != SOCK_STREAM) { 690 sk->sk_type != SOCK_STREAM) {
641 l2cap_sock_clear_timer(sk); 691 l2cap_sock_clear_timer(sk);
@@ -694,7 +744,11 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
694 744
695 BT_DBG("hcon %p conn %p", hcon, conn); 745 BT_DBG("hcon %p conn %p", hcon, conn);
696 746
697 conn->mtu = hcon->hdev->acl_mtu; 747 if (hcon->hdev->le_mtu && hcon->type == LE_LINK)
748 conn->mtu = hcon->hdev->le_mtu;
749 else
750 conn->mtu = hcon->hdev->acl_mtu;
751
698 conn->src = &hcon->hdev->bdaddr; 752 conn->src = &hcon->hdev->bdaddr;
699 conn->dst = &hcon->dst; 753 conn->dst = &hcon->dst;
700 754
@@ -703,7 +757,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
703 spin_lock_init(&conn->lock); 757 spin_lock_init(&conn->lock);
704 rwlock_init(&conn->chan_list.lock); 758 rwlock_init(&conn->chan_list.lock);
705 759
706 setup_timer(&conn->info_timer, l2cap_info_timeout, 760 if (hcon->type != LE_LINK)
761 setup_timer(&conn->info_timer, l2cap_info_timeout,
707 (unsigned long) conn); 762 (unsigned long) conn);
708 763
709 conn->disc_reason = 0x13; 764 conn->disc_reason = 0x13;
@@ -747,17 +802,6 @@ static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, stru
747} 802}
748 803
749/* ---- Socket interface ---- */ 804/* ---- Socket interface ---- */
750static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src)
751{
752 struct sock *sk;
753 struct hlist_node *node;
754 sk_for_each(sk, node, &l2cap_sk_list.head)
755 if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src))
756 goto found;
757 sk = NULL;
758found:
759 return sk;
760}
761 805
762/* Find socket with psm and source bdaddr. 806/* Find socket with psm and source bdaddr.
763 * Returns closest match. 807 * Returns closest match.
@@ -789,277 +833,7 @@ static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
789 return node ? sk : sk1; 833 return node ? sk : sk1;
790} 834}
791 835
792static void l2cap_sock_destruct(struct sock *sk) 836int l2cap_do_connect(struct sock *sk)
793{
794 BT_DBG("sk %p", sk);
795
796 skb_queue_purge(&sk->sk_receive_queue);
797 skb_queue_purge(&sk->sk_write_queue);
798}
799
800static void l2cap_sock_cleanup_listen(struct sock *parent)
801{
802 struct sock *sk;
803
804 BT_DBG("parent %p", parent);
805
806 /* Close not yet accepted channels */
807 while ((sk = bt_accept_dequeue(parent, NULL)))
808 l2cap_sock_close(sk);
809
810 parent->sk_state = BT_CLOSED;
811 sock_set_flag(parent, SOCK_ZAPPED);
812}
813
814/* Kill socket (only if zapped and orphan)
815 * Must be called on unlocked socket.
816 */
817static void l2cap_sock_kill(struct sock *sk)
818{
819 if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
820 return;
821
822 BT_DBG("sk %p state %d", sk, sk->sk_state);
823
824 /* Kill poor orphan */
825 bt_sock_unlink(&l2cap_sk_list, sk);
826 sock_set_flag(sk, SOCK_DEAD);
827 sock_put(sk);
828}
829
830static void __l2cap_sock_close(struct sock *sk, int reason)
831{
832 BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
833
834 switch (sk->sk_state) {
835 case BT_LISTEN:
836 l2cap_sock_cleanup_listen(sk);
837 break;
838
839 case BT_CONNECTED:
840 case BT_CONFIG:
841 if (sk->sk_type == SOCK_SEQPACKET ||
842 sk->sk_type == SOCK_STREAM) {
843 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
844
845 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
846 l2cap_send_disconn_req(conn, sk, reason);
847 } else
848 l2cap_chan_del(sk, reason);
849 break;
850
851 case BT_CONNECT2:
852 if (sk->sk_type == SOCK_SEQPACKET ||
853 sk->sk_type == SOCK_STREAM) {
854 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
855 struct l2cap_conn_rsp rsp;
856 __u16 result;
857
858 if (bt_sk(sk)->defer_setup)
859 result = L2CAP_CR_SEC_BLOCK;
860 else
861 result = L2CAP_CR_BAD_PSM;
862 sk->sk_state = BT_DISCONN;
863
864 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
865 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
866 rsp.result = cpu_to_le16(result);
867 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
868 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
869 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
870 } else
871 l2cap_chan_del(sk, reason);
872 break;
873
874 case BT_CONNECT:
875 case BT_DISCONN:
876 l2cap_chan_del(sk, reason);
877 break;
878
879 default:
880 sock_set_flag(sk, SOCK_ZAPPED);
881 break;
882 }
883}
884
885/* Must be called on unlocked socket. */
886static void l2cap_sock_close(struct sock *sk)
887{
888 l2cap_sock_clear_timer(sk);
889 lock_sock(sk);
890 __l2cap_sock_close(sk, ECONNRESET);
891 release_sock(sk);
892 l2cap_sock_kill(sk);
893}
894
895static void l2cap_sock_init(struct sock *sk, struct sock *parent)
896{
897 struct l2cap_pinfo *pi = l2cap_pi(sk);
898
899 BT_DBG("sk %p", sk);
900
901 if (parent) {
902 sk->sk_type = parent->sk_type;
903 bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
904
905 pi->imtu = l2cap_pi(parent)->imtu;
906 pi->omtu = l2cap_pi(parent)->omtu;
907 pi->conf_state = l2cap_pi(parent)->conf_state;
908 pi->mode = l2cap_pi(parent)->mode;
909 pi->fcs = l2cap_pi(parent)->fcs;
910 pi->max_tx = l2cap_pi(parent)->max_tx;
911 pi->tx_win = l2cap_pi(parent)->tx_win;
912 pi->sec_level = l2cap_pi(parent)->sec_level;
913 pi->role_switch = l2cap_pi(parent)->role_switch;
914 pi->force_reliable = l2cap_pi(parent)->force_reliable;
915 } else {
916 pi->imtu = L2CAP_DEFAULT_MTU;
917 pi->omtu = 0;
918 if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
919 pi->mode = L2CAP_MODE_ERTM;
920 pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
921 } else {
922 pi->mode = L2CAP_MODE_BASIC;
923 }
924 pi->max_tx = L2CAP_DEFAULT_MAX_TX;
925 pi->fcs = L2CAP_FCS_CRC16;
926 pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
927 pi->sec_level = BT_SECURITY_LOW;
928 pi->role_switch = 0;
929 pi->force_reliable = 0;
930 }
931
932 /* Default config options */
933 pi->conf_len = 0;
934 pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
935 skb_queue_head_init(TX_QUEUE(sk));
936 skb_queue_head_init(SREJ_QUEUE(sk));
937 skb_queue_head_init(BUSY_QUEUE(sk));
938 INIT_LIST_HEAD(SREJ_LIST(sk));
939}
940
941static struct proto l2cap_proto = {
942 .name = "L2CAP",
943 .owner = THIS_MODULE,
944 .obj_size = sizeof(struct l2cap_pinfo)
945};
946
947static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
948{
949 struct sock *sk;
950
951 sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
952 if (!sk)
953 return NULL;
954
955 sock_init_data(sock, sk);
956 INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
957
958 sk->sk_destruct = l2cap_sock_destruct;
959 sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
960
961 sock_reset_flag(sk, SOCK_ZAPPED);
962
963 sk->sk_protocol = proto;
964 sk->sk_state = BT_OPEN;
965
966 setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
967
968 bt_sock_link(&l2cap_sk_list, sk);
969 return sk;
970}
971
972static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
973 int kern)
974{
975 struct sock *sk;
976
977 BT_DBG("sock %p", sock);
978
979 sock->state = SS_UNCONNECTED;
980
981 if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
982 sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
983 return -ESOCKTNOSUPPORT;
984
985 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
986 return -EPERM;
987
988 sock->ops = &l2cap_sock_ops;
989
990 sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
991 if (!sk)
992 return -ENOMEM;
993
994 l2cap_sock_init(sk, NULL);
995 return 0;
996}
997
998static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
999{
1000 struct sock *sk = sock->sk;
1001 struct sockaddr_l2 la;
1002 int len, err = 0;
1003
1004 BT_DBG("sk %p", sk);
1005
1006 if (!addr || addr->sa_family != AF_BLUETOOTH)
1007 return -EINVAL;
1008
1009 memset(&la, 0, sizeof(la));
1010 len = min_t(unsigned int, sizeof(la), alen);
1011 memcpy(&la, addr, len);
1012
1013 if (la.l2_cid)
1014 return -EINVAL;
1015
1016 lock_sock(sk);
1017
1018 if (sk->sk_state != BT_OPEN) {
1019 err = -EBADFD;
1020 goto done;
1021 }
1022
1023 if (la.l2_psm) {
1024 __u16 psm = __le16_to_cpu(la.l2_psm);
1025
1026 /* PSM must be odd and lsb of upper byte must be 0 */
1027 if ((psm & 0x0101) != 0x0001) {
1028 err = -EINVAL;
1029 goto done;
1030 }
1031
1032 /* Restrict usage of well-known PSMs */
1033 if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
1034 err = -EACCES;
1035 goto done;
1036 }
1037 }
1038
1039 write_lock_bh(&l2cap_sk_list.lock);
1040
1041 if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
1042 err = -EADDRINUSE;
1043 } else {
1044 /* Save source address */
1045 bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
1046 l2cap_pi(sk)->psm = la.l2_psm;
1047 l2cap_pi(sk)->sport = la.l2_psm;
1048 sk->sk_state = BT_BOUND;
1049
1050 if (__le16_to_cpu(la.l2_psm) == 0x0001 ||
1051 __le16_to_cpu(la.l2_psm) == 0x0003)
1052 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
1053 }
1054
1055 write_unlock_bh(&l2cap_sk_list.lock);
1056
1057done:
1058 release_sock(sk);
1059 return err;
1060}
1061
1062static int l2cap_do_connect(struct sock *sk)
1063{ 837{
1064 bdaddr_t *src = &bt_sk(sk)->src; 838 bdaddr_t *src = &bt_sk(sk)->src;
1065 bdaddr_t *dst = &bt_sk(sk)->dst; 839 bdaddr_t *dst = &bt_sk(sk)->dst;
@@ -1078,23 +852,27 @@ static int l2cap_do_connect(struct sock *sk)
1078 852
1079 hci_dev_lock_bh(hdev); 853 hci_dev_lock_bh(hdev);
1080 854
1081 err = -ENOMEM;
1082
1083 auth_type = l2cap_get_auth_type(sk); 855 auth_type = l2cap_get_auth_type(sk);
1084 856
1085 hcon = hci_connect(hdev, ACL_LINK, dst, 857 if (l2cap_pi(sk)->dcid == L2CAP_CID_LE_DATA)
858 hcon = hci_connect(hdev, LE_LINK, dst,
1086 l2cap_pi(sk)->sec_level, auth_type); 859 l2cap_pi(sk)->sec_level, auth_type);
1087 if (!hcon) 860 else
861 hcon = hci_connect(hdev, ACL_LINK, dst,
862 l2cap_pi(sk)->sec_level, auth_type);
863
864 if (IS_ERR(hcon)) {
865 err = PTR_ERR(hcon);
1088 goto done; 866 goto done;
867 }
1089 868
1090 conn = l2cap_conn_add(hcon, 0); 869 conn = l2cap_conn_add(hcon, 0);
1091 if (!conn) { 870 if (!conn) {
1092 hci_conn_put(hcon); 871 hci_conn_put(hcon);
872 err = -ENOMEM;
1093 goto done; 873 goto done;
1094 } 874 }
1095 875
1096 err = 0;
1097
1098 /* Update source addr of the socket */ 876 /* Update source addr of the socket */
1099 bacpy(src, conn->src); 877 bacpy(src, conn->src);
1100 878
@@ -1113,236 +891,15 @@ static int l2cap_do_connect(struct sock *sk)
1113 l2cap_do_start(sk); 891 l2cap_do_start(sk);
1114 } 892 }
1115 893
894 err = 0;
895
1116done: 896done:
1117 hci_dev_unlock_bh(hdev); 897 hci_dev_unlock_bh(hdev);
1118 hci_dev_put(hdev); 898 hci_dev_put(hdev);
1119 return err; 899 return err;
1120} 900}
1121 901
1122static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) 902int __l2cap_wait_ack(struct sock *sk)
1123{
1124 struct sock *sk = sock->sk;
1125 struct sockaddr_l2 la;
1126 int len, err = 0;
1127
1128 BT_DBG("sk %p", sk);
1129
1130 if (!addr || alen < sizeof(addr->sa_family) ||
1131 addr->sa_family != AF_BLUETOOTH)
1132 return -EINVAL;
1133
1134 memset(&la, 0, sizeof(la));
1135 len = min_t(unsigned int, sizeof(la), alen);
1136 memcpy(&la, addr, len);
1137
1138 if (la.l2_cid)
1139 return -EINVAL;
1140
1141 lock_sock(sk);
1142
1143 if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
1144 && !la.l2_psm) {
1145 err = -EINVAL;
1146 goto done;
1147 }
1148
1149 switch (l2cap_pi(sk)->mode) {
1150 case L2CAP_MODE_BASIC:
1151 break;
1152 case L2CAP_MODE_ERTM:
1153 case L2CAP_MODE_STREAMING:
1154 if (!disable_ertm)
1155 break;
1156 /* fall through */
1157 default:
1158 err = -ENOTSUPP;
1159 goto done;
1160 }
1161
1162 switch (sk->sk_state) {
1163 case BT_CONNECT:
1164 case BT_CONNECT2:
1165 case BT_CONFIG:
1166 /* Already connecting */
1167 goto wait;
1168
1169 case BT_CONNECTED:
1170 /* Already connected */
1171 err = -EISCONN;
1172 goto done;
1173
1174 case BT_OPEN:
1175 case BT_BOUND:
1176 /* Can connect */
1177 break;
1178
1179 default:
1180 err = -EBADFD;
1181 goto done;
1182 }
1183
1184 /* PSM must be odd and lsb of upper byte must be 0 */
1185 if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
1186 sk->sk_type != SOCK_RAW) {
1187 err = -EINVAL;
1188 goto done;
1189 }
1190
1191 /* Set destination address and psm */
1192 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
1193 l2cap_pi(sk)->psm = la.l2_psm;
1194
1195 err = l2cap_do_connect(sk);
1196 if (err)
1197 goto done;
1198
1199wait:
1200 err = bt_sock_wait_state(sk, BT_CONNECTED,
1201 sock_sndtimeo(sk, flags & O_NONBLOCK));
1202done:
1203 release_sock(sk);
1204 return err;
1205}
1206
1207static int l2cap_sock_listen(struct socket *sock, int backlog)
1208{
1209 struct sock *sk = sock->sk;
1210 int err = 0;
1211
1212 BT_DBG("sk %p backlog %d", sk, backlog);
1213
1214 lock_sock(sk);
1215
1216 if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
1217 || sk->sk_state != BT_BOUND) {
1218 err = -EBADFD;
1219 goto done;
1220 }
1221
1222 switch (l2cap_pi(sk)->mode) {
1223 case L2CAP_MODE_BASIC:
1224 break;
1225 case L2CAP_MODE_ERTM:
1226 case L2CAP_MODE_STREAMING:
1227 if (!disable_ertm)
1228 break;
1229 /* fall through */
1230 default:
1231 err = -ENOTSUPP;
1232 goto done;
1233 }
1234
1235 if (!l2cap_pi(sk)->psm) {
1236 bdaddr_t *src = &bt_sk(sk)->src;
1237 u16 psm;
1238
1239 err = -EINVAL;
1240
1241 write_lock_bh(&l2cap_sk_list.lock);
1242
1243 for (psm = 0x1001; psm < 0x1100; psm += 2)
1244 if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) {
1245 l2cap_pi(sk)->psm = cpu_to_le16(psm);
1246 l2cap_pi(sk)->sport = cpu_to_le16(psm);
1247 err = 0;
1248 break;
1249 }
1250
1251 write_unlock_bh(&l2cap_sk_list.lock);
1252
1253 if (err < 0)
1254 goto done;
1255 }
1256
1257 sk->sk_max_ack_backlog = backlog;
1258 sk->sk_ack_backlog = 0;
1259 sk->sk_state = BT_LISTEN;
1260
1261done:
1262 release_sock(sk);
1263 return err;
1264}
1265
1266static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags)
1267{
1268 DECLARE_WAITQUEUE(wait, current);
1269 struct sock *sk = sock->sk, *nsk;
1270 long timeo;
1271 int err = 0;
1272
1273 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
1274
1275 if (sk->sk_state != BT_LISTEN) {
1276 err = -EBADFD;
1277 goto done;
1278 }
1279
1280 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1281
1282 BT_DBG("sk %p timeo %ld", sk, timeo);
1283
1284 /* Wait for an incoming connection. (wake-one). */
1285 add_wait_queue_exclusive(sk_sleep(sk), &wait);
1286 while (!(nsk = bt_accept_dequeue(sk, newsock))) {
1287 set_current_state(TASK_INTERRUPTIBLE);
1288 if (!timeo) {
1289 err = -EAGAIN;
1290 break;
1291 }
1292
1293 release_sock(sk);
1294 timeo = schedule_timeout(timeo);
1295 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
1296
1297 if (sk->sk_state != BT_LISTEN) {
1298 err = -EBADFD;
1299 break;
1300 }
1301
1302 if (signal_pending(current)) {
1303 err = sock_intr_errno(timeo);
1304 break;
1305 }
1306 }
1307 set_current_state(TASK_RUNNING);
1308 remove_wait_queue(sk_sleep(sk), &wait);
1309
1310 if (err)
1311 goto done;
1312
1313 newsock->state = SS_CONNECTED;
1314
1315 BT_DBG("new socket %p", nsk);
1316
1317done:
1318 release_sock(sk);
1319 return err;
1320}
1321
1322static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
1323{
1324 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
1325 struct sock *sk = sock->sk;
1326
1327 BT_DBG("sock %p, sk %p", sock, sk);
1328
1329 addr->sa_family = AF_BLUETOOTH;
1330 *len = sizeof(struct sockaddr_l2);
1331
1332 if (peer) {
1333 la->l2_psm = l2cap_pi(sk)->psm;
1334 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
1335 la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid);
1336 } else {
1337 la->l2_psm = l2cap_pi(sk)->sport;
1338 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
1339 la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid);
1340 }
1341
1342 return 0;
1343}
1344
1345static int __l2cap_wait_ack(struct sock *sk)
1346{ 903{
1347 DECLARE_WAITQUEUE(wait, current); 904 DECLARE_WAITQUEUE(wait, current);
1348 int err = 0; 905 int err = 0;
@@ -1428,16 +985,23 @@ static void l2cap_drop_acked_frames(struct sock *sk)
1428 del_timer(&l2cap_pi(sk)->retrans_timer); 985 del_timer(&l2cap_pi(sk)->retrans_timer);
1429} 986}
1430 987
1431static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) 988void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
1432{ 989{
1433 struct l2cap_pinfo *pi = l2cap_pi(sk); 990 struct l2cap_pinfo *pi = l2cap_pi(sk);
991 struct hci_conn *hcon = pi->conn->hcon;
992 u16 flags;
1434 993
1435 BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len); 994 BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len);
1436 995
1437 hci_send_acl(pi->conn->hcon, skb, 0); 996 if (!pi->flushable && lmp_no_flush_capable(hcon->hdev))
997 flags = ACL_START_NO_FLUSH;
998 else
999 flags = ACL_START;
1000
1001 hci_send_acl(hcon, skb, flags);
1438} 1002}
1439 1003
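l2cap_do_send() now requests a non-flushable ACL packet whenever the socket has not opted into flushing and the controller advertises the capability. The flag choice as a single predicate; the numeric values mirror the ACL packet-boundary flags but are assumptions in this sketch:

#include <stdio.h>

#define ACL_START		0x02	/* flushable start fragment */
#define ACL_START_NO_FLUSH	0x00	/* non-flushable start fragment */

static int pick_acl_flags(int controller_no_flush_capable, int sock_flushable)
{
	if (!sock_flushable && controller_no_flush_capable)
		return ACL_START_NO_FLUSH;	/* survives an HCI flush */
	return ACL_START;
}

int main(void)
{
	printf("flags=0x%02x\n", pick_acl_flags(1, 0));
	return 0;
}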
1440static void l2cap_streaming_send(struct sock *sk) 1004void l2cap_streaming_send(struct sock *sk)
1441{ 1005{
1442 struct sk_buff *skb; 1006 struct sk_buff *skb;
1443 struct l2cap_pinfo *pi = l2cap_pi(sk); 1007 struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1506,7 +1070,7 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
1506 l2cap_do_send(sk, tx_skb); 1070 l2cap_do_send(sk, tx_skb);
1507} 1071}
1508 1072
1509static int l2cap_ertm_send(struct sock *sk) 1073int l2cap_ertm_send(struct sock *sk)
1510{ 1074{
1511 struct sk_buff *skb, *tx_skb; 1075 struct sk_buff *skb, *tx_skb;
1512 struct l2cap_pinfo *pi = l2cap_pi(sk); 1076 struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1646,7 +1210,7 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in
1646 return sent; 1210 return sent;
1647} 1211}
1648 1212
1649static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) 1213struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len)
1650{ 1214{
1651 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 1215 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1652 struct sk_buff *skb; 1216 struct sk_buff *skb;
@@ -1675,7 +1239,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr
1675 return skb; 1239 return skb;
1676} 1240}
1677 1241
1678static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len) 1242struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len)
1679{ 1243{
1680 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 1244 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1681 struct sk_buff *skb; 1245 struct sk_buff *skb;
@@ -1703,7 +1267,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms
1703 return skb; 1267 return skb;
1704} 1268}
1705 1269
1706static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) 1270struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen)
1707{ 1271{
1708 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 1272 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1709 struct sk_buff *skb; 1273 struct sk_buff *skb;
@@ -1748,7 +1312,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m
1748 return skb; 1312 return skb;
1749} 1313}
1750 1314
1751static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) 1315int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len)
1752{ 1316{
1753 struct l2cap_pinfo *pi = l2cap_pi(sk); 1317 struct l2cap_pinfo *pi = l2cap_pi(sk);
1754 struct sk_buff *skb; 1318 struct sk_buff *skb;
@@ -1794,487 +1358,6 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
1794 return size; 1358 return size;
1795} 1359}
1796 1360
1797static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
1798{
1799 struct sock *sk = sock->sk;
1800 struct l2cap_pinfo *pi = l2cap_pi(sk);
1801 struct sk_buff *skb;
1802 u16 control;
1803 int err;
1804
1805 BT_DBG("sock %p, sk %p", sock, sk);
1806
1807 err = sock_error(sk);
1808 if (err)
1809 return err;
1810
1811 if (msg->msg_flags & MSG_OOB)
1812 return -EOPNOTSUPP;
1813
1814 lock_sock(sk);
1815
1816 if (sk->sk_state != BT_CONNECTED) {
1817 err = -ENOTCONN;
1818 goto done;
1819 }
1820
1821 /* Connectionless channel */
1822 if (sk->sk_type == SOCK_DGRAM) {
1823 skb = l2cap_create_connless_pdu(sk, msg, len);
1824 if (IS_ERR(skb)) {
1825 err = PTR_ERR(skb);
1826 } else {
1827 l2cap_do_send(sk, skb);
1828 err = len;
1829 }
1830 goto done;
1831 }
1832
1833 switch (pi->mode) {
1834 case L2CAP_MODE_BASIC:
1835 /* Check outgoing MTU */
1836 if (len > pi->omtu) {
1837 err = -EMSGSIZE;
1838 goto done;
1839 }
1840
1841 /* Create a basic PDU */
1842 skb = l2cap_create_basic_pdu(sk, msg, len);
1843 if (IS_ERR(skb)) {
1844 err = PTR_ERR(skb);
1845 goto done;
1846 }
1847
1848 l2cap_do_send(sk, skb);
1849 err = len;
1850 break;
1851
1852 case L2CAP_MODE_ERTM:
1853 case L2CAP_MODE_STREAMING:
1854 /* Entire SDU fits into one PDU */
1855 if (len <= pi->remote_mps) {
1856 control = L2CAP_SDU_UNSEGMENTED;
1857 skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0);
1858 if (IS_ERR(skb)) {
1859 err = PTR_ERR(skb);
1860 goto done;
1861 }
1862 __skb_queue_tail(TX_QUEUE(sk), skb);
1863
1864 if (sk->sk_send_head == NULL)
1865 sk->sk_send_head = skb;
1866
1867 } else {
1868 			/* Segment SDU into multiple PDUs */
1869 err = l2cap_sar_segment_sdu(sk, msg, len);
1870 if (err < 0)
1871 goto done;
1872 }
1873
1874 if (pi->mode == L2CAP_MODE_STREAMING) {
1875 l2cap_streaming_send(sk);
1876 } else {
1877 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
1878 (pi->conn_state & L2CAP_CONN_WAIT_F)) {
1879 err = len;
1880 break;
1881 }
1882 err = l2cap_ertm_send(sk);
1883 }
1884
1885 if (err >= 0)
1886 err = len;
1887 break;
1888
1889 default:
1890 BT_DBG("bad state %1.1x", pi->mode);
1891 err = -EBADFD;
1892 }
1893
1894done:
1895 release_sock(sk);
1896 return err;
1897}
1898
1899static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
1900{
1901 struct sock *sk = sock->sk;
1902
1903 lock_sock(sk);
1904
1905 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
1906 struct l2cap_conn_rsp rsp;
1907 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1908 u8 buf[128];
1909
1910 sk->sk_state = BT_CONFIG;
1911
1912 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
1913 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
1914 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
1915 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
1916 l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
1917 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
1918
1919 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
1920 release_sock(sk);
1921 return 0;
1922 }
1923
1924 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
1925 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
1926 l2cap_build_conf_req(sk, buf), buf);
1927 l2cap_pi(sk)->num_conf_req++;
1928
1929 release_sock(sk);
1930 return 0;
1931 }
1932
1933 release_sock(sk);
1934
1935 if (sock->type == SOCK_STREAM)
1936 return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
1937
1938 return bt_sock_recvmsg(iocb, sock, msg, len, flags);
1939}
1940
1941static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
1942{
1943 struct sock *sk = sock->sk;
1944 struct l2cap_options opts;
1945 int len, err = 0;
1946 u32 opt;
1947
1948 BT_DBG("sk %p", sk);
1949
1950 lock_sock(sk);
1951
1952 switch (optname) {
1953 case L2CAP_OPTIONS:
1954 if (sk->sk_state == BT_CONNECTED) {
1955 err = -EINVAL;
1956 break;
1957 }
1958
1959 opts.imtu = l2cap_pi(sk)->imtu;
1960 opts.omtu = l2cap_pi(sk)->omtu;
1961 opts.flush_to = l2cap_pi(sk)->flush_to;
1962 opts.mode = l2cap_pi(sk)->mode;
1963 opts.fcs = l2cap_pi(sk)->fcs;
1964 opts.max_tx = l2cap_pi(sk)->max_tx;
1965 opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
1966
1967 len = min_t(unsigned int, sizeof(opts), optlen);
1968 if (copy_from_user((char *) &opts, optval, len)) {
1969 err = -EFAULT;
1970 break;
1971 }
1972
1973 if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
1974 err = -EINVAL;
1975 break;
1976 }
1977
1978 l2cap_pi(sk)->mode = opts.mode;
1979 switch (l2cap_pi(sk)->mode) {
1980 case L2CAP_MODE_BASIC:
1981 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
1982 break;
1983 case L2CAP_MODE_ERTM:
1984 case L2CAP_MODE_STREAMING:
1985 if (!disable_ertm)
1986 break;
1987 /* fall through */
1988 default:
1989 err = -EINVAL;
1990 break;
1991 }
1992
1993 l2cap_pi(sk)->imtu = opts.imtu;
1994 l2cap_pi(sk)->omtu = opts.omtu;
1995 l2cap_pi(sk)->fcs = opts.fcs;
1996 l2cap_pi(sk)->max_tx = opts.max_tx;
1997 l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
1998 break;
1999
2000 case L2CAP_LM:
2001 if (get_user(opt, (u32 __user *) optval)) {
2002 err = -EFAULT;
2003 break;
2004 }
2005
2006 if (opt & L2CAP_LM_AUTH)
2007 l2cap_pi(sk)->sec_level = BT_SECURITY_LOW;
2008 if (opt & L2CAP_LM_ENCRYPT)
2009 l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
2010 if (opt & L2CAP_LM_SECURE)
2011 l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH;
2012
2013 l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER);
2014 l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE);
2015 break;
2016
2017 default:
2018 err = -ENOPROTOOPT;
2019 break;
2020 }
2021
2022 release_sock(sk);
2023 return err;
2024}
2025
2026static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
2027{
2028 struct sock *sk = sock->sk;
2029 struct bt_security sec;
2030 int len, err = 0;
2031 u32 opt;
2032
2033 BT_DBG("sk %p", sk);
2034
2035 if (level == SOL_L2CAP)
2036 return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
2037
2038 if (level != SOL_BLUETOOTH)
2039 return -ENOPROTOOPT;
2040
2041 lock_sock(sk);
2042
2043 switch (optname) {
2044 case BT_SECURITY:
2045 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
2046 && sk->sk_type != SOCK_RAW) {
2047 err = -EINVAL;
2048 break;
2049 }
2050
2051 sec.level = BT_SECURITY_LOW;
2052
2053 len = min_t(unsigned int, sizeof(sec), optlen);
2054 if (copy_from_user((char *) &sec, optval, len)) {
2055 err = -EFAULT;
2056 break;
2057 }
2058
2059 if (sec.level < BT_SECURITY_LOW ||
2060 sec.level > BT_SECURITY_HIGH) {
2061 err = -EINVAL;
2062 break;
2063 }
2064
2065 l2cap_pi(sk)->sec_level = sec.level;
2066 break;
2067
2068 case BT_DEFER_SETUP:
2069 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
2070 err = -EINVAL;
2071 break;
2072 }
2073
2074 if (get_user(opt, (u32 __user *) optval)) {
2075 err = -EFAULT;
2076 break;
2077 }
2078
2079 bt_sk(sk)->defer_setup = opt;
2080 break;
2081
2082 default:
2083 err = -ENOPROTOOPT;
2084 break;
2085 }
2086
2087 release_sock(sk);
2088 return err;
2089}
2090
2091static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
2092{
2093 struct sock *sk = sock->sk;
2094 struct l2cap_options opts;
2095 struct l2cap_conninfo cinfo;
2096 int len, err = 0;
2097 u32 opt;
2098
2099 BT_DBG("sk %p", sk);
2100
2101 if (get_user(len, optlen))
2102 return -EFAULT;
2103
2104 lock_sock(sk);
2105
2106 switch (optname) {
2107 case L2CAP_OPTIONS:
2108 opts.imtu = l2cap_pi(sk)->imtu;
2109 opts.omtu = l2cap_pi(sk)->omtu;
2110 opts.flush_to = l2cap_pi(sk)->flush_to;
2111 opts.mode = l2cap_pi(sk)->mode;
2112 opts.fcs = l2cap_pi(sk)->fcs;
2113 opts.max_tx = l2cap_pi(sk)->max_tx;
2114 opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
2115
2116 len = min_t(unsigned int, len, sizeof(opts));
2117 if (copy_to_user(optval, (char *) &opts, len))
2118 err = -EFAULT;
2119
2120 break;
2121
2122 case L2CAP_LM:
2123 switch (l2cap_pi(sk)->sec_level) {
2124 case BT_SECURITY_LOW:
2125 opt = L2CAP_LM_AUTH;
2126 break;
2127 case BT_SECURITY_MEDIUM:
2128 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
2129 break;
2130 case BT_SECURITY_HIGH:
2131 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
2132 L2CAP_LM_SECURE;
2133 break;
2134 default:
2135 opt = 0;
2136 break;
2137 }
2138
2139 if (l2cap_pi(sk)->role_switch)
2140 opt |= L2CAP_LM_MASTER;
2141
2142 if (l2cap_pi(sk)->force_reliable)
2143 opt |= L2CAP_LM_RELIABLE;
2144
2145 if (put_user(opt, (u32 __user *) optval))
2146 err = -EFAULT;
2147 break;
2148
2149 case L2CAP_CONNINFO:
2150 if (sk->sk_state != BT_CONNECTED &&
2151 !(sk->sk_state == BT_CONNECT2 &&
2152 bt_sk(sk)->defer_setup)) {
2153 err = -ENOTCONN;
2154 break;
2155 }
2156
2157 cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle;
2158 memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3);
2159
2160 len = min_t(unsigned int, len, sizeof(cinfo));
2161 if (copy_to_user(optval, (char *) &cinfo, len))
2162 err = -EFAULT;
2163
2164 break;
2165
2166 default:
2167 err = -ENOPROTOOPT;
2168 break;
2169 }
2170
2171 release_sock(sk);
2172 return err;
2173}
2174
2175static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
2176{
2177 struct sock *sk = sock->sk;
2178 struct bt_security sec;
2179 int len, err = 0;
2180
2181 BT_DBG("sk %p", sk);
2182
2183 if (level == SOL_L2CAP)
2184 return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
2185
2186 if (level != SOL_BLUETOOTH)
2187 return -ENOPROTOOPT;
2188
2189 if (get_user(len, optlen))
2190 return -EFAULT;
2191
2192 lock_sock(sk);
2193
2194 switch (optname) {
2195 case BT_SECURITY:
2196 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
2197 && sk->sk_type != SOCK_RAW) {
2198 err = -EINVAL;
2199 break;
2200 }
2201
2202 sec.level = l2cap_pi(sk)->sec_level;
2203
2204 len = min_t(unsigned int, len, sizeof(sec));
2205 if (copy_to_user(optval, (char *) &sec, len))
2206 err = -EFAULT;
2207
2208 break;
2209
2210 case BT_DEFER_SETUP:
2211 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
2212 err = -EINVAL;
2213 break;
2214 }
2215
2216 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
2217 err = -EFAULT;
2218
2219 break;
2220
2221 default:
2222 err = -ENOPROTOOPT;
2223 break;
2224 }
2225
2226 release_sock(sk);
2227 return err;
2228}
2229
2230static int l2cap_sock_shutdown(struct socket *sock, int how)
2231{
2232 struct sock *sk = sock->sk;
2233 int err = 0;
2234
2235 BT_DBG("sock %p, sk %p", sock, sk);
2236
2237 if (!sk)
2238 return 0;
2239
2240 lock_sock(sk);
2241 if (!sk->sk_shutdown) {
2242 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
2243 err = __l2cap_wait_ack(sk);
2244
2245 sk->sk_shutdown = SHUTDOWN_MASK;
2246 l2cap_sock_clear_timer(sk);
2247 __l2cap_sock_close(sk, 0);
2248
2249 if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
2250 err = bt_sock_wait_state(sk, BT_CLOSED,
2251 sk->sk_lingertime);
2252 }
2253
2254 if (!err && sk->sk_err)
2255 err = -sk->sk_err;
2256
2257 release_sock(sk);
2258 return err;
2259}
2260
2261static int l2cap_sock_release(struct socket *sock)
2262{
2263 struct sock *sk = sock->sk;
2264 int err;
2265
2266 BT_DBG("sock %p, sk %p", sock, sk);
2267
2268 if (!sk)
2269 return 0;
2270
2271 err = l2cap_sock_shutdown(sock, 2);
2272
2273 sock_orphan(sk);
2274 l2cap_sock_kill(sk);
2275 return err;
2276}
2277
2278static void l2cap_chan_ready(struct sock *sk) 1361static void l2cap_chan_ready(struct sock *sk)
2279{ 1362{
2280 struct sock *parent = bt_sk(sk)->parent; 1363 struct sock *parent = bt_sk(sk)->parent;
@@ -2346,7 +1429,11 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
2346 1429
2347 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); 1430 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
2348 lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); 1431 lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
2349 lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); 1432
1433 if (conn->hcon->type == LE_LINK)
1434 lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING);
1435 else
1436 lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
2350 1437
2351 cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); 1438 cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
2352 cmd->code = code; 1439 cmd->code = code;
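l2cap_build_cmd() now routes signaling by link type: LE links use the LE signaling channel, everything else stays on the classic one. For orientation, the fixed channel identifiers involved (values from the Bluetooth core specification; the enum is an illustrative sketch, not a copy of l2cap.h):

	enum {
		L2CAP_CID_SIGNALING	= 0x0001,	/* BR/EDR signaling */
		L2CAP_CID_CONN_LESS	= 0x0002,	/* connectionless data */
		L2CAP_CID_LE_DATA	= 0x0004,	/* attribute protocol */
		L2CAP_CID_LE_SIGNALING	= 0x0005,	/* LE signaling */
		L2CAP_CID_SMP		= 0x0006,	/* security manager */
	};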
@@ -2493,7 +1580,7 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
2493 } 1580 }
2494} 1581}
2495 1582
2496static int l2cap_build_conf_req(struct sock *sk, void *data) 1583int l2cap_build_conf_req(struct sock *sk, void *data)
2497{ 1584{
2498 struct l2cap_pinfo *pi = l2cap_pi(sk); 1585 struct l2cap_pinfo *pi = l2cap_pi(sk);
2499 struct l2cap_conf_req *req = data; 1586 struct l2cap_conf_req *req = data;
@@ -2518,11 +1605,11 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
2518 } 1605 }
2519 1606
2520done: 1607done:
1608 if (pi->imtu != L2CAP_DEFAULT_MTU)
1609 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
1610
2521 switch (pi->mode) { 1611 switch (pi->mode) {
2522 case L2CAP_MODE_BASIC: 1612 case L2CAP_MODE_BASIC:
2523 if (pi->imtu != L2CAP_DEFAULT_MTU)
2524 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
2525
2526 if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) && 1613 if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) &&
2527 !(pi->conn->feat_mask & L2CAP_FEAT_STREAMING)) 1614 !(pi->conn->feat_mask & L2CAP_FEAT_STREAMING))
2528 break; 1615 break;
@@ -2585,10 +1672,6 @@ done:
2585 break; 1672 break;
2586 } 1673 }
2587 1674
2588 /* FIXME: Need actual value of the flush timeout */
2589 //if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
2590 // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
2591
2592 req->dcid = cpu_to_le16(pi->dcid); 1675 req->dcid = cpu_to_le16(pi->dcid);
2593 req->flags = cpu_to_le16(0); 1676 req->flags = cpu_to_le16(0);
2594 1677
@@ -3415,12 +2498,153 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm
3415 return 0; 2498 return 0;
3416} 2499}
3417 2500
3418static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) 2501static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency,
2502 u16 to_multiplier)
2503{
2504 u16 max_latency;
2505
2506 if (min > max || min < 6 || max > 3200)
2507 return -EINVAL;
2508
2509 if (to_multiplier < 10 || to_multiplier > 3200)
2510 return -EINVAL;
2511
2512 if (max >= to_multiplier * 8)
2513 return -EINVAL;
2514
2515 max_latency = (to_multiplier * 8 / max) - 1;
2516 if (latency > 499 || latency > max_latency)
2517 return -EINVAL;
2518
2519 return 0;
2520}
2521
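l2cap_check_conn_param() folds the LE units into its arithmetic: connection intervals count in 1.25 ms steps, the supervision timeout in 10 ms steps, so the constant 8 is 10 ms / 1.25 ms. The latency cap guarantees that latency + 1 connection events still fit within the supervision timeout. A worked example with hypothetical values:

	/* min = 24            -> 30 ms minimum interval
	 * max = 40            -> 50 ms maximum interval
	 * to_multiplier = 300 -> 3 s supervision timeout
	 *
	 * max < to_multiplier * 8:  40 < 2400, accepted
	 * max_latency = (300 * 8 / 40) - 1 = 59
	 *
	 * so latency may be at most 59: (59 + 1) * 50 ms = 3 s,
	 * exactly the supervision timeout. */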
2522static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
2523 struct l2cap_cmd_hdr *cmd, u8 *data)
2524{
2525 struct hci_conn *hcon = conn->hcon;
2526 struct l2cap_conn_param_update_req *req;
2527 struct l2cap_conn_param_update_rsp rsp;
2528 u16 min, max, latency, to_multiplier, cmd_len;
2529 int err;
2530
2531 if (!(hcon->link_mode & HCI_LM_MASTER))
2532 return -EINVAL;
2533
2534 cmd_len = __le16_to_cpu(cmd->len);
2535 if (cmd_len != sizeof(struct l2cap_conn_param_update_req))
2536 return -EPROTO;
2537
2538 req = (struct l2cap_conn_param_update_req *) data;
2539 min = __le16_to_cpu(req->min);
2540 max = __le16_to_cpu(req->max);
2541 latency = __le16_to_cpu(req->latency);
2542 to_multiplier = __le16_to_cpu(req->to_multiplier);
2543
2544 BT_DBG("min 0x%4.4x max 0x%4.4x latency: 0x%4.4x Timeout: 0x%4.4x",
2545 min, max, latency, to_multiplier);
2546
2547 memset(&rsp, 0, sizeof(rsp));
2548
2549 err = l2cap_check_conn_param(min, max, latency, to_multiplier);
2550 if (err)
2551 rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
2552 else
2553 rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED);
2554
2555 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP,
2556 sizeof(rsp), &rsp);
2557
2558 if (!err)
2559 hci_le_conn_update(hcon, min, max, latency, to_multiplier);
2560
2561 return 0;
2562}
2563
2564static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
2565 struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
2566{
2567 int err = 0;
2568
2569 switch (cmd->code) {
2570 case L2CAP_COMMAND_REJ:
2571 l2cap_command_rej(conn, cmd, data);
2572 break;
2573
2574 case L2CAP_CONN_REQ:
2575 err = l2cap_connect_req(conn, cmd, data);
2576 break;
2577
2578 case L2CAP_CONN_RSP:
2579 err = l2cap_connect_rsp(conn, cmd, data);
2580 break;
2581
2582 case L2CAP_CONF_REQ:
2583 err = l2cap_config_req(conn, cmd, cmd_len, data);
2584 break;
2585
2586 case L2CAP_CONF_RSP:
2587 err = l2cap_config_rsp(conn, cmd, data);
2588 break;
2589
2590 case L2CAP_DISCONN_REQ:
2591 err = l2cap_disconnect_req(conn, cmd, data);
2592 break;
2593
2594 case L2CAP_DISCONN_RSP:
2595 err = l2cap_disconnect_rsp(conn, cmd, data);
2596 break;
2597
2598 case L2CAP_ECHO_REQ:
2599 l2cap_send_cmd(conn, cmd->ident, L2CAP_ECHO_RSP, cmd_len, data);
2600 break;
2601
2602 case L2CAP_ECHO_RSP:
2603 break;
2604
2605 case L2CAP_INFO_REQ:
2606 err = l2cap_information_req(conn, cmd, data);
2607 break;
2608
2609 case L2CAP_INFO_RSP:
2610 err = l2cap_information_rsp(conn, cmd, data);
2611 break;
2612
2613 default:
2614 BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code);
2615 err = -EINVAL;
2616 break;
2617 }
2618
2619 return err;
2620}
2621
2622static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn,
2623 struct l2cap_cmd_hdr *cmd, u8 *data)
2624{
2625 switch (cmd->code) {
2626 case L2CAP_COMMAND_REJ:
2627 return 0;
2628
2629 case L2CAP_CONN_PARAM_UPDATE_REQ:
2630 return l2cap_conn_param_update_req(conn, cmd, data);
2631
2632 case L2CAP_CONN_PARAM_UPDATE_RSP:
2633 return 0;
2634
2635 default:
2636 BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code);
2637 return -EINVAL;
2638 }
2639}
2640
2641static inline void l2cap_sig_channel(struct l2cap_conn *conn,
2642 struct sk_buff *skb)
3419{ 2643{
3420 u8 *data = skb->data; 2644 u8 *data = skb->data;
3421 int len = skb->len; 2645 int len = skb->len;
3422 struct l2cap_cmd_hdr cmd; 2646 struct l2cap_cmd_hdr cmd;
3423 int err = 0; 2647 int err;
3424 2648
3425 l2cap_raw_recv(conn, skb); 2649 l2cap_raw_recv(conn, skb);
3426 2650
@@ -3439,55 +2663,10 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
3439 break; 2663 break;
3440 } 2664 }
3441 2665
3442 switch (cmd.code) { 2666 if (conn->hcon->type == LE_LINK)
3443 case L2CAP_COMMAND_REJ: 2667 err = l2cap_le_sig_cmd(conn, &cmd, data);
3444 l2cap_command_rej(conn, &cmd, data); 2668 else
3445 break; 2669 err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data);
3446
3447 case L2CAP_CONN_REQ:
3448 err = l2cap_connect_req(conn, &cmd, data);
3449 break;
3450
3451 case L2CAP_CONN_RSP:
3452 err = l2cap_connect_rsp(conn, &cmd, data);
3453 break;
3454
3455 case L2CAP_CONF_REQ:
3456 err = l2cap_config_req(conn, &cmd, cmd_len, data);
3457 break;
3458
3459 case L2CAP_CONF_RSP:
3460 err = l2cap_config_rsp(conn, &cmd, data);
3461 break;
3462
3463 case L2CAP_DISCONN_REQ:
3464 err = l2cap_disconnect_req(conn, &cmd, data);
3465 break;
3466
3467 case L2CAP_DISCONN_RSP:
3468 err = l2cap_disconnect_rsp(conn, &cmd, data);
3469 break;
3470
3471 case L2CAP_ECHO_REQ:
3472 l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data);
3473 break;
3474
3475 case L2CAP_ECHO_RSP:
3476 break;
3477
3478 case L2CAP_INFO_REQ:
3479 err = l2cap_information_req(conn, &cmd, data);
3480 break;
3481
3482 case L2CAP_INFO_RSP:
3483 err = l2cap_information_rsp(conn, &cmd, data);
3484 break;
3485
3486 default:
3487 BT_ERR("Unknown signaling command 0x%2.2x", cmd.code);
3488 err = -EINVAL;
3489 break;
3490 }
3491 2670
3492 if (err) { 2671 if (err) {
3493 struct l2cap_cmd_rej rej; 2672 struct l2cap_cmd_rej rej;
@@ -4484,6 +3663,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
4484 BT_DBG("len %d, cid 0x%4.4x", len, cid); 3663 BT_DBG("len %d, cid 0x%4.4x", len, cid);
4485 3664
4486 switch (cid) { 3665 switch (cid) {
3666 case L2CAP_CID_LE_SIGNALING:
4487 case L2CAP_CID_SIGNALING: 3667 case L2CAP_CID_SIGNALING:
4488 l2cap_sig_channel(conn, skb); 3668 l2cap_sig_channel(conn, skb);
4489 break; 3669 break;
@@ -4541,7 +3721,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
4541 3721
4542 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); 3722 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
4543 3723
4544 if (hcon->type != ACL_LINK) 3724 if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK))
4545 return -EINVAL; 3725 return -EINVAL;
4546 3726
4547 if (!status) { 3727 if (!status) {
@@ -4570,7 +3750,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
4570{ 3750{
4571 BT_DBG("hcon %p reason %d", hcon, reason); 3751 BT_DBG("hcon %p reason %d", hcon, reason);
4572 3752
4573 if (hcon->type != ACL_LINK) 3753 if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK))
4574 return -EINVAL; 3754 return -EINVAL;
4575 3755
4576 l2cap_conn_del(hcon, bt_err(reason)); 3756 l2cap_conn_del(hcon, bt_err(reason));
@@ -4673,12 +3853,15 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
4673{ 3853{
4674 struct l2cap_conn *conn = hcon->l2cap_data; 3854 struct l2cap_conn *conn = hcon->l2cap_data;
4675 3855
4676 if (!conn && !(conn = l2cap_conn_add(hcon, 0))) 3856 if (!conn)
3857 conn = l2cap_conn_add(hcon, 0);
3858
3859 if (!conn)
4677 goto drop; 3860 goto drop;
4678 3861
4679 BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags); 3862 BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags);
4680 3863
4681 if (flags & ACL_START) { 3864 if (!(flags & ACL_CONT)) {
4682 struct l2cap_hdr *hdr; 3865 struct l2cap_hdr *hdr;
4683 struct sock *sk; 3866 struct sock *sk;
4684 u16 cid; 3867 u16 cid;
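Because a first fragment may now be flagged either flushable or non-flushable, l2cap_recv_acldata() tests for "not a continuation" instead of ACL_START alone. The packet-boundary values this relies on, sketched from hci.h at the time:

	/* ACL_START_NO_FLUSH  0x00  start fragment, non-automatically-flushable
	 * ACL_CONT            0x01  continuation fragment
	 * ACL_START           0x02  start fragment, flushable
	 *
	 * "!(flags & ACL_CONT)" is therefore true for both start variants. */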
@@ -4784,12 +3967,13 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p)
4784 sk_for_each(sk, node, &l2cap_sk_list.head) { 3967 sk_for_each(sk, node, &l2cap_sk_list.head) {
4785 struct l2cap_pinfo *pi = l2cap_pi(sk); 3968 struct l2cap_pinfo *pi = l2cap_pi(sk);
4786 3969
4787 seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", 3970 seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n",
4788 batostr(&bt_sk(sk)->src), 3971 batostr(&bt_sk(sk)->src),
4789 batostr(&bt_sk(sk)->dst), 3972 batostr(&bt_sk(sk)->dst),
4790 sk->sk_state, __le16_to_cpu(pi->psm), 3973 sk->sk_state, __le16_to_cpu(pi->psm),
4791 pi->scid, pi->dcid, 3974 pi->scid, pi->dcid,
4792 pi->imtu, pi->omtu, pi->sec_level); 3975 pi->imtu, pi->omtu, pi->sec_level,
3976 pi->mode);
4793 } 3977 }
4794 3978
4795 read_unlock_bh(&l2cap_sk_list.lock); 3979 read_unlock_bh(&l2cap_sk_list.lock);
@@ -4811,32 +3995,6 @@ static const struct file_operations l2cap_debugfs_fops = {
4811 3995
4812static struct dentry *l2cap_debugfs; 3996static struct dentry *l2cap_debugfs;
4813 3997
4814static const struct proto_ops l2cap_sock_ops = {
4815 .family = PF_BLUETOOTH,
4816 .owner = THIS_MODULE,
4817 .release = l2cap_sock_release,
4818 .bind = l2cap_sock_bind,
4819 .connect = l2cap_sock_connect,
4820 .listen = l2cap_sock_listen,
4821 .accept = l2cap_sock_accept,
4822 .getname = l2cap_sock_getname,
4823 .sendmsg = l2cap_sock_sendmsg,
4824 .recvmsg = l2cap_sock_recvmsg,
4825 .poll = bt_sock_poll,
4826 .ioctl = bt_sock_ioctl,
4827 .mmap = sock_no_mmap,
4828 .socketpair = sock_no_socketpair,
4829 .shutdown = l2cap_sock_shutdown,
4830 .setsockopt = l2cap_sock_setsockopt,
4831 .getsockopt = l2cap_sock_getsockopt
4832};
4833
4834static const struct net_proto_family l2cap_sock_family_ops = {
4835 .family = PF_BLUETOOTH,
4836 .owner = THIS_MODULE,
4837 .create = l2cap_sock_create,
4838};
4839
4840static struct hci_proto l2cap_hci_proto = { 3998static struct hci_proto l2cap_hci_proto = {
4841 .name = "L2CAP", 3999 .name = "L2CAP",
4842 .id = HCI_PROTO_L2CAP, 4000 .id = HCI_PROTO_L2CAP,
@@ -4848,23 +4006,17 @@ static struct hci_proto l2cap_hci_proto = {
4848 .recv_acldata = l2cap_recv_acldata 4006 .recv_acldata = l2cap_recv_acldata
4849}; 4007};
4850 4008
4851static int __init l2cap_init(void) 4009int __init l2cap_init(void)
4852{ 4010{
4853 int err; 4011 int err;
4854 4012
4855 err = proto_register(&l2cap_proto, 0); 4013 err = l2cap_init_sockets();
4856 if (err < 0) 4014 if (err < 0)
4857 return err; 4015 return err;
4858 4016
4859 _busy_wq = create_singlethread_workqueue("l2cap"); 4017 _busy_wq = create_singlethread_workqueue("l2cap");
4860 if (!_busy_wq) { 4018 if (!_busy_wq) {
4861 proto_unregister(&l2cap_proto); 4019 err = -ENOMEM;
4862 return -ENOMEM;
4863 }
4864
4865 err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
4866 if (err < 0) {
4867 BT_ERR("L2CAP socket registration failed");
4868 goto error; 4020 goto error;
4869 } 4021 }
4870 4022
@@ -4882,49 +4034,26 @@ static int __init l2cap_init(void)
4882 BT_ERR("Failed to create L2CAP debug file"); 4034 BT_ERR("Failed to create L2CAP debug file");
4883 } 4035 }
4884 4036
4885 BT_INFO("L2CAP ver %s", VERSION);
4886 BT_INFO("L2CAP socket layer initialized");
4887
4888 return 0; 4037 return 0;
4889 4038
4890error: 4039error:
4891 destroy_workqueue(_busy_wq); 4040 destroy_workqueue(_busy_wq);
4892 proto_unregister(&l2cap_proto); 4041 l2cap_cleanup_sockets();
4893 return err; 4042 return err;
4894} 4043}
4895 4044
4896static void __exit l2cap_exit(void) 4045void l2cap_exit(void)
4897{ 4046{
4898 debugfs_remove(l2cap_debugfs); 4047 debugfs_remove(l2cap_debugfs);
4899 4048
4900 flush_workqueue(_busy_wq); 4049 flush_workqueue(_busy_wq);
4901 destroy_workqueue(_busy_wq); 4050 destroy_workqueue(_busy_wq);
4902 4051
4903 if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
4904 BT_ERR("L2CAP socket unregistration failed");
4905
4906 if (hci_unregister_proto(&l2cap_hci_proto) < 0) 4052 if (hci_unregister_proto(&l2cap_hci_proto) < 0)
4907 BT_ERR("L2CAP protocol unregistration failed"); 4053 BT_ERR("L2CAP protocol unregistration failed");
4908 4054
4909 proto_unregister(&l2cap_proto); 4055 l2cap_cleanup_sockets();
4910}
4911
4912void l2cap_load(void)
4913{
4914 /* Dummy function to trigger automatic L2CAP module loading by
4915 * other modules that use L2CAP sockets but don't use any other
4916 * symbols from it. */
4917} 4056}
4918EXPORT_SYMBOL(l2cap_load);
4919
4920module_init(l2cap_init);
4921module_exit(l2cap_exit);
4922 4057
4923module_param(disable_ertm, bool, 0644); 4058module_param(disable_ertm, bool, 0644);
4924MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); 4059MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode");
4925
4926MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
4927MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
4928MODULE_VERSION(VERSION);
4929MODULE_LICENSE("GPL");
4930MODULE_ALIAS("bt-proto-0");
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
new file mode 100644
index 00000000000..fc85e7ae33c
--- /dev/null
+++ b/net/bluetooth/l2cap_sock.c
@@ -0,0 +1,1156 @@
1/*
2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated
4 Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
5 Copyright (C) 2010 Google Inc.
6
7 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License version 2 as
11 published by the Free Software Foundation;
12
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
16 IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
17 CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
18 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
19 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
20 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21
22 ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
23 COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
24 SOFTWARE IS DISCLAIMED.
25*/
26
27/* Bluetooth L2CAP sockets. */
28
29#include <net/bluetooth/bluetooth.h>
30#include <net/bluetooth/hci_core.h>
31#include <net/bluetooth/l2cap.h>
32
33/* ---- L2CAP timers ---- */
34static void l2cap_sock_timeout(unsigned long arg)
35{
36 struct sock *sk = (struct sock *) arg;
37 int reason;
38
39 BT_DBG("sock %p state %d", sk, sk->sk_state);
40
41 bh_lock_sock(sk);
42
43 if (sock_owned_by_user(sk)) {
44 /* sk is owned by user. Try again later */
45 l2cap_sock_set_timer(sk, HZ / 5);
46 bh_unlock_sock(sk);
47 sock_put(sk);
48 return;
49 }
50
51 if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
52 reason = ECONNREFUSED;
53 else if (sk->sk_state == BT_CONNECT &&
54 l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
55 reason = ECONNREFUSED;
56 else
57 reason = ETIMEDOUT;
58
59 __l2cap_sock_close(sk, reason);
60
61 bh_unlock_sock(sk);
62
63 l2cap_sock_kill(sk);
64 sock_put(sk);
65}
66
67void l2cap_sock_set_timer(struct sock *sk, long timeout)
68{
69 BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
70 sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
71}
72
73void l2cap_sock_clear_timer(struct sock *sk)
74{
75 BT_DBG("sock %p state %d", sk, sk->sk_state);
76 sk_stop_timer(sk, &sk->sk_timer);
77}
78
79static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src)
80{
81 struct sock *sk;
82 struct hlist_node *node;
83 sk_for_each(sk, node, &l2cap_sk_list.head)
84 if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src))
85 goto found;
86 sk = NULL;
87found:
88 return sk;
89}
90
91static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
92{
93 struct sock *sk = sock->sk;
94 struct sockaddr_l2 la;
95 int len, err = 0;
96
97 BT_DBG("sk %p", sk);
98
99 if (!addr || addr->sa_family != AF_BLUETOOTH)
100 return -EINVAL;
101
102 memset(&la, 0, sizeof(la));
103 len = min_t(unsigned int, sizeof(la), alen);
104 memcpy(&la, addr, len);
105
106 if (la.l2_cid && la.l2_psm)
107 return -EINVAL;
108
109 lock_sock(sk);
110
111 if (sk->sk_state != BT_OPEN) {
112 err = -EBADFD;
113 goto done;
114 }
115
116 if (la.l2_psm) {
117 __u16 psm = __le16_to_cpu(la.l2_psm);
118
119 /* PSM must be odd and lsb of upper byte must be 0 */
120 if ((psm & 0x0101) != 0x0001) {
121 err = -EINVAL;
122 goto done;
123 }
124
125 /* Restrict usage of well-known PSMs */
126 if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
127 err = -EACCES;
128 goto done;
129 }
130 }
131
132 write_lock_bh(&l2cap_sk_list.lock);
133
134 if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
135 err = -EADDRINUSE;
136 } else {
137 /* Save source address */
138 bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
139 l2cap_pi(sk)->psm = la.l2_psm;
140 l2cap_pi(sk)->sport = la.l2_psm;
141 sk->sk_state = BT_BOUND;
142
143 if (__le16_to_cpu(la.l2_psm) == 0x0001 ||
144 __le16_to_cpu(la.l2_psm) == 0x0003)
145 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
146 }
147
148 if (la.l2_cid)
149 l2cap_pi(sk)->scid = la.l2_cid;
150
151 write_unlock_bh(&l2cap_sk_list.lock);
152
153done:
154 release_sock(sk);
155 return err;
156}
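The bind-time PSM check packs two rules from the specification into one mask: a valid PSM is odd (bit 0 of the low byte set) and has bit 0 of the high byte clear. The same test as a hypothetical helper, with examples:

	/* 0x0001 (SDP) and 0x1001 (first dynamic PSM) pass;
	 * 0x0002 (even) and 0x0101 (bit 8 set) are rejected. */
	static bool l2cap_psm_is_valid(u16 psm)
	{
		return (psm & 0x0101) == 0x0001;
	}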
157
158static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
159{
160 struct sock *sk = sock->sk;
161 struct sockaddr_l2 la;
162 int len, err = 0;
163
164 BT_DBG("sk %p", sk);
165
166 if (!addr || alen < sizeof(addr->sa_family) ||
167 addr->sa_family != AF_BLUETOOTH)
168 return -EINVAL;
169
170 memset(&la, 0, sizeof(la));
171 len = min_t(unsigned int, sizeof(la), alen);
172 memcpy(&la, addr, len);
173
174 if (la.l2_cid && la.l2_psm)
175 return -EINVAL;
176
177 lock_sock(sk);
178
179 if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
180 && !(la.l2_psm || la.l2_cid)) {
181 err = -EINVAL;
182 goto done;
183 }
184
185 switch (l2cap_pi(sk)->mode) {
186 case L2CAP_MODE_BASIC:
187 break;
188 case L2CAP_MODE_ERTM:
189 case L2CAP_MODE_STREAMING:
190 if (!disable_ertm)
191 break;
192 /* fall through */
193 default:
194 err = -ENOTSUPP;
195 goto done;
196 }
197
198 switch (sk->sk_state) {
199 case BT_CONNECT:
200 case BT_CONNECT2:
201 case BT_CONFIG:
202 /* Already connecting */
203 goto wait;
204
205 case BT_CONNECTED:
206 /* Already connected */
207 err = -EISCONN;
208 goto done;
209
210 case BT_OPEN:
211 case BT_BOUND:
212 /* Can connect */
213 break;
214
215 default:
216 err = -EBADFD;
217 goto done;
218 }
219
220 /* PSM must be odd and lsb of upper byte must be 0 */
221 if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
222 sk->sk_type != SOCK_RAW && !la.l2_cid) {
223 err = -EINVAL;
224 goto done;
225 }
226
227 /* Set destination address and psm */
228 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
229 l2cap_pi(sk)->psm = la.l2_psm;
230 l2cap_pi(sk)->dcid = la.l2_cid;
231
232 err = l2cap_do_connect(sk);
233 if (err)
234 goto done;
235
236wait:
237 err = bt_sock_wait_state(sk, BT_CONNECTED,
238 sock_sndtimeo(sk, flags & O_NONBLOCK));
239done:
240 release_sock(sk);
241 return err;
242}
243
244static int l2cap_sock_listen(struct socket *sock, int backlog)
245{
246 struct sock *sk = sock->sk;
247 int err = 0;
248
249 BT_DBG("sk %p backlog %d", sk, backlog);
250
251 lock_sock(sk);
252
253 if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
254 || sk->sk_state != BT_BOUND) {
255 err = -EBADFD;
256 goto done;
257 }
258
259 switch (l2cap_pi(sk)->mode) {
260 case L2CAP_MODE_BASIC:
261 break;
262 case L2CAP_MODE_ERTM:
263 case L2CAP_MODE_STREAMING:
264 if (!disable_ertm)
265 break;
266 /* fall through */
267 default:
268 err = -ENOTSUPP;
269 goto done;
270 }
271
272 if (!l2cap_pi(sk)->psm && !l2cap_pi(sk)->dcid) {
273 bdaddr_t *src = &bt_sk(sk)->src;
274 u16 psm;
275
276 err = -EINVAL;
277
278 write_lock_bh(&l2cap_sk_list.lock);
279
280 for (psm = 0x1001; psm < 0x1100; psm += 2)
281 if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) {
282 l2cap_pi(sk)->psm = cpu_to_le16(psm);
283 l2cap_pi(sk)->sport = cpu_to_le16(psm);
284 err = 0;
285 break;
286 }
287
288 write_unlock_bh(&l2cap_sk_list.lock);
289
290 if (err < 0)
291 goto done;
292 }
293
294 sk->sk_max_ack_backlog = backlog;
295 sk->sk_ack_backlog = 0;
296 sk->sk_state = BT_LISTEN;
297
298done:
299 release_sock(sk);
300 return err;
301}
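If a listener was never bound to a PSM or CID, listen() autobinds one: the loop above claims the first odd PSM in the dynamic range that is free for this source address.

	/* Scan order: 0x1001, 0x1003, ..., 0x10FF (128 candidates);
	 * if every one is taken on this source address, listen()
	 * fails with -EINVAL. */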
302
303static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags)
304{
305 DECLARE_WAITQUEUE(wait, current);
306 struct sock *sk = sock->sk, *nsk;
307 long timeo;
308 int err = 0;
309
310 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
311
312 if (sk->sk_state != BT_LISTEN) {
313 err = -EBADFD;
314 goto done;
315 }
316
317 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
318
319 BT_DBG("sk %p timeo %ld", sk, timeo);
320
321 /* Wait for an incoming connection. (wake-one). */
322 add_wait_queue_exclusive(sk_sleep(sk), &wait);
323 while (!(nsk = bt_accept_dequeue(sk, newsock))) {
324 set_current_state(TASK_INTERRUPTIBLE);
325 if (!timeo) {
326 err = -EAGAIN;
327 break;
328 }
329
330 release_sock(sk);
331 timeo = schedule_timeout(timeo);
332 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
333
334 if (sk->sk_state != BT_LISTEN) {
335 err = -EBADFD;
336 break;
337 }
338
339 if (signal_pending(current)) {
340 err = sock_intr_errno(timeo);
341 break;
342 }
343 }
344 set_current_state(TASK_RUNNING);
345 remove_wait_queue(sk_sleep(sk), &wait);
346
347 if (err)
348 goto done;
349
350 newsock->state = SS_CONNECTED;
351
352 BT_DBG("new socket %p", nsk);
353
354done:
355 release_sock(sk);
356 return err;
357}
358
359static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
360{
361 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
362 struct sock *sk = sock->sk;
363
364 BT_DBG("sock %p, sk %p", sock, sk);
365
366 addr->sa_family = AF_BLUETOOTH;
367 *len = sizeof(struct sockaddr_l2);
368
369 if (peer) {
370 la->l2_psm = l2cap_pi(sk)->psm;
371 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
372 la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid);
373 } else {
374 la->l2_psm = l2cap_pi(sk)->sport;
375 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
376 la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid);
377 }
378
379 return 0;
380}
381
382static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
383{
384 struct sock *sk = sock->sk;
385 struct l2cap_options opts;
386 struct l2cap_conninfo cinfo;
387 int len, err = 0;
388 u32 opt;
389
390 BT_DBG("sk %p", sk);
391
392 if (get_user(len, optlen))
393 return -EFAULT;
394
395 lock_sock(sk);
396
397 switch (optname) {
398 case L2CAP_OPTIONS:
399 memset(&opts, 0, sizeof(opts));
400 opts.imtu = l2cap_pi(sk)->imtu;
401 opts.omtu = l2cap_pi(sk)->omtu;
402 opts.flush_to = l2cap_pi(sk)->flush_to;
403 opts.mode = l2cap_pi(sk)->mode;
404 opts.fcs = l2cap_pi(sk)->fcs;
405 opts.max_tx = l2cap_pi(sk)->max_tx;
406 opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
407
408 len = min_t(unsigned int, len, sizeof(opts));
409 if (copy_to_user(optval, (char *) &opts, len))
410 err = -EFAULT;
411
412 break;
413
414 case L2CAP_LM:
415 switch (l2cap_pi(sk)->sec_level) {
416 case BT_SECURITY_LOW:
417 opt = L2CAP_LM_AUTH;
418 break;
419 case BT_SECURITY_MEDIUM:
420 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
421 break;
422 case BT_SECURITY_HIGH:
423 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
424 L2CAP_LM_SECURE;
425 break;
426 default:
427 opt = 0;
428 break;
429 }
430
431 if (l2cap_pi(sk)->role_switch)
432 opt |= L2CAP_LM_MASTER;
433
434 if (l2cap_pi(sk)->force_reliable)
435 opt |= L2CAP_LM_RELIABLE;
436
437 if (put_user(opt, (u32 __user *) optval))
438 err = -EFAULT;
439 break;
440
441 case L2CAP_CONNINFO:
442 if (sk->sk_state != BT_CONNECTED &&
443 !(sk->sk_state == BT_CONNECT2 &&
444 bt_sk(sk)->defer_setup)) {
445 err = -ENOTCONN;
446 break;
447 }
448
449 cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle;
450 memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3);
451
452 len = min_t(unsigned int, len, sizeof(cinfo));
453 if (copy_to_user(optval, (char *) &cinfo, len))
454 err = -EFAULT;
455
456 break;
457
458 default:
459 err = -ENOPROTOOPT;
460 break;
461 }
462
463 release_sock(sk);
464 return err;
465}
466
467static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
468{
469 struct sock *sk = sock->sk;
470 struct bt_security sec;
471 int len, err = 0;
472
473 BT_DBG("sk %p", sk);
474
475 if (level == SOL_L2CAP)
476 return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
477
478 if (level != SOL_BLUETOOTH)
479 return -ENOPROTOOPT;
480
481 if (get_user(len, optlen))
482 return -EFAULT;
483
484 lock_sock(sk);
485
486 switch (optname) {
487 case BT_SECURITY:
488 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
489 && sk->sk_type != SOCK_RAW) {
490 err = -EINVAL;
491 break;
492 }
493
494 sec.level = l2cap_pi(sk)->sec_level;
495
496 len = min_t(unsigned int, len, sizeof(sec));
497 if (copy_to_user(optval, (char *) &sec, len))
498 err = -EFAULT;
499
500 break;
501
502 case BT_DEFER_SETUP:
503 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
504 err = -EINVAL;
505 break;
506 }
507
508 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
509 err = -EFAULT;
510
511 break;
512
513 case BT_FLUSHABLE:
514 if (put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval))
515 err = -EFAULT;
516
517 break;
518
519 default:
520 err = -ENOPROTOOPT;
521 break;
522 }
523
524 release_sock(sk);
525 return err;
526}
527
528static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
529{
530 struct sock *sk = sock->sk;
531 struct l2cap_options opts;
532 int len, err = 0;
533 u32 opt;
534
535 BT_DBG("sk %p", sk);
536
537 lock_sock(sk);
538
539 switch (optname) {
540 case L2CAP_OPTIONS:
541 if (sk->sk_state == BT_CONNECTED) {
542 err = -EINVAL;
543 break;
544 }
545
546 opts.imtu = l2cap_pi(sk)->imtu;
547 opts.omtu = l2cap_pi(sk)->omtu;
548 opts.flush_to = l2cap_pi(sk)->flush_to;
549 opts.mode = l2cap_pi(sk)->mode;
550 opts.fcs = l2cap_pi(sk)->fcs;
551 opts.max_tx = l2cap_pi(sk)->max_tx;
552 opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
553
554 len = min_t(unsigned int, sizeof(opts), optlen);
555 if (copy_from_user((char *) &opts, optval, len)) {
556 err = -EFAULT;
557 break;
558 }
559
560 if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
561 err = -EINVAL;
562 break;
563 }
564
565 l2cap_pi(sk)->mode = opts.mode;
566 switch (l2cap_pi(sk)->mode) {
567 case L2CAP_MODE_BASIC:
568 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
569 break;
570 case L2CAP_MODE_ERTM:
571 case L2CAP_MODE_STREAMING:
572 if (!disable_ertm)
573 break;
574 /* fall through */
575 default:
576 err = -EINVAL;
577 break;
578 }
579
580 l2cap_pi(sk)->imtu = opts.imtu;
581 l2cap_pi(sk)->omtu = opts.omtu;
582 l2cap_pi(sk)->fcs = opts.fcs;
583 l2cap_pi(sk)->max_tx = opts.max_tx;
584 l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
585 break;
586
587 case L2CAP_LM:
588 if (get_user(opt, (u32 __user *) optval)) {
589 err = -EFAULT;
590 break;
591 }
592
593 if (opt & L2CAP_LM_AUTH)
594 l2cap_pi(sk)->sec_level = BT_SECURITY_LOW;
595 if (opt & L2CAP_LM_ENCRYPT)
596 l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
597 if (opt & L2CAP_LM_SECURE)
598 l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH;
599
600 l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER);
601 l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE);
602 break;
603
604 default:
605 err = -ENOPROTOOPT;
606 break;
607 }
608
609 release_sock(sk);
610 return err;
611}
612
613static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
614{
615 struct sock *sk = sock->sk;
616 struct bt_security sec;
617 int len, err = 0;
618 u32 opt;
619
620 BT_DBG("sk %p", sk);
621
622 if (level == SOL_L2CAP)
623 return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
624
625 if (level != SOL_BLUETOOTH)
626 return -ENOPROTOOPT;
627
628 lock_sock(sk);
629
630 switch (optname) {
631 case BT_SECURITY:
632 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
633 && sk->sk_type != SOCK_RAW) {
634 err = -EINVAL;
635 break;
636 }
637
638 sec.level = BT_SECURITY_LOW;
639
640 len = min_t(unsigned int, sizeof(sec), optlen);
641 if (copy_from_user((char *) &sec, optval, len)) {
642 err = -EFAULT;
643 break;
644 }
645
646 if (sec.level < BT_SECURITY_LOW ||
647 sec.level > BT_SECURITY_HIGH) {
648 err = -EINVAL;
649 break;
650 }
651
652 l2cap_pi(sk)->sec_level = sec.level;
653 break;
654
655 case BT_DEFER_SETUP:
656 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
657 err = -EINVAL;
658 break;
659 }
660
661 if (get_user(opt, (u32 __user *) optval)) {
662 err = -EFAULT;
663 break;
664 }
665
666 bt_sk(sk)->defer_setup = opt;
667 break;
668
669 case BT_FLUSHABLE:
670 if (get_user(opt, (u32 __user *) optval)) {
671 err = -EFAULT;
672 break;
673 }
674
675 if (opt > BT_FLUSHABLE_ON) {
676 err = -EINVAL;
677 break;
678 }
679
680 if (opt == BT_FLUSHABLE_OFF) {
681 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
682 			/* proceed further only when we have an l2cap_conn and
683 No Flush support in the LM */
684 if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) {
685 err = -EINVAL;
686 break;
687 }
688 }
689
690 l2cap_pi(sk)->flushable = opt;
691 break;
692
693 default:
694 err = -ENOPROTOOPT;
695 break;
696 }
697
698 release_sock(sk);
699 return err;
700}
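From userspace the new BT_FLUSHABLE option is a plain u32 at the SOL_BLUETOOTH level. A minimal usage sketch, assuming the BT_FLUSHABLE* constants are exported through the installed Bluetooth headers:

	#include <stdint.h>
	#include <sys/socket.h>
	#include <bluetooth/bluetooth.h>

	/* Toggle per-socket flushability; switching back to
	 * BT_FLUSHABLE_OFF additionally requires a live connection
	 * whose controller supports No Flush (else EINVAL, as above). */
	static int l2cap_set_flushable(int sk, int on)
	{
		uint32_t opt = on ? BT_FLUSHABLE_ON : BT_FLUSHABLE_OFF;

		return setsockopt(sk, SOL_BLUETOOTH, BT_FLUSHABLE,
						&opt, sizeof(opt));
	}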
701
702static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
703{
704 struct sock *sk = sock->sk;
705 struct l2cap_pinfo *pi = l2cap_pi(sk);
706 struct sk_buff *skb;
707 u16 control;
708 int err;
709
710 BT_DBG("sock %p, sk %p", sock, sk);
711
712 err = sock_error(sk);
713 if (err)
714 return err;
715
716 if (msg->msg_flags & MSG_OOB)
717 return -EOPNOTSUPP;
718
719 lock_sock(sk);
720
721 if (sk->sk_state != BT_CONNECTED) {
722 err = -ENOTCONN;
723 goto done;
724 }
725
726 /* Connectionless channel */
727 if (sk->sk_type == SOCK_DGRAM) {
728 skb = l2cap_create_connless_pdu(sk, msg, len);
729 if (IS_ERR(skb)) {
730 err = PTR_ERR(skb);
731 } else {
732 l2cap_do_send(sk, skb);
733 err = len;
734 }
735 goto done;
736 }
737
738 switch (pi->mode) {
739 case L2CAP_MODE_BASIC:
740 /* Check outgoing MTU */
741 if (len > pi->omtu) {
742 err = -EMSGSIZE;
743 goto done;
744 }
745
746 /* Create a basic PDU */
747 skb = l2cap_create_basic_pdu(sk, msg, len);
748 if (IS_ERR(skb)) {
749 err = PTR_ERR(skb);
750 goto done;
751 }
752
753 l2cap_do_send(sk, skb);
754 err = len;
755 break;
756
757 case L2CAP_MODE_ERTM:
758 case L2CAP_MODE_STREAMING:
759 /* Entire SDU fits into one PDU */
760 if (len <= pi->remote_mps) {
761 control = L2CAP_SDU_UNSEGMENTED;
762 skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0);
763 if (IS_ERR(skb)) {
764 err = PTR_ERR(skb);
765 goto done;
766 }
767 __skb_queue_tail(TX_QUEUE(sk), skb);
768
769 if (sk->sk_send_head == NULL)
770 sk->sk_send_head = skb;
771
772 } else {
773 			/* Segment SDU into multiple PDUs */
774 err = l2cap_sar_segment_sdu(sk, msg, len);
775 if (err < 0)
776 goto done;
777 }
778
779 if (pi->mode == L2CAP_MODE_STREAMING) {
780 l2cap_streaming_send(sk);
781 } else {
782 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
783 (pi->conn_state & L2CAP_CONN_WAIT_F)) {
784 err = len;
785 break;
786 }
787 err = l2cap_ertm_send(sk);
788 }
789
790 if (err >= 0)
791 err = len;
792 break;
793
794 default:
795 BT_DBG("bad state %1.1x", pi->mode);
796 err = -EBADFD;
797 }
798
799done:
800 release_sock(sk);
801 return err;
802}
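For ERTM and streaming channels, sendmsg() keeps an SDU that fits the peer's MPS in one unsegmented I-frame and otherwise defers to l2cap_sar_segment_sdu(). Roughly, under that scheme:

	/* An SDU of len bytes becomes about DIV_ROUND_UP(len, remote_mps)
	 * I-frames: the first is tagged L2CAP_SDU_START and carries the
	 * 2-byte SDU length, middle ones L2CAP_SDU_CONTINUE, the last
	 * L2CAP_SDU_END. */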
803
804static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
805{
806 struct sock *sk = sock->sk;
807
808 lock_sock(sk);
809
810 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
811 struct l2cap_conn_rsp rsp;
812 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
813 u8 buf[128];
814
815 sk->sk_state = BT_CONFIG;
816
817 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
818 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
819 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
820 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
821 l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
822 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
823
824 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
825 release_sock(sk);
826 return 0;
827 }
828
829 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
830 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
831 l2cap_build_conf_req(sk, buf), buf);
832 l2cap_pi(sk)->num_conf_req++;
833
834 release_sock(sk);
835 return 0;
836 }
837
838 release_sock(sk);
839
840 if (sock->type == SOCK_STREAM)
841 return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
842
843 return bt_sock_recvmsg(iocb, sock, msg, len, flags);
844}
845
846/* Kill socket (only if zapped and orphan)
847 * Must be called on unlocked socket.
848 */
849void l2cap_sock_kill(struct sock *sk)
850{
851 if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
852 return;
853
854 BT_DBG("sk %p state %d", sk, sk->sk_state);
855
856 /* Kill poor orphan */
857 bt_sock_unlink(&l2cap_sk_list, sk);
858 sock_set_flag(sk, SOCK_DEAD);
859 sock_put(sk);
860}
861
862/* Must be called on unlocked socket. */
863static void l2cap_sock_close(struct sock *sk)
864{
865 l2cap_sock_clear_timer(sk);
866 lock_sock(sk);
867 __l2cap_sock_close(sk, ECONNRESET);
868 release_sock(sk);
869 l2cap_sock_kill(sk);
870}
871
872static void l2cap_sock_cleanup_listen(struct sock *parent)
873{
874 struct sock *sk;
875
876 BT_DBG("parent %p", parent);
877
878 /* Close not yet accepted channels */
879 while ((sk = bt_accept_dequeue(parent, NULL)))
880 l2cap_sock_close(sk);
881
882 parent->sk_state = BT_CLOSED;
883 sock_set_flag(parent, SOCK_ZAPPED);
884}
885
886void __l2cap_sock_close(struct sock *sk, int reason)
887{
888 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
889
890 BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
891
892 switch (sk->sk_state) {
893 case BT_LISTEN:
894 l2cap_sock_cleanup_listen(sk);
895 break;
896
897 case BT_CONNECTED:
898 case BT_CONFIG:
899 if ((sk->sk_type == SOCK_SEQPACKET ||
900 sk->sk_type == SOCK_STREAM) &&
901 conn->hcon->type == ACL_LINK) {
902 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
903 l2cap_send_disconn_req(conn, sk, reason);
904 } else
905 l2cap_chan_del(sk, reason);
906 break;
907
908 case BT_CONNECT2:
909 if ((sk->sk_type == SOCK_SEQPACKET ||
910 sk->sk_type == SOCK_STREAM) &&
911 conn->hcon->type == ACL_LINK) {
912 struct l2cap_conn_rsp rsp;
913 __u16 result;
914
915 if (bt_sk(sk)->defer_setup)
916 result = L2CAP_CR_SEC_BLOCK;
917 else
918 result = L2CAP_CR_BAD_PSM;
919
920 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
921 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
922 rsp.result = cpu_to_le16(result);
923 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
924 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
925 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
926 } else
927 l2cap_chan_del(sk, reason);
928 break;
929
930 case BT_CONNECT:
931 case BT_DISCONN:
932 l2cap_chan_del(sk, reason);
933 break;
934
935 default:
936 sock_set_flag(sk, SOCK_ZAPPED);
937 break;
938 }
939}
940
941static int l2cap_sock_shutdown(struct socket *sock, int how)
942{
943 struct sock *sk = sock->sk;
944 int err = 0;
945
946 BT_DBG("sock %p, sk %p", sock, sk);
947
948 if (!sk)
949 return 0;
950
951 lock_sock(sk);
952 if (!sk->sk_shutdown) {
953 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
954 err = __l2cap_wait_ack(sk);
955
956 sk->sk_shutdown = SHUTDOWN_MASK;
957 l2cap_sock_clear_timer(sk);
958 __l2cap_sock_close(sk, 0);
959
960 if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
961 err = bt_sock_wait_state(sk, BT_CLOSED,
962 sk->sk_lingertime);
963 }
964
965 if (!err && sk->sk_err)
966 err = -sk->sk_err;
967
968 release_sock(sk);
969 return err;
970}
971
972static int l2cap_sock_release(struct socket *sock)
973{
974 struct sock *sk = sock->sk;
975 int err;
976
977 BT_DBG("sock %p, sk %p", sock, sk);
978
979 if (!sk)
980 return 0;
981
982 err = l2cap_sock_shutdown(sock, 2);
983
984 sock_orphan(sk);
985 l2cap_sock_kill(sk);
986 return err;
987}
988
989static void l2cap_sock_destruct(struct sock *sk)
990{
991 BT_DBG("sk %p", sk);
992
993 skb_queue_purge(&sk->sk_receive_queue);
994 skb_queue_purge(&sk->sk_write_queue);
995}
996
997void l2cap_sock_init(struct sock *sk, struct sock *parent)
998{
999 struct l2cap_pinfo *pi = l2cap_pi(sk);
1000
1001 BT_DBG("sk %p", sk);
1002
1003 if (parent) {
1004 sk->sk_type = parent->sk_type;
1005 bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
1006
1007 pi->imtu = l2cap_pi(parent)->imtu;
1008 pi->omtu = l2cap_pi(parent)->omtu;
1009 pi->conf_state = l2cap_pi(parent)->conf_state;
1010 pi->mode = l2cap_pi(parent)->mode;
1011 pi->fcs = l2cap_pi(parent)->fcs;
1012 pi->max_tx = l2cap_pi(parent)->max_tx;
1013 pi->tx_win = l2cap_pi(parent)->tx_win;
1014 pi->sec_level = l2cap_pi(parent)->sec_level;
1015 pi->role_switch = l2cap_pi(parent)->role_switch;
1016 pi->force_reliable = l2cap_pi(parent)->force_reliable;
1017 pi->flushable = l2cap_pi(parent)->flushable;
1018 } else {
1019 pi->imtu = L2CAP_DEFAULT_MTU;
1020 pi->omtu = 0;
1021 if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
1022 pi->mode = L2CAP_MODE_ERTM;
1023 pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
1024 } else {
1025 pi->mode = L2CAP_MODE_BASIC;
1026 }
1027 pi->max_tx = L2CAP_DEFAULT_MAX_TX;
1028 pi->fcs = L2CAP_FCS_CRC16;
1029 pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
1030 pi->sec_level = BT_SECURITY_LOW;
1031 pi->role_switch = 0;
1032 pi->force_reliable = 0;
1033 pi->flushable = BT_FLUSHABLE_OFF;
1034 }
1035
1036 /* Default config options */
1037 pi->conf_len = 0;
1038 pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
1039 skb_queue_head_init(TX_QUEUE(sk));
1040 skb_queue_head_init(SREJ_QUEUE(sk));
1041 skb_queue_head_init(BUSY_QUEUE(sk));
1042 INIT_LIST_HEAD(SREJ_LIST(sk));
1043}
1044
1045static struct proto l2cap_proto = {
1046 .name = "L2CAP",
1047 .owner = THIS_MODULE,
1048 .obj_size = sizeof(struct l2cap_pinfo)
1049};
1050
1051struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
1052{
1053 struct sock *sk;
1054
1055 sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
1056 if (!sk)
1057 return NULL;
1058
1059 sock_init_data(sock, sk);
1060 INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
1061
1062 sk->sk_destruct = l2cap_sock_destruct;
1063 sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
1064
1065 sock_reset_flag(sk, SOCK_ZAPPED);
1066
1067 sk->sk_protocol = proto;
1068 sk->sk_state = BT_OPEN;
1069
1070 setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
1071
1072 bt_sock_link(&l2cap_sk_list, sk);
1073 return sk;
1074}
1075
1076static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
1077 int kern)
1078{
1079 struct sock *sk;
1080
1081 BT_DBG("sock %p", sock);
1082
1083 sock->state = SS_UNCONNECTED;
1084
1085 if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
1086 sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
1087 return -ESOCKTNOSUPPORT;
1088
1089 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
1090 return -EPERM;
1091
1092 sock->ops = &l2cap_sock_ops;
1093
1094 sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
1095 if (!sk)
1096 return -ENOMEM;
1097
1098 l2cap_sock_init(sk, NULL);
1099 return 0;
1100}
1101
1102const struct proto_ops l2cap_sock_ops = {
1103 .family = PF_BLUETOOTH,
1104 .owner = THIS_MODULE,
1105 .release = l2cap_sock_release,
1106 .bind = l2cap_sock_bind,
1107 .connect = l2cap_sock_connect,
1108 .listen = l2cap_sock_listen,
1109 .accept = l2cap_sock_accept,
1110 .getname = l2cap_sock_getname,
1111 .sendmsg = l2cap_sock_sendmsg,
1112 .recvmsg = l2cap_sock_recvmsg,
1113 .poll = bt_sock_poll,
1114 .ioctl = bt_sock_ioctl,
1115 .mmap = sock_no_mmap,
1116 .socketpair = sock_no_socketpair,
1117 .shutdown = l2cap_sock_shutdown,
1118 .setsockopt = l2cap_sock_setsockopt,
1119 .getsockopt = l2cap_sock_getsockopt
1120};
1121
1122static const struct net_proto_family l2cap_sock_family_ops = {
1123 .family = PF_BLUETOOTH,
1124 .owner = THIS_MODULE,
1125 .create = l2cap_sock_create,
1126};
1127
1128int __init l2cap_init_sockets(void)
1129{
1130 int err;
1131
1132 err = proto_register(&l2cap_proto, 0);
1133 if (err < 0)
1134 return err;
1135
1136 err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
1137 if (err < 0)
1138 goto error;
1139
1140 BT_INFO("L2CAP socket layer initialized");
1141
1142 return 0;
1143
1144error:
1145 BT_ERR("L2CAP socket registration failed");
1146 proto_unregister(&l2cap_proto);
1147 return err;
1148}
1149
1150void l2cap_cleanup_sockets(void)
1151{
1152 if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
1153 BT_ERR("L2CAP socket unregistration failed");
1154
1155 proto_unregister(&l2cap_proto);
1156}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f827fd90838..0054c74e27b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -22,7 +22,7 @@
22 22
23/* Bluetooth HCI Management interface */ 23/* Bluetooth HCI Management interface */
24 24
25#include <asm/uaccess.h> 25#include <linux/uaccess.h>
26#include <asm/unaligned.h> 26#include <asm/unaligned.h>
27 27
28#include <net/bluetooth/bluetooth.h> 28#include <net/bluetooth/bluetooth.h>
@@ -32,13 +32,24 @@
 #define MGMT_VERSION	0
 #define MGMT_REVISION	1
 
-static int cmd_status(struct sock *sk, u16 cmd, u8 status)
+struct pending_cmd {
+	struct list_head list;
+	__u16 opcode;
+	int index;
+	void *cmd;
+	struct sock *sk;
+	void *user_data;
+};
+
+LIST_HEAD(cmd_list);
+
+static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status)
 {
 	struct sk_buff *skb;
 	struct mgmt_hdr *hdr;
 	struct mgmt_ev_cmd_status *ev;
 
-	BT_DBG("sock %p", sk);
+	BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status);
 
 	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC);
 	if (!skb)
@@ -47,6 +58,7 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status)
 	hdr = (void *) skb_put(skb, sizeof(*hdr));
 
 	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS);
+	hdr->index = cpu_to_le16(index);
 	hdr->len = cpu_to_le16(sizeof(*ev));
 
 	ev = (void *) skb_put(skb, sizeof(*ev));
@@ -59,29 +71,30 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status)
 	return 0;
 }
 
-static int read_version(struct sock *sk)
+static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp,
+							size_t rp_len)
 {
 	struct sk_buff *skb;
 	struct mgmt_hdr *hdr;
 	struct mgmt_ev_cmd_complete *ev;
-	struct mgmt_rp_read_version *rp;
 
 	BT_DBG("sock %p", sk);
 
-	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
+	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
 
 	hdr = (void *) skb_put(skb, sizeof(*hdr));
+
 	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-	hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
+	hdr->index = cpu_to_le16(index);
+	hdr->len = cpu_to_le16(sizeof(*ev) + rp_len);
 
-	ev = (void *) skb_put(skb, sizeof(*ev));
-	put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode);
+	ev = (void *) skb_put(skb, sizeof(*ev) + rp_len);
+	put_unaligned_le16(cmd, &ev->opcode);
 
-	rp = (void *) skb_put(skb, sizeof(*rp));
-	rp->version = MGMT_VERSION;
-	put_unaligned_le16(MGMT_REVISION, &rp->revision);
+	if (rp)
+		memcpy(ev->data, rp, rp_len);
 
 	if (sock_queue_rcv_skb(sk, skb) < 0)
 		kfree_skb(skb);
@@ -89,16 +102,26 @@ static int read_version(struct sock *sk)
 	return 0;
 }
 
+static int read_version(struct sock *sk)
+{
+	struct mgmt_rp_read_version rp;
+
+	BT_DBG("sock %p", sk);
+
+	rp.version = MGMT_VERSION;
+	put_unaligned_le16(MGMT_REVISION, &rp.revision);
+
+	return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, &rp,
+								sizeof(rp));
+}
+
 static int read_index_list(struct sock *sk)
 {
-	struct sk_buff *skb;
-	struct mgmt_hdr *hdr;
-	struct mgmt_ev_cmd_complete *ev;
 	struct mgmt_rp_read_index_list *rp;
 	struct list_head *p;
-	size_t body_len;
+	size_t rp_len;
 	u16 count;
-	int i;
+	int i, err;
 
 	BT_DBG("sock %p", sk);
 
@@ -109,112 +132,1131 @@ static int read_index_list(struct sock *sk)
 		count++;
 	}
 
-	body_len = sizeof(*ev) + sizeof(*rp) + (2 * count);
-	skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC);
-	if (!skb)
+	rp_len = sizeof(*rp) + (2 * count);
+	rp = kmalloc(rp_len, GFP_ATOMIC);
+	if (!rp) {
+		read_unlock(&hci_dev_list_lock);
 		return -ENOMEM;
+	}
 
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-	hdr->len = cpu_to_le16(body_len);
-
-	ev = (void *) skb_put(skb, sizeof(*ev));
-	put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, &ev->opcode);
-
-	rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count));
 	put_unaligned_le16(count, &rp->num_controllers);
 
 	i = 0;
 	list_for_each(p, &hci_dev_list) {
 		struct hci_dev *d = list_entry(p, struct hci_dev, list);
+
+		hci_del_off_timer(d);
+
+		set_bit(HCI_MGMT, &d->flags);
+
+		if (test_bit(HCI_SETUP, &d->flags))
+			continue;
+
 		put_unaligned_le16(d->id, &rp->index[i++]);
 		BT_DBG("Added hci%u", d->id);
 	}
 
 	read_unlock(&hci_dev_list_lock);
 
-	if (sock_queue_rcv_skb(sk, skb) < 0)
-		kfree_skb(skb);
+	err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, rp,
+								rp_len);
 
-	return 0;
+	kfree(rp);
+
+	return err;
 }
 
-static int read_controller_info(struct sock *sk, unsigned char *data, u16 len)
+static int read_controller_info(struct sock *sk, u16 index)
 {
-	struct sk_buff *skb;
-	struct mgmt_hdr *hdr;
-	struct mgmt_ev_cmd_complete *ev;
-	struct mgmt_rp_read_info *rp;
-	struct mgmt_cp_read_info *cp;
+	struct mgmt_rp_read_info rp;
 	struct hci_dev *hdev;
-	u16 dev_id;
 
-	BT_DBG("sock %p", sk);
+	BT_DBG("sock %p hci%u", sk, index);
175
176 hdev = hci_dev_get(index);
177 if (!hdev)
178 return cmd_status(sk, index, MGMT_OP_READ_INFO, ENODEV);
179
180 hci_del_off_timer(hdev);
181
182 hci_dev_lock_bh(hdev);
183
184 set_bit(HCI_MGMT, &hdev->flags);
185
186 rp.type = hdev->dev_type;
187
188 rp.powered = test_bit(HCI_UP, &hdev->flags);
189 rp.connectable = test_bit(HCI_PSCAN, &hdev->flags);
190 rp.discoverable = test_bit(HCI_ISCAN, &hdev->flags);
191 rp.pairable = test_bit(HCI_PSCAN, &hdev->flags);
192
193 if (test_bit(HCI_AUTH, &hdev->flags))
194 rp.sec_mode = 3;
195 else if (hdev->ssp_mode > 0)
196 rp.sec_mode = 4;
197 else
198 rp.sec_mode = 2;
199
200 bacpy(&rp.bdaddr, &hdev->bdaddr);
201 memcpy(rp.features, hdev->features, 8);
202 memcpy(rp.dev_class, hdev->dev_class, 3);
203 put_unaligned_le16(hdev->manufacturer, &rp.manufacturer);
204 rp.hci_ver = hdev->hci_ver;
205 put_unaligned_le16(hdev->hci_rev, &rp.hci_rev);
206
207 hci_dev_unlock_bh(hdev);
208 hci_dev_put(hdev);
209
210 return cmd_complete(sk, index, MGMT_OP_READ_INFO, &rp, sizeof(rp));
211}
212
213static void mgmt_pending_free(struct pending_cmd *cmd)
214{
215 sock_put(cmd->sk);
216 kfree(cmd->cmd);
217 kfree(cmd);
218}
219
220static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
221 u16 index, void *data, u16 len)
222{
223 struct pending_cmd *cmd;
224
225 cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
226 if (!cmd)
227 return NULL;
228
229 cmd->opcode = opcode;
230 cmd->index = index;
231
232 cmd->cmd = kmalloc(len, GFP_ATOMIC);
233 if (!cmd->cmd) {
234 kfree(cmd);
235 return NULL;
236 }
237
238 memcpy(cmd->cmd, data, len);
239
240 cmd->sk = sk;
241 sock_hold(sk);
242
243 list_add(&cmd->list, &cmd_list);
244
245 return cmd;
246}
247
248static void mgmt_pending_foreach(u16 opcode, int index,
249 void (*cb)(struct pending_cmd *cmd, void *data),
250 void *data)
251{
252 struct list_head *p, *n;
253
254 list_for_each_safe(p, n, &cmd_list) {
255 struct pending_cmd *cmd;
256
257 cmd = list_entry(p, struct pending_cmd, list);
258
259 if (cmd->opcode != opcode)
260 continue;
261
262 if (index >= 0 && cmd->index != index)
263 continue;
264
265 cb(cmd, data);
266 }
267}
268
269static struct pending_cmd *mgmt_pending_find(u16 opcode, int index)
270{
271 struct list_head *p;
272
273 list_for_each(p, &cmd_list) {
274 struct pending_cmd *cmd;
275
276 cmd = list_entry(p, struct pending_cmd, list);
277
278 if (cmd->opcode != opcode)
279 continue;
280
281 if (index >= 0 && cmd->index != index)
282 continue;
283
284 return cmd;
285 }
286
287 return NULL;
288}
289
290static void mgmt_pending_remove(struct pending_cmd *cmd)
291{
292 list_del(&cmd->list);
293 mgmt_pending_free(cmd);
294}
295
296static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len)
297{
298 struct mgmt_mode *cp;
299 struct hci_dev *hdev;
300 struct pending_cmd *cmd;
301 int err, up;
302
303 cp = (void *) data;
304
305 BT_DBG("request for hci%u", index);
306
307 if (len != sizeof(*cp))
308 return cmd_status(sk, index, MGMT_OP_SET_POWERED, EINVAL);
309
310 hdev = hci_dev_get(index);
311 if (!hdev)
312 return cmd_status(sk, index, MGMT_OP_SET_POWERED, ENODEV);
313
314 hci_dev_lock_bh(hdev);
315
316 up = test_bit(HCI_UP, &hdev->flags);
317 if ((cp->val && up) || (!cp->val && !up)) {
318 err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EALREADY);
319 goto failed;
320 }
321
322 if (mgmt_pending_find(MGMT_OP_SET_POWERED, index)) {
323 err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EBUSY);
324 goto failed;
325 }
326
327 cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, index, data, len);
328 if (!cmd) {
329 err = -ENOMEM;
330 goto failed;
331 }
332
333 if (cp->val)
334 queue_work(hdev->workqueue, &hdev->power_on);
335 else
336 queue_work(hdev->workqueue, &hdev->power_off);
337
338 err = 0;
339
340failed:
341 hci_dev_unlock_bh(hdev);
342 hci_dev_put(hdev);
343 return err;
344}
345
346static int set_discoverable(struct sock *sk, u16 index, unsigned char *data,
347 u16 len)
348{
349 struct mgmt_mode *cp;
350 struct hci_dev *hdev;
351 struct pending_cmd *cmd;
352 u8 scan;
353 int err;
354
355 cp = (void *) data;
356
357 BT_DBG("request for hci%u", index);
358
359 if (len != sizeof(*cp))
360 return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EINVAL);
361
362 hdev = hci_dev_get(index);
363 if (!hdev)
364 return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENODEV);
365
366 hci_dev_lock_bh(hdev);
367
368 if (!test_bit(HCI_UP, &hdev->flags)) {
369 err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENETDOWN);
370 goto failed;
371 }
372
373 if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) ||
374 mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) {
375 err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EBUSY);
376 goto failed;
377 }
378
379 if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) &&
380 test_bit(HCI_PSCAN, &hdev->flags)) {
381 err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EALREADY);
382 goto failed;
383 }
 
-	if (len != 2)
-		return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL);
+	cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, index, data, len);
+	if (!cmd) {
387 err = -ENOMEM;
388 goto failed;
389 }
390
391 scan = SCAN_PAGE;
392
393 if (cp->val)
394 scan |= SCAN_INQUIRY;
395
396 err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
397 if (err < 0)
398 mgmt_pending_remove(cmd);
399
400failed:
401 hci_dev_unlock_bh(hdev);
402 hci_dev_put(hdev);
403
404 return err;
405}
406
407static int set_connectable(struct sock *sk, u16 index, unsigned char *data,
408 u16 len)
409{
410 struct mgmt_mode *cp;
411 struct hci_dev *hdev;
412 struct pending_cmd *cmd;
413 u8 scan;
414 int err;
415
416 cp = (void *) data;
417
418 BT_DBG("request for hci%u", index);
419
420 if (len != sizeof(*cp))
421 return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EINVAL);
422
423 hdev = hci_dev_get(index);
424 if (!hdev)
425 return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENODEV);
426
427 hci_dev_lock_bh(hdev);
428
429 if (!test_bit(HCI_UP, &hdev->flags)) {
430 err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENETDOWN);
431 goto failed;
432 }
433
434 if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) ||
435 mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) {
436 err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EBUSY);
437 goto failed;
438 }
439
440 if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) {
441 err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EALREADY);
442 goto failed;
443 }
444
445 cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, index, data, len);
446 if (!cmd) {
447 err = -ENOMEM;
448 goto failed;
449 }
450
451 if (cp->val)
452 scan = SCAN_PAGE;
453 else
454 scan = 0;
455
456 err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
457 if (err < 0)
458 mgmt_pending_remove(cmd);
459
460failed:
461 hci_dev_unlock_bh(hdev);
462 hci_dev_put(hdev);
463
464 return err;
465}
466
467static int mgmt_event(u16 event, u16 index, void *data, u16 data_len,
468 struct sock *skip_sk)
469{
470 struct sk_buff *skb;
471 struct mgmt_hdr *hdr;
 
-	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
+	skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
 
+	bt_cb(skb)->channel = HCI_CHANNEL_CONTROL;
+
 	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-	hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
+	hdr->opcode = cpu_to_le16(event);
+	hdr->index = cpu_to_le16(index);
+	hdr->len = cpu_to_le16(data_len);
 
-	ev = (void *) skb_put(skb, sizeof(*ev));
-	put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode);
+	if (data)
+		memcpy(skb_put(skb, data_len), data, data_len);
+
+	hci_send_to_sock(NULL, skb, skip_sk);
+	kfree_skb(skb);
 
-	rp = (void *) skb_put(skb, sizeof(*rp));
+	return 0;
+}
492
493static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val)
494{
495 struct mgmt_mode rp;
496
497 rp.val = val;
498
499 return cmd_complete(sk, index, opcode, &rp, sizeof(rp));
500}
501
502static int set_pairable(struct sock *sk, u16 index, unsigned char *data,
503 u16 len)
504{
505 struct mgmt_mode *cp, ev;
506 struct hci_dev *hdev;
507 int err;
 
 	cp = (void *) data;
-	dev_id = get_unaligned_le16(&cp->index);
 
-	BT_DBG("request for hci%u", dev_id);
+	BT_DBG("request for hci%u", index);
 
-	hdev = hci_dev_get(dev_id);
-	if (!hdev) {
-		kfree_skb(skb);
-		return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV);
+	if (len != sizeof(*cp))
+		return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, EINVAL);
+
+	hdev = hci_dev_get(index);
517 if (!hdev)
518 return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, ENODEV);
519
520 hci_dev_lock_bh(hdev);
521
522 if (cp->val)
523 set_bit(HCI_PAIRABLE, &hdev->flags);
524 else
525 clear_bit(HCI_PAIRABLE, &hdev->flags);
526
527 err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, index, cp->val);
528 if (err < 0)
529 goto failed;
530
531 ev.val = cp->val;
532
533 err = mgmt_event(MGMT_EV_PAIRABLE, index, &ev, sizeof(ev), sk);
534
535failed:
536 hci_dev_unlock_bh(hdev);
537 hci_dev_put(hdev);
538
539 return err;
540}
541
542static u8 get_service_classes(struct hci_dev *hdev)
543{
544 struct list_head *p;
545 u8 val = 0;
546
547 list_for_each(p, &hdev->uuids) {
548 struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list);
549
550 val |= uuid->svc_hint;
 	}
 
553 return val;
554}
555
556static int update_class(struct hci_dev *hdev)
557{
558 u8 cod[3];
559
560 BT_DBG("%s", hdev->name);
561
562 if (test_bit(HCI_SERVICE_CACHE, &hdev->flags))
563 return 0;
564
565 cod[0] = hdev->minor_class;
566 cod[1] = hdev->major_class;
567 cod[2] = get_service_classes(hdev);
568
569 if (memcmp(cod, hdev->dev_class, 3) == 0)
570 return 0;
571
572 return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
573}
574
575static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len)
576{
577 struct mgmt_cp_add_uuid *cp;
578 struct hci_dev *hdev;
579 struct bt_uuid *uuid;
580 int err;
581
582 cp = (void *) data;
583
584 BT_DBG("request for hci%u", index);
585
586 if (len != sizeof(*cp))
587 return cmd_status(sk, index, MGMT_OP_ADD_UUID, EINVAL);
588
589 hdev = hci_dev_get(index);
590 if (!hdev)
591 return cmd_status(sk, index, MGMT_OP_ADD_UUID, ENODEV);
592
 	hci_dev_lock_bh(hdev);
 
-	put_unaligned_le16(hdev->id, &rp->index);
-	rp->type = hdev->dev_type;
-
-	rp->powered = test_bit(HCI_UP, &hdev->flags);
-	rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags);
-	rp->pairable = test_bit(HCI_PSCAN, &hdev->flags);
-
-	if (test_bit(HCI_AUTH, &hdev->flags))
-		rp->sec_mode = 3;
-	else if (hdev->ssp_mode > 0)
-		rp->sec_mode = 4;
-	else
-		rp->sec_mode = 2;
-
-	bacpy(&rp->bdaddr, &hdev->bdaddr);
-	memcpy(rp->features, hdev->features, 8);
-	memcpy(rp->dev_class, hdev->dev_class, 3);
-	put_unaligned_le16(hdev->manufacturer, &rp->manufacturer);
-	rp->hci_ver = hdev->hci_ver;
-	put_unaligned_le16(hdev->hci_rev, &rp->hci_rev);
+	uuid = kmalloc(sizeof(*uuid), GFP_ATOMIC);
+	if (!uuid) {
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	memcpy(uuid->uuid, cp->uuid, 16);
+	uuid->svc_hint = cp->svc_hint;
+
+	list_add(&uuid->list, &hdev->uuids);
+
+	err = update_class(hdev);
+	if (err < 0)
+		goto failed;
+
+	err = cmd_complete(sk, index, MGMT_OP_ADD_UUID, NULL, 0);
+
+failed:
 	hci_dev_unlock_bh(hdev);
 	hci_dev_put(hdev);
 
-	if (sock_queue_rcv_skb(sk, skb) < 0)
-		kfree_skb(skb);
+	return err;
+}
618
619static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len)
620{
621 struct list_head *p, *n;
622 struct mgmt_cp_remove_uuid *cp;
623 struct hci_dev *hdev;
624 u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
625 int err, found;
626
627 cp = (void *) data;
628
629 BT_DBG("request for hci%u", index);
630
631 if (len != sizeof(*cp))
632 return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, EINVAL);
633
634 hdev = hci_dev_get(index);
635 if (!hdev)
636 return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENODEV);
637
638 hci_dev_lock_bh(hdev);
639
640 if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) {
641 err = hci_uuids_clear(hdev);
642 goto unlock;
643 }
644
645 found = 0;
646
647 list_for_each_safe(p, n, &hdev->uuids) {
648 struct bt_uuid *match = list_entry(p, struct bt_uuid, list);
649
650 if (memcmp(match->uuid, cp->uuid, 16) != 0)
651 continue;
652
653 list_del(&match->list);
654 found++;
655 }
656
657 if (found == 0) {
658 err = cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENOENT);
659 goto unlock;
660 }
661
662 err = update_class(hdev);
663 if (err < 0)
664 goto unlock;
665
666 err = cmd_complete(sk, index, MGMT_OP_REMOVE_UUID, NULL, 0);
667
668unlock:
669 hci_dev_unlock_bh(hdev);
670 hci_dev_put(hdev);
671
672 return err;
673}
674
675static int set_dev_class(struct sock *sk, u16 index, unsigned char *data,
676 u16 len)
677{
678 struct hci_dev *hdev;
679 struct mgmt_cp_set_dev_class *cp;
680 int err;
681
682 cp = (void *) data;
683
684 BT_DBG("request for hci%u", index);
685
686 if (len != sizeof(*cp))
687 return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, EINVAL);
688
689 hdev = hci_dev_get(index);
690 if (!hdev)
691 return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, ENODEV);
692
693 hci_dev_lock_bh(hdev);
694
695 hdev->major_class = cp->major;
696 hdev->minor_class = cp->minor;
697
698 err = update_class(hdev);
699
700 if (err == 0)
701 err = cmd_complete(sk, index, MGMT_OP_SET_DEV_CLASS, NULL, 0);
702
703 hci_dev_unlock_bh(hdev);
704 hci_dev_put(hdev);
705
706 return err;
707}
708
709static int set_service_cache(struct sock *sk, u16 index, unsigned char *data,
710 u16 len)
711{
712 struct hci_dev *hdev;
713 struct mgmt_cp_set_service_cache *cp;
714 int err;
715
716 cp = (void *) data;
717
718 if (len != sizeof(*cp))
719 return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, EINVAL);
720
721 hdev = hci_dev_get(index);
722 if (!hdev)
723 return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, ENODEV);
724
725 hci_dev_lock_bh(hdev);
726
727 BT_DBG("hci%u enable %d", index, cp->enable);
728
729 if (cp->enable) {
730 set_bit(HCI_SERVICE_CACHE, &hdev->flags);
731 err = 0;
732 } else {
733 clear_bit(HCI_SERVICE_CACHE, &hdev->flags);
734 err = update_class(hdev);
735 }
736
737 if (err == 0)
738 err = cmd_complete(sk, index, MGMT_OP_SET_SERVICE_CACHE, NULL,
739 0);
740
741 hci_dev_unlock_bh(hdev);
742 hci_dev_put(hdev);
743
744 return err;
745}
746
747static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len)
748{
749 struct hci_dev *hdev;
750 struct mgmt_cp_load_keys *cp;
751 u16 key_count, expected_len;
752 int i;
753
754 cp = (void *) data;
755
756 if (len < sizeof(*cp))
757 return -EINVAL;
758
759 key_count = get_unaligned_le16(&cp->key_count);
760
761 expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info);
762 if (expected_len != len) {
763		BT_ERR("load_keys: expected %u bytes, got %u bytes",
764							expected_len, len);
765 return -EINVAL;
766 }
767
768 hdev = hci_dev_get(index);
769 if (!hdev)
770 return cmd_status(sk, index, MGMT_OP_LOAD_KEYS, ENODEV);
771
772 BT_DBG("hci%u debug_keys %u key_count %u", index, cp->debug_keys,
773 key_count);
774
775 hci_dev_lock_bh(hdev);
776
777 hci_link_keys_clear(hdev);
778
779 set_bit(HCI_LINK_KEYS, &hdev->flags);
780
781 if (cp->debug_keys)
782 set_bit(HCI_DEBUG_KEYS, &hdev->flags);
783 else
784 clear_bit(HCI_DEBUG_KEYS, &hdev->flags);
785
786 for (i = 0; i < key_count; i++) {
787 struct mgmt_key_info *key = &cp->keys[i];
788
789 hci_add_link_key(hdev, 0, &key->bdaddr, key->val, key->type,
790 key->pin_len);
791 }
792
793 hci_dev_unlock_bh(hdev);
794 hci_dev_put(hdev);
 
 	return 0;
 }
 
799static int remove_key(struct sock *sk, u16 index, unsigned char *data, u16 len)
800{
801 struct hci_dev *hdev;
802 struct mgmt_cp_remove_key *cp;
803 struct hci_conn *conn;
804 int err;
805
806 cp = (void *) data;
807
808 if (len != sizeof(*cp))
809 return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, EINVAL);
810
811 hdev = hci_dev_get(index);
812 if (!hdev)
813 return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, ENODEV);
814
815 hci_dev_lock_bh(hdev);
816
817 err = hci_remove_link_key(hdev, &cp->bdaddr);
818 if (err < 0) {
819 err = cmd_status(sk, index, MGMT_OP_REMOVE_KEY, -err);
820 goto unlock;
821 }
822
823 err = 0;
824
825 if (!test_bit(HCI_UP, &hdev->flags) || !cp->disconnect)
826 goto unlock;
827
828 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
829 if (conn) {
830 struct hci_cp_disconnect dc;
831
832 put_unaligned_le16(conn->handle, &dc.handle);
833 dc.reason = 0x13; /* Remote User Terminated Connection */
834		err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc);
835 }
836
837unlock:
838 hci_dev_unlock_bh(hdev);
839 hci_dev_put(hdev);
840
841 return err;
842}
843
844static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len)
845{
846 struct hci_dev *hdev;
847 struct mgmt_cp_disconnect *cp;
848 struct hci_cp_disconnect dc;
849 struct pending_cmd *cmd;
850 struct hci_conn *conn;
851 int err;
852
853 BT_DBG("");
854
855 cp = (void *) data;
856
857 if (len != sizeof(*cp))
858 return cmd_status(sk, index, MGMT_OP_DISCONNECT, EINVAL);
859
860 hdev = hci_dev_get(index);
861 if (!hdev)
862 return cmd_status(sk, index, MGMT_OP_DISCONNECT, ENODEV);
863
864 hci_dev_lock_bh(hdev);
865
866 if (!test_bit(HCI_UP, &hdev->flags)) {
867 err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENETDOWN);
868 goto failed;
869 }
870
871 if (mgmt_pending_find(MGMT_OP_DISCONNECT, index)) {
872 err = cmd_status(sk, index, MGMT_OP_DISCONNECT, EBUSY);
873 goto failed;
874 }
875
876 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
877 if (!conn) {
878 err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENOTCONN);
879 goto failed;
880 }
881
882 cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, index, data, len);
883 if (!cmd) {
884 err = -ENOMEM;
885 goto failed;
886 }
887
888 put_unaligned_le16(conn->handle, &dc.handle);
889 dc.reason = 0x13; /* Remote User Terminated Connection */
890
891 err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc);
892 if (err < 0)
893 mgmt_pending_remove(cmd);
894
895failed:
896 hci_dev_unlock_bh(hdev);
897 hci_dev_put(hdev);
898
899 return err;
900}
901
902static int get_connections(struct sock *sk, u16 index)
903{
904 struct mgmt_rp_get_connections *rp;
905 struct hci_dev *hdev;
906 struct list_head *p;
907 size_t rp_len;
908 u16 count;
909 int i, err;
910
911 BT_DBG("");
912
913 hdev = hci_dev_get(index);
914 if (!hdev)
915 return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, ENODEV);
916
917 hci_dev_lock_bh(hdev);
918
919 count = 0;
920 list_for_each(p, &hdev->conn_hash.list) {
921 count++;
922 }
923
924 rp_len = sizeof(*rp) + (count * sizeof(bdaddr_t));
925 rp = kmalloc(rp_len, GFP_ATOMIC);
926 if (!rp) {
927 err = -ENOMEM;
928 goto unlock;
929 }
930
931 put_unaligned_le16(count, &rp->conn_count);
932
933 read_lock(&hci_dev_list_lock);
934
935 i = 0;
936 list_for_each(p, &hdev->conn_hash.list) {
937 struct hci_conn *c = list_entry(p, struct hci_conn, list);
938
939 bacpy(&rp->conn[i++], &c->dst);
940 }
941
942 read_unlock(&hci_dev_list_lock);
943
944 err = cmd_complete(sk, index, MGMT_OP_GET_CONNECTIONS, rp, rp_len);
945
946unlock:
947 kfree(rp);
948 hci_dev_unlock_bh(hdev);
949 hci_dev_put(hdev);
950 return err;
951}
952
953static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data,
954 u16 len)
955{
956 struct hci_dev *hdev;
957 struct mgmt_cp_pin_code_reply *cp;
958 struct hci_cp_pin_code_reply reply;
959 struct pending_cmd *cmd;
960 int err;
961
962 BT_DBG("");
963
964 cp = (void *) data;
965
966 if (len != sizeof(*cp))
967 return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, EINVAL);
968
969 hdev = hci_dev_get(index);
970 if (!hdev)
971 return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENODEV);
972
973 hci_dev_lock_bh(hdev);
974
975 if (!test_bit(HCI_UP, &hdev->flags)) {
976 err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENETDOWN);
977 goto failed;
978 }
979
980 cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, index, data, len);
981 if (!cmd) {
982 err = -ENOMEM;
983 goto failed;
984 }
985
986 bacpy(&reply.bdaddr, &cp->bdaddr);
987 reply.pin_len = cp->pin_len;
988 memcpy(reply.pin_code, cp->pin_code, 16);
989
990 err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply);
991 if (err < 0)
992 mgmt_pending_remove(cmd);
993
994failed:
995 hci_dev_unlock_bh(hdev);
996 hci_dev_put(hdev);
997
998 return err;
999}
1000
1001static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data,
1002 u16 len)
1003{
1004 struct hci_dev *hdev;
1005 struct mgmt_cp_pin_code_neg_reply *cp;
1006 struct pending_cmd *cmd;
1007 int err;
1008
1009 BT_DBG("");
1010
1011 cp = (void *) data;
1012
1013 if (len != sizeof(*cp))
1014 return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY,
1015 EINVAL);
1016
1017 hdev = hci_dev_get(index);
1018 if (!hdev)
1019 return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY,
1020 ENODEV);
1021
1022 hci_dev_lock_bh(hdev);
1023
1024 if (!test_bit(HCI_UP, &hdev->flags)) {
1025 err = cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY,
1026 ENETDOWN);
1027 goto failed;
1028 }
1029
1030 cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, index,
1031 data, len);
1032 if (!cmd) {
1033 err = -ENOMEM;
1034 goto failed;
1035 }
1036
1037 err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->bdaddr),
1038 &cp->bdaddr);
1039 if (err < 0)
1040 mgmt_pending_remove(cmd);
1041
1042failed:
1043 hci_dev_unlock_bh(hdev);
1044 hci_dev_put(hdev);
1045
1046 return err;
1047}
1048
1049static int set_io_capability(struct sock *sk, u16 index, unsigned char *data,
1050 u16 len)
1051{
1052 struct hci_dev *hdev;
1053 struct mgmt_cp_set_io_capability *cp;
1054
1055 BT_DBG("");
1056
1057 cp = (void *) data;
1058
1059 if (len != sizeof(*cp))
1060 return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, EINVAL);
1061
1062 hdev = hci_dev_get(index);
1063 if (!hdev)
1064 return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, ENODEV);
1065
1066 hci_dev_lock_bh(hdev);
1067
1068 hdev->io_capability = cp->io_capability;
1069
1070 BT_DBG("%s IO capability set to 0x%02x", hdev->name,
1071 hdev->io_capability);
1072
1073 hci_dev_unlock_bh(hdev);
1074 hci_dev_put(hdev);
1075
1076 return cmd_complete(sk, index, MGMT_OP_SET_IO_CAPABILITY, NULL, 0);
1077}
1078
1079static inline struct pending_cmd *find_pairing(struct hci_conn *conn)
1080{
1081 struct hci_dev *hdev = conn->hdev;
1082 struct list_head *p;
1083
1084 list_for_each(p, &cmd_list) {
1085 struct pending_cmd *cmd;
1086
1087 cmd = list_entry(p, struct pending_cmd, list);
1088
1089 if (cmd->opcode != MGMT_OP_PAIR_DEVICE)
1090 continue;
1091
1092 if (cmd->index != hdev->id)
1093 continue;
1094
1095 if (cmd->user_data != conn)
1096 continue;
1097
1098 return cmd;
1099 }
1100
1101 return NULL;
1102}
1103
1104static void pairing_complete(struct pending_cmd *cmd, u8 status)
1105{
1106 struct mgmt_rp_pair_device rp;
1107 struct hci_conn *conn = cmd->user_data;
1108
1109 bacpy(&rp.bdaddr, &conn->dst);
1110 rp.status = status;
1111
1112 cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp));
1113
1114 /* So we don't get further callbacks for this connection */
1115 conn->connect_cfm_cb = NULL;
1116 conn->security_cfm_cb = NULL;
1117 conn->disconn_cfm_cb = NULL;
1118
1119 hci_conn_put(conn);
1120
1121 mgmt_pending_remove(cmd);
1122}
1123
1124static void pairing_complete_cb(struct hci_conn *conn, u8 status)
1125{
1126 struct pending_cmd *cmd;
1127
1128 BT_DBG("status %u", status);
1129
1130 cmd = find_pairing(conn);
1131 if (!cmd) {
1132 BT_DBG("Unable to find a pending command");
1133 return;
1134 }
1135
1136 pairing_complete(cmd, status);
1137}
1138
1139static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len)
1140{
1141 struct hci_dev *hdev;
1142 struct mgmt_cp_pair_device *cp;
1143 struct pending_cmd *cmd;
1144 u8 sec_level, auth_type;
1145 struct hci_conn *conn;
1146 int err;
1147
1148 BT_DBG("");
1149
1150 cp = (void *) data;
1151
1152 if (len != sizeof(*cp))
1153 return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EINVAL);
1154
1155 hdev = hci_dev_get(index);
1156 if (!hdev)
1157 return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, ENODEV);
1158
1159 hci_dev_lock_bh(hdev);
1160
1161 if (cp->io_cap == 0x03) {
1162 sec_level = BT_SECURITY_MEDIUM;
1163 auth_type = HCI_AT_DEDICATED_BONDING;
1164 } else {
1165 sec_level = BT_SECURITY_HIGH;
1166 auth_type = HCI_AT_DEDICATED_BONDING_MITM;
1167 }
1168
1169 conn = hci_connect(hdev, ACL_LINK, &cp->bdaddr, sec_level, auth_type);
1170 if (IS_ERR(conn)) {
1171 err = PTR_ERR(conn);
1172 goto unlock;
1173 }
1174
1175 if (conn->connect_cfm_cb) {
1176 hci_conn_put(conn);
1177 err = cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EBUSY);
1178 goto unlock;
1179 }
1180
1181 cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, index, data, len);
1182 if (!cmd) {
1183 err = -ENOMEM;
1184 hci_conn_put(conn);
1185 goto unlock;
1186 }
1187
1188 conn->connect_cfm_cb = pairing_complete_cb;
1189 conn->security_cfm_cb = pairing_complete_cb;
1190 conn->disconn_cfm_cb = pairing_complete_cb;
1191 conn->io_capability = cp->io_cap;
1192 cmd->user_data = conn;
1193
1194 if (conn->state == BT_CONNECTED &&
1195 hci_conn_security(conn, sec_level, auth_type))
1196 pairing_complete(cmd, 0);
1197
1198 err = 0;
1199
1200unlock:
1201 hci_dev_unlock_bh(hdev);
1202 hci_dev_put(hdev);
1203
1204 return err;
1205}
1206
1207static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data,
1208 u16 len, int success)
1209{
1210 struct mgmt_cp_user_confirm_reply *cp = (void *) data;
1211 u16 mgmt_op, hci_op;
1212 struct pending_cmd *cmd;
1213 struct hci_dev *hdev;
1214 int err;
1215
1216 BT_DBG("");
1217
1218 if (success) {
1219 mgmt_op = MGMT_OP_USER_CONFIRM_REPLY;
1220 hci_op = HCI_OP_USER_CONFIRM_REPLY;
1221 } else {
1222 mgmt_op = MGMT_OP_USER_CONFIRM_NEG_REPLY;
1223 hci_op = HCI_OP_USER_CONFIRM_NEG_REPLY;
1224 }
1225
1226 if (len != sizeof(*cp))
1227 return cmd_status(sk, index, mgmt_op, EINVAL);
1228
1229 hdev = hci_dev_get(index);
1230 if (!hdev)
1231 return cmd_status(sk, index, mgmt_op, ENODEV);
1232
1233 if (!test_bit(HCI_UP, &hdev->flags)) {
1234 err = cmd_status(sk, index, mgmt_op, ENETDOWN);
1235 goto failed;
1236 }
1237
1238 cmd = mgmt_pending_add(sk, mgmt_op, index, data, len);
1239 if (!cmd) {
1240 err = -ENOMEM;
1241 goto failed;
1242 }
1243
1244 err = hci_send_cmd(hdev, hci_op, sizeof(cp->bdaddr), &cp->bdaddr);
1245 if (err < 0)
1246 mgmt_pending_remove(cmd);
1247
1248failed:
1249 hci_dev_unlock_bh(hdev);
1250 hci_dev_put(hdev);
1251
1252 return err;
1253}
1254
 int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 {
 	unsigned char *buf;
 	struct mgmt_hdr *hdr;
-	u16 opcode, len;
+	u16 opcode, index, len;
 	int err;
 
 	BT_DBG("got %zu bytes", msglen);
@@ -233,6 +1275,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 
 	hdr = (struct mgmt_hdr *) buf;
 	opcode = get_unaligned_le16(&hdr->opcode);
+	index = get_unaligned_le16(&hdr->index);
 	len = get_unaligned_le16(&hdr->len);
 
 	if (len != msglen - sizeof(*hdr)) {
@@ -248,11 +1291,65 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 		err = read_index_list(sk);
 		break;
 	case MGMT_OP_READ_INFO:
-		err = read_controller_info(sk, buf + sizeof(*hdr), len);
+		err = read_controller_info(sk, index);
 		break;
1296 case MGMT_OP_SET_POWERED:
1297 err = set_powered(sk, index, buf + sizeof(*hdr), len);
1298 break;
1299 case MGMT_OP_SET_DISCOVERABLE:
1300 err = set_discoverable(sk, index, buf + sizeof(*hdr), len);
1301 break;
1302 case MGMT_OP_SET_CONNECTABLE:
1303 err = set_connectable(sk, index, buf + sizeof(*hdr), len);
1304 break;
1305 case MGMT_OP_SET_PAIRABLE:
1306 err = set_pairable(sk, index, buf + sizeof(*hdr), len);
1307 break;
1308 case MGMT_OP_ADD_UUID:
1309 err = add_uuid(sk, index, buf + sizeof(*hdr), len);
1310 break;
1311 case MGMT_OP_REMOVE_UUID:
1312 err = remove_uuid(sk, index, buf + sizeof(*hdr), len);
1313 break;
1314 case MGMT_OP_SET_DEV_CLASS:
1315 err = set_dev_class(sk, index, buf + sizeof(*hdr), len);
1316 break;
1317 case MGMT_OP_SET_SERVICE_CACHE:
1318 err = set_service_cache(sk, index, buf + sizeof(*hdr), len);
1319 break;
1320 case MGMT_OP_LOAD_KEYS:
1321 err = load_keys(sk, index, buf + sizeof(*hdr), len);
1322 break;
1323 case MGMT_OP_REMOVE_KEY:
1324 err = remove_key(sk, index, buf + sizeof(*hdr), len);
1325 break;
1326 case MGMT_OP_DISCONNECT:
1327 err = disconnect(sk, index, buf + sizeof(*hdr), len);
1328 break;
1329 case MGMT_OP_GET_CONNECTIONS:
1330 err = get_connections(sk, index);
1331 break;
1332 case MGMT_OP_PIN_CODE_REPLY:
1333 err = pin_code_reply(sk, index, buf + sizeof(*hdr), len);
1334 break;
1335 case MGMT_OP_PIN_CODE_NEG_REPLY:
1336 err = pin_code_neg_reply(sk, index, buf + sizeof(*hdr), len);
1337 break;
1338 case MGMT_OP_SET_IO_CAPABILITY:
1339 err = set_io_capability(sk, index, buf + sizeof(*hdr), len);
1340 break;
1341 case MGMT_OP_PAIR_DEVICE:
1342 err = pair_device(sk, index, buf + sizeof(*hdr), len);
1343 break;
1344 case MGMT_OP_USER_CONFIRM_REPLY:
1345 err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 1);
1346 break;
1347 case MGMT_OP_USER_CONFIRM_NEG_REPLY:
1348 err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 0);
 		break;
 	default:
 		BT_DBG("Unknown op %u", opcode);
-		err = cmd_status(sk, opcode, 0x01);
+		err = cmd_status(sk, index, opcode, 0x01);
 		break;
 	}
 
@@ -266,43 +1363,283 @@ done:
 	return err;
 }
 
-static int mgmt_event(u16 event, void *data, u16 data_len)
+int mgmt_index_added(u16 index)
 {
-	struct sk_buff *skb;
-	struct mgmt_hdr *hdr;
-
-	skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC);
-	if (!skb)
-		return -ENOMEM;
-
-	bt_cb(skb)->channel = HCI_CHANNEL_CONTROL;
-
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(event);
-	hdr->len = cpu_to_le16(data_len);
-
-	memcpy(skb_put(skb, data_len), data, data_len);
-
-	hci_send_to_sock(NULL, skb);
-	kfree_skb(skb);
-
-	return 0;
+	return mgmt_event(MGMT_EV_INDEX_ADDED, index, NULL, 0, NULL);
+}
+
+int mgmt_index_removed(u16 index)
+{
+	return mgmt_event(MGMT_EV_INDEX_REMOVED, index, NULL, 0, NULL);
+}
+
+struct cmd_lookup {
+	u8 val;
+	struct sock *sk;
+};
+
+static void mode_rsp(struct pending_cmd *cmd, void *data)
+{
+	struct mgmt_mode *cp = cmd->cmd;
+	struct cmd_lookup *match = data;
+
+	if (cp->val != match->val)
+		return;
+
+	send_mode_rsp(cmd->sk, cmd->opcode, cmd->index, cp->val);
+
+	list_del(&cmd->list);
+
+	if (match->sk == NULL) {
+		match->sk = cmd->sk;
+		sock_hold(match->sk);
+	}
+
+	mgmt_pending_free(cmd);
 }
 
-int mgmt_index_added(u16 index)
+int mgmt_powered(u16 index, u8 powered)
 {
-	struct mgmt_ev_index_added ev;
+	struct mgmt_mode ev;
+	struct cmd_lookup match = { powered, NULL };
+	int ret;
+
+	mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match);
 
-	put_unaligned_le16(index, &ev.index);
+	ev.val = powered;
 
-	return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev));
+	ret = mgmt_event(MGMT_EV_POWERED, index, &ev, sizeof(ev), match.sk);
+
+	if (match.sk)
+		sock_put(match.sk);
+
+	return ret;
 }
 
-int mgmt_index_removed(u16 index)
+int mgmt_discoverable(u16 index, u8 discoverable)
1420{
1421 struct mgmt_mode ev;
1422 struct cmd_lookup match = { discoverable, NULL };
1423 int ret;
1424
1425 mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, mode_rsp, &match);
1426
1427 ev.val = discoverable;
1428
1429 ret = mgmt_event(MGMT_EV_DISCOVERABLE, index, &ev, sizeof(ev),
1430 match.sk);
1431
1432 if (match.sk)
1433 sock_put(match.sk);
1434
1435 return ret;
1436}
1437
1438int mgmt_connectable(u16 index, u8 connectable)
1439{
1440 struct mgmt_mode ev;
1441 struct cmd_lookup match = { connectable, NULL };
1442 int ret;
1443
1444 mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match);
1445
1446 ev.val = connectable;
1447
1448 ret = mgmt_event(MGMT_EV_CONNECTABLE, index, &ev, sizeof(ev), match.sk);
1449
1450 if (match.sk)
1451 sock_put(match.sk);
1452
1453 return ret;
1454}
1455
1456int mgmt_new_key(u16 index, struct link_key *key, u8 old_key_type)
1457{
1458 struct mgmt_ev_new_key ev;
1459
1460 memset(&ev, 0, sizeof(ev));
1461
1462 bacpy(&ev.key.bdaddr, &key->bdaddr);
1463 ev.key.type = key->type;
1464 memcpy(ev.key.val, key->val, 16);
1465 ev.key.pin_len = key->pin_len;
1466 ev.old_key_type = old_key_type;
1467
1468 return mgmt_event(MGMT_EV_NEW_KEY, index, &ev, sizeof(ev), NULL);
1469}
1470
1471int mgmt_connected(u16 index, bdaddr_t *bdaddr)
1472{
1473 struct mgmt_ev_connected ev;
1474
1475 bacpy(&ev.bdaddr, bdaddr);
1476
1477 return mgmt_event(MGMT_EV_CONNECTED, index, &ev, sizeof(ev), NULL);
1478}
1479
1480static void disconnect_rsp(struct pending_cmd *cmd, void *data)
1481{
1482 struct mgmt_cp_disconnect *cp = cmd->cmd;
1483 struct sock **sk = data;
1484 struct mgmt_rp_disconnect rp;
1485
1486 bacpy(&rp.bdaddr, &cp->bdaddr);
1487
1488 cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, &rp, sizeof(rp));
1489
1490 *sk = cmd->sk;
1491 sock_hold(*sk);
1492
1493 mgmt_pending_remove(cmd);
1494}
1495
1496int mgmt_disconnected(u16 index, bdaddr_t *bdaddr)
1497{
1498 struct mgmt_ev_disconnected ev;
1499 struct sock *sk = NULL;
1500 int err;
1501
1502 mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk);
1503
1504 bacpy(&ev.bdaddr, bdaddr);
1505
1506 err = mgmt_event(MGMT_EV_DISCONNECTED, index, &ev, sizeof(ev), sk);
1507
1508 if (sk)
1509 sock_put(sk);
1510
1511 return err;
1512}
1513
1514int mgmt_disconnect_failed(u16 index)
1515{
1516 struct pending_cmd *cmd;
1517 int err;
1518
1519 cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, index);
1520 if (!cmd)
1521 return -ENOENT;
1522
1523 err = cmd_status(cmd->sk, index, MGMT_OP_DISCONNECT, EIO);
1524
1525 mgmt_pending_remove(cmd);
1526
1527 return err;
1528}
1529
1530int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status)
1531{
1532 struct mgmt_ev_connect_failed ev;
1533
1534 bacpy(&ev.bdaddr, bdaddr);
1535 ev.status = status;
1536
1537 return mgmt_event(MGMT_EV_CONNECT_FAILED, index, &ev, sizeof(ev), NULL);
1538}
1539
1540int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr)
1541{
1542 struct mgmt_ev_pin_code_request ev;
1543
1544 bacpy(&ev.bdaddr, bdaddr);
1545
1546 return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, index, &ev, sizeof(ev),
1547 NULL);
1548}
1549
1550int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
1551{
1552 struct pending_cmd *cmd;
1553 struct mgmt_rp_pin_code_reply rp;
1554 int err;
1555
1556 cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index);
1557 if (!cmd)
1558 return -ENOENT;
1559
1560 bacpy(&rp.bdaddr, bdaddr);
1561 rp.status = status;
1562
1563 err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_REPLY, &rp,
1564 sizeof(rp));
1565
1566 mgmt_pending_remove(cmd);
1567
1568 return err;
1569}
1570
1571int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
1572{
1573 struct pending_cmd *cmd;
1574 struct mgmt_rp_pin_code_reply rp;
1575 int err;
1576
1577 cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index);
1578 if (!cmd)
1579 return -ENOENT;
1580
1581 bacpy(&rp.bdaddr, bdaddr);
1582 rp.status = status;
1583
1584 err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, &rp,
1585 sizeof(rp));
1586
1587 mgmt_pending_remove(cmd);
1588
1589 return err;
1590}
1591
1592int mgmt_user_confirm_request(u16 index, bdaddr_t *bdaddr, __le32 value)
1593{
1594 struct mgmt_ev_user_confirm_request ev;
1595
1596 BT_DBG("hci%u", index);
1597
1598 bacpy(&ev.bdaddr, bdaddr);
1599 put_unaligned_le32(value, &ev.value);
1600
1601 return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, index, &ev, sizeof(ev),
1602 NULL);
1603}
1604
1605static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status,
1606 u8 opcode)
1607{
1608 struct pending_cmd *cmd;
1609 struct mgmt_rp_user_confirm_reply rp;
1610 int err;
1611
1612 cmd = mgmt_pending_find(opcode, index);
1613 if (!cmd)
1614 return -ENOENT;
1615
1616 bacpy(&rp.bdaddr, bdaddr);
1617 rp.status = status;
1618 err = cmd_complete(cmd->sk, index, opcode, &rp, sizeof(rp));
1619
1620 mgmt_pending_remove(cmd);
1621
1622 return err;
1623}
1624
1625int mgmt_user_confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
1626{
1627 return confirm_reply_complete(index, bdaddr, status,
1628 MGMT_OP_USER_CONFIRM_REPLY);
1629}
1630
1631int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
1632{
1633 return confirm_reply_complete(index, bdaddr, status,
1634 MGMT_OP_USER_CONFIRM_NEG_REPLY);
1635}
1636
1637int mgmt_auth_failed(u16 index, bdaddr_t *bdaddr, u8 status)
 {
-	struct mgmt_ev_index_added ev;
+	struct mgmt_ev_auth_failed ev;
 
-	put_unaligned_le16(index, &ev.index);
+	bacpy(&ev.bdaddr, bdaddr);
+	ev.status = status;
 
-	return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev));
+	return mgmt_event(MGMT_EV_AUTH_FAILED, index, &ev, sizeof(ev), NULL);
 }
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 6b83776534f..c9973932456 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2154,8 +2154,6 @@ static int __init rfcomm_init(void)
 {
 	int err;
 
-	l2cap_load();
-
 	hci_register_cb(&rfcomm_cb);
 
 	rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd");
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index d7b9af4703d..c258796313e 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -832,7 +832,7 @@ static int rfcomm_tty_write_room(struct tty_struct *tty)
 	return room;
 }
 
-static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, unsigned long arg)
+static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
 {
 	BT_DBG("tty %p cmd 0x%02x", tty, cmd);
 
@@ -1091,7 +1091,7 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
 	}
 }
 
-static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
+static int rfcomm_tty_tiocmget(struct tty_struct *tty)
 {
 	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
 
@@ -1100,7 +1100,7 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
 	return dev->modem_status;
 }
 
-static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear)
+static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear)
 {
 	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
 	struct rfcomm_dlc *dlc = dev->dlc;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 960c6d1637d..42fdffd1d76 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -50,8 +50,6 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/sco.h>
 
-#define VERSION "0.6"
-
 static int disable_esco;
 
 static const struct proto_ops sco_sock_ops;
@@ -192,20 +190,21 @@ static int sco_connect(struct sock *sk)
 
 	hci_dev_lock_bh(hdev);
 
-	err = -ENOMEM;
-
 	if (lmp_esco_capable(hdev) && !disable_esco)
 		type = ESCO_LINK;
 	else
 		type = SCO_LINK;
 
 	hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
-	if (!hcon)
+	if (IS_ERR(hcon)) {
+		err = PTR_ERR(hcon);
 		goto done;
+	}
 
 	conn = sco_conn_add(hcon, 0);
 	if (!conn) {
 		hci_conn_put(hcon);
+		err = -ENOMEM;
 		goto done;
 	}
 
@@ -703,6 +702,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
 		break;
 	}
 
+	memset(&cinfo, 0, sizeof(cinfo));
 	cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle;
 	memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3);
 
@@ -1023,7 +1023,7 @@ static struct hci_proto sco_hci_proto = {
 	.recv_scodata	= sco_recv_scodata
 };
 
-static int __init sco_init(void)
+int __init sco_init(void)
 {
 	int err;
 
@@ -1051,7 +1051,6 @@ static int __init sco_init(void)
 		BT_ERR("Failed to create SCO debug file");
 	}
 
-	BT_INFO("SCO (Voice Link) ver %s", VERSION);
 	BT_INFO("SCO socket layer initialized");
 
 	return 0;
@@ -1061,7 +1060,7 @@ error:
 	return err;
 }
 
-static void __exit sco_exit(void)
+void __exit sco_exit(void)
 {
 	debugfs_remove(sco_debugfs);
 
@@ -1074,14 +1073,5 @@ static void __exit sco_exit(void)
 	proto_unregister(&sco_proto);
 }
 
-module_init(sco_init);
-module_exit(sco_exit);
-
 module_param(disable_esco, bool, 0644);
 MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation");
-
-MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
-MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION);
-MODULE_VERSION(VERSION);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("bt-proto-2");
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb..6dee7bf648a 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
 	tristate "802.1d Ethernet Bridging"
 	select LLC
 	select STP
+	depends on IPV6 || IPV6=n
 	---help---
 	  If you say Y here, then your Linux box will be able to act as an
 	  Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 556443566e9..21e5901186e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -78,6 +78,8 @@ static int br_dev_open(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
+	netif_carrier_off(dev);
+
 	br_features_recompute(br);
 	netif_start_queue(dev);
 	br_stp_enable_bridge(br);
@@ -94,6 +96,8 @@ static int br_dev_stop(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
+	netif_carrier_off(dev);
+
 	br_stp_disable_bridge(br);
 	br_multicast_stop(br);
 
@@ -297,6 +301,21 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 #endif
 
+static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
+
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return br_add_if(br, slave_dev);
+}
+
+static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return br_del_if(br, slave_dev);
+}
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo	= br_getinfo,
 	.get_link	= ethtool_op_get_link,
@@ -326,6 +345,8 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_netpoll_cleanup	= br_netpoll_cleanup,
 	.ndo_poll_controller	= br_poll_controller,
 #endif
+	.ndo_add_slave		= br_add_slave,
+	.ndo_del_slave		= br_del_slave,
 };
 
 static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index d9d1e2bac1d..dce8f0009a1 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -148,6 +148,8 @@ static void del_nbp(struct net_bridge_port *p)
 
 	netdev_rx_handler_unregister(dev);
 
+	netdev_set_master(dev, NULL);
+
 	br_multicast_del_port(p);
 
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
@@ -365,7 +367,7 @@ int br_min_mtu(const struct net_bridge *br)
 void br_features_recompute(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
-	unsigned long features, mask;
+	u32 features, mask;
 
 	features = mask = br->feature_mask;
 	if (list_empty(&br->port_list))
@@ -379,7 +381,7 @@ void br_features_recompute(struct net_bridge *br)
 	}
 
 done:
-	br->dev->features = netdev_fix_features(features, NULL);
+	br->dev->features = netdev_fix_features(br->dev, features);
 }
 
 /* called with RTNL */
@@ -429,10 +431,14 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
 		goto err3;
 
-	err = netdev_rx_handler_register(dev, br_handle_frame, p);
+	err = netdev_set_master(dev, br->dev);
 	if (err)
 		goto err3;
 
+	err = netdev_rx_handler_register(dev, br_handle_frame, p);
+	if (err)
+		goto err4;
+
 	dev->priv_flags |= IFF_BRIDGE_PORT;
 
 	dev_disable_lro(dev);
@@ -455,6 +461,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
 	return 0;
+
+err4:
+	netdev_set_master(dev, NULL);
 err3:
 	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 88e4aa9cb1f..e2160792e1b 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -139,21 +139,22 @@ static inline int is_link_local(const unsigned char *dest)
  * Return NULL if skb is handled
  * note: already called with rcu_read_lock
  */
-struct sk_buff *br_handle_frame(struct sk_buff *skb)
+rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 {
 	struct net_bridge_port *p;
+	struct sk_buff *skb = *pskb;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	br_should_route_hook_t *rhook;
 
 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
-		return skb;
+		return RX_HANDLER_PASS;
 
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (!skb)
-		return NULL;
+		return RX_HANDLER_CONSUMED;
 
 	p = br_port_get_rcu(skb->dev);
 
@@ -167,10 +168,12 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 		goto forward;
 
 	if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
-		    NULL, br_handle_local_finish))
-		return NULL;	/* frame consumed by filter */
-	else
-		return skb;	/* continue processing */
+		    NULL, br_handle_local_finish)) {
+		return RX_HANDLER_CONSUMED; /* consumed by filter */
+	} else {
+		*pskb = skb;
+		return RX_HANDLER_PASS;	/* continue processing */
+	}
 	}
 
 forward:
@@ -178,8 +181,10 @@ forward:
 	case BR_STATE_FORWARDING:
 		rhook = rcu_dereference(br_should_route_hook);
 		if (rhook) {
-			if ((*rhook)(skb))
-				return skb;
+			if ((*rhook)(skb)) {
+				*pskb = skb;
+				return RX_HANDLER_PASS;
+			}
 			dest = eth_hdr(skb)->h_dest;
 		}
 		/* fall through */
@@ -194,5 +199,5 @@ forward:
 drop:
 		kfree_skb(skb);
 	}
-	return NULL;
+	return RX_HANDLER_CONSUMED;
 }
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 4b5b66d07bb..008ff6c4eec 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -412,10 +412,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
 	if (dnat_took_place(skb)) {
 		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
-			struct flowi fl = {
-				.fl4_dst = iph->daddr,
-				.fl4_tos = RT_TOS(iph->tos),
-			};
 			struct in_device *in_dev = __in_dev_get_rcu(dev);
 
 			/* If err equals -EHOSTUNREACH the error is due to a
@@ -428,14 +424,16 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
 				goto free_skb;
 
-			if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
+			rt = ip_route_output(dev_net(dev), iph->daddr, 0,
+					     RT_TOS(iph->tos), 0);
+			if (!IS_ERR(rt)) {
 				/* - Bridged-and-DNAT'ed traffic doesn't
 				 *   require ip_forwarding. */
-				if (((struct dst_entry *)rt)->dev == dev) {
-					skb_dst_set(skb, (struct dst_entry *)rt);
+				if (rt->dst.dev == dev) {
+					skb_dst_set(skb, &rt->dst);
 					goto bridged_dnat;
 				}
-				dst_release((struct dst_entry *)rt);
+				ip_rt_put(rt);
 			}
 free_skb:
 			kfree_skb(skb);
@@ -741,6 +739,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 		nf_bridge->mask |= BRNF_PKT_TYPE;
 	}
 
+	if (br_parse_ip_options(skb))
+		return NF_DROP;
+
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;
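The middle hunk switches from ip_route_output_key() with an on-stack struct flowi to ip_route_output(), which returns either a struct rtable pointer or an ERR_PTR-encoded error. A hedged sketch of the calling convention, with net, daddr and tos standing for the values taken from the packet above:

	struct rtable *rt;

	rt = ip_route_output(net, daddr, 0, RT_TOS(tos), 0);
	if (IS_ERR(rt))
		return PTR_ERR(rt);	/* no route; nothing to release */

	/* ... use rt->dst.dev, or attach via skb_dst_set(skb, &rt->dst) ... */
	ip_rt_put(rt);			/* drop the reference when done */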
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4e1b620b6be..19e2f46ed08 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -182,7 +182,7 @@ struct net_bridge
 	struct br_cpu_netstats __percpu *stats;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
-	unsigned long			feature_mask;
+	u32				feature_mask;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct rtable			fake_rtable;
 	bool				nf_call_iptables;
@@ -379,7 +379,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
+extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 57186d84d2b..7370d14f634 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -375,12 +375,12 @@ static void br_make_forwarding(struct net_bridge_port *p)
 	if (p->state != BR_STATE_BLOCKING)
 		return;
 
-	if (br->forward_delay == 0) {
+	if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) {
 		p->state = BR_STATE_FORWARDING;
 		br_topology_change_detection(br);
 		del_timer(&p->forward_delay_timer);
 	}
-	else if (p->br->stp_enabled == BR_KERNEL_STP)
+	else if (br->stp_enabled == BR_KERNEL_STP)
 		p->state = BR_STATE_LISTENING;
 	else
 		p->state = BR_STATE_LEARNING;
@@ -397,28 +397,37 @@ static void br_make_forwarding(struct net_bridge_port *p)
 void br_port_state_selection(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
+	unsigned int liveports = 0;
 
 	/* Don't change port states if userspace is handling STP */
 	if (br->stp_enabled == BR_USER_STP)
 		return;
 
 	list_for_each_entry(p, &br->port_list, list) {
-		if (p->state != BR_STATE_DISABLED) {
-			if (p->port_no == br->root_port) {
-				p->config_pending = 0;
-				p->topology_change_ack = 0;
-				br_make_forwarding(p);
-			} else if (br_is_designated_port(p)) {
-				del_timer(&p->message_age_timer);
-				br_make_forwarding(p);
-			} else {
-				p->config_pending = 0;
-				p->topology_change_ack = 0;
-				br_make_blocking(p);
-			}
+		if (p->state == BR_STATE_DISABLED)
+			continue;
+
+		if (p->port_no == br->root_port) {
+			p->config_pending = 0;
+			p->topology_change_ack = 0;
+			br_make_forwarding(p);
+		} else if (br_is_designated_port(p)) {
+			del_timer(&p->message_age_timer);
+			br_make_forwarding(p);
+		} else {
+			p->config_pending = 0;
+			p->topology_change_ack = 0;
+			br_make_blocking(p);
 		}
 
+		if (p->state == BR_STATE_FORWARDING)
+			++liveports;
 	}
+
+	if (liveports == 0)
+		netif_carrier_off(br->dev);
+	else
+		netif_carrier_on(br->dev);
 }
 
 /* called under bridge lock */
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 7b22456023c..3e965140051 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -94,6 +94,7 @@ static void br_forward_delay_timer_expired(unsigned long arg)
 		p->state = BR_STATE_FORWARDING;
 		if (br_is_designated_for_some_port(br))
 			br_topology_change_detection(br);
+		netif_carrier_on(br->dev);
 	}
 	br_log_state(p);
 	spin_unlock(&br->lock);
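Taken together, the br_stp.c and br_stp_timer.c hunks keep the bridge device's carrier state in sync with its ports: carrier is on exactly when at least one port is forwarding, so upper layers see link-down on a bridge that cannot pass traffic. The rule they implement, condensed into one loop:

	unsigned int liveports = 0;
	struct net_bridge_port *p;

	list_for_each_entry(p, &br->port_list, list)
		if (p->state == BR_STATE_FORWARDING)
			liveports++;

	if (liveports)
		netif_carrier_on(br->dev);	/* at least one active path */
	else
		netif_carrier_off(br->dev);	/* bridge cannot forward */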
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 50a46afc2bc..2ed0056a39a 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -22,9 +22,15 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ip6.h>
 
-struct tcpudphdr {
-	__be16 src;
-	__be16 dst;
+union pkthdr {
+	struct {
+		__be16 src;
+		__be16 dst;
+	} tcpudphdr;
+	struct {
+		u8 type;
+		u8 code;
+	} icmphdr;
 };
 
 static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
 	struct ipv6hdr _ip6h;
-	const struct tcpudphdr *pptr;
-	struct tcpudphdr _ports;
+	const union pkthdr *pptr;
+	union pkthdr _pkthdr;
 
 	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
 	if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			return false;
 		if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
 			return false;
-		if (!(info->bitmask & EBT_IP6_DPORT) &&
-		    !(info->bitmask & EBT_IP6_SPORT))
+		if (!(info->bitmask & ( EBT_IP6_DPORT |
+					EBT_IP6_SPORT | EBT_IP6_ICMP6)))
 			return true;
-		pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
-					  &_ports);
+
+		/* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
+		pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
+					  &_pkthdr);
 		if (pptr == NULL)
 			return false;
 		if (info->bitmask & EBT_IP6_DPORT) {
-			u32 dst = ntohs(pptr->dst);
+			u16 dst = ntohs(pptr->tcpudphdr.dst);
 			if (FWINV(dst < info->dport[0] ||
 				  dst > info->dport[1], EBT_IP6_DPORT))
 				return false;
 		}
 		if (info->bitmask & EBT_IP6_SPORT) {
-			u32 src = ntohs(pptr->src);
+			u16 src = ntohs(pptr->tcpudphdr.src);
 			if (FWINV(src < info->sport[0] ||
 				  src > info->sport[1], EBT_IP6_SPORT))
 				return false;
 		}
-		return true;
+		if ((info->bitmask & EBT_IP6_ICMP6) &&
+		    FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
+			  pptr->icmphdr.type > info->icmpv6_type[1] ||
+			  pptr->icmphdr.code < info->icmpv6_code[0] ||
+			  pptr->icmphdr.code > info->icmpv6_code[1],
+			  EBT_IP6_ICMP6))
+			return false;
 	}
 	return true;
 }
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 	if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
 		return -EINVAL;
+	if (info->bitmask & EBT_IP6_ICMP6) {
+		if ((info->invflags & EBT_IP6_PROTO) ||
+		    info->protocol != IPPROTO_ICMPV6)
+			return -EINVAL;
+		if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
+		    info->icmpv6_code[0] > info->icmpv6_code[1])
+			return -EINVAL;
+	}
 	return 0;
 }
 
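The union in this patch lets a single skb_header_pointer() call fetch whichever transport header the rule needs: the union is sized by its largest member, so one bounded copy covers both the TCP/UDP port pair and the 2-byte ICMPv6 type/code (hence the "min icmpv6 headersize is 4" comment above). The access pattern in isolation:

	union pkthdr _pkthdr;
	const union pkthdr *pptr;

	/* one bounded read, valid for either interpretation */
	pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), &_pkthdr);
	if (pptr == NULL)
		return false;

	/* pptr->tcpudphdr.src/.dst for TCP or UDP rules,
	 * pptr->icmphdr.type/.code for ICMPv6 rules */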
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 16df0532d4b..893669caa8d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1107,6 +1107,8 @@ static int do_replace(struct net *net, const void __user *user,
 	if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter))
 		return -ENOMEM;
 
+	tmp.name[sizeof(tmp.name) - 1] = 0;
+
 	countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids;
 	newinfo = vmalloc(sizeof(*newinfo) + countersize);
 	if (!newinfo)
@@ -1764,6 +1766,7 @@ static int compat_table_info(const struct ebt_table_info *info,
 
 	newinfo->entries_size = size;
 
+	xt_compat_init_offsets(AF_INET, info->nentries);
 	return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
 				 entries, newinfo);
 }
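The do_replace() hunk is the usual hardening for fixed-size names copied from user space: force NUL termination before the buffer is ever treated as a C string. The pattern on its own (tmp here is the ebt_replace structure copied in earlier in the same function):

	struct ebt_replace tmp;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* user space controls every byte, so terminate defensively */
	tmp.name[sizeof(tmp.name) - 1] = '\0';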
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index c665de778b6..f1f98d967d8 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -23,10 +23,8 @@
 #include <asm/atomic.h>
 
 #define MAX_PHY_LAYERS 7
-#define PHY_NAME_LEN 20
 
 #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
-#define RFM_FRAGMENT_SIZE 4030
 
 /* Information about CAIF physical interfaces held by Config Module in order
  * to manage physical interfaces
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index d3ed264ad6c..27dab26ad3b 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -18,7 +18,6 @@
 #define DGM_CMD_BIT  0x80
 #define DGM_FLOW_OFF 0x81
 #define DGM_FLOW_ON  0x80
-#define DGM_CTRL_PKT_SIZE 1
 #define DGM_MTU 1500
 
 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 9297f7dea9d..8303fe3ebf8 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -25,7 +25,6 @@ struct cfserl {
 	spinlock_t sync;
 	bool usestx;
 };
-#define STXLEN(layr) (layr->usestx ? 1 : 0)
 
 static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index efad410e4c8..315c0d60136 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -20,7 +20,7 @@
 #define UTIL_REMOTE_SHUTDOWN 0x82
 #define UTIL_FLOW_OFF 0x81
 #define UTIL_FLOW_ON  0x80
-#define UTIL_CTRL_PKT_SIZE 1
+
 static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 3b425b189a9..c3b1dec4acf 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -17,7 +17,7 @@
 #define VEI_FLOW_OFF 0x81
 #define VEI_FLOW_ON  0x80
 #define VEI_SET_PIN  0x82
-#define VEI_CTRL_PKT_SIZE 1
+
 #define container_obj(layr) container_of(layr, struct cfsrvl, layer)
 
 static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index eb2a666b0be..1fc1ee11dfa 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -78,8 +78,10 @@ int ceph_unarmor(char *dst, const char *src, const char *end)
 	while (src < end) {
 		int a, b, c, d;
 
-		if (src < end && src[0] == '\n')
+		if (src[0] == '\n') {
 			src++;
+			continue;
+		}
 		if (src + 4 > end)
 			return -EINVAL;
 		a = decode_bits(src[0]);
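The armor.c fix matters because, without the continue, a newline in base64 input advanced src and then fell straight through into the 4-byte decode with a stale view of the group boundary. The corrected loop shape, as a standalone illustration:

	while (src < end) {
		if (*src == '\n') {
			src++;
			continue;	/* re-check bounds before decoding */
		}
		if (src + 4 > end)
			return -EINVAL;	/* partial base64 group */
		/* decode src[0..3] into up to 3 output bytes ... */
		src += 4;
	}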
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index f3e4a13fea0..95f96ab94bb 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -62,6 +62,7 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_OSD_MAP: return "osd_map";
 	case CEPH_MSG_OSD_OP: return "osd_op";
 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+	case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
 	default: return "unknown";
 	}
 }
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3e20a122ffa..02212ed5085 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -22,10 +22,15 @@
 #define OSD_OPREPLY_FRONT_LEN	512
 
 static const struct ceph_connection_operations osd_con_ops;
-static int __kick_requests(struct ceph_osd_client *osdc,
-			  struct ceph_osd *kickosd);
 
-static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void send_queued(struct ceph_osd_client *osdc);
+static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void __register_request(struct ceph_osd_client *osdc,
+			       struct ceph_osd_request *req);
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+					struct ceph_osd_request *req);
+static int __send_request(struct ceph_osd_client *osdc,
+			  struct ceph_osd_request *req);
 
 static int op_needs_trail(int op)
 {
@@ -34,6 +39,7 @@ static int op_needs_trail(int op)
 	case CEPH_OSD_OP_SETXATTR:
 	case CEPH_OSD_OP_CMPXATTR:
 	case CEPH_OSD_OP_CALL:
+	case CEPH_OSD_OP_NOTIFY:
 		return 1;
 	default:
 		return 0;
@@ -209,6 +215,8 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	init_completion(&req->r_completion);
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
+	INIT_LIST_HEAD(&req->r_linger_item);
+	INIT_LIST_HEAD(&req->r_linger_osd);
 	req->r_flags = flags;
 
 	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -315,6 +323,24 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
 		break;
 	case CEPH_OSD_OP_STARTSYNC:
 		break;
+	case CEPH_OSD_OP_NOTIFY:
+		{
+			__le32 prot_ver = cpu_to_le32(src->watch.prot_ver);
+			__le32 timeout = cpu_to_le32(src->watch.timeout);
+
+			BUG_ON(!req->r_trail);
+
+			ceph_pagelist_append(req->r_trail,
+					     &prot_ver, sizeof(prot_ver));
+			ceph_pagelist_append(req->r_trail,
+					     &timeout, sizeof(timeout));
+		}
+	case CEPH_OSD_OP_NOTIFY_ACK:
+	case CEPH_OSD_OP_WATCH:
+		dst->watch.cookie = cpu_to_le64(src->watch.cookie);
+		dst->watch.ver = cpu_to_le64(src->watch.ver);
+		dst->watch.flag = src->watch.flag;
+		break;
 	default:
 		pr_err("unrecognized osd opcode %d\n", dst->op);
 		WARN_ON(1);
@@ -529,6 +555,45 @@ __lookup_request_ge(struct ceph_osd_client *osdc,
 	return NULL;
 }
 
+/*
+ * Resubmit requests pending on the given osd.
+ */
+static void __kick_osd_requests(struct ceph_osd_client *osdc,
+				struct ceph_osd *osd)
+{
+	struct ceph_osd_request *req, *nreq;
+	int err;
+
+	dout("__kick_osd_requests osd%d\n", osd->o_osd);
+	err = __reset_osd(osdc, osd);
+	if (err == -EAGAIN)
+		return;
+
+	list_for_each_entry(req, &osd->o_requests, r_osd_item) {
+		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+		dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
+		     osd->o_osd);
+		if (!req->r_linger)
+			req->r_flags |= CEPH_OSD_FLAG_RETRY;
+	}
+
+	list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
+				 r_linger_osd) {
+		__unregister_linger_request(osdc, req);
+		__register_request(osdc, req);
+		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+		dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
+		     osd->o_osd);
+	}
+}
+
+static void kick_osd_requests(struct ceph_osd_client *osdc,
+			      struct ceph_osd *kickosd)
+{
+	mutex_lock(&osdc->request_mutex);
+	__kick_osd_requests(osdc, kickosd);
+	mutex_unlock(&osdc->request_mutex);
+}
 
 /*
  * If the osd connection drops, we need to resubmit all requests.
@@ -543,7 +608,8 @@ static void osd_reset(struct ceph_connection *con)
 	dout("osd_reset osd%d\n", osd->o_osd);
 	osdc = osd->o_osdc;
 	down_read(&osdc->map_sem);
-	kick_requests(osdc, osd);
+	kick_osd_requests(osdc, osd);
+	send_queued(osdc);
 	up_read(&osdc->map_sem);
 }
 
@@ -561,6 +627,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
 	atomic_set(&osd->o_ref, 1);
 	osd->o_osdc = osdc;
 	INIT_LIST_HEAD(&osd->o_requests);
+	INIT_LIST_HEAD(&osd->o_linger_requests);
 	INIT_LIST_HEAD(&osd->o_osd_lru);
 	osd->o_incarnation = 1;
 
@@ -650,7 +717,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
 	int ret = 0;
 
 	dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
-	if (list_empty(&osd->o_requests)) {
+	if (list_empty(&osd->o_requests) &&
+	    list_empty(&osd->o_linger_requests)) {
 		__remove_osd(osdc, osd);
 	} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
 			  &osd->o_con.peer_addr,
@@ -723,10 +791,9 @@ static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
  * Register request, assign tid.  If this is the first request, set up
  * the timeout event.
  */
-static void register_request(struct ceph_osd_client *osdc,
+static void __register_request(struct ceph_osd_client *osdc,
 			     struct ceph_osd_request *req)
 {
-	mutex_lock(&osdc->request_mutex);
 	req->r_tid = ++osdc->last_tid;
 	req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
 	INIT_LIST_HEAD(&req->r_req_lru_item);
@@ -740,6 +807,13 @@ static void register_request(struct ceph_osd_client *osdc,
 		dout(" first request, scheduling timeout\n");
 		__schedule_osd_timeout(osdc);
 	}
+}
+
+static void register_request(struct ceph_osd_client *osdc,
+			     struct ceph_osd_request *req)
+{
+	mutex_lock(&osdc->request_mutex);
+	__register_request(osdc, req);
 	mutex_unlock(&osdc->request_mutex);
 }
 
@@ -758,9 +832,14 @@ static void __unregister_request(struct ceph_osd_client *osdc,
 		ceph_con_revoke(&req->r_osd->o_con, req->r_request);
 
 	list_del_init(&req->r_osd_item);
-	if (list_empty(&req->r_osd->o_requests))
+	if (list_empty(&req->r_osd->o_requests) &&
+	    list_empty(&req->r_osd->o_linger_requests)) {
+		dout("moving osd to %p lru\n", req->r_osd);
 		__move_osd_to_lru(osdc, req->r_osd);
-	req->r_osd = NULL;
+	}
+	if (list_empty(&req->r_osd_item) &&
+	    list_empty(&req->r_linger_item))
+		req->r_osd = NULL;
 	}
 
 	ceph_osdc_put_request(req);
@@ -781,20 +860,72 @@ static void __cancel_request(struct ceph_osd_request *req)
 		ceph_con_revoke(&req->r_osd->o_con, req->r_request);
 		req->r_sent = 0;
 	}
-	list_del_init(&req->r_req_lru_item);
 }
 
+static void __register_linger_request(struct ceph_osd_client *osdc,
+				      struct ceph_osd_request *req)
+{
+	dout("__register_linger_request %p\n", req);
+	list_add_tail(&req->r_linger_item, &osdc->req_linger);
+	list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
+}
+
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+					struct ceph_osd_request *req)
+{
+	dout("__unregister_linger_request %p\n", req);
+	if (req->r_osd) {
+		list_del_init(&req->r_linger_item);
+		list_del_init(&req->r_linger_osd);
+
+		if (list_empty(&req->r_osd->o_requests) &&
+		    list_empty(&req->r_osd->o_linger_requests)) {
+			dout("moving osd to %p lru\n", req->r_osd);
+			__move_osd_to_lru(osdc, req->r_osd);
+		}
+		req->r_osd = NULL;
+	}
+}
+
+void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+					 struct ceph_osd_request *req)
+{
+	mutex_lock(&osdc->request_mutex);
+	if (req->r_linger) {
+		__unregister_linger_request(osdc, req);
+		ceph_osdc_put_request(req);
+	}
+	mutex_unlock(&osdc->request_mutex);
+}
+EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
+
+void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
+				  struct ceph_osd_request *req)
+{
+	if (!req->r_linger) {
+		dout("set_request_linger %p\n", req);
+		req->r_linger = 1;
+		/*
+		 * caller is now responsible for calling
+		 * unregister_linger_request
+		 */
+		ceph_osdc_get_request(req);
+	}
+}
+EXPORT_SYMBOL(ceph_osdc_set_request_linger);
+
 /*
  * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
  * (as needed), and set the request r_osd appropriately.  If there is
- * no up osd, set r_osd to NULL.
+ * no up osd, set r_osd to NULL.  Move the request to the appropiate list
+ * (unsent, homeless) or leave on in-flight lru.
  *
  * Return 0 if unchanged, 1 if changed, or negative on error.
  *
  * Caller should hold map_sem for read and request_mutex.
  */
-static int __map_osds(struct ceph_osd_client *osdc,
+static int __map_request(struct ceph_osd_client *osdc,
 		      struct ceph_osd_request *req)
 {
 	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
 	struct ceph_pg pgid;
@@ -802,11 +933,13 @@ static int __map_osds(struct ceph_osd_client *osdc,
 	int o = -1, num = 0;
 	int err;
 
-	dout("map_osds %p tid %lld\n", req, req->r_tid);
+	dout("map_request %p tid %lld\n", req, req->r_tid);
 	err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
 				      &req->r_file_layout, osdc->osdmap);
-	if (err)
+	if (err) {
+		list_move(&req->r_req_lru_item, &osdc->req_notarget);
 		return err;
+	}
 	pgid = reqhead->layout.ol_pgid;
 	req->r_pgid = pgid;
 
@@ -823,7 +956,7 @@
 	    (req->r_osd == NULL && o == -1))
 		return 0;  /* no change */
 
-	dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n",
+	dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n",
 	     req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
 	     req->r_osd ? req->r_osd->o_osd : -1);
 
@@ -841,10 +974,12 @@
 	if (!req->r_osd && o >= 0) {
 		err = -ENOMEM;
 		req->r_osd = create_osd(osdc);
-		if (!req->r_osd)
+		if (!req->r_osd) {
+			list_move(&req->r_req_lru_item, &osdc->req_notarget);
 			goto out;
+		}
 
-		dout("map_osds osd %p is osd%d\n", req->r_osd, o);
+		dout("map_request osd %p is osd%d\n", req->r_osd, o);
 		req->r_osd->o_osd = o;
 		req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
 		__insert_osd(osdc, req->r_osd);
@@ -855,6 +990,9 @@
 	if (req->r_osd) {
 		__remove_osd_from_lru(req->r_osd);
 		list_add(&req->r_osd_item, &req->r_osd->o_requests);
+		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+	} else {
+		list_move(&req->r_req_lru_item, &osdc->req_notarget);
 	}
 	err = 1;   /* osd or pg changed */
 
@@ -869,16 +1007,6 @@ static int __send_request(struct ceph_osd_client *osdc,
 			  struct ceph_osd_request *req)
 {
 	struct ceph_osd_request_head *reqhead;
-	int err;
-
-	err = __map_osds(osdc, req);
-	if (err < 0)
-		return err;
-	if (req->r_osd == NULL) {
-		dout("send_request %p no up osds in pg\n", req);
-		ceph_monc_request_next_osdmap(&osdc->client->monc);
-		return 0;
-	}
 
 	dout("send_request %p tid %llu to osd%d flags %d\n",
 	     req, req->r_tid, req->r_osd->o_osd, req->r_flags);
@@ -898,6 +1026,21 @@
 }
 
 /*
+ * Send any requests in the queue (req_unsent).
+ */
+static void send_queued(struct ceph_osd_client *osdc)
+{
+	struct ceph_osd_request *req, *tmp;
+
+	dout("send_queued\n");
+	mutex_lock(&osdc->request_mutex);
+	list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) {
+		__send_request(osdc, req);
+	}
+	mutex_unlock(&osdc->request_mutex);
+}
+
+/*
  * Timeout callback, called every N seconds when 1 or more osd
  * requests has been active for more than N seconds.  When this
  * happens, we ping all OSDs with requests who have timed out to
@@ -916,30 +1059,13 @@ static void handle_timeout(struct work_struct *work)
 	unsigned long keepalive =
 		osdc->client->options->osd_keepalive_timeout * HZ;
 	unsigned long last_stamp = 0;
-	struct rb_node *p;
 	struct list_head slow_osds;
-
 	dout("timeout\n");
 	down_read(&osdc->map_sem);
 
 	ceph_monc_request_next_osdmap(&osdc->client->monc);
 
 	mutex_lock(&osdc->request_mutex);
-	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
-		req = rb_entry(p, struct ceph_osd_request, r_node);
-
-		if (req->r_resend) {
-			int err;
-
-			dout("osdc resending prev failed %lld\n", req->r_tid);
-			err = __send_request(osdc, req);
-			if (err)
-				dout("osdc failed again on %lld\n", req->r_tid);
-			else
-				req->r_resend = false;
-			continue;
-		}
-	}
 
 	/*
 	 * reset osds that appear to be _really_ unresponsive.  this
@@ -963,7 +1089,7 @@
 		BUG_ON(!osd);
 		pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
 			   req->r_tid, osd->o_osd);
-		__kick_requests(osdc, osd);
+		__kick_osd_requests(osdc, osd);
 	}
 
 	/*
@@ -991,7 +1117,7 @@
 
 	__schedule_osd_timeout(osdc);
 	mutex_unlock(&osdc->request_mutex);
-
+	send_queued(osdc);
 	up_read(&osdc->map_sem);
 }
 
@@ -1035,7 +1161,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	    numops * sizeof(struct ceph_osd_op))
 		goto bad;
 	dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
-
 	/* lookup */
 	mutex_lock(&osdc->request_mutex);
 	req = __lookup_request(osdc, tid);
@@ -1079,6 +1204,9 @@
 
 	dout("handle_reply tid %llu flags %d\n", tid, flags);
 
+	if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK))
+		__register_linger_request(osdc, req);
+
 	/* either this is a read, or we got the safe response */
 	if (result < 0 ||
 	    (flags & CEPH_OSD_FLAG_ONDISK) ||
@@ -1099,6 +1227,7 @@
 	}
 
 done:
+	dout("req=%p req->r_linger=%d\n", req, req->r_linger);
 	ceph_osdc_put_request(req);
 	return;
 
@@ -1109,108 +1238,83 @@ bad:
 	ceph_msg_dump(msg);
 }
 
-
-static int __kick_requests(struct ceph_osd_client *osdc,
-			  struct ceph_osd *kickosd)
+static void reset_changed_osds(struct ceph_osd_client *osdc)
 {
-	struct ceph_osd_request *req;
 	struct rb_node *p, *n;
-	int needmap = 0;
-	int err;
 
-	dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
-	if (kickosd) {
-		err = __reset_osd(osdc, kickosd);
-		if (err == -EAGAIN)
-			return 1;
-	} else {
-		for (p = rb_first(&osdc->osds); p; p = n) {
-			struct ceph_osd *osd =
-				rb_entry(p, struct ceph_osd, o_node);
-
-			n = rb_next(p);
-			if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
-			    memcmp(&osd->o_con.peer_addr,
-				   ceph_osd_addr(osdc->osdmap,
-						 osd->o_osd),
-				   sizeof(struct ceph_entity_addr)) != 0)
-				__reset_osd(osdc, osd);
-		}
+	for (p = rb_first(&osdc->osds); p; p = n) {
+		struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
+
+		n = rb_next(p);
+		if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
+		    memcmp(&osd->o_con.peer_addr,
+			   ceph_osd_addr(osdc->osdmap,
+					 osd->o_osd),
+			   sizeof(struct ceph_entity_addr)) != 0)
+			__reset_osd(osdc, osd);
 	}
+}
+
+/*
+ * Requeue requests whose mapping to an OSD has changed.  If requests map to
+ * no osd, request a new map.
+ *
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static void kick_requests(struct ceph_osd_client *osdc)
+{
+	struct ceph_osd_request *req, *nreq;
+	struct rb_node *p;
+	int needmap = 0;
+	int err;
 
+	dout("kick_requests\n");
+	mutex_lock(&osdc->request_mutex);
 	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
 		req = rb_entry(p, struct ceph_osd_request, r_node);
-
-		if (req->r_resend) {
-			dout(" r_resend set on tid %llu\n", req->r_tid);
-			__cancel_request(req);
-			goto kick;
-		}
-		if (req->r_osd && kickosd == req->r_osd) {
-			__cancel_request(req);
-			goto kick;
+		err = __map_request(osdc, req);
+		if (err < 0)
+			continue;	/* error */
+		if (req->r_osd == NULL) {
+			dout("%p tid %llu maps to no osd\n", req, req->r_tid);
+			needmap++;	/* request a newer map */
+		} else if (err > 0) {
+			dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
+			     req->r_osd ? req->r_osd->o_osd : -1);
+			if (!req->r_linger)
+				req->r_flags |= CEPH_OSD_FLAG_RETRY;
 		}
+	}
+
+	list_for_each_entry_safe(req, nreq, &osdc->req_linger,
+				 r_linger_item) {
+		dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
 
-		err = __map_osds(osdc, req);
+		err = __map_request(osdc, req);
 		if (err == 0)
-			continue; /* no change */
-		if (err < 0) {
-			/*
-			 * FIXME: really, we should set the request
-			 * error and fail if this isn't a 'nofail'
-			 * request, but that's a fair bit more
-			 * complicated to do.  So retry!
-			 */
-			dout(" setting r_resend on %llu\n", req->r_tid);
-			req->r_resend = true;
-			continue;
-		}
+			continue; /* no change and no osd was specified */
+		if (err < 0)
+			continue; /* hrm! */
 		if (req->r_osd == NULL) {
 			dout("tid %llu maps to no valid osd\n", req->r_tid);
 			needmap++;	/* request a newer map */
 			continue;
 		}
 
-kick:
-		dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
+		dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
 		     req->r_osd ? req->r_osd->o_osd : -1);
-		req->r_flags |= CEPH_OSD_FLAG_RETRY;
-		err = __send_request(osdc, req);
-		if (err) {
-			dout(" setting r_resend on %llu\n", req->r_tid);
-			req->r_resend = true;
-		}
+		__unregister_linger_request(osdc, req);
+		__register_request(osdc, req);
 	}
-
-	return needmap;
-}
-
-/*
- * Resubmit osd requests whose osd or osd address has changed.  Request
- * a new osd map if osds are down, or we are otherwise unable to determine
- * how to direct a request.
- *
- * Close connections to down osds.
- *
- * If @who is specified, resubmit requests for that specific osd.
- *
- * Caller should hold map_sem for read and request_mutex.
- */
-static void kick_requests(struct ceph_osd_client *osdc,
-			  struct ceph_osd *kickosd)
-{
-	int needmap;
-
-	mutex_lock(&osdc->request_mutex);
-	needmap = __kick_requests(osdc, kickosd);
 	mutex_unlock(&osdc->request_mutex);
 
 	if (needmap) {
 		dout("%d requests for down osds, need new map\n", needmap);
 		ceph_monc_request_next_osdmap(&osdc->client->monc);
 	}
-
 }
+
+
 /*
  * Process updated osd map.
  *
@@ -1263,6 +1367,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 			ceph_osdmap_destroy(osdc->osdmap);
 			osdc->osdmap = newmap;
 		}
+		kick_requests(osdc);
+		reset_changed_osds(osdc);
 	} else {
 		dout("ignoring incremental map %u len %d\n",
 		     epoch, maplen);
@@ -1300,6 +1406,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 			osdc->osdmap = newmap;
 			if (oldmap)
 				ceph_osdmap_destroy(oldmap);
+			kick_requests(osdc);
 		}
 		p += maplen;
 		nr_maps--;
@@ -1308,8 +1415,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 done:
 	downgrade_write(&osdc->map_sem);
 	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
-	if (newmap)
-		kick_requests(osdc, NULL);
+	send_queued(osdc);
 	up_read(&osdc->map_sem);
 	wake_up_all(&osdc->client->auth_wq);
 	return;
@@ -1322,6 +1428,223 @@ bad:
 }
 
 /*
+ * watch/notify callback event infrastructure
+ *
+ * These callbacks are used both for watch and notify operations.
+ */
+static void __release_event(struct kref *kref)
+{
+	struct ceph_osd_event *event =
+		container_of(kref, struct ceph_osd_event, kref);
+
+	dout("__release_event %p\n", event);
+	kfree(event);
+}
+
+static void get_event(struct ceph_osd_event *event)
+{
+	kref_get(&event->kref);
+}
+
+void ceph_osdc_put_event(struct ceph_osd_event *event)
+{
+	kref_put(&event->kref, __release_event);
+}
+EXPORT_SYMBOL(ceph_osdc_put_event);
+
+static void __insert_event(struct ceph_osd_client *osdc,
+			   struct ceph_osd_event *new)
+{
+	struct rb_node **p = &osdc->event_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct ceph_osd_event *event = NULL;
+
+	while (*p) {
+		parent = *p;
+		event = rb_entry(parent, struct ceph_osd_event, node);
+		if (new->cookie < event->cookie)
+			p = &(*p)->rb_left;
+		else if (new->cookie > event->cookie)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, &osdc->event_tree);
+}
+
+static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc,
+					   u64 cookie)
+{
+	struct rb_node **p = &osdc->event_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct ceph_osd_event *event = NULL;
+
+	while (*p) {
+		parent = *p;
+		event = rb_entry(parent, struct ceph_osd_event, node);
+		if (cookie < event->cookie)
+			p = &(*p)->rb_left;
+		else if (cookie > event->cookie)
+			p = &(*p)->rb_right;
+		else
+			return event;
+	}
+	return NULL;
+}
+
+static void __remove_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	if (!RB_EMPTY_NODE(&event->node)) {
+		dout("__remove_event removed %p\n", event);
+		rb_erase(&event->node, &osdc->event_tree);
+		ceph_osdc_put_event(event);
+	} else {
+		dout("__remove_event didn't remove %p\n", event);
+	}
+}
+
+int ceph_osdc_create_event(struct ceph_osd_client *osdc,
+			   void (*event_cb)(u64, u64, u8, void *),
+			   int one_shot, void *data,
+			   struct ceph_osd_event **pevent)
+{
+	struct ceph_osd_event *event;
+
+	event = kmalloc(sizeof(*event), GFP_NOIO);
+	if (!event)
+		return -ENOMEM;
+
+	dout("create_event %p\n", event);
+	event->cb = event_cb;
+	event->one_shot = one_shot;
+	event->data = data;
+	event->osdc = osdc;
+	INIT_LIST_HEAD(&event->osd_node);
+	kref_init(&event->kref);   /* one ref for us */
+	kref_get(&event->kref);    /* one ref for the caller */
+	init_completion(&event->completion);
+
+	spin_lock(&osdc->event_lock);
+	event->cookie = ++osdc->event_count;
+	__insert_event(osdc, event);
+	spin_unlock(&osdc->event_lock);
+
+	*pevent = event;
+	return 0;
+}
+EXPORT_SYMBOL(ceph_osdc_create_event);
+
+void ceph_osdc_cancel_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	dout("cancel_event %p\n", event);
+	spin_lock(&osdc->event_lock);
+	__remove_event(event);
+	spin_unlock(&osdc->event_lock);
+	ceph_osdc_put_event(event); /* caller's */
+}
+EXPORT_SYMBOL(ceph_osdc_cancel_event);
+
+
+static void do_event_work(struct work_struct *work)
+{
+	struct ceph_osd_event_work *event_work =
+		container_of(work, struct ceph_osd_event_work, work);
+	struct ceph_osd_event *event = event_work->event;
+	u64 ver = event_work->ver;
+	u64 notify_id = event_work->notify_id;
+	u8 opcode = event_work->opcode;
+
+	dout("do_event_work completing %p\n", event);
+	event->cb(ver, notify_id, opcode, event->data);
+	complete(&event->completion);
+	dout("do_event_work completed %p\n", event);
+	ceph_osdc_put_event(event);
+	kfree(event_work);
+}
+
+
+/*
+ * Process osd watch notifications
+ */
+void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+{
+	void *p, *end;
+	u8 proto_ver;
+	u64 cookie, ver, notify_id;
+	u8 opcode;
+	struct ceph_osd_event *event;
+	struct ceph_osd_event_work *event_work;
+
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	ceph_decode_8_safe(&p, end, proto_ver, bad);
+	ceph_decode_8_safe(&p, end, opcode, bad);
+	ceph_decode_64_safe(&p, end, cookie, bad);
+	ceph_decode_64_safe(&p, end, ver, bad);
+	ceph_decode_64_safe(&p, end, notify_id, bad);
+
+	spin_lock(&osdc->event_lock);
+	event = __find_event(osdc, cookie);
+	if (event) {
+		get_event(event);
+		if (event->one_shot)
+			__remove_event(event);
+	}
+	spin_unlock(&osdc->event_lock);
+	dout("handle_watch_notify cookie %lld ver %lld event %p\n",
+	     cookie, ver, event);
+	if (event) {
+		event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
+		INIT_WORK(&event_work->work, do_event_work);
+		if (!event_work) {
+			dout("ERROR: could not allocate event_work\n");
+			goto done_err;
+		}
+		event_work->event = event;
+		event_work->ver = ver;
+		event_work->notify_id = notify_id;
+		event_work->opcode = opcode;
+		if (!queue_work(osdc->notify_wq, &event_work->work)) {
+			dout("WARNING: failed to queue notify event work\n");
+			goto done_err;
+		}
+	}
+
+	return;
+
+done_err:
+	complete(&event->completion);
+	ceph_osdc_put_event(event);
+	return;
+
+bad:
+	pr_err("osdc handle_watch_notify corrupt msg\n");
+	return;
+}
+
+int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout)
+{
+	int err;
+
+	dout("wait_event %p\n", event);
+	err = wait_for_completion_interruptible_timeout(&event->completion,
+							timeout * HZ);
+	ceph_osdc_put_event(event);
+	if (err > 0)
+		err = 0;
+	dout("wait_event %p returns %d\n", event, err);
+	return err;
+}
+EXPORT_SYMBOL(ceph_osdc_wait_event);
+
+/*
  * Register request, send initial attempt.
  */
 int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1347,15 +1670,22 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 	 * the request still han't been touched yet.
 	 */
 	if (req->r_sent == 0) {
-		rc = __send_request(osdc, req);
-		if (rc) {
-			if (nofail) {
-				dout("osdc_start_request failed send, "
-				     " marking %lld\n", req->r_tid);
-				req->r_resend = true;
-				rc = 0;
-			} else {
-				__unregister_request(osdc, req);
+		rc = __map_request(osdc, req);
+		if (rc < 0)
+			return rc;
+		if (req->r_osd == NULL) {
+			dout("send_request %p no up osds in pg\n", req);
+			ceph_monc_request_next_osdmap(&osdc->client->monc);
+		} else {
+			rc = __send_request(osdc, req);
+			if (rc) {
+				if (nofail) {
+					dout("osdc_start_request failed send, "
+					     " will retry %lld\n", req->r_tid);
+					rc = 0;
+				} else {
+					__unregister_request(osdc, req);
+				}
 			}
 		}
 	}
@@ -1441,9 +1771,15 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	INIT_LIST_HEAD(&osdc->osd_lru);
 	osdc->requests = RB_ROOT;
 	INIT_LIST_HEAD(&osdc->req_lru);
+	INIT_LIST_HEAD(&osdc->req_unsent);
+	INIT_LIST_HEAD(&osdc->req_notarget);
+	INIT_LIST_HEAD(&osdc->req_linger);
 	osdc->num_requests = 0;
 	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
+	spin_lock_init(&osdc->event_lock);
+	osdc->event_tree = RB_ROOT;
+	osdc->event_count = 0;
 
 	schedule_delayed_work(&osdc->osds_timeout_work,
 	    round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
@@ -1463,6 +1799,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 				"osd_op_reply");
 	if (err < 0)
 		goto out_msgpool;
+
+	osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
+	if (IS_ERR(osdc->notify_wq)) {
+		err = PTR_ERR(osdc->notify_wq);
+		osdc->notify_wq = NULL;
+		goto out_msgpool;
+	}
 	return 0;
 
 out_msgpool:
@@ -1476,6 +1819,8 @@ EXPORT_SYMBOL(ceph_osdc_init);
 
 void ceph_osdc_stop(struct ceph_osd_client *osdc)
 {
+	flush_workqueue(osdc->notify_wq);
+	destroy_workqueue(osdc->notify_wq);
 	cancel_delayed_work_sync(&osdc->timeout_work);
 	cancel_delayed_work_sync(&osdc->osds_timeout_work);
 	if (osdc->osdmap) {
@@ -1483,6 +1828,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 		osdc->osdmap = NULL;
 	}
 	remove_old_osds(osdc, 1);
+	WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
 	mempool_destroy(osdc->req_mempool);
 	ceph_msgpool_destroy(&osdc->msgpool_op);
 	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
@@ -1591,6 +1937,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 	case CEPH_MSG_OSD_OPREPLY:
 		handle_reply(osdc, msg, con);
 		break;
+	case CEPH_MSG_WATCH_NOTIFY:
+		handle_watch_notify(osdc, msg);
+		break;
 
 	default:
 		pr_err("received unknown message type %d %s\n", type,
@@ -1684,6 +2033,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
 
 	switch (type) {
 	case CEPH_MSG_OSD_MAP:
+	case CEPH_MSG_WATCH_NOTIFY:
 		return ceph_msg_new(type, front, GFP_NOFS);
 	case CEPH_MSG_OSD_OPREPLY:
 		return get_reply(con, hdr, skip);
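A hedged sketch of how the watch/notify and linger APIs added above fit together from a caller's point of view (rbd's header watcher is the in-tree user; my_notify_cb, mydata and the surrounding request setup are hypothetical): create an event to obtain a cookie, reference that cookie from a CEPH_OSD_OP_WATCH request, and mark the request lingering so kick_requests() resubmits it whenever the osdmap changes.

static void my_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
	/* runs from osdc->notify_wq via do_event_work() */
}

	struct ceph_osd_event *event;
	int ret;

	ret = ceph_osdc_create_event(osdc, my_notify_cb, 0, mydata, &event);
	if (ret)
		return ret;

	/* put event->cookie into req's CEPH_OSD_OP_WATCH op, then: */
	ceph_osdc_set_request_linger(osdc, req);
	ret = ceph_osdc_start_request(osdc, req, false);

	/* teardown: */
	ceph_osdc_unregister_linger_request(osdc, req);
	ceph_osdc_cancel_event(event);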
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc..0b88eba97da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
 #include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
 
 #include "net-sysfs.h"
 
@@ -1114,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
 void dev_load(struct net *net, const char *name)
 {
 	struct net_device *dev;
+	int no_module;
 
 	rcu_read_lock();
 	dev = dev_get_by_name_rcu(net, name);
 	rcu_read_unlock();
 
-	if (!dev && capable(CAP_NET_ADMIN))
-		request_module("%s", name);
+	no_module = !dev;
+	if (no_module && capable(CAP_NET_ADMIN))
+		no_module = request_module("netdev-%s", name);
+	if (no_module && capable(CAP_SYS_MODULE)) {
+		if (!request_module("%s", name))
+			pr_err("Loading kernel module for a network device "
+"with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s "
+"instead\n", name);
+	}
 }
 EXPORT_SYMBOL(dev_load);
 
@@ -1289,7 +1298,7 @@ static int __dev_close(struct net_device *dev)
 	return retval;
 }
 
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
 	LIST_HEAD(tmp_list);
@@ -1597,6 +1606,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. If is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+	int i;
+	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+	/* If TC0 is invalidated disable TC mapping */
+	if (tc->offset + tc->count > txq) {
+		pr_warning("Number of in use tx queues changed "
+			   "invalidating tc mappings. Priority "
+			   "traffic classification disabled!\n");
+		dev->num_tc = 0;
+		return;
+	}
+
+	/* Invalidated prio to tc mappings set to TC0 */
+	for (i = 1; i < TC_BITMASK + 1; i++) {
+		int q = netdev_get_prio_tc_map(dev, i);
+
+		tc = &dev->tc_to_txq[q];
+		if (tc->offset + tc->count > txq) {
+			pr_warning("Number of in use tx queues "
+				   "changed. Priority %i to tc "
+				   "mapping %i is no longer valid "
+				   "setting map to 0\n",
+				   i, q);
+			netdev_set_prio_tc_map(dev, i, 0);
+		}
+	}
+}
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1608,7 +1659,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1608 if (txq < 1 || txq > dev->num_tx_queues) 1659 if (txq < 1 || txq > dev->num_tx_queues)
1609 return -EINVAL; 1660 return -EINVAL;
1610 1661
1611 if (dev->reg_state == NETREG_REGISTERED) { 1662 if (dev->reg_state == NETREG_REGISTERED ||
1663 dev->reg_state == NETREG_UNREGISTERING) {
1612 ASSERT_RTNL(); 1664 ASSERT_RTNL();
1613 1665
1614 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, 1666 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -1616,6 +1668,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1616 if (rc) 1668 if (rc)
1617 return rc; 1669 return rc;
1618 1670
1671 if (dev->num_tc)
1672 netif_setup_tc(dev, txq);
1673
1619 if (txq < dev->real_num_tx_queues) 1674 if (txq < dev->real_num_tx_queues)
1620 qdisc_reset_all_tx_gt(dev, txq); 1675 qdisc_reset_all_tx_gt(dev, txq);
1621 } 1676 }
@@ -1815,7 +1870,7 @@ EXPORT_SYMBOL(skb_checksum_help);
1815 * It may return NULL if the skb requires no segmentation. This is 1870 * It may return NULL if the skb requires no segmentation. This is
1816 * only possible when GSO is used for verifying header integrity. 1871 * only possible when GSO is used for verifying header integrity.
1817 */ 1872 */
1818struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) 1873struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
1819{ 1874{
1820 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 1875 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1821 struct packet_type *ptype; 1876 struct packet_type *ptype;
@@ -2003,7 +2058,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
2003 protocol == htons(ETH_P_FCOE))); 2058 protocol == htons(ETH_P_FCOE)));
2004} 2059}
2005 2060
2006static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) 2061static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
2007{ 2062{
2008 if (!can_checksum_protocol(features, protocol)) { 2063 if (!can_checksum_protocol(features, protocol)) {
2009 features &= ~NETIF_F_ALL_CSUM; 2064 features &= ~NETIF_F_ALL_CSUM;
@@ -2015,10 +2070,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features
2015 return features; 2070 return features;
2016} 2071}
2017 2072
2018int netif_skb_features(struct sk_buff *skb) 2073u32 netif_skb_features(struct sk_buff *skb)
2019{ 2074{
2020 __be16 protocol = skb->protocol; 2075 __be16 protocol = skb->protocol;
2021 int features = skb->dev->features; 2076 u32 features = skb->dev->features;
2022 2077
2023 if (protocol == htons(ETH_P_8021Q)) { 2078 if (protocol == htons(ETH_P_8021Q)) {
2024 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2079 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2063,7 +2118,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2063 int rc = NETDEV_TX_OK; 2118 int rc = NETDEV_TX_OK;
2064 2119
2065 if (likely(!skb->next)) { 2120 if (likely(!skb->next)) {
2066 int features; 2121 u32 features;
2067 2122
2068 /* 2123 /*
 2069 * If device doesn't need skb->dst, release it right now while 2124 * If device doesn't need skb->dst, release it right now while
@@ -2165,6 +2220,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2165 unsigned int num_tx_queues) 2220 unsigned int num_tx_queues)
2166{ 2221{
2167 u32 hash; 2222 u32 hash;
2223 u16 qoffset = 0;
2224 u16 qcount = num_tx_queues;
2168 2225
2169 if (skb_rx_queue_recorded(skb)) { 2226 if (skb_rx_queue_recorded(skb)) {
2170 hash = skb_get_rx_queue(skb); 2227 hash = skb_get_rx_queue(skb);
@@ -2173,13 +2230,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2173 return hash; 2230 return hash;
2174 } 2231 }
2175 2232
2233 if (dev->num_tc) {
2234 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2235 qoffset = dev->tc_to_txq[tc].offset;
2236 qcount = dev->tc_to_txq[tc].count;
2237 }
2238
2176 if (skb->sk && skb->sk->sk_hash) 2239 if (skb->sk && skb->sk->sk_hash)
2177 hash = skb->sk->sk_hash; 2240 hash = skb->sk->sk_hash;
2178 else 2241 else
2179 hash = (__force u16) skb->protocol ^ skb->rxhash; 2242 hash = (__force u16) skb->protocol ^ skb->rxhash;
2180 hash = jhash_1word(hash, hashrnd); 2243 hash = jhash_1word(hash, hashrnd);
2181 2244
2182 return (u16) (((u64) hash * num_tx_queues) >> 32); 2245 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2183} 2246}
2184EXPORT_SYMBOL(__skb_tx_hash); 2247EXPORT_SYMBOL(__skb_tx_hash);
2185 2248
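
The queue selection above deliberately avoids a modulo: a 32-bit hash is scaled into the range [qoffset, qoffset + qcount) with a 64-bit multiply and shift. A standalone check of that arithmetic (illustrative only):

#include <stdint.h>
#include <stdio.h>

static uint16_t pick_queue(uint32_t hash, uint16_t qoffset, uint16_t qcount)
{
	return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
}

int main(void)
{
	/* a tc covering queues 4..7 (qoffset = 4, qcount = 4) */
	printf("%u\n", (unsigned)pick_queue(0x00000000u, 4, 4));	/* 4 */
	printf("%u\n", (unsigned)pick_queue(0xffffffffu, 4, 4));	/* 7 */
	return 0;
}
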
@@ -2276,15 +2339,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2276 struct netdev_queue *txq) 2339 struct netdev_queue *txq)
2277{ 2340{
2278 spinlock_t *root_lock = qdisc_lock(q); 2341 spinlock_t *root_lock = qdisc_lock(q);
2279 bool contended = qdisc_is_running(q); 2342 bool contended;
2280 int rc; 2343 int rc;
2281 2344
2345 qdisc_skb_cb(skb)->pkt_len = skb->len;
2346 qdisc_calculate_pkt_len(skb, q);
2282 /* 2347 /*
2283 * Heuristic to force contended enqueues to serialize on a 2348 * Heuristic to force contended enqueues to serialize on a
2284 * separate lock before trying to get qdisc main lock. 2349 * separate lock before trying to get qdisc main lock.
2285 * This permits __QDISC_STATE_RUNNING owner to get the lock more often 2350 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2286 * and dequeue packets faster. 2351 * and dequeue packets faster.
2287 */ 2352 */
2353 contended = qdisc_is_running(q);
2288 if (unlikely(contended)) 2354 if (unlikely(contended))
2289 spin_lock(&q->busylock); 2355 spin_lock(&q->busylock);
2290 2356
@@ -2302,7 +2368,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2302 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2368 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2303 skb_dst_force(skb); 2369 skb_dst_force(skb);
2304 2370
2305 qdisc_skb_cb(skb)->pkt_len = skb->len;
2306 qdisc_bstats_update(q, skb); 2371 qdisc_bstats_update(q, skb);
2307 2372
2308 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { 2373 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2317,7 +2382,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2317 rc = NET_XMIT_SUCCESS; 2382 rc = NET_XMIT_SUCCESS;
2318 } else { 2383 } else {
2319 skb_dst_force(skb); 2384 skb_dst_force(skb);
2320 rc = qdisc_enqueue_root(skb, q); 2385 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2321 if (qdisc_run_begin(q)) { 2386 if (qdisc_run_begin(q)) {
2322 if (unlikely(contended)) { 2387 if (unlikely(contended)) {
2323 spin_unlock(&q->busylock); 2388 spin_unlock(&q->busylock);
@@ -2536,6 +2601,54 @@ EXPORT_SYMBOL(__skb_get_rxhash);
2536struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2601struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2537EXPORT_SYMBOL(rps_sock_flow_table); 2602EXPORT_SYMBOL(rps_sock_flow_table);
2538 2603
2604static struct rps_dev_flow *
2605set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2606 struct rps_dev_flow *rflow, u16 next_cpu)
2607{
2608 u16 tcpu;
2609
2610 tcpu = rflow->cpu = next_cpu;
2611 if (tcpu != RPS_NO_CPU) {
2612#ifdef CONFIG_RFS_ACCEL
2613 struct netdev_rx_queue *rxqueue;
2614 struct rps_dev_flow_table *flow_table;
2615 struct rps_dev_flow *old_rflow;
2616 u32 flow_id;
2617 u16 rxq_index;
2618 int rc;
2619
2620 /* Should we steer this flow to a different hardware queue? */
2621 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2622 !(dev->features & NETIF_F_NTUPLE))
2623 goto out;
2624 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2625 if (rxq_index == skb_get_rx_queue(skb))
2626 goto out;
2627
2628 rxqueue = dev->_rx + rxq_index;
2629 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2630 if (!flow_table)
2631 goto out;
2632 flow_id = skb->rxhash & flow_table->mask;
2633 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2634 rxq_index, flow_id);
2635 if (rc < 0)
2636 goto out;
2637 old_rflow = rflow;
2638 rflow = &flow_table->flows[flow_id];
2639 rflow->cpu = next_cpu;
2640 rflow->filter = rc;
2641 if (old_rflow->filter == rflow->filter)
2642 old_rflow->filter = RPS_NO_FILTER;
2643 out:
2644#endif
2645 rflow->last_qtail =
2646 per_cpu(softnet_data, tcpu).input_queue_head;
2647 }
2648
2649 return rflow;
2650}
2651
2539/* 2652/*
2540 * get_rps_cpu is called from netif_receive_skb and returns the target 2653 * get_rps_cpu is called from netif_receive_skb and returns the target
2541 * CPU from the RPS map of the receiving queue for a given skb. 2654 * CPU from the RPS map of the receiving queue for a given skb.
@@ -2607,12 +2720,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2607 if (unlikely(tcpu != next_cpu) && 2720 if (unlikely(tcpu != next_cpu) &&
2608 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 2721 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2609 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 2722 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2610 rflow->last_qtail)) >= 0)) { 2723 rflow->last_qtail)) >= 0))
2611 tcpu = rflow->cpu = next_cpu; 2724 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2612 if (tcpu != RPS_NO_CPU) 2725
2613 rflow->last_qtail = per_cpu(softnet_data,
2614 tcpu).input_queue_head;
2615 }
2616 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 2726 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2617 *rflowp = rflow; 2727 *rflowp = rflow;
2618 cpu = tcpu; 2728 cpu = tcpu;
@@ -2633,6 +2743,46 @@ done:
2633 return cpu; 2743 return cpu;
2634} 2744}
2635 2745
2746#ifdef CONFIG_RFS_ACCEL
2747
2748/**
2749 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
2750 * @dev: Device on which the filter was set
2751 * @rxq_index: RX queue index
2752 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
2753 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
2754 *
2755 * Drivers that implement ndo_rx_flow_steer() should periodically call
2756 * this function for each installed filter and remove the filters for
2757 * which it returns %true.
2758 */
2759bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2760 u32 flow_id, u16 filter_id)
2761{
2762 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2763 struct rps_dev_flow_table *flow_table;
2764 struct rps_dev_flow *rflow;
2765 bool expire = true;
2766 int cpu;
2767
2768 rcu_read_lock();
2769 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2770 if (flow_table && flow_id <= flow_table->mask) {
2771 rflow = &flow_table->flows[flow_id];
2772 cpu = ACCESS_ONCE(rflow->cpu);
2773 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2774 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2775 rflow->last_qtail) <
2776 (int)(10 * flow_table->mask)))
2777 expire = false;
2778 }
2779 rcu_read_unlock();
2780 return expire;
2781}
2782EXPORT_SYMBOL(rps_may_expire_flow);
2783
2784#endif /* CONFIG_RFS_ACCEL */
2785
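
The test above keeps a filter while fewer than 10 * (flow-table size) packets have been enqueued on the flow's CPU since the flow itself last queued one; the signed subtraction tolerates counter wraparound. A minimal sketch of just that comparison, with names local to the example:

#include <stdbool.h>
#include <stdint.h>

/* True once the flow has been idle long enough for its filter to go. */
static bool may_expire(uint32_t input_queue_head, uint32_t last_qtail,
		       uint32_t table_mask)
{
	return (int32_t)(input_queue_head - last_qtail) >=
	       (int32_t)(10 * table_mask);
}
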
2636/* Called from hardirq (IPI) context */ 2786/* Called from hardirq (IPI) context */
2637static void rps_trigger_softirq(void *data) 2787static void rps_trigger_softirq(void *data)
2638{ 2788{
@@ -2920,6 +3070,8 @@ out:
2920 * on a failure. 3070 * on a failure.
2921 * 3071 *
2922 * The caller must hold the rtnl_mutex. 3072 * The caller must hold the rtnl_mutex.
3073 *
3074 * For a general description of rx_handler, see enum rx_handler_result.
2923 */ 3075 */
2924int netdev_rx_handler_register(struct net_device *dev, 3076int netdev_rx_handler_register(struct net_device *dev,
2925 rx_handler_func_t *rx_handler, 3077 rx_handler_func_t *rx_handler,
@@ -2954,64 +3106,32 @@ void netdev_rx_handler_unregister(struct net_device *dev)
2954} 3106}
2955EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3107EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2956 3108
2957static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, 3109static void vlan_on_bond_hook(struct sk_buff *skb)
2958 struct net_device *master)
2959{
2960 if (skb->pkt_type == PACKET_HOST) {
2961 u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2962
2963 memcpy(dest, master->dev_addr, ETH_ALEN);
2964 }
2965}
2966
2967/* On bonding slaves other than the currently active slave, suppress
2968 * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2969 * ARP on active-backup slaves with arp_validate enabled.
2970 */
2971int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2972{ 3110{
2973 struct net_device *dev = skb->dev; 3111 /*
2974 3112 * Make sure ARP frames received on VLAN interfaces stacked on
2975 if (master->priv_flags & IFF_MASTER_ARPMON) 3113 * bonding interfaces still make their way to any base bonding
2976 dev->last_rx = jiffies; 3114 * device that may have registered for a specific ptype.
2977 3115 */
2978 if ((master->priv_flags & IFF_MASTER_ALB) && 3116 if (skb->dev->priv_flags & IFF_802_1Q_VLAN &&
2979 (master->priv_flags & IFF_BRIDGE_PORT)) { 3117 vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING &&
2980 /* Do address unmangle. The local destination address 3118 skb->protocol == htons(ETH_P_ARP)) {
2981 * will be always the one master has. Provides the right 3119 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2982 * functionality in a bridge.
2983 */
2984 skb_bond_set_mac_by_master(skb, master);
2985 }
2986
2987 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2988 if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2989 skb->protocol == __cpu_to_be16(ETH_P_ARP))
2990 return 0;
2991
2992 if (master->priv_flags & IFF_MASTER_ALB) {
2993 if (skb->pkt_type != PACKET_BROADCAST &&
2994 skb->pkt_type != PACKET_MULTICAST)
2995 return 0;
2996 }
2997 if (master->priv_flags & IFF_MASTER_8023AD &&
2998 skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2999 return 0;
3000 3120
3001 return 1; 3121 if (!skb2)
3122 return;
3123 skb2->dev = vlan_dev_real_dev(skb->dev);
3124 netif_rx(skb2);
3002 } 3125 }
3003 return 0;
3004} 3126}
3005EXPORT_SYMBOL(__skb_bond_should_drop);
3006 3127
3007static int __netif_receive_skb(struct sk_buff *skb) 3128static int __netif_receive_skb(struct sk_buff *skb)
3008{ 3129{
3009 struct packet_type *ptype, *pt_prev; 3130 struct packet_type *ptype, *pt_prev;
3010 rx_handler_func_t *rx_handler; 3131 rx_handler_func_t *rx_handler;
3011 struct net_device *orig_dev; 3132 struct net_device *orig_dev;
3012 struct net_device *master; 3133 struct net_device *null_or_dev;
3013 struct net_device *null_or_orig; 3134 bool deliver_exact = false;
3014 struct net_device *orig_or_bond;
3015 int ret = NET_RX_DROP; 3135 int ret = NET_RX_DROP;
3016 __be16 type; 3136 __be16 type;
3017 3137
@@ -3026,28 +3146,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
3026 3146
3027 if (!skb->skb_iif) 3147 if (!skb->skb_iif)
3028 skb->skb_iif = skb->dev->ifindex; 3148 skb->skb_iif = skb->dev->ifindex;
3029
3030 /*
3031 * bonding note: skbs received on inactive slaves should only
3032 * be delivered to pkt handlers that are exact matches. Also
3033 * the deliver_no_wcard flag will be set. If packet handlers
3034 * are sensitive to duplicate packets these skbs will need to
3035 * be dropped at the handler.
3036 */
3037 null_or_orig = NULL;
3038 orig_dev = skb->dev; 3149 orig_dev = skb->dev;
3039 master = ACCESS_ONCE(orig_dev->master);
3040 if (skb->deliver_no_wcard)
3041 null_or_orig = orig_dev;
3042 else if (master) {
3043 if (skb_bond_should_drop(skb, master)) {
3044 skb->deliver_no_wcard = 1;
3045 null_or_orig = orig_dev; /* deliver only exact match */
3046 } else
3047 skb->dev = master;
3048 }
3049 3150
3050 __this_cpu_inc(softnet_data.processed);
3051 skb_reset_network_header(skb); 3151 skb_reset_network_header(skb);
3052 skb_reset_transport_header(skb); 3152 skb_reset_transport_header(skb);
3053 skb->mac_len = skb->network_header - skb->mac_header; 3153 skb->mac_len = skb->network_header - skb->mac_header;
@@ -3056,6 +3156,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
3056 3156
3057 rcu_read_lock(); 3157 rcu_read_lock();
3058 3158
3159another_round:
3160
3161 __this_cpu_inc(softnet_data.processed);
3162
3059#ifdef CONFIG_NET_CLS_ACT 3163#ifdef CONFIG_NET_CLS_ACT
3060 if (skb->tc_verd & TC_NCLS) { 3164 if (skb->tc_verd & TC_NCLS) {
3061 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 3165 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -3064,8 +3168,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
3064#endif 3168#endif
3065 3169
3066 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3170 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3067 if (ptype->dev == null_or_orig || ptype->dev == skb->dev || 3171 if (!ptype->dev || ptype->dev == skb->dev) {
3068 ptype->dev == orig_dev) {
3069 if (pt_prev) 3172 if (pt_prev)
3070 ret = deliver_skb(skb, pt_prev, orig_dev); 3173 ret = deliver_skb(skb, pt_prev, orig_dev);
3071 pt_prev = ptype; 3174 pt_prev = ptype;
@@ -3079,16 +3182,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
3079ncls: 3182ncls:
3080#endif 3183#endif
3081 3184
3082 /* Handle special case of bridge or macvlan */
3083 rx_handler = rcu_dereference(skb->dev->rx_handler); 3185 rx_handler = rcu_dereference(skb->dev->rx_handler);
3084 if (rx_handler) { 3186 if (rx_handler) {
3085 if (pt_prev) { 3187 if (pt_prev) {
3086 ret = deliver_skb(skb, pt_prev, orig_dev); 3188 ret = deliver_skb(skb, pt_prev, orig_dev);
3087 pt_prev = NULL; 3189 pt_prev = NULL;
3088 } 3190 }
3089 skb = rx_handler(skb); 3191 switch (rx_handler(&skb)) {
3090 if (!skb) 3192 case RX_HANDLER_CONSUMED:
3091 goto out; 3193 goto out;
3194 case RX_HANDLER_ANOTHER:
3195 goto another_round;
3196 case RX_HANDLER_EXACT:
3197 deliver_exact = true;
3198 case RX_HANDLER_PASS:
3199 break;
3200 default:
3201 BUG();
3202 }
3092 } 3203 }
3093 3204
3094 if (vlan_tx_tag_present(skb)) { 3205 if (vlan_tx_tag_present(skb)) {
@@ -3103,24 +3214,17 @@ ncls:
3103 goto out; 3214 goto out;
3104 } 3215 }
3105 3216
3106 /* 3217 vlan_on_bond_hook(skb);
3107 * Make sure frames received on VLAN interfaces stacked on 3218
3108 * bonding interfaces still make their way to any base bonding 3219 /* deliver only exact match when indicated */
3109 * device that may have registered for a specific ptype. The 3220 null_or_dev = deliver_exact ? skb->dev : NULL;
3110 * handler may have to adjust skb->dev and orig_dev.
3111 */
3112 orig_or_bond = orig_dev;
3113 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
3114 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
3115 orig_or_bond = vlan_dev_real_dev(skb->dev);
3116 }
3117 3221
3118 type = skb->protocol; 3222 type = skb->protocol;
3119 list_for_each_entry_rcu(ptype, 3223 list_for_each_entry_rcu(ptype,
3120 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3224 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3121 if (ptype->type == type && (ptype->dev == null_or_orig || 3225 if (ptype->type == type &&
3122 ptype->dev == skb->dev || ptype->dev == orig_dev || 3226 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3123 ptype->dev == orig_or_bond)) { 3227 ptype->dev == orig_dev)) {
3124 if (pt_prev) 3228 if (pt_prev)
3125 ret = deliver_skb(skb, pt_prev, orig_dev); 3229 ret = deliver_skb(skb, pt_prev, orig_dev);
3126 pt_prev = ptype; 3230 pt_prev = ptype;
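
For reference, a hedged sketch of a handler written against the new calling convention used above (the struct sk_buff ** parameter and the RX_HANDLER_* results come from this series; the example_* predicates are hypothetical):

static rx_handler_result_t example_rx_handler(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;

	if (example_should_drop(skb)) {		/* hypothetical predicate */
		kfree_skb(skb);
		return RX_HANDLER_CONSUMED;	/* skb is gone, stop here */
	}
	if (example_retarget(skb))		/* hypothetical: sets skb->dev */
		return RX_HANDLER_ANOTHER;	/* redo receive on the new dev */
	if (example_on_inactive_port(skb))	/* hypothetical */
		return RX_HANDLER_EXACT;	/* exact-match ptypes only */
	return RX_HANDLER_PASS;			/* normal delivery continues */
}
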
@@ -3917,12 +4021,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3917 4021
3918void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 4022void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3919{ 4023{
3920 struct net_device *dev = (v == SEQ_START_TOKEN) ? 4024 struct net_device *dev = v;
3921 first_net_device(seq_file_net(seq)) : 4025
3922 next_net_device((struct net_device *)v); 4026 if (v == SEQ_START_TOKEN)
4027 dev = first_net_device_rcu(seq_file_net(seq));
4028 else
4029 dev = next_net_device_rcu(dev);
3923 4030
3924 ++*pos; 4031 ++*pos;
3925 return rcu_dereference(dev); 4032 return dev;
3926} 4033}
3927 4034
3928void dev_seq_stop(struct seq_file *seq, void *v) 4035void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4206,15 +4313,14 @@ static int __init dev_proc_init(void)
4206 4313
4207 4314
4208/** 4315/**
4209 * netdev_set_master - set up master/slave pair 4316 * netdev_set_master - set up master pointer
4210 * @slave: slave device 4317 * @slave: slave device
4211 * @master: new master device 4318 * @master: new master device
4212 * 4319 *
4213 * Changes the master device of the slave. Pass %NULL to break the 4320 * Changes the master device of the slave. Pass %NULL to break the
4214 * bonding. The caller must hold the RTNL semaphore. On a failure 4321 * bonding. The caller must hold the RTNL semaphore. On a failure
4215 * a negative errno code is returned. On success the reference counts 4322 * a negative errno code is returned. On success the reference counts
4216 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the 4323 * are adjusted and the function returns zero.
4217 * function returns zero.
4218 */ 4324 */
4219int netdev_set_master(struct net_device *slave, struct net_device *master) 4325int netdev_set_master(struct net_device *slave, struct net_device *master)
4220{ 4326{
@@ -4234,6 +4340,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
4234 synchronize_net(); 4340 synchronize_net();
4235 dev_put(old); 4341 dev_put(old);
4236 } 4342 }
4343 return 0;
4344}
4345EXPORT_SYMBOL(netdev_set_master);
4346
4347/**
4348 * netdev_set_bond_master - set up bonding master/slave pair
4349 * @slave: slave device
4350 * @master: new master device
4351 *
4352 * Changes the master device of the slave. Pass %NULL to break the
4353 * bonding. The caller must hold the RTNL semaphore. On a failure
4354 * a negative errno code is returned. On success %RTM_NEWLINK is sent
4355 * to the routing socket and the function returns zero.
4356 */
4357int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4358{
4359 int err;
4360
4361 ASSERT_RTNL();
4362
4363 err = netdev_set_master(slave, master);
4364 if (err)
4365 return err;
4237 if (master) 4366 if (master)
4238 slave->flags |= IFF_SLAVE; 4367 slave->flags |= IFF_SLAVE;
4239 else 4368 else
@@ -4242,7 +4371,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
4242 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); 4371 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4243 return 0; 4372 return 0;
4244} 4373}
4245EXPORT_SYMBOL(netdev_set_master); 4374EXPORT_SYMBOL(netdev_set_bond_master);
4246 4375
4247static void dev_change_rx_flags(struct net_device *dev, int flags) 4376static void dev_change_rx_flags(struct net_device *dev, int flags)
4248{ 4377{
@@ -4579,6 +4708,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
4579EXPORT_SYMBOL(dev_set_mtu); 4708EXPORT_SYMBOL(dev_set_mtu);
4580 4709
4581/** 4710/**
4711 * dev_set_group - Change group this device belongs to
4712 * @dev: device
4713 * @new_group: group this device should belong to
4714 */
4715void dev_set_group(struct net_device *dev, int new_group)
4716{
4717 dev->group = new_group;
4718}
4719EXPORT_SYMBOL(dev_set_group);
4720
4721/**
4582 * dev_set_mac_address - Change Media Access Control Address 4722 * dev_set_mac_address - Change Media Access Control Address
4583 * @dev: device 4723 * @dev: device
4584 * @sa: new address 4724 * @sa: new address
@@ -5069,41 +5209,55 @@ static void rollback_registered(struct net_device *dev)
5069 list_del(&single); 5209 list_del(&single);
5070} 5210}
5071 5211
5072unsigned long netdev_fix_features(unsigned long features, const char *name) 5212u32 netdev_fix_features(struct net_device *dev, u32 features)
5073{ 5213{
5214 /* Fix illegal checksum combinations */
5215 if ((features & NETIF_F_HW_CSUM) &&
5216 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5217 netdev_info(dev, "mixed HW and IP checksum settings.\n");
5218 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5219 }
5220
5221 if ((features & NETIF_F_NO_CSUM) &&
5222 (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5223 netdev_info(dev, "mixed no checksumming and other settings.\n");
5224 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5225 }
5226
5074 /* Fix illegal SG+CSUM combinations. */ 5227 /* Fix illegal SG+CSUM combinations. */
5075 if ((features & NETIF_F_SG) && 5228 if ((features & NETIF_F_SG) &&
5076 !(features & NETIF_F_ALL_CSUM)) { 5229 !(features & NETIF_F_ALL_CSUM)) {
5077 if (name) 5230 netdev_info(dev,
5078 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " 5231 "Dropping NETIF_F_SG since no checksum feature.\n");
5079 "checksum feature.\n", name);
5080 features &= ~NETIF_F_SG; 5232 features &= ~NETIF_F_SG;
5081 } 5233 }
5082 5234
5083 /* TSO requires that SG is present as well. */ 5235 /* TSO requires that SG is present as well. */
5084 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { 5236 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
5085 if (name) 5237 netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n");
5086 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
5087 "SG feature.\n", name);
5088 features &= ~NETIF_F_TSO; 5238 features &= ~NETIF_F_TSO;
5089 } 5239 }
5090 5240
5241 /* Software GSO depends on SG. */
5242 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5243 netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5244 features &= ~NETIF_F_GSO;
5245 }
5246
5247 /* UFO needs SG and checksumming */
5091 if (features & NETIF_F_UFO) { 5248 if (features & NETIF_F_UFO) {
5092 /* maybe split UFO into V4 and V6? */ 5249 /* maybe split UFO into V4 and V6? */
5093 if (!((features & NETIF_F_GEN_CSUM) || 5250 if (!((features & NETIF_F_GEN_CSUM) ||
5094 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 5251 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5095 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5252 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5096 if (name) 5253 netdev_info(dev,
5097 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 5254 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5098 "since no checksum offload features.\n",
5099 name);
5100 features &= ~NETIF_F_UFO; 5255 features &= ~NETIF_F_UFO;
5101 } 5256 }
5102 5257
5103 if (!(features & NETIF_F_SG)) { 5258 if (!(features & NETIF_F_SG)) {
5104 if (name) 5259 netdev_info(dev,
5105 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 5260 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5106 "since no NETIF_F_SG feature.\n", name);
5107 features &= ~NETIF_F_UFO; 5261 features &= ~NETIF_F_UFO;
5108 } 5262 }
5109 } 5263 }
@@ -5112,6 +5266,37 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
5112} 5266}
5113EXPORT_SYMBOL(netdev_fix_features); 5267EXPORT_SYMBOL(netdev_fix_features);
5114 5268
5269void netdev_update_features(struct net_device *dev)
5270{
5271 u32 features;
5272 int err = 0;
5273
5274 features = netdev_get_wanted_features(dev);
5275
5276 if (dev->netdev_ops->ndo_fix_features)
5277 features = dev->netdev_ops->ndo_fix_features(dev, features);
5278
5279 /* driver might be less strict about feature dependencies */
5280 features = netdev_fix_features(dev, features);
5281
5282 if (dev->features == features)
5283 return;
5284
5285 netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n",
5286 dev->features, features);
5287
5288 if (dev->netdev_ops->ndo_set_features)
5289 err = dev->netdev_ops->ndo_set_features(dev, features);
5290
5291 if (!err)
5292 dev->features = features;
5293 else if (err < 0)
5294 netdev_err(dev,
5295 "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
5296 err, features, dev->features);
5297}
5298EXPORT_SYMBOL(netdev_update_features);
5299
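
A sketch of the driver side of this negotiation, assuming the ndo_fix_features/ndo_set_features ops this series introduces; the foo_* names and the hardware constraint are purely illustrative:

static u32 foo_fix_features(struct net_device *dev, u32 features)
{
	/* Example hardware rule: no TSO without scatter-gather. */
	if (!(features & NETIF_F_SG))
		features &= ~NETIF_F_TSO;
	return features;
}

static int foo_set_features(struct net_device *dev, u32 features)
{
	u32 changed = dev->features ^ features;

	if (changed & NETIF_F_RXCSUM)		/* hypothetical hw helper */
		foo_hw_set_rx_csum(dev, !!(features & NETIF_F_RXCSUM));
	return 0;	/* on success, the core commits dev->features */
}
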
5115/** 5300/**
5116 * netif_stacked_transfer_operstate - transfer operstate 5301 * netif_stacked_transfer_operstate - transfer operstate
5117 * @rootdev: the root or lower level device to transfer state from 5302 * @rootdev: the root or lower level device to transfer state from
@@ -5246,27 +5431,19 @@ int register_netdevice(struct net_device *dev)
5246 if (dev->iflink == -1) 5431 if (dev->iflink == -1)
5247 dev->iflink = dev->ifindex; 5432 dev->iflink = dev->ifindex;
5248 5433
5249 /* Fix illegal checksum combinations */ 5434 /* Transfer changeable features to wanted_features and enable
5250 if ((dev->features & NETIF_F_HW_CSUM) && 5435 * software offloads (GSO and GRO).
5251 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5436 */
5252 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", 5437 dev->hw_features |= NETIF_F_SOFT_FEATURES;
5253 dev->name); 5438 dev->features |= NETIF_F_SOFT_FEATURES;
5254 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5439 dev->wanted_features = dev->features & dev->hw_features;
5255 }
5256 5440
5257 if ((dev->features & NETIF_F_NO_CSUM) && 5441 /* Avoid warning from netdev_fix_features() for GSO without SG */
5258 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5442 if (!(dev->wanted_features & NETIF_F_SG)) {
5259 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", 5443 dev->wanted_features &= ~NETIF_F_GSO;
5260 dev->name); 5444 dev->features &= ~NETIF_F_GSO;
5261 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5262 } 5445 }
5263 5446
5264 dev->features = netdev_fix_features(dev->features, dev->name);
5265
5266 /* Enable software GSO if SG is supported. */
5267 if (dev->features & NETIF_F_SG)
5268 dev->features |= NETIF_F_GSO;
5269
5270 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, 5447 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5271 * vlan_dev_init() will do the dev->features check, so these features 5448 * vlan_dev_init() will do the dev->features check, so these features
5272 * are enabled only if supported by underlying device. 5449 * are enabled only if supported by underlying device.
@@ -5283,6 +5460,8 @@ int register_netdevice(struct net_device *dev)
5283 goto err_uninit; 5460 goto err_uninit;
5284 dev->reg_state = NETREG_REGISTERED; 5461 dev->reg_state = NETREG_REGISTERED;
5285 5462
5463 netdev_update_features(dev);
5464
5286 /* 5465 /*
5287 * Default initial state at registry is that the 5466 * Default initial state at registry is that the
5288 * device is present. 5467 * device is present.
@@ -5687,6 +5866,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5687#endif 5866#endif
5688 5867
5689 strcpy(dev->name, name); 5868 strcpy(dev->name, name);
5869 dev->group = INIT_NETDEV_GROUP;
5690 return dev; 5870 return dev;
5691 5871
5692free_all: 5872free_all:
@@ -6001,8 +6181,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6001 * @one to the master device with current feature set @all. Will not 6181 * @one to the master device with current feature set @all. Will not
6002 * enable anything that is off in @mask. Returns the new feature set. 6182 * enable anything that is off in @mask. Returns the new feature set.
6003 */ 6183 */
6004unsigned long netdev_increment_features(unsigned long all, unsigned long one, 6184u32 netdev_increment_features(u32 all, u32 one, u32 mask)
6005 unsigned long mask)
6006{ 6185{
6007 /* If device needs checksumming, downgrade to it. */ 6186 /* If device needs checksumming, downgrade to it. */
6008 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) 6187 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 133fd22ea28..7b39f3ed2fd 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -357,8 +357,8 @@ EXPORT_SYMBOL(dev_addr_add_multiple);
357/** 357/**
358 * dev_addr_del_multiple - Delete device addresses by another device 358 * dev_addr_del_multiple - Delete device addresses by another device
359 * @to_dev: device where the addresses will be deleted 359 * @to_dev: device where the addresses will be deleted
360 * @from_dev: device by which addresses the addresses will be deleted 360 * @from_dev: device supplying the addresses to be deleted
361 * @addr_type: address type - 0 means type will used from from_dev 361 * @addr_type: address type - 0 means type will be used from from_dev
362 * 362 *
363 * Deletes addresses in to device by the list of addresses in from device. 363 * Deletes addresses in to device by the list of addresses in from device.
364 * 364 *
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 36e603c78ce..706502ff64a 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -350,7 +350,7 @@ static int __init init_net_drop_monitor(void)
350 struct per_cpu_dm_data *data; 350 struct per_cpu_dm_data *data;
351 int cpu, rc; 351 int cpu, rc;
352 352
353 printk(KERN_INFO "Initalizing network drop monitor service\n"); 353 printk(KERN_INFO "Initializing network drop monitor service\n");
354 354
355 if (sizeof(void *) > 8) { 355 if (sizeof(void *) > 8) {
356 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); 356 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
diff --git a/net/core/dst.c b/net/core/dst.c
index b99c7c7ffce..91104d35de7 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -164,7 +164,9 @@ int dst_discard(struct sk_buff *skb)
164} 164}
165EXPORT_SYMBOL(dst_discard); 165EXPORT_SYMBOL(dst_discard);
166 166
167void *dst_alloc(struct dst_ops *ops) 167const u32 dst_default_metrics[RTAX_MAX];
168
169void *dst_alloc(struct dst_ops *ops, int initial_ref)
168{ 170{
169 struct dst_entry *dst; 171 struct dst_entry *dst;
170 172
@@ -175,11 +177,12 @@ void *dst_alloc(struct dst_ops *ops)
175 dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); 177 dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
176 if (!dst) 178 if (!dst)
177 return NULL; 179 return NULL;
178 atomic_set(&dst->__refcnt, 0); 180 atomic_set(&dst->__refcnt, initial_ref);
179 dst->ops = ops; 181 dst->ops = ops;
180 dst->lastuse = jiffies; 182 dst->lastuse = jiffies;
181 dst->path = dst; 183 dst->path = dst;
182 dst->input = dst->output = dst_discard; 184 dst->input = dst->output = dst_discard;
185 dst_init_metrics(dst, dst_default_metrics, true);
183#if RT_CACHE_DEBUG >= 2 186#if RT_CACHE_DEBUG >= 2
184 atomic_inc(&dst_total); 187 atomic_inc(&dst_total);
185#endif 188#endif
@@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst)
282} 285}
283EXPORT_SYMBOL(dst_release); 286EXPORT_SYMBOL(dst_release);
284 287
288u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
289{
290 u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
291
292 if (p) {
293 u32 *old_p = __DST_METRICS_PTR(old);
294 unsigned long prev, new;
295
296 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
297
298 new = (unsigned long) p;
299 prev = cmpxchg(&dst->_metrics, old, new);
300
301 if (prev != old) {
302 kfree(p);
303 p = __DST_METRICS_PTR(prev);
304 if (prev & DST_METRICS_READ_ONLY)
305 p = NULL;
306 }
307 }
308 return p;
309}
310EXPORT_SYMBOL(dst_cow_metrics_generic);
311
312/* Caller asserts that dst_metrics_read_only(dst) is false. */
313void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
314{
315 unsigned long prev, new;
316
317 new = (unsigned long) dst_default_metrics;
318 prev = cmpxchg(&dst->_metrics, old, new);
319 if (prev == old)
320 kfree(__DST_METRICS_PTR(old));
321}
322EXPORT_SYMBOL(__dst_destroy_metrics_generic);
323
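
The copy-on-write scheme above works because dst->_metrics holds a pointer whose low bit marks the metrics array as shared and read-only, swapped atomically with cmpxchg(). A userspace model of just the pointer tagging (the macro name here is local, standing in for DST_METRICS_READ_ONLY):

#include <stdint.h>
#include <stdio.h>

#define METRICS_READ_ONLY 0x1UL	/* low bit of a 4-byte-aligned pointer */

static const uint32_t default_metrics[4];	/* shared, never written */

int main(void)
{
	/* tag the shared array as read-only in a single word */
	unsigned long m = (unsigned long)default_metrics | METRICS_READ_ONLY;
	const uint32_t *p = (const uint32_t *)(m & ~METRICS_READ_ONLY);

	printf("read-only: %s, metric[0] = %u\n",
	       (m & METRICS_READ_ONLY) ? "yes" : "no", (unsigned)p[0]);
	return 0;
}
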
285/** 324/**
286 * skb_dst_set_noref - sets skb dst, without a reference 325 * skb_dst_set_noref - sets skb dst, without a reference
287 * @skb: buffer 326 * @skb: buffer
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ff2302910b5..a1086fb0c0c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev)
34} 34}
35EXPORT_SYMBOL(ethtool_op_get_link); 35EXPORT_SYMBOL(ethtool_op_get_link);
36 36
37u32 ethtool_op_get_rx_csum(struct net_device *dev)
38{
39 return (dev->features & NETIF_F_ALL_CSUM) != 0;
40}
41EXPORT_SYMBOL(ethtool_op_get_rx_csum);
42
43u32 ethtool_op_get_tx_csum(struct net_device *dev) 37u32 ethtool_op_get_tx_csum(struct net_device *dev)
44{ 38{
45 return (dev->features & NETIF_F_ALL_CSUM) != 0; 39 return (dev->features & NETIF_F_ALL_CSUM) != 0;
@@ -55,6 +49,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
55 49
56 return 0; 50 return 0;
57} 51}
52EXPORT_SYMBOL(ethtool_op_set_tx_csum);
58 53
59int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) 54int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
60{ 55{
@@ -171,6 +166,381 @@ EXPORT_SYMBOL(ethtool_ntuple_flush);
171 166
172/* Handlers for each ethtool command */ 167/* Handlers for each ethtool command */
173 168
169#define ETHTOOL_DEV_FEATURE_WORDS 1
170
171static void ethtool_get_features_compat(struct net_device *dev,
172 struct ethtool_get_features_block *features)
173{
174 if (!dev->ethtool_ops)
175 return;
176
177 /* getting RX checksum */
178 if (dev->ethtool_ops->get_rx_csum)
179 if (dev->ethtool_ops->get_rx_csum(dev))
180 features[0].active |= NETIF_F_RXCSUM;
181
182 /* mark legacy-changeable features */
183 if (dev->ethtool_ops->set_sg)
184 features[0].available |= NETIF_F_SG;
185 if (dev->ethtool_ops->set_tx_csum)
186 features[0].available |= NETIF_F_ALL_CSUM;
187 if (dev->ethtool_ops->set_tso)
188 features[0].available |= NETIF_F_ALL_TSO;
189 if (dev->ethtool_ops->set_rx_csum)
190 features[0].available |= NETIF_F_RXCSUM;
191 if (dev->ethtool_ops->set_flags)
192 features[0].available |= flags_dup_features;
193}
194
195static int ethtool_set_feature_compat(struct net_device *dev,
196 int (*legacy_set)(struct net_device *, u32),
197 struct ethtool_set_features_block *features, u32 mask)
198{
199 u32 do_set;
200
201 if (!legacy_set)
202 return 0;
203
204 if (!(features[0].valid & mask))
205 return 0;
206
207 features[0].valid &= ~mask;
208
209 do_set = !!(features[0].requested & mask);
210
211 if (legacy_set(dev, do_set) < 0)
212 netdev_info(dev,
213 "Legacy feature change (%s) failed for 0x%08x\n",
214 do_set ? "set" : "clear", mask);
215
216 return 1;
217}
218
219static int ethtool_set_features_compat(struct net_device *dev,
220 struct ethtool_set_features_block *features)
221{
222 int compat;
223
224 if (!dev->ethtool_ops)
225 return 0;
226
227 compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
228 features, NETIF_F_SG);
229 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
230 features, NETIF_F_ALL_CSUM);
231 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
232 features, NETIF_F_ALL_TSO);
233 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
234 features, NETIF_F_RXCSUM);
235 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags,
236 features, flags_dup_features);
237
238 return compat;
239}
240
241static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
242{
243 struct ethtool_gfeatures cmd = {
244 .cmd = ETHTOOL_GFEATURES,
245 .size = ETHTOOL_DEV_FEATURE_WORDS,
246 };
247 struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
248 {
249 .available = dev->hw_features,
250 .requested = dev->wanted_features,
251 .active = dev->features,
252 .never_changed = NETIF_F_NEVER_CHANGE,
253 },
254 };
255 u32 __user *sizeaddr;
256 u32 copy_size;
257
258 ethtool_get_features_compat(dev, features);
259
260 sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
261 if (get_user(copy_size, sizeaddr))
262 return -EFAULT;
263
264 if (copy_size > ETHTOOL_DEV_FEATURE_WORDS)
265 copy_size = ETHTOOL_DEV_FEATURE_WORDS;
266
267 if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
268 return -EFAULT;
269 useraddr += sizeof(cmd);
270 if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
271 return -EFAULT;
272
273 return 0;
274}
275
276static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
277{
278 struct ethtool_sfeatures cmd;
279 struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
280 int ret = 0;
281
282 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
283 return -EFAULT;
284 useraddr += sizeof(cmd);
285
286 if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS)
287 return -EINVAL;
288
289 if (copy_from_user(features, useraddr, sizeof(features)))
290 return -EFAULT;
291
292 if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
293 return -EINVAL;
294
295 if (ethtool_set_features_compat(dev, features))
296 ret |= ETHTOOL_F_COMPAT;
297
298 if (features[0].valid & ~dev->hw_features) {
299 features[0].valid &= dev->hw_features;
300 ret |= ETHTOOL_F_UNSUPPORTED;
301 }
302
303 dev->wanted_features &= ~features[0].valid;
304 dev->wanted_features |= features[0].valid & features[0].requested;
305 netdev_update_features(dev);
306
307 if ((dev->wanted_features ^ dev->features) & features[0].valid)
308 ret |= ETHTOOL_F_WISH;
309
310 return ret;
311}
312
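
From userspace the new command goes through the usual SIOCETHTOOL ioctl. A minimal sketch, assuming headers from a kernel that carries this patch and an interface named eth0, with error handling trimmed:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct {
		struct ethtool_gfeatures cmd;
		struct ethtool_get_features_block blocks[1];
	} gf = { .cmd = { .cmd = ETHTOOL_GFEATURES, .size = 1 } };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&gf;

	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("active features: 0x%08x\n", gf.blocks[0].active);
	return 0;
}
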
313static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
314 /* NETIF_F_SG */ "tx-scatter-gather",
315 /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
316 /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded",
317 /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic",
 318 /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6",
319 /* NETIF_F_HIGHDMA */ "highdma",
320 /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist",
321 /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert",
322
323 /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse",
324 /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter",
325 /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
326 /* NETIF_F_GSO */ "tx-generic-segmentation",
327 /* NETIF_F_LLTX */ "tx-lockless",
328 /* NETIF_F_NETNS_LOCAL */ "netns-local",
329 /* NETIF_F_GRO */ "rx-gro",
330 /* NETIF_F_LRO */ "rx-lro",
331
332 /* NETIF_F_TSO */ "tx-tcp-segmentation",
333 /* NETIF_F_UFO */ "tx-udp-fragmentation",
334 /* NETIF_F_GSO_ROBUST */ "tx-gso-robust",
335 /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation",
336 /* NETIF_F_TSO6 */ "tx-tcp6-segmentation",
337 /* NETIF_F_FSO */ "tx-fcoe-segmentation",
338 "",
339 "",
340
341 /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc",
342 /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp",
343 /* NETIF_F_FCOE_MTU */ "fcoe-mtu",
344 /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
345 /* NETIF_F_RXHASH */ "rx-hashing",
346 /* NETIF_F_RXCSUM */ "rx-checksum",
347 "",
348 "",
349};
350
351static int __ethtool_get_sset_count(struct net_device *dev, int sset)
352{
353 const struct ethtool_ops *ops = dev->ethtool_ops;
354
355 if (sset == ETH_SS_FEATURES)
356 return ARRAY_SIZE(netdev_features_strings);
357
358 if (ops && ops->get_sset_count && ops->get_strings)
359 return ops->get_sset_count(dev, sset);
360 else
361 return -EOPNOTSUPP;
362}
363
364static void __ethtool_get_strings(struct net_device *dev,
365 u32 stringset, u8 *data)
366{
367 const struct ethtool_ops *ops = dev->ethtool_ops;
368
369 if (stringset == ETH_SS_FEATURES)
370 memcpy(data, netdev_features_strings,
371 sizeof(netdev_features_strings));
372 else
373 /* ops->get_strings is valid because checked earlier */
374 ops->get_strings(dev, stringset, data);
375}
376
377static u32 ethtool_get_feature_mask(u32 eth_cmd)
378{
379 /* feature masks of legacy discrete ethtool ops */
380
381 switch (eth_cmd) {
382 case ETHTOOL_GTXCSUM:
383 case ETHTOOL_STXCSUM:
384 return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
385 case ETHTOOL_GRXCSUM:
386 case ETHTOOL_SRXCSUM:
387 return NETIF_F_RXCSUM;
388 case ETHTOOL_GSG:
389 case ETHTOOL_SSG:
390 return NETIF_F_SG;
391 case ETHTOOL_GTSO:
392 case ETHTOOL_STSO:
393 return NETIF_F_ALL_TSO;
394 case ETHTOOL_GUFO:
395 case ETHTOOL_SUFO:
396 return NETIF_F_UFO;
397 case ETHTOOL_GGSO:
398 case ETHTOOL_SGSO:
399 return NETIF_F_GSO;
400 case ETHTOOL_GGRO:
401 case ETHTOOL_SGRO:
402 return NETIF_F_GRO;
403 default:
404 BUG();
405 }
406}
407
408static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
409{
410 const struct ethtool_ops *ops = dev->ethtool_ops;
411
412 if (!ops)
413 return NULL;
414
415 switch (ethcmd) {
416 case ETHTOOL_GTXCSUM:
417 return ops->get_tx_csum;
418 case ETHTOOL_GRXCSUM:
419 return ops->get_rx_csum;
 420 case ETHTOOL_GSG:
 421 return ops->get_sg;
 422 case ETHTOOL_GTSO:
 423 return ops->get_tso;
 424 case ETHTOOL_GUFO:
 425 return ops->get_ufo;
426 default:
427 return NULL;
428 }
429}
430
431static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
432{
433 return !!(dev->features & NETIF_F_ALL_CSUM);
434}
435
436static int ethtool_get_one_feature(struct net_device *dev,
437 char __user *useraddr, u32 ethcmd)
438{
439 u32 mask = ethtool_get_feature_mask(ethcmd);
440 struct ethtool_value edata = {
441 .cmd = ethcmd,
442 .data = !!(dev->features & mask),
443 };
444
445 /* compatibility with discrete get_ ops */
446 if (!(dev->hw_features & mask)) {
447 u32 (*actor)(struct net_device *);
448
449 actor = __ethtool_get_one_feature_actor(dev, ethcmd);
450
451 /* bug compatibility with old get_rx_csum */
452 if (ethcmd == ETHTOOL_GRXCSUM && !actor)
453 actor = __ethtool_get_rx_csum_oldbug;
454
455 if (actor)
456 edata.data = actor(dev);
457 }
458
459 if (copy_to_user(useraddr, &edata, sizeof(edata)))
460 return -EFAULT;
461 return 0;
462}
463
464static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
465static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
466static int __ethtool_set_sg(struct net_device *dev, u32 data);
467static int __ethtool_set_tso(struct net_device *dev, u32 data);
468static int __ethtool_set_ufo(struct net_device *dev, u32 data);
469
470static int ethtool_set_one_feature(struct net_device *dev,
471 void __user *useraddr, u32 ethcmd)
472{
473 struct ethtool_value edata;
474 u32 mask;
475
476 if (copy_from_user(&edata, useraddr, sizeof(edata)))
477 return -EFAULT;
478
479 mask = ethtool_get_feature_mask(ethcmd);
480 mask &= dev->hw_features;
481 if (mask) {
482 if (edata.data)
483 dev->wanted_features |= mask;
484 else
485 dev->wanted_features &= ~mask;
486
487 netdev_update_features(dev);
488 return 0;
489 }
490
491 /* Driver is not converted to ndo_fix_features or does not
492 * support changing this offload. In the latter case it won't
 493 * have the corresponding ethtool_ops field set.
494 *
495 * Following part is to be removed after all drivers advertise
496 * their changeable features in netdev->hw_features and stop
497 * using discrete offload setting ops.
498 */
499
500 switch (ethcmd) {
501 case ETHTOOL_STXCSUM:
502 return __ethtool_set_tx_csum(dev, edata.data);
503 case ETHTOOL_SRXCSUM:
504 return __ethtool_set_rx_csum(dev, edata.data);
505 case ETHTOOL_SSG:
506 return __ethtool_set_sg(dev, edata.data);
507 case ETHTOOL_STSO:
508 return __ethtool_set_tso(dev, edata.data);
509 case ETHTOOL_SUFO:
510 return __ethtool_set_ufo(dev, edata.data);
511 default:
512 return -EOPNOTSUPP;
513 }
514}
515
516static int __ethtool_set_flags(struct net_device *dev, u32 data)
517{
518 u32 changed;
519
520 if (data & ~flags_dup_features)
521 return -EINVAL;
522
523 /* legacy set_flags() op */
524 if (dev->ethtool_ops->set_flags) {
525 if (unlikely(dev->hw_features & flags_dup_features))
526 netdev_warn(dev,
527 "driver BUG: mixed hw_features and set_flags()\n");
528 return dev->ethtool_ops->set_flags(dev, data);
529 }
530
531 /* allow changing only bits set in hw_features */
532 changed = (data ^ dev->wanted_features) & flags_dup_features;
533 if (changed & ~dev->hw_features)
534 return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
535
536 dev->wanted_features =
537 (dev->wanted_features & ~changed) | data;
538
539 netdev_update_features(dev);
540
541 return 0;
542}
543
174static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 544static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
175{ 545{
176 struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; 546 struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
@@ -251,14 +621,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
251 void __user *useraddr) 621 void __user *useraddr)
252{ 622{
253 struct ethtool_sset_info info; 623 struct ethtool_sset_info info;
254 const struct ethtool_ops *ops = dev->ethtool_ops;
255 u64 sset_mask; 624 u64 sset_mask;
256 int i, idx = 0, n_bits = 0, ret, rc; 625 int i, idx = 0, n_bits = 0, ret, rc;
257 u32 *info_buf = NULL; 626 u32 *info_buf = NULL;
258 627
259 if (!ops->get_sset_count)
260 return -EOPNOTSUPP;
261
262 if (copy_from_user(&info, useraddr, sizeof(info))) 628 if (copy_from_user(&info, useraddr, sizeof(info)))
263 return -EFAULT; 629 return -EFAULT;
264 630
@@ -285,7 +651,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
285 if (!(sset_mask & (1ULL << i))) 651 if (!(sset_mask & (1ULL << i)))
286 continue; 652 continue;
287 653
288 rc = ops->get_sset_count(dev, i); 654 rc = __ethtool_get_sset_count(dev, i);
289 if (rc >= 0) { 655 if (rc >= 0) {
290 info.sset_mask |= (1ULL << i); 656 info.sset_mask |= (1ULL << i);
291 info_buf[idx++] = rc; 657 info_buf[idx++] = rc;
@@ -1091,6 +1457,12 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
1091{ 1457{
1092 int err; 1458 int err;
1093 1459
1460 if (!dev->ethtool_ops->set_sg)
1461 return -EOPNOTSUPP;
1462
1463 if (data && !(dev->features & NETIF_F_ALL_CSUM))
1464 return -EINVAL;
1465
1094 if (!data && dev->ethtool_ops->set_tso) { 1466 if (!data && dev->ethtool_ops->set_tso) {
1095 err = dev->ethtool_ops->set_tso(dev, 0); 1467 err = dev->ethtool_ops->set_tso(dev, 0);
1096 if (err) 1468 if (err)
@@ -1105,145 +1477,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
1105 return dev->ethtool_ops->set_sg(dev, data); 1477 return dev->ethtool_ops->set_sg(dev, data);
1106} 1478}
1107 1479
1108static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) 1480static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
1109{ 1481{
1110 struct ethtool_value edata;
1111 int err; 1482 int err;
1112 1483
1113 if (!dev->ethtool_ops->set_tx_csum) 1484 if (!dev->ethtool_ops->set_tx_csum)
1114 return -EOPNOTSUPP; 1485 return -EOPNOTSUPP;
1115 1486
1116 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1487 if (!data && dev->ethtool_ops->set_sg) {
1117 return -EFAULT;
1118
1119 if (!edata.data && dev->ethtool_ops->set_sg) {
1120 err = __ethtool_set_sg(dev, 0); 1488 err = __ethtool_set_sg(dev, 0);
1121 if (err) 1489 if (err)
1122 return err; 1490 return err;
1123 } 1491 }
1124 1492
1125 return dev->ethtool_ops->set_tx_csum(dev, edata.data); 1493 return dev->ethtool_ops->set_tx_csum(dev, data);
1126} 1494}
1127EXPORT_SYMBOL(ethtool_op_set_tx_csum);
1128 1495
1129static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) 1496static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
1130{ 1497{
1131 struct ethtool_value edata;
1132
1133 if (!dev->ethtool_ops->set_rx_csum) 1498 if (!dev->ethtool_ops->set_rx_csum)
1134 return -EOPNOTSUPP; 1499 return -EOPNOTSUPP;
1135 1500
1136 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1501 if (!data)
1137 return -EFAULT;
1138
1139 if (!edata.data && dev->ethtool_ops->set_sg)
1140 dev->features &= ~NETIF_F_GRO; 1502 dev->features &= ~NETIF_F_GRO;
1141 1503
1142 return dev->ethtool_ops->set_rx_csum(dev, edata.data); 1504 return dev->ethtool_ops->set_rx_csum(dev, data);
1143}
1144
1145static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
1146{
1147 struct ethtool_value edata;
1148
1149 if (!dev->ethtool_ops->set_sg)
1150 return -EOPNOTSUPP;
1151
1152 if (copy_from_user(&edata, useraddr, sizeof(edata)))
1153 return -EFAULT;
1154
1155 if (edata.data &&
1156 !(dev->features & NETIF_F_ALL_CSUM))
1157 return -EINVAL;
1158
1159 return __ethtool_set_sg(dev, edata.data);
1160} 1505}
1161 1506
1162static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) 1507static int __ethtool_set_tso(struct net_device *dev, u32 data)
1163{ 1508{
1164 struct ethtool_value edata;
1165
1166 if (!dev->ethtool_ops->set_tso) 1509 if (!dev->ethtool_ops->set_tso)
1167 return -EOPNOTSUPP; 1510 return -EOPNOTSUPP;
1168 1511
1169 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1512 if (data && !(dev->features & NETIF_F_SG))
1170 return -EFAULT;
1171
1172 if (edata.data && !(dev->features & NETIF_F_SG))
1173 return -EINVAL; 1513 return -EINVAL;
1174 1514
1175 return dev->ethtool_ops->set_tso(dev, edata.data); 1515 return dev->ethtool_ops->set_tso(dev, data);
1176} 1516}
1177 1517
1178static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) 1518static int __ethtool_set_ufo(struct net_device *dev, u32 data)
1179{ 1519{
1180 struct ethtool_value edata;
1181
1182 if (!dev->ethtool_ops->set_ufo) 1520 if (!dev->ethtool_ops->set_ufo)
1183 return -EOPNOTSUPP; 1521 return -EOPNOTSUPP;
1184 if (copy_from_user(&edata, useraddr, sizeof(edata))) 1522 if (data && !(dev->features & NETIF_F_SG))
1185 return -EFAULT;
1186 if (edata.data && !(dev->features & NETIF_F_SG))
1187 return -EINVAL; 1523 return -EINVAL;
1188 if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || 1524 if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
1189 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 1525 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
1190 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) 1526 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
1191 return -EINVAL; 1527 return -EINVAL;
1192 return dev->ethtool_ops->set_ufo(dev, edata.data); 1528 return dev->ethtool_ops->set_ufo(dev, data);
1193}
1194
1195static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
1196{
1197 struct ethtool_value edata = { ETHTOOL_GGSO };
1198
1199 edata.data = dev->features & NETIF_F_GSO;
1200 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1201 return -EFAULT;
1202 return 0;
1203}
1204
1205static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
1206{
1207 struct ethtool_value edata;
1208
1209 if (copy_from_user(&edata, useraddr, sizeof(edata)))
1210 return -EFAULT;
1211 if (edata.data)
1212 dev->features |= NETIF_F_GSO;
1213 else
1214 dev->features &= ~NETIF_F_GSO;
1215 return 0;
1216}
1217
1218static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
1219{
1220 struct ethtool_value edata = { ETHTOOL_GGRO };
1221
1222 edata.data = dev->features & NETIF_F_GRO;
1223 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1224 return -EFAULT;
1225 return 0;
1226}
1227
1228static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
1229{
1230 struct ethtool_value edata;
1231
1232 if (copy_from_user(&edata, useraddr, sizeof(edata)))
1233 return -EFAULT;
1234
1235 if (edata.data) {
-		u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
-				dev->ethtool_ops->get_rx_csum(dev) :
-				ethtool_op_get_rx_csum(dev);
-
-		if (!rxcsum)
-			return -EINVAL;
-		dev->features |= NETIF_F_GRO;
-	} else
-		dev->features &= ~NETIF_F_GRO;
-
-	return 0;
 }
 
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
@@ -1287,17 +1569,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_gstrings gstrings;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
 	u8 *data;
 	int ret;
 
-	if (!ops->get_strings || !ops->get_sset_count)
-		return -EOPNOTSUPP;
-
 	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
 		return -EFAULT;
 
-	ret = ops->get_sset_count(dev, gstrings.string_set);
+	ret = __ethtool_get_sset_count(dev, gstrings.string_set);
 	if (ret < 0)
 		return ret;
 
@@ -1307,7 +1585,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 	if (!data)
 		return -ENOMEM;
 
-	ops->get_strings(dev, gstrings.string_set, data);
+	__ethtool_get_strings(dev, gstrings.string_set, data);
 
 	ret = -EFAULT;
 	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1317,7 +1595,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 		goto out;
 	ret = 0;
 
- out:
+out:
 	kfree(data);
 	return ret;
 }
@@ -1458,7 +1736,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	void __user *useraddr = ifr->ifr_data;
 	u32 ethcmd;
 	int rc;
-	unsigned long old_features;
+	u32 old_features;
 
 	if (!dev || !netif_device_present(dev))
 		return -ENODEV;
@@ -1500,6 +1778,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
+	case ETHTOOL_GFEATURES:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -1570,42 +1849,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SPAUSEPARAM:
 		rc = ethtool_set_pauseparam(dev, useraddr);
 		break;
-	case ETHTOOL_GRXCSUM:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_rx_csum ?
-					dev->ethtool_ops->get_rx_csum :
-					ethtool_op_get_rx_csum));
-		break;
-	case ETHTOOL_SRXCSUM:
-		rc = ethtool_set_rx_csum(dev, useraddr);
-		break;
-	case ETHTOOL_GTXCSUM:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_tx_csum ?
-					dev->ethtool_ops->get_tx_csum :
-					ethtool_op_get_tx_csum));
-		break;
-	case ETHTOOL_STXCSUM:
-		rc = ethtool_set_tx_csum(dev, useraddr);
-		break;
-	case ETHTOOL_GSG:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_sg ?
-					dev->ethtool_ops->get_sg :
-					ethtool_op_get_sg));
-		break;
-	case ETHTOOL_SSG:
-		rc = ethtool_set_sg(dev, useraddr);
-		break;
-	case ETHTOOL_GTSO:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_tso ?
-					dev->ethtool_ops->get_tso :
-					ethtool_op_get_tso));
-		break;
-	case ETHTOOL_STSO:
-		rc = ethtool_set_tso(dev, useraddr);
-		break;
 	case ETHTOOL_TEST:
 		rc = ethtool_self_test(dev, useraddr);
 		break;
@@ -1621,21 +1864,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GPERMADDR:
 		rc = ethtool_get_perm_addr(dev, useraddr);
 		break;
-	case ETHTOOL_GUFO:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_ufo ?
-					dev->ethtool_ops->get_ufo :
-					ethtool_op_get_ufo));
-		break;
-	case ETHTOOL_SUFO:
-		rc = ethtool_set_ufo(dev, useraddr);
-		break;
-	case ETHTOOL_GGSO:
-		rc = ethtool_get_gso(dev, useraddr);
-		break;
-	case ETHTOOL_SGSO:
-		rc = ethtool_set_gso(dev, useraddr);
-		break;
 	case ETHTOOL_GFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
 				       (dev->ethtool_ops->get_flags ?
@@ -1643,8 +1871,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 					ethtool_op_get_flags));
 		break;
 	case ETHTOOL_SFLAGS:
-		rc = ethtool_set_value(dev, useraddr,
-				       dev->ethtool_ops->set_flags);
+		rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
 		break;
 	case ETHTOOL_GPFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
@@ -1666,12 +1893,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXCLSRLINS:
 		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
 		break;
-	case ETHTOOL_GGRO:
-		rc = ethtool_get_gro(dev, useraddr);
-		break;
-	case ETHTOOL_SGRO:
-		rc = ethtool_set_gro(dev, useraddr);
-		break;
 	case ETHTOOL_FLASHDEV:
 		rc = ethtool_flash_device(dev, useraddr);
 		break;
@@ -1693,6 +1914,30 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXFHINDIR:
 		rc = ethtool_set_rxfh_indir(dev, useraddr);
 		break;
+	case ETHTOOL_GFEATURES:
+		rc = ethtool_get_features(dev, useraddr);
+		break;
+	case ETHTOOL_SFEATURES:
+		rc = ethtool_set_features(dev, useraddr);
+		break;
+	case ETHTOOL_GTXCSUM:
+	case ETHTOOL_GRXCSUM:
+	case ETHTOOL_GSG:
+	case ETHTOOL_GTSO:
+	case ETHTOOL_GUFO:
+	case ETHTOOL_GGSO:
+	case ETHTOOL_GGRO:
+		rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
+		break;
+	case ETHTOOL_STXCSUM:
+	case ETHTOOL_SRXCSUM:
+	case ETHTOOL_SSG:
+	case ETHTOOL_STSO:
+	case ETHTOOL_SUFO:
+	case ETHTOOL_SGSO:
+	case ETHTOOL_SGRO:
+		rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
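
Net effect of the ethtool.c hunks: the per-feature get/set ioctls stop calling per-driver callbacks and are funneled through ethtool_get_one_feature()/ethtool_set_one_feature(), which operate on bits of dev->features, while ETHTOOL_GFEATURES/ETHTOOL_SFEATURES expose the whole feature word at once. A rough sketch of the command-to-bit translation such handlers perform (the mask choices below are illustrative guesses; the authoritative table lives in net/core/ethtool.c):

static u32 legacy_cmd_to_feature_mask(u32 ethcmd)
{
	switch (ethcmd) {
	case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM:
		return NETIF_F_ALL_CSUM;	/* any tx checksum bit (assumption) */
	case ETHTOOL_GSG:     case ETHTOOL_SSG:
		return NETIF_F_SG;
	case ETHTOOL_GTSO:    case ETHTOOL_STSO:
		return NETIF_F_TSO;
	case ETHTOOL_GUFO:    case ETHTOOL_SUFO:
		return NETIF_F_UFO;
	case ETHTOOL_GGSO:    case ETHTOOL_SGSO:
		return NETIF_F_GSO;
	case ETHTOOL_GGRO:    case ETHTOOL_SGRO:
		return NETIF_F_GRO;
	default:
		return 0;
	}
}
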
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a20e5d3bbfa..8248ebb5891 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -181,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
 	int ret = 0;
 
-	if (rule->iifindex && (rule->iifindex != fl->iif))
+	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
 		goto out;
 
-	if (rule->oifindex && (rule->oifindex != fl->oif))
+	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
 		goto out;
 
-	if ((rule->mark ^ fl->mark) & rule->mark_mask)
+	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
 		goto out;
 
 	ret = ops->match(rule, fl, flags);
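
fl->iif, fl->oif and fl->mark become fl->flowi_iif, fl->flowi_oif and fl->flowi_mark here because this series reworks struct flowi from one flat structure into per-family keys that share a common header. A simplified sketch of the shape being converged on (field list abbreviated; treat details as an approximation of include/net/flow.h, which is authoritative):

struct flowi_common {
	int	flowic_oif;
	int	flowic_iif;
	__u32	flowic_mark;
	__u8	flowic_tos;
	__u8	flowic_scope;
	__u8	flowic_proto;
	/* ... */
};

struct flowi4 {
	struct flowi_common	__fl_common;	/* must stay first */
	__be32			daddr;
	__be32			saddr;
	/* transport selectors (ports, icmp type/code, ...) follow */
};

struct flowi {
	union {
		struct flowi_common	__fl_common;
		struct flowi4		ip4;
		/* struct flowi6 ip6; struct flowidn dn; */
	} u;
#define flowi_oif	u.__fl_common.flowic_oif
#define flowi_iif	u.__fl_common.flowic_iif
#define flowi_mark	u.__fl_common.flowic_mark
};
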
diff --git a/net/core/filter.c b/net/core/filter.c
index afc58374ca9..232b1873bb2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		return err;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
 
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return err;
 }
diff --git a/net/core/flow.c b/net/core/flow.c
index 127c8a7ffd6..990703b8863 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
 
 static u32 flow_hash_code(struct flow_cache *fc,
 			  struct flow_cache_percpu *fcp,
-			  struct flowi *key)
+			  const struct flowi *key)
 {
-	u32 *k = (u32 *) key;
+	const u32 *k = (const u32 *) key;
 
 	return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
 		& (flow_cache_hash_size(fc) - 1);
@@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t;
  * important assumptions that we can here, such as alignment and
  * constant size.
  */
-static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+static int flow_key_compare(const struct flowi *key1, const struct flowi *key2)
 {
-	flow_compare_t *k1, *k1_lim, *k2;
+	const flow_compare_t *k1, *k1_lim, *k2;
 	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
 
 	BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
 
-	k1 = (flow_compare_t *) key1;
+	k1 = (const flow_compare_t *) key1;
 	k1_lim = k1 + n_elem;
 
-	k2 = (flow_compare_t *) key2;
+	k2 = (const flow_compare_t *) key2;
 
 	do {
 		if (*k1++ != *k2++)
@@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 }
 
 struct flow_cache_object *
-flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 		  flow_resolve_t resolver, void *ctx)
 {
 	struct flow_cache *fc = &flow_cache_global;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60a90291342..799f06e03a2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
316{ 316{
317 size_t size = entries * sizeof(struct neighbour *); 317 size_t size = entries * sizeof(struct neighbour *);
318 struct neigh_hash_table *ret; 318 struct neigh_hash_table *ret;
319 struct neighbour **buckets; 319 struct neighbour __rcu **buckets;
320 320
321 ret = kmalloc(sizeof(*ret), GFP_ATOMIC); 321 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
322 if (!ret) 322 if (!ret)
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
324 if (size <= PAGE_SIZE) 324 if (size <= PAGE_SIZE)
325 buckets = kzalloc(size, GFP_ATOMIC); 325 buckets = kzalloc(size, GFP_ATOMIC);
326 else 326 else
327 buckets = (struct neighbour **) 327 buckets = (struct neighbour __rcu **)
328 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 328 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
329 get_order(size)); 329 get_order(size));
330 if (!buckets) { 330 if (!buckets) {
331 kfree(ret); 331 kfree(ret);
332 return NULL; 332 return NULL;
333 } 333 }
334 rcu_assign_pointer(ret->hash_buckets, buckets); 334 ret->hash_buckets = buckets;
335 ret->hash_mask = entries - 1; 335 ret->hash_mask = entries - 1;
336 get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); 336 get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
337 return ret; 337 return ret;
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
343 struct neigh_hash_table, 343 struct neigh_hash_table,
344 rcu); 344 rcu);
345 size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); 345 size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
346 struct neighbour **buckets = nht->hash_buckets; 346 struct neighbour __rcu **buckets = nht->hash_buckets;
347 347
348 if (size <= PAGE_SIZE) 348 if (size <= PAGE_SIZE)
349 kfree(buckets); 349 kfree(buckets);
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1540 panic("cannot create neighbour proc dir entry"); 1540 panic("cannot create neighbour proc dir entry");
1541#endif 1541#endif
1542 1542
1543 tbl->nht = neigh_hash_alloc(8); 1543 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
1544 1544
1545 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); 1545 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1546 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); 1546 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
1602 } 1602 }
1603 write_unlock(&neigh_tbl_lock); 1603 write_unlock(&neigh_tbl_lock);
1604 1604
1605 call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); 1605 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1606 neigh_hash_free_rcu);
1606 tbl->nht = NULL; 1607 tbl->nht = NULL;
1607 1608
1608 kfree(tbl->phash_buckets); 1609 kfree(tbl->phash_buckets);
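
The __rcu annotations on the bucket array let sparse check that every access goes through the RCU accessors. Inside neigh_hash_alloc() the new table is not yet reachable by readers, so the rcu_assign_pointer() there is downgraded to a plain store, while the globally visible tbl->nht pointer gains RCU_INIT_POINTER()/rcu_dereference_protected(). A generic sketch of the discipline being enforced (illustrative, not neighbour.c itself):

struct table {
	struct rcu_head	 rcu;
	unsigned int	 mask;
	void __rcu	*buckets[];	/* sparse flags un-annotated access */
};

static struct table __rcu *active;

static void publish(struct table *t)
{
	/* orders t's initialisation before making it reachable */
	rcu_assign_pointer(active, t);
}

/* caller must hold rcu_read_lock() */
static void *lookup(unsigned int hash)
{
	struct table *t = rcu_dereference(active);

	return rcu_dereference(t->buckets[hash & t->mask]);
}
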
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e23c01be5a5..5ceb257e860 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
 NETDEVICE_SHOW(addr_len, fmt_dec);
 NETDEVICE_SHOW(iflink, fmt_dec);
 NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(features, fmt_hex);
 NETDEVICE_SHOW(type, fmt_dec);
 NETDEVICE_SHOW(link_mode, fmt_dec);
 
@@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev,
 	return ret;
 }
 
+NETDEVICE_SHOW(group, fmt_dec);
+
+static int change_group(struct net_device *net, unsigned long new_group)
+{
+	dev_set_group(net, (int) new_group);
+	return 0;
+}
+
+static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_group);
+}
+
 static struct device_attribute net_class_attributes[] = {
 	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
@@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
 	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
 	       store_tx_queue_len),
+	__ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
 	{}
 };
 
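
The new netdev_group attribute exposes dev->group under sysfs. A minimal userspace sketch of driving it (the interface name "eth0" is a placeholder; fmt_dec means the value is read and written as plain decimal):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* netdev_group is the attribute registered above */
	int fd = open("/sys/class/net/eth0/netdev_group", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "5", 1) != 1)	/* move eth0 into group 5 */
		perror("write");
	close(fd);
	return 0;
}
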
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 02dc2cbcbe8..06be2431753 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -193,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev)
 
 	poll_napi(dev);
 
+	if (dev->priv_flags & IFF_SLAVE) {
+		if (dev->npinfo) {
+			struct net_device *bond_dev = dev->master;
+			struct sk_buff *skb;
+			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+				skb->dev = bond_dev;
+				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+			}
+		}
+	}
+
 	service_arp_queue(dev->npinfo);
 
 	zap_completion_queue();
@@ -313,9 +324,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	     tries > 0; --tries) {
 		if (__netif_tx_trylock(txq)) {
 			if (!netif_tx_queue_stopped(txq)) {
-				dev->priv_flags |= IFF_IN_NETPOLL;
 				status = ops->ndo_start_xmit(skb, dev);
-				dev->priv_flags &= ~IFF_IN_NETPOLL;
 				if (status == NETDEV_TX_OK)
 					txq_trans_update(txq);
 			}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461..aeeece72b72 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -251,6 +251,7 @@ struct pktgen_dev {
 	int max_pkt_size;	/* = ETH_ZLEN; */
 	int pkt_overhead;	/* overhead for MPLS, VLANs, IPSEC etc */
 	int nfrags;
+	struct page *page;
 	u64 delay;		/* nano-seconds */
 
 	__u64 count;		/* Default No packets to send */
@@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file,
 			if (node_possible(value)) {
 				pkt_dev->node = value;
 				sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+				if (pkt_dev->page) {
+					put_page(pkt_dev->page);
+					pkt_dev->page = NULL;
+				}
 			}
 			else
 				sprintf(pg_result, "ERROR: node not possible");
@@ -2605,6 +2610,89 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
 	return htons(id | (cfi << 12) | (prio << 13));
 }
 
+static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
+				int datalen)
+{
+	struct timeval timestamp;
+	struct pktgen_hdr *pgh;
+
+	pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+	datalen -= sizeof(*pgh);
+
+	if (pkt_dev->nfrags <= 0) {
+		memset(skb_put(skb, datalen), 0, datalen);
+	} else {
+		int frags = pkt_dev->nfrags;
+		int i, len;
+
+
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		len = datalen - frags * PAGE_SIZE;
+		if (len > 0) {
+			memset(skb_put(skb, len), 0, len);
+			datalen = frags * PAGE_SIZE;
+		}
+
+		i = 0;
+		while (datalen > 0) {
+			if (unlikely(!pkt_dev->page)) {
+				int node = numa_node_id();
+
+				if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
+					node = pkt_dev->node;
+				pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+				if (!pkt_dev->page)
+					break;
+			}
+			skb_shinfo(skb)->frags[i].page = pkt_dev->page;
+			get_page(pkt_dev->page);
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size =
+			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			datalen -= skb_shinfo(skb)->frags[i].size;
+			skb->len += skb_shinfo(skb)->frags[i].size;
+			skb->data_len += skb_shinfo(skb)->frags[i].size;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+
+		while (i < frags) {
+			int rem;
+
+			if (i == 0)
+				break;
+
+			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+			if (rem == 0)
+				break;
+
+			skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+			skb_shinfo(skb)->frags[i] =
+			    skb_shinfo(skb)->frags[i - 1];
+			get_page(skb_shinfo(skb)->frags[i].page);
+			skb_shinfo(skb)->frags[i].page =
+			    skb_shinfo(skb)->frags[i - 1].page;
+			skb_shinfo(skb)->frags[i].page_offset +=
+			    skb_shinfo(skb)->frags[i - 1].size;
+			skb_shinfo(skb)->frags[i].size = rem;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+	/* Stamp the time, and sequence number,
+	 * convert them to network byte order
+	 */
+	pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+	pgh->seq_num = htonl(pkt_dev->seq_num);
+
+	do_gettimeofday(&timestamp);
+	pgh->tv_sec = htonl(timestamp.tv_sec);
+	pgh->tv_usec = htonl(timestamp.tv_usec);
+}
+
 static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 					struct pktgen_dev *pkt_dev)
 {
@@ -2613,7 +2701,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	struct udphdr *udph;
 	int datalen, iplen;
 	struct iphdr *iph;
-	struct pktgen_hdr *pgh = NULL;
 	__be16 protocol = htons(ETH_P_IP);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;	/* Encapsulates priority and VLAN ID */
@@ -2729,76 +2816,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 			pkt_dev->pkt_overhead);
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-
-	if (pkt_dev->nfrags <= 0) {
-		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-		memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
-	} else {
-		int frags = pkt_dev->nfrags;
-		int i, len;
-
-		pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
-		if (frags > MAX_SKB_FRAGS)
-			frags = MAX_SKB_FRAGS;
-		if (datalen > frags * PAGE_SIZE) {
-			len = datalen - frags * PAGE_SIZE;
-			memset(skb_put(skb, len), 0, len);
-			datalen = frags * PAGE_SIZE;
-		}
-
-		i = 0;
-		while (datalen > 0) {
-			struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
-			skb_shinfo(skb)->frags[i].page = page;
-			skb_shinfo(skb)->frags[i].page_offset = 0;
-			skb_shinfo(skb)->frags[i].size =
-			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
-			datalen -= skb_shinfo(skb)->frags[i].size;
-			skb->len += skb_shinfo(skb)->frags[i].size;
-			skb->data_len += skb_shinfo(skb)->frags[i].size;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-
-		while (i < frags) {
-			int rem;
-
-			if (i == 0)
-				break;
-
-			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
-			if (rem == 0)
-				break;
-
-			skb_shinfo(skb)->frags[i - 1].size -= rem;
-
-			skb_shinfo(skb)->frags[i] =
-			    skb_shinfo(skb)->frags[i - 1];
-			get_page(skb_shinfo(skb)->frags[i].page);
-			skb_shinfo(skb)->frags[i].page =
-			    skb_shinfo(skb)->frags[i - 1].page;
-			skb_shinfo(skb)->frags[i].page_offset +=
-			    skb_shinfo(skb)->frags[i - 1].size;
-			skb_shinfo(skb)->frags[i].size = rem;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-	}
-
-	/* Stamp the time, and sequence number,
-	 * convert them to network byte order
-	 */
-	if (pgh) {
-		struct timeval timestamp;
-
-		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
-		pgh->seq_num = htonl(pkt_dev->seq_num);
-
-		do_gettimeofday(&timestamp);
-		pgh->tv_sec = htonl(timestamp.tv_sec);
-		pgh->tv_usec = htonl(timestamp.tv_usec);
-	}
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
 
 #ifdef CONFIG_XFRM
 	if (!process_ipsec(pkt_dev, skb, protocol))
@@ -2980,7 +2998,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	struct udphdr *udph;
 	int datalen;
 	struct ipv6hdr *iph;
-	struct pktgen_hdr *pgh = NULL;
 	__be16 protocol = htons(ETH_P_IPV6);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;	/* Encapsulates priority and VLAN ID */
@@ -3083,75 +3100,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 
-	if (pkt_dev->nfrags <= 0)
-		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-	else {
-		int frags = pkt_dev->nfrags;
-		int i;
-
-		pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
-		if (frags > MAX_SKB_FRAGS)
-			frags = MAX_SKB_FRAGS;
-		if (datalen > frags * PAGE_SIZE) {
-			skb_put(skb, datalen - frags * PAGE_SIZE);
-			datalen = frags * PAGE_SIZE;
-		}
-
-		i = 0;
-		while (datalen > 0) {
-			struct page *page = alloc_pages(GFP_KERNEL, 0);
-			skb_shinfo(skb)->frags[i].page = page;
-			skb_shinfo(skb)->frags[i].page_offset = 0;
-			skb_shinfo(skb)->frags[i].size =
-			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
-			datalen -= skb_shinfo(skb)->frags[i].size;
-			skb->len += skb_shinfo(skb)->frags[i].size;
-			skb->data_len += skb_shinfo(skb)->frags[i].size;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-
-		while (i < frags) {
-			int rem;
-
-			if (i == 0)
-				break;
-
-			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
-			if (rem == 0)
-				break;
-
-			skb_shinfo(skb)->frags[i - 1].size -= rem;
-
-			skb_shinfo(skb)->frags[i] =
-			    skb_shinfo(skb)->frags[i - 1];
-			get_page(skb_shinfo(skb)->frags[i].page);
-			skb_shinfo(skb)->frags[i].page =
-			    skb_shinfo(skb)->frags[i - 1].page;
-			skb_shinfo(skb)->frags[i].page_offset +=
-			    skb_shinfo(skb)->frags[i - 1].size;
-			skb_shinfo(skb)->frags[i].size = rem;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-	}
-
-	/* Stamp the time, and sequence number,
-	 * convert them to network byte order
-	 * should we update cloned packets too ?
-	 */
-	if (pgh) {
-		struct timeval timestamp;
-
-		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
-		pgh->seq_num = htonl(pkt_dev->seq_num);
-
-		do_gettimeofday(&timestamp);
-		pgh->tv_sec = htonl(timestamp.tv_sec);
-		pgh->tv_usec = htonl(timestamp.tv_usec);
-	}
-	/* pkt_dev->seq_num++; FF: you really mean this? */
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
 
 	return skb;
 }
@@ -3321,7 +3270,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
 					    pkt_dev->started_at);
 	ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
 
-	p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n",
+	p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
 		     (unsigned long long)ktime_to_us(elapsed),
 		     (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
 		     (unsigned long long)ktime_to_us(idle),
@@ -3812,7 +3761,10 @@ static int __init pktgen_create_thread(int cpu)
 	list_add_tail(&t->th_list, &pktgen_threads);
 	init_completion(&t->start_done);
 
-	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
+	p = kthread_create_on_node(pktgen_thread_worker,
+				   t,
+				   cpu_to_node(cpu),
+				   "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
 		pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
 		list_del(&t->th_list);
@@ -3884,6 +3836,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 	free_SAs(pkt_dev);
 #endif
 	vfree(pkt_dev->flows);
+	if (pkt_dev->page)
+		put_page(pkt_dev->page);
 	kfree(pkt_dev);
 	return 0;
 }
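
Besides de-duplicating roughly seventy lines that fill_packet_ipv4() and fill_packet_ipv6() each used to carry, pktgen_finalize_skb() reuses a single page cached in struct pktgen_dev instead of calling alloc_pages() per packet; every fragment just takes another reference. Pieced together from the hunks above, the page's lifetime looks like this (sketch):

/* first packet, or first packet after a node change: allocate on demand */
if (unlikely(!pkt_dev->page)) {
	int node = (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE)) ?
			pkt_dev->node : numa_node_id();

	pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
}
get_page(pkt_dev->page);	/* one reference per fragment */

/* on a "node" write, or in pktgen_remove_device(): */
if (pkt_dev->page) {
	put_page(pkt_dev->page);	/* drop the device's reference */
	pkt_dev->page = NULL;		/* next packet reallocates */
}
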
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d65c6bb24c..49f7ea5b4c7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
868 netif_running(dev) ? dev->operstate : IF_OPER_DOWN); 868 netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
869 NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); 869 NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
870 NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); 870 NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
871 NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
871 872
872 if (dev->ifindex != dev->iflink) 873 if (dev->ifindex != dev->iflink)
873 NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); 874 NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1035,6 +1036,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1035 [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, 1036 [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
1036 [IFLA_MTU] = { .type = NLA_U32 }, 1037 [IFLA_MTU] = { .type = NLA_U32 },
1037 [IFLA_LINK] = { .type = NLA_U32 }, 1038 [IFLA_LINK] = { .type = NLA_U32 },
1039 [IFLA_MASTER] = { .type = NLA_U32 },
1038 [IFLA_TXQLEN] = { .type = NLA_U32 }, 1040 [IFLA_TXQLEN] = { .type = NLA_U32 },
1039 [IFLA_WEIGHT] = { .type = NLA_U32 }, 1041 [IFLA_WEIGHT] = { .type = NLA_U32 },
1040 [IFLA_OPERSTATE] = { .type = NLA_U8 }, 1042 [IFLA_OPERSTATE] = { .type = NLA_U8 },
@@ -1177,6 +1179,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1177 return err; 1179 return err;
1178} 1180}
1179 1181
1182static int do_set_master(struct net_device *dev, int ifindex)
1183{
1184 struct net_device *master_dev;
1185 const struct net_device_ops *ops;
1186 int err;
1187
1188 if (dev->master) {
1189 if (dev->master->ifindex == ifindex)
1190 return 0;
1191 ops = dev->master->netdev_ops;
1192 if (ops->ndo_del_slave) {
1193 err = ops->ndo_del_slave(dev->master, dev);
1194 if (err)
1195 return err;
1196 } else {
1197 return -EOPNOTSUPP;
1198 }
1199 }
1200
1201 if (ifindex) {
1202 master_dev = __dev_get_by_index(dev_net(dev), ifindex);
1203 if (!master_dev)
1204 return -EINVAL;
1205 ops = master_dev->netdev_ops;
1206 if (ops->ndo_add_slave) {
1207 err = ops->ndo_add_slave(master_dev, dev);
1208 if (err)
1209 return err;
1210 } else {
1211 return -EOPNOTSUPP;
1212 }
1213 }
1214 return 0;
1215}
1216
1180static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, 1217static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1181 struct nlattr **tb, char *ifname, int modified) 1218 struct nlattr **tb, char *ifname, int modified)
1182{ 1219{
@@ -1264,6 +1301,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1264 modified = 1; 1301 modified = 1;
1265 } 1302 }
1266 1303
1304 if (tb[IFLA_GROUP]) {
1305 dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
1306 modified = 1;
1307 }
1308
1267 /* 1309 /*
1268 * Interface selected by interface index but interface 1310 * Interface selected by interface index but interface
1269 * name provided implies that a name change has been 1311 * name provided implies that a name change has been
@@ -1295,6 +1337,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1295 goto errout; 1337 goto errout;
1296 } 1338 }
1297 1339
1340 if (tb[IFLA_MASTER]) {
1341 err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
1342 if (err)
1343 goto errout;
1344 modified = 1;
1345 }
1346
1298 if (tb[IFLA_TXQLEN]) 1347 if (tb[IFLA_TXQLEN])
1299 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); 1348 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
1300 1349
@@ -1541,6 +1590,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
1541 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); 1590 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
1542 if (tb[IFLA_LINKMODE]) 1591 if (tb[IFLA_LINKMODE])
1543 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); 1592 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
1593 if (tb[IFLA_GROUP])
1594 dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
1544 1595
1545 return dev; 1596 return dev;
1546 1597
@@ -1551,6 +1602,24 @@ err:
1551} 1602}
1552EXPORT_SYMBOL(rtnl_create_link); 1603EXPORT_SYMBOL(rtnl_create_link);
1553 1604
1605static int rtnl_group_changelink(struct net *net, int group,
1606 struct ifinfomsg *ifm,
1607 struct nlattr **tb)
1608{
1609 struct net_device *dev;
1610 int err;
1611
1612 for_each_netdev(net, dev) {
1613 if (dev->group == group) {
1614 err = do_setlink(dev, ifm, tb, NULL, 0);
1615 if (err < 0)
1616 return err;
1617 }
1618 }
1619
1620 return 0;
1621}
1622
1554static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1623static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1555{ 1624{
1556 struct net *net = sock_net(skb->sk); 1625 struct net *net = sock_net(skb->sk);
@@ -1578,10 +1647,12 @@ replay:
1578 ifm = nlmsg_data(nlh); 1647 ifm = nlmsg_data(nlh);
1579 if (ifm->ifi_index > 0) 1648 if (ifm->ifi_index > 0)
1580 dev = __dev_get_by_index(net, ifm->ifi_index); 1649 dev = __dev_get_by_index(net, ifm->ifi_index);
1581 else if (ifname[0]) 1650 else {
1582 dev = __dev_get_by_name(net, ifname); 1651 if (ifname[0])
1583 else 1652 dev = __dev_get_by_name(net, ifname);
1584 dev = NULL; 1653 else
1654 dev = NULL;
1655 }
1585 1656
1586 err = validate_linkmsg(dev, tb); 1657 err = validate_linkmsg(dev, tb);
1587 if (err < 0) 1658 if (err < 0)
@@ -1645,8 +1716,13 @@ replay:
1645 return do_setlink(dev, ifm, tb, ifname, modified); 1716 return do_setlink(dev, ifm, tb, ifname, modified);
1646 } 1717 }
1647 1718
1648 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) 1719 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1720 if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
1721 return rtnl_group_changelink(net,
1722 nla_get_u32(tb[IFLA_GROUP]),
1723 ifm, tb);
1649 return -ENODEV; 1724 return -ENODEV;
1725 }
1650 1726
1651 if (ifm->ifi_index) 1727 if (ifm->ifi_index)
1652 return -EOPNOTSUPP; 1728 return -EOPNOTSUPP;
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe45445080..4c1ef026d69 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
95 int fd = fdp[i]; 95 int fd = fdp[i];
96 struct file *file; 96 struct file *file;
97 97
98 if (fd < 0 || !(file = fget(fd))) 98 if (fd < 0 || !(file = fget_raw(fd)))
99 return -EBADF; 99 return -EBADF;
100 *fpp++ = file; 100 *fpp++ = file;
101 fpl->count++; 101 fpl->count++;
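
fget() deliberately refuses descriptors opened with O_PATH; switching scm_fp_copy() to fget_raw() lets such descriptors ride an AF_UNIX socket like any other fd. A userspace sketch of what this enables (error handling trimmed):

#include <fcntl.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Send one fd over an AF_UNIX socket.  With fget_raw() in scm_fp_copy(),
 * fd may now be an O_PATH descriptor, which previously drew -EBADF.
 */
static int send_fd(int sock, int fd)
{
	char dummy = 'x', ctrl[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
	return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}
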
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d883dcc78b6..801dd08908f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -523,7 +523,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
 	new->priority		= old->priority;
-	new->deliver_no_wcard	= old->deliver_no_wcard;
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property	= old->ipvs_property;
 #endif
@@ -2434,8 +2433,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			return -ENOMEM;
 
 		/* initialize the next frag */
-		sk->sk_sndmsg_page = page;
-		sk->sk_sndmsg_off = 0;
 		skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
 		skb->truesize += PAGE_SIZE;
 		atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
@@ -2455,7 +2452,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			return -EFAULT;
 
 		/* copy was successful so update the size parameters */
-		sk->sk_sndmsg_off += copy;
 		frag->size += copy;
 		skb->len += copy;
 		skb->data_len += copy;
@@ -2498,7 +2494,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
  * a pointer to the first in a list of new skbs for the segments.
  * In case of error it returns ERR_PTR(err).
  */
-struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
@@ -2508,7 +2504,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 	unsigned int offset = doffset;
 	unsigned int headroom;
 	unsigned int len;
-	int sg = features & NETIF_F_SG;
+	int sg = !!(features & NETIF_F_SG);
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int err = -ENOMEM;
 	int i = 0;
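
On the skb_segment() change: with the feature word widened to u32, masking a flag that sits in the top bit straight into a plain int would be implementation-defined (and typically negative); the double negation collapses any set bit to exactly 0 or 1 first. A tiny illustration (0x80000000 stands in for a hypothetical top-bit flag; NETIF_F_SG itself is a low bit):

u32 features = 0x80000000;
int risky = features & 0x80000000;	/* value doesn't fit an int: impl.-defined */
int safe  = !!(features & 0x80000000);	/* always exactly 0 or 1 */
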
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index c44348adba3..3609eacaf4c 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, Intel Corporation.
+ * Copyright (c) 2008-2011, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1224,6 +1224,59 @@ err:
 	return err;
 }
 
+static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb,
+				int app_nested_type, int app_info_type,
+				int app_entry_type)
+{
+	struct dcb_peer_app_info info;
+	struct dcb_app *table = NULL;
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	u16 app_count;
+	int err;
+
+
+	/**
+	 * retrieve the peer app configuration from the driver. If the driver
+	 * handlers fail, exit without doing anything
+	 */
+	err = ops->peer_getappinfo(netdev, &info, &app_count);
+	if (!err && app_count) {
+		table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL);
+		if (!table)
+			return -ENOMEM;
+
+		err = ops->peer_getapptable(netdev, table);
+	}
+
+	if (!err) {
+		u16 i;
+		struct nlattr *app;
+
+		/**
+		 * build the message, from here on the only possible failure
+		 * is due to the skb size
+		 */
+		err = -EMSGSIZE;
+
+		app = nla_nest_start(skb, app_nested_type);
+		if (!app)
+			goto nla_put_failure;
+
+		if (app_info_type)
+			NLA_PUT(skb, app_info_type, sizeof(info), &info);
+
+		for (i = 0; i < app_count; i++)
+			NLA_PUT(skb, app_entry_type, sizeof(struct dcb_app),
+				&table[i]);
+
+		nla_nest_end(skb, app);
+	}
+	err = 0;
+
+nla_put_failure:
+	kfree(table);
+	return err;
+}
 
 /* Handle IEEE 802.1Qaz GET commands. */
 static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
@@ -1288,6 +1341,30 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 	spin_unlock(&dcb_lock);
 	nla_nest_end(skb, app);
 
+	/* get peer info if available */
+	if (ops->ieee_peer_getets) {
+		struct ieee_ets ets;
+		err = ops->ieee_peer_getets(netdev, &ets);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets);
+	}
+
+	if (ops->ieee_peer_getpfc) {
+		struct ieee_pfc pfc;
+		err = ops->ieee_peer_getpfc(netdev, &pfc);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc);
+	}
+
+	if (ops->peer_getappinfo && ops->peer_getapptable) {
+		err = dcbnl_build_peer_app(netdev, skb,
+					   DCB_ATTR_IEEE_PEER_APP,
+					   DCB_ATTR_IEEE_APP_UNSPEC,
+					   DCB_ATTR_IEEE_APP);
+		if (err)
+			goto nla_put_failure;
+	}
+
 	nla_nest_end(skb, ieee);
 	nlmsg_end(skb, nlh);
 
@@ -1441,6 +1518,71 @@ err:
 	return ret;
 }
 
+/* Handle CEE DCBX GET commands. */
+static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb,
+			 u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *cee;
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	int err;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_CEE_GET;
+
+	NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name);
+
+	cee = nla_nest_start(skb, DCB_ATTR_CEE);
+	if (!cee)
+		goto nla_put_failure;
+
+	/* get peer info if available */
+	if (ops->cee_peer_getpg) {
+		struct cee_pg pg;
+		err = ops->cee_peer_getpg(netdev, &pg);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg);
+	}
+
+	if (ops->cee_peer_getpfc) {
+		struct cee_pfc pfc;
+		err = ops->cee_peer_getpfc(netdev, &pfc);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc);
+	}
+
+	if (ops->peer_getappinfo && ops->peer_getapptable) {
+		err = dcbnl_build_peer_app(netdev, skb,
+					   DCB_ATTR_CEE_PEER_APP_TABLE,
+					   DCB_ATTR_CEE_PEER_APP_INFO,
+					   DCB_ATTR_CEE_PEER_APP);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, cee);
+	nlmsg_end(skb, nlh);
+
+	return rtnl_unicast(skb, &init_net, pid);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+nlmsg_failure:
+	kfree_skb(skb);
+	return -1;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1570,6 +1712,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq,
 				       nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_CEE_GET:
+		ret = dcbnl_cee_get(netdev, tb, pid, nlh->nlmsg_seq,
+				    nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
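
dcbnl_build_peer_app() sizes its table with peer_getappinfo() and then fills it with peer_getapptable(), so a driver must never report a count larger than what it is prepared to copy. A hypothetical driver-side implementation (the foo_* names and priv fields are invented for illustration):

static int foo_peer_getappinfo(struct net_device *dev,
			       struct dcb_peer_app_info *info,
			       u16 *app_count)
{
	struct foo_priv *priv = netdev_priv(dev);	/* hypothetical priv */

	memcpy(info, &priv->peer_app_info, sizeof(*info));
	*app_count = priv->peer_app_count;
	return 0;
}

static int foo_peer_getapptable(struct net_device *dev,
				struct dcb_app *table)
{
	struct foo_priv *priv = netdev_priv(dev);

	/* must copy exactly the count reported above */
	memcpy(table, priv->peer_app_table,
	       priv->peer_app_count * sizeof(struct dcb_app));
	return 0;
}
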
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e96d5e81003..fadecd20d75 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -583,6 +583,15 @@ done:
 	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
+/*
+ * Convert RFC 3390 larger initial window into an equivalent number of packets.
+ * This is based on the numbers specified in RFC 5681, 3.1.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 smss)
+{
+	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
+}
+
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
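
Worked values for rfc3390_bytes_to_packets(): RFC 5681, 3.1 defines the initial window in bytes as min(4*SMSS, max(2*SMSS, 4380)); dividing through by SMSS gives the packet counts above, with breakpoints at 4380/4 = 1095 and 4380/2 = 2190:

rfc3390_bytes_to_packets(536);	/* -> 4: 4 * 536 = 2144 <= 4380       */
rfc3390_bytes_to_packets(1460);	/* -> 3: 4380 / 1460 = 3 (Ethernet)   */
rfc3390_bytes_to_packets(4096);	/* -> 2: 2 * 4096 = 8192 > 4380       */
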
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 45a434f9416..ae451c6d83b 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -43,9 +43,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct inet_sock *inet = inet_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+	__be16 orig_sport, orig_dport;
 	struct rtable *rt;
 	__be32 daddr, nexthop;
-	int tmp;
 	int err;
 
 	dp->dccps_role = DCCP_ROLE_CLIENT;
@@ -63,12 +63,14 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		nexthop = inet->opt->faddr;
 	}
 
-	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
-			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
-			       IPPROTO_DCCP,
-			       inet->inet_sport, usin->sin_port, sk, 1);
-	if (tmp < 0)
-		return tmp;
+	orig_sport = inet->inet_sport;
+	orig_dport = usin->sin_port;
+	rt = ip_route_connect(nexthop, inet->inet_saddr,
+			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+			      IPPROTO_DCCP,
+			      orig_sport, orig_dport, sk, true);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 		ip_rt_put(rt);
@@ -99,11 +101,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err != 0)
 		goto failure;
 
-	err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport,
-				inet->inet_dport, sk);
-	if (err != 0)
+	rt = ip_route_newports(rt, IPPROTO_DCCP,
+			       orig_sport, orig_dport,
+			       inet->inet_sport, inet->inet_dport, sk);
+	if (IS_ERR(rt)) {
+		rt = NULL;
 		goto failure;
-
+	}
 	/* OK, now commit destination to socket. */
 	sk_setup_caps(sk, &rt->dst);
 
@@ -461,17 +465,19 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 					   struct sk_buff *skb)
 {
 	struct rtable *rt;
-	struct flowi fl = { .oif = skb_rtable(skb)->rt_iif,
-			    .fl4_dst = ip_hdr(skb)->saddr,
-			    .fl4_src = ip_hdr(skb)->daddr,
-			    .fl4_tos = RT_CONN_FLAGS(sk),
-			    .proto = sk->sk_protocol,
-			    .fl_ip_sport = dccp_hdr(skb)->dccph_dport,
-			    .fl_ip_dport = dccp_hdr(skb)->dccph_sport
-			  };
-
-	security_skb_classify_flow(skb, &fl);
-	if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
+	struct flowi4 fl4 = {
+		.flowi4_oif = skb_rtable(skb)->rt_iif,
+		.daddr = ip_hdr(skb)->saddr,
+		.saddr = ip_hdr(skb)->daddr,
+		.flowi4_tos = RT_CONN_FLAGS(sk),
+		.flowi4_proto = sk->sk_protocol,
+		.fl4_sport = dccp_hdr(skb)->dccph_dport,
+		.fl4_dport = dccp_hdr(skb)->dccph_sport,
+	};
+
+	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+	rt = ip_route_output_flow(net, &fl4, sk);
+	if (IS_ERR(rt)) {
 		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
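
The routing helpers used here (ip_route_connect(), ip_route_newports(), ip_route_output_flow()) now return the struct rtable itself, with failures encoded in the pointer via ERR_PTR() rather than through an int return plus an output parameter. The generic <linux/err.h> idiom the converted callers follow (find_route() is a placeholder name):

struct rtable *rt = find_route(...);	/* may return ERR_PTR(-errno) */

if (IS_ERR(rt))			/* pointer falls in the errno range */
	return PTR_ERR(rt);	/* recover the negative errno */
/* rt is a usable route from here on */
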
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index dca711df9b6..de1b7e37ad5 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -147,30 +147,24 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
147 dst = __sk_dst_check(sk, np->dst_cookie); 147 dst = __sk_dst_check(sk, np->dst_cookie);
148 if (dst == NULL) { 148 if (dst == NULL) {
149 struct inet_sock *inet = inet_sk(sk); 149 struct inet_sock *inet = inet_sk(sk);
150 struct flowi fl; 150 struct flowi6 fl6;
151 151
152 /* BUGGG_FUTURE: Again, it is not clear how 152 /* BUGGG_FUTURE: Again, it is not clear how
153 to handle rthdr case. Ignore this complexity 153 to handle rthdr case. Ignore this complexity
154 for now. 154 for now.
155 */ 155 */
156 memset(&fl, 0, sizeof(fl)); 156 memset(&fl6, 0, sizeof(fl6));
157 fl.proto = IPPROTO_DCCP; 157 fl6.flowi6_proto = IPPROTO_DCCP;
158 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 158 ipv6_addr_copy(&fl6.daddr, &np->daddr);
159 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 159 ipv6_addr_copy(&fl6.saddr, &np->saddr);
160 fl.oif = sk->sk_bound_dev_if; 160 fl6.flowi6_oif = sk->sk_bound_dev_if;
161 fl.fl_ip_dport = inet->inet_dport; 161 fl6.fl6_dport = inet->inet_dport;
162 fl.fl_ip_sport = inet->inet_sport; 162 fl6.fl6_sport = inet->inet_sport;
163 security_sk_classify_flow(sk, &fl); 163 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
164 164
165 err = ip6_dst_lookup(sk, &dst, &fl); 165 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
166 if (err) { 166 if (IS_ERR(dst)) {
167 sk->sk_err_soft = -err; 167 sk->sk_err_soft = -PTR_ERR(dst);
168 goto out;
169 }
170
171 err = xfrm_lookup(net, &dst, &fl, sk, 0);
172 if (err < 0) {
173 sk->sk_err_soft = -err;
174 goto out; 168 goto out;
175 } 169 }
176 } else 170 } else
@@ -249,34 +243,30 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 	struct sk_buff *skb;
 	struct ipv6_txoptions *opt = NULL;
 	struct in6_addr *final_p, final;
-	struct flowi fl;
+	struct flowi6 fl6;
 	int err = -1;
 	struct dst_entry *dst;
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_DCCP;
-	ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-	ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
-	fl.fl6_flowlabel = 0;
-	fl.oif = ireq6->iif;
-	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_rsk(req)->loc_port;
-	security_req_classify_flow(req, &fl);
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_DCCP;
+	ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+	ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+	fl6.flowlabel = 0;
+	fl6.flowi6_oif = ireq6->iif;
+	fl6.fl6_dport = inet_rsk(req)->rmt_port;
+	fl6.fl6_sport = inet_rsk(req)->loc_port;
+	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 	opt = np->opt;
 
-	final_p = fl6_update_dst(&fl, opt, &final);
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
-		goto done;
-
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0);
-	if (err < 0)
+	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
 		goto done;
+	}
 
 	skb = dccp_make_response(sk, dst, req);
 	if (skb != NULL) {
@@ -285,8 +275,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
 							 &ireq6->loc_addr,
 							 &ireq6->rmt_addr);
-		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		err = ip6_xmit(sk, skb, &fl, opt);
+		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		err = ip6_xmit(sk, skb, &fl6, opt);
 		err = net_xmit_eval(err);
 	}
 
@@ -308,7 +298,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	struct ipv6hdr *rxip6h;
 	struct sk_buff *skb;
-	struct flowi fl;
+	struct flowi6 fl6;
 	struct net *net = dev_net(skb_dst(rxskb)->dev);
 	struct sock *ctl_sk = net->dccp.v6_ctl_sk;
 	struct dst_entry *dst;
@@ -327,25 +317,24 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
 							    &rxip6h->daddr);
 
-	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
+	memset(&fl6, 0, sizeof(fl6));
+	ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr);
+	ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr);
 
-	fl.proto = IPPROTO_DCCP;
-	fl.oif = inet6_iif(rxskb);
-	fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport;
-	fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport;
-	security_skb_classify_flow(rxskb, &fl);
+	fl6.flowi6_proto = IPPROTO_DCCP;
+	fl6.flowi6_oif = inet6_iif(rxskb);
+	fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
+	fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
+	security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
-	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
-		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
-			skb_dst_set(skb, dst);
-			ip6_xmit(ctl_sk, skb, &fl, NULL);
-			DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
-			DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
-			return;
-		}
+	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
+	if (!IS_ERR(dst)) {
+		skb_dst_set(skb, dst);
+		ip6_xmit(ctl_sk, skb, &fl6, NULL);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+		return;
 	}
 
 	kfree_skb(skb);
@@ -484,7 +473,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	struct inet6_request_sock *ireq6 = inet6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct inet_sock *newinet;
-	struct dccp_sock *newdp;
 	struct dccp6_sock *newdp6;
 	struct sock *newsk;
 	struct ipv6_txoptions *opt;
@@ -498,7 +486,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		return NULL;
 
 	newdp6 = (struct dccp6_sock *)newsk;
-	newdp = dccp_sk(newsk);
 	newinet = inet_sk(newsk);
 	newinet->pinet6 = &newdp6->inet6;
 	newnp = inet6_sk(newsk);
@@ -540,25 +527,20 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	if (dst == NULL) {
 		struct in6_addr *final_p, final;
-		struct flowi fl;
+		struct flowi6 fl6;
 
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = IPPROTO_DCCP;
-		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		final_p = fl6_update_dst(&fl, opt, &final);
-		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
-		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_rsk(req)->loc_port;
-		security_sk_classify_flow(sk, &fl);
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_DCCP;
+		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		final_p = fl6_update_dst(&fl6, opt, &final);
+		ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+		fl6.flowi6_oif = sk->sk_bound_dev_if;
+		fl6.fl6_dport = inet_rsk(req)->rmt_port;
+		fl6.fl6_sport = inet_rsk(req)->loc_port;
+		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-		if (ip6_dst_lookup(sk, &dst, &fl))
-			goto out;
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+		if (IS_ERR(dst))
 			goto out;
 	}
 
@@ -578,7 +560,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	newdp6 = (struct dccp6_sock *)newsk;
 	newinet = inet_sk(newsk);
 	newinet->pinet6 = &newdp6->inet6;
-	newdp = dccp_sk(newsk);
 	newnp = inet6_sk(newsk);
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -878,7 +859,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
878 struct ipv6_pinfo *np = inet6_sk(sk); 859 struct ipv6_pinfo *np = inet6_sk(sk);
879 struct dccp_sock *dp = dccp_sk(sk); 860 struct dccp_sock *dp = dccp_sk(sk);
880 struct in6_addr *saddr = NULL, *final_p, final; 861 struct in6_addr *saddr = NULL, *final_p, final;
881 struct flowi fl; 862 struct flowi6 fl6;
882 struct dst_entry *dst; 863 struct dst_entry *dst;
883 int addr_type; 864 int addr_type;
884 int err; 865 int err;
@@ -891,14 +872,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
891 if (usin->sin6_family != AF_INET6) 872 if (usin->sin6_family != AF_INET6)
892 return -EAFNOSUPPORT; 873 return -EAFNOSUPPORT;
893 874
894 memset(&fl, 0, sizeof(fl)); 875 memset(&fl6, 0, sizeof(fl6));
895 876
896 if (np->sndflow) { 877 if (np->sndflow) {
897 fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; 878 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
898 IP6_ECN_flow_init(fl.fl6_flowlabel); 879 IP6_ECN_flow_init(fl6.flowlabel);
899 if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { 880 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
900 struct ip6_flowlabel *flowlabel; 881 struct ip6_flowlabel *flowlabel;
901 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 882 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
902 if (flowlabel == NULL) 883 if (flowlabel == NULL)
903 return -EINVAL; 884 return -EINVAL;
904 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); 885 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
@@ -935,7 +916,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
935 } 916 }
936 917
937 ipv6_addr_copy(&np->daddr, &usin->sin6_addr); 918 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
938 np->flow_label = fl.fl6_flowlabel; 919 np->flow_label = fl6.flowlabel;
939 920
940 /* 921 /*
941 * DCCP over IPv4 922 * DCCP over IPv4
@@ -972,33 +953,24 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
972 if (!ipv6_addr_any(&np->rcv_saddr)) 953 if (!ipv6_addr_any(&np->rcv_saddr))
973 saddr = &np->rcv_saddr; 954 saddr = &np->rcv_saddr;
974 955
975 fl.proto = IPPROTO_DCCP; 956 fl6.flowi6_proto = IPPROTO_DCCP;
976 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 957 ipv6_addr_copy(&fl6.daddr, &np->daddr);
977 ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); 958 ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr);
978 fl.oif = sk->sk_bound_dev_if; 959 fl6.flowi6_oif = sk->sk_bound_dev_if;
979 fl.fl_ip_dport = usin->sin6_port; 960 fl6.fl6_dport = usin->sin6_port;
980 fl.fl_ip_sport = inet->inet_sport; 961 fl6.fl6_sport = inet->inet_sport;
981 security_sk_classify_flow(sk, &fl); 962 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
982 963
983 final_p = fl6_update_dst(&fl, np->opt, &final); 964 final_p = fl6_update_dst(&fl6, np->opt, &final);
984 965
985 err = ip6_dst_lookup(sk, &dst, &fl); 966 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
986 if (err) 967 if (IS_ERR(dst)) {
968 err = PTR_ERR(dst);
987 goto failure; 969 goto failure;
988
989 if (final_p)
990 ipv6_addr_copy(&fl.fl6_dst, final_p);
991
992 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
993 if (err < 0) {
994 if (err == -EREMOTE)
995 err = ip6_dst_blackhole(sk, &dst, &fl);
996 if (err < 0)
997 goto failure;
998 } 970 }
999 971
1000 if (saddr == NULL) { 972 if (saddr == NULL) {
1001 saddr = &fl.fl6_src; 973 saddr = &fl6.saddr;
1002 ipv6_addr_copy(&np->rcv_saddr, saddr); 974 ipv6_addr_copy(&np->rcv_saddr, saddr);
1003 } 975 }
1004 976
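The dccp_v6_connect() hunk above is one instance of a conversion that recurs throughout this series (and again in the dn_route, af_inet, arp and datagram hunks below): routing lookups stop returning an int plus an output parameter and instead return the route object itself, with failures encoded in the pointer value via the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() helpers. A minimal userspace sketch of that convention, using hypothetical stand-in types rather than the real routing structures:

/*
 * Illustrative only: re-implements the err.h helpers and fakes the
 * lookup. The real ip6_dst_lookup_flow() obviously does much more.
 */
#include <stdio.h>

#define MAX_ERRNO	4095
#define ENETUNREACH	101

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* errors live in the top 4095 values of the address space */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct dst_entry { int ifindex; };	/* hypothetical stand-in */

static struct dst_entry good_dst = { .ifindex = 2 };

/* Stand-in for ip6_dst_lookup_flow(): dst on success, ERR_PTR on failure. */
static struct dst_entry *lookup_flow(int reachable)
{
	return reachable ? &good_dst : ERR_PTR(-ENETUNREACH);
}

int main(void)
{
	struct dst_entry *dst = lookup_flow(0);

	if (IS_ERR(dst))	/* replaces the old "if (err) goto failure;" */
		printf("lookup failed: %ld\n", PTR_ERR(dst));
	else
		printf("dst via ifindex %d\n", dst->ifindex);
	return 0;
}

The win is that the error and the result can no longer disagree: there is a single return value, so callers cannot forget to check the status before using the route.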
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2af15b15d1f..ea3b6ee21fc 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -908,7 +908,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
 	struct socket *sock = sk->sk_socket;
 	struct dn_scp *scp = DN_SK(sk);
 	int err = -EISCONN;
-	struct flowi fl;
+	struct flowidn fld;
 
 	if (sock->state == SS_CONNECTED)
 		goto out;
@@ -947,13 +947,13 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
 	memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn));
 
 	err = -EHOSTUNREACH;
-	memset(&fl, 0, sizeof(fl));
-	fl.oif = sk->sk_bound_dev_if;
-	fl.fld_dst = dn_saddr2dn(&scp->peer);
-	fl.fld_src = dn_saddr2dn(&scp->addr);
-	dn_sk_ports_copy(&fl, scp);
-	fl.proto = DNPROTO_NSP;
-	if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0)
+	memset(&fld, 0, sizeof(fld));
+	fld.flowidn_oif = sk->sk_bound_dev_if;
+	fld.daddr = dn_saddr2dn(&scp->peer);
+	fld.saddr = dn_saddr2dn(&scp->addr);
+	dn_sk_ports_copy(&fld, scp);
+	fld.flowidn_proto = DNPROTO_NSP;
+	if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0)
 		goto out;
 	sk->sk_route_caps = sk->sk_dst_cache->dev->features;
 	sock->state = SS_CONNECTING;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 0ef0a81bcd7..1c74ed36ce8 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -201,7 +201,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct
 	int err;
 
 	if (nh->nh_gw) {
-		struct flowi fl;
+		struct flowidn fld;
 		struct dn_fib_res res;
 
 		if (nh->nh_flags&RTNH_F_ONLINK) {
@@ -221,15 +221,15 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct
 			return 0;
 		}
 
-		memset(&fl, 0, sizeof(fl));
-		fl.fld_dst = nh->nh_gw;
-		fl.oif = nh->nh_oif;
-		fl.fld_scope = r->rtm_scope + 1;
+		memset(&fld, 0, sizeof(fld));
+		fld.daddr = nh->nh_gw;
+		fld.flowidn_oif = nh->nh_oif;
+		fld.flowidn_scope = r->rtm_scope + 1;
 
-		if (fl.fld_scope < RT_SCOPE_LINK)
-			fl.fld_scope = RT_SCOPE_LINK;
+		if (fld.flowidn_scope < RT_SCOPE_LINK)
+			fld.flowidn_scope = RT_SCOPE_LINK;
 
-		if ((err = dn_fib_lookup(&fl, &res)) != 0)
+		if ((err = dn_fib_lookup(&fld, &res)) != 0)
 			return err;
 
 		err = -EINVAL;
@@ -404,7 +404,7 @@ failure:
 	return NULL;
 }
 
-int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *fl, struct dn_fib_res *res)
+int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res)
 {
 	int err = dn_fib_props[type].error;
 
@@ -424,7 +424,8 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *
 		for_nexthops(fi) {
 			if (nh->nh_flags & RTNH_F_DEAD)
 				continue;
-			if (!fl->oif || fl->oif == nh->nh_oif)
+			if (!fld->flowidn_oif ||
+			    fld->flowidn_oif == nh->nh_oif)
 				break;
 		}
 		if (nhsel < fi->fib_nhs) {
@@ -445,7 +446,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *
 	return err;
 }
 
-void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res)
+void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
 	int w;
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 2ef115277be..bd78836a81e 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -78,7 +78,7 @@ static void dn_nsp_send(struct sk_buff *skb)
 	struct sock *sk = skb->sk;
 	struct dn_scp *scp = DN_SK(sk);
 	struct dst_entry *dst;
-	struct flowi fl;
+	struct flowidn fld;
 
 	skb_reset_transport_header(skb);
 	scp->stamp = jiffies;
@@ -91,13 +91,13 @@ try_again:
 		return;
 	}
 
-	memset(&fl, 0, sizeof(fl));
-	fl.oif = sk->sk_bound_dev_if;
-	fl.fld_src = dn_saddr2dn(&scp->addr);
-	fl.fld_dst = dn_saddr2dn(&scp->peer);
-	dn_sk_ports_copy(&fl, scp);
-	fl.proto = DNPROTO_NSP;
-	if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) {
+	memset(&fld, 0, sizeof(fld));
+	fld.flowidn_oif = sk->sk_bound_dev_if;
+	fld.saddr = dn_saddr2dn(&scp->addr);
+	fld.daddr = dn_saddr2dn(&scp->peer);
+	dn_sk_ports_copy(&fld, scp);
+	fld.flowidn_proto = DNPROTO_NSP;
+	if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) {
 		dst = sk_dst_get(sk);
 		sk->sk_route_caps = dst->dev->features;
 		goto try_again;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5e636365d33..9f09d4fc288 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
 static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
+static void dn_dst_destroy(struct dst_entry *);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = {
 	.check = dn_dst_check,
 	.default_advmss = dn_dst_default_advmss,
 	.default_mtu = dn_dst_default_mtu,
+	.cow_metrics = dst_cow_metrics_generic,
+	.destroy = dn_dst_destroy,
 	.negative_advice = dn_dst_negative_advice,
 	.link_failure = dn_dst_link_failure,
 	.update_pmtu = dn_dst_update_pmtu,
 };
 
+static void dn_dst_destroy(struct dst_entry *dst)
+{
+	dst_destroy_metrics_generic(dst);
+}
+
 static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
 {
 	__u16 tmp = (__u16 __force)(src ^ dst);
@@ -274,14 +282,14 @@ static void dn_dst_link_failure(struct sk_buff *skb)
 {
 }
 
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2)
 {
-	return ((fl1->fld_dst ^ fl2->fld_dst) |
-		(fl1->fld_src ^ fl2->fld_src) |
-		(fl1->mark ^ fl2->mark) |
-		(fl1->fld_scope ^ fl2->fld_scope) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return ((fl1->daddr ^ fl2->daddr) |
+		(fl1->saddr ^ fl2->saddr) |
+		(fl1->flowidn_mark ^ fl2->flowidn_mark) |
+		(fl1->flowidn_scope ^ fl2->flowidn_scope) |
+		(fl1->flowidn_oif ^ fl2->flowidn_oif) |
+		(fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0;
 }
 
 static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp)
@@ -295,7 +303,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
 	spin_lock_bh(&dn_rt_hash_table[hash].lock);
 	while ((rth = rcu_dereference_protected(*rthp,
 			lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
-		if (compare_keys(&rth->fl, &rt->fl)) {
+		if (compare_keys(&rth->fld, &rt->fld)) {
 			/* Put it first */
 			*rthp = rth->dst.dn_next;
 			rcu_assign_pointer(rth->dst.dn_next,
@@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
 	struct net_device *dev = rt->dst.dev;
+	unsigned int mss_metric;
 	struct neighbour *n;
-	unsigned int metric;
 
 	if (fi) {
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		dst_import_metrics(&rt->dst, fi->fib_metrics);
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 	}
 	rt->rt_type = res->type;
 
@@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 
 	if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
 		dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
-	metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
-	if (metric) {
+	mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
+	if (mss_metric) {
 		unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
-		if (metric > mss)
+		if (mss_metric > mss)
 			dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
 	}
 	return 0;
@@ -895,14 +903,16 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re
 	return (daddr&~mask)|res->fi->fib_nh->nh_gw;
 }
 
-static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard)
+static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard)
 {
-	struct flowi fl = { .fld_dst = oldflp->fld_dst,
-			    .fld_src = oldflp->fld_src,
-			    .fld_scope = RT_SCOPE_UNIVERSE,
-			    .mark = oldflp->mark,
-			    .iif = init_net.loopback_dev->ifindex,
-			    .oif = oldflp->oif };
+	struct flowidn fld = {
+		.daddr = oldflp->daddr,
+		.saddr = oldflp->saddr,
+		.flowidn_scope = RT_SCOPE_UNIVERSE,
+		.flowidn_mark = oldflp->flowidn_mark,
+		.flowidn_iif = init_net.loopback_dev->ifindex,
+		.flowidn_oif = oldflp->flowidn_oif,
+	};
 	struct dn_route *rt = NULL;
 	struct net_device *dev_out = NULL, *dev;
 	struct neighbour *neigh = NULL;
@@ -916,13 +926,14 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
 	if (decnet_debug_level & 16)
 		printk(KERN_DEBUG
 		       "dn_route_output_slow: dst=%04x src=%04x mark=%d"
-		       " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst),
-		       le16_to_cpu(oldflp->fld_src),
-		       oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif);
+		       " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
+		       le16_to_cpu(oldflp->saddr),
+		       oldflp->flowidn_mark, init_net.loopback_dev->ifindex,
+		       oldflp->flowidn_oif);
 
 	/* If we have an output interface, verify its a DECnet device */
-	if (oldflp->oif) {
-		dev_out = dev_get_by_index(&init_net, oldflp->oif);
+	if (oldflp->flowidn_oif) {
+		dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif);
 		err = -ENODEV;
 		if (dev_out && dev_out->dn_ptr == NULL) {
 			dev_put(dev_out);
@@ -933,11 +944,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
 	}
 
 	/* If we have a source address, verify that its a local address */
-	if (oldflp->fld_src) {
+	if (oldflp->saddr) {
 		err = -EADDRNOTAVAIL;
 
 		if (dev_out) {
-			if (dn_dev_islocal(dev_out, oldflp->fld_src))
+			if (dn_dev_islocal(dev_out, oldflp->saddr))
 				goto source_ok;
 			dev_put(dev_out);
 			goto out;
@@ -946,11 +957,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
 		for_each_netdev_rcu(&init_net, dev) {
 			if (!dev->dn_ptr)
 				continue;
-			if (!dn_dev_islocal(dev, oldflp->fld_src))
+			if (!dn_dev_islocal(dev, oldflp->saddr))
 				continue;
 			if ((dev->flags & IFF_LOOPBACK) &&
-			    oldflp->fld_dst &&
-			    !dn_dev_islocal(dev, oldflp->fld_dst))
+			    oldflp->daddr &&
+			    !dn_dev_islocal(dev, oldflp->daddr))
 				continue;
 
 			dev_out = dev;
@@ -965,22 +976,22 @@ source_ok:
 	}
 
 	/* No destination? Assume its local */
-	if (!fl.fld_dst) {
-		fl.fld_dst = fl.fld_src;
+	if (!fld.daddr) {
+		fld.daddr = fld.saddr;
 
 		err = -EADDRNOTAVAIL;
 		if (dev_out)
 			dev_put(dev_out);
 		dev_out = init_net.loopback_dev;
 		dev_hold(dev_out);
-		if (!fl.fld_dst) {
-			fl.fld_dst =
-			fl.fld_src = dnet_select_source(dev_out, 0,
+		if (!fld.daddr) {
+			fld.daddr =
+			fld.saddr = dnet_select_source(dev_out, 0,
 						       RT_SCOPE_HOST);
-			if (!fl.fld_dst)
+			if (!fld.daddr)
 				goto out;
 		}
-		fl.oif = init_net.loopback_dev->ifindex;
+		fld.flowidn_oif = init_net.loopback_dev->ifindex;
 		res.type = RTN_LOCAL;
 		goto make_route;
 	}
@@ -989,8 +1000,8 @@ source_ok:
 		printk(KERN_DEBUG
 		       "dn_route_output_slow: initial checks complete."
 		       " dst=%o4x src=%04x oif=%d try_hard=%d\n",
-		       le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src),
-		       fl.oif, try_hard);
+		       le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
+		       fld.flowidn_oif, try_hard);
 
 	/*
	 * N.B. If the kernel is compiled without router support then
@@ -998,7 +1009,7 @@ source_ok:
	 * will always be executed.
	 */
 	err = -ESRCH;
-	if (try_hard || (err = dn_fib_lookup(&fl, &res)) != 0) {
+	if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) {
 		struct dn_dev *dn_db;
 		if (err != -ESRCH)
 			goto out;
@@ -1013,19 +1024,19 @@ source_ok:
		 * here
		 */
 		if (!try_hard) {
-			neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst);
+			neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr);
 			if (neigh) {
-				if ((oldflp->oif &&
-				    (neigh->dev->ifindex != oldflp->oif)) ||
-				    (oldflp->fld_src &&
+				if ((oldflp->flowidn_oif &&
+				    (neigh->dev->ifindex != oldflp->flowidn_oif)) ||
+				    (oldflp->saddr &&
 				    (!dn_dev_islocal(neigh->dev,
-						    oldflp->fld_src)))) {
+						    oldflp->saddr)))) {
 					neigh_release(neigh);
 					neigh = NULL;
 				} else {
 					if (dev_out)
 						dev_put(dev_out);
-					if (dn_dev_islocal(neigh->dev, fl.fld_dst)) {
+					if (dn_dev_islocal(neigh->dev, fld.daddr)) {
 						dev_out = init_net.loopback_dev;
 						res.type = RTN_LOCAL;
 					} else {
@@ -1045,7 +1056,7 @@ source_ok:
 			goto out;
 		dn_db = rcu_dereference_raw(dev_out->dn_ptr);
 		/* Possible improvement - check all devices for local addr */
-		if (dn_dev_islocal(dev_out, fl.fld_dst)) {
+		if (dn_dev_islocal(dev_out, fld.daddr)) {
 			dev_put(dev_out);
 			dev_out = init_net.loopback_dev;
 			dev_hold(dev_out);
@@ -1061,16 +1072,16 @@ select_source:
 	if (neigh)
 		gateway = ((struct dn_neigh *)neigh)->addr;
 	if (gateway == 0)
-		gateway = fl.fld_dst;
-	if (fl.fld_src == 0) {
-		fl.fld_src = dnet_select_source(dev_out, gateway,
-						res.type == RTN_LOCAL ?
-						RT_SCOPE_HOST :
-						RT_SCOPE_LINK);
-		if (fl.fld_src == 0 && res.type != RTN_LOCAL)
+		gateway = fld.daddr;
+	if (fld.saddr == 0) {
+		fld.saddr = dnet_select_source(dev_out, gateway,
+					       res.type == RTN_LOCAL ?
+					       RT_SCOPE_HOST :
+					       RT_SCOPE_LINK);
+		if (fld.saddr == 0 && res.type != RTN_LOCAL)
 			goto e_addr;
 	}
-	fl.oif = dev_out->ifindex;
+	fld.flowidn_oif = dev_out->ifindex;
 	goto make_route;
 	}
 	free_res = 1;
@@ -1079,61 +1090,61 @@ select_source:
 		goto e_inval;
 
 	if (res.type == RTN_LOCAL) {
-		if (!fl.fld_src)
-			fl.fld_src = fl.fld_dst;
+		if (!fld.saddr)
+			fld.saddr = fld.daddr;
 		if (dev_out)
 			dev_put(dev_out);
 		dev_out = init_net.loopback_dev;
 		dev_hold(dev_out);
-		fl.oif = dev_out->ifindex;
+		fld.flowidn_oif = dev_out->ifindex;
 		if (res.fi)
 			dn_fib_info_put(res.fi);
 		res.fi = NULL;
 		goto make_route;
 	}
 
-	if (res.fi->fib_nhs > 1 && fl.oif == 0)
-		dn_fib_select_multipath(&fl, &res);
+	if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
+		dn_fib_select_multipath(&fld, &res);
 
 	/*
	 * We could add some logic to deal with default routes here and
	 * get rid of some of the special casing above.
	 */
 
-	if (!fl.fld_src)
-		fl.fld_src = DN_FIB_RES_PREFSRC(res);
+	if (!fld.saddr)
+		fld.saddr = DN_FIB_RES_PREFSRC(res);
 
 	if (dev_out)
 		dev_put(dev_out);
 	dev_out = DN_FIB_RES_DEV(res);
 	dev_hold(dev_out);
-	fl.oif = dev_out->ifindex;
+	fld.flowidn_oif = dev_out->ifindex;
 	gateway = DN_FIB_RES_GW(res);
 
 make_route:
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
-	rt = dst_alloc(&dn_dst_ops);
+	rt = dst_alloc(&dn_dst_ops, 0);
 	if (rt == NULL)
 		goto e_nobufs;
 
 	atomic_set(&rt->dst.__refcnt, 1);
 	rt->dst.flags = DST_HOST;
 
-	rt->fl.fld_src = oldflp->fld_src;
-	rt->fl.fld_dst = oldflp->fld_dst;
-	rt->fl.oif = oldflp->oif;
-	rt->fl.iif = 0;
-	rt->fl.mark = oldflp->mark;
+	rt->fld.saddr = oldflp->saddr;
+	rt->fld.daddr = oldflp->daddr;
+	rt->fld.flowidn_oif = oldflp->flowidn_oif;
+	rt->fld.flowidn_iif = 0;
+	rt->fld.flowidn_mark = oldflp->flowidn_mark;
 
-	rt->rt_saddr = fl.fld_src;
-	rt->rt_daddr = fl.fld_dst;
-	rt->rt_gateway = gateway ? gateway : fl.fld_dst;
-	rt->rt_local_src = fl.fld_src;
+	rt->rt_saddr = fld.saddr;
+	rt->rt_daddr = fld.daddr;
+	rt->rt_gateway = gateway ? gateway : fld.daddr;
+	rt->rt_local_src = fld.saddr;
 
-	rt->rt_dst_map = fl.fld_dst;
-	rt->rt_src_map = fl.fld_src;
+	rt->rt_dst_map = fld.daddr;
+	rt->rt_src_map = fld.saddr;
 
 	rt->dst.dev = dev_out;
 	dev_hold(dev_out);
@@ -1151,7 +1162,7 @@ make_route:
 	if (err)
 		goto e_neighbour;
 
-	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
+	hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
 	dn_insert_route(rt, hash, (struct dn_route **)pprt);
 
done:
@@ -1182,20 +1193,20 @@ e_neighbour:
/*
 * N.B. The flags may be moved into the flowi at some future stage.
 */
-static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *flp, int flags)
+static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags)
 {
-	unsigned hash = dn_hash(flp->fld_src, flp->fld_dst);
+	unsigned hash = dn_hash(flp->saddr, flp->daddr);
 	struct dn_route *rt = NULL;
 
 	if (!(flags & MSG_TRYHARD)) {
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
 			rt = rcu_dereference_bh(rt->dst.dn_next)) {
-			if ((flp->fld_dst == rt->fl.fld_dst) &&
-			    (flp->fld_src == rt->fl.fld_src) &&
-			    (flp->mark == rt->fl.mark) &&
+			if ((flp->daddr == rt->fld.daddr) &&
+			    (flp->saddr == rt->fld.saddr) &&
+			    (flp->flowidn_mark == rt->fld.flowidn_mark) &&
 			    dn_is_output_route(rt) &&
-			    (rt->fl.oif == flp->oif)) {
+			    (rt->fld.flowidn_oif == flp->flowidn_oif)) {
 				dst_use(&rt->dst, jiffies);
 				rcu_read_unlock_bh();
 				*pprt = &rt->dst;
@@ -1208,25 +1219,36 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
 	return dn_route_output_slow(pprt, flp, flags);
 }
 
-static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int flags)
+static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags)
 {
 	int err;
 
 	err = __dn_route_output_key(pprt, flp, flags);
-	if (err == 0 && flp->proto) {
-		err = xfrm_lookup(&init_net, pprt, flp, NULL, 0);
+	if (err == 0 && flp->flowidn_proto) {
+		*pprt = xfrm_lookup(&init_net, *pprt,
+				    flowidn_to_flowi(flp), NULL, 0);
+		if (IS_ERR(*pprt)) {
+			err = PTR_ERR(*pprt);
+			*pprt = NULL;
+		}
 	}
 	return err;
 }
 
-int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock *sk, int flags)
+int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags)
 {
 	int err;
 
 	err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
-	if (err == 0 && fl->proto) {
-		err = xfrm_lookup(&init_net, pprt, fl, sk,
-				  (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT);
+	if (err == 0 && fl->flowidn_proto) {
+		if (!(flags & MSG_DONTWAIT))
+			fl->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP;
+		*pprt = xfrm_lookup(&init_net, *pprt,
+				    flowidn_to_flowi(fl), sk, 0);
+		if (IS_ERR(*pprt)) {
+			err = PTR_ERR(*pprt);
+			*pprt = NULL;
+		}
 	}
 	return err;
 }
@@ -1243,11 +1265,13 @@ static int dn_route_input_slow(struct sk_buff *skb)
 	int flags = 0;
 	__le16 gateway = 0;
 	__le16 local_src = 0;
-	struct flowi fl = { .fld_dst = cb->dst,
-			    .fld_src = cb->src,
-			    .fld_scope = RT_SCOPE_UNIVERSE,
-			    .mark = skb->mark,
-			    .iif = skb->dev->ifindex };
+	struct flowidn fld = {
+		.daddr = cb->dst,
+		.saddr = cb->src,
+		.flowidn_scope = RT_SCOPE_UNIVERSE,
+		.flowidn_mark = skb->mark,
+		.flowidn_iif = skb->dev->ifindex,
+	};
 	struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
 	int err = -EINVAL;
 	int free_res = 0;
@@ -1258,7 +1282,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		goto out;
 
 	/* Zero source addresses are not allowed */
-	if (fl.fld_src == 0)
+	if (fld.saddr == 0)
 		goto out;
 
 	/*
@@ -1272,7 +1296,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 	if (dn_dev_islocal(in_dev, cb->src))
 		goto out;
 
-	err = dn_fib_lookup(&fl, &res);
+	err = dn_fib_lookup(&fld, &res);
 	if (err) {
 		if (err != -ESRCH)
 			goto out;
@@ -1284,7 +1308,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 
 		res.type = RTN_LOCAL;
 	} else {
-		__le16 src_map = fl.fld_src;
+		__le16 src_map = fld.saddr;
 		free_res = 1;
 
 		out_dev = DN_FIB_RES_DEV(res);
@@ -1297,22 +1321,22 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		dev_hold(out_dev);
 
 		if (res.r)
-			src_map = fl.fld_src; /* no NAT support for now */
+			src_map = fld.saddr; /* no NAT support for now */
 
 		gateway = DN_FIB_RES_GW(res);
 		if (res.type == RTN_NAT) {
-			fl.fld_dst = dn_fib_rules_map_destination(fl.fld_dst, &res);
+			fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res);
 			dn_fib_res_put(&res);
 			free_res = 0;
-			if (dn_fib_lookup(&fl, &res))
+			if (dn_fib_lookup(&fld, &res))
 				goto e_inval;
 			free_res = 1;
 			if (res.type != RTN_UNICAST)
 				goto e_inval;
 			flags |= RTCF_DNAT;
-			gateway = fl.fld_dst;
+			gateway = fld.daddr;
 		}
-		fl.fld_src = src_map;
+		fld.saddr = src_map;
 	}
 
 	switch(res.type) {
@@ -1326,8 +1350,8 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		if (dn_db->parms.forwarding == 0)
 			goto e_inval;
 
-		if (res.fi->fib_nhs > 1 && fl.oif == 0)
-			dn_fib_select_multipath(&fl, &res);
+		if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
+			dn_fib_select_multipath(&fld, &res);
 
 		/*
		 * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
@@ -1345,8 +1369,8 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		break;
 	case RTN_LOCAL:
 		flags |= RTCF_LOCAL;
-		fl.fld_src = cb->dst;
-		fl.fld_dst = cb->src;
+		fld.saddr = cb->dst;
+		fld.daddr = cb->src;
 
 		/* Routing tables gave us a gateway */
 		if (gateway)
@@ -1375,25 +1399,25 @@ static int dn_route_input_slow(struct sk_buff *skb)
 	}
 
make_route:
-	rt = dst_alloc(&dn_dst_ops);
+	rt = dst_alloc(&dn_dst_ops, 0);
 	if (rt == NULL)
 		goto e_nobufs;
 
-	rt->rt_saddr = fl.fld_src;
-	rt->rt_daddr = fl.fld_dst;
-	rt->rt_gateway = fl.fld_dst;
+	rt->rt_saddr = fld.saddr;
+	rt->rt_daddr = fld.daddr;
+	rt->rt_gateway = fld.daddr;
 	if (gateway)
 		rt->rt_gateway = gateway;
 	rt->rt_local_src = local_src ? local_src : rt->rt_saddr;
 
-	rt->rt_dst_map = fl.fld_dst;
-	rt->rt_src_map = fl.fld_src;
+	rt->rt_dst_map = fld.daddr;
+	rt->rt_src_map = fld.saddr;
 
-	rt->fl.fld_src = cb->src;
-	rt->fl.fld_dst = cb->dst;
-	rt->fl.oif = 0;
-	rt->fl.iif = in_dev->ifindex;
-	rt->fl.mark = fl.mark;
+	rt->fld.saddr = cb->src;
+	rt->fld.daddr = cb->dst;
+	rt->fld.flowidn_oif = 0;
+	rt->fld.flowidn_iif = in_dev->ifindex;
+	rt->fld.flowidn_mark = fld.flowidn_mark;
 
 	rt->dst.flags = DST_HOST;
 	rt->dst.neighbour = neigh;
@@ -1423,7 +1447,7 @@ make_route:
 	if (err)
 		goto e_neighbour;
 
-	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
+	hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
 	dn_insert_route(rt, hash, &rt);
 	skb_dst_set(skb, &rt->dst);
 
@@ -1463,11 +1487,11 @@ static int dn_route_input(struct sk_buff *skb)
 	rcu_read_lock();
 	for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
 	    rt = rcu_dereference(rt->dst.dn_next)) {
-		if ((rt->fl.fld_src == cb->src) &&
-		    (rt->fl.fld_dst == cb->dst) &&
-		    (rt->fl.oif == 0) &&
-		    (rt->fl.mark == skb->mark) &&
-		    (rt->fl.iif == cb->iif)) {
+		if ((rt->fld.saddr == cb->src) &&
+		    (rt->fld.daddr == cb->dst) &&
+		    (rt->fld.flowidn_oif == 0) &&
+		    (rt->fld.flowidn_mark == skb->mark) &&
+		    (rt->fld.flowidn_iif == cb->iif)) {
 			dst_use(&rt->dst, jiffies);
 			rcu_read_unlock();
 			skb_dst_set(skb, (struct dst_entry *)rt);
@@ -1503,9 +1527,9 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	if (rt->rt_flags & RTCF_NOTIFY)
 		r->rtm_flags |= RTM_F_NOTIFY;
 	RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr);
-	if (rt->fl.fld_src) {
+	if (rt->fld.saddr) {
 		r->rtm_src_len = 16;
-		RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src);
+		RTA_PUT(skb, RTA_SRC, 2, &rt->fld.saddr);
 	}
 	if (rt->dst.dev)
 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex);
@@ -1524,7 +1548,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		      rt->dst.error) < 0)
 		goto rtattr_failure;
 	if (dn_is_input_route(rt))
-		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fld.flowidn_iif);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
@@ -1547,13 +1571,13 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	struct dn_skb_cb *cb;
 	int err;
 	struct sk_buff *skb;
-	struct flowi fl;
+	struct flowidn fld;
 
 	if (!net_eq(net, &init_net))
 		return -EINVAL;
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = DNPROTO_NSP;
+	memset(&fld, 0, sizeof(fld));
+	fld.flowidn_proto = DNPROTO_NSP;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
@@ -1562,15 +1586,15 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	cb = DN_SKB_CB(skb);
 
 	if (rta[RTA_SRC-1])
-		memcpy(&fl.fld_src, RTA_DATA(rta[RTA_SRC-1]), 2);
+		memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2);
 	if (rta[RTA_DST-1])
-		memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2);
+		memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2);
 	if (rta[RTA_IIF-1])
-		memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+		memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
 
-	if (fl.iif) {
+	if (fld.flowidn_iif) {
 		struct net_device *dev;
-		if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) {
+		if ((dev = dev_get_by_index(&init_net, fld.flowidn_iif)) == NULL) {
 			kfree_skb(skb);
 			return -ENODEV;
 		}
@@ -1581,8 +1605,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	}
 	skb->protocol = htons(ETH_P_DNA_RT);
 	skb->dev = dev;
-	cb->src = fl.fld_src;
-	cb->dst = fl.fld_dst;
+	cb->src = fld.saddr;
+	cb->dst = fld.daddr;
 	local_bh_disable();
 	err = dn_route_input(skb);
 	local_bh_enable();
@@ -1594,8 +1618,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		int oif = 0;
 		if (rta[RTA_OIF - 1])
 			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
-		fl.oif = oif;
-		err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0);
+		fld.flowidn_oif = oif;
+		err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
 	}
 
 	if (skb->dev)
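One detail worth noting in the dn_route.c hunks above: compare_keys() keeps the kernel's branch-free comparison idiom while renaming the fields. Each pair of key fields is XORed (zero iff equal) and the results ORed together, so two flows match exactly when the accumulated value is zero. A small standalone sketch of the same idiom with made-up key fields:

#include <stdint.h>
#include <stdio.h>

struct key { uint16_t daddr, saddr; uint32_t mark; int oif, iif; };

/* One branch instead of five: OR of XORs is zero iff all fields match. */
static int keys_equal(const struct key *a, const struct key *b)
{
	return ((a->daddr ^ b->daddr) |
		(a->saddr ^ b->saddr) |
		(a->mark  ^ b->mark)  |
		(a->oif   ^ b->oif)   |
		(a->iif   ^ b->iif)) == 0;
}

int main(void)
{
	struct key a = { 0x0401, 0x0402, 7, 2, 0 };
	struct key b = a;

	printf("equal: %d\n", keys_equal(&a, &b));		/* 1 */
	b.mark = 8;
	printf("after mark change: %d\n", keys_equal(&a, &b));	/* 0 */
	return 0;
}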
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 6eb91df3c55..f0efb0ccfec 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -49,14 +49,15 @@ struct dn_fib_rule
 };
 
 
-int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
+int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res)
 {
 	struct fib_lookup_arg arg = {
 		.result = res,
 	};
 	int err;
 
-	err = fib_rules_lookup(dn_fib_rules_ops, flp, 0, &arg);
+	err = fib_rules_lookup(dn_fib_rules_ops,
+			       flowidn_to_flowi(flp), 0, &arg);
 	res->r = arg.rule;
 
 	return err;
@@ -65,6 +66,7 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
 static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
 			      int flags, struct fib_lookup_arg *arg)
 {
+	struct flowidn *fld = &flp->u.dn;
 	int err = -EAGAIN;
 	struct dn_fib_table *tbl;
 
@@ -90,7 +92,7 @@ static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
 	if (tbl == NULL)
 		goto errout;
 
-	err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result);
+	err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result);
 	if (err > 0)
 		err = -EAGAIN;
errout:
@@ -104,8 +106,9 @@ static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = {
 static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
 	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-	__le16 daddr = fl->fld_dst;
-	__le16 saddr = fl->fld_src;
+	struct flowidn *fld = &fl->u.dn;
+	__le16 daddr = fld->daddr;
+	__le16 saddr = fld->saddr;
 
 	if (((saddr ^ r->src) & r->srcmask) ||
 	    ((daddr ^ r->dst) & r->dstmask))
@@ -175,7 +178,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 
unsigned dnet_addr_type(__le16 addr)
 {
-	struct flowi fl = { .fld_dst = addr };
+	struct flowidn fld = { .daddr = addr };
 	struct dn_fib_res res;
 	unsigned ret = RTN_UNICAST;
 	struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
@@ -183,7 +186,7 @@ unsigned dnet_addr_type(__le16 addr)
 	res.r = NULL;
 
 	if (tb) {
-		if (!tb->lookup(tb, &fl, &res)) {
+		if (!tb->lookup(tb, &fld, &res)) {
 			ret = res.type;
 			dn_fib_res_put(&res);
 		}
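dn_fib_rule_match() and dn_fib_rule_action() above receive the generic struct flowi that fib_rules_lookup() passes around and immediately narrow it to the DECnet view via &fl->u.dn. That works because, in the reworked flow API, each family's key (flowi4, flowi6, flowidn) begins with a shared common header and struct flowi is a union of them. The sketch below mimics that layout; the field set is trimmed and the struct bodies are illustrative, not the kernel's exact definitions:

#include <stdint.h>
#include <stdio.h>

struct flowi_common {
	int		flowic_oif;
	int		flowic_iif;
	uint32_t	flowic_mark;
	uint8_t		flowic_proto;
	uint8_t		flowic_scope;
};

struct flowidn {			/* DECnet flow key */
	struct flowi_common __fl_common;
#define flowidn_oif	__fl_common.flowic_oif
	uint16_t daddr;
	uint16_t saddr;
};

struct flowi {
	union {
		struct flowi_common __fl_common;
		struct flowidn dn;
		/* struct flowi4 ip4; struct flowi6 ip6; ... */
	} u;
};

/* flowidn_to_flowi(): the per-family key *is* the generic key. */
static struct flowi *flowidn_to_flowi(struct flowidn *fldn)
{
	return (struct flowi *)fldn;
}

int main(void)
{
	struct flowidn fld = { .daddr = 0x0401, .flowidn_oif = 2 };
	struct flowi *fl = flowidn_to_flowi(&fld);

	/* Generic code reads the common header; DECnet code uses u.dn. */
	printf("oif=%d daddr=%04x\n",
	       fl->u.__fl_common.flowic_oif, (unsigned)fl->u.dn.daddr);
	return 0;
}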
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f2abd375569..99d8d3a4099 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -59,7 +59,6 @@ struct dn_hash
 };
 
 #define dz_key_0(key)		((key).datum = 0)
-#define dz_prefix(key,dz)	((key).datum)
 
 #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
 	for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
@@ -765,7 +764,7 @@ static int dn_fib_table_flush(struct dn_fib_table *tb)
 	return found;
 }
 
-static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, struct dn_fib_res *res)
+static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res)
 {
 	int err;
 	struct dn_zone *dz;
@@ -774,7 +773,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp,
 	read_lock(&dn_fib_tables_lock);
 	for(dz = t->dh_zone_list; dz; dz = dz->dz_next) {
 		struct dn_fib_node *f;
-		dn_fib_key_t k = dz_key(flp->fld_dst, dz);
+		dn_fib_key_t k = dz_key(flp->daddr, dz);
 
 		for(f = dz_chain(k, dz); f; f = f->fn_next) {
 			if (!dn_key_eq(k, f->fn_key)) {
@@ -789,7 +788,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp,
 			if (f->fn_state&DN_S_ZOMBIE)
 				continue;
 
-			if (f->fn_scope < flp->fld_scope)
+			if (f->fn_scope < flp->flowidn_scope)
 				continue;
 
 			err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res);
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
index 83277f463af..8f4ff5a2c81 100644
--- a/net/dsa/mv88e6060.c
+++ b/net/dsa/mv88e6060.c
@@ -18,7 +18,7 @@
 
 static int reg_read(struct dsa_switch *ds, int addr, int reg)
 {
-	return mdiobus_read(ds->master_mii_bus, addr, reg);
+	return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg);
 }
 
 #define REG_READ(addr, reg) \
@@ -34,7 +34,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg)
 
 static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
 {
-	return mdiobus_write(ds->master_mii_bus, addr, reg, val);
+	return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr,
+			     reg, val);
 }
 
 #define REG_WRITE(addr, reg, val) \
@@ -50,7 +51,7 @@ static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr)
 {
 	int ret;
 
-	ret = mdiobus_read(bus, REG_PORT(0), 0x03);
+	ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
 	if (ret >= 0) {
 		ret &= 0xfff0;
 		if (ret == 0x0600)
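The three mv88e6060.c hunks are a plain bug fix: the switch's register set occupies a window of consecutive MDIO addresses starting at a per-switch base (sw_addr), so register "addr" of the switch lives at sw_addr + addr on the shared bus. A simplified userspace sketch with a fake bus, showing why a read of the bare address only worked when the switch happened to sit at base 0:

#include <stdio.h>

#define BUS_SIZE 32

static int fake_bus[BUS_SIZE];		/* stand-in for the MDIO bus */

static int mdiobus_read(int bus_addr)
{
	return (bus_addr >= 0 && bus_addr < BUS_SIZE) ? fake_bus[bus_addr] : -1;
}

struct dsa_switch { int sw_addr; };

/* Fixed helper: offset every access by the per-switch base, as in the hunk. */
static int reg_read(const struct dsa_switch *ds, int addr)
{
	return mdiobus_read(ds->sw_addr + addr);
}

int main(void)
{
	struct dsa_switch ds = { .sw_addr = 16 };

	fake_bus[16 + 3] = 0x0600;	/* ID register of a switch based at 16 */
	printf("with base: 0x%04x\n", reg_read(&ds, 3));	/* finds it */
	printf("bare addr: 0x%04x\n", mdiobus_read(3));		/* misses it */
	return 0;
}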
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 0c282633791..116d3fd3d66 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -435,10 +435,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		udpdest.sin_addr.s_addr = htonl(network | addr.station);
 	}
 
+	memset(&ah, 0, sizeof(ah));
 	ah.port = port;
 	ah.cb = cb & 0x7f;
 	ah.code = 2;		/* magic */
-	ah.pad = 0;
 
 	/* tack our header on the front of the iovec */
 	size = sizeof(struct aunhdr);
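The econet change swaps a single "ah.pad = 0" for a memset() of the whole header. Field-by-field assignment never touches compiler-inserted padding bytes, and a struct copied verbatim onto the wire would leak whatever stack contents those bytes held; zeroing the struct first makes every byte deterministic. A small sketch, with a hypothetical header layout chosen only to make the padding visible:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct aunhdr_like {
	uint8_t  code;
	uint8_t  port;
	uint8_t  cb;
	uint8_t  pad;
	uint32_t handle;	/* alignment may insert hidden padding too */
};

int main(void)
{
	struct aunhdr_like ah;

	/* Clear everything first, then set only the fields that are used. */
	memset(&ah, 0, sizeof(ah));
	ah.port = 0x99;
	ah.cb = 0x05 & 0x7f;
	ah.code = 2;

	printf("sizeof=%zu, every byte now deterministic\n", sizeof(ah));
	return 0;
}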
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050595d..cbb505ba932 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER
 
	  If unsure, say N here.
 
-choice
-	prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
-	depends on IP_ADVANCED_ROUTER
-	default ASK_IP_FIB_HASH
-
-config ASK_IP_FIB_HASH
-	bool "FIB_HASH"
-	---help---
-	  Current FIB is very proven and good enough for most users.
-
-config IP_FIB_TRIE
-	bool "FIB_TRIE"
-	---help---
-	  Use new experimental LC-trie as FIB lookup algorithm.
-	  This improves lookup performance if you have a large
-	  number of routes.
-
-	  LC-trie is a longest matching prefix lookup algorithm which
-	  performs better than FIB_HASH for large routing tables.
-	  But, it consumes more memory and is more complex.
-
-	  LC-trie is described in:
-
-	  IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
-	  IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
-	  June 1999
-
-	  An experimental study of compression methods for dynamic tries
-	  Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
-	  <http://www.csc.kth.se/~snilsson/software/dyntrie2/>
-
-endchoice
-
-config IP_FIB_HASH
-	def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
-
 config IP_FIB_TRIE_STATS
 	bool "FIB TRIE statistics"
-	depends on IP_FIB_TRIE
+	depends on IP_ADVANCED_ROUTER
 	---help---
	  Keep track of statistics on structure of FIB TRIE table.
	  Useful for testing and measuring TRIE performance.
@@ -140,6 +104,9 @@ config IP_ROUTE_VERBOSE
	  handled by the klogd daemon which is responsible for kernel messages
	  ("man klogd").
 
+config IP_ROUTE_CLASSID
+	bool
+
 config IP_PNP
 	bool "IP: kernel level autoconfiguration"
 	help
@@ -657,4 +624,3 @@ config TCP_MD5SIG
	  on the Internet.
 
	  If unsure, say N.
-
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4978d22f9a7..0dc772d0d12 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
	     tcp_minisocks.o tcp_cong.o \
	     datagram.o raw.o udp.o udplite.o \
	     arp.o icmp.o devinet.o af_inet.o igmp.o \
-	     fib_frontend.o fib_semantics.o \
+	     fib_frontend.o fib_semantics.o fib_trie.o \
	     inet_fragment.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
-obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
-obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 45b89d7bda5..807d83c02ef 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1101,23 +1101,20 @@ int sysctl_ip_dynaddr __read_mostly;
static int inet_sk_reselect_saddr(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int err;
-	struct rtable *rt;
 	__be32 old_saddr = inet->inet_saddr;
-	__be32 new_saddr;
 	__be32 daddr = inet->inet_daddr;
+	struct rtable *rt;
+	__be32 new_saddr;
 
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
 
 	/* Query new route. */
-	err = ip_route_connect(&rt, daddr, 0,
-			       RT_CONN_FLAGS(sk),
-			       sk->sk_bound_dev_if,
-			       sk->sk_protocol,
-			       inet->inet_sport, inet->inet_dport, sk, 0);
-	if (err)
-		return err;
+	rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk),
+			      sk->sk_bound_dev_if, sk->sk_protocol,
+			      inet->inet_sport, inet->inet_dport, sk, false);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
 	sk_setup_caps(sk, &rt->dst);
 
@@ -1160,25 +1157,16 @@ int inet_sk_rebuild_header(struct sock *sk)
 	daddr = inet->inet_daddr;
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
-{
-	struct flowi fl = {
-		.oif = sk->sk_bound_dev_if,
-		.mark = sk->sk_mark,
-		.fl4_dst = daddr,
-		.fl4_src = inet->inet_saddr,
-		.fl4_tos = RT_CONN_FLAGS(sk),
-		.proto = sk->sk_protocol,
-		.flags = inet_sk_flowi_flags(sk),
-		.fl_ip_sport = inet->inet_sport,
-		.fl_ip_dport = inet->inet_dport,
-	};
-
-	security_sk_classify_flow(sk, &fl);
-	err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
-}
-	if (!err)
+	rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr,
+				   inet->inet_dport, inet->inet_sport,
+				   sk->sk_protocol, RT_CONN_FLAGS(sk),
+				   sk->sk_bound_dev_if);
+	if (!IS_ERR(rt)) {
+		err = 0;
 		sk_setup_caps(sk, &rt->dst);
-	else {
+	} else {
+		err = PTR_ERR(rt);
+
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
 		/*
@@ -1231,7 +1219,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct iphdr *iph;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 86961bec70a..4286fd3cc0e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -201,11 +201,14 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->ttl = 0;
 	top_iph->check = 0;
 
-	ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	else
+		ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
-	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
+	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg, 0, skb->len);
@@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = ah->nexthdr;
 	ah_hlen = (ah->hdrlen + 2) << 2;
 
-	if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
-	    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
-		goto out;
+	if (x->props.flags & XFRM_STATE_ALIGN4) {
+		if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	} else {
+		if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	}
 
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
@@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x)
 
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
 
-	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
-					  ahp->icv_trunc_len);
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
+	else
+		x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	x->data = ahp;
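The arithmetic behind the three ah4.c hunks: AH carries its length as 32-bit words minus two ("hdrlen"), and the ICV is padded out to an alignment boundary before that length is computed. IPv4 only mandates 32-bit alignment of the AH header while IPv6 mandates 64-bit, and Linux historically rounded to 64 bits for both; the new XFRM_STATE_ALIGN4 flag selects the tighter IPv4 rounding for interoperability with stacks that use it. A standalone check of the two computations, assuming a 12-byte fixed header and a hypothetical 16-byte truncated ICV chosen so that the two roundings actually differ:

#include <stdio.h>

#define XFRM_ALIGN4(len)	(((len) + 3) & ~3u)
#define XFRM_ALIGN8(len)	(((len) + 7) & ~7u)

int main(void)
{
	unsigned int fixed = 12;	/* sizeof(struct ip_auth_hdr) */
	unsigned int icv_trunc = 16;	/* hypothetical truncated ICV */

	unsigned int h8 = (XFRM_ALIGN8(fixed + icv_trunc) >> 2) - 2;
	unsigned int h4 = (XFRM_ALIGN4(fixed + icv_trunc) >> 2) - 2;

	/* 28 bytes rounds to 32 under ALIGN8 but stays 28 under ALIGN4 */
	printf("align8: hdrlen=%u -> %u bytes on the wire\n", h8, (h8 + 2) * 4);
	printf("align4: hdrlen=%u -> %u bytes on the wire\n", h4, (h4 + 2) * 4);
	return 0;
}

This is also why ah_input() must now accept either rounding depending on the state's flags: the header length on the wire is no longer a single predictable value.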
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7927589813b..090d273d786 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -433,14 +433,13 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 
static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 {
-	struct flowi fl = { .fl4_dst = sip,
-			    .fl4_src = tip };
 	struct rtable *rt;
 	int flag = 0;
 	/*unsigned long now; */
 	struct net *net = dev_net(dev);
 
-	if (ip_route_output_key(net, &rt, &fl) < 0)
+	rt = ip_route_output(net, sip, tip, 0, 0);
+	if (IS_ERR(rt))
 		return 1;
 	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
@@ -1061,12 +1060,10 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	if (r->arp_flags & ATF_PERM)
 		r->arp_flags |= ATF_COM;
 	if (dev == NULL) {
-		struct flowi fl = { .fl4_dst = ip,
-				    .fl4_tos = RTO_ONLINK };
-		struct rtable *rt;
-		err = ip_route_output_key(net, &rt, &fl);
-		if (err != 0)
-			return err;
+		struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+
+		if (IS_ERR(rt))
+			return PTR_ERR(rt);
 		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
@@ -1177,7 +1174,6 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
static int arp_req_delete(struct net *net, struct arpreq *r,
			  struct net_device *dev)
 {
-	int err;
 	__be32 ip;
 
 	if (r->arp_flags & ATF_PUBL)
@@ -1185,12 +1181,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 
 	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (dev == NULL) {
-		struct flowi fl = { .fl4_dst = ip,
-				    .fl4_tos = RTO_ONLINK };
-		struct rtable *rt;
-		err = ip_route_output_key(net, &rt, &fl);
-		if (err != 0)
-			return err;
+		struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+		if (IS_ERR(rt))
+			return PTR_ERR(rt);
 		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 174be6caa5c..85bd24ca4f6 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -46,11 +46,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		if (!saddr)
 			saddr = inet->mc_addr;
 	}
-	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
-			       RT_CONN_FLAGS(sk), oif,
-			       sk->sk_protocol,
-			       inet->inet_sport, usin->sin_port, sk, 1);
-	if (err) {
+	rt = ip_route_connect(usin->sin_addr.s_addr, saddr,
+			      RT_CONN_FLAGS(sk), oif,
+			      sk->sk_protocol,
+			      inet->inet_sport, usin->sin_port, sk, true);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
 		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 		return err;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce92..6d85800daeb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -51,6 +51,7 @@
51#include <linux/inetdevice.h> 51#include <linux/inetdevice.h>
52#include <linux/igmp.h> 52#include <linux/igmp.h>
53#include <linux/slab.h> 53#include <linux/slab.h>
54#include <linux/hash.h>
54#ifdef CONFIG_SYSCTL 55#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h> 56#include <linux/sysctl.h>
56#endif 57#endif
@@ -92,6 +93,71 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93}; 94};
94 95
96/* inet_addr_hash's byte folding depends on this IN4_ADDR_HSIZE
97 * value; if you change this define, make the matching change in
98 * inet_addr_hash as well.
99 */
100#define IN4_ADDR_HSIZE 256
101static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
105{
106 u32 val = (__force u32) addr ^ hash_ptr(net, 8);
107
108 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
109 (IN4_ADDR_HSIZE - 1));
110}
111
112static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
113{
114 unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
115
116 spin_lock(&inet_addr_hash_lock);
117 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
118 spin_unlock(&inet_addr_hash_lock);
119}
120
121static void inet_hash_remove(struct in_ifaddr *ifa)
122{
123 spin_lock(&inet_addr_hash_lock);
124 hlist_del_init_rcu(&ifa->hash);
125 spin_unlock(&inet_addr_hash_lock);
126}
127
128/**
129 * __ip_dev_find - find the first device with a given source address.
130 * @net: the net namespace
131 * @addr: the source address
132 * @devref: if true, take a reference on the found device
133 *
134 * If a caller uses devref=false, it should be protected by RCU, or RTNL
135 */
136struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
137{
138 unsigned int hash = inet_addr_hash(net, addr);
139 struct net_device *result = NULL;
140 struct in_ifaddr *ifa;
141 struct hlist_node *node;
142
143 rcu_read_lock();
144 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
145 struct net_device *dev = ifa->ifa_dev->dev;
146
147 if (!net_eq(dev_net(dev), net))
148 continue;
149 if (ifa->ifa_local == addr) {
150 result = dev;
151 break;
152 }
153 }
154 if (result && devref)
155 dev_hold(result);
156 rcu_read_unlock();
157 return result;
158}
159EXPORT_SYMBOL(__ip_dev_find);
160
95static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 161static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 162
97static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 163static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
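
The new inet_addr_hash() above reduces an IPv4 address to one of IN4_ADDR_HSIZE (256) buckets by XOR-folding its four bytes, after mixing in a per-namespace salt from hash_ptr(net, 8). A standalone model of the same reduction follows; a fixed constant stands in for the namespace salt, so this is an illustrative sketch rather than the kernel function.

#include <stdio.h>
#include <stdint.h>

#define IN4_ADDR_HSIZE 256      /* must stay a power of two for the mask */

/* Fold a 32-bit address into one of 256 buckets by XOR-ing its bytes,
 * as inet_addr_hash() does. The kernel also XORs in hash_ptr(net, 8);
 * a fixed salt stands in for the namespace here. */
static unsigned int addr_hash(uint32_t addr, uint32_t net_salt)
{
        uint32_t val = addr ^ net_salt;

        return (val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
               (IN4_ADDR_HSIZE - 1);
}

int main(void)
{
        uint32_t addr = 0x0101a8c0; /* 192.168.1.1, network order, LE host */

        printf("bucket = %u\n", addr_hash(addr, 0x5a));
        return 0;
}
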
@@ -265,6 +331,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
265 } 331 }
266 332
267 if (!do_promote) { 333 if (!do_promote) {
334 inet_hash_remove(ifa);
268 *ifap1 = ifa->ifa_next; 335 *ifap1 = ifa->ifa_next;
269 336
270 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); 337 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
@@ -281,6 +348,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
281 /* 2. Unlink it */ 348 /* 2. Unlink it */
282 349
283 *ifap = ifa1->ifa_next; 350 *ifap = ifa1->ifa_next;
351 inet_hash_remove(ifa1);
284 352
285 /* 3. Announce address deletion */ 353 /* 3. Announce address deletion */
286 354
@@ -368,6 +436,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
368 ifa->ifa_next = *ifap; 436 ifa->ifa_next = *ifap;
369 *ifap = ifa; 437 *ifap = ifa;
370 438
439 inet_hash_insert(dev_net(in_dev->dev), ifa);
440
371 /* Send message first, then call notifier. 441 /* Send message first, then call notifier.
372 Notifier will trigger FIB update, so that 442 Notifier will trigger FIB update, so that
373 listeners of netlink will know about new ifaddr */ 443 listeners of netlink will know about new ifaddr */
@@ -521,6 +591,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
521 if (tb[IFA_ADDRESS] == NULL) 591 if (tb[IFA_ADDRESS] == NULL)
522 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 592 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523 593
594 INIT_HLIST_NODE(&ifa->hash);
524 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 595 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 596 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 ifa->ifa_flags = ifm->ifa_flags; 597 ifa->ifa_flags = ifm->ifa_flags;
@@ -670,7 +741,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
670 ifap = &ifa->ifa_next) { 741 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 742 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr == 743 sin_orig.sin_addr.s_addr ==
673 ifa->ifa_address) { 744 ifa->ifa_local) {
674 break; /* found */ 745 break; /* found */
675 } 746 }
676 } 747 }
@@ -728,6 +799,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
728 if (!ifa) { 799 if (!ifa) {
729 ret = -ENOBUFS; 800 ret = -ENOBUFS;
730 ifa = inet_alloc_ifa(); 801 ifa = inet_alloc_ifa();
802 INIT_HLIST_NODE(&ifa->hash);
731 if (!ifa) 803 if (!ifa)
732 break; 804 break;
733 if (colon) 805 if (colon)
@@ -1040,8 +1112,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
1040 return; 1112 return;
1041 1113
1042 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1114 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043 ifa->ifa_address, dev, 1115 ifa->ifa_local, dev,
1044 ifa->ifa_address, NULL, 1116 ifa->ifa_local, NULL,
1045 dev->dev_addr, NULL); 1117 dev->dev_addr, NULL);
1046} 1118}
1047 1119
@@ -1084,6 +1156,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1084 struct in_ifaddr *ifa = inet_alloc_ifa(); 1156 struct in_ifaddr *ifa = inet_alloc_ifa();
1085 1157
1086 if (ifa) { 1158 if (ifa) {
1159 INIT_HLIST_NODE(&ifa->hash);
1087 ifa->ifa_local = 1160 ifa->ifa_local =
1088 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1161 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1089 ifa->ifa_prefixlen = 8; 1162 ifa->ifa_prefixlen = 8;
@@ -1720,6 +1793,11 @@ static struct rtnl_af_ops inet_af_ops = {
1720 1793
1721void __init devinet_init(void) 1794void __init devinet_init(void)
1722{ 1795{
1796 int i;
1797
1798 for (i = 0; i < IN4_ADDR_HSIZE; i++)
1799 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1800
1723 register_pernet_subsys(&devinet_ops); 1801 register_pernet_subsys(&devinet_ops);
1724 1802
1725 register_gifconf(PF_INET, inet_gifconf); 1803 register_gifconf(PF_INET, inet_gifconf);
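
Taken together, the devinet.c hunks move __ip_dev_find() from a FIB lookup (removed from fib_frontend.c below) to a direct hash-table walk: addresses are inserted into inet_addr_lst under a spinlock and found under rcu_read_lock(). A minimal userspace model of the pprev-style hlist those helpers manipulate is sketched below; locking and RCU are deliberately omitted and the node layout is simplified, so this shows only the list mechanics.

#include <stdio.h>

/* Minimal userspace hlist shaped like the kernel's: a bare head pointer
 * per bucket plus a pprev back-pointer in each node, so removal needs
 * neither the bucket index nor a doubly-linked head. */
struct hnode {
        struct hnode *next, **pprev;
        unsigned int addr;
};

#define HSIZE 8
static struct hnode *heads[HSIZE];

static void hlist_insert(struct hnode *n)
{
        struct hnode **head = &heads[n->addr % HSIZE];

        n->next = *head;
        if (n->next)
                n->next->pprev = &n->next;
        n->pprev = head;
        *head = n;
}

static void hlist_remove(struct hnode *n)
{
        *n->pprev = n->next;
        if (n->next)
                n->next->pprev = n->pprev;
}

static struct hnode *hlist_find(unsigned int addr)
{
        struct hnode *n;

        for (n = heads[addr % HSIZE]; n; n = n->next)
                if (n->addr == addr)
                        return n;
        return NULL;
}

int main(void)
{
        struct hnode a = { .addr = 42 };

        hlist_insert(&a);
        printf("42 present: %s\n", hlist_find(42) ? "yes" : "no");
        hlist_remove(&a);
        printf("42 after remove: %s\n", hlist_find(42) ? "yes" : "no");
        return 0;
}
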
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index e42a905180f..03f994bcf7d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -33,11 +33,14 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
33 * 33 *
34 * TODO: Use spare space in skb for this where possible. 34 * TODO: Use spare space in skb for this where possible.
35 */ 35 */
36static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) 36static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
37{ 37{
38 unsigned int len; 38 unsigned int len;
39 39
40 len = crypto_aead_ivsize(aead); 40 len = seqhilen;
41
42 len += crypto_aead_ivsize(aead);
43
41 if (len) { 44 if (len) {
42 len += crypto_aead_alignmask(aead) & 45 len += crypto_aead_alignmask(aead) &
43 ~(crypto_tfm_ctx_alignment() - 1); 46 ~(crypto_tfm_ctx_alignment() - 1);
@@ -52,10 +55,15 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
52 return kmalloc(len, GFP_ATOMIC); 55 return kmalloc(len, GFP_ATOMIC);
53} 56}
54 57
55static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) 58static inline __be32 *esp_tmp_seqhi(void *tmp)
59{
60 return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
61}
62static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
56{ 63{
57 return crypto_aead_ivsize(aead) ? 64 return crypto_aead_ivsize(aead) ?
58 PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; 65 PTR_ALIGN((u8 *)tmp + seqhilen,
66 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
59} 67}
60 68
61static inline struct aead_givcrypt_request *esp_tmp_givreq( 69static inline struct aead_givcrypt_request *esp_tmp_givreq(
@@ -122,6 +130,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
122 int plen; 130 int plen;
123 int tfclen; 131 int tfclen;
124 int nfrags; 132 int nfrags;
133 int assoclen;
134 int sglists;
135 int seqhilen;
136 __be32 *seqhi;
125 137
126 /* skb is pure payload to encrypt */ 138 /* skb is pure payload to encrypt */
127 139
@@ -151,14 +163,25 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
151 goto error; 163 goto error;
152 nfrags = err; 164 nfrags = err;
153 165
154 tmp = esp_alloc_tmp(aead, nfrags + 1); 166 assoclen = sizeof(*esph);
167 sglists = 1;
168 seqhilen = 0;
169
170 if (x->props.flags & XFRM_STATE_ESN) {
171 sglists += 2;
172 seqhilen += sizeof(__be32);
173 assoclen += seqhilen;
174 }
175
176 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
155 if (!tmp) 177 if (!tmp)
156 goto error; 178 goto error;
157 179
158 iv = esp_tmp_iv(aead, tmp); 180 seqhi = esp_tmp_seqhi(tmp);
181 iv = esp_tmp_iv(aead, tmp, seqhilen);
159 req = esp_tmp_givreq(aead, iv); 182 req = esp_tmp_givreq(aead, iv);
160 asg = esp_givreq_sg(aead, req); 183 asg = esp_givreq_sg(aead, req);
161 sg = asg + 1; 184 sg = asg + sglists;
162 185
163 /* Fill padding... */ 186 /* Fill padding... */
164 tail = skb_tail_pointer(trailer); 187 tail = skb_tail_pointer(trailer);
@@ -215,19 +238,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
215 } 238 }
216 239
217 esph->spi = x->id.spi; 240 esph->spi = x->id.spi;
218 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); 241 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
219 242
220 sg_init_table(sg, nfrags); 243 sg_init_table(sg, nfrags);
221 skb_to_sgvec(skb, sg, 244 skb_to_sgvec(skb, sg,
222 esph->enc_data + crypto_aead_ivsize(aead) - skb->data, 245 esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
223 clen + alen); 246 clen + alen);
224 sg_init_one(asg, esph, sizeof(*esph)); 247
248 if ((x->props.flags & XFRM_STATE_ESN)) {
249 sg_init_table(asg, 3);
250 sg_set_buf(asg, &esph->spi, sizeof(__be32));
251 *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
252 sg_set_buf(asg + 1, seqhi, seqhilen);
253 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
254 } else
255 sg_init_one(asg, esph, sizeof(*esph));
225 256
226 aead_givcrypt_set_callback(req, 0, esp_output_done, skb); 257 aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
227 aead_givcrypt_set_crypt(req, sg, sg, clen, iv); 258 aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
228 aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); 259 aead_givcrypt_set_assoc(req, asg, assoclen);
229 aead_givcrypt_set_giv(req, esph->enc_data, 260 aead_givcrypt_set_giv(req, esph->enc_data,
230 XFRM_SKB_CB(skb)->seq.output); 261 XFRM_SKB_CB(skb)->seq.output.low);
231 262
232 ESP_SKB_CB(skb)->tmp = tmp; 263 ESP_SKB_CB(skb)->tmp = tmp;
233 err = crypto_aead_givencrypt(req); 264 err = crypto_aead_givencrypt(req);
@@ -346,6 +377,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
346 struct sk_buff *trailer; 377 struct sk_buff *trailer;
347 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); 378 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
348 int nfrags; 379 int nfrags;
380 int assoclen;
381 int sglists;
382 int seqhilen;
383 __be32 *seqhi;
349 void *tmp; 384 void *tmp;
350 u8 *iv; 385 u8 *iv;
351 struct scatterlist *sg; 386 struct scatterlist *sg;
@@ -362,16 +397,27 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
362 goto out; 397 goto out;
363 nfrags = err; 398 nfrags = err;
364 399
400 assoclen = sizeof(*esph);
401 sglists = 1;
402 seqhilen = 0;
403
404 if (x->props.flags & XFRM_STATE_ESN) {
405 sglists += 2;
406 seqhilen += sizeof(__be32);
407 assoclen += seqhilen;
408 }
409
365 err = -ENOMEM; 410 err = -ENOMEM;
366 tmp = esp_alloc_tmp(aead, nfrags + 1); 411 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
367 if (!tmp) 412 if (!tmp)
368 goto out; 413 goto out;
369 414
370 ESP_SKB_CB(skb)->tmp = tmp; 415 ESP_SKB_CB(skb)->tmp = tmp;
371 iv = esp_tmp_iv(aead, tmp); 416 seqhi = esp_tmp_seqhi(tmp);
417 iv = esp_tmp_iv(aead, tmp, seqhilen);
372 req = esp_tmp_req(aead, iv); 418 req = esp_tmp_req(aead, iv);
373 asg = esp_req_sg(aead, req); 419 asg = esp_req_sg(aead, req);
374 sg = asg + 1; 420 sg = asg + sglists;
375 421
376 skb->ip_summed = CHECKSUM_NONE; 422 skb->ip_summed = CHECKSUM_NONE;
377 423
@@ -382,11 +428,19 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
382 428
383 sg_init_table(sg, nfrags); 429 sg_init_table(sg, nfrags);
384 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); 430 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
385 sg_init_one(asg, esph, sizeof(*esph)); 431
432 if ((x->props.flags & XFRM_STATE_ESN)) {
433 sg_init_table(asg, 3);
434 sg_set_buf(asg, &esph->spi, sizeof(__be32));
435 *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
436 sg_set_buf(asg + 1, seqhi, seqhilen);
437 sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
438 } else
439 sg_init_one(asg, esph, sizeof(*esph));
386 440
387 aead_request_set_callback(req, 0, esp_input_done, skb); 441 aead_request_set_callback(req, 0, esp_input_done, skb);
388 aead_request_set_crypt(req, sg, sg, elen, iv); 442 aead_request_set_crypt(req, sg, sg, elen, iv);
389 aead_request_set_assoc(req, asg, sizeof(*esph)); 443 aead_request_set_assoc(req, asg, assoclen);
390 444
391 err = crypto_aead_decrypt(req); 445 err = crypto_aead_decrypt(req);
392 if (err == -EINPROGRESS) 446 if (err == -EINPROGRESS)
@@ -500,10 +554,20 @@ static int esp_init_authenc(struct xfrm_state *x)
500 goto error; 554 goto error;
501 555
502 err = -ENAMETOOLONG; 556 err = -ENAMETOOLONG;
503 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", 557
504 x->aalg ? x->aalg->alg_name : "digest_null", 558 if ((x->props.flags & XFRM_STATE_ESN)) {
505 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) 559 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
506 goto error; 560 "authencesn(%s,%s)",
561 x->aalg ? x->aalg->alg_name : "digest_null",
562 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
563 goto error;
564 } else {
565 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
566 "authenc(%s,%s)",
567 x->aalg ? x->aalg->alg_name : "digest_null",
568 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
569 goto error;
570 }
507 571
508 aead = crypto_alloc_aead(authenc_name, 0, 0); 572 aead = crypto_alloc_aead(authenc_name, 0, 0);
509 err = PTR_ERR(aead); 573 err = PTR_ERR(aead);
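
The esp4.c changes wire up IPsec extended sequence numbers (ESN): when XFRM_STATE_ESN is set, the AEAD template becomes authencesn() and the associated data grows from one scatterlist entry covering the whole ESP header to three entries (SPI, the high 32 sequence bits held in the tmp buffer as seqhi, and the low 32 bits), so the ICV covers the full 64-bit sequence number. The arithmetic below is a rough model of the enlarged esp_alloc_tmp() layout; the sizes are invented stand-ins for the crypto_aead_*() queries and alignment padding is ignored.

#include <stdio.h>
#include <stddef.h>

struct scatterlist { void *buf; unsigned int len; };    /* simplified */

int main(void)
{
        /* Invented stand-ins for the crypto_aead_ivsize()/reqsize queries. */
        int ivsize = 16;
        int reqsize = 64;
        int nfrags = 3;         /* payload fragments */
        int esn = 1;            /* XFRM_STATE_ESN set */

        int sglists = 1 + (esn ? 2 : 0);  /* asg entries: 1, or 3 for ESN */
        int seqhilen = esn ? 4 : 0;       /* room for the high seq bits */

        /* Layout: [seqhi][IV][request][asg sglists][sg frags], as carved
         * up by esp_tmp_seqhi()/esp_tmp_iv(); padding ignored here. */
        size_t len = seqhilen + ivsize + reqsize +
                     (nfrags + sglists) * sizeof(struct scatterlist);

        printf("tmp buffer: %zu bytes, %d assoc-data sg entries\n",
               len, sglists);
        printf("assoc data = spi(4) + seqhi(%d) + seq_no(4) bytes\n",
               seqhilen);
        return 0;
}
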
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1d2cdd43a87..a373a259253 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net)
51{ 51{
52 struct fib_table *local_table, *main_table; 52 struct fib_table *local_table, *main_table;
53 53
54 local_table = fib_hash_table(RT_TABLE_LOCAL); 54 local_table = fib_trie_table(RT_TABLE_LOCAL);
55 if (local_table == NULL) 55 if (local_table == NULL)
56 return -ENOMEM; 56 return -ENOMEM;
57 57
58 main_table = fib_hash_table(RT_TABLE_MAIN); 58 main_table = fib_trie_table(RT_TABLE_MAIN);
59 if (main_table == NULL) 59 if (main_table == NULL)
60 goto fail; 60 goto fail;
61 61
@@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
82 if (tb) 82 if (tb)
83 return tb; 83 return tb;
84 84
85 tb = fib_hash_table(id); 85 tb = fib_trie_table(id);
86 if (!tb) 86 if (!tb)
87 return NULL; 87 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1); 88 h = id & (FIB_TABLE_HASHSZ - 1);
@@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
114} 114}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116 116
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net) 117static void fib_flush(struct net *net)
133{ 118{
134 int flushed = 0; 119 int flushed = 0;
@@ -147,46 +132,6 @@ static void fib_flush(struct net *net)
147 rt_cache_flush(net, -1); 132 rt_cache_flush(net, -1);
148} 133}
149 134
150/**
151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU, or RTNL
157 */
158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
159{
160 struct flowi fl = {
161 .fl4_dst = addr,
162 };
163 struct fib_result res = { 0 };
164 struct net_device *dev = NULL;
165 struct fib_table *local_table;
166
167#ifdef CONFIG_IP_MULTIPLE_TABLES
168 res.r = NULL;
169#endif
170
171 rcu_read_lock();
172 local_table = fib_get_table(net, RT_TABLE_LOCAL);
173 if (!local_table ||
174 fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
175 rcu_read_unlock();
176 return NULL;
177 }
178 if (res.type != RTN_LOCAL)
179 goto out;
180 dev = FIB_RES_DEV(res);
181
182 if (dev && devref)
183 dev_hold(dev);
184out:
185 rcu_read_unlock();
186 return dev;
187}
188EXPORT_SYMBOL(__ip_dev_find);
189
190/* 135/*
191 * Find address type as if only "dev" was present in the system. If 136 * Find address type as if only "dev" was present in the system. If
192 * on_dev is NULL then all interfaces are taken into consideration. 137 * on_dev is NULL then all interfaces are taken into consideration.
@@ -195,7 +140,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
195 const struct net_device *dev, 140 const struct net_device *dev,
196 __be32 addr) 141 __be32 addr)
197{ 142{
198 struct flowi fl = { .fl4_dst = addr }; 143 struct flowi4 fl4 = { .daddr = addr };
199 struct fib_result res; 144 struct fib_result res;
200 unsigned ret = RTN_BROADCAST; 145 unsigned ret = RTN_BROADCAST;
201 struct fib_table *local_table; 146 struct fib_table *local_table;
@@ -213,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
213 if (local_table) { 158 if (local_table) {
214 ret = RTN_UNICAST; 159 ret = RTN_UNICAST;
215 rcu_read_lock(); 160 rcu_read_lock();
216 if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { 161 if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
217 if (!dev || dev == res.fi->fib_dev) 162 if (!dev || dev == res.fi->fib_dev)
218 ret = res.type; 163 ret = res.type;
219 } 164 }
@@ -248,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
248 u32 *itag, u32 mark) 193 u32 *itag, u32 mark)
249{ 194{
250 struct in_device *in_dev; 195 struct in_device *in_dev;
251 struct flowi fl = { 196 struct flowi4 fl4;
252 .fl4_dst = src,
253 .fl4_src = dst,
254 .fl4_tos = tos,
255 .mark = mark,
256 .iif = oif
257 };
258 struct fib_result res; 197 struct fib_result res;
259 int no_addr, rpf, accept_local; 198 int no_addr, rpf, accept_local;
260 bool dev_match; 199 bool dev_match;
261 int ret; 200 int ret;
262 struct net *net; 201 struct net *net;
263 202
203 fl4.flowi4_oif = 0;
204 fl4.flowi4_iif = oif;
205 fl4.flowi4_mark = mark;
206 fl4.daddr = src;
207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos;
209 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
210
264 no_addr = rpf = accept_local = 0; 211 no_addr = rpf = accept_local = 0;
265 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
266 if (in_dev) { 213 if (in_dev) {
@@ -268,14 +215,14 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
268 rpf = IN_DEV_RPFILTER(in_dev); 215 rpf = IN_DEV_RPFILTER(in_dev);
269 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 216 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
270 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 217 if (mark && !IN_DEV_SRC_VMARK(in_dev))
271 fl.mark = 0; 218 fl4.flowi4_mark = 0;
272 } 219 }
273 220
274 if (in_dev == NULL) 221 if (in_dev == NULL)
275 goto e_inval; 222 goto e_inval;
276 223
277 net = dev_net(dev); 224 net = dev_net(dev);
278 if (fib_lookup(net, &fl, &res)) 225 if (fib_lookup(net, &fl4, &res))
279 goto last_resort; 226 goto last_resort;
280 if (res.type != RTN_UNICAST) { 227 if (res.type != RTN_UNICAST) {
281 if (res.type != RTN_LOCAL || !accept_local) 228 if (res.type != RTN_LOCAL || !accept_local)
@@ -306,10 +253,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
306 goto last_resort; 253 goto last_resort;
307 if (rpf == 1) 254 if (rpf == 1)
308 goto e_rpf; 255 goto e_rpf;
309 fl.oif = dev->ifindex; 256 fl4.flowi4_oif = dev->ifindex;
310 257
311 ret = 0; 258 ret = 0;
312 if (fib_lookup(net, &fl, &res) == 0) { 259 if (fib_lookup(net, &fl4, &res) == 0) {
313 if (res.type == RTN_UNICAST) { 260 if (res.type == RTN_UNICAST) {
314 *spec_dst = FIB_RES_PREFSRC(res); 261 *spec_dst = FIB_RES_PREFSRC(res);
315 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 262 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
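
fib_validate_source() now builds a struct flowi4 field by field, and the apparent swap (daddr = src, saddr = dst) is intentional: the reverse-path check routes back toward the packet's source and compares the resulting output interface with the one the packet arrived on. A simplified standalone model of that check, with the FIB lookup stubbed to a fixed answer:

#include <stdio.h>
#include <stdint.h>

struct fib_result { int oif; };

/* Stub FIB lookup: pretend every source routes back out ifindex 2. */
static int fib_lookup_stub(uint32_t daddr, struct fib_result *res)
{
        (void)daddr;
        res->oif = 2;
        return 0;
}

/* Strict reverse-path check: route toward the source and require the
 * best route to leave via the interface the packet arrived on. */
static int validate_source(uint32_t saddr, int in_ifindex, int rpf)
{
        struct fib_result res;

        if (fib_lookup_stub(saddr, &res))
                return rpf ? -1 : 0;    /* no route back */
        if (rpf && res.oif != in_ifindex)
                return -1;              /* arrived on the "wrong" interface */
        return 0;
}

int main(void)
{
        printf("from ifindex 2: %s\n",
               validate_source(0x0a010203, 2, 1) ? "drop" : "ok");
        printf("from ifindex 3: %s\n",
               validate_source(0x0a010203, 3, 1) ? "drop" : "ok");
        return 0;
}
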
@@ -849,11 +796,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
849{ 796{
850 797
851 struct fib_result res; 798 struct fib_result res;
852 struct flowi fl = { 799 struct flowi4 fl4 = {
853 .mark = frn->fl_mark, 800 .flowi4_mark = frn->fl_mark,
854 .fl4_dst = frn->fl_addr, 801 .daddr = frn->fl_addr,
855 .fl4_tos = frn->fl_tos, 802 .flowi4_tos = frn->fl_tos,
856 .fl4_scope = frn->fl_scope, 803 .flowi4_scope = frn->fl_scope,
857 }; 804 };
858 805
859#ifdef CONFIG_IP_MULTIPLE_TABLES 806#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -866,7 +813,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
866 813
867 frn->tb_id = tb->tb_id; 814 frn->tb_id = tb->tb_id;
868 rcu_read_lock(); 815 rcu_read_lock();
869 frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); 816 frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
870 817
871 if (!frn->err) { 818 if (!frn->err) {
872 frn->prefixlen = res.prefixlen; 819 frn->prefixlen = res.prefixlen;
@@ -945,10 +892,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
945#ifdef CONFIG_IP_ROUTE_MULTIPATH 892#ifdef CONFIG_IP_ROUTE_MULTIPATH
946 fib_sync_up(dev); 893 fib_sync_up(dev);
947#endif 894#endif
895 fib_update_nh_saddrs(dev);
948 rt_cache_flush(dev_net(dev), -1); 896 rt_cache_flush(dev_net(dev), -1);
949 break; 897 break;
950 case NETDEV_DOWN: 898 case NETDEV_DOWN:
951 fib_del_ifaddr(ifa); 899 fib_del_ifaddr(ifa);
900 fib_update_nh_saddrs(dev);
952 if (ifa->ifa_dev->ifa_list == NULL) { 901 if (ifa->ifa_dev->ifa_list == NULL) {
953 /* Last address was deleted from this interface. 902 /* Last address was deleted from this interface.
954 * Disable IP. 903 * Disable IP.
@@ -1101,5 +1050,5 @@ void __init ip_fib_init(void)
1101 register_netdevice_notifier(&fib_netdev_notifier); 1050 register_netdevice_notifier(&fib_netdev_notifier);
1102 register_inetaddr_notifier(&fib_inetaddr_notifier); 1051 register_inetaddr_notifier(&fib_inetaddr_notifier);
1103 1052
1104 fib_hash_init(); 1053 fib_trie_init();
1105} 1054}
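
The recurring theme in fib_frontend.c is the flowi to flowi4 conversion: the protocol-agnostic struct flowi becomes a union over per-family keys, with flowi4_* accessor names for the fields shared across families. The sketch below shows that shape in compilable form; the struct bodies are abbreviated, and where the kernel derives the generic view via container_of on the embedded common part, a bare cast suffices here because ip4 sits at offset zero.

#include <stdio.h>

struct flowi_common { int flowi_oif, flowi_iif; unsigned int flowi_mark; };

struct flowi4 {
        struct flowi_common __fl_common;
#define flowi4_oif      __fl_common.flowi_oif
#define flowi4_iif      __fl_common.flowi_iif
#define flowi4_mark     __fl_common.flowi_mark
        unsigned int daddr, saddr;
        unsigned char flowi4_tos, flowi4_scope;
};

struct flowi {
        union {
                struct flowi_common __fl_common;
                struct flowi4 ip4;
        } u;
};

/* flowi4_to_flowi(): the IPv4 key *is* the union, viewed generically. */
static struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
{
        return (struct flowi *)fl4;
}

int main(void)
{
        struct flowi4 fl4 = { .daddr = 0x0100007f };    /* 127.0.0.1, LE host */

        fl4.flowi4_mark = 7;
        printf("mark via generic view: %u\n",
               flowi4_to_flowi(&fl4)->u.ip4.flowi4_mark);
        return 0;
}

This is why fib_rules.c below can hand fib_rules_lookup() a flowi4_to_flowi(flp) while fib4_rule_match() pulls the IPv4 view back out with &fl->u.ip4.
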
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
deleted file mode 100644
index b3acb0417b2..00000000000
--- a/net/ipv4/fib_hash.c
+++ /dev/null
@@ -1,1133 +0,0 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 FIB: lookup engine and maintenance routines.
7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <asm/uaccess.h>
17#include <asm/system.h>
18#include <linux/bitops.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/inetdevice.h>
29#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/netlink.h>
34#include <linux/init.h>
35#include <linux/slab.h>
36
37#include <net/net_namespace.h>
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
44
45#include "fib_lookup.h"
46
47static struct kmem_cache *fn_hash_kmem __read_mostly;
48static struct kmem_cache *fn_alias_kmem __read_mostly;
49
50struct fib_node {
51 struct hlist_node fn_hash;
52 struct list_head fn_alias;
53 __be32 fn_key;
54 struct fib_alias fn_embedded_alias;
55};
56
57#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
58
59struct fn_zone {
60 struct fn_zone __rcu *fz_next; /* Next not empty zone */
61 struct hlist_head __rcu *fz_hash; /* Hash table pointer */
62 seqlock_t fz_lock;
63 u32 fz_hashmask; /* (fz_divisor - 1) */
64
65 u8 fz_order; /* Zone order (0..32) */
66 u8 fz_revorder; /* 32 - fz_order */
67 __be32 fz_mask; /* inet_make_mask(order) */
68#define FZ_MASK(fz) ((fz)->fz_mask)
69
70 struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE];
71
72 int fz_nent; /* Number of entries */
73 int fz_divisor; /* Hash size (mask+1) */
74};
75
76struct fn_hash {
77 struct fn_zone *fn_zones[33];
78 struct fn_zone __rcu *fn_zone_list;
79};
80
81static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
82{
83 u32 h = ntohl(key) >> fz->fz_revorder;
84 h ^= (h>>20);
85 h ^= (h>>10);
86 h ^= (h>>5);
87 h &= fz->fz_hashmask;
88 return h;
89}
90
91static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
92{
93 return dst & FZ_MASK(fz);
94}
95
96static unsigned int fib_hash_genid;
97
98#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
99
100static struct hlist_head *fz_hash_alloc(int divisor)
101{
102 unsigned long size = divisor * sizeof(struct hlist_head);
103
104 if (size <= PAGE_SIZE)
105 return kzalloc(size, GFP_KERNEL);
106
107 return (struct hlist_head *)
108 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
109}
110
111/* The fib hash lock must be held when this is called. */
112static inline void fn_rebuild_zone(struct fn_zone *fz,
113 struct hlist_head *old_ht,
114 int old_divisor)
115{
116 int i;
117
118 for (i = 0; i < old_divisor; i++) {
119 struct hlist_node *node, *n;
120 struct fib_node *f;
121
122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
123 struct hlist_head *new_head;
124
125 hlist_del_rcu(&f->fn_hash);
126
127 new_head = rcu_dereference_protected(fz->fz_hash, 1) +
128 fn_hash(f->fn_key, fz);
129 hlist_add_head_rcu(&f->fn_hash, new_head);
130 }
131 }
132}
133
134static void fz_hash_free(struct hlist_head *hash, int divisor)
135{
136 unsigned long size = divisor * sizeof(struct hlist_head);
137
138 if (size <= PAGE_SIZE)
139 kfree(hash);
140 else
141 free_pages((unsigned long)hash, get_order(size));
142}
143
144static void fn_rehash_zone(struct fn_zone *fz)
145{
146 struct hlist_head *ht, *old_ht;
147 int old_divisor, new_divisor;
148 u32 new_hashmask;
149
150 new_divisor = old_divisor = fz->fz_divisor;
151
152 switch (old_divisor) {
153 case EMBEDDED_HASH_SIZE:
154 new_divisor *= EMBEDDED_HASH_SIZE;
155 break;
156 case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
157 new_divisor *= (EMBEDDED_HASH_SIZE/2);
158 break;
159 default:
160 if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
161 printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
162 return;
163 }
164 new_divisor = (old_divisor << 1);
165 break;
166 }
167
168 new_hashmask = (new_divisor - 1);
169
170#if RT_CACHE_DEBUG >= 2
171 printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
172 fz->fz_order, old_divisor);
173#endif
174
175 ht = fz_hash_alloc(new_divisor);
176
177 if (ht) {
178 struct fn_zone nfz;
179
180 memcpy(&nfz, fz, sizeof(nfz));
181
182 write_seqlock_bh(&fz->fz_lock);
183 old_ht = rcu_dereference_protected(fz->fz_hash, 1);
184 RCU_INIT_POINTER(nfz.fz_hash, ht);
185 nfz.fz_hashmask = new_hashmask;
186 nfz.fz_divisor = new_divisor;
187 fn_rebuild_zone(&nfz, old_ht, old_divisor);
188 fib_hash_genid++;
189 rcu_assign_pointer(fz->fz_hash, ht);
190 fz->fz_hashmask = new_hashmask;
191 fz->fz_divisor = new_divisor;
192 write_sequnlock_bh(&fz->fz_lock);
193
194 if (old_ht != fz->fz_embedded_hash) {
195 synchronize_rcu();
196 fz_hash_free(old_ht, old_divisor);
197 }
198 }
199}
200
201static void fn_free_node_rcu(struct rcu_head *head)
202{
203 struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
204
205 kmem_cache_free(fn_hash_kmem, f);
206}
207
208static inline void fn_free_node(struct fib_node *f)
209{
210 call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
211}
212
213static void fn_free_alias_rcu(struct rcu_head *head)
214{
215 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
216
217 kmem_cache_free(fn_alias_kmem, fa);
218}
219
220static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
221{
222 fib_release_info(fa->fa_info);
223 if (fa == &f->fn_embedded_alias)
224 fa->fa_info = NULL;
225 else
226 call_rcu(&fa->rcu, fn_free_alias_rcu);
227}
228
229static struct fn_zone *
230fn_new_zone(struct fn_hash *table, int z)
231{
232 int i;
233 struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
234 if (!fz)
235 return NULL;
236
237 seqlock_init(&fz->fz_lock);
238 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
239 fz->fz_hashmask = fz->fz_divisor - 1;
240 RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
241 fz->fz_order = z;
242 fz->fz_revorder = 32 - z;
243 fz->fz_mask = inet_make_mask(z);
244
245 /* Find the first not empty zone with more specific mask */
246 for (i = z + 1; i <= 32; i++)
247 if (table->fn_zones[i])
248 break;
249 if (i > 32) {
250 /* No more specific masks, we are the first. */
251 rcu_assign_pointer(fz->fz_next,
252 rtnl_dereference(table->fn_zone_list));
253 rcu_assign_pointer(table->fn_zone_list, fz);
254 } else {
255 rcu_assign_pointer(fz->fz_next,
256 rtnl_dereference(table->fn_zones[i]->fz_next));
257 rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
258 }
259 table->fn_zones[z] = fz;
260 fib_hash_genid++;
261 return fz;
262}
263
264int fib_table_lookup(struct fib_table *tb,
265 const struct flowi *flp, struct fib_result *res,
266 int fib_flags)
267{
268 int err;
269 struct fn_zone *fz;
270 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
271
272 rcu_read_lock();
273 for (fz = rcu_dereference(t->fn_zone_list);
274 fz != NULL;
275 fz = rcu_dereference(fz->fz_next)) {
276 struct hlist_head *head;
277 struct hlist_node *node;
278 struct fib_node *f;
279 __be32 k;
280 unsigned int seq;
281
282 do {
283 seq = read_seqbegin(&fz->fz_lock);
284 k = fz_key(flp->fl4_dst, fz);
285
286 head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
287 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
288 if (f->fn_key != k)
289 continue;
290
291 err = fib_semantic_match(&f->fn_alias,
292 flp, res,
293 fz->fz_order, fib_flags);
294 if (err <= 0)
295 goto out;
296 }
297 } while (read_seqretry(&fz->fz_lock, seq));
298 }
299 err = 1;
300out:
301 rcu_read_unlock();
302 return err;
303}
304
305void fib_table_select_default(struct fib_table *tb,
306 const struct flowi *flp, struct fib_result *res)
307{
308 int order, last_idx;
309 struct hlist_node *node;
310 struct fib_node *f;
311 struct fib_info *fi = NULL;
312 struct fib_info *last_resort;
313 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
314 struct fn_zone *fz = t->fn_zones[0];
315 struct hlist_head *head;
316
317 if (fz == NULL)
318 return;
319
320 last_idx = -1;
321 last_resort = NULL;
322 order = -1;
323
324 rcu_read_lock();
325 head = rcu_dereference(fz->fz_hash);
326 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
327 struct fib_alias *fa;
328
329 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
330 struct fib_info *next_fi = fa->fa_info;
331
332 if (fa->fa_scope != res->scope ||
333 fa->fa_type != RTN_UNICAST)
334 continue;
335
336 if (next_fi->fib_priority > res->fi->fib_priority)
337 break;
338 if (!next_fi->fib_nh[0].nh_gw ||
339 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
340 continue;
341
342 fib_alias_accessed(fa);
343
344 if (fi == NULL) {
345 if (next_fi != res->fi)
346 break;
347 } else if (!fib_detect_death(fi, order, &last_resort,
348 &last_idx, tb->tb_default)) {
349 fib_result_assign(res, fi);
350 tb->tb_default = order;
351 goto out;
352 }
353 fi = next_fi;
354 order++;
355 }
356 }
357
358 if (order <= 0 || fi == NULL) {
359 tb->tb_default = -1;
360 goto out;
361 }
362
363 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
364 tb->tb_default)) {
365 fib_result_assign(res, fi);
366 tb->tb_default = order;
367 goto out;
368 }
369
370 if (last_idx >= 0)
371 fib_result_assign(res, last_resort);
372 tb->tb_default = last_idx;
373out:
374 rcu_read_unlock();
375}
376
377/* Insert node F to FZ. */
378static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
379{
380 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
381
382 hlist_add_head_rcu(&f->fn_hash, head);
383}
384
385/* Return the node in FZ matching KEY. */
386static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
387{
388 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
389 struct hlist_node *node;
390 struct fib_node *f;
391
392 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
393 if (f->fn_key == key)
394 return f;
395 }
396
397 return NULL;
398}
399
400
401static struct fib_alias *fib_fast_alloc(struct fib_node *f)
402{
403 struct fib_alias *fa = &f->fn_embedded_alias;
404
405 if (fa->fa_info != NULL)
406 fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
407 return fa;
408}
409
410/* Caller must hold RTNL. */
411int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
412{
413 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
414 struct fib_node *new_f = NULL;
415 struct fib_node *f;
416 struct fib_alias *fa, *new_fa;
417 struct fn_zone *fz;
418 struct fib_info *fi;
419 u8 tos = cfg->fc_tos;
420 __be32 key;
421 int err;
422
423 if (cfg->fc_dst_len > 32)
424 return -EINVAL;
425
426 fz = table->fn_zones[cfg->fc_dst_len];
427 if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
428 return -ENOBUFS;
429
430 key = 0;
431 if (cfg->fc_dst) {
432 if (cfg->fc_dst & ~FZ_MASK(fz))
433 return -EINVAL;
434 key = fz_key(cfg->fc_dst, fz);
435 }
436
437 fi = fib_create_info(cfg);
438 if (IS_ERR(fi))
439 return PTR_ERR(fi);
440
441 if (fz->fz_nent > (fz->fz_divisor<<1) &&
442 fz->fz_divisor < FZ_MAX_DIVISOR &&
443 (cfg->fc_dst_len == 32 ||
444 (1 << cfg->fc_dst_len) > fz->fz_divisor))
445 fn_rehash_zone(fz);
446
447 f = fib_find_node(fz, key);
448
449 if (!f)
450 fa = NULL;
451 else
452 fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
453
454 /* Now fa, if non-NULL, points to the first fib alias
455 * with the same keys [prefix,tos,priority], if such key already
456 * exists or to the node before which we will insert new one.
457 *
458 * If fa is NULL, we will need to allocate a new one and
459 * insert to the head of f.
460 *
461 * If f is NULL, no fib node matched the destination key
462 * and we need to allocate a new one of those as well.
463 */
464
465 if (fa && fa->fa_tos == tos &&
466 fa->fa_info->fib_priority == fi->fib_priority) {
467 struct fib_alias *fa_first, *fa_match;
468
469 err = -EEXIST;
470 if (cfg->fc_nlflags & NLM_F_EXCL)
471 goto out;
472
473 /* We have 2 goals:
474 * 1. Find exact match for type, scope, fib_info to avoid
475 * duplicate routes
476 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
477 */
478 fa_match = NULL;
479 fa_first = fa;
480 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
481 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
482 if (fa->fa_tos != tos)
483 break;
484 if (fa->fa_info->fib_priority != fi->fib_priority)
485 break;
486 if (fa->fa_type == cfg->fc_type &&
487 fa->fa_scope == cfg->fc_scope &&
488 fa->fa_info == fi) {
489 fa_match = fa;
490 break;
491 }
492 }
493
494 if (cfg->fc_nlflags & NLM_F_REPLACE) {
495 u8 state;
496
497 fa = fa_first;
498 if (fa_match) {
499 if (fa == fa_match)
500 err = 0;
501 goto out;
502 }
503 err = -ENOBUFS;
504 new_fa = fib_fast_alloc(f);
505 if (new_fa == NULL)
506 goto out;
507
508 new_fa->fa_tos = fa->fa_tos;
509 new_fa->fa_info = fi;
510 new_fa->fa_type = cfg->fc_type;
511 new_fa->fa_scope = cfg->fc_scope;
512 state = fa->fa_state;
513 new_fa->fa_state = state & ~FA_S_ACCESSED;
514 fib_hash_genid++;
515 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
516
517 fn_free_alias(fa, f);
518 if (state & FA_S_ACCESSED)
519 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
520 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
521 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
522 return 0;
523 }
524
525 /* Error if we find a perfect match which
526 * uses the same scope, type, and nexthop
527 * information.
528 */
529 if (fa_match)
530 goto out;
531
532 if (!(cfg->fc_nlflags & NLM_F_APPEND))
533 fa = fa_first;
534 }
535
536 err = -ENOENT;
537 if (!(cfg->fc_nlflags & NLM_F_CREATE))
538 goto out;
539
540 err = -ENOBUFS;
541
542 if (!f) {
543 new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
544 if (new_f == NULL)
545 goto out;
546
547 INIT_HLIST_NODE(&new_f->fn_hash);
548 INIT_LIST_HEAD(&new_f->fn_alias);
549 new_f->fn_key = key;
550 f = new_f;
551 }
552
553 new_fa = fib_fast_alloc(f);
554 if (new_fa == NULL)
555 goto out;
556
557 new_fa->fa_info = fi;
558 new_fa->fa_tos = tos;
559 new_fa->fa_type = cfg->fc_type;
560 new_fa->fa_scope = cfg->fc_scope;
561 new_fa->fa_state = 0;
562
563 /*
564 * Insert new entry to the list.
565 */
566
567 if (new_f)
568 fib_insert_node(fz, new_f);
569 list_add_tail_rcu(&new_fa->fa_list,
570 (fa ? &fa->fa_list : &f->fn_alias));
571 fib_hash_genid++;
572
573 if (new_f)
574 fz->fz_nent++;
575 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
576
577 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
578 &cfg->fc_nlinfo, 0);
579 return 0;
580
581out:
582 if (new_f)
583 kmem_cache_free(fn_hash_kmem, new_f);
584 fib_release_info(fi);
585 return err;
586}
587
588int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
589{
590 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
591 struct fib_node *f;
592 struct fib_alias *fa, *fa_to_delete;
593 struct fn_zone *fz;
594 __be32 key;
595
596 if (cfg->fc_dst_len > 32)
597 return -EINVAL;
598
599 if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
600 return -ESRCH;
601
602 key = 0;
603 if (cfg->fc_dst) {
604 if (cfg->fc_dst & ~FZ_MASK(fz))
605 return -EINVAL;
606 key = fz_key(cfg->fc_dst, fz);
607 }
608
609 f = fib_find_node(fz, key);
610
611 if (!f)
612 fa = NULL;
613 else
614 fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
615 if (!fa)
616 return -ESRCH;
617
618 fa_to_delete = NULL;
619 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
620 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
621 struct fib_info *fi = fa->fa_info;
622
623 if (fa->fa_tos != cfg->fc_tos)
624 break;
625
626 if ((!cfg->fc_type ||
627 fa->fa_type == cfg->fc_type) &&
628 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
629 fa->fa_scope == cfg->fc_scope) &&
630 (!cfg->fc_protocol ||
631 fi->fib_protocol == cfg->fc_protocol) &&
632 fib_nh_match(cfg, fi) == 0) {
633 fa_to_delete = fa;
634 break;
635 }
636 }
637
638 if (fa_to_delete) {
639 int kill_fn;
640
641 fa = fa_to_delete;
642 rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
643 tb->tb_id, &cfg->fc_nlinfo, 0);
644
645 kill_fn = 0;
646 list_del_rcu(&fa->fa_list);
647 if (list_empty(&f->fn_alias)) {
648 hlist_del_rcu(&f->fn_hash);
649 kill_fn = 1;
650 }
651 fib_hash_genid++;
652
653 if (fa->fa_state & FA_S_ACCESSED)
654 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
655 fn_free_alias(fa, f);
656 if (kill_fn) {
657 fn_free_node(f);
658 fz->fz_nent--;
659 }
660
661 return 0;
662 }
663 return -ESRCH;
664}
665
666static int fn_flush_list(struct fn_zone *fz, int idx)
667{
668 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
669 struct hlist_node *node, *n;
670 struct fib_node *f;
671 int found = 0;
672
673 hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
674 struct fib_alias *fa, *fa_node;
675 int kill_f;
676
677 kill_f = 0;
678 list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
679 struct fib_info *fi = fa->fa_info;
680
681 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
682 list_del_rcu(&fa->fa_list);
683 if (list_empty(&f->fn_alias)) {
684 hlist_del_rcu(&f->fn_hash);
685 kill_f = 1;
686 }
687 fib_hash_genid++;
688
689 fn_free_alias(fa, f);
690 found++;
691 }
692 }
693 if (kill_f) {
694 fn_free_node(f);
695 fz->fz_nent--;
696 }
697 }
698 return found;
699}
700
701/* caller must hold RTNL. */
702int fib_table_flush(struct fib_table *tb)
703{
704 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
705 struct fn_zone *fz;
706 int found = 0;
707
708 for (fz = rtnl_dereference(table->fn_zone_list);
709 fz != NULL;
710 fz = rtnl_dereference(fz->fz_next)) {
711 int i;
712
713 for (i = fz->fz_divisor - 1; i >= 0; i--)
714 found += fn_flush_list(fz, i);
715 }
716 return found;
717}
718
719void fib_free_table(struct fib_table *tb)
720{
721 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
722 struct fn_zone *fz, *next;
723
724 next = table->fn_zone_list;
725 while (next != NULL) {
726 fz = next;
727 next = fz->fz_next;
728
729 if (fz->fz_hash != fz->fz_embedded_hash)
730 fz_hash_free(fz->fz_hash, fz->fz_divisor);
731
732 kfree(fz);
733 }
734
735 kfree(tb);
736}
737
738static inline int
739fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
740 struct fib_table *tb,
741 struct fn_zone *fz,
742 struct hlist_head *head)
743{
744 struct hlist_node *node;
745 struct fib_node *f;
746 int i, s_i;
747
748 s_i = cb->args[4];
749 i = 0;
750 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
751 struct fib_alias *fa;
752
753 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
754 if (i < s_i)
755 goto next;
756
757 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
758 cb->nlh->nlmsg_seq,
759 RTM_NEWROUTE,
760 tb->tb_id,
761 fa->fa_type,
762 fa->fa_scope,
763 f->fn_key,
764 fz->fz_order,
765 fa->fa_tos,
766 fa->fa_info,
767 NLM_F_MULTI) < 0) {
768 cb->args[4] = i;
769 return -1;
770 }
771next:
772 i++;
773 }
774 }
775 cb->args[4] = i;
776 return skb->len;
777}
778
779static inline int
780fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
781 struct fib_table *tb,
782 struct fn_zone *fz)
783{
784 int h, s_h;
785 struct hlist_head *head = rcu_dereference(fz->fz_hash);
786
787 if (head == NULL)
788 return skb->len;
789 s_h = cb->args[3];
790 for (h = s_h; h < fz->fz_divisor; h++) {
791 if (hlist_empty(head + h))
792 continue;
793 if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
794 cb->args[3] = h;
795 return -1;
796 }
797 memset(&cb->args[4], 0,
798 sizeof(cb->args) - 4*sizeof(cb->args[0]));
799 }
800 cb->args[3] = h;
801 return skb->len;
802}
803
804int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
805 struct netlink_callback *cb)
806{
807 int m = 0, s_m;
808 struct fn_zone *fz;
809 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
810
811 s_m = cb->args[2];
812 rcu_read_lock();
813 for (fz = rcu_dereference(table->fn_zone_list);
814 fz != NULL;
815 fz = rcu_dereference(fz->fz_next), m++) {
816 if (m < s_m)
817 continue;
818 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
819 cb->args[2] = m;
820 rcu_read_unlock();
821 return -1;
822 }
823 memset(&cb->args[3], 0,
824 sizeof(cb->args) - 3*sizeof(cb->args[0]));
825 }
826 rcu_read_unlock();
827 cb->args[2] = m;
828 return skb->len;
829}
830
831void __init fib_hash_init(void)
832{
833 fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
834 0, SLAB_PANIC, NULL);
835
836 fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
837 0, SLAB_PANIC, NULL);
838
839}
840
841struct fib_table *fib_hash_table(u32 id)
842{
843 struct fib_table *tb;
844
845 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
846 GFP_KERNEL);
847 if (tb == NULL)
848 return NULL;
849
850 tb->tb_id = id;
851 tb->tb_default = -1;
852
853 memset(tb->tb_data, 0, sizeof(struct fn_hash));
854 return tb;
855}
856
857/* ------------------------------------------------------------------------ */
858#ifdef CONFIG_PROC_FS
859
860struct fib_iter_state {
861 struct seq_net_private p;
862 struct fn_zone *zone;
863 int bucket;
864 struct hlist_head *hash_head;
865 struct fib_node *fn;
866 struct fib_alias *fa;
867 loff_t pos;
868 unsigned int genid;
869 int valid;
870};
871
872static struct fib_alias *fib_get_first(struct seq_file *seq)
873{
874 struct fib_iter_state *iter = seq->private;
875 struct fib_table *main_table;
876 struct fn_hash *table;
877
878 main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
879 table = (struct fn_hash *)main_table->tb_data;
880
881 iter->bucket = 0;
882 iter->hash_head = NULL;
883 iter->fn = NULL;
884 iter->fa = NULL;
885 iter->pos = 0;
886 iter->genid = fib_hash_genid;
887 iter->valid = 1;
888
889 for (iter->zone = rcu_dereference(table->fn_zone_list);
890 iter->zone != NULL;
891 iter->zone = rcu_dereference(iter->zone->fz_next)) {
892 int maxslot;
893
894 if (!iter->zone->fz_nent)
895 continue;
896
897 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
898 maxslot = iter->zone->fz_divisor;
899
900 for (iter->bucket = 0; iter->bucket < maxslot;
901 ++iter->bucket, ++iter->hash_head) {
902 struct hlist_node *node;
903 struct fib_node *fn;
904
905 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
906 struct fib_alias *fa;
907
908 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
909 iter->fn = fn;
910 iter->fa = fa;
911 goto out;
912 }
913 }
914 }
915 }
916out:
917 return iter->fa;
918}
919
920static struct fib_alias *fib_get_next(struct seq_file *seq)
921{
922 struct fib_iter_state *iter = seq->private;
923 struct fib_node *fn;
924 struct fib_alias *fa;
925
926 /* Advance FA, if any. */
927 fn = iter->fn;
928 fa = iter->fa;
929 if (fa) {
930 BUG_ON(!fn);
931 list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
932 iter->fa = fa;
933 goto out;
934 }
935 }
936
937 fa = iter->fa = NULL;
938
939 /* Advance FN. */
940 if (fn) {
941 struct hlist_node *node = &fn->fn_hash;
942 hlist_for_each_entry_continue(fn, node, fn_hash) {
943 iter->fn = fn;
944
945 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
946 iter->fa = fa;
947 goto out;
948 }
949 }
950 }
951
952 fn = iter->fn = NULL;
953
954 /* Advance hash chain. */
955 if (!iter->zone)
956 goto out;
957
958 for (;;) {
959 struct hlist_node *node;
960 int maxslot;
961
962 maxslot = iter->zone->fz_divisor;
963
964 while (++iter->bucket < maxslot) {
965 iter->hash_head++;
966
967 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
968 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
969 iter->fn = fn;
970 iter->fa = fa;
971 goto out;
972 }
973 }
974 }
975
976 iter->zone = rcu_dereference(iter->zone->fz_next);
977
978 if (!iter->zone)
979 goto out;
980
981 iter->bucket = 0;
982 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
983
984 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
985 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
986 iter->fn = fn;
987 iter->fa = fa;
988 goto out;
989 }
990 }
991 }
992out:
993 iter->pos++;
994 return fa;
995}
996
997static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
998{
999 struct fib_iter_state *iter = seq->private;
1000 struct fib_alias *fa;
1001
1002 if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
1003 fa = iter->fa;
1004 pos -= iter->pos;
1005 } else
1006 fa = fib_get_first(seq);
1007
1008 if (fa)
1009 while (pos && (fa = fib_get_next(seq)))
1010 --pos;
1011 return pos ? NULL : fa;
1012}
1013
1014static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
1015 __acquires(RCU)
1016{
1017 void *v = NULL;
1018
1019 rcu_read_lock();
1020 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
1021 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1022 return v;
1023}
1024
1025static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1026{
1027 ++*pos;
1028 return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
1029}
1030
1031static void fib_seq_stop(struct seq_file *seq, void *v)
1032 __releases(RCU)
1033{
1034 rcu_read_unlock();
1035}
1036
1037static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
1038{
1039 static const unsigned type2flags[RTN_MAX + 1] = {
1040 [7] = RTF_REJECT,
1041 [8] = RTF_REJECT,
1042 };
1043 unsigned flags = type2flags[type];
1044
1045 if (fi && fi->fib_nh->nh_gw)
1046 flags |= RTF_GATEWAY;
1047 if (mask == htonl(0xFFFFFFFF))
1048 flags |= RTF_HOST;
1049 flags |= RTF_UP;
1050 return flags;
1051}
1052
1053/*
1054 * This outputs /proc/net/route.
1055 *
1056 * It always works in backward compatibility mode.
1057 * The format of the file is not supposed to be changed.
1058 */
1059static int fib_seq_show(struct seq_file *seq, void *v)
1060{
1061 struct fib_iter_state *iter;
1062 int len;
1063 __be32 prefix, mask;
1064 unsigned flags;
1065 struct fib_node *f;
1066 struct fib_alias *fa;
1067 struct fib_info *fi;
1068
1069 if (v == SEQ_START_TOKEN) {
1070 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
1071 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1072 "\tWindow\tIRTT");
1073 goto out;
1074 }
1075
1076 iter = seq->private;
1077 f = iter->fn;
1078 fa = iter->fa;
1079 fi = fa->fa_info;
1080 prefix = f->fn_key;
1081 mask = FZ_MASK(iter->zone);
1082 flags = fib_flag_trans(fa->fa_type, mask, fi);
1083 if (fi)
1084 seq_printf(seq,
1085 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
1086 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1087 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1088 mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
1089 fi->fib_window,
1090 fi->fib_rtt >> 3, &len);
1091 else
1092 seq_printf(seq,
1093 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
1094 prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len);
1095
1096 seq_printf(seq, "%*s\n", 127 - len, "");
1097out:
1098 return 0;
1099}
1100
1101static const struct seq_operations fib_seq_ops = {
1102 .start = fib_seq_start,
1103 .next = fib_seq_next,
1104 .stop = fib_seq_stop,
1105 .show = fib_seq_show,
1106};
1107
1108static int fib_seq_open(struct inode *inode, struct file *file)
1109{
1110 return seq_open_net(inode, file, &fib_seq_ops,
1111 sizeof(struct fib_iter_state));
1112}
1113
1114static const struct file_operations fib_seq_fops = {
1115 .owner = THIS_MODULE,
1116 .open = fib_seq_open,
1117 .read = seq_read,
1118 .llseek = seq_lseek,
1119 .release = seq_release_net,
1120};
1121
1122int __net_init fib_proc_init(struct net *net)
1123{
1124 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
1125 return -ENOMEM;
1126 return 0;
1127}
1128
1129void __net_exit fib_proc_exit(struct net *net)
1130{
1131 proc_net_remove(net, "route");
1132}
1133#endif /* CONFIG_PROC_FS */
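
With this commit net/ipv4/fib_hash.c is deleted outright and fib_trie becomes the only IPv4 FIB engine (see the fib_trie_table()/fib_trie_init() calls in fib_frontend.c above). Both engines answer the same longest-prefix-match question; the removed code did it by scanning per-prefix-length zones from most to least specific, which the following standalone sketch reproduces with plain arrays (illustrative only; the trie reaches the same answer without the per-length scan).

#include <stdio.h>
#include <stdint.h>

struct route { uint32_t prefix; int plen; const char *via; };

static uint32_t make_mask(int plen)
{
        return plen ? ~0u << (32 - plen) : 0;
}

int main(void)
{
        static const struct route tbl[] = {
                { 0x0a000000,  8, "eth0" },     /* 10.0.0.0/8  */
                { 0x0a010000, 16, "eth1" },     /* 10.1.0.0/16 */
                { 0x00000000,  0, "default" },
        };
        uint32_t dst = 0x0a010203;              /* 10.1.2.3 */
        int plen;

        /* Most specific prefix first, as the fn_zone_list walk did. */
        for (plen = 32; plen >= 0; plen--) {
                uint32_t mask = make_mask(plen);
                size_t i;

                for (i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
                        if (tbl[i].plen == plen &&
                            (dst & mask) == tbl[i].prefix) {
                                printf("10.1.2.3 -> %s (/%d)\n",
                                       tbl[i].via, plen);
                                return 0;
                        }
                }
        }
        printf("no route\n");
        return 0;
}
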
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c079cc0ec65..4ec323875a0 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -25,9 +25,6 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
25} 25}
26 26
27/* Exported by fib_semantics.c */ 27/* Exported by fib_semantics.c */
28extern int fib_semantic_match(struct list_head *head,
29 const struct flowi *flp,
30 struct fib_result *res, int prefixlen, int fib_flags);
31extern void fib_release_info(struct fib_info *); 28extern void fib_release_info(struct fib_info *);
32extern struct fib_info *fib_create_info(struct fib_config *cfg); 29extern struct fib_info *fib_create_info(struct fib_config *cfg);
33extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); 30extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
@@ -51,4 +48,11 @@ static inline void fib_result_assign(struct fib_result *res,
51 res->fi = fi; 48 res->fi = fi;
52} 49}
53 50
51struct fib_prop {
52 int error;
53 u8 scope;
54};
55
56extern const struct fib_prop fib_props[RTN_MAX + 1];
57
54#endif /* _FIB_LOOKUP_H */ 58#endif /* _FIB_LOOKUP_H */
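
fib_lookup.h stops exporting fib_semantic_match() and instead exposes the fib_props[] table, so fib_trie can map a route type to its errno and maximum scope directly. A trimmed standalone rendering of that table follows; the enum values and the entries shown follow the kernel's definitions, but most route types are omitted.

#include <stdio.h>

enum { RTN_UNSPEC, RTN_UNICAST, RTN_LOCAL, RTN_UNREACHABLE = 7, RTN_MAX = 11 };
enum { RT_SCOPE_UNIVERSE = 0, RT_SCOPE_HOST = 254, RT_SCOPE_NOWHERE = 255 };

struct fib_prop { int error; unsigned char scope; };

static const struct fib_prop fib_props[RTN_MAX + 1] = {
        [RTN_UNSPEC]      = { .error = 0,    .scope = RT_SCOPE_NOWHERE },
        [RTN_UNICAST]     = { .error = 0,    .scope = RT_SCOPE_UNIVERSE },
        [RTN_LOCAL]       = { .error = 0,    .scope = RT_SCOPE_HOST },
        [RTN_UNREACHABLE] = { .error = -113, .scope = RT_SCOPE_UNIVERSE },
                                                /* -113 == -EHOSTUNREACH */
};

int main(void)
{
        printf("RTN_UNREACHABLE -> error %d, scope %u\n",
               fib_props[RTN_UNREACHABLE].error,
               fib_props[RTN_UNREACHABLE].scope);
        return 0;
}
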
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 7981a24f5c7..a53bb1b5b11 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,19 +41,19 @@ struct fib4_rule {
41 __be32 srcmask; 41 __be32 srcmask;
42 __be32 dst; 42 __be32 dst;
43 __be32 dstmask; 43 __be32 dstmask;
44#ifdef CONFIG_NET_CLS_ROUTE 44#ifdef CONFIG_IP_ROUTE_CLASSID
45 u32 tclassid; 45 u32 tclassid;
46#endif 46#endif
47}; 47};
48 48
49#ifdef CONFIG_NET_CLS_ROUTE 49#ifdef CONFIG_IP_ROUTE_CLASSID
50u32 fib_rules_tclass(struct fib_result *res) 50u32 fib_rules_tclass(const struct fib_result *res)
51{ 51{
52 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; 52 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
53} 53}
54#endif 54#endif
55 55
56int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) 56int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
57{ 57{
58 struct fib_lookup_arg arg = { 58 struct fib_lookup_arg arg = {
59 .result = res, 59 .result = res,
@@ -61,7 +61,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
61 }; 61 };
62 int err; 62 int err;
63 63
64 err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg); 64 err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
65 res->r = arg.rule; 65 res->r = arg.rule;
66 66
67 return err; 67 return err;
@@ -95,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
95 if (!tbl) 95 if (!tbl)
96 goto errout; 96 goto errout;
97 97
98 err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); 98 err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags);
99 if (err > 0) 99 if (err > 0)
100 err = -EAGAIN; 100 err = -EAGAIN;
101errout: 101errout:
@@ -106,14 +106,15 @@ errout:
106static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 106static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
107{ 107{
108 struct fib4_rule *r = (struct fib4_rule *) rule; 108 struct fib4_rule *r = (struct fib4_rule *) rule;
109 __be32 daddr = fl->fl4_dst; 109 struct flowi4 *fl4 = &fl->u.ip4;
110 __be32 saddr = fl->fl4_src; 110 __be32 daddr = fl4->daddr;
111 __be32 saddr = fl4->saddr;
111 112
112 if (((saddr ^ r->src) & r->srcmask) || 113 if (((saddr ^ r->src) & r->srcmask) ||
113 ((daddr ^ r->dst) & r->dstmask)) 114 ((daddr ^ r->dst) & r->dstmask))
114 return 0; 115 return 0;
115 116
116 if (r->tos && (r->tos != fl->fl4_tos)) 117 if (r->tos && (r->tos != fl4->flowi4_tos))
117 return 0; 118 return 0;
118 119
119 return 1; 120 return 1;
@@ -165,7 +166,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
165 if (frh->dst_len) 166 if (frh->dst_len)
166 rule4->dst = nla_get_be32(tb[FRA_DST]); 167 rule4->dst = nla_get_be32(tb[FRA_DST]);
167 168
168#ifdef CONFIG_NET_CLS_ROUTE 169#ifdef CONFIG_IP_ROUTE_CLASSID
169 if (tb[FRA_FLOW]) 170 if (tb[FRA_FLOW])
170 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); 171 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
171#endif 172#endif
@@ -195,7 +196,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
195 if (frh->tos && (rule4->tos != frh->tos)) 196 if (frh->tos && (rule4->tos != frh->tos))
196 return 0; 197 return 0;
197 198
198#ifdef CONFIG_NET_CLS_ROUTE 199#ifdef CONFIG_IP_ROUTE_CLASSID
199 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) 200 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
200 return 0; 201 return 0;
201#endif 202#endif
@@ -224,7 +225,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
224 if (rule4->src_len) 225 if (rule4->src_len)
225 NLA_PUT_BE32(skb, FRA_SRC, rule4->src); 226 NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
226 227
227#ifdef CONFIG_NET_CLS_ROUTE 228#ifdef CONFIG_IP_ROUTE_CLASSID
228 if (rule4->tclassid) 229 if (rule4->tclassid)
229 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); 230 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
230#endif 231#endif
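
After the conversion, fib4_rule_match() first extracts the IPv4 view with &fl->u.ip4 and then performs the same masked source/destination and TOS comparison as before. A standalone model of that predicate with simplified types:

#include <stdio.h>
#include <stdint.h>

struct rule4 {
        uint32_t src, srcmask, dst, dstmask;
        uint8_t tos;
};

/* Same predicate as fib4_rule_match(): XOR exposes differing bits,
 * the mask keeps only the bits the rule cares about. */
static int rule_match(const struct rule4 *r, uint32_t saddr,
                      uint32_t daddr, uint8_t tos)
{
        if (((saddr ^ r->src) & r->srcmask) ||
            ((daddr ^ r->dst) & r->dstmask))
                return 0;
        if (r->tos && r->tos != tos)
                return 0;
        return 1;
}

int main(void)
{
        struct rule4 r = {
                .src = 0x0a000000, .srcmask = 0xff000000,       /* from 10/8 */
                .dst = 0,          .dstmask = 0,                /* to anywhere */
        };

        printf("10.1.2.3 -> 8.8.8.8: %s\n",
               rule_match(&r, 0x0a010203, 0x08080808, 0) ? "match" : "no match");
        return 0;
}
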
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 12d3dc3df1b..622ac4c9502 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -49,7 +49,7 @@
49static DEFINE_SPINLOCK(fib_info_lock); 49static DEFINE_SPINLOCK(fib_info_lock);
50static struct hlist_head *fib_info_hash; 50static struct hlist_head *fib_info_hash;
51static struct hlist_head *fib_info_laddrhash; 51static struct hlist_head *fib_info_laddrhash;
52static unsigned int fib_hash_size; 52static unsigned int fib_info_hash_size;
53static unsigned int fib_info_cnt; 53static unsigned int fib_info_cnt;
54 54
55#define DEVINDEX_HASHBITS 8 55#define DEVINDEX_HASHBITS 8
@@ -90,11 +90,7 @@ static DEFINE_SPINLOCK(fib_multipath_lock);
90#define endfor_nexthops(fi) } 90#define endfor_nexthops(fi) }
91 91
92 92
93static const struct 93const struct fib_prop fib_props[RTN_MAX + 1] = {
94{
95 int error;
96 u8 scope;
97} fib_props[RTN_MAX + 1] = {
98 [RTN_UNSPEC] = { 94 [RTN_UNSPEC] = {
99 .error = 0, 95 .error = 0,
100 .scope = RT_SCOPE_NOWHERE, 96 .scope = RT_SCOPE_NOWHERE,
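
fib_props loses its file-local anonymous struct type and becomes an exported const struct fib_prop array, consistent with the semantic-match loop moving into fib_trie.c later in this diff. Indexing a table directly by route type is only safe once the index is bounded, which is exactly what the new cfg->fc_type > RTN_MAX check below adds. A small sketch of that pattern, with made-up scope values:

#include <stdio.h>

enum route_type { RTN_UNSPEC, RTN_UNICAST, RTN_LOCAL, RTN_MAX_M = RTN_LOCAL };

struct fib_prop_m { int error; unsigned char scope; };

/* Table indexed directly by route type, as fib_props[] is. */
static const struct fib_prop_m props[RTN_MAX_M + 1] = {
	[RTN_UNSPEC]  = { .error = 0, .scope = 0 },
	[RTN_UNICAST] = { .error = 0, .scope = 1 },
	[RTN_LOCAL]   = { .error = 0, .scope = 2 },
};

static int route_type_error(unsigned int type)
{
	if (type > RTN_MAX_M)	/* bound-check before indexing the table */
		return -22;	/* -EINVAL, as the new fc_type test yields */
	return props[type].error;
}

int main(void)
{
	printf("%d %d\n", route_type_error(RTN_LOCAL), route_type_error(99));
	return 0;
}
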
@@ -152,6 +148,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
152{ 148{
153 struct fib_info *fi = container_of(head, struct fib_info, rcu); 149 struct fib_info *fi = container_of(head, struct fib_info, rcu);
154 150
151 if (fi->fib_metrics != (u32 *) dst_default_metrics)
152 kfree(fi->fib_metrics);
155 kfree(fi); 153 kfree(fi);
156} 154}
157 155
@@ -200,7 +198,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
200#ifdef CONFIG_IP_ROUTE_MULTIPATH 198#ifdef CONFIG_IP_ROUTE_MULTIPATH
201 nh->nh_weight != onh->nh_weight || 199 nh->nh_weight != onh->nh_weight ||
202#endif 200#endif
203#ifdef CONFIG_NET_CLS_ROUTE 201#ifdef CONFIG_IP_ROUTE_CLASSID
204 nh->nh_tclassid != onh->nh_tclassid || 202 nh->nh_tclassid != onh->nh_tclassid ||
205#endif 203#endif
206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) 204 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -221,7 +219,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val)
221 219
222static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 220static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
223{ 221{
224 unsigned int mask = (fib_hash_size - 1); 222 unsigned int mask = (fib_info_hash_size - 1);
225 unsigned int val = fi->fib_nhs; 223 unsigned int val = fi->fib_nhs;
226 224
227 val ^= fi->fib_protocol; 225 val ^= fi->fib_protocol;
@@ -422,7 +420,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
422 420
423 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 421 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
424 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; 422 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
425#ifdef CONFIG_NET_CLS_ROUTE 423#ifdef CONFIG_IP_ROUTE_CLASSID
426 nla = nla_find(attrs, attrlen, RTA_FLOW); 424 nla = nla_find(attrs, attrlen, RTA_FLOW);
427 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 425 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
428#endif 426#endif
@@ -476,7 +474,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
476 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 474 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
477 if (nla && nla_get_be32(nla) != nh->nh_gw) 475 if (nla && nla_get_be32(nla) != nh->nh_gw)
478 return 1; 476 return 1;
479#ifdef CONFIG_NET_CLS_ROUTE 477#ifdef CONFIG_IP_ROUTE_CLASSID
480 nla = nla_find(attrs, attrlen, RTA_FLOW); 478 nla = nla_find(attrs, attrlen, RTA_FLOW);
481 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 479 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
482 return 1; 480 return 1;
@@ -562,16 +560,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
562 } 560 }
563 rcu_read_lock(); 561 rcu_read_lock();
564 { 562 {
565 struct flowi fl = { 563 struct flowi4 fl4 = {
566 .fl4_dst = nh->nh_gw, 564 .daddr = nh->nh_gw,
567 .fl4_scope = cfg->fc_scope + 1, 565 .flowi4_scope = cfg->fc_scope + 1,
568 .oif = nh->nh_oif, 566 .flowi4_oif = nh->nh_oif,
569 }; 567 };
570 568
571 /* It is not necessary, but requires a bit of thinking */ 569 /* It is not necessary, but requires a bit of thinking */
572 if (fl.fl4_scope < RT_SCOPE_LINK) 570 if (fl4.flowi4_scope < RT_SCOPE_LINK)
573 fl.fl4_scope = RT_SCOPE_LINK; 571 fl4.flowi4_scope = RT_SCOPE_LINK;
574 err = fib_lookup(net, &fl, &res); 572 err = fib_lookup(net, &fl4, &res);
575 if (err) { 573 if (err) {
576 rcu_read_unlock(); 574 rcu_read_unlock();
577 return err; 575 return err;
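
fib_check_nh validates a nexthop by looking up a route to its gateway, asking for a scope one step narrower than the route being configured (cfg->fc_scope + 1) and clamping it to at least link scope. A toy version of that clamp, relying on the kernel's numeric convention that larger scope values mean narrower scope:

#include <stdio.h>

/* Kernel scope encoding: RT_SCOPE_UNIVERSE=0 ... RT_SCOPE_LINK=253,
 * RT_SCOPE_HOST=254; bigger number = narrower scope. */
enum { SCOPE_UNIVERSE = 0, SCOPE_LINK = 253, SCOPE_HOST = 254 };

static unsigned int gw_lookup_scope(unsigned int route_scope)
{
	unsigned int s = route_scope + 1; /* gateway must be reachable more directly */

	if (s < SCOPE_LINK)	/* but never ask for wider than link scope */
		s = SCOPE_LINK;
	return s;
}

int main(void)
{
	printf("%u %u\n", gw_lookup_scope(SCOPE_UNIVERSE),
	       gw_lookup_scope(SCOPE_LINK));
	return 0;
}
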
@@ -613,14 +611,14 @@ out:
613 611
614static inline unsigned int fib_laddr_hashfn(__be32 val) 612static inline unsigned int fib_laddr_hashfn(__be32 val)
615{ 613{
616 unsigned int mask = (fib_hash_size - 1); 614 unsigned int mask = (fib_info_hash_size - 1);
617 615
618 return ((__force u32)val ^ 616 return ((__force u32)val ^
619 ((__force u32)val >> 7) ^ 617 ((__force u32)val >> 7) ^
620 ((__force u32)val >> 14)) & mask; 618 ((__force u32)val >> 14)) & mask;
621} 619}
622 620
623static struct hlist_head *fib_hash_alloc(int bytes) 621static struct hlist_head *fib_info_hash_alloc(int bytes)
624{ 622{
625 if (bytes <= PAGE_SIZE) 623 if (bytes <= PAGE_SIZE)
626 return kzalloc(bytes, GFP_KERNEL); 624 return kzalloc(bytes, GFP_KERNEL);
@@ -630,7 +628,7 @@ static struct hlist_head *fib_hash_alloc(int bytes)
630 get_order(bytes)); 628 get_order(bytes));
631} 629}
632 630
633static void fib_hash_free(struct hlist_head *hash, int bytes) 631static void fib_info_hash_free(struct hlist_head *hash, int bytes)
634{ 632{
635 if (!hash) 633 if (!hash)
636 return; 634 return;
@@ -641,18 +639,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes)
641 free_pages((unsigned long) hash, get_order(bytes)); 639 free_pages((unsigned long) hash, get_order(bytes));
642} 640}
643 641
644static void fib_hash_move(struct hlist_head *new_info_hash, 642static void fib_info_hash_move(struct hlist_head *new_info_hash,
645 struct hlist_head *new_laddrhash, 643 struct hlist_head *new_laddrhash,
646 unsigned int new_size) 644 unsigned int new_size)
647{ 645{
648 struct hlist_head *old_info_hash, *old_laddrhash; 646 struct hlist_head *old_info_hash, *old_laddrhash;
649 unsigned int old_size = fib_hash_size; 647 unsigned int old_size = fib_info_hash_size;
650 unsigned int i, bytes; 648 unsigned int i, bytes;
651 649
652 spin_lock_bh(&fib_info_lock); 650 spin_lock_bh(&fib_info_lock);
653 old_info_hash = fib_info_hash; 651 old_info_hash = fib_info_hash;
654 old_laddrhash = fib_info_laddrhash; 652 old_laddrhash = fib_info_laddrhash;
655 fib_hash_size = new_size; 653 fib_info_hash_size = new_size;
656 654
657 for (i = 0; i < old_size; i++) { 655 for (i = 0; i < old_size; i++) {
658 struct hlist_head *head = &fib_info_hash[i]; 656 struct hlist_head *head = &fib_info_hash[i];
@@ -693,8 +691,8 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
693 spin_unlock_bh(&fib_info_lock); 691 spin_unlock_bh(&fib_info_lock);
694 692
695 bytes = old_size * sizeof(struct hlist_head *); 693 bytes = old_size * sizeof(struct hlist_head *);
696 fib_hash_free(old_info_hash, bytes); 694 fib_info_hash_free(old_info_hash, bytes);
697 fib_hash_free(old_laddrhash, bytes); 695 fib_info_hash_free(old_laddrhash, bytes);
698} 696}
699 697
700struct fib_info *fib_create_info(struct fib_config *cfg) 698struct fib_info *fib_create_info(struct fib_config *cfg)
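
fib_hash_alloc/fib_hash_free/fib_hash_move gain an fib_info_ prefix, matching the fib_hash to fib_trie renames later in this diff; the mechanics are unchanged. The move installs the doubled table, then rehashes every chain because the mask changed. A compact userspace rendition, minus the fib_info_lock (spin_lock_bh) the kernel holds across it:

#include <stdlib.h>

struct node { unsigned int key; struct node *next; };

static struct node **table;
static unsigned int table_size;	/* always a power of two */

static unsigned int hashfn(unsigned int key)
{
	return key & (table_size - 1);
}

/* Mirrors fib_info_hash_move(): point at the larger table, then walk
 * every old chain and rehash each entry under the new mask. */
static void hash_move(struct node **new_table, unsigned int new_size)
{
	struct node **old_table = table;
	unsigned int old_size = table_size;

	table = new_table;
	table_size = new_size;

	for (unsigned int i = 0; i < old_size; i++) {
		struct node *n = old_table[i];

		while (n) {
			struct node *next = n->next;
			unsigned int h = hashfn(n->key);

			n->next = table[h];
			table[h] = n;
			n = next;
		}
	}
	free(old_table);
}

int main(void)
{
	table_size = 2;
	table = calloc(table_size, sizeof(*table));
	if (!table)
		return 1;

	for (unsigned int i = 0; i < 4; i++) {	/* 4 entries >= size 2: grow */
		struct node *n = malloc(sizeof(*n));

		if (!n)
			return 1;
		n->key = i;
		n->next = table[hashfn(i)];
		table[hashfn(i)] = n;
	}
	hash_move(calloc(table_size * 2, sizeof(*table)), table_size * 2);
	return 0;
}
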
@@ -705,6 +703,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
705 int nhs = 1; 703 int nhs = 1;
706 struct net *net = cfg->fc_nlinfo.nl_net; 704 struct net *net = cfg->fc_nlinfo.nl_net;
707 705
706 if (cfg->fc_type > RTN_MAX)
707 goto err_inval;
708
708 /* Fast check to catch the most weird cases */ 709 /* Fast check to catch the most weird cases */
709 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 710 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
710 goto err_inval; 711 goto err_inval;
@@ -718,8 +719,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
718#endif 719#endif
719 720
720 err = -ENOBUFS; 721 err = -ENOBUFS;
721 if (fib_info_cnt >= fib_hash_size) { 722 if (fib_info_cnt >= fib_info_hash_size) {
722 unsigned int new_size = fib_hash_size << 1; 723 unsigned int new_size = fib_info_hash_size << 1;
723 struct hlist_head *new_info_hash; 724 struct hlist_head *new_info_hash;
724 struct hlist_head *new_laddrhash; 725 struct hlist_head *new_laddrhash;
725 unsigned int bytes; 726 unsigned int bytes;
@@ -727,21 +728,27 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
727 if (!new_size) 728 if (!new_size)
728 new_size = 1; 729 new_size = 1;
729 bytes = new_size * sizeof(struct hlist_head *); 730 bytes = new_size * sizeof(struct hlist_head *);
730 new_info_hash = fib_hash_alloc(bytes); 731 new_info_hash = fib_info_hash_alloc(bytes);
731 new_laddrhash = fib_hash_alloc(bytes); 732 new_laddrhash = fib_info_hash_alloc(bytes);
732 if (!new_info_hash || !new_laddrhash) { 733 if (!new_info_hash || !new_laddrhash) {
733 fib_hash_free(new_info_hash, bytes); 734 fib_info_hash_free(new_info_hash, bytes);
734 fib_hash_free(new_laddrhash, bytes); 735 fib_info_hash_free(new_laddrhash, bytes);
735 } else 736 } else
736 fib_hash_move(new_info_hash, new_laddrhash, new_size); 737 fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
737 738
738 if (!fib_hash_size) 739 if (!fib_info_hash_size)
739 goto failure; 740 goto failure;
740 } 741 }
741 742
742 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 743 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
743 if (fi == NULL) 744 if (fi == NULL)
744 goto failure; 745 goto failure;
746 if (cfg->fc_mx) {
747 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
748 if (!fi->fib_metrics)
749 goto failure;
750 } else
751 fi->fib_metrics = (u32 *) dst_default_metrics;
745 fib_info_cnt++; 752 fib_info_cnt++;
746 753
747 fi->fib_net = hold_net(net); 754 fi->fib_net = hold_net(net);
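
fib_create_info now allocates a private metrics array only when the route actually configures metrics (cfg->fc_mx); all other routes share the read-only dst_default_metrics, and free_fib_info_rcu above frees the array only when it isn't the shared one. The same copy-on-configure idea in miniature:

#include <stdlib.h>

#define RTAX_MAX_M 16	/* stand-in for RTAX_MAX */

/* One shared, read-only default vector: routes without custom metrics
 * all point here and allocate nothing, as with dst_default_metrics. */
static const unsigned int default_metrics[RTAX_MAX_M];

struct route_m {
	unsigned int *metrics;
};

static int route_init(struct route_m *r, int has_custom_metrics)
{
	if (has_custom_metrics) {
		r->metrics = calloc(RTAX_MAX_M, sizeof(unsigned int));
		if (!r->metrics)
			return -1;	/* the -ENOBUFS path above */
	} else {
		r->metrics = (unsigned int *)default_metrics;
	}
	return 0;
}

/* Mirrors free_fib_info_rcu(): only free what this route owns. */
static void route_free(struct route_m *r)
{
	if (r->metrics != (unsigned int *)default_metrics)
		free(r->metrics);
}

int main(void)
{
	struct route_m plain, custom;

	if (route_init(&plain, 0) || route_init(&custom, 1))
		return 1;
	route_free(&plain);	/* no-op: shared default */
	route_free(&custom);	/* frees the private vector */
	return 0;
}
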
@@ -779,7 +786,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
779 goto err_inval; 786 goto err_inval;
780 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 787 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
781 goto err_inval; 788 goto err_inval;
782#ifdef CONFIG_NET_CLS_ROUTE 789#ifdef CONFIG_IP_ROUTE_CLASSID
783 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 790 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
784 goto err_inval; 791 goto err_inval;
785#endif 792#endif
@@ -792,7 +799,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
792 nh->nh_oif = cfg->fc_oif; 799 nh->nh_oif = cfg->fc_oif;
793 nh->nh_gw = cfg->fc_gw; 800 nh->nh_gw = cfg->fc_gw;
794 nh->nh_flags = cfg->fc_flags; 801 nh->nh_flags = cfg->fc_flags;
795#ifdef CONFIG_NET_CLS_ROUTE 802#ifdef CONFIG_IP_ROUTE_CLASSID
796 nh->nh_tclassid = cfg->fc_flow; 803 nh->nh_tclassid = cfg->fc_flow;
797#endif 804#endif
798#ifdef CONFIG_IP_ROUTE_MULTIPATH 805#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -804,6 +811,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
804 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 811 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
805 goto err_inval; 812 goto err_inval;
806 goto link_it; 813 goto link_it;
814 } else {
815 switch (cfg->fc_type) {
816 case RTN_UNICAST:
817 case RTN_LOCAL:
818 case RTN_BROADCAST:
819 case RTN_ANYCAST:
820 case RTN_MULTICAST:
821 break;
822 default:
823 goto err_inval;
824 }
807 } 825 }
808 826
809 if (cfg->fc_scope > RT_SCOPE_HOST) 827 if (cfg->fc_scope > RT_SCOPE_HOST)
@@ -835,6 +853,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
835 goto err_inval; 853 goto err_inval;
836 } 854 }
837 855
856 change_nexthops(fi) {
857 nexthop_nh->nh_cfg_scope = cfg->fc_scope;
858 nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
859 nexthop_nh->nh_gw,
860 nexthop_nh->nh_cfg_scope);
861 } endfor_nexthops(fi)
862
838link_it: 863link_it:
839 ofi = fib_find_info(fi); 864 ofi = fib_find_info(fi);
840 if (ofi) { 865 if (ofi) {
@@ -880,84 +905,6 @@ failure:
880 return ERR_PTR(err); 905 return ERR_PTR(err);
881} 906}
882 907
883/* Note! fib_semantic_match intentionally uses RCU list functions. */
884int fib_semantic_match(struct list_head *head, const struct flowi *flp,
885 struct fib_result *res, int prefixlen, int fib_flags)
886{
887 struct fib_alias *fa;
888 int nh_sel = 0;
889
890 list_for_each_entry_rcu(fa, head, fa_list) {
891 int err;
892
893 if (fa->fa_tos &&
894 fa->fa_tos != flp->fl4_tos)
895 continue;
896
897 if (fa->fa_scope < flp->fl4_scope)
898 continue;
899
900 fib_alias_accessed(fa);
901
902 err = fib_props[fa->fa_type].error;
903 if (err == 0) {
904 struct fib_info *fi = fa->fa_info;
905
906 if (fi->fib_flags & RTNH_F_DEAD)
907 continue;
908
909 switch (fa->fa_type) {
910 case RTN_UNICAST:
911 case RTN_LOCAL:
912 case RTN_BROADCAST:
913 case RTN_ANYCAST:
914 case RTN_MULTICAST:
915 for_nexthops(fi) {
916 if (nh->nh_flags & RTNH_F_DEAD)
917 continue;
918 if (!flp->oif || flp->oif == nh->nh_oif)
919 break;
920 }
921#ifdef CONFIG_IP_ROUTE_MULTIPATH
922 if (nhsel < fi->fib_nhs) {
923 nh_sel = nhsel;
924 goto out_fill_res;
925 }
926#else
927 if (nhsel < 1)
928 goto out_fill_res;
929#endif
930 endfor_nexthops(fi);
931 continue;
932
933 default:
934 pr_warning("fib_semantic_match bad type %#x\n",
935 fa->fa_type);
936 return -EINVAL;
937 }
938 }
939 return err;
940 }
941 return 1;
942
943out_fill_res:
944 res->prefixlen = prefixlen;
945 res->nh_sel = nh_sel;
946 res->type = fa->fa_type;
947 res->scope = fa->fa_scope;
948 res->fi = fa->fa_info;
949 if (!(fib_flags & FIB_LOOKUP_NOREF))
950 atomic_inc(&res->fi->fib_clntref);
951 return 0;
952}
953
954/* Find appropriate source address to this destination */
955
956__be32 __fib_res_prefsrc(struct fib_result *res)
957{
958 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
959}
960
961int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 908int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
962 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 909 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
963 struct fib_info *fi, unsigned int flags) 910 struct fib_info *fi, unsigned int flags)
@@ -1002,7 +949,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
1002 949
1003 if (fi->fib_nh->nh_oif) 950 if (fi->fib_nh->nh_oif)
1004 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 951 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
1005#ifdef CONFIG_NET_CLS_ROUTE 952#ifdef CONFIG_IP_ROUTE_CLASSID
1006 if (fi->fib_nh[0].nh_tclassid) 953 if (fi->fib_nh[0].nh_tclassid)
1007 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 954 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
1008#endif 955#endif
@@ -1027,7 +974,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
1027 974
1028 if (nh->nh_gw) 975 if (nh->nh_gw)
1029 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 976 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1030#ifdef CONFIG_NET_CLS_ROUTE 977#ifdef CONFIG_IP_ROUTE_CLASSID
1031 if (nh->nh_tclassid) 978 if (nh->nh_tclassid)
1032 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 979 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1033#endif 980#endif
@@ -1125,6 +1072,80 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1125 return ret; 1072 return ret;
1126} 1073}
1127 1074
1075/* Must be invoked inside of an RCU protected region. */
1076void fib_select_default(struct fib_result *res)
1077{
1078 struct fib_info *fi = NULL, *last_resort = NULL;
1079 struct list_head *fa_head = res->fa_head;
1080 struct fib_table *tb = res->table;
1081 int order = -1, last_idx = -1;
1082 struct fib_alias *fa;
1083
1084 list_for_each_entry_rcu(fa, fa_head, fa_list) {
1085 struct fib_info *next_fi = fa->fa_info;
1086
1087 if (fa->fa_scope != res->scope ||
1088 fa->fa_type != RTN_UNICAST)
1089 continue;
1090
1091 if (next_fi->fib_priority > res->fi->fib_priority)
1092 break;
1093 if (!next_fi->fib_nh[0].nh_gw ||
1094 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1095 continue;
1096
1097 fib_alias_accessed(fa);
1098
1099 if (fi == NULL) {
1100 if (next_fi != res->fi)
1101 break;
1102 } else if (!fib_detect_death(fi, order, &last_resort,
1103 &last_idx, tb->tb_default)) {
1104 fib_result_assign(res, fi);
1105 tb->tb_default = order;
1106 goto out;
1107 }
1108 fi = next_fi;
1109 order++;
1110 }
1111
1112 if (order <= 0 || fi == NULL) {
1113 tb->tb_default = -1;
1114 goto out;
1115 }
1116
1117 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1118 tb->tb_default)) {
1119 fib_result_assign(res, fi);
1120 tb->tb_default = order;
1121 goto out;
1122 }
1123
1124 if (last_idx >= 0)
1125 fib_result_assign(res, last_resort);
1126 tb->tb_default = last_idx;
1127out:
1128 return;
1129}
1130
1131void fib_update_nh_saddrs(struct net_device *dev)
1132{
1133 struct hlist_head *head;
1134 struct hlist_node *node;
1135 struct fib_nh *nh;
1136 unsigned int hash;
1137
1138 hash = fib_devindex_hashfn(dev->ifindex);
1139 head = &fib_info_devhash[hash];
1140 hlist_for_each_entry(nh, node, head, nh_hash) {
1141 if (nh->nh_dev != dev)
1142 continue;
1143 nh->nh_saddr = inet_select_addr(nh->nh_dev,
1144 nh->nh_gw,
1145 nh->nh_cfg_scope);
1146 }
1147}
1148
1128#ifdef CONFIG_IP_ROUTE_MULTIPATH 1149#ifdef CONFIG_IP_ROUTE_MULTIPATH
1129 1150
1130/* 1151/*
@@ -1189,7 +1210,7 @@ int fib_sync_up(struct net_device *dev)
1189 * The algorithm is suboptimal, but it provides really 1210 * The algorithm is suboptimal, but it provides really
1190 * fair weighted route distribution. 1211 * fair weighted route distribution.
1191 */ 1212 */
1192void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1213void fib_select_multipath(struct fib_result *res)
1193{ 1214{
1194 struct fib_info *fi = res->fi; 1215 struct fib_info *fi = res->fi;
1195 int w; 1216 int w;
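
Two pieces moved into fib_semantics.c above: fib_select_default() (its fib_trie.c twin is deleted below) and the new nh_saddr cache. The preferred source address of each nexthop is now computed in fib_create_info() and refreshed by fib_update_nh_saddrs() on device events, replacing the per-lookup __fib_res_prefsrc() this diff removes. A self-contained model of the cache-and-refresh pair, with invented names and a stub in place of inet_select_addr():

#include <stdio.h>

/* Stand-in for inet_select_addr(); the real code scans the device's
 * addresses for one suiting the gateway and scope. */
static unsigned int select_addr(int ifindex, unsigned int gw,
				unsigned int scope)
{
	return (unsigned int)ifindex + gw + scope; /* fake but deterministic */
}

struct nh_model {
	int ifindex;
	unsigned int gw, scope;
	unsigned int saddr;	/* cached preferred source address */
};

/* Configure-time half: mirrors the change_nexthops() loop added
 * to fib_create_info(). */
static void nh_init_saddr(struct nh_model *nh)
{
	nh->saddr = select_addr(nh->ifindex, nh->gw, nh->scope);
}

/* Event half: mirrors fib_update_nh_saddrs(), which walks the
 * devindex hash and refreshes every nexthop on the changed device. */
static void update_dev_saddrs(struct nh_model *nhs, int n, int ifindex)
{
	for (int i = 0; i < n; i++)
		if (nhs[i].ifindex == ifindex)
			nh_init_saddr(&nhs[i]);
}

int main(void)
{
	struct nh_model nh = { .ifindex = 2, .gw = 100, .scope = 1 };

	nh_init_saddr(&nh);
	update_dev_saddrs(&nh, 1, 2);	/* e.g. after an address change */
	printf("%u\n", nh.saddr);
	return 0;
}
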
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0f280348e0f..3d28a35c2e1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -95,7 +95,7 @@ typedef unsigned int t_key;
95#define IS_TNODE(n) (!(n->parent & T_LEAF)) 95#define IS_TNODE(n) (!(n->parent & T_LEAF))
96#define IS_LEAF(n) (n->parent & T_LEAF) 96#define IS_LEAF(n) (n->parent & T_LEAF)
97 97
98struct node { 98struct rt_trie_node {
99 unsigned long parent; 99 unsigned long parent;
100 t_key key; 100 t_key key;
101}; 101};
@@ -126,7 +126,7 @@ struct tnode {
126 struct work_struct work; 126 struct work_struct work;
127 struct tnode *tnode_free; 127 struct tnode *tnode_free;
128 }; 128 };
129 struct node *child[0]; 129 struct rt_trie_node *child[0];
130}; 130};
131 131
132#ifdef CONFIG_IP_FIB_TRIE_STATS 132#ifdef CONFIG_IP_FIB_TRIE_STATS
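
struct tnode (renamed struct rt_trie_node for its base type here) keeps its children in a zero-length trailing array, so a node and its 2^bits child pointers come from one allocation; tnode_new() further down computes exactly that size. The same layout in portable C99, which spells child[0] as child[]:

#include <stdlib.h>
#include <stdio.h>

struct tnode_m {
	unsigned int bits;		/* log2 of the child count */
	struct tnode_m *child[];	/* flexible array member */
};

/* One allocation covers the header plus 2^bits child pointers,
 * matching tnode_new(): sizeof(struct tnode) + (sizeof(ptr) << bits). */
static struct tnode_m *tnode_new_m(unsigned int bits)
{
	size_t sz = sizeof(struct tnode_m) +
		    (sizeof(struct tnode_m *) << bits);
	struct tnode_m *tn = calloc(1, sz);

	if (tn)
		tn->bits = bits;
	return tn;
}

int main(void)
{
	struct tnode_m *tn = tnode_new_m(4);	/* room for 16 children */

	if (tn)
		printf("children: %u\n", 1u << tn->bits);
	free(tn);
	return 0;
}
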
@@ -151,16 +151,16 @@ struct trie_stat {
151}; 151};
152 152
153struct trie { 153struct trie {
154 struct node *trie; 154 struct rt_trie_node *trie;
155#ifdef CONFIG_IP_FIB_TRIE_STATS 155#ifdef CONFIG_IP_FIB_TRIE_STATS
156 struct trie_use_stats stats; 156 struct trie_use_stats stats;
157#endif 157#endif
158}; 158};
159 159
160static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 160static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
161static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 161static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
162 int wasfull); 162 int wasfull);
163static struct node *resize(struct trie *t, struct tnode *tn); 163static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
164static struct tnode *inflate(struct trie *t, struct tnode *tn); 164static struct tnode *inflate(struct trie *t, struct tnode *tn);
165static struct tnode *halve(struct trie *t, struct tnode *tn); 165static struct tnode *halve(struct trie *t, struct tnode *tn);
166/* tnodes to free after resize(); protected by RTNL */ 166/* tnodes to free after resize(); protected by RTNL */
@@ -177,12 +177,12 @@ static const int sync_pages = 128;
177static struct kmem_cache *fn_alias_kmem __read_mostly; 177static struct kmem_cache *fn_alias_kmem __read_mostly;
178static struct kmem_cache *trie_leaf_kmem __read_mostly; 178static struct kmem_cache *trie_leaf_kmem __read_mostly;
179 179
180static inline struct tnode *node_parent(struct node *node) 180static inline struct tnode *node_parent(struct rt_trie_node *node)
181{ 181{
182 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 182 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
183} 183}
184 184
185static inline struct tnode *node_parent_rcu(struct node *node) 185static inline struct tnode *node_parent_rcu(struct rt_trie_node *node)
186{ 186{
187 struct tnode *ret = node_parent(node); 187 struct tnode *ret = node_parent(node);
188 188
@@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node)
192/* Same as rcu_assign_pointer 192/* Same as rcu_assign_pointer
193 * but that macro() assumes that value is a pointer. 193 * but that macro() assumes that value is a pointer.
194 */ 194 */
195static inline void node_set_parent(struct node *node, struct tnode *ptr) 195static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
196{ 196{
197 smp_wmb(); 197 smp_wmb();
198 node->parent = (unsigned long)ptr | NODE_TYPE(node); 198 node->parent = (unsigned long)ptr | NODE_TYPE(node);
199} 199}
200 200
201static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) 201static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i)
202{ 202{
203 BUG_ON(i >= 1U << tn->bits); 203 BUG_ON(i >= 1U << tn->bits);
204 204
205 return tn->child[i]; 205 return tn->child[i];
206} 206}
207 207
208static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 208static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
209{ 209{
210 struct node *ret = tnode_get_child(tn, i); 210 struct rt_trie_node *ret = tnode_get_child(tn, i);
211 211
212 return rcu_dereference_rtnl(ret); 212 return rcu_dereference_rtnl(ret);
213} 213}
@@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn)
217 return 1 << tn->bits; 217 return 1 << tn->bits;
218} 218}
219 219
220static inline t_key mask_pfx(t_key k, unsigned short l) 220static inline t_key mask_pfx(t_key k, unsigned int l)
221{ 221{
222 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 222 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
223} 223}
224 224
225static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 225static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
226{ 226{
227 if (offset < KEYLENGTH) 227 if (offset < KEYLENGTH)
228 return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 228 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
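
mask_pfx and tkey_extract_bits switch their offset/bits parameters to unsigned, matching fib_table_lookup below where pos, bits and chopped_off become unsigned int. The extraction itself is two shifts: drop the leading offset bits, then keep the top bits of the remainder. A runnable check of the same arithmetic on 32-bit keys:

#include <assert.h>
#include <stdint.h>

#define KEYLENGTH 32

/* Same arithmetic as tkey_extract_bits(): shift out `offset` leading
 * bits, then shift right so only `bits` bits survive. */
static uint32_t extract_bits(uint32_t key, unsigned int offset,
			     unsigned int bits)
{
	if (offset < KEYLENGTH)
		return (uint32_t)(key << offset) >> (KEYLENGTH - bits);
	return 0;
}

int main(void)
{
	/* Top 8 bits of 10.1.2.3 (0x0a010203) are 0x0a. */
	assert(extract_bits(0x0a010203u, 0, 8) == 0x0a);
	/* Bits 8..15 are 0x01. */
	assert(extract_bits(0x0a010203u, 8, 8) == 0x01);
	return 0;
}
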
@@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head)
378{ 378{
379 struct tnode *tn = container_of(head, struct tnode, rcu); 379 struct tnode *tn = container_of(head, struct tnode, rcu);
380 size_t size = sizeof(struct tnode) + 380 size_t size = sizeof(struct tnode) +
381 (sizeof(struct node *) << tn->bits); 381 (sizeof(struct rt_trie_node *) << tn->bits);
382 382
383 if (size <= PAGE_SIZE) 383 if (size <= PAGE_SIZE)
384 kfree(tn); 384 kfree(tn);
@@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn)
402 tn->tnode_free = tnode_free_head; 402 tn->tnode_free = tnode_free_head;
403 tnode_free_head = tn; 403 tnode_free_head = tn;
404 tnode_free_size += sizeof(struct tnode) + 404 tnode_free_size += sizeof(struct tnode) +
405 (sizeof(struct node *) << tn->bits); 405 (sizeof(struct rt_trie_node *) << tn->bits);
406} 406}
407 407
408static void tnode_free_flush(void) 408static void tnode_free_flush(void)
@@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen)
443 443
444static struct tnode *tnode_new(t_key key, int pos, int bits) 444static struct tnode *tnode_new(t_key key, int pos, int bits)
445{ 445{
446 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); 446 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
447 struct tnode *tn = tnode_alloc(sz); 447 struct tnode *tn = tnode_alloc(sz);
448 448
449 if (tn) { 449 if (tn) {
@@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
456 } 456 }
457 457
458 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), 458 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
459 sizeof(struct node) << bits); 459 sizeof(struct rt_trie_node) << bits);
460 return tn; 460 return tn;
461} 461}
462 462
@@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
465 * and no bits are skipped. See discussion in dyntree paper p. 6 465 * and no bits are skipped. See discussion in dyntree paper p. 6
466 */ 466 */
467 467
468static inline int tnode_full(const struct tnode *tn, const struct node *n) 468static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
469{ 469{
470 if (n == NULL || IS_LEAF(n)) 470 if (n == NULL || IS_LEAF(n))
471 return 0; 471 return 0;
@@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n)
474} 474}
475 475
476static inline void put_child(struct trie *t, struct tnode *tn, int i, 476static inline void put_child(struct trie *t, struct tnode *tn, int i,
477 struct node *n) 477 struct rt_trie_node *n)
478{ 478{
479 tnode_put_child_reorg(tn, i, n, -1); 479 tnode_put_child_reorg(tn, i, n, -1);
480} 480}
@@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
484 * Update the value of full_children and empty_children. 484 * Update the value of full_children and empty_children.
485 */ 485 */
486 486
487static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 487static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
488 int wasfull) 488 int wasfull)
489{ 489{
490 struct node *chi = tn->child[i]; 490 struct rt_trie_node *chi = tn->child[i];
491 int isfull; 491 int isfull;
492 492
493 BUG_ON(i >= 1<<tn->bits); 493 BUG_ON(i >= 1<<tn->bits);
@@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
515} 515}
516 516
517#define MAX_WORK 10 517#define MAX_WORK 10
518static struct node *resize(struct trie *t, struct tnode *tn) 518static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
519{ 519{
520 int i; 520 int i;
521 struct tnode *old_tn; 521 struct tnode *old_tn;
@@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
605 605
606 /* Keep root node larger */ 606 /* Keep root node larger */
607 607
608 if (!node_parent((struct node *)tn)) { 608 if (!node_parent((struct rt_trie_node *)tn)) {
609 inflate_threshold_use = inflate_threshold_root; 609 inflate_threshold_use = inflate_threshold_root;
610 halve_threshold_use = halve_threshold_root; 610 halve_threshold_use = halve_threshold_root;
611 } else { 611 } else {
@@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
635 635
636 /* Return if at least one inflate is run */ 636 /* Return if at least one inflate is run */
637 if (max_work != MAX_WORK) 637 if (max_work != MAX_WORK)
638 return (struct node *) tn; 638 return (struct rt_trie_node *) tn;
639 639
640 /* 640 /*
641 * Halve as long as the number of empty children in this 641 * Halve as long as the number of empty children in this
@@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
663 if (tn->empty_children == tnode_child_length(tn) - 1) { 663 if (tn->empty_children == tnode_child_length(tn) - 1) {
664one_child: 664one_child:
665 for (i = 0; i < tnode_child_length(tn); i++) { 665 for (i = 0; i < tnode_child_length(tn); i++) {
666 struct node *n; 666 struct rt_trie_node *n;
667 667
668 n = tn->child[i]; 668 n = tn->child[i];
669 if (!n) 669 if (!n)
@@ -676,7 +676,7 @@ one_child:
676 return n; 676 return n;
677 } 677 }
678 } 678 }
679 return (struct node *) tn; 679 return (struct rt_trie_node *) tn;
680} 680}
681 681
682static struct tnode *inflate(struct trie *t, struct tnode *tn) 682static struct tnode *inflate(struct trie *t, struct tnode *tn)
@@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
723 goto nomem; 723 goto nomem;
724 } 724 }
725 725
726 put_child(t, tn, 2*i, (struct node *) left); 726 put_child(t, tn, 2*i, (struct rt_trie_node *) left);
727 put_child(t, tn, 2*i+1, (struct node *) right); 727 put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
728 } 728 }
729 } 729 }
730 730
731 for (i = 0; i < olen; i++) { 731 for (i = 0; i < olen; i++) {
732 struct tnode *inode; 732 struct tnode *inode;
733 struct node *node = tnode_get_child(oldtnode, i); 733 struct rt_trie_node *node = tnode_get_child(oldtnode, i);
734 struct tnode *left, *right; 734 struct tnode *left, *right;
735 int size, j; 735 int size, j;
736 736
@@ -825,7 +825,7 @@ nomem:
825static struct tnode *halve(struct trie *t, struct tnode *tn) 825static struct tnode *halve(struct trie *t, struct tnode *tn)
826{ 826{
827 struct tnode *oldtnode = tn; 827 struct tnode *oldtnode = tn;
828 struct node *left, *right; 828 struct rt_trie_node *left, *right;
829 int i; 829 int i;
830 int olen = tnode_child_length(tn); 830 int olen = tnode_child_length(tn);
831 831
@@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
856 if (!newn) 856 if (!newn)
857 goto nomem; 857 goto nomem;
858 858
859 put_child(t, tn, i/2, (struct node *)newn); 859 put_child(t, tn, i/2, (struct rt_trie_node *)newn);
860 } 860 }
861 861
862 } 862 }
@@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key)
958{ 958{
959 int pos; 959 int pos;
960 struct tnode *tn; 960 struct tnode *tn;
961 struct node *n; 961 struct rt_trie_node *n;
962 962
963 pos = 0; 963 pos = 0;
964 n = rcu_dereference_rtnl(t->trie); 964 n = rcu_dereference_rtnl(t->trie);
@@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
993 993
994 key = tn->key; 994 key = tn->key;
995 995
996 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 996 while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
997 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 997 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
998 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 998 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
999 tn = (struct tnode *) resize(t, (struct tnode *)tn); 999 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1000 1000
1001 tnode_put_child_reorg((struct tnode *)tp, cindex, 1001 tnode_put_child_reorg((struct tnode *)tp, cindex,
1002 (struct node *)tn, wasfull); 1002 (struct rt_trie_node *)tn, wasfull);
1003 1003
1004 tp = node_parent((struct node *) tn); 1004 tp = node_parent((struct rt_trie_node *) tn);
1005 if (!tp) 1005 if (!tp)
1006 rcu_assign_pointer(t->trie, (struct node *)tn); 1006 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1007 1007
1008 tnode_free_flush(); 1008 tnode_free_flush();
1009 if (!tp) 1009 if (!tp)
@@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1015 if (IS_TNODE(tn)) 1015 if (IS_TNODE(tn))
1016 tn = (struct tnode *)resize(t, (struct tnode *)tn); 1016 tn = (struct tnode *)resize(t, (struct tnode *)tn);
1017 1017
1018 rcu_assign_pointer(t->trie, (struct node *)tn); 1018 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1019 tnode_free_flush(); 1019 tnode_free_flush();
1020} 1020}
1021 1021
@@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1025{ 1025{
1026 int pos, newpos; 1026 int pos, newpos;
1027 struct tnode *tp = NULL, *tn = NULL; 1027 struct tnode *tp = NULL, *tn = NULL;
1028 struct node *n; 1028 struct rt_trie_node *n;
1029 struct leaf *l; 1029 struct leaf *l;
1030 int missbit; 1030 int missbit;
1031 struct list_head *fa_head = NULL; 1031 struct list_head *fa_head = NULL;
@@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1111 if (t->trie && n == NULL) { 1111 if (t->trie && n == NULL) {
1112 /* Case 2: n is NULL, and will just insert a new leaf */ 1112 /* Case 2: n is NULL, and will just insert a new leaf */
1113 1113
1114 node_set_parent((struct node *)l, tp); 1114 node_set_parent((struct rt_trie_node *)l, tp);
1115 1115
1116 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1116 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1117 put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 1117 put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l);
1118 } else { 1118 } else {
1119 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1119 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1120 /* 1120 /*
@@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1141 return NULL; 1141 return NULL;
1142 } 1142 }
1143 1143
1144 node_set_parent((struct node *)tn, tp); 1144 node_set_parent((struct rt_trie_node *)tn, tp);
1145 1145
1146 missbit = tkey_extract_bits(key, newpos, 1); 1146 missbit = tkey_extract_bits(key, newpos, 1);
1147 put_child(t, tn, missbit, (struct node *)l); 1147 put_child(t, tn, missbit, (struct rt_trie_node *)l);
1148 put_child(t, tn, 1-missbit, n); 1148 put_child(t, tn, 1-missbit, n);
1149 1149
1150 if (tp) { 1150 if (tp) {
1151 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1151 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1152 put_child(t, (struct tnode *)tp, cindex, 1152 put_child(t, (struct tnode *)tp, cindex,
1153 (struct node *)tn); 1153 (struct rt_trie_node *)tn);
1154 } else { 1154 } else {
1155 rcu_assign_pointer(t->trie, (struct node *)tn); 1155 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1156 tp = tn; 1156 tp = tn;
1157 } 1157 }
1158 } 1158 }
@@ -1340,8 +1340,8 @@ err:
1340} 1340}
1341 1341
1342/* should be called with rcu_read_lock */ 1342/* should be called with rcu_read_lock */
1343static int check_leaf(struct trie *t, struct leaf *l, 1343static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1344 t_key key, const struct flowi *flp, 1344 t_key key, const struct flowi4 *flp,
1345 struct fib_result *res, int fib_flags) 1345 struct fib_result *res, int fib_flags)
1346{ 1346{
1347 struct leaf_info *li; 1347 struct leaf_info *li;
@@ -1349,40 +1349,75 @@ static int check_leaf(struct trie *t, struct leaf *l,
1349 struct hlist_node *node; 1349 struct hlist_node *node;
1350 1350
1351 hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1351 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1352 int err; 1352 struct fib_alias *fa;
1353 int plen = li->plen; 1353 int plen = li->plen;
1354 __be32 mask = inet_make_mask(plen); 1354 __be32 mask = inet_make_mask(plen);
1355 1355
1356 if (l->key != (key & ntohl(mask))) 1356 if (l->key != (key & ntohl(mask)))
1357 continue; 1357 continue;
1358 1358
1359 err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); 1359 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1360 struct fib_info *fi = fa->fa_info;
1361 int nhsel, err;
1360 1362
1363 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1364 continue;
1365 if (fa->fa_scope < flp->flowi4_scope)
1366 continue;
1367 fib_alias_accessed(fa);
1368 err = fib_props[fa->fa_type].error;
1369 if (err) {
1361#ifdef CONFIG_IP_FIB_TRIE_STATS 1370#ifdef CONFIG_IP_FIB_TRIE_STATS
1362 if (err <= 0) 1371 t->stats.semantic_match_miss++;
1363 t->stats.semantic_match_passed++; 1372#endif
1364 else 1373 return 1;
1365 t->stats.semantic_match_miss++; 1374 }
1375 if (fi->fib_flags & RTNH_F_DEAD)
1376 continue;
1377 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1378 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1379
1380 if (nh->nh_flags & RTNH_F_DEAD)
1381 continue;
1382 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
1383 continue;
1384
1385#ifdef CONFIG_IP_FIB_TRIE_STATS
1386 t->stats.semantic_match_passed++;
1387#endif
1388 res->prefixlen = plen;
1389 res->nh_sel = nhsel;
1390 res->type = fa->fa_type;
1391 res->scope = fa->fa_scope;
1392 res->fi = fi;
1393 res->table = tb;
1394 res->fa_head = &li->falh;
1395 if (!(fib_flags & FIB_LOOKUP_NOREF))
1396 atomic_inc(&res->fi->fib_clntref);
1397 return 0;
1398 }
1399 }
1400
1401#ifdef CONFIG_IP_FIB_TRIE_STATS
1402 t->stats.semantic_match_miss++;
1366#endif 1403#endif
1367 if (err <= 0)
1368 return err;
1369 } 1404 }
1370 1405
1371 return 1; 1406 return 1;
1372} 1407}
1373 1408
1374int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, 1409int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1375 struct fib_result *res, int fib_flags) 1410 struct fib_result *res, int fib_flags)
1376{ 1411{
1377 struct trie *t = (struct trie *) tb->tb_data; 1412 struct trie *t = (struct trie *) tb->tb_data;
1378 int ret; 1413 int ret;
1379 struct node *n; 1414 struct rt_trie_node *n;
1380 struct tnode *pn; 1415 struct tnode *pn;
1381 int pos, bits; 1416 unsigned int pos, bits;
1382 t_key key = ntohl(flp->fl4_dst); 1417 t_key key = ntohl(flp->daddr);
1383 int chopped_off; 1418 unsigned int chopped_off;
1384 t_key cindex = 0; 1419 t_key cindex = 0;
1385 int current_prefix_length = KEYLENGTH; 1420 unsigned int current_prefix_length = KEYLENGTH;
1386 struct tnode *cn; 1421 struct tnode *cn;
1387 t_key pref_mismatch; 1422 t_key pref_mismatch;
1388 1423
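
check_leaf absorbs the body of the deleted fib_semantic_match(): walk a leaf's aliases, filter by TOS and scope, skip dead fib_infos and dead nexthops, honour the requested output interface, and fill the fib_result (now including res->table and res->fa_head, which the relocated fib_select_default() consumes). A userspace skeleton of that loop; struct names are invented and the fib_props error short-circuit is omitted:

#include <stdio.h>

struct nh_m    { int dead; int oif; };
struct fib_m   { int dead; int nr_nh; struct nh_m nh[2]; };
struct alias_m { unsigned char tos, scope; struct fib_m *fi; };

/* Returns the chosen nexthop index, or -1 for no match. */
static int semantic_match(const struct alias_m *fa, int nfa,
			  unsigned char tos, unsigned char scope, int oif)
{
	for (int i = 0; i < nfa; i++) {
		const struct alias_m *a = &fa[i];

		if (a->tos && a->tos != tos)
			continue;
		if (a->scope < scope)
			continue;
		if (a->fi->dead)
			continue;
		for (int sel = 0; sel < a->fi->nr_nh; sel++) {
			const struct nh_m *nh = &a->fi->nh[sel];

			if (nh->dead)
				continue;
			if (oif && oif != nh->oif)
				continue;
			return sel;	/* caller fills struct fib_result */
		}
	}
	return -1;
}

int main(void)
{
	struct fib_m fi = { .nr_nh = 2,
			    .nh = { { .dead = 1, .oif = 1 }, { .oif = 2 } } };
	struct alias_m fa = { .scope = 2, .fi = &fi };

	printf("%d\n", semantic_match(&fa, 1, 0, 1, 2)); /* 1: 2nd nexthop */
	return 0;
}
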
@@ -1398,7 +1433,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1398 1433
1399 /* Just a leaf? */ 1434 /* Just a leaf? */
1400 if (IS_LEAF(n)) { 1435 if (IS_LEAF(n)) {
1401 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); 1436 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1402 goto found; 1437 goto found;
1403 } 1438 }
1404 1439
@@ -1423,7 +1458,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1423 } 1458 }
1424 1459
1425 if (IS_LEAF(n)) { 1460 if (IS_LEAF(n)) {
1426 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); 1461 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1427 if (ret > 0) 1462 if (ret > 0)
1428 goto backtrace; 1463 goto backtrace;
1429 goto found; 1464 goto found;
@@ -1541,7 +1576,7 @@ backtrace:
1541 if (chopped_off <= pn->bits) { 1576 if (chopped_off <= pn->bits) {
1542 cindex &= ~(1 << (chopped_off-1)); 1577 cindex &= ~(1 << (chopped_off-1));
1543 } else { 1578 } else {
1544 struct tnode *parent = node_parent_rcu((struct node *) pn); 1579 struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
1545 if (!parent) 1580 if (!parent)
1546 goto failed; 1581 goto failed;
1547 1582
@@ -1568,7 +1603,7 @@ found:
1568 */ 1603 */
1569static void trie_leaf_remove(struct trie *t, struct leaf *l) 1604static void trie_leaf_remove(struct trie *t, struct leaf *l)
1570{ 1605{
1571 struct tnode *tp = node_parent((struct node *) l); 1606 struct tnode *tp = node_parent((struct rt_trie_node *) l);
1572 1607
1573 pr_debug("entering trie_leaf_remove(%p)\n", l); 1608 pr_debug("entering trie_leaf_remove(%p)\n", l);
1574 1609
@@ -1706,7 +1741,7 @@ static int trie_flush_leaf(struct leaf *l)
1706 * Scan for the next right leaf starting at node p->child[idx] 1741 * Scan for the next right leaf starting at node p->child[idx]
1707 * Since we have back pointer, no recursion necessary. 1742 * Since we have back pointer, no recursion necessary.
1708 */ 1743 */
1709static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) 1744static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
1710{ 1745{
1711 do { 1746 do {
1712 t_key idx; 1747 t_key idx;
@@ -1732,7 +1767,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1732 } 1767 }
1733 1768
1734 /* Node empty, walk back up to parent */ 1769 /* Node empty, walk back up to parent */
1735 c = (struct node *) p; 1770 c = (struct rt_trie_node *) p;
1736 } while ((p = node_parent_rcu(c)) != NULL); 1771 } while ((p = node_parent_rcu(c)) != NULL);
1737 1772
1738 return NULL; /* Root of trie */ 1773 return NULL; /* Root of trie */
@@ -1753,7 +1788,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
1753 1788
1754static struct leaf *trie_nextleaf(struct leaf *l) 1789static struct leaf *trie_nextleaf(struct leaf *l)
1755{ 1790{
1756 struct node *c = (struct node *) l; 1791 struct rt_trie_node *c = (struct rt_trie_node *) l;
1757 struct tnode *p = node_parent_rcu(c); 1792 struct tnode *p = node_parent_rcu(c);
1758 1793
1759 if (!p) 1794 if (!p)
@@ -1802,80 +1837,6 @@ void fib_free_table(struct fib_table *tb)
1802 kfree(tb); 1837 kfree(tb);
1803} 1838}
1804 1839
1805void fib_table_select_default(struct fib_table *tb,
1806 const struct flowi *flp,
1807 struct fib_result *res)
1808{
1809 struct trie *t = (struct trie *) tb->tb_data;
1810 int order, last_idx;
1811 struct fib_info *fi = NULL;
1812 struct fib_info *last_resort;
1813 struct fib_alias *fa = NULL;
1814 struct list_head *fa_head;
1815 struct leaf *l;
1816
1817 last_idx = -1;
1818 last_resort = NULL;
1819 order = -1;
1820
1821 rcu_read_lock();
1822
1823 l = fib_find_node(t, 0);
1824 if (!l)
1825 goto out;
1826
1827 fa_head = get_fa_head(l, 0);
1828 if (!fa_head)
1829 goto out;
1830
1831 if (list_empty(fa_head))
1832 goto out;
1833
1834 list_for_each_entry_rcu(fa, fa_head, fa_list) {
1835 struct fib_info *next_fi = fa->fa_info;
1836
1837 if (fa->fa_scope != res->scope ||
1838 fa->fa_type != RTN_UNICAST)
1839 continue;
1840
1841 if (next_fi->fib_priority > res->fi->fib_priority)
1842 break;
1843 if (!next_fi->fib_nh[0].nh_gw ||
1844 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1845 continue;
1846
1847 fib_alias_accessed(fa);
1848
1849 if (fi == NULL) {
1850 if (next_fi != res->fi)
1851 break;
1852 } else if (!fib_detect_death(fi, order, &last_resort,
1853 &last_idx, tb->tb_default)) {
1854 fib_result_assign(res, fi);
1855 tb->tb_default = order;
1856 goto out;
1857 }
1858 fi = next_fi;
1859 order++;
1860 }
1861 if (order <= 0 || fi == NULL) {
1862 tb->tb_default = -1;
1863 goto out;
1864 }
1865
1866 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1867 tb->tb_default)) {
1868 fib_result_assign(res, fi);
1869 tb->tb_default = order;
1870 goto out;
1871 }
1872 if (last_idx >= 0)
1873 fib_result_assign(res, last_resort);
1874 tb->tb_default = last_idx;
1875out:
1876 rcu_read_unlock();
1877}
1878
1879static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1840static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1880 struct fib_table *tb, 1841 struct fib_table *tb,
1881 struct sk_buff *skb, struct netlink_callback *cb) 1842 struct sk_buff *skb, struct netlink_callback *cb)
@@ -1990,7 +1951,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1990 return skb->len; 1951 return skb->len;
1991} 1952}
1992 1953
1993void __init fib_hash_init(void) 1954void __init fib_trie_init(void)
1994{ 1955{
1995 fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1956 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1996 sizeof(struct fib_alias), 1957 sizeof(struct fib_alias),
@@ -2003,8 +1964,7 @@ void __init fib_hash_init(void)
2003} 1964}
2004 1965
2005 1966
2006/* Fix more generic FIB names for init later */ 1967struct fib_table *fib_trie_table(u32 id)
2007struct fib_table *fib_hash_table(u32 id)
2008{ 1968{
2009 struct fib_table *tb; 1969 struct fib_table *tb;
2010 struct trie *t; 1970 struct trie *t;
@@ -2036,7 +1996,7 @@ struct fib_trie_iter {
2036 unsigned int depth; 1996 unsigned int depth;
2037}; 1997};
2038 1998
2039static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 1999static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter)
2040{ 2000{
2041 struct tnode *tn = iter->tnode; 2001 struct tnode *tn = iter->tnode;
2042 unsigned int cindex = iter->index; 2002 unsigned int cindex = iter->index;
@@ -2050,7 +2010,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2050 iter->tnode, iter->index, iter->depth); 2010 iter->tnode, iter->index, iter->depth);
2051rescan: 2011rescan:
2052 while (cindex < (1<<tn->bits)) { 2012 while (cindex < (1<<tn->bits)) {
2053 struct node *n = tnode_get_child_rcu(tn, cindex); 2013 struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex);
2054 2014
2055 if (n) { 2015 if (n) {
2056 if (IS_LEAF(n)) { 2016 if (IS_LEAF(n)) {
@@ -2069,7 +2029,7 @@ rescan:
2069 } 2029 }
2070 2030
2071 /* Current node exhausted, pop back up */ 2031 /* Current node exhausted, pop back up */
2072 p = node_parent_rcu((struct node *)tn); 2032 p = node_parent_rcu((struct rt_trie_node *)tn);
2073 if (p) { 2033 if (p) {
2074 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2034 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2075 tn = p; 2035 tn = p;
@@ -2081,10 +2041,10 @@ rescan:
2081 return NULL; 2041 return NULL;
2082} 2042}
2083 2043
2084static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2044static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter,
2085 struct trie *t) 2045 struct trie *t)
2086{ 2046{
2087 struct node *n; 2047 struct rt_trie_node *n;
2088 2048
2089 if (!t) 2049 if (!t)
2090 return NULL; 2050 return NULL;
@@ -2108,7 +2068,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2108 2068
2109static void trie_collect_stats(struct trie *t, struct trie_stat *s) 2069static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2110{ 2070{
2111 struct node *n; 2071 struct rt_trie_node *n;
2112 struct fib_trie_iter iter; 2072 struct fib_trie_iter iter;
2113 2073
2114 memset(s, 0, sizeof(*s)); 2074 memset(s, 0, sizeof(*s));
@@ -2181,7 +2141,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2181 seq_putc(seq, '\n'); 2141 seq_putc(seq, '\n');
2182 seq_printf(seq, "\tPointers: %u\n", pointers); 2142 seq_printf(seq, "\tPointers: %u\n", pointers);
2183 2143
2184 bytes += sizeof(struct node *) * pointers; 2144 bytes += sizeof(struct rt_trie_node *) * pointers;
2185 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2145 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2186 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 2146 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2187} 2147}
@@ -2262,7 +2222,7 @@ static const struct file_operations fib_triestat_fops = {
2262 .release = single_release_net, 2222 .release = single_release_net,
2263}; 2223};
2264 2224
2265static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 2225static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2266{ 2226{
2267 struct fib_trie_iter *iter = seq->private; 2227 struct fib_trie_iter *iter = seq->private;
2268 struct net *net = seq_file_net(seq); 2228 struct net *net = seq_file_net(seq);
@@ -2275,7 +2235,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2275 struct fib_table *tb; 2235 struct fib_table *tb;
2276 2236
2277 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 2237 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2278 struct node *n; 2238 struct rt_trie_node *n;
2279 2239
2280 for (n = fib_trie_get_first(iter, 2240 for (n = fib_trie_get_first(iter,
2281 (struct trie *) tb->tb_data); 2241 (struct trie *) tb->tb_data);
@@ -2304,7 +2264,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2304 struct fib_table *tb = iter->tb; 2264 struct fib_table *tb = iter->tb;
2305 struct hlist_node *tb_node; 2265 struct hlist_node *tb_node;
2306 unsigned int h; 2266 unsigned int h;
2307 struct node *n; 2267 struct rt_trie_node *n;
2308 2268
2309 ++*pos; 2269 ++*pos;
2310 /* next node in same table */ 2270 /* next node in same table */
@@ -2390,7 +2350,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2390static int fib_trie_seq_show(struct seq_file *seq, void *v) 2350static int fib_trie_seq_show(struct seq_file *seq, void *v)
2391{ 2351{
2392 const struct fib_trie_iter *iter = seq->private; 2352 const struct fib_trie_iter *iter = seq->private;
2393 struct node *n = v; 2353 struct rt_trie_node *n = v;
2394 2354
2395 if (!node_parent_rcu(n)) 2355 if (!node_parent_rcu(n))
2396 fib_table_print(seq, iter->tb); 2356 fib_table_print(seq, iter->tb);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4aa1b7f01ea..a91dc161108 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk)
233 * Send an ICMP frame. 233 * Send an ICMP frame.
234 */ 234 */
235 235
236/* 236static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
237 * Check transmit rate limitation for given message.
238 * The rate information is held in the destination cache now.
239 * This function is generic and could be used for other purposes
240 * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
241 *
242 * Note that the same dst_entry fields are modified by functions in
243 * route.c too, but these work for packet destinations while xrlim_allow
244 * works for icmp destinations. This means the rate limiting information
245 * for one "ip object" is shared - and these ICMPs are twice limited:
246 * by source and by destination.
247 *
248 * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
249 * SHOULD allow setting of rate limits
250 *
251 * Shared between ICMPv4 and ICMPv6.
252 */
253#define XRLIM_BURST_FACTOR 6
254int xrlim_allow(struct dst_entry *dst, int timeout)
255{
256 unsigned long now, token = dst->rate_tokens;
257 int rc = 0;
258
259 now = jiffies;
260 token += now - dst->rate_last;
261 dst->rate_last = now;
262 if (token > XRLIM_BURST_FACTOR * timeout)
263 token = XRLIM_BURST_FACTOR * timeout;
264 if (token >= timeout) {
265 token -= timeout;
266 rc = 1;
267 }
268 dst->rate_tokens = token;
269 return rc;
270}
271EXPORT_SYMBOL(xrlim_allow);
272
273static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
274 int type, int code) 237 int type, int code)
275{ 238{
276 struct dst_entry *dst = &rt->dst; 239 struct dst_entry *dst = &rt->dst;
277 int rc = 1; 240 bool rc = true;
278 241
279 if (type > NR_ICMP_TYPES) 242 if (type > NR_ICMP_TYPES)
280 goto out; 243 goto out;
@@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
288 goto out; 251 goto out;
289 252
290 /* Limit if icmp type is enabled in ratemask. */ 253 /* Limit if icmp type is enabled in ratemask. */
291 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) 254 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
292 rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); 255 if (!rt->peer)
256 rt_bind_peer(rt, 1);
257 rc = inet_peer_xrlim_allow(rt->peer,
258 net->ipv4.sysctl_icmp_ratelimit);
259 }
293out: 260out:
294 return rc; 261 return rc;
295} 262}
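
The deleted xrlim_allow() was a classic token bucket: tokens accrue with elapsed jiffies, are capped at a burst of six timeouts, and each transmitted ICMP spends one timeout's worth. Its replacement, inet_peer_xrlim_allow(), applies the same scheme per inet_peer rather than per dst_entry. The bucket itself, lifted into standalone form:

#include <stdbool.h>
#include <stdio.h>

#define XRLIM_BURST_FACTOR 6	/* same burst the deleted code allowed */

struct rate_state {
	unsigned long rate_tokens;
	unsigned long rate_last;	/* time of last refill, in ticks */
};

/* Token bucket as in the removed xrlim_allow(): refill from elapsed
 * time, cap at six timeouts, spend one timeout per allowed send. */
static bool xrlim_allow_model(struct rate_state *st, unsigned long now,
			      unsigned long timeout)
{
	unsigned long token = st->rate_tokens + (now - st->rate_last);
	bool ok = false;

	st->rate_last = now;
	if (token > XRLIM_BURST_FACTOR * timeout)
		token = XRLIM_BURST_FACTOR * timeout;
	if (token >= timeout) {
		token -= timeout;
		ok = true;
	}
	st->rate_tokens = token;
	return ok;
}

int main(void)
{
	struct rate_state st = { 0, 0 };

	/* 1000 ticks elapsed, timeout 250: allowed, 750 tokens remain */
	printf("%d tokens=%lu\n", xrlim_allow_model(&st, 1000, 250),
	       st.rate_tokens);
	return 0;
}
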
@@ -386,12 +353,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
386 daddr = icmp_param->replyopts.faddr; 353 daddr = icmp_param->replyopts.faddr;
387 } 354 }
388 { 355 {
389 struct flowi fl = { .fl4_dst= daddr, 356 struct flowi4 fl4 = {
390 .fl4_src = rt->rt_spec_dst, 357 .daddr = daddr,
391 .fl4_tos = RT_TOS(ip_hdr(skb)->tos), 358 .saddr = rt->rt_spec_dst,
392 .proto = IPPROTO_ICMP }; 359 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
393 security_skb_classify_flow(skb, &fl); 360 .flowi4_proto = IPPROTO_ICMP,
394 if (ip_route_output_key(net, &rt, &fl)) 361 };
362 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
363 rt = ip_route_output_key(net, &fl4);
364 if (IS_ERR(rt))
395 goto out_unlock; 365 goto out_unlock;
396 } 366 }
397 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, 367 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
@@ -402,6 +372,97 @@ out_unlock:
402 icmp_xmit_unlock(sk); 372 icmp_xmit_unlock(sk);
403} 373}
404 374
375static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
376 struct iphdr *iph,
377 __be32 saddr, u8 tos,
378 int type, int code,
379 struct icmp_bxm *param)
380{
381 struct flowi4 fl4 = {
382 .daddr = (param->replyopts.srr ?
383 param->replyopts.faddr : iph->saddr),
384 .saddr = saddr,
385 .flowi4_tos = RT_TOS(tos),
386 .flowi4_proto = IPPROTO_ICMP,
387 .fl4_icmp_type = type,
388 .fl4_icmp_code = code,
389 };
390 struct rtable *rt, *rt2;
391 int err;
392
393 security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4));
394 rt = __ip_route_output_key(net, &fl4);
395 if (IS_ERR(rt))
396 return rt;
397
398 /* No need to clone since we're just using its address. */
399 rt2 = rt;
400
401 if (!fl4.saddr)
402 fl4.saddr = rt->rt_src;
403
404 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
405 flowi4_to_flowi(&fl4), NULL, 0);
406 if (!IS_ERR(rt)) {
407 if (rt != rt2)
408 return rt;
409 } else if (PTR_ERR(rt) == -EPERM) {
410 rt = NULL;
411 } else
412 return rt;
413
414 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET);
415 if (err)
416 goto relookup_failed;
417
418 if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) {
419 rt2 = __ip_route_output_key(net, &fl4);
420 if (IS_ERR(rt2))
421 err = PTR_ERR(rt2);
422 } else {
423 struct flowi4 fl4_2 = {};
424 unsigned long orefdst;
425
426 fl4_2.daddr = fl4.saddr;
427 rt2 = ip_route_output_key(net, &fl4_2);
428 if (IS_ERR(rt2)) {
429 err = PTR_ERR(rt2);
430 goto relookup_failed;
431 }
432 /* Ugh! */
433 orefdst = skb_in->_skb_refdst; /* save old refdst */
434 err = ip_route_input(skb_in, fl4.daddr, fl4.saddr,
435 RT_TOS(tos), rt2->dst.dev);
436
437 dst_release(&rt2->dst);
438 rt2 = skb_rtable(skb_in);
439 skb_in->_skb_refdst = orefdst; /* restore old refdst */
440 }
441
442 if (err)
443 goto relookup_failed;
444
445 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
446 flowi4_to_flowi(&fl4), NULL,
447 XFRM_LOOKUP_ICMP);
448 if (!IS_ERR(rt2)) {
449 dst_release(&rt->dst);
450 rt = rt2;
451 } else if (PTR_ERR(rt2) == -EPERM) {
452 if (rt)
453 dst_release(&rt->dst);
454 return rt2;
455 } else {
456 err = PTR_ERR(rt2);
457 goto relookup_failed;
458 }
459 return rt;
460
461relookup_failed:
462 if (rt)
463 return rt;
464 return ERR_PTR(err);
465}
405 466
 406/* 467/*
 407 * Send an ICMP message in response to a situation 468 * Send an ICMP message in response to a situation
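
icmp_route_lookup() also shows the calling convention this series converts the route-lookup API to: functions return the struct rtable pointer directly and encode failure in the pointer itself (hence the IS_ERR/PTR_ERR tests above), instead of an int status plus a struct rtable ** out-parameter. A userspace imitation of the ERR_PTR scheme; the lowercase helpers are stand-ins for the kernel macros:

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

/* Stand-ins for ERR_PTR/PTR_ERR/IS_ERR: errnos live in the top
 * MAX_ERRNO values of the address space, which no valid pointer uses. */
static void *err_ptr(long error)     { return (void *)error; }
static long ptr_err(const void *ptr) { return (long)ptr; }
static int is_err(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct rtable_m { int id; };

static struct rtable_m *route_lookup(int fail)
{
	static struct rtable_m rt = { .id = 1 };

	if (fail)
		return err_ptr(-ENETUNREACH); /* error rides in the pointer */
	return &rt;
}

int main(void)
{
	struct rtable_m *rt = route_lookup(1);

	if (is_err(rt))
		printf("lookup failed: %ld\n", ptr_err(rt));
	return 0;
}
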
@@ -507,7 +568,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
507 rcu_read_lock(); 568 rcu_read_lock();
508 if (rt_is_input_route(rt) && 569 if (rt_is_input_route(rt) &&
509 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) 570 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
510 dev = dev_get_by_index_rcu(net, rt->fl.iif); 571 dev = dev_get_by_index_rcu(net, rt->rt_iif);
511 572
512 if (dev) 573 if (dev)
513 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); 574 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -539,86 +600,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
539 ipc.opt = &icmp_param.replyopts; 600 ipc.opt = &icmp_param.replyopts;
540 ipc.tx_flags = 0; 601 ipc.tx_flags = 0;
541 602
542 { 603 rt = icmp_route_lookup(net, skb_in, iph, saddr, tos,
543 struct flowi fl = { 604 type, code, &icmp_param);
544 .fl4_dst = icmp_param.replyopts.srr ? 605 if (IS_ERR(rt))
545 icmp_param.replyopts.faddr : iph->saddr, 606 goto out_unlock;
546 .fl4_src = saddr,
547 .fl4_tos = RT_TOS(tos),
548 .proto = IPPROTO_ICMP,
549 .fl_icmp_type = type,
550 .fl_icmp_code = code,
551 };
552 int err;
553 struct rtable *rt2;
554
555 security_skb_classify_flow(skb_in, &fl);
556 if (__ip_route_output_key(net, &rt, &fl))
557 goto out_unlock;
558
559 /* No need to clone since we're just using its address. */
560 rt2 = rt;
561
562 if (!fl.nl_u.ip4_u.saddr)
563 fl.nl_u.ip4_u.saddr = rt->rt_src;
564
565 err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
566 switch (err) {
567 case 0:
568 if (rt != rt2)
569 goto route_done;
570 break;
571 case -EPERM:
572 rt = NULL;
573 break;
574 default:
575 goto out_unlock;
576 }
577
578 if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
579 goto relookup_failed;
580
581 if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
582 err = __ip_route_output_key(net, &rt2, &fl);
583 else {
584 struct flowi fl2 = {};
585 unsigned long orefdst;
586
587 fl2.fl4_dst = fl.fl4_src;
588 if (ip_route_output_key(net, &rt2, &fl2))
589 goto relookup_failed;
590
591 /* Ugh! */
592 orefdst = skb_in->_skb_refdst; /* save old refdst */
593 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
594 RT_TOS(tos), rt2->dst.dev);
595
596 dst_release(&rt2->dst);
597 rt2 = skb_rtable(skb_in);
598 skb_in->_skb_refdst = orefdst; /* restore old refdst */
599 }
600
601 if (err)
602 goto relookup_failed;
603
604 err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL,
605 XFRM_LOOKUP_ICMP);
606 switch (err) {
607 case 0:
608 dst_release(&rt->dst);
609 rt = rt2;
610 break;
611 case -EPERM:
612 goto ende;
613 default:
614relookup_failed:
615 if (!rt)
616 goto out_unlock;
617 break;
618 }
619 }
620 607
621route_done:
622 if (!icmpv4_xrlim_allow(net, rt, type, code)) 608 if (!icmpv4_xrlim_allow(net, rt, type, code))
623 goto ende; 609 goto ende;
624 610
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index e0e77e297de..1fd3d9ce839 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -321,14 +321,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
321 } 321 }
322 igmp_skb_size(skb) = size; 322 igmp_skb_size(skb) = size;
323 323
324 { 324 rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0,
325 struct flowi fl = { .oif = dev->ifindex, 325 0, 0,
326 .fl4_dst = IGMPV3_ALL_MCR, 326 IPPROTO_IGMP, 0, dev->ifindex);
327 .proto = IPPROTO_IGMP }; 327 if (IS_ERR(rt)) {
328 if (ip_route_output_key(net, &rt, &fl)) { 328 kfree_skb(skb);
329 kfree_skb(skb); 329 return NULL;
330 return NULL;
331 }
332 } 330 }
333 if (rt->rt_src == 0) { 331 if (rt->rt_src == 0) {
334 kfree_skb(skb); 332 kfree_skb(skb);
@@ -666,13 +664,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
666 else 664 else
667 dst = group; 665 dst = group;
668 666
669 { 667 rt = ip_route_output_ports(net, NULL, dst, 0,
670 struct flowi fl = { .oif = dev->ifindex, 668 0, 0,
671 .fl4_dst = dst, 669 IPPROTO_IGMP, 0, dev->ifindex);
672 .proto = IPPROTO_IGMP }; 670 if (IS_ERR(rt))
673 if (ip_route_output_key(net, &rt, &fl)) 671 return -1;
674 return -1; 672
675 }
676 if (rt->rt_src == 0) { 673 if (rt->rt_src == 0) {
677 ip_rt_put(rt); 674 ip_rt_put(rt);
678 return -1; 675 return -1;
@@ -1439,8 +1436,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1439/* RTNL is locked */ 1436/* RTNL is locked */
1440static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) 1437static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1441{ 1438{
1442 struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr };
1443 struct rtable *rt;
1444 struct net_device *dev = NULL; 1439 struct net_device *dev = NULL;
1445 struct in_device *idev = NULL; 1440 struct in_device *idev = NULL;
1446 1441
@@ -1454,9 +1449,14 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1454 return NULL; 1449 return NULL;
1455 } 1450 }
1456 1451
1457 if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1452 if (!dev) {
1458 dev = rt->dst.dev; 1453 struct rtable *rt = ip_route_output(net,
1459 ip_rt_put(rt); 1454 imr->imr_multiaddr.s_addr,
1455 0, 0, 0);
1456 if (!IS_ERR(rt)) {
1457 dev = rt->dst.dev;
1458 ip_rt_put(rt);
1459 }
1460 } 1460 }
1461 if (dev) { 1461 if (dev) {
1462 imr->imr_ifindex = dev->ifindex; 1462 imr->imr_ifindex = dev->ifindex;
@@ -2329,13 +2329,13 @@ void ip_mc_drop_socket(struct sock *sk)
2329 rtnl_unlock(); 2329 rtnl_unlock();
2330} 2330}
2331 2331
2332int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) 2332/* called with rcu_read_lock() */
2333int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
2333{ 2334{
2334 struct ip_mc_list *im; 2335 struct ip_mc_list *im;
2335 struct ip_sf_list *psf; 2336 struct ip_sf_list *psf;
2336 int rv = 0; 2337 int rv = 0;
2337 2338
2338 rcu_read_lock();
2339 for_each_pmc_rcu(in_dev, im) { 2339 for_each_pmc_rcu(in_dev, im) {
2340 if (im->multiaddr == mc_addr) 2340 if (im->multiaddr == mc_addr)
2341 break; 2341 break;
@@ -2357,7 +2357,6 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
2357 } else 2357 } else
2358 rv = 1; /* unspecified source; tentatively allow */ 2358 rv = 1; /* unspecified source; tentatively allow */
2359 } 2359 }
2360 rcu_read_unlock();
2361 return rv; 2360 return rv;
2362} 2361}
2363 2362
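
The last igmp.c hunk renames ip_check_mc() to ip_check_mc_rcu() and drops the internal rcu_read_lock()/rcu_read_unlock() pair, shifting the read-side locking duty to callers so several checks can share one critical section. A loose userspace sketch of that lock-hoisting pattern, with a pthreads read lock standing in for RCU and every name invented for illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static const int mc_table[4] = { 1, 2, 3, 4 };

/* Caller must hold table_lock for reading -- the "_rcu"-style contract. */
static int check_mc_locked(int addr)
{
        for (int i = 0; i < 4; i++)
                if (mc_table[i] == addr)
                        return 1;
        return 0;
}

int main(void)
{
        pthread_rwlock_rdlock(&table_lock);
        /* Several checks now share one read-side critical section. */
        int a = check_mc_locked(2);
        int b = check_mc_locked(9);
        pthread_rwlock_unlock(&table_lock);
        printf("%d %d\n", a, b);
        return 0;
}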
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 97e5fb76526..6c0b7f4a3d7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -356,20 +356,23 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
356 struct rtable *rt; 356 struct rtable *rt;
357 const struct inet_request_sock *ireq = inet_rsk(req); 357 const struct inet_request_sock *ireq = inet_rsk(req);
358 struct ip_options *opt = inet_rsk(req)->opt; 358 struct ip_options *opt = inet_rsk(req)->opt;
359 struct flowi fl = { .oif = sk->sk_bound_dev_if, 359 struct flowi4 fl4 = {
360 .mark = sk->sk_mark, 360 .flowi4_oif = sk->sk_bound_dev_if,
361 .fl4_dst = ((opt && opt->srr) ? 361 .flowi4_mark = sk->sk_mark,
362 opt->faddr : ireq->rmt_addr), 362 .daddr = ((opt && opt->srr) ?
363 .fl4_src = ireq->loc_addr, 363 opt->faddr : ireq->rmt_addr),
364 .fl4_tos = RT_CONN_FLAGS(sk), 364 .saddr = ireq->loc_addr,
365 .proto = sk->sk_protocol, 365 .flowi4_tos = RT_CONN_FLAGS(sk),
366 .flags = inet_sk_flowi_flags(sk), 366 .flowi4_proto = sk->sk_protocol,
367 .fl_ip_sport = inet_sk(sk)->inet_sport, 367 .flowi4_flags = inet_sk_flowi_flags(sk),
368 .fl_ip_dport = ireq->rmt_port }; 368 .fl4_sport = inet_sk(sk)->inet_sport,
369 .fl4_dport = ireq->rmt_port,
370 };
369 struct net *net = sock_net(sk); 371 struct net *net = sock_net(sk);
370 372
371 security_req_classify_flow(req, &fl); 373 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
372 if (ip_route_output_flow(net, &rt, &fl, sk, 0)) 374 rt = ip_route_output_flow(net, &fl4, sk);
375 if (IS_ERR(rt))
373 goto no_route; 376 goto no_route;
374 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 377 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
375 goto route_err; 378 goto route_err;
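
The inet_csk_route_req() hunk is a representative flowi-to-flowi4 conversion: the generic flow key becomes a per-family struct whose renamed fields (flowi4_oif, daddr, fl4_sport, ...) are filled with C99 designated initializers. A compilable sketch with a reduced stand-in struct, not the kernel's flowi4:

#include <stdint.h>
#include <stdio.h>

struct flowi4_sketch {          /* reduced stand-in, not the kernel struct */
        int      flowi4_oif;
        uint32_t flowi4_mark;
        uint32_t daddr;         /* __be32 in the kernel */
        uint32_t saddr;
        uint8_t  flowi4_tos;
        uint8_t  flowi4_proto;
        uint16_t fl4_sport;     /* __be16 in the kernel */
        uint16_t fl4_dport;
};

int main(void)
{
        struct flowi4_sketch fl4 = {
                .flowi4_oif   = 2,
                .flowi4_mark  = 0,
                .daddr        = 0x7f000001,
                .saddr        = 0x7f000001,
                .flowi4_tos   = 0,
                .flowi4_proto = 6,      /* IPPROTO_TCP */
                .fl4_sport    = 80,
                .fl4_dport    = 12345,
        };

        printf("oif=%d proto=%u dport=%u\n",
               fl4.flowi4_oif, fl4.flowi4_proto, fl4.fl4_dport);
        return 0;
}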
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index a96e65674ac..dd1b20eca1a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -81,19 +81,19 @@ static const struct inet_peer peer_fake_node = {
81 81
82struct inet_peer_base { 82struct inet_peer_base {
83 struct inet_peer __rcu *root; 83 struct inet_peer __rcu *root;
84 spinlock_t lock; 84 seqlock_t lock;
85 int total; 85 int total;
86}; 86};
87 87
88static struct inet_peer_base v4_peers = { 88static struct inet_peer_base v4_peers = {
89 .root = peer_avl_empty_rcu, 89 .root = peer_avl_empty_rcu,
90 .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), 90 .lock = __SEQLOCK_UNLOCKED(v4_peers.lock),
91 .total = 0, 91 .total = 0,
92}; 92};
93 93
94static struct inet_peer_base v6_peers = { 94static struct inet_peer_base v6_peers = {
95 .root = peer_avl_empty_rcu, 95 .root = peer_avl_empty_rcu,
96 .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), 96 .lock = __SEQLOCK_UNLOCKED(v6_peers.lock),
97 .total = 0, 97 .total = 0,
98}; 98};
99 99
@@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a,
167 int i, n = (a->family == AF_INET ? 1 : 4); 167 int i, n = (a->family == AF_INET ? 1 : 4);
168 168
169 for (i = 0; i < n; i++) { 169 for (i = 0; i < n; i++) {
170 if (a->a6[i] == b->a6[i]) 170 if (a->addr.a6[i] == b->addr.a6[i])
171 continue; 171 continue;
172 if (a->a6[i] < b->a6[i]) 172 if (a->addr.a6[i] < b->addr.a6[i])
173 return -1; 173 return -1;
174 return 1; 174 return 1;
175 } 175 }
@@ -177,6 +177,9 @@ static int addr_compare(const struct inetpeer_addr *a,
177 return 0; 177 return 0;
178} 178}
179 179
180#define rcu_deref_locked(X, BASE) \
181 rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
182
180/* 183/*
181 * Called with local BH disabled and the pool lock held. 184 * Called with local BH disabled and the pool lock held.
182 */ 185 */
@@ -187,8 +190,7 @@ static int addr_compare(const struct inetpeer_addr *a,
187 \ 190 \
188 stackptr = _stack; \ 191 stackptr = _stack; \
189 *stackptr++ = &_base->root; \ 192 *stackptr++ = &_base->root; \
190 for (u = rcu_dereference_protected(_base->root, \ 193 for (u = rcu_deref_locked(_base->root, _base); \
191 lockdep_is_held(&_base->lock)); \
192 u != peer_avl_empty; ) { \ 194 u != peer_avl_empty; ) { \
193 int cmp = addr_compare(_daddr, &u->daddr); \ 195 int cmp = addr_compare(_daddr, &u->daddr); \
194 if (cmp == 0) \ 196 if (cmp == 0) \
@@ -198,23 +200,22 @@ static int addr_compare(const struct inetpeer_addr *a,
198 else \ 200 else \
199 v = &u->avl_right; \ 201 v = &u->avl_right; \
200 *stackptr++ = v; \ 202 *stackptr++ = v; \
201 u = rcu_dereference_protected(*v, \ 203 u = rcu_deref_locked(*v, _base); \
202 lockdep_is_held(&_base->lock)); \
203 } \ 204 } \
204 u; \ 205 u; \
205}) 206})
206 207
207/* 208/*
208 * Called with rcu_read_lock_bh() 209 * Called with rcu_read_lock()
209 * Because we hold no lock against a writer, its quite possible we fall 210 * Because we hold no lock against a writer, its quite possible we fall
210 * in an endless loop. 211 * in an endless loop.
211 * But every pointer we follow is guaranteed to be valid thanks to RCU. 212 * But every pointer we follow is guaranteed to be valid thanks to RCU.
212 * We exit from this function if number of links exceeds PEER_MAXDEPTH 213 * We exit from this function if number of links exceeds PEER_MAXDEPTH
213 */ 214 */
214static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, 215static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
215 struct inet_peer_base *base) 216 struct inet_peer_base *base)
216{ 217{
217 struct inet_peer *u = rcu_dereference_bh(base->root); 218 struct inet_peer *u = rcu_dereference(base->root);
218 int count = 0; 219 int count = 0;
219 220
220 while (u != peer_avl_empty) { 221 while (u != peer_avl_empty) {
@@ -230,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
230 return u; 231 return u;
231 } 232 }
232 if (cmp == -1) 233 if (cmp == -1)
233 u = rcu_dereference_bh(u->avl_left); 234 u = rcu_dereference(u->avl_left);
234 else 235 else
235 u = rcu_dereference_bh(u->avl_right); 236 u = rcu_dereference(u->avl_right);
236 if (unlikely(++count == PEER_MAXDEPTH)) 237 if (unlikely(++count == PEER_MAXDEPTH))
237 break; 238 break;
238 } 239 }
@@ -246,13 +247,11 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
246 struct inet_peer __rcu **v; \ 247 struct inet_peer __rcu **v; \
247 *stackptr++ = &start->avl_left; \ 248 *stackptr++ = &start->avl_left; \
248 v = &start->avl_left; \ 249 v = &start->avl_left; \
249 for (u = rcu_dereference_protected(*v, \ 250 for (u = rcu_deref_locked(*v, base); \
250 lockdep_is_held(&base->lock)); \
251 u->avl_right != peer_avl_empty_rcu; ) { \ 251 u->avl_right != peer_avl_empty_rcu; ) { \
252 v = &u->avl_right; \ 252 v = &u->avl_right; \
253 *stackptr++ = v; \ 253 *stackptr++ = v; \
254 u = rcu_dereference_protected(*v, \ 254 u = rcu_deref_locked(*v, base); \
255 lockdep_is_held(&base->lock)); \
256 } \ 255 } \
257 u; \ 256 u; \
258}) 257})
@@ -271,21 +270,16 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
271 270
272 while (stackend > stack) { 271 while (stackend > stack) {
273 nodep = *--stackend; 272 nodep = *--stackend;
274 node = rcu_dereference_protected(*nodep, 273 node = rcu_deref_locked(*nodep, base);
275 lockdep_is_held(&base->lock)); 274 l = rcu_deref_locked(node->avl_left, base);
276 l = rcu_dereference_protected(node->avl_left, 275 r = rcu_deref_locked(node->avl_right, base);
277 lockdep_is_held(&base->lock));
278 r = rcu_dereference_protected(node->avl_right,
279 lockdep_is_held(&base->lock));
280 lh = node_height(l); 276 lh = node_height(l);
281 rh = node_height(r); 277 rh = node_height(r);
282 if (lh > rh + 1) { /* l: RH+2 */ 278 if (lh > rh + 1) { /* l: RH+2 */
283 struct inet_peer *ll, *lr, *lrl, *lrr; 279 struct inet_peer *ll, *lr, *lrl, *lrr;
284 int lrh; 280 int lrh;
285 ll = rcu_dereference_protected(l->avl_left, 281 ll = rcu_deref_locked(l->avl_left, base);
286 lockdep_is_held(&base->lock)); 282 lr = rcu_deref_locked(l->avl_right, base);
287 lr = rcu_dereference_protected(l->avl_right,
288 lockdep_is_held(&base->lock));
289 lrh = node_height(lr); 283 lrh = node_height(lr);
290 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 284 if (lrh <= node_height(ll)) { /* ll: RH+1 */
291 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ 285 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
@@ -296,10 +290,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
296 l->avl_height = node->avl_height + 1; 290 l->avl_height = node->avl_height + 1;
297 RCU_INIT_POINTER(*nodep, l); 291 RCU_INIT_POINTER(*nodep, l);
298 } else { /* ll: RH, lr: RH+1 */ 292 } else { /* ll: RH, lr: RH+1 */
299 lrl = rcu_dereference_protected(lr->avl_left, 293 lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
300 lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ 294 lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
301 lrr = rcu_dereference_protected(lr->avl_right,
302 lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */
303 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ 295 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
304 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ 296 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
305 node->avl_height = rh + 1; /* node: RH+1 */ 297 node->avl_height = rh + 1; /* node: RH+1 */
@@ -314,10 +306,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
314 } else if (rh > lh + 1) { /* r: LH+2 */ 306 } else if (rh > lh + 1) { /* r: LH+2 */
315 struct inet_peer *rr, *rl, *rlr, *rll; 307 struct inet_peer *rr, *rl, *rlr, *rll;
316 int rlh; 308 int rlh;
317 rr = rcu_dereference_protected(r->avl_right, 309 rr = rcu_deref_locked(r->avl_right, base);
318 lockdep_is_held(&base->lock)); 310 rl = rcu_deref_locked(r->avl_left, base);
319 rl = rcu_dereference_protected(r->avl_left,
320 lockdep_is_held(&base->lock));
321 rlh = node_height(rl); 311 rlh = node_height(rl);
322 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 312 if (rlh <= node_height(rr)) { /* rr: LH+1 */
323 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ 313 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
@@ -328,10 +318,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
328 r->avl_height = node->avl_height + 1; 318 r->avl_height = node->avl_height + 1;
329 RCU_INIT_POINTER(*nodep, r); 319 RCU_INIT_POINTER(*nodep, r);
330 } else { /* rr: RH, rl: RH+1 */ 320 } else { /* rr: RH, rl: RH+1 */
331 rlr = rcu_dereference_protected(rl->avl_right, 321 rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
332 lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ 322 rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
333 rll = rcu_dereference_protected(rl->avl_left,
334 lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */
335 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ 323 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
336 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ 324 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
337 node->avl_height = lh + 1; /* node: LH+1 */ 325 node->avl_height = lh + 1; /* node: LH+1 */
@@ -372,7 +360,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
372 360
373 do_free = 0; 361 do_free = 0;
374 362
375 spin_lock_bh(&base->lock); 363 write_seqlock_bh(&base->lock);
376 /* Check the reference counter. It was artificially incremented by 1 364 /* Check the reference counter. It was artificially incremented by 1
377 * in cleanup() function to prevent sudden disappearing. If we can 365 * in cleanup() function to prevent sudden disappearing. If we can
378 * atomically (because of lockless readers) take this last reference, 366 * atomically (because of lockless readers) take this last reference,
@@ -392,8 +380,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
392 /* look for a node to insert instead of p */ 380 /* look for a node to insert instead of p */
393 struct inet_peer *t; 381 struct inet_peer *t;
394 t = lookup_rightempty(p, base); 382 t = lookup_rightempty(p, base);
395 BUG_ON(rcu_dereference_protected(*stackptr[-1], 383 BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
396 lockdep_is_held(&base->lock)) != t);
397 **--stackptr = t->avl_left; 384 **--stackptr = t->avl_left;
398 /* t is removed, t->daddr > x->daddr for any 385 /* t is removed, t->daddr > x->daddr for any
399 * x in p->avl_left subtree. 386 * x in p->avl_left subtree.
@@ -409,10 +396,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
409 base->total--; 396 base->total--;
410 do_free = 1; 397 do_free = 1;
411 } 398 }
412 spin_unlock_bh(&base->lock); 399 write_sequnlock_bh(&base->lock);
413 400
414 if (do_free) 401 if (do_free)
415 call_rcu_bh(&p->rcu, inetpeer_free_rcu); 402 call_rcu(&p->rcu, inetpeer_free_rcu);
416 else 403 else
417 /* The node is used again. Decrease the reference counter 404 /* The node is used again. Decrease the reference counter
418 * back. The loop "cleanup -> unlink_from_unused 405 * back. The loop "cleanup -> unlink_from_unused
@@ -477,13 +464,17 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
477 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; 464 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
478 struct inet_peer_base *base = family_to_base(daddr->family); 465 struct inet_peer_base *base = family_to_base(daddr->family);
479 struct inet_peer *p; 466 struct inet_peer *p;
467 unsigned int sequence;
468 int invalidated;
480 469
481 /* Look up for the address quickly, lockless. 470 /* Look up for the address quickly, lockless.
482 * Because of a concurrent writer, we might not find an existing entry. 471 * Because of a concurrent writer, we might not find an existing entry.
483 */ 472 */
484 rcu_read_lock_bh(); 473 rcu_read_lock();
485 p = lookup_rcu_bh(daddr, base); 474 sequence = read_seqbegin(&base->lock);
486 rcu_read_unlock_bh(); 475 p = lookup_rcu(daddr, base);
476 invalidated = read_seqretry(&base->lock, sequence);
477 rcu_read_unlock();
487 478
488 if (p) { 479 if (p) {
489 /* The existing node has been found. 480 /* The existing node has been found.
@@ -493,14 +484,18 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
493 return p; 484 return p;
494 } 485 }
495 486
487 /* If no writer did a change during our lookup, we can return early. */
488 if (!create && !invalidated)
489 return NULL;
490
496 /* retry an exact lookup, taking the lock before. 491 /* retry an exact lookup, taking the lock before.
497 * At least, nodes should be hot in our cache. 492 * At least, nodes should be hot in our cache.
498 */ 493 */
499 spin_lock_bh(&base->lock); 494 write_seqlock_bh(&base->lock);
500 p = lookup(daddr, stack, base); 495 p = lookup(daddr, stack, base);
501 if (p != peer_avl_empty) { 496 if (p != peer_avl_empty) {
502 atomic_inc(&p->refcnt); 497 atomic_inc(&p->refcnt);
503 spin_unlock_bh(&base->lock); 498 write_sequnlock_bh(&base->lock);
504 /* Remove the entry from unused list if it was there. */ 499 /* Remove the entry from unused list if it was there. */
505 unlink_from_unused(p); 500 unlink_from_unused(p);
506 return p; 501 return p;
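
In the inet_getpeer() hunk above, the lockless tree walk is now bracketed by read_seqbegin()/read_seqretry(): if a writer ran during lookup_rcu(), `invalidated` is set and a failed lookup cannot be trusted as a definitive miss, which is what the `!create && !invalidated` early return depends on. A userspace sketch of the seqlock read/retry protocol, using C11 atomics as a stand-in for the kernel's seqlock_t (memory-ordering details deliberately simplified):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int seq; /* even = quiescent, odd = writer active */
static int shared_value;

static unsigned int read_begin(void)
{
        unsigned int s;

        while ((s = atomic_load(&seq)) & 1)
                ;               /* writer in progress, wait */
        return s;
}

static int read_retry(unsigned int start)
{
        return atomic_load(&seq) != start; /* nonzero: result invalidated */
}

static void write_update(int v)
{
        atomic_fetch_add(&seq, 1);      /* enter: sequence goes odd */
        shared_value = v;
        atomic_fetch_add(&seq, 1);      /* leave: sequence even again */
}

int main(void)
{
        write_update(7);

        unsigned int s = read_begin();
        int v = shared_value;           /* the lockless "lookup" */
        if (read_retry(s))
                printf("raced with a writer, retry under the lock\n");
        else
                printf("stable read: %d\n", v);
        return 0;
}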
@@ -510,8 +505,14 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
510 p->daddr = *daddr; 505 p->daddr = *daddr;
511 atomic_set(&p->refcnt, 1); 506 atomic_set(&p->refcnt, 1);
512 atomic_set(&p->rid, 0); 507 atomic_set(&p->rid, 0);
513 atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); 508 atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4));
514 p->tcp_ts_stamp = 0; 509 p->tcp_ts_stamp = 0;
510 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
511 p->rate_tokens = 0;
512 p->rate_last = 0;
513 p->pmtu_expires = 0;
514 p->pmtu_orig = 0;
515 memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
515 INIT_LIST_HEAD(&p->unused); 516 INIT_LIST_HEAD(&p->unused);
516 517
517 518
@@ -519,7 +520,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
519 link_to_pool(p, base); 520 link_to_pool(p, base);
520 base->total++; 521 base->total++;
521 } 522 }
522 spin_unlock_bh(&base->lock); 523 write_sequnlock_bh(&base->lock);
523 524
524 if (base->total >= inet_peer_threshold) 525 if (base->total >= inet_peer_threshold)
525 /* Remove one less-recently-used entry. */ 526 /* Remove one less-recently-used entry. */
@@ -579,3 +580,44 @@ void inet_putpeer(struct inet_peer *p)
579 local_bh_enable(); 580 local_bh_enable();
580} 581}
581EXPORT_SYMBOL_GPL(inet_putpeer); 582EXPORT_SYMBOL_GPL(inet_putpeer);
583
584/*
585 * Check transmit rate limitation for given message.
586 * The rate information is held in the inet_peer entries now.
587 * This function is generic and could be used for other purposes
588 * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
589 *
590 * Note that the same inet_peer fields are modified by functions in
591 * route.c too, but these work for packet destinations while xrlim_allow
592 * works for icmp destinations. This means the rate limiting information
593 * for one "ip object" is shared - and these ICMPs are twice limited:
594 * by source and by destination.
595 *
596 * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
597 * SHOULD allow setting of rate limits
598 *
599 * Shared between ICMPv4 and ICMPv6.
600 */
601#define XRLIM_BURST_FACTOR 6
602bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
603{
604 unsigned long now, token;
605 bool rc = false;
606
607 if (!peer)
608 return true;
609
610 token = peer->rate_tokens;
611 now = jiffies;
612 token += now - peer->rate_last;
613 peer->rate_last = now;
614 if (token > XRLIM_BURST_FACTOR * timeout)
615 token = XRLIM_BURST_FACTOR * timeout;
616 if (token >= timeout) {
617 token -= timeout;
618 rc = true;
619 }
620 peer->rate_tokens = token;
621 return rc;
622}
623EXPORT_SYMBOL(inet_peer_xrlim_allow);
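
inet_peer_xrlim_allow(), added above, is a classic token bucket: tokens accrue at one per jiffy since rate_last, are capped at XRLIM_BURST_FACTOR * timeout, and each permitted message spends `timeout` tokens. A runnable sketch of the same arithmetic, with time(NULL) standing in for jiffies so tokens accrue one per second:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define XRLIM_BURST_FACTOR 6

struct rate_state {
        unsigned long rate_tokens;
        unsigned long rate_last;
};

static bool xrlim_allow_sketch(struct rate_state *st, unsigned long timeout)
{
        unsigned long now = (unsigned long)time(NULL);
        unsigned long token = st->rate_tokens + (now - st->rate_last);
        bool rc = false;

        st->rate_last = now;
        if (token > XRLIM_BURST_FACTOR * timeout)
                token = XRLIM_BURST_FACTOR * timeout;   /* cap the burst */
        if (token >= timeout) {
                token -= timeout;                       /* spend one slot */
                rc = true;
        }
        st->rate_tokens = token;
        return rc;
}

int main(void)
{
        struct rate_state st = {
                .rate_tokens = 0,
                .rate_last = (unsigned long)time(NULL) - 10,
        };

        /* With timeout=2s and 10s accrued, the first five calls pass,
         * then the bucket is empty until more time elapses. */
        for (int i = 0; i < 8; i++)
                printf("msg %d: %s\n", i,
                       xrlim_allow_sketch(&st, 2) ? "sent" : "rate-limited");
        return 0;
}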
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6613edfac28..da5941f18c3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -769,19 +769,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
769 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 769 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
770 } 770 }
771 771
772 { 772 rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr,
773 struct flowi fl = { 773 tunnel->parms.o_key, RT_TOS(tos),
774 .oif = tunnel->parms.link, 774 tunnel->parms.link);
775 .fl4_dst = dst, 775 if (IS_ERR(rt)) {
776 .fl4_src = tiph->saddr, 776 dev->stats.tx_carrier_errors++;
777 .fl4_tos = RT_TOS(tos), 777 goto tx_error;
778 .proto = IPPROTO_GRE,
779 .fl_gre_key = tunnel->parms.o_key
780 };
781 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
782 dev->stats.tx_carrier_errors++;
783 goto tx_error;
784 }
785 } 778 }
786 tdev = rt->dst.dev; 779 tdev = rt->dst.dev;
787 780
@@ -945,17 +938,13 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
945 /* Guess output device to choose reasonable mtu and needed_headroom */ 938 /* Guess output device to choose reasonable mtu and needed_headroom */
946 939
947 if (iph->daddr) { 940 if (iph->daddr) {
948 struct flowi fl = { 941 struct rtable *rt = ip_route_output_gre(dev_net(dev),
949 .oif = tunnel->parms.link, 942 iph->daddr, iph->saddr,
950 .fl4_dst = iph->daddr, 943 tunnel->parms.o_key,
951 .fl4_src = iph->saddr, 944 RT_TOS(iph->tos),
952 .fl4_tos = RT_TOS(iph->tos), 945 tunnel->parms.link);
953 .proto = IPPROTO_GRE, 946
954 .fl_gre_key = tunnel->parms.o_key 947 if (!IS_ERR(rt)) {
955 };
956 struct rtable *rt;
957
958 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
959 tdev = rt->dst.dev; 948 tdev = rt->dst.dev;
960 ip_rt_put(rt); 949 ip_rt_put(rt);
961 } 950 }
@@ -1207,17 +1196,14 @@ static int ipgre_open(struct net_device *dev)
1207 struct ip_tunnel *t = netdev_priv(dev); 1196 struct ip_tunnel *t = netdev_priv(dev);
1208 1197
1209 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1198 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1210 struct flowi fl = { 1199 struct rtable *rt = ip_route_output_gre(dev_net(dev),
1211 .oif = t->parms.link, 1200 t->parms.iph.daddr,
1212 .fl4_dst = t->parms.iph.daddr, 1201 t->parms.iph.saddr,
1213 .fl4_src = t->parms.iph.saddr, 1202 t->parms.o_key,
1214 .fl4_tos = RT_TOS(t->parms.iph.tos), 1203 RT_TOS(t->parms.iph.tos),
1215 .proto = IPPROTO_GRE, 1204 t->parms.link);
1216 .fl_gre_key = t->parms.o_key 1205
1217 }; 1206 if (IS_ERR(rt))
1218 struct rtable *rt;
1219
1220 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1221 return -EADDRNOTAVAIL; 1207 return -EADDRNOTAVAIL;
1222 dev = rt->dst.dev; 1208 dev = rt->dst.dev;
1223 ip_rt_put(rt); 1209 ip_rt_put(rt);
@@ -1765,4 +1751,4 @@ module_exit(ipgre_fini);
1765MODULE_LICENSE("GPL"); 1751MODULE_LICENSE("GPL");
1766MODULE_ALIAS_RTNL_LINK("gre"); 1752MODULE_ALIAS_RTNL_LINK("gre");
1767MODULE_ALIAS_RTNL_LINK("gretap"); 1753MODULE_ALIAS_RTNL_LINK("gretap");
1768MODULE_ALIAS("gre0"); 1754MODULE_ALIAS_NETDEV("gre0");
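
All three ip_gre.c call sites trade a hand-rolled on-stack flowi initializer for the single typed helper ip_route_output_gre(); the change is purely structural. A reduced sketch of that wrapper shape, with toy types and a toy lookup (nothing here is a kernel API):

#include <stdint.h>
#include <stdio.h>

struct flow_key {
        uint32_t daddr, saddr, gre_key;
        uint8_t  tos;
        int      oif;
};

struct route { struct flow_key key; };

static struct route *route_output_key(const struct flow_key *k)
{
        static struct route r;  /* toy: one cached route */

        r.key = *k;
        return &r;
}

/* One helper instead of three hand-rolled initializers at the call sites. */
static struct route *route_output_gre(uint32_t daddr, uint32_t saddr,
                                      uint32_t key, uint8_t tos, int oif)
{
        struct flow_key k = {
                .daddr = daddr, .saddr = saddr,
                .gre_key = key, .tos = tos, .oif = oif,
        };

        return route_output_key(&k);
}

int main(void)
{
        struct route *rt = route_output_gre(0x0a000001, 0, 42, 0, 3);

        printf("routed with gre key %u\n", rt->key.gre_key);
        return 0;
}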
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d859bcc26cb..d7b2b0987a3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
340 } 340 }
341 } 341 }
342 342
343#ifdef CONFIG_NET_CLS_ROUTE 343#ifdef CONFIG_IP_ROUTE_CLASSID
344 if (unlikely(skb_dst(skb)->tclassid)) { 344 if (unlikely(skb_dst(skb)->tclassid)) {
345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); 345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
346 u32 idx = skb_dst(skb)->tclassid; 346 u32 idx = skb_dst(skb)->tclassid;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 04c7b3ba6b3..67f241b9764 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -339,25 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb)
339 if(opt && opt->srr) 339 if(opt && opt->srr)
340 daddr = opt->faddr; 340 daddr = opt->faddr;
341 341
342 { 342 /* If this fails, retransmit mechanism of transport layer will
343 struct flowi fl = { .oif = sk->sk_bound_dev_if, 343 * keep trying until route appears or the connection times
344 .mark = sk->sk_mark, 344 * itself out.
345 .fl4_dst = daddr, 345 */
346 .fl4_src = inet->inet_saddr, 346 rt = ip_route_output_ports(sock_net(sk), sk,
347 .fl4_tos = RT_CONN_FLAGS(sk), 347 daddr, inet->inet_saddr,
348 .proto = sk->sk_protocol, 348 inet->inet_dport,
349 .flags = inet_sk_flowi_flags(sk), 349 inet->inet_sport,
350 .fl_ip_sport = inet->inet_sport, 350 sk->sk_protocol,
351 .fl_ip_dport = inet->inet_dport }; 351 RT_CONN_FLAGS(sk),
352 352 sk->sk_bound_dev_if);
353 /* If this fails, retransmit mechanism of transport layer will 353 if (IS_ERR(rt))
354 * keep trying until route appears or the connection times 354 goto no_route;
355 * itself out.
356 */
357 security_sk_classify_flow(sk, &fl);
358 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
359 goto no_route;
360 }
361 sk_setup_caps(sk, &rt->dst); 355 sk_setup_caps(sk, &rt->dst);
362 } 356 }
363 skb_dst_set_noref(skb, &rt->dst); 357 skb_dst_set_noref(skb, &rt->dst);
@@ -733,6 +727,7 @@ csum_page(struct page *page, int offset, int copy)
733} 727}
734 728
735static inline int ip_ufo_append_data(struct sock *sk, 729static inline int ip_ufo_append_data(struct sock *sk,
730 struct sk_buff_head *queue,
736 int getfrag(void *from, char *to, int offset, int len, 731 int getfrag(void *from, char *to, int offset, int len,
737 int odd, struct sk_buff *skb), 732 int odd, struct sk_buff *skb),
738 void *from, int length, int hh_len, int fragheaderlen, 733 void *from, int length, int hh_len, int fragheaderlen,
@@ -745,7 +740,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
745 * device, so create one single skb packet containing complete 740 * device, so create one single skb packet containing complete
746 * udp datagram 741 * udp datagram
747 */ 742 */
748 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 743 if ((skb = skb_peek_tail(queue)) == NULL) {
749 skb = sock_alloc_send_skb(sk, 744 skb = sock_alloc_send_skb(sk,
750 hh_len + fragheaderlen + transhdrlen + 20, 745 hh_len + fragheaderlen + transhdrlen + 20,
751 (flags & MSG_DONTWAIT), &err); 746 (flags & MSG_DONTWAIT), &err);
@@ -767,40 +762,28 @@ static inline int ip_ufo_append_data(struct sock *sk,
767 762
768 skb->ip_summed = CHECKSUM_PARTIAL; 763 skb->ip_summed = CHECKSUM_PARTIAL;
769 skb->csum = 0; 764 skb->csum = 0;
770 sk->sk_sndmsg_off = 0;
771 765
772 /* specify the length of each IP datagram fragment */ 766 /* specify the length of each IP datagram fragment */
773 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 767 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
774 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 768 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
775 __skb_queue_tail(&sk->sk_write_queue, skb); 769 __skb_queue_tail(queue, skb);
776 } 770 }
777 771
778 return skb_append_datato_frags(sk, skb, getfrag, from, 772 return skb_append_datato_frags(sk, skb, getfrag, from,
779 (length - transhdrlen)); 773 (length - transhdrlen));
780} 774}
781 775
782/* 776static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue,
783 * ip_append_data() and ip_append_page() can make one large IP datagram 777 struct inet_cork *cork,
784 * from many pieces of data. Each pieces will be holded on the socket 778 int getfrag(void *from, char *to, int offset,
785 * until ip_push_pending_frames() is called. Each piece can be a page 779 int len, int odd, struct sk_buff *skb),
786 * or non-page data. 780 void *from, int length, int transhdrlen,
787 * 781 unsigned int flags)
788 * Not only UDP, other transport protocols - e.g. raw sockets - can use
789 * this interface potentially.
790 *
791 * LATER: length must be adjusted by pad at tail, when it is required.
792 */
793int ip_append_data(struct sock *sk,
794 int getfrag(void *from, char *to, int offset, int len,
795 int odd, struct sk_buff *skb),
796 void *from, int length, int transhdrlen,
797 struct ipcm_cookie *ipc, struct rtable **rtp,
798 unsigned int flags)
799{ 782{
800 struct inet_sock *inet = inet_sk(sk); 783 struct inet_sock *inet = inet_sk(sk);
801 struct sk_buff *skb; 784 struct sk_buff *skb;
802 785
803 struct ip_options *opt = NULL; 786 struct ip_options *opt = cork->opt;
804 int hh_len; 787 int hh_len;
805 int exthdrlen; 788 int exthdrlen;
806 int mtu; 789 int mtu;
@@ -809,58 +792,19 @@ int ip_append_data(struct sock *sk,
809 int offset = 0; 792 int offset = 0;
810 unsigned int maxfraglen, fragheaderlen; 793 unsigned int maxfraglen, fragheaderlen;
811 int csummode = CHECKSUM_NONE; 794 int csummode = CHECKSUM_NONE;
812 struct rtable *rt; 795 struct rtable *rt = (struct rtable *)cork->dst;
813 796
814 if (flags&MSG_PROBE) 797 exthdrlen = transhdrlen ? rt->dst.header_len : 0;
815 return 0; 798 length += exthdrlen;
816 799 transhdrlen += exthdrlen;
817 if (skb_queue_empty(&sk->sk_write_queue)) { 800 mtu = cork->fragsize;
818 /*
819 * setup for corking.
820 */
821 opt = ipc->opt;
822 if (opt) {
823 if (inet->cork.opt == NULL) {
824 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
825 if (unlikely(inet->cork.opt == NULL))
826 return -ENOBUFS;
827 }
828 memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
829 inet->cork.flags |= IPCORK_OPT;
830 inet->cork.addr = ipc->addr;
831 }
832 rt = *rtp;
833 if (unlikely(!rt))
834 return -EFAULT;
835 /*
836 * We steal reference to this route, caller should not release it
837 */
838 *rtp = NULL;
839 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
840 rt->dst.dev->mtu :
841 dst_mtu(rt->dst.path);
842 inet->cork.dst = &rt->dst;
843 inet->cork.length = 0;
844 sk->sk_sndmsg_page = NULL;
845 sk->sk_sndmsg_off = 0;
846 exthdrlen = rt->dst.header_len;
847 length += exthdrlen;
848 transhdrlen += exthdrlen;
849 } else {
850 rt = (struct rtable *)inet->cork.dst;
851 if (inet->cork.flags & IPCORK_OPT)
852 opt = inet->cork.opt;
853 801
854 transhdrlen = 0;
855 exthdrlen = 0;
856 mtu = inet->cork.fragsize;
857 }
858 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 802 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
859 803
860 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 804 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
861 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 805 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
862 806
863 if (inet->cork.length + length > 0xFFFF - fragheaderlen) { 807 if (cork->length + length > 0xFFFF - fragheaderlen) {
864 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 808 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
865 mtu-exthdrlen); 809 mtu-exthdrlen);
866 return -EMSGSIZE; 810 return -EMSGSIZE;
@@ -876,15 +820,15 @@ int ip_append_data(struct sock *sk,
876 !exthdrlen) 820 !exthdrlen)
877 csummode = CHECKSUM_PARTIAL; 821 csummode = CHECKSUM_PARTIAL;
878 822
879 skb = skb_peek_tail(&sk->sk_write_queue); 823 skb = skb_peek_tail(queue);
880 824
881 inet->cork.length += length; 825 cork->length += length;
882 if (((length > mtu) || (skb && skb_is_gso(skb))) && 826 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
883 (sk->sk_protocol == IPPROTO_UDP) && 827 (sk->sk_protocol == IPPROTO_UDP) &&
884 (rt->dst.dev->features & NETIF_F_UFO)) { 828 (rt->dst.dev->features & NETIF_F_UFO)) {
885 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, 829 err = ip_ufo_append_data(sk, queue, getfrag, from, length,
886 fragheaderlen, transhdrlen, mtu, 830 hh_len, fragheaderlen, transhdrlen,
887 flags); 831 mtu, flags);
888 if (err) 832 if (err)
889 goto error; 833 goto error;
890 return 0; 834 return 0;
@@ -961,7 +905,7 @@ alloc_new_skb:
961 else 905 else
962 /* only the initial fragment is 906 /* only the initial fragment is
963 time stamped */ 907 time stamped */
964 ipc->tx_flags = 0; 908 cork->tx_flags = 0;
965 } 909 }
966 if (skb == NULL) 910 if (skb == NULL)
967 goto error; 911 goto error;
@@ -972,7 +916,7 @@ alloc_new_skb:
972 skb->ip_summed = csummode; 916 skb->ip_summed = csummode;
973 skb->csum = 0; 917 skb->csum = 0;
974 skb_reserve(skb, hh_len); 918 skb_reserve(skb, hh_len);
975 skb_shinfo(skb)->tx_flags = ipc->tx_flags; 919 skb_shinfo(skb)->tx_flags = cork->tx_flags;
976 920
977 /* 921 /*
978 * Find where to start putting bytes. 922 * Find where to start putting bytes.
@@ -1009,7 +953,7 @@ alloc_new_skb:
1009 /* 953 /*
1010 * Put the packet on the pending queue. 954 * Put the packet on the pending queue.
1011 */ 955 */
1012 __skb_queue_tail(&sk->sk_write_queue, skb); 956 __skb_queue_tail(queue, skb);
1013 continue; 957 continue;
1014 } 958 }
1015 959
@@ -1029,8 +973,8 @@ alloc_new_skb:
1029 } else { 973 } else {
1030 int i = skb_shinfo(skb)->nr_frags; 974 int i = skb_shinfo(skb)->nr_frags;
1031 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 975 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1032 struct page *page = sk->sk_sndmsg_page; 976 struct page *page = cork->page;
1033 int off = sk->sk_sndmsg_off; 977 int off = cork->off;
1034 unsigned int left; 978 unsigned int left;
1035 979
1036 if (page && (left = PAGE_SIZE - off) > 0) { 980 if (page && (left = PAGE_SIZE - off) > 0) {
@@ -1042,7 +986,7 @@ alloc_new_skb:
1042 goto error; 986 goto error;
1043 } 987 }
1044 get_page(page); 988 get_page(page);
1045 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 989 skb_fill_page_desc(skb, i, page, off, 0);
1046 frag = &skb_shinfo(skb)->frags[i]; 990 frag = &skb_shinfo(skb)->frags[i];
1047 } 991 }
1048 } else if (i < MAX_SKB_FRAGS) { 992 } else if (i < MAX_SKB_FRAGS) {
@@ -1053,8 +997,8 @@ alloc_new_skb:
1053 err = -ENOMEM; 997 err = -ENOMEM;
1054 goto error; 998 goto error;
1055 } 999 }
1056 sk->sk_sndmsg_page = page; 1000 cork->page = page;
1057 sk->sk_sndmsg_off = 0; 1001 cork->off = 0;
1058 1002
1059 skb_fill_page_desc(skb, i, page, 0, 0); 1003 skb_fill_page_desc(skb, i, page, 0, 0);
1060 frag = &skb_shinfo(skb)->frags[i]; 1004 frag = &skb_shinfo(skb)->frags[i];
@@ -1066,7 +1010,7 @@ alloc_new_skb:
1066 err = -EFAULT; 1010 err = -EFAULT;
1067 goto error; 1011 goto error;
1068 } 1012 }
1069 sk->sk_sndmsg_off += copy; 1013 cork->off += copy;
1070 frag->size += copy; 1014 frag->size += copy;
1071 skb->len += copy; 1015 skb->len += copy;
1072 skb->data_len += copy; 1016 skb->data_len += copy;
@@ -1080,11 +1024,87 @@ alloc_new_skb:
1080 return 0; 1024 return 0;
1081 1025
1082error: 1026error:
1083 inet->cork.length -= length; 1027 cork->length -= length;
1084 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1028 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1085 return err; 1029 return err;
1086} 1030}
1087 1031
1032static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1033 struct ipcm_cookie *ipc, struct rtable **rtp)
1034{
1035 struct inet_sock *inet = inet_sk(sk);
1036 struct ip_options *opt;
1037 struct rtable *rt;
1038
1039 /*
1040 * setup for corking.
1041 */
1042 opt = ipc->opt;
1043 if (opt) {
1044 if (cork->opt == NULL) {
1045 cork->opt = kmalloc(sizeof(struct ip_options) + 40,
1046 sk->sk_allocation);
1047 if (unlikely(cork->opt == NULL))
1048 return -ENOBUFS;
1049 }
1050 memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen);
1051 cork->flags |= IPCORK_OPT;
1052 cork->addr = ipc->addr;
1053 }
1054 rt = *rtp;
1055 if (unlikely(!rt))
1056 return -EFAULT;
1057 /*
1058 * We steal reference to this route, caller should not release it
1059 */
1060 *rtp = NULL;
1061 cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ?
1062 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1063 cork->dst = &rt->dst;
1064 cork->length = 0;
1065 cork->tx_flags = ipc->tx_flags;
1066 cork->page = NULL;
1067 cork->off = 0;
1068
1069 return 0;
1070}
1071
1072/*
1073 * ip_append_data() and ip_append_page() can make one large IP datagram
1074 * from many pieces of data. Each pieces will be holded on the socket
1075 * until ip_push_pending_frames() is called. Each piece can be a page
1076 * or non-page data.
1077 *
1078 * Not only UDP, other transport protocols - e.g. raw sockets - can use
1079 * this interface potentially.
1080 *
1081 * LATER: length must be adjusted by pad at tail, when it is required.
1082 */
1083int ip_append_data(struct sock *sk,
1084 int getfrag(void *from, char *to, int offset, int len,
1085 int odd, struct sk_buff *skb),
1086 void *from, int length, int transhdrlen,
1087 struct ipcm_cookie *ipc, struct rtable **rtp,
1088 unsigned int flags)
1089{
1090 struct inet_sock *inet = inet_sk(sk);
1091 int err;
1092
1093 if (flags&MSG_PROBE)
1094 return 0;
1095
1096 if (skb_queue_empty(&sk->sk_write_queue)) {
1097 err = ip_setup_cork(sk, &inet->cork, ipc, rtp);
1098 if (err)
1099 return err;
1100 } else {
1101 transhdrlen = 0;
1102 }
1103
1104 return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag,
1105 from, length, transhdrlen, flags);
1106}
1107
1088ssize_t ip_append_page(struct sock *sk, struct page *page, 1108ssize_t ip_append_page(struct sock *sk, struct page *page,
1089 int offset, size_t size, int flags) 1109 int offset, size_t size, int flags)
1090{ 1110{
@@ -1228,40 +1248,41 @@ error:
1228 return err; 1248 return err;
1229} 1249}
1230 1250
1231static void ip_cork_release(struct inet_sock *inet) 1251static void ip_cork_release(struct inet_cork *cork)
1232{ 1252{
1233 inet->cork.flags &= ~IPCORK_OPT; 1253 cork->flags &= ~IPCORK_OPT;
1234 kfree(inet->cork.opt); 1254 kfree(cork->opt);
1235 inet->cork.opt = NULL; 1255 cork->opt = NULL;
1236 dst_release(inet->cork.dst); 1256 dst_release(cork->dst);
1237 inet->cork.dst = NULL; 1257 cork->dst = NULL;
1238} 1258}
1239 1259
1240/* 1260/*
1241 * Combined all pending IP fragments on the socket as one IP datagram 1261 * Combined all pending IP fragments on the socket as one IP datagram
1242 * and push them out. 1262 * and push them out.
1243 */ 1263 */
1244int ip_push_pending_frames(struct sock *sk) 1264struct sk_buff *__ip_make_skb(struct sock *sk,
1265 struct sk_buff_head *queue,
1266 struct inet_cork *cork)
1245{ 1267{
1246 struct sk_buff *skb, *tmp_skb; 1268 struct sk_buff *skb, *tmp_skb;
1247 struct sk_buff **tail_skb; 1269 struct sk_buff **tail_skb;
1248 struct inet_sock *inet = inet_sk(sk); 1270 struct inet_sock *inet = inet_sk(sk);
1249 struct net *net = sock_net(sk); 1271 struct net *net = sock_net(sk);
1250 struct ip_options *opt = NULL; 1272 struct ip_options *opt = NULL;
1251 struct rtable *rt = (struct rtable *)inet->cork.dst; 1273 struct rtable *rt = (struct rtable *)cork->dst;
1252 struct iphdr *iph; 1274 struct iphdr *iph;
1253 __be16 df = 0; 1275 __be16 df = 0;
1254 __u8 ttl; 1276 __u8 ttl;
1255 int err = 0;
1256 1277
1257 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1278 if ((skb = __skb_dequeue(queue)) == NULL)
1258 goto out; 1279 goto out;
1259 tail_skb = &(skb_shinfo(skb)->frag_list); 1280 tail_skb = &(skb_shinfo(skb)->frag_list);
1260 1281
1261 /* move skb->data to ip header from ext header */ 1282 /* move skb->data to ip header from ext header */
1262 if (skb->data < skb_network_header(skb)) 1283 if (skb->data < skb_network_header(skb))
1263 __skb_pull(skb, skb_network_offset(skb)); 1284 __skb_pull(skb, skb_network_offset(skb));
1264 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1285 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1265 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1286 __skb_pull(tmp_skb, skb_network_header_len(skb));
1266 *tail_skb = tmp_skb; 1287 *tail_skb = tmp_skb;
1267 tail_skb = &(tmp_skb->next); 1288 tail_skb = &(tmp_skb->next);
@@ -1287,8 +1308,8 @@ int ip_push_pending_frames(struct sock *sk)
1287 ip_dont_fragment(sk, &rt->dst))) 1308 ip_dont_fragment(sk, &rt->dst)))
1288 df = htons(IP_DF); 1309 df = htons(IP_DF);
1289 1310
1290 if (inet->cork.flags & IPCORK_OPT) 1311 if (cork->flags & IPCORK_OPT)
1291 opt = inet->cork.opt; 1312 opt = cork->opt;
1292 1313
1293 if (rt->rt_type == RTN_MULTICAST) 1314 if (rt->rt_type == RTN_MULTICAST)
1294 ttl = inet->mc_ttl; 1315 ttl = inet->mc_ttl;
@@ -1300,7 +1321,7 @@ int ip_push_pending_frames(struct sock *sk)
1300 iph->ihl = 5; 1321 iph->ihl = 5;
1301 if (opt) { 1322 if (opt) {
1302 iph->ihl += opt->optlen>>2; 1323 iph->ihl += opt->optlen>>2;
1303 ip_options_build(skb, opt, inet->cork.addr, rt, 0); 1324 ip_options_build(skb, opt, cork->addr, rt, 0);
1304 } 1325 }
1305 iph->tos = inet->tos; 1326 iph->tos = inet->tos;
1306 iph->frag_off = df; 1327 iph->frag_off = df;
@@ -1316,44 +1337,95 @@ int ip_push_pending_frames(struct sock *sk)
1316 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec 1337 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
1317 * on dst refcount 1338 * on dst refcount
1318 */ 1339 */
1319 inet->cork.dst = NULL; 1340 cork->dst = NULL;
1320 skb_dst_set(skb, &rt->dst); 1341 skb_dst_set(skb, &rt->dst);
1321 1342
1322 if (iph->protocol == IPPROTO_ICMP) 1343 if (iph->protocol == IPPROTO_ICMP)
1323 icmp_out_count(net, ((struct icmphdr *) 1344 icmp_out_count(net, ((struct icmphdr *)
1324 skb_transport_header(skb))->type); 1345 skb_transport_header(skb))->type);
1325 1346
1326 /* Netfilter gets whole the not fragmented skb. */ 1347 ip_cork_release(cork);
1348out:
1349 return skb;
1350}
1351
1352int ip_send_skb(struct sk_buff *skb)
1353{
1354 struct net *net = sock_net(skb->sk);
1355 int err;
1356
1327 err = ip_local_out(skb); 1357 err = ip_local_out(skb);
1328 if (err) { 1358 if (err) {
1329 if (err > 0) 1359 if (err > 0)
1330 err = net_xmit_errno(err); 1360 err = net_xmit_errno(err);
1331 if (err) 1361 if (err)
1332 goto error; 1362 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
1333 } 1363 }
1334 1364
1335out:
1336 ip_cork_release(inet);
1337 return err; 1365 return err;
1366}
1338 1367
1339error: 1368int ip_push_pending_frames(struct sock *sk)
1340 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); 1369{
1341 goto out; 1370 struct sk_buff *skb;
1371
1372 skb = ip_finish_skb(sk);
1373 if (!skb)
1374 return 0;
1375
1376 /* Netfilter gets whole the not fragmented skb. */
1377 return ip_send_skb(skb);
1342} 1378}
1343 1379
1344/* 1380/*
1345 * Throw away all pending data on the socket. 1381 * Throw away all pending data on the socket.
1346 */ 1382 */
1347void ip_flush_pending_frames(struct sock *sk) 1383static void __ip_flush_pending_frames(struct sock *sk,
1384 struct sk_buff_head *queue,
1385 struct inet_cork *cork)
1348{ 1386{
1349 struct sk_buff *skb; 1387 struct sk_buff *skb;
1350 1388
1351 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) 1389 while ((skb = __skb_dequeue_tail(queue)) != NULL)
1352 kfree_skb(skb); 1390 kfree_skb(skb);
1353 1391
1354 ip_cork_release(inet_sk(sk)); 1392 ip_cork_release(cork);
1393}
1394
1395void ip_flush_pending_frames(struct sock *sk)
1396{
1397 __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork);
1355} 1398}
1356 1399
1400struct sk_buff *ip_make_skb(struct sock *sk,
1401 int getfrag(void *from, char *to, int offset,
1402 int len, int odd, struct sk_buff *skb),
1403 void *from, int length, int transhdrlen,
1404 struct ipcm_cookie *ipc, struct rtable **rtp,
1405 unsigned int flags)
1406{
1407 struct inet_cork cork = {};
1408 struct sk_buff_head queue;
1409 int err;
1410
1411 if (flags & MSG_PROBE)
1412 return NULL;
1413
1414 __skb_queue_head_init(&queue);
1415
1416 err = ip_setup_cork(sk, &cork, ipc, rtp);
1417 if (err)
1418 return ERR_PTR(err);
1419
1420 err = __ip_append_data(sk, &queue, &cork, getfrag,
1421 from, length, transhdrlen, flags);
1422 if (err) {
1423 __ip_flush_pending_frames(sk, &queue, &cork);
1424 return ERR_PTR(err);
1425 }
1426
1427 return __ip_make_skb(sk, &queue, &cork);
1428}
1357 1429
1358/* 1430/*
1359 * Fetch data from kernel space and fill in checksum if needed. 1431 * Fetch data from kernel space and fill in checksum if needed.
@@ -1402,16 +1474,19 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1402 } 1474 }
1403 1475
1404 { 1476 {
1405 struct flowi fl = { .oif = arg->bound_dev_if, 1477 struct flowi4 fl4 = {
1406 .fl4_dst = daddr, 1478 .flowi4_oif = arg->bound_dev_if,
1407 .fl4_src = rt->rt_spec_dst, 1479 .daddr = daddr,
1408 .fl4_tos = RT_TOS(ip_hdr(skb)->tos), 1480 .saddr = rt->rt_spec_dst,
1409 .fl_ip_sport = tcp_hdr(skb)->dest, 1481 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
1410 .fl_ip_dport = tcp_hdr(skb)->source, 1482 .fl4_sport = tcp_hdr(skb)->dest,
1411 .proto = sk->sk_protocol, 1483 .fl4_dport = tcp_hdr(skb)->source,
1412 .flags = ip_reply_arg_flowi_flags(arg) }; 1484 .flowi4_proto = sk->sk_protocol,
1413 security_skb_classify_flow(skb, &fl); 1485 .flowi4_flags = ip_reply_arg_flowi_flags(arg),
1414 if (ip_route_output_key(sock_net(sk), &rt, &fl)) 1486 };
1487 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1488 rt = ip_route_output_key(sock_net(sk), &fl4);
1489 if (IS_ERR(rt))
1415 return; 1490 return;
1416 } 1491 }
1417 1492
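
The ip_output.c refactor above threads an explicit (queue, cork) pair through __ip_append_data() and __ip_make_skb(): ip_append_data() keeps operating on sk_write_queue and inet->cork as before, while the new ip_make_skb() runs the same core against a private queue and a stack-local cork, so one datagram can be built and handed to ip_send_skb() without socket-wide corking state. A toy sketch of that shape (all types and helpers invented; no resemblance to real skb handling):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct piece { struct piece *next; size_t len; char data[64]; };
struct queue { struct piece *head, *tail; };
struct cork  { size_t length; };

static int append_data(struct queue *q, struct cork *c,
                       const char *from, size_t len)
{
        struct piece *p = calloc(1, sizeof(*p));

        if (!p)
                return -1;
        p->len = len < sizeof(p->data) ? len : sizeof(p->data);
        memcpy(p->data, from, p->len);
        if (q->tail)
                q->tail->next = p;
        else
                q->head = p;
        q->tail = p;
        c->length += p->len;    /* mirrors cork->length += length */
        return 0;
}

/* Collapse the queued pieces into one "datagram" and release the cork. */
static size_t make_skb(struct queue *q, struct cork *c, char *out)
{
        size_t off = 0;

        for (struct piece *p = q->head, *n; p; p = n) {
                n = p->next;
                memcpy(out + off, p->data, p->len);
                off += p->len;
                free(p);
        }
        q->head = q->tail = NULL;
        c->length = 0;
        return off;
}

int main(void)
{
        struct queue q = { 0 }; /* private queue, not the socket's */
        struct cork c = { 0 };  /* stack-local cork, as in ip_make_skb() */
        char dgram[256];

        append_data(&q, &c, "hello ", 6);
        append_data(&q, &c, "world", 5);
        printf("built one datagram of %zu bytes\n", make_skb(&q, &c, dgram));
        return 0;
}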
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54..bfc17c5914e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -460,19 +460,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
460 goto tx_error_icmp; 460 goto tx_error_icmp;
461 } 461 }
462 462
463 { 463 rt = ip_route_output_ports(dev_net(dev), NULL,
464 struct flowi fl = { 464 dst, tiph->saddr,
465 .oif = tunnel->parms.link, 465 0, 0,
466 .fl4_dst = dst, 466 IPPROTO_IPIP, RT_TOS(tos),
467 .fl4_src= tiph->saddr, 467 tunnel->parms.link);
468 .fl4_tos = RT_TOS(tos), 468 if (IS_ERR(rt)) {
469 .proto = IPPROTO_IPIP 469 dev->stats.tx_carrier_errors++;
470 }; 470 goto tx_error_icmp;
471
472 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
473 dev->stats.tx_carrier_errors++;
474 goto tx_error_icmp;
475 }
476 } 471 }
477 tdev = rt->dst.dev; 472 tdev = rt->dst.dev;
478 473
@@ -583,16 +578,14 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
583 iph = &tunnel->parms.iph; 578 iph = &tunnel->parms.iph;
584 579
585 if (iph->daddr) { 580 if (iph->daddr) {
586 struct flowi fl = { 581 struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL,
587 .oif = tunnel->parms.link, 582 iph->daddr, iph->saddr,
588 .fl4_dst = iph->daddr, 583 0, 0,
589 .fl4_src = iph->saddr, 584 IPPROTO_IPIP,
590 .fl4_tos = RT_TOS(iph->tos), 585 RT_TOS(iph->tos),
591 .proto = IPPROTO_IPIP 586 tunnel->parms.link);
592 }; 587
593 struct rtable *rt; 588 if (!IS_ERR(rt)) {
594
595 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
596 tdev = rt->dst.dev; 589 tdev = rt->dst.dev;
597 ip_rt_put(rt); 590 ip_rt_put(rt);
598 } 591 }
@@ -913,4 +906,4 @@ static void __exit ipip_fini(void)
913module_init(ipip_init); 906module_init(ipip_init);
914module_exit(ipip_fini); 907module_exit(ipip_fini);
915MODULE_LICENSE("GPL"); 908MODULE_LICENSE("GPL");
916MODULE_ALIAS("tunl0"); 909MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8b65a12654e..1f62eaeb6de 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -148,14 +148,15 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
148 return NULL; 148 return NULL;
149} 149}
150 150
151static int ipmr_fib_lookup(struct net *net, struct flowi *flp, 151static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
152 struct mr_table **mrt) 152 struct mr_table **mrt)
153{ 153{
154 struct ipmr_result res; 154 struct ipmr_result res;
155 struct fib_lookup_arg arg = { .result = &res, }; 155 struct fib_lookup_arg arg = { .result = &res, };
156 int err; 156 int err;
157 157
158 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); 158 err = fib_rules_lookup(net->ipv4.mr_rules_ops,
159 flowi4_to_flowi(flp4), 0, &arg);
159 if (err < 0) 160 if (err < 0)
160 return err; 161 return err;
161 *mrt = res.mrt; 162 *mrt = res.mrt;
@@ -283,7 +284,7 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
283 return net->ipv4.mrt; 284 return net->ipv4.mrt;
284} 285}
285 286
286static int ipmr_fib_lookup(struct net *net, struct flowi *flp, 287static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
287 struct mr_table **mrt) 288 struct mr_table **mrt)
288{ 289{
289 *mrt = net->ipv4.mrt; 290 *mrt = net->ipv4.mrt;
@@ -435,14 +436,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
435{ 436{
436 struct net *net = dev_net(dev); 437 struct net *net = dev_net(dev);
437 struct mr_table *mrt; 438 struct mr_table *mrt;
438 struct flowi fl = { 439 struct flowi4 fl4 = {
439 .oif = dev->ifindex, 440 .flowi4_oif = dev->ifindex,
440 .iif = skb->skb_iif, 441 .flowi4_iif = skb->skb_iif,
441 .mark = skb->mark, 442 .flowi4_mark = skb->mark,
442 }; 443 };
443 int err; 444 int err;
444 445
445 err = ipmr_fib_lookup(net, &fl, &mrt); 446 err = ipmr_fib_lookup(net, &fl4, &mrt);
446 if (err < 0) { 447 if (err < 0) {
447 kfree_skb(skb); 448 kfree_skb(skb);
448 return err; 449 return err;
@@ -1611,26 +1612,20 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1611#endif 1612#endif
1612 1613
1613 if (vif->flags & VIFF_TUNNEL) { 1614 if (vif->flags & VIFF_TUNNEL) {
1614 struct flowi fl = { 1615 rt = ip_route_output_ports(net, NULL,
1615 .oif = vif->link, 1616 vif->remote, vif->local,
1616 .fl4_dst = vif->remote, 1617 0, 0,
1617 .fl4_src = vif->local, 1618 IPPROTO_IPIP,
1618 .fl4_tos = RT_TOS(iph->tos), 1619 RT_TOS(iph->tos), vif->link);
1619 .proto = IPPROTO_IPIP 1620 if (IS_ERR(rt))
1620 };
1621
1622 if (ip_route_output_key(net, &rt, &fl))
1623 goto out_free; 1621 goto out_free;
1624 encap = sizeof(struct iphdr); 1622 encap = sizeof(struct iphdr);
1625 } else { 1623 } else {
1626 struct flowi fl = { 1624 rt = ip_route_output_ports(net, NULL, iph->daddr, 0,
1627 .oif = vif->link, 1625 0, 0,
1628 .fl4_dst = iph->daddr, 1626 IPPROTO_IPIP,
1629 .fl4_tos = RT_TOS(iph->tos), 1627 RT_TOS(iph->tos), vif->link);
1630 .proto = IPPROTO_IPIP 1628 if (IS_ERR(rt))
1631 };
1632
1633 if (ip_route_output_key(net, &rt, &fl))
1634 goto out_free; 1629 goto out_free;
1635 } 1630 }
1636 1631
@@ -1793,6 +1788,24 @@ dont_forward:
1793 return 0; 1788 return 0;
1794} 1789}
1795 1790
1791static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt)
1792{
1793 struct flowi4 fl4 = {
1794 .daddr = rt->rt_key_dst,
1795 .saddr = rt->rt_key_src,
1796 .flowi4_tos = rt->rt_tos,
1797 .flowi4_oif = rt->rt_oif,
1798 .flowi4_iif = rt->rt_iif,
1799 .flowi4_mark = rt->rt_mark,
1800 };
1801 struct mr_table *mrt;
1802 int err;
1803
1804 err = ipmr_fib_lookup(net, &fl4, &mrt);
1805 if (err)
1806 return ERR_PTR(err);
1807 return mrt;
1808}
1796 1809
1797/* 1810/*
1798 * Multicast packets for forwarding arrive here 1811 * Multicast packets for forwarding arrive here
@@ -1805,7 +1818,6 @@ int ip_mr_input(struct sk_buff *skb)
1805 struct net *net = dev_net(skb->dev); 1818 struct net *net = dev_net(skb->dev);
1806 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1819 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1807 struct mr_table *mrt; 1820 struct mr_table *mrt;
1808 int err;
1809 1821
1810 /* Packet is looped back after forward, it should not be 1822 /* Packet is looped back after forward, it should not be
1811 * forwarded second time, but still can be delivered locally. 1823 * forwarded second time, but still can be delivered locally.
@@ -1813,12 +1825,11 @@ int ip_mr_input(struct sk_buff *skb)
1813 if (IPCB(skb)->flags & IPSKB_FORWARDED) 1825 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1814 goto dont_forward; 1826 goto dont_forward;
1815 1827
1816 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1828 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
1817 if (err < 0) { 1829 if (IS_ERR(mrt)) {
1818 kfree_skb(skb); 1830 kfree_skb(skb);
1819 return err; 1831 return PTR_ERR(mrt);
1820 } 1832 }
1821
1822 if (!local) { 1833 if (!local) {
1823 if (IPCB(skb)->opt.router_alert) { 1834 if (IPCB(skb)->opt.router_alert) {
1824 if (ip_call_ra_chain(skb)) 1835 if (ip_call_ra_chain(skb))
@@ -1946,9 +1957,9 @@ int pim_rcv_v1(struct sk_buff *skb)
1946 1957
1947 pim = igmp_hdr(skb); 1958 pim = igmp_hdr(skb);
1948 1959
1949 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1960 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
1961 if (IS_ERR(mrt))
1950 goto drop; 1962 goto drop;
1951
1952 if (!mrt->mroute_do_pim || 1963 if (!mrt->mroute_do_pim ||
1953 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1964 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1954 goto drop; 1965 goto drop;
@@ -1978,9 +1989,9 @@ static int pim_rcv(struct sk_buff *skb)
1978 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1989 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1979 goto drop; 1990 goto drop;
1980 1991
1981 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1992 mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
1993 if (IS_ERR(mrt))
1982 goto drop; 1994 goto drop;
1983
1984 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1995 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1985drop: 1996drop:
1986 kfree_skb(skb); 1997 kfree_skb(skb);
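
The new ipmr_rt_fib_lookup() above wraps ipmr_fib_lookup(), which still returns an int and fills an out-parameter, into the pointer-or-error shape that ip_mr_input() and the pim_rcv paths now expect. A sketch of that wrapping direction, reusing the same local ERR_PTR helpers as the earlier icmp.c sketch:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095
static inline void *ERR_PTR(long e) { return (void *)e; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

struct mr_table { int id; };
static struct mr_table the_table = { .id = 1 };

/* Legacy shape: int error code plus out-parameter. */
static int fib_lookup_sketch(int key, struct mr_table **mrt)
{
        if (key < 0)
                return -ENOENT;
        *mrt = &the_table;
        return 0;
}

/* New shape: one return value carries the object or the error. */
static struct mr_table *rt_fib_lookup_sketch(int key)
{
        struct mr_table *mrt;
        int err = fib_lookup_sketch(key, &mrt);

        if (err)
                return ERR_PTR(err);
        return mrt;
}

int main(void)
{
        struct mr_table *mrt = rt_fib_lookup_sketch(-1);

        if (IS_ERR(mrt))
                printf("lookup failed: %ld\n", PTR_ERR(mrt));
        mrt = rt_fib_lookup_sketch(5);
        if (!IS_ERR(mrt))
                printf("table %d\n", mrt->id);
        return 0;
}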
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 994a1f29ebb..f3c0b549b8e 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -16,7 +16,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	struct net *net = dev_net(skb_dst(skb)->dev);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
-	struct flowi fl = {};
+	struct flowi4 fl4 = {};
 	unsigned long orefdst;
 	unsigned int hh_len;
 	unsigned int type;
@@ -31,14 +31,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
 	 */
 	if (addr_type == RTN_LOCAL) {
-		fl.fl4_dst = iph->daddr;
+		fl4.daddr = iph->daddr;
 		if (type == RTN_LOCAL)
-			fl.fl4_src = iph->saddr;
-		fl.fl4_tos = RT_TOS(iph->tos);
-		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
-		fl.mark = skb->mark;
-		fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
-		if (ip_route_output_key(net, &rt, &fl) != 0)
+			fl4.saddr = iph->saddr;
+		fl4.flowi4_tos = RT_TOS(iph->tos);
+		fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+		fl4.flowi4_mark = skb->mark;
+		fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
+		rt = ip_route_output_key(net, &fl4);
+		if (IS_ERR(rt))
 			return -1;
 
 		/* Drop old route. */
@@ -47,8 +48,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
-		fl.fl4_dst = iph->saddr;
-		if (ip_route_output_key(net, &rt, &fl) != 0)
+		fl4.daddr = iph->saddr;
+		rt = ip_route_output_key(net, &fl4);
+		if (IS_ERR(rt))
 			return -1;
 
 		orefdst = skb->_skb_refdst;
@@ -66,10 +68,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 
 #ifdef CONFIG_XFRM
 	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(skb, &fl, AF_INET) == 0) {
+	    xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
 		struct dst_entry *dst = skb_dst(skb);
 		skb_dst_set(skb, NULL);
-		if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
+		dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
+		if (IS_ERR(dst))
 			return -1;
 		skb_dst_set(skb, dst);
 	}
@@ -102,7 +105,8 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0)
+	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
+	if (IS_ERR(dst))
 		return -1;
 
 	skb_dst_drop(skb);
@@ -219,7 +223,11 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
 
 static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
 {
-	return ip_route_output_key(&init_net, (struct rtable **)dst, fl);
+	struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+	*dst = &rt->dst;
+	return 0;
 }
 
 static const struct nf_afinfo nf_ip_afinfo = {
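nf_ip_route() above shows the bridging case: a caller that still wants the old fill-an-out-parameter contract wraps the new pointer-returning lookup. A compilable sketch of that wrapper shape, under the same assumptions as the previous demo (stand-in types; route_output() is invented):

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO	4095
#define ERR_PTR(e)	((void *)(long)(e))
#define PTR_ERR(p)	((long)(p))
#define IS_ERR(p)	((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)

struct rtable { int dst; };		/* stand-in */

/* new-style core lookup: result or encoded errno in the pointer */
static struct rtable *route_output(int key)
{
	static struct rtable rt = { .dst = 1 };

	return key ? &rt : ERR_PTR(-ENETUNREACH);
}

/* legacy-contract wrapper, same shape as nf_ip_route() above:
 * fill an out parameter and return 0 / -errno */
static int nf_route(struct rtable **out, int key)
{
	struct rtable *rt = route_output(key);

	if (IS_ERR(rt))
		return (int)PTR_ERR(rt);
	*out = rt;
	return 0;
}

int main(void)
{
	struct rtable *rt;
	int err = nf_route(&rt, 0);

	printf("err=%d\n", err);	/* -101 (ENETUNREACH) on Linux */
	return 0;
}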
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index babd1a2bae5..1dfc18a03fd 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -64,16 +64,6 @@ config IP_NF_IPTABLES
 if IP_NF_IPTABLES
 
 # The matches.
-config IP_NF_MATCH_ADDRTYPE
-	tristate '"addrtype" address type match support'
-	depends on NETFILTER_ADVANCED
-	help
-	  This option allows you to match what routing thinks of an address,
-	  eg. UNICAST, LOCAL, BROADCAST, ...
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
-
 config IP_NF_MATCH_AH
 	tristate '"ah" match support'
 	depends on NETFILTER_ADVANCED
@@ -206,8 +196,9 @@ config IP_NF_TARGET_REDIRECT
 
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support"
-	depends on NF_NAT
+	depends on NF_CONNTRACK_SNMP && NF_NAT
 	depends on NETFILTER_ADVANCED
+	default NF_NAT && NF_CONNTRACK_SNMP
 	---help---
 
 	  This module implements an Application Layer Gateway (ALG) for
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 19eb59d0103..dca2082ec68 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 
 # matches
-obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e855fffaed9..4b5d457c2d7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(NFPROTO_ARP, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1065,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user,
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -1333,6 +1335,7 @@ static int translate_compat_table(const char *name,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(NFPROTO_ARP);
+	xt_compat_init_offsets(NFPROTO_ARP, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
@@ -1486,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user,
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -1738,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 			ret = -EFAULT;
 			break;
 		}
+		rev.name[sizeof(rev.name)-1] = 0;
 
 		try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
 							 rev.revision, 1, &ret),
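The tmp.name and rev.name assignments added above (and repeated in ip_tables.c below) are all the same hardening: a fixed-size name copied in from userspace is not guaranteed to be NUL-terminated, so later string handling could run off the end of the array. A userspace sketch of the worst case and the one-line fix; struct replace_req is a stand-in for the xt structures:

#include <stdio.h>
#include <string.h>

struct replace_req {
	char name[32];		/* may arrive from userspace unterminated */
	unsigned int size;
};

static void sanitize(struct replace_req *tmp)
{
	/* the same one-liner the hunks add: guarantee a terminator so
	 * strcmp()/strlen() on the name stay inside the array */
	tmp->name[sizeof(tmp->name) - 1] = 0;
}

int main(void)
{
	struct replace_req r;

	memset(r.name, 'A', sizeof(r.name));	/* worst case: no NUL at all */
	sanitize(&r);
	printf("len=%zu\n", strlen(r.name));	/* bounded: 31 */
	return 0;
}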
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 652efea013d..ffcea0d1678 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb,
 				verdict = (unsigned)(-v) - 1;
 				break;
 			}
-			if (*stackptr == 0) {
+			if (*stackptr <= origptr) {
 				e = get_entry(table_base,
 				    private->underflow[hook]);
 				pr_debug("Underflow (this is normal) "
@@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb,
 			/* Verdict */
 			break;
 	} while (!acpar.hotdrop);
-	xt_info_rdunlock_bh();
 	pr_debug("Exiting %s; resetting sp from %u to %u\n",
 		 __func__, *stackptr, origptr);
 	*stackptr = origptr;
+	xt_info_rdunlock_bh();
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(AF_INET, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1261,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -1664,6 +1666,7 @@ translate_compat_table(struct net *net,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(AF_INET);
+	xt_compat_init_offsets(AF_INET, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
@@ -1805,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -2034,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EFAULT;
 			break;
 		}
+		rev.name[sizeof(rev.name)-1] = 0;
 
 		if (cmd == IPT_SO_GET_REVISION_TARGET)
 			target = 1;
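The *stackptr <= origptr change in ipt_do_table() deserves a model: the rule-jump stack can already hold frames when the table is re-entered (for instance when a target loops a packet back through the stack), so "stack empty" must mean "back at the depth we entered with", not literally zero. A toy sketch with invented names and a trivial chain walk, purely to illustrate the comparison:

#include <stdio.h>

#define STACK_DEPTH 16

static unsigned int jumpstack[STACK_DEPTH];

static const char *table_walk(unsigned int *stackptr)
{
	unsigned int origptr = *stackptr;	/* frames owned by our caller */

	jumpstack[(*stackptr)++] = 42;		/* jump into a user-defined chain */
	(*stackptr)--;				/* chain RETURNs */

	if (*stackptr <= origptr)		/* old test was: *stackptr == 0 */
		return "underflow -> use the built-in chain's policy";
	return "pop the caller's frame (wrong!)";
}

int main(void)
{
	unsigned int sp = 3;	/* table re-entered with three frames live */

	puts(table_walk(&sp));
	return 0;
}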
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a489765..d609ac3cb9a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	 * that the ->target() function isn't called after ->destroy() */
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL) {
-		pr_info("no conntrack!\n");
-		/* FIXME: need to drop invalid ones, since replies
-		 * to outgoing connections of other nodes will be
-		 * marked as INVALID */
+	if (ct == NULL)
 		return NF_DROP;
-	}
 
 	/* special case: ICMP error handling. conntrack distinguishes between
 	 * error messages (RELATED) and information requests (see below) */
@@ -669,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
 	char buffer[PROC_WRITELEN+1];
 	unsigned long nodenum;
 
-	if (copy_from_user(buffer, input, PROC_WRITELEN))
+	if (size > PROC_WRITELEN)
+		return -EIO;
+	if (copy_from_user(buffer, input, size))
 		return -EFAULT;
+	buffer[size] = 0;
 
 	if (*buffer == '+') {
 		nodenum = simple_strtoul(buffer+1, NULL, 10);
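The clusterip_proc_write() fix above bounds the copy by the caller-supplied size and terminates exactly what was copied, instead of always reading PROC_WRITELEN bytes from a possibly shorter user buffer. Userspace model of the fixed flow (copy_from_user() stubbed with memcpy; the PROC_WRITELEN value is illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define PROC_WRITELEN 10

static int proc_write(const char *input, size_t size)
{
	char buffer[PROC_WRITELEN + 1];

	if (size > PROC_WRITELEN)	/* new: refuse oversized writes */
		return -EIO;
	memcpy(buffer, input, size);	/* kernel: copy_from_user() of `size`,
					 * not the fixed PROC_WRITELEN */
	buffer[size] = 0;		/* terminate exactly what arrived */

	if (*buffer == '+')
		printf("enable node %lu\n", strtoul(buffer + 1, NULL, 10));
	return (int)size;
}

int main(void)
{
	return proc_write("+7", 2) < 0;
}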
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 72ffc8fda2e..d76d6c9ed94 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
 	}
 #endif
 
-	/* MAC logging for input path only. */
-	if (in && !out)
+	if (in != NULL)
 		dump_mac_header(m, loginfo, skb);
 
 	dump_packet(m, loginfo, skb, 0);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
deleted file mode 100644
index db8bff0fb86..00000000000
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *  iptables module to match inet_addr_type() of an ip.
- *
- *  Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
- *  (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ipt_addrtype.h>
-#include <linux/netfilter/x_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: address type match for IPv4");
-
-static inline bool match_type(struct net *net, const struct net_device *dev,
-			      __be32 addr, u_int16_t mask)
-{
-	return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
-}
-
-static bool
-addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	struct net *net = dev_net(par->in ? par->in : par->out);
-	const struct ipt_addrtype_info *info = par->matchinfo;
-	const struct iphdr *iph = ip_hdr(skb);
-	bool ret = true;
-
-	if (info->source)
-		ret &= match_type(net, NULL, iph->saddr, info->source) ^
-		       info->invert_source;
-	if (info->dest)
-		ret &= match_type(net, NULL, iph->daddr, info->dest) ^
-		       info->invert_dest;
-
-	return ret;
-}
-
-static bool
-addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	struct net *net = dev_net(par->in ? par->in : par->out);
-	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct net_device *dev = NULL;
-	bool ret = true;
-
-	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
-		dev = par->in;
-	else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
-		dev = par->out;
-
-	if (info->source)
-		ret &= match_type(net, dev, iph->saddr, info->source) ^
-		       (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
-	if (ret && info->dest)
-		ret &= match_type(net, dev, iph->daddr, info->dest) ^
-		       !!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
-	return ret;
-}
-
-static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
-{
-	struct ipt_addrtype_info_v1 *info = par->matchinfo;
-
-	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
-	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		pr_info("both incoming and outgoing "
-			"interface limitation cannot be selected\n");
-		return -EINVAL;
-	}
-
-	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
-	    (1 << NF_INET_LOCAL_IN)) &&
-	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		pr_info("output interface limitation "
-			"not valid in PREROUTING and INPUT\n");
-		return -EINVAL;
-	}
-
-	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
-	    (1 << NF_INET_LOCAL_OUT)) &&
-	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
-		pr_info("input interface limitation "
-			"not valid in POSTROUTING and OUTPUT\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static struct xt_match addrtype_mt_reg[] __read_mostly = {
-	{
-		.name		= "addrtype",
-		.family		= NFPROTO_IPV4,
-		.match		= addrtype_mt_v0,
-		.matchsize	= sizeof(struct ipt_addrtype_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "addrtype",
-		.family		= NFPROTO_IPV4,
-		.revision	= 1,
-		.match		= addrtype_mt_v1,
-		.checkentry	= addrtype_mt_checkentry_v1,
-		.matchsize	= sizeof(struct ipt_addrtype_info_v1),
-		.me		= THIS_MODULE
-	}
-};
-
-static int __init addrtype_mt_init(void)
-{
-	return xt_register_matches(addrtype_mt_reg,
-				   ARRAY_SIZE(addrtype_mt_reg));
-}
-
-static void __exit addrtype_mt_exit(void)
-{
-	xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
-}
-
-module_init(addrtype_mt_init);
-module_exit(addrtype_mt_exit);
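For reference, the heart of the removed match is the XOR-invert idiom: test whether the routing type's bit lands in the configured mask, then flip the result with the invert flag, normalizing the flag to 0/1 with !! before the XOR (which is exactly why addrtype_mt_v1() writes !!(flags & INVERT_DEST)). A standalone demo with made-up constant values:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define RTN_UNICAST	1
#define RTN_LOCAL	2
#define INVERT		0x4	/* illustrative flag value */

static bool match_type(uint16_t mask, int addr_type)
{
	return !!(mask & (1 << addr_type));
}

int main(void)
{
	uint16_t mask = 1 << RTN_LOCAL;
	uint32_t flags = INVERT;

	/* matches when the type is NOT local, because of the invert bit;
	 * without !! a multi-bit flag would corrupt the XOR */
	bool hit = match_type(mask, RTN_UNICAST) ^ !!(flags & INVERT);

	printf("%s\n", hit ? "match" : "no match");
	return 0;
}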
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 294a2a32f29..aef5d1fbe77 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
 	ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
 			   dev_net(out)->ipv4.iptable_mangle);
 	/* Reroute for ANY change. */
-	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+	if (ret != NF_DROP && ret != NF_STOLEN) {
 		iph = ip_hdr(skb);
 
 		if (iph->saddr != saddr ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 63f60fc5d26..5585980fce2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
 
 struct ct_iter_state {
 	struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	for (st->bucket = 0;
 	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(net->ct.hash[st->bucket].first);
+		n = rcu_dereference(
+			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
 			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
-		head = rcu_dereference(net->ct.hash[st->bucket].first);
+		head = rcu_dereference(
+			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 	}
 	return head;
 }
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		n = rcu_dereference(
+			hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 		if (n)
 			return n;
 	}
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_next_rcu(head));
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		head = rcu_dereference(
+			hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 	}
 	return head;
 }
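The accessor conversions above (hlist_nulls_first_rcu() and friends) sit on top of the nulls-marker trick: conntrack chains end not in NULL but in an odd-valued sentinel encoding the bucket number, so a lockless reader can detect that an entry was moved to a different chain mid-walk. A simplified, compilable model of just the marker encoding (the RCU machinery itself is out of scope), loosely following include/linux/list_nulls.h:

#include <stdio.h>
#include <stdint.h>

struct hlist_nulls_node { struct hlist_nulls_node *next; };

static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
{
	return (uintptr_t)ptr & 1;		/* odd pointer = end marker */
}

static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
{
	return (uintptr_t)ptr >> 1;		/* bucket id stored above bit 0 */
}

static inline struct hlist_nulls_node *make_nulls(unsigned long bucket)
{
	return (struct hlist_nulls_node *)((bucket << 1) | 1);
}

int main(void)
{
	struct hlist_nulls_node n = { .next = make_nulls(7) };
	struct hlist_nulls_node *head = &n;

	while (!is_a_nulls(head))
		head = head->next;
	/* a reader compares this against the bucket it started in */
	printf("chain ended in bucket %lu\n", get_nulls_value(head));
	return 0;
}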
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 0f23b3f06df..703f366fd23 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
 
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		int ret;
+		int res;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
-		if (ret == 0)
+		res = nf_ct_expect_related(exp);
+		if (res == 0)
 			break;
-		else if (ret != -EBUSY) {
+		else if (res != -EBUSY) {
 			port = 0;
 			break;
 		}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index c04787ce1a7..21bcf471b25 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   manips not an issue.  */
 	if (maniptype == IP_NAT_MANIP_SRC &&
 	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
+		/* try the original tuple first */
+		if (in_range(orig_tuple, range)) {
+			if (!nf_nat_used_tuple(orig_tuple, ct)) {
+				*tuple = *orig_tuple;
+				return;
+			}
+		} else if (find_appropriate_src(net, zone, orig_tuple, tuple,
+			   range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!nf_nat_used_tuple(tuple, ct))
 				return;
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct,
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 	struct nf_conn_nat *nat;
-	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
 
 	/* nat helper or nfctnetlink also setup binding */
 	nat = nfct_nat(ct);
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 		ct->status |= IPS_DST_NAT;
 	}
 
-	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
+	if (maniptype == IP_NAT_MANIP_SRC) {
 		unsigned int srchash;
 
 		srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 	/* It's done. */
 	if (maniptype == IP_NAT_MANIP_DST)
-		set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+		ct->status |= IPS_DST_NAT_DONE;
 	else
-		set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+		ct->status |= IPS_SRC_NAT_DONE;
 
 	return NF_ACCEPT;
 }
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
 	int ret = 0;
 
 	spin_lock_bh(&nf_nat_lock);
-	if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+	if (rcu_dereference_protected(
+			nf_nat_protos[proto->protonum],
+			lockdep_is_held(&nf_nat_lock)
+			) != &nf_nat_unknown_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 	if (nat == NULL || nat->ct == NULL)
 		return;
 
-	NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
+	NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
 
 	spin_lock_bh(&nf_nat_lock);
 	hlist_del_rcu(&nat->bysource);
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old)
 	struct nf_conn_nat *old_nat = old;
 	struct nf_conn *ct = old_nat->ct;
 
-	if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
+	if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
 		return;
 
 	spin_lock_bh(&nf_nat_lock);
-	new_nat->ct = ct;
 	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	spin_unlock_bh(&nf_nat_lock);
 }
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net)
 {
 	/* Leave them the same for the moment. */
 	net->ipv4.nat_htable_size = net->ct.htable_size;
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
-						       &net->ipv4.nat_vmalloced, 0);
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
 	if (!net->ipv4.nat_bysource)
 		return -ENOMEM;
 	return 0;
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
 	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
 	synchronize_rcu();
-	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-			     net->ipv4.nat_htable_size);
+	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
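rcu_dereference_protected() as used in nf_nat_protocol_register() above documents that the read is safe because nf_nat_lock is held, without the memory-barrier cost of a reader-side dereference; lockdep_is_held() is the machine-checkable proof. A toy userspace analogue where the "proof" degenerates to an assert (the real macro also quiets sparse's address-space checks):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t nat_lock = PTHREAD_MUTEX_INITIALIZER;
static int lock_held;			/* stand-in for lockdep's bookkeeping */
static void *protos[256];

/* comma expression: check the condition, then yield the pointer */
#define rcu_dereference_protected(p, cond) \
	(assert(cond), (p))

int main(void)
{
	pthread_mutex_lock(&nat_lock);
	lock_held = 1;
	void *cur = rcu_dereference_protected(protos[6], lock_held);
	printf("slot 6 = %p\n", cur);
	lock_held = 0;
	pthread_mutex_unlock(&nat_lock);
	return 0;
}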
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ee5f419d0a5..8812a02078a 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -54,6 +54,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
 {
 	int ret = 0;
 
-	ret = nf_conntrack_helper_register(&snmp_helper);
-	if (ret < 0)
-		return ret;
+	BUG_ON(nf_nat_snmp_hook != NULL);
+	rcu_assign_pointer(nf_nat_snmp_hook, help);
+
 	ret = nf_conntrack_helper_register(&snmp_trap_helper);
 	if (ret < 0) {
 		nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static void __exit nf_nat_snmp_basic_fini(void)
 
 static void __exit nf_nat_snmp_basic_fini(void)
 {
-	nf_conntrack_helper_unregister(&snmp_helper);
+	rcu_assign_pointer(nf_nat_snmp_hook, NULL);
 	nf_conntrack_helper_unregister(&snmp_trap_helper);
 }
 
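The init/fini hunks replace a second registered helper with a published function pointer that the conntrack side invokes through RCU. Userspace model of that publish/consume shape using a C11 atomic pointer in place of rcu_assign_pointer()/rcu_dereference(); the names mirror the patch but the types are invented for the demo:

#include <stdio.h>
#include <stdatomic.h>

typedef int (*snmp_hook_t)(int pkt);

static _Atomic(snmp_hook_t) nf_nat_snmp_hook;

static int help(int pkt) { return pkt + 1; }

static void module_init_(void) { atomic_store(&nf_nat_snmp_hook, help); }
static void module_exit_(void) { atomic_store(&nf_nat_snmp_hook, NULL); }

int main(void)
{
	module_init_();
	/* consumer side: load once, tolerate the hook being absent */
	snmp_hook_t fn = atomic_load(&nf_nat_snmp_hook);
	if (fn)
		printf("hook says %d\n", fn(41));
	module_exit_();
	return 0;
}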
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 95481fee8bd..7317bdf1d45 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -31,6 +31,7 @@
 #ifdef CONFIG_XFRM
 static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 {
+	struct flowi4 *fl4 = &fl->u.ip4;
 	const struct nf_conn *ct;
 	const struct nf_conntrack_tuple *t;
 	enum ip_conntrack_info ctinfo;
@@ -49,25 +50,25 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 		statusbit = IPS_SRC_NAT;
 
 	if (ct->status & statusbit) {
-		fl->fl4_dst = t->dst.u3.ip;
+		fl4->daddr = t->dst.u3.ip;
 		if (t->dst.protonum == IPPROTO_TCP ||
 		    t->dst.protonum == IPPROTO_UDP ||
 		    t->dst.protonum == IPPROTO_UDPLITE ||
 		    t->dst.protonum == IPPROTO_DCCP ||
 		    t->dst.protonum == IPPROTO_SCTP)
-			fl->fl_ip_dport = t->dst.u.tcp.port;
+			fl4->fl4_dport = t->dst.u.tcp.port;
 	}
 
 	statusbit ^= IPS_NAT_MASK;
 
 	if (ct->status & statusbit) {
-		fl->fl4_src = t->src.u3.ip;
+		fl4->saddr = t->src.u3.ip;
 		if (t->dst.protonum == IPPROTO_TCP ||
 		    t->dst.protonum == IPPROTO_UDP ||
 		    t->dst.protonum == IPPROTO_UDPLITE ||
 		    t->dst.protonum == IPPROTO_DCCP ||
 		    t->dst.protonum == IPPROTO_SCTP)
-			fl->fl_ip_sport = t->src.u.tcp.port;
+			fl4->fl4_sport = t->src.u.tcp.port;
 	}
 }
 #endif
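nat_decode_session() now grabs &fl->u.ip4 once and then writes typed IPv4 fields; the enabling change is the union-keyed struct flowi layout. A reduced sketch of that layout (the real flowi4 embeds a flowi_common and many more fields; only a subset is shown here):

#include <stdio.h>
#include <stdint.h>

struct flowi4 {				/* reduced field set */
	int	 flowi4_oif;
	uint32_t daddr, saddr;
	uint16_t fl4_sport, fl4_dport;
};

struct flowi6 { uint8_t daddr[16]; };	/* reduced */

struct flowi {
	union {				/* one key per address family */
		struct flowi4 ip4;
		struct flowi6 ip6;
	} u;
};

static void nat_decode_session(struct flowi *fl)
{
	struct flowi4 *fl4 = &fl->u.ip4;	/* one cast point, then typed */

	fl4->daddr = 0x0a000001;
	fl4->fl4_dport = 53;
}

int main(void)
{
	struct flowi fl = { 0 };

	nat_decode_session(&fl);
	printf("daddr=%08x dport=%u\n", fl.u.ip4.daddr, fl.u.ip4.fl4_dport);
	return 0;
}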
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 6390ba299b3..e837ffd3edc 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -402,7 +402,7 @@ error:
 	return err;
 }
 
-static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg)
 {
 	struct iovec *iov;
 	u8 __user *type = NULL;
@@ -418,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 		if (!iov)
 			continue;
 
-		switch (fl->proto) {
+		switch (fl4->flowi4_proto) {
 		case IPPROTO_ICMP:
 			/* check if one-byte field is readable or not. */
 			if (iov->iov_base && iov->iov_len < 1)
@@ -433,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 				code = iov->iov_base;
 
 			if (type && code) {
-				if (get_user(fl->fl_icmp_type, type) ||
-				    get_user(fl->fl_icmp_code, code))
+				if (get_user(fl4->fl4_icmp_type, type) ||
+				    get_user(fl4->fl4_icmp_code, code))
 					return -EFAULT;
 				probed = 1;
 			}
@@ -548,25 +548,30 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	}
 
 	{
-		struct flowi fl = { .oif = ipc.oif,
-				    .mark = sk->sk_mark,
-				    .fl4_dst = daddr,
-				    .fl4_src = saddr,
-				    .fl4_tos = tos,
-				    .proto = inet->hdrincl ? IPPROTO_RAW :
-							     sk->sk_protocol,
-				  };
+		struct flowi4 fl4 = {
+			.flowi4_oif = ipc.oif,
+			.flowi4_mark = sk->sk_mark,
+			.daddr = daddr,
+			.saddr = saddr,
+			.flowi4_tos = tos,
+			.flowi4_proto = (inet->hdrincl ?
+					 IPPROTO_RAW :
+					 sk->sk_protocol),
+			.flowi4_flags = FLOWI_FLAG_CAN_SLEEP,
+		};
 		if (!inet->hdrincl) {
-			err = raw_probe_proto_opt(&fl, msg);
+			err = raw_probe_proto_opt(&fl4, msg);
 			if (err)
 				goto done;
 		}
 
-		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
+		security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
+		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto done;
+		}
 	}
-	if (err)
-		goto done;
 
 	err = -EACCES;
 	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
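raw_sendmsg() above is the full shape of the conversion in one place: build a struct flowi4 with designated initializers, receive the route as the return value, and turn an error pointer back into err via PTR_ERR(). A compilable sketch under the same stand-in assumptions as the earlier demos (reduced field set, invented lookup):

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define MAX_ERRNO	4095
#define ERR_PTR(e)	((void *)(long)(e))
#define PTR_ERR(p)	((long)(p))
#define IS_ERR(p)	((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)

struct flowi4 {				/* reduced field set */
	int	 flowi4_oif;
	uint32_t flowi4_mark;
	uint32_t daddr, saddr;
	uint8_t	 flowi4_tos;
	uint8_t	 flowi4_proto;
};

struct rtable { uint32_t rt_dst; };	/* stand-in */

static struct rtable *ip_route_output_flow(const struct flowi4 *fl4)
{
	static struct rtable rt;

	if (!fl4->daddr)
		return ERR_PTR(-ENETUNREACH);
	rt.rt_dst = fl4->daddr;
	return &rt;
}

int main(void)
{
	struct flowi4 fl4 = {
		.flowi4_oif	= 0,
		.flowi4_mark	= 0,
		.daddr		= 0x7f000001,	/* 127.0.0.1 */
		.flowi4_tos	= 0,
		.flowi4_proto	= 17,		/* IPPROTO_UDP */
	};
	struct rtable *rt = ip_route_output_flow(&fl4);

	if (IS_ERR(rt))
		return (int)-PTR_ERR(rt);	/* raw.c: err = PTR_ERR(rt); goto done; */
	printf("routed to %08x\n", rt->rt_dst);
	return 0;
}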
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ed6603c2f6..870b5182ddd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,8 +109,8 @@
 #include <linux/sysctl.h>
 #endif
 
-#define RT_FL_TOS(oldflp) \
-    ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+#define RT_FL_TOS(oldflp4) \
+    ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
 
 #define IP_MAX_MTU	0xFFF0
 
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
 
-static struct delayed_work expires_work;
-static unsigned long expires_ljiffies;
-
 /*
  *	Interface to generic destination cache.
  */
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 }
 
+static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+
+	peer = rt->peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		} else {
+			if (rt->fi) {
+				fib_info_put(rt->fi);
+				rt->fi = NULL;
+			}
+		}
+	}
+	return p;
+}
+
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
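ipv4_cow_metrics() added above moves a shared read-only metrics array onto the route's inet_peer with a single cmpxchg on the tagged _metrics word: a racing writer either wins cleanly or adopts the winner's array. Userspace model using __sync_val_compare_and_swap() (GCC/Clang builtin), with the fib_info bookkeeping omitted and the read-only tag kept in bit 0 as in the kernel:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define RTAX_MAX		4
#define DST_METRICS_READ_ONLY	1UL

static uint32_t default_metrics[RTAX_MAX];	/* shared, read-only */
static uint32_t peer_metrics[RTAX_MAX];		/* per-peer, writable */
static unsigned long dst_metrics;		/* tagged pointer word */

static uint32_t *cow_metrics(void)
{
	unsigned long old = dst_metrics;
	/* __DST_METRICS_PTR in the kernel: strip the tag bit */
	uint32_t *old_p = (uint32_t *)(old & ~DST_METRICS_READ_ONLY);

	memcpy(peer_metrics, old_p, sizeof(peer_metrics));

	unsigned long prev = __sync_val_compare_and_swap(&dst_metrics, old,
						(unsigned long)peer_metrics);
	if (prev != old) {
		/* lost the race: use the winner's array unless read-only */
		if (prev & DST_METRICS_READ_ONLY)
			return NULL;
		return (uint32_t *)prev;
	}
	return peer_metrics;
}

int main(void)
{
	dst_metrics = (unsigned long)default_metrics | DST_METRICS_READ_ONLY;

	uint32_t *p = cow_metrics();
	if (p)
		p[0] = 1500;	/* now safe to write, e.g. a learned MTU */
	printf("writable metrics at %p\n", (void *)p);
	return 0;
}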
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
 	.default_mtu =		ipv4_default_mtu,
+	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
@@ -171,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = {
 
 const __u8 ip_tos2prio[16] = {
 	TC_PRIO_BESTEFFORT,
-	ECN_OR_COST(FILLER),
+	ECN_OR_COST(BESTEFFORT),
 	TC_PRIO_BESTEFFORT,
 	ECN_OR_COST(BESTEFFORT),
 	TC_PRIO_BULK,
@@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 		dst_metric(&r->dst, RTAX_WINDOW),
 		(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 		      dst_metric(&r->dst, RTAX_RTTVAR)),
-		r->fl.fl4_tos,
+		r->rt_tos,
 		r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
 		r->dst.hh ? (r->dst.hh->hh_output ==
			       dev_queue_xmit) : 0,
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = {
 	.release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
 	struct ip_rt_acct *dst, *src;
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 	if (!pde)
 		goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
 	if (!pde)
 		goto err3;
 #endif
 	return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 	remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->dst.expires;
+		(rth->peer && rth->peer->pmtu_expires);
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
-	ret = 1;
-	if (rth->dst.expires &&
-	    time_after_eq(jiffies, rth->dst.expires))
-		goto out;
-
 	age = jiffies - rth->dst.lastuse;
-	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
 		goto out;
@@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net)
 		net->ipv4.sysctl_rt_cache_rebuild_count;
 }
 
-static inline bool compare_hash_inputs(const struct flowi *fl1,
-					const struct flowi *fl2)
+static inline bool compare_hash_inputs(const struct rtable *rt1,
+				       const struct rtable *rt2)
 {
-	return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->iif ^ fl2->iif)) == 0);
+	return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0);
 }
 
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
 {
-	return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_mark ^ rt2->rt_mark) |
+		(rt1->rt_tos ^ rt2->rt_tos) |
+		(rt1->rt_oif ^ rt2->rt_oif) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
 }
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -786,104 +813,13 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	const struct rtable *aux = head;
 
 	while (aux != rth) {
-		if (compare_hash_inputs(&aux->fl, &rth->fl))
+		if (compare_hash_inputs(aux, rth))
 			return 0;
 		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
 	}
 	return ONE;
 }
 
-static void rt_check_expire(void)
-{
-	static unsigned int rover;
-	unsigned int i = rover, goal;
-	struct rtable *rth;
-	struct rtable __rcu **rthp;
-	unsigned long samples = 0;
-	unsigned long sum = 0, sum2 = 0;
-	unsigned long delta;
-	u64 mult;
-
-	delta = jiffies - expires_ljiffies;
-	expires_ljiffies = jiffies;
-	mult = ((u64)delta) << rt_hash_log;
-	if (ip_rt_gc_timeout > 1)
-		do_div(mult, ip_rt_gc_timeout);
-	goal = (unsigned int)mult;
-	if (goal > rt_hash_mask)
-		goal = rt_hash_mask + 1;
-	for (; goal > 0; goal--) {
-		unsigned long tmo = ip_rt_gc_timeout;
-		unsigned long length;
-
-		i = (i + 1) & rt_hash_mask;
-		rthp = &rt_hash_table[i].chain;
-
-		if (need_resched())
-			cond_resched();
-
-		samples++;
-
-		if (rcu_dereference_raw(*rthp) == NULL)
-			continue;
-		length = 0;
-		spin_lock_bh(rt_hash_lock_addr(i));
-		while ((rth = rcu_dereference_protected(*rthp,
-			lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
-			prefetch(rth->dst.rt_next);
-			if (rt_is_expired(rth)) {
-				*rthp = rth->dst.rt_next;
-				rt_free(rth);
-				continue;
-			}
-			if (rth->dst.expires) {
-				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->dst.expires)) {
-nofree:
-					tmo >>= 1;
-					rthp = &rth->dst.rt_next;
-					/*
-					 * We only count entries on
-					 * a chain with equal hash inputs once
-					 * so that entries for different QOS
-					 * levels, and other non-hash input
-					 * attributes don't unfairly skew
-					 * the length computation
-					 */
-					length += has_noalias(rt_hash_table[i].chain, rth);
-					continue;
-				}
-			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
-				goto nofree;
-
-			/* Cleanup aged off entries. */
-			*rthp = rth->dst.rt_next;
-			rt_free(rth);
-		}
-		spin_unlock_bh(rt_hash_lock_addr(i));
-		sum += length;
-		sum2 += length*length;
-	}
-	if (samples) {
-		unsigned long avg = sum / samples;
-		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
-		rt_chain_length_max = max_t(unsigned long,
-					    ip_rt_gc_elasticity,
-					    (avg + 4*sd) >> FRACT_BITS);
-	}
-	rover = i;
-}
-
-/*
- * rt_worker_func() is run in process context.
- * we call rt_check_expire() to scan part of the hash table
- */
-static void rt_worker_func(struct work_struct *work)
-{
-	rt_check_expire();
-	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
-}
-
 /*
  * Pertubation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head)
1078 return length >> FRACT_BITS; 1014 return length >> FRACT_BITS;
1079} 1015}
1080 1016
1081static int rt_intern_hash(unsigned hash, struct rtable *rt, 1017static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
1082 struct rtable **rp, struct sk_buff *skb, int ifindex) 1018 struct sk_buff *skb, int ifindex)
1083{ 1019{
1084 struct rtable *rth, *cand; 1020 struct rtable *rth, *cand;
1085 struct rtable __rcu **rthp, **candp; 1021 struct rtable __rcu **rthp, **candp;
@@ -1120,7 +1056,7 @@ restart:
1120 printk(KERN_WARNING 1056 printk(KERN_WARNING
1121 "Neighbour table failure & not caching routes.\n"); 1057 "Neighbour table failure & not caching routes.\n");
1122 ip_rt_put(rt); 1058 ip_rt_put(rt);
1123 return err; 1059 return ERR_PTR(err);
1124 } 1060 }
1125 } 1061 }
1126 1062
@@ -1137,7 +1073,7 @@ restart:
1137 rt_free(rth); 1073 rt_free(rth);
1138 continue; 1074 continue;
1139 } 1075 }
1140 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 1076 if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
1141 /* Put it first */ 1077 /* Put it first */
1142 *rthp = rth->dst.rt_next; 1078 *rthp = rth->dst.rt_next;
1143 /* 1079 /*
@@ -1157,11 +1093,9 @@ restart:
1157 spin_unlock_bh(rt_hash_lock_addr(hash)); 1093 spin_unlock_bh(rt_hash_lock_addr(hash));
1158 1094
1159 rt_drop(rt); 1095 rt_drop(rt);
1160 if (rp) 1096 if (skb)
1161 *rp = rth;
1162 else
1163 skb_dst_set(skb, &rth->dst); 1097 skb_dst_set(skb, &rth->dst);
1164 return 0; 1098 return rth;
1165 } 1099 }
1166 1100
1167 if (!atomic_read(&rth->dst.__refcnt)) { 1101 if (!atomic_read(&rth->dst.__refcnt)) {
@@ -1202,7 +1136,7 @@ restart:
1202 rt_emergency_hash_rebuild(net); 1136 rt_emergency_hash_rebuild(net);
1203 spin_unlock_bh(rt_hash_lock_addr(hash)); 1137 spin_unlock_bh(rt_hash_lock_addr(hash));
1204 1138
1205 hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1139 hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1206 ifindex, rt_genid(net)); 1140 ifindex, rt_genid(net));
1207 goto restart; 1141 goto restart;
1208 } 1142 }
@@ -1218,7 +1152,7 @@ restart:
1218 1152
1219 if (err != -ENOBUFS) { 1153 if (err != -ENOBUFS) {
1220 rt_drop(rt); 1154 rt_drop(rt);
1221 return err; 1155 return ERR_PTR(err);
1222 } 1156 }
1223 1157
1224 /* Neighbour tables are full and nothing 1158 /* Neighbour tables are full and nothing
@@ -1239,7 +1173,7 @@ restart:
1239 if (net_ratelimit()) 1173 if (net_ratelimit())
1240 printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); 1174 printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
1241 rt_drop(rt); 1175 rt_drop(rt);
1242 return -ENOBUFS; 1176 return ERR_PTR(-ENOBUFS);
1243 } 1177 }
1244 } 1178 }
1245 1179
@@ -1265,11 +1199,16 @@ restart:
1265 spin_unlock_bh(rt_hash_lock_addr(hash)); 1199 spin_unlock_bh(rt_hash_lock_addr(hash));
1266 1200
1267skip_hashing: 1201skip_hashing:
1268 if (rp) 1202 if (skb)
1269 *rp = rt;
1270 else
1271 skb_dst_set(skb, &rt->dst); 1203 skb_dst_set(skb, &rt->dst);
1272 return 0; 1204 return rt;
1205}
1206
1207static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
1208
1209static u32 rt_peer_genid(void)
1210{
1211 return atomic_read(&__rt_peer_genid);
1273} 1212}
1274 1213
1275void rt_bind_peer(struct rtable *rt, int create) 1214void rt_bind_peer(struct rtable *rt, int create)
@@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create)
1280 1219
1281 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) 1220 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1282 inet_putpeer(peer); 1221 inet_putpeer(peer);
1222 else
1223 rt->rt_peer_genid = rt_peer_genid();
1283} 1224}
1284 1225
1285/* 1226/*
@@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt)
1349void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1290void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1350 __be32 saddr, struct net_device *dev) 1291 __be32 saddr, struct net_device *dev)
1351{ 1292{
1352 int i, k;
1353 struct in_device *in_dev = __in_dev_get_rcu(dev); 1293 struct in_device *in_dev = __in_dev_get_rcu(dev);
1354 struct rtable *rth; 1294 struct inet_peer *peer;
1355 struct rtable __rcu **rthp;
1356 __be32 skeys[2] = { saddr, 0 };
1357 int ikeys[2] = { dev->ifindex, 0 };
1358 struct netevent_redirect netevent;
1359 struct net *net; 1295 struct net *net;
1360 1296
1361 if (!in_dev) 1297 if (!in_dev)
@@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1367 ipv4_is_zeronet(new_gw)) 1303 ipv4_is_zeronet(new_gw))
1368 goto reject_redirect; 1304 goto reject_redirect;
1369 1305
1370 if (!rt_caching(net))
1371 goto reject_redirect;
1372
1373 if (!IN_DEV_SHARED_MEDIA(in_dev)) { 1306 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1374 if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 1307 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1375 goto reject_redirect; 1308 goto reject_redirect;
@@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1380 goto reject_redirect; 1313 goto reject_redirect;
1381 } 1314 }
1382 1315
1383 for (i = 0; i < 2; i++) { 1316 peer = inet_getpeer_v4(daddr, 1);
1384 for (k = 0; k < 2; k++) { 1317 if (peer) {
1385 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1318 peer->redirect_learned.a4 = new_gw;
1386 rt_genid(net));
1387
1388 rthp = &rt_hash_table[hash].chain;
1389
1390 while ((rth = rcu_dereference(*rthp)) != NULL) {
1391 struct rtable *rt;
1392
1393 if (rth->fl.fl4_dst != daddr ||
1394 rth->fl.fl4_src != skeys[i] ||
1395 rth->fl.oif != ikeys[k] ||
1396 rt_is_input_route(rth) ||
1397 rt_is_expired(rth) ||
1398 !net_eq(dev_net(rth->dst.dev), net)) {
1399 rthp = &rth->dst.rt_next;
1400 continue;
1401 }
1402
1403 if (rth->rt_dst != daddr ||
1404 rth->rt_src != saddr ||
1405 rth->dst.error ||
1406 rth->rt_gateway != old_gw ||
1407 rth->dst.dev != dev)
1408 break;
1409
1410 dst_hold(&rth->dst);
1411
1412 rt = dst_alloc(&ipv4_dst_ops);
1413 if (rt == NULL) {
1414 ip_rt_put(rth);
1415 return;
1416 }
1417
1418 /* Copy all the information. */
1419 *rt = *rth;
1420 rt->dst.__use = 1;
1421 atomic_set(&rt->dst.__refcnt, 1);
1422 rt->dst.child = NULL;
1423 if (rt->dst.dev)
1424 dev_hold(rt->dst.dev);
1425 rt->dst.obsolete = -1;
1426 rt->dst.lastuse = jiffies;
1427 rt->dst.path = &rt->dst;
1428 rt->dst.neighbour = NULL;
1429 rt->dst.hh = NULL;
1430#ifdef CONFIG_XFRM
1431 rt->dst.xfrm = NULL;
1432#endif
1433 rt->rt_genid = rt_genid(net);
1434 rt->rt_flags |= RTCF_REDIRECTED;
1435
1436 /* Gateway is different ... */
1437 rt->rt_gateway = new_gw;
1438
1439 /* Redirect received -> path was valid */
1440 dst_confirm(&rth->dst);
1441
1442 if (rt->peer)
1443 atomic_inc(&rt->peer->refcnt);
1444
1445 if (arp_bind_neighbour(&rt->dst) ||
1446 !(rt->dst.neighbour->nud_state &
1447 NUD_VALID)) {
1448 if (rt->dst.neighbour)
1449 neigh_event_send(rt->dst.neighbour, NULL);
1450 ip_rt_put(rth);
1451 rt_drop(rt);
1452 goto do_next;
1453 }
1454 1319
1455 netevent.old = &rth->dst; 1320 inet_putpeer(peer);
1456 netevent.new = &rt->dst;
1457 call_netevent_notifiers(NETEVENT_REDIRECT,
1458 &netevent);
1459 1321
1460 rt_del(hash, rth); 1322 atomic_inc(&__rt_peer_genid);
1461 if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
1462 ip_rt_put(rt);
1463 goto do_next;
1464 }
1465 do_next:
1466 ;
1467 }
1468 } 1323 }
1469 return; 1324 return;
1470 1325
@@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1488 if (dst->obsolete > 0) { 1343 if (dst->obsolete > 0) {
1489 ip_rt_put(rt); 1344 ip_rt_put(rt);
1490 ret = NULL; 1345 ret = NULL;
1491 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1346 } else if (rt->rt_flags & RTCF_REDIRECTED) {
1492 (rt->dst.expires && 1347 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1493 time_after_eq(jiffies, rt->dst.expires))) { 1348 rt->rt_oif,
1494 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1495 rt->fl.oif,
1496 rt_genid(dev_net(dst->dev))); 1349 rt_genid(dev_net(dst->dev)));
1497#if RT_CACHE_DEBUG >= 1 1350#if RT_CACHE_DEBUG >= 1
1498 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", 1351 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
1499 &rt->rt_dst, rt->fl.fl4_tos); 1352 &rt->rt_dst, rt->rt_tos);
1500#endif 1353#endif
1501 rt_del(hash, rt); 1354 rt_del(hash, rt);
1502 ret = NULL; 1355 ret = NULL;
1356 } else if (rt->peer &&
1357 rt->peer->pmtu_expires &&
1358 time_after_eq(jiffies, rt->peer->pmtu_expires)) {
1359 unsigned long orig = rt->peer->pmtu_expires;
1360
1361 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1362 dst_metric_set(dst, RTAX_MTU,
1363 rt->peer->pmtu_orig);
1503 } 1364 }
1504 } 1365 }
1505 return ret; 1366 return ret;
@@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1525{ 1386{
1526 struct rtable *rt = skb_rtable(skb); 1387 struct rtable *rt = skb_rtable(skb);
1527 struct in_device *in_dev; 1388 struct in_device *in_dev;
1389 struct inet_peer *peer;
1528 int log_martians; 1390 int log_martians;
1529 1391
1530 rcu_read_lock(); 1392 rcu_read_lock();
@@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1536 log_martians = IN_DEV_LOG_MARTIANS(in_dev); 1398 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1537 rcu_read_unlock(); 1399 rcu_read_unlock();
1538 1400
1401 if (!rt->peer)
1402 rt_bind_peer(rt, 1);
1403 peer = rt->peer;
1404 if (!peer) {
1405 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1406 return;
1407 }
1408
1539 /* No redirected packets during ip_rt_redirect_silence; 1409 /* No redirected packets during ip_rt_redirect_silence;
1540 * reset the algorithm. 1410 * reset the algorithm.
1541 */ 1411 */
1542 if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) 1412 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
1543 rt->dst.rate_tokens = 0; 1413 peer->rate_tokens = 0;
1544 1414
1545 /* Too many ignored redirects; do not send anything 1415 /* Too many ignored redirects; do not send anything
1546 * set dst.rate_last to the last seen redirected packet. 1416 * set dst.rate_last to the last seen redirected packet.
1547 */ 1417 */
1548 if (rt->dst.rate_tokens >= ip_rt_redirect_number) { 1418 if (peer->rate_tokens >= ip_rt_redirect_number) {
1549 rt->dst.rate_last = jiffies; 1419 peer->rate_last = jiffies;
1550 return; 1420 return;
1551 } 1421 }
1552 1422
1553 /* Check for load limit; set rate_last to the latest sent 1423 /* Check for load limit; set rate_last to the latest sent
1554 * redirect. 1424 * redirect.
1555 */ 1425 */
1556 if (rt->dst.rate_tokens == 0 || 1426 if (peer->rate_tokens == 0 ||
1557 time_after(jiffies, 1427 time_after(jiffies,
1558 (rt->dst.rate_last + 1428 (peer->rate_last +
1559 (ip_rt_redirect_load << rt->dst.rate_tokens)))) { 1429 (ip_rt_redirect_load << peer->rate_tokens)))) {
1560 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1430 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1561 rt->dst.rate_last = jiffies; 1431 peer->rate_last = jiffies;
1562 ++rt->dst.rate_tokens; 1432 ++peer->rate_tokens;
1563#ifdef CONFIG_IP_ROUTE_VERBOSE 1433#ifdef CONFIG_IP_ROUTE_VERBOSE
1564 if (log_martians && 1434 if (log_martians &&
1565 rt->dst.rate_tokens == ip_rt_redirect_number && 1435 peer->rate_tokens == ip_rt_redirect_number &&
1566 net_ratelimit()) 1436 net_ratelimit())
1567 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1437 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1568 &rt->rt_src, rt->rt_iif, 1438 &rt->rt_src, rt->rt_iif,
@@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1574static int ip_error(struct sk_buff *skb) 1444static int ip_error(struct sk_buff *skb)
1575{ 1445{
1576 struct rtable *rt = skb_rtable(skb); 1446 struct rtable *rt = skb_rtable(skb);
1447 struct inet_peer *peer;
1577 unsigned long now; 1448 unsigned long now;
1449 bool send;
1578 int code; 1450 int code;
1579 1451
1580 switch (rt->dst.error) { 1452 switch (rt->dst.error) {
@@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb)
1594 break; 1466 break;
1595 } 1467 }
1596 1468
1597 now = jiffies; 1469 if (!rt->peer)
1598 rt->dst.rate_tokens += now - rt->dst.rate_last; 1470 rt_bind_peer(rt, 1);
1599 if (rt->dst.rate_tokens > ip_rt_error_burst) 1471 peer = rt->peer;
1600 rt->dst.rate_tokens = ip_rt_error_burst; 1472
1601 rt->dst.rate_last = now; 1473 send = true;
1602 if (rt->dst.rate_tokens >= ip_rt_error_cost) { 1474 if (peer) {
1603 rt->dst.rate_tokens -= ip_rt_error_cost; 1475 now = jiffies;
1604 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1476 peer->rate_tokens += now - peer->rate_last;
1477 if (peer->rate_tokens > ip_rt_error_burst)
1478 peer->rate_tokens = ip_rt_error_burst;
1479 peer->rate_last = now;
1480 if (peer->rate_tokens >= ip_rt_error_cost)
1481 peer->rate_tokens -= ip_rt_error_cost;
1482 else
1483 send = false;
1605 } 1484 }
1485 if (send)
1486 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1606 1487
1607out: kfree_skb(skb); 1488out: kfree_skb(skb);
1608 return 0; 1489 return 0;
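
ip_error() now charges a classic token bucket hosted on the inet_peer: one token accrues per jiffy up to ip_rt_error_burst, and each ICMP_DEST_UNREACH costs ip_rt_error_cost; with no peer available the error is always sent. The same rule as a self-contained sketch (demo_* names are illustrative):

#include <stdbool.h>

struct demo_bucket {
        unsigned long tokens;   /* peer->rate_tokens */
        unsigned long last;     /* peer->rate_last */
};

static bool demo_error_allowed(struct demo_bucket *b, unsigned long now,
                               unsigned long burst, unsigned long cost)
{
        b->tokens += now - b->last;     /* earn one token per jiffy */
        if (b->tokens > burst)
                b->tokens = burst;      /* credit for idle time is capped */
        b->last = now;
        if (b->tokens < cost)
                return false;           /* over the limit: stay silent */
        b->tokens -= cost;              /* pay for this ICMP error */
        return true;
}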
@@ -1630,88 +1511,142 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1630 unsigned short new_mtu, 1511 unsigned short new_mtu,
1631 struct net_device *dev) 1512 struct net_device *dev)
1632{ 1513{
1633 int i, k;
1634 unsigned short old_mtu = ntohs(iph->tot_len); 1514 unsigned short old_mtu = ntohs(iph->tot_len);
1635 struct rtable *rth;
1636 int ikeys[2] = { dev->ifindex, 0 };
1637 __be32 skeys[2] = { iph->saddr, 0, };
1638 __be32 daddr = iph->daddr;
1639 unsigned short est_mtu = 0; 1515 unsigned short est_mtu = 0;
1516 struct inet_peer *peer;
1640 1517
1641 for (k = 0; k < 2; k++) { 1518 peer = inet_getpeer_v4(iph->daddr, 1);
1642 for (i = 0; i < 2; i++) { 1519 if (peer) {
1643 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1520 unsigned short mtu = new_mtu;
1644 rt_genid(net));
1645
1646 rcu_read_lock();
1647 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1648 rth = rcu_dereference(rth->dst.rt_next)) {
1649 unsigned short mtu = new_mtu;
1650
1651 if (rth->fl.fl4_dst != daddr ||
1652 rth->fl.fl4_src != skeys[i] ||
1653 rth->rt_dst != daddr ||
1654 rth->rt_src != iph->saddr ||
1655 rth->fl.oif != ikeys[k] ||
1656 rt_is_input_route(rth) ||
1657 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1658 !net_eq(dev_net(rth->dst.dev), net) ||
1659 rt_is_expired(rth))
1660 continue;
1661 1521
1662 if (new_mtu < 68 || new_mtu >= old_mtu) { 1522 if (new_mtu < 68 || new_mtu >= old_mtu) {
1523 /* BSD 4.2 derived systems incorrectly adjust
1524 * tot_len by the IP header length, and report
1525 * a zero MTU in the ICMP message.
1526 */
1527 if (mtu == 0 &&
1528 old_mtu >= 68 + (iph->ihl << 2))
1529 old_mtu -= iph->ihl << 2;
1530 mtu = guess_mtu(old_mtu);
1531 }
1663 1532
1664 /* BSD 4.2 compatibility hack :-( */ 1533 if (mtu < ip_rt_min_pmtu)
1665 if (mtu == 0 && 1534 mtu = ip_rt_min_pmtu;
1666 old_mtu >= dst_mtu(&rth->dst) && 1535 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
1667 old_mtu >= 68 + (iph->ihl << 2)) 1536 unsigned long pmtu_expires;
1668 old_mtu -= iph->ihl << 2;
1669 1537
1670 mtu = guess_mtu(old_mtu); 1538 pmtu_expires = jiffies + ip_rt_mtu_expires;
1671 } 1539 if (!pmtu_expires)
1672 if (mtu <= dst_mtu(&rth->dst)) { 1540 pmtu_expires = 1UL;
1673 if (mtu < dst_mtu(&rth->dst)) { 1541
1674 dst_confirm(&rth->dst); 1542 est_mtu = mtu;
1675 if (mtu < ip_rt_min_pmtu) { 1543 peer->pmtu_learned = mtu;
1676 u32 lock = dst_metric(&rth->dst, 1544 peer->pmtu_expires = pmtu_expires;
1677 RTAX_LOCK);
1678 mtu = ip_rt_min_pmtu;
1679 lock |= (1 << RTAX_MTU);
1680 dst_metric_set(&rth->dst, RTAX_LOCK,
1681 lock);
1682 }
1683 dst_metric_set(&rth->dst, RTAX_MTU, mtu);
1684 dst_set_expires(&rth->dst,
1685 ip_rt_mtu_expires);
1686 }
1687 est_mtu = mtu;
1688 }
1689 }
1690 rcu_read_unlock();
1691 } 1545 }
1546
1547 inet_putpeer(peer);
1548
1549 atomic_inc(&__rt_peer_genid);
1692 } 1550 }
1693 return est_mtu ? : new_mtu; 1551 return est_mtu ? : new_mtu;
1694} 1552}
1695 1553
1554static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
1555{
1556 unsigned long expires = peer->pmtu_expires;
1557
1558 if (time_before(jiffies, expires)) {
1559 u32 orig_dst_mtu = dst_mtu(dst);
1560 if (peer->pmtu_learned < orig_dst_mtu) {
1561 if (!peer->pmtu_orig)
1562 peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
1563 dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
1564 }
1565 } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
1566 dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
1567}
1568
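
In check_peer_pmtu() the peer->pmtu_expires field doubles as the validity flag: 0 means "no learned PMTU", which is why the writers above nudge an expiry that happens to compute to 0 up to 1UL. Clearing it with cmpxchg() guarantees exactly one CPU observes the expired-to-zero transition and restores pmtu_orig. A user-space model of that claim step, using C11 atomics in place of the kernel primitive:

#include <stdatomic.h>
#include <stdbool.h>

/* peer->pmtu_expires modeled as an atomic; 0 = nothing learned */
static bool demo_take_expiry(_Atomic unsigned long *pmtu_expires,
                             unsigned long now)
{
        unsigned long expires = atomic_load(pmtu_expires);

        if (expires == 0 || now < expires)
                return false;           /* nothing to undo (yet) */
        /* only the caller that swings expires -> 0 restores pmtu_orig */
        return atomic_compare_exchange_strong(pmtu_expires, &expires, 0UL);
}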
1696static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1569static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1697{ 1570{
1698 if (dst_mtu(dst) > mtu && mtu >= 68 && 1571 struct rtable *rt = (struct rtable *) dst;
1699 !(dst_metric_locked(dst, RTAX_MTU))) { 1572 struct inet_peer *peer;
1700 if (mtu < ip_rt_min_pmtu) { 1573
1701 u32 lock = dst_metric(dst, RTAX_LOCK); 1574 dst_confirm(dst);
1575
1576 if (!rt->peer)
1577 rt_bind_peer(rt, 1);
1578 peer = rt->peer;
1579 if (peer) {
1580 if (mtu < ip_rt_min_pmtu)
1702 mtu = ip_rt_min_pmtu; 1581 mtu = ip_rt_min_pmtu;
1703 dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); 1582 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
1583 unsigned long pmtu_expires;
1584
1585 pmtu_expires = jiffies + ip_rt_mtu_expires;
1586 if (!pmtu_expires)
1587 pmtu_expires = 1UL;
1588
1589 peer->pmtu_learned = mtu;
1590 peer->pmtu_expires = pmtu_expires;
1591
1592 atomic_inc(&__rt_peer_genid);
1593 rt->rt_peer_genid = rt_peer_genid();
1704 } 1594 }
1705 dst_metric_set(dst, RTAX_MTU, mtu); 1595 check_peer_pmtu(dst, peer);
1706 dst_set_expires(dst, ip_rt_mtu_expires); 1596
1707 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 1597 inet_putpeer(peer);
1598 }
1599}
1600
1601static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1602{
1603 struct rtable *rt = (struct rtable *) dst;
1604 __be32 orig_gw = rt->rt_gateway;
1605
1606 dst_confirm(&rt->dst);
1607
1608 neigh_release(rt->dst.neighbour);
1609 rt->dst.neighbour = NULL;
1610
1611 rt->rt_gateway = peer->redirect_learned.a4;
1612 if (arp_bind_neighbour(&rt->dst) ||
1613 !(rt->dst.neighbour->nud_state & NUD_VALID)) {
1614 if (rt->dst.neighbour)
1615 neigh_event_send(rt->dst.neighbour, NULL);
1616 rt->rt_gateway = orig_gw;
1617 return -EAGAIN;
1618 } else {
1619 rt->rt_flags |= RTCF_REDIRECTED;
1620 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
1621 rt->dst.neighbour);
1708 } 1622 }
1623 return 0;
1709} 1624}
1710 1625
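
check_peer_redir() adopts a learned gateway only tentatively: it drops the old neighbour, tries to bind one for the new gateway, and rolls back to the original on failure, so the route never runs with an unresolvable next hop (the -EAGAIN lets ipv4_dst_check() retry later). The same pattern as a standalone sketch, all demo_* names hypothetical:

struct demo_route {
        unsigned int gateway;   /* host byte order, for the demo only */
        int neigh_valid;
};

static int demo_bind_neigh(struct demo_route *rt)
{
        /* stand-in for arp_bind_neighbour(): pretend only gateways on
         * the 192.168.1.0/24 demo subnet resolve */
        rt->neigh_valid = (rt->gateway >> 8) == 0xc0a801;
        return rt->neigh_valid ? 0 : -1;
}

static int demo_apply_redirect(struct demo_route *rt, unsigned int learned)
{
        unsigned int orig = rt->gateway;

        rt->gateway = learned;
        /* mirror the kernel's two-part test: bind, then NUD validity */
        if (demo_bind_neigh(rt) < 0 || !rt->neigh_valid) {
                rt->gateway = orig;     /* roll back to the working state */
                return -1;              /* -EAGAIN in the kernel code */
        }
        return 0;
}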
1711static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1626static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1712{ 1627{
1713 if (rt_is_expired((struct rtable *)dst)) 1628 struct rtable *rt = (struct rtable *) dst;
1629
1630 if (rt_is_expired(rt))
1714 return NULL; 1631 return NULL;
1632 if (rt->rt_peer_genid != rt_peer_genid()) {
1633 struct inet_peer *peer;
1634
1635 if (!rt->peer)
1636 rt_bind_peer(rt, 0);
1637
1638 peer = rt->peer;
1639 if (peer && peer->pmtu_expires)
1640 check_peer_pmtu(dst, peer);
1641
1642 if (peer && peer->redirect_learned.a4 &&
1643 peer->redirect_learned.a4 != rt->rt_gateway) {
1644 if (check_peer_redir(dst, peer))
1645 return NULL;
1646 }
1647
1648 rt->rt_peer_genid = rt_peer_genid();
1649 }
1715 return dst; 1650 return dst;
1716} 1651}
1717 1652
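
ipv4_dst_check() keys revalidation off a global peer generation count: any PMTU or redirect learned anywhere bumps __rt_peer_genid, and each cached route compares its rt_peer_genid snapshot on use, paying for the re-check only when something actually changed. The idiom in miniature, with C11 atomics standing in for the kernel's atomic_t:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int demo_peer_genid;

struct demo_entry {
        int genid;              /* snapshot taken at creation */
};

static void demo_peer_changed(void)     /* e.g. a new PMTU was learned */
{
        atomic_fetch_add(&demo_peer_genid, 1);
}

static bool demo_entry_stale(struct demo_entry *e)
{
        int now = atomic_load(&demo_peer_genid);

        if (e->genid == now)
                return false;   /* fast path: nothing changed anywhere */
        e->genid = now;         /* resync after revalidating the fields */
        return true;
}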
@@ -1720,6 +1655,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
1720 struct rtable *rt = (struct rtable *) dst; 1655 struct rtable *rt = (struct rtable *) dst;
1721 struct inet_peer *peer = rt->peer; 1656 struct inet_peer *peer = rt->peer;
1722 1657
1658 if (rt->fi) {
1659 fib_info_put(rt->fi);
1660 rt->fi = NULL;
1661 }
1723 if (peer) { 1662 if (peer) {
1724 rt->peer = NULL; 1663 rt->peer = NULL;
1725 inet_putpeer(peer); 1664 inet_putpeer(peer);
@@ -1734,8 +1673,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
1734 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1673 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1735 1674
1736 rt = skb_rtable(skb); 1675 rt = skb_rtable(skb);
1737 if (rt) 1676 if (rt &&
1738 dst_set_expires(&rt->dst, 0); 1677 rt->peer &&
1678 rt->peer->pmtu_expires) {
1679 unsigned long orig = rt->peer->pmtu_expires;
1680
1681 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1682 dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
1683 }
1739} 1684}
1740 1685
1741static int ip_rt_bug(struct sk_buff *skb) 1686static int ip_rt_bug(struct sk_buff *skb)
@@ -1764,8 +1709,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1764 if (rt_is_output_route(rt)) 1709 if (rt_is_output_route(rt))
1765 src = rt->rt_src; 1710 src = rt->rt_src;
1766 else { 1711 else {
1712 struct flowi4 fl4 = {
1713 .daddr = rt->rt_key_dst,
1714 .saddr = rt->rt_key_src,
1715 .flowi4_tos = rt->rt_tos,
1716 .flowi4_oif = rt->rt_oif,
1717 .flowi4_iif = rt->rt_iif,
1718 .flowi4_mark = rt->rt_mark,
1719 };
1720
1767 rcu_read_lock(); 1721 rcu_read_lock();
1768 if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) 1722 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
1769 src = FIB_RES_PREFSRC(res); 1723 src = FIB_RES_PREFSRC(res);
1770 else 1724 else
1771 src = inet_select_addr(rt->dst.dev, rt->rt_gateway, 1725 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
@@ -1775,7 +1729,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1775 memcpy(addr, &src, 4); 1729 memcpy(addr, &src, 4);
1776} 1730}
1777 1731
1778#ifdef CONFIG_NET_CLS_ROUTE 1732#ifdef CONFIG_IP_ROUTE_CLASSID
1779static void set_class_tag(struct rtable *rt, u32 tag) 1733static void set_class_tag(struct rtable *rt, u32 tag)
1780{ 1734{
1781 if (!(rt->dst.tclassid & 0xFFFF)) 1735 if (!(rt->dst.tclassid & 0xFFFF))
@@ -1815,17 +1769,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
1815 return mtu; 1769 return mtu;
1816} 1770}
1817 1771
1818static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) 1772static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
1773 struct fib_info *fi)
1774{
1775 struct inet_peer *peer;
1776 int create = 0;
1777
1778 /* If a peer entry exists for this destination, we must hook
1779 * it up in order to get at cached metrics.
1780 */
1781 if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
1782 create = 1;
1783
1784 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
1785 if (peer) {
1786 rt->rt_peer_genid = rt_peer_genid();
1787 if (inet_metrics_new(peer))
1788 memcpy(peer->metrics, fi->fib_metrics,
1789 sizeof(u32) * RTAX_MAX);
1790 dst_init_metrics(&rt->dst, peer->metrics, false);
1791
1792 if (peer->pmtu_expires)
1793 check_peer_pmtu(&rt->dst, peer);
1794 if (peer->redirect_learned.a4 &&
1795 peer->redirect_learned.a4 != rt->rt_gateway) {
1796 rt->rt_gateway = peer->redirect_learned.a4;
1797 rt->rt_flags |= RTCF_REDIRECTED;
1798 }
1799 } else {
1800 if (fi->fib_metrics != (u32 *) dst_default_metrics) {
1801 rt->fi = fi;
1802 atomic_inc(&fi->fib_clntref);
1803 }
1804 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1805 }
1806}
1807
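
rt_init_metrics() gives a route one of two metric homes: the writable per-destination array on the inet_peer (created eagerly when the flow carries FLOWI_FLAG_PRECOW_METRICS, i.e. the caller expects to write metrics), or the fib_info's shared defaults, which must be treated as read-only and pinned with a refcount. A reduced model of that choice, all demo_* names hypothetical:

struct demo_metrics_ref {
        unsigned int *ptr;      /* array of RTAX_MAX entries */
        int read_only;
};

static struct demo_metrics_ref demo_pick_metrics(unsigned int *peer_metrics,
                                                 unsigned int *fib_metrics)
{
        struct demo_metrics_ref ref;

        if (peer_metrics) {
                ref.ptr = peer_metrics;  /* private per-destination copy */
                ref.read_only = 0;       /* PMTU etc. may be updated here */
        } else {
                ref.ptr = fib_metrics;   /* shared route defaults */
                ref.read_only = 1;       /* must never be written through */
        }
        return ref;
}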
1808static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
1809 const struct fib_result *res,
1810 struct fib_info *fi, u16 type, u32 itag)
1819{ 1811{
1820 struct dst_entry *dst = &rt->dst; 1812 struct dst_entry *dst = &rt->dst;
1821 struct fib_info *fi = res->fi;
1822 1813
1823 if (fi) { 1814 if (fi) {
1824 if (FIB_RES_GW(*res) && 1815 if (FIB_RES_GW(*res) &&
1825 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1816 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1826 rt->rt_gateway = FIB_RES_GW(*res); 1817 rt->rt_gateway = FIB_RES_GW(*res);
1827 dst_import_metrics(dst, fi->fib_metrics); 1818 rt_init_metrics(rt, oldflp4, fi);
1828#ifdef CONFIG_NET_CLS_ROUTE 1819#ifdef CONFIG_IP_ROUTE_CLASSID
1829 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1820 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1830#endif 1821#endif
1831 } 1822 }
@@ -1835,13 +1826,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1835 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) 1826 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
1836 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); 1827 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
1837 1828
1838#ifdef CONFIG_NET_CLS_ROUTE 1829#ifdef CONFIG_IP_ROUTE_CLASSID
1839#ifdef CONFIG_IP_MULTIPLE_TABLES 1830#ifdef CONFIG_IP_MULTIPLE_TABLES
1840 set_class_tag(rt, fib_rules_tclass(res)); 1831 set_class_tag(rt, fib_rules_tclass(res));
1841#endif 1832#endif
1842 set_class_tag(rt, itag); 1833 set_class_tag(rt, itag);
1843#endif 1834#endif
1844 rt->rt_type = res->type; 1835 rt->rt_type = type;
1836}
1837
1838static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm)
1839{
1840 struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1);
1841 if (rt) {
1842 rt->dst.obsolete = -1;
1843
1844 rt->dst.flags = DST_HOST |
1845 (nopolicy ? DST_NOPOLICY : 0) |
1846 (noxfrm ? DST_NOXFRM : 0);
1847 }
1848 return rt;
1845} 1849}
1846 1850
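
rt_dst_alloc() folds the per-call-site boilerplate, the refcount, DST_HOST, the optional DST_NOPOLICY/DST_NOXFRM bits, and obsolete = -1, into one helper (the second argument to dst_alloc() now seeds __refcnt, replacing the atomic_set() each caller used to do). A standalone rendering of the same idea with hypothetical demo_* names:

#include <stdlib.h>

#define DEMO_DST_HOST           0x1
#define DEMO_DST_NOPOLICY       0x2
#define DEMO_DST_NOXFRM         0x4

struct demo_dst {
        int flags;
        int obsolete;
};

/* one place computes the flag word five call sites used to assemble */
static struct demo_dst *demo_dst_alloc(int nopolicy, int noxfrm)
{
        struct demo_dst *d = calloc(1, sizeof(*d));

        if (d) {
                d->obsolete = -1;       /* always revalidate via ->check() */
                d->flags = DEMO_DST_HOST |
                           (nopolicy ? DEMO_DST_NOPOLICY : 0) |
                           (noxfrm ? DEMO_DST_NOXFRM : 0);
        }
        return d;
}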
1847/* called in rcu_read_lock() section */ 1851/* called in rcu_read_lock() section */
@@ -1874,31 +1878,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1874 if (err < 0) 1878 if (err < 0)
1875 goto e_err; 1879 goto e_err;
1876 } 1880 }
1877 rth = dst_alloc(&ipv4_dst_ops); 1881 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
1878 if (!rth) 1882 if (!rth)
1879 goto e_nobufs; 1883 goto e_nobufs;
1880 1884
1881 rth->dst.output = ip_rt_bug; 1885 rth->dst.output = ip_rt_bug;
1882 rth->dst.obsolete = -1;
1883 1886
1884 atomic_set(&rth->dst.__refcnt, 1); 1887 rth->rt_key_dst = daddr;
1885 rth->dst.flags= DST_HOST;
1886 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1887 rth->dst.flags |= DST_NOPOLICY;
1888 rth->fl.fl4_dst = daddr;
1889 rth->rt_dst = daddr; 1888 rth->rt_dst = daddr;
1890 rth->fl.fl4_tos = tos; 1889 rth->rt_tos = tos;
1891 rth->fl.mark = skb->mark; 1890 rth->rt_mark = skb->mark;
1892 rth->fl.fl4_src = saddr; 1891 rth->rt_key_src = saddr;
1893 rth->rt_src = saddr; 1892 rth->rt_src = saddr;
1894#ifdef CONFIG_NET_CLS_ROUTE 1893#ifdef CONFIG_IP_ROUTE_CLASSID
1895 rth->dst.tclassid = itag; 1894 rth->dst.tclassid = itag;
1896#endif 1895#endif
1897 rth->rt_iif = 1896 rth->rt_iif = dev->ifindex;
1898 rth->fl.iif = dev->ifindex;
1899 rth->dst.dev = init_net.loopback_dev; 1897 rth->dst.dev = init_net.loopback_dev;
1900 dev_hold(rth->dst.dev); 1898 dev_hold(rth->dst.dev);
1901 rth->fl.oif = 0; 1899 rth->rt_oif = 0;
1902 rth->rt_gateway = daddr; 1900 rth->rt_gateway = daddr;
1903 rth->rt_spec_dst= spec_dst; 1901 rth->rt_spec_dst= spec_dst;
1904 rth->rt_genid = rt_genid(dev_net(dev)); 1902 rth->rt_genid = rt_genid(dev_net(dev));
@@ -1916,7 +1914,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1916 RT_CACHE_STAT_INC(in_slow_mc); 1914 RT_CACHE_STAT_INC(in_slow_mc);
1917 1915
1918 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1916 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1919 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); 1917 rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
1918 err = 0;
1919 if (IS_ERR(rth))
1920 err = PTR_ERR(rth);
1920 1921
1921e_nobufs: 1922e_nobufs:
1922 return -ENOBUFS; 1923 return -ENOBUFS;
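
rt_intern_hash() now hands back the rtable itself, possibly a different entry than the one passed in if an equivalent cached route won the race, or an ERR_PTR() on failure, instead of filling a **rp out-parameter. The encoding, in a self-contained sketch: errno values live in the top, never-mapped 4095 bytes of the address space, so one pointer-sized return carries either result.

#include <errno.h>

#define DEMO_MAX_ERRNO  4095

static inline void *demo_err_ptr(long error)
{
        return (void *)error;                   /* e.g. demo_err_ptr(-ENOBUFS) */
}

static inline long demo_ptr_err(const void *ptr)
{
        return (long)ptr;                       /* recover the negative errno */
}

static inline int demo_is_err(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-DEMO_MAX_ERRNO;
}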
@@ -1959,7 +1960,7 @@ static void ip_handle_martian_source(struct net_device *dev,
1959 1960
1960/* called in rcu_read_lock() section */ 1961/* called in rcu_read_lock() section */
1961static int __mkroute_input(struct sk_buff *skb, 1962static int __mkroute_input(struct sk_buff *skb,
1962 struct fib_result *res, 1963 const struct fib_result *res,
1963 struct in_device *in_dev, 1964 struct in_device *in_dev,
1964 __be32 daddr, __be32 saddr, u32 tos, 1965 __be32 daddr, __be32 saddr, u32 tos,
1965 struct rtable **result) 1966 struct rtable **result)
@@ -2013,39 +2014,31 @@ static int __mkroute_input(struct sk_buff *skb,
2013 } 2014 }
2014 } 2015 }
2015 2016
2016 2017 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
2017 rth = dst_alloc(&ipv4_dst_ops); 2018 IN_DEV_CONF_GET(out_dev, NOXFRM));
2018 if (!rth) { 2019 if (!rth) {
2019 err = -ENOBUFS; 2020 err = -ENOBUFS;
2020 goto cleanup; 2021 goto cleanup;
2021 } 2022 }
2022 2023
2023 atomic_set(&rth->dst.__refcnt, 1); 2024 rth->rt_key_dst = daddr;
2024 rth->dst.flags= DST_HOST;
2025 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2026 rth->dst.flags |= DST_NOPOLICY;
2027 if (IN_DEV_CONF_GET(out_dev, NOXFRM))
2028 rth->dst.flags |= DST_NOXFRM;
2029 rth->fl.fl4_dst = daddr;
2030 rth->rt_dst = daddr; 2025 rth->rt_dst = daddr;
2031 rth->fl.fl4_tos = tos; 2026 rth->rt_tos = tos;
2032 rth->fl.mark = skb->mark; 2027 rth->rt_mark = skb->mark;
2033 rth->fl.fl4_src = saddr; 2028 rth->rt_key_src = saddr;
2034 rth->rt_src = saddr; 2029 rth->rt_src = saddr;
2035 rth->rt_gateway = daddr; 2030 rth->rt_gateway = daddr;
2036 rth->rt_iif = 2031 rth->rt_iif = in_dev->dev->ifindex;
2037 rth->fl.iif = in_dev->dev->ifindex;
2038 rth->dst.dev = (out_dev)->dev; 2032 rth->dst.dev = (out_dev)->dev;
2039 dev_hold(rth->dst.dev); 2033 dev_hold(rth->dst.dev);
2040 rth->fl.oif = 0; 2034 rth->rt_oif = 0;
2041 rth->rt_spec_dst= spec_dst; 2035 rth->rt_spec_dst= spec_dst;
2042 2036
2043 rth->dst.obsolete = -1;
2044 rth->dst.input = ip_forward; 2037 rth->dst.input = ip_forward;
2045 rth->dst.output = ip_output; 2038 rth->dst.output = ip_output;
2046 rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); 2039 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2047 2040
2048 rt_set_nexthop(rth, res, itag); 2041 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
2049 2042
2050 rth->rt_flags = flags; 2043 rth->rt_flags = flags;
2051 2044
@@ -2057,7 +2050,7 @@ static int __mkroute_input(struct sk_buff *skb,
2057 2050
2058static int ip_mkroute_input(struct sk_buff *skb, 2051static int ip_mkroute_input(struct sk_buff *skb,
2059 struct fib_result *res, 2052 struct fib_result *res,
2060 const struct flowi *fl, 2053 const struct flowi4 *fl4,
2061 struct in_device *in_dev, 2054 struct in_device *in_dev,
2062 __be32 daddr, __be32 saddr, u32 tos) 2055 __be32 daddr, __be32 saddr, u32 tos)
2063{ 2056{
@@ -2066,8 +2059,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
2066 unsigned hash; 2059 unsigned hash;
2067 2060
2068#ifdef CONFIG_IP_ROUTE_MULTIPATH 2061#ifdef CONFIG_IP_ROUTE_MULTIPATH
2069 if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) 2062 if (res->fi && res->fi->fib_nhs > 1)
2070 fib_select_multipath(fl, res); 2063 fib_select_multipath(res);
2071#endif 2064#endif
2072 2065
2073 /* create a routing cache entry */ 2066 /* create a routing cache entry */
@@ -2076,9 +2069,12 @@ static int ip_mkroute_input(struct sk_buff *skb,
2076 return err; 2069 return err;
2077 2070
2078 /* put it into the cache */ 2071 /* put it into the cache */
2079 hash = rt_hash(daddr, saddr, fl->iif, 2072 hash = rt_hash(daddr, saddr, fl4->flowi4_iif,
2080 rt_genid(dev_net(rth->dst.dev))); 2073 rt_genid(dev_net(rth->dst.dev)));
2081 return rt_intern_hash(hash, rth, NULL, skb, fl->iif); 2074 rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif);
2075 if (IS_ERR(rth))
2076 return PTR_ERR(rth);
2077 return 0;
2082} 2078}
2083 2079
2084/* 2080/*
@@ -2097,12 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2097{ 2093{
2098 struct fib_result res; 2094 struct fib_result res;
2099 struct in_device *in_dev = __in_dev_get_rcu(dev); 2095 struct in_device *in_dev = __in_dev_get_rcu(dev);
2100 struct flowi fl = { .fl4_dst = daddr, 2096 struct flowi4 fl4;
2101 .fl4_src = saddr,
2102 .fl4_tos = tos,
2103 .fl4_scope = RT_SCOPE_UNIVERSE,
2104 .mark = skb->mark,
2105 .iif = dev->ifindex };
2106 unsigned flags = 0; 2097 unsigned flags = 0;
2107 u32 itag = 0; 2098 u32 itag = 0;
2108 struct rtable * rth; 2099 struct rtable * rth;
@@ -2139,7 +2130,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2139 /* 2130 /*
2140 * Now we are ready to route packet. 2131 * Now we are ready to route packet.
2141 */ 2132 */
2142 err = fib_lookup(net, &fl, &res); 2133 fl4.flowi4_oif = 0;
2134 fl4.flowi4_iif = dev->ifindex;
2135 fl4.flowi4_mark = skb->mark;
2136 fl4.flowi4_tos = tos;
2137 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
2138 fl4.daddr = daddr;
2139 fl4.saddr = saddr;
2140 err = fib_lookup(net, &fl4, &res);
2143 if (err != 0) { 2141 if (err != 0) {
2144 if (!IN_DEV_FORWARD(in_dev)) 2142 if (!IN_DEV_FORWARD(in_dev))
2145 goto e_hostunreach; 2143 goto e_hostunreach;
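
IPv4 lookups now take the address-family-specific struct flowi4 rather than the union-style struct flowi; the input path above fills one on the stack field by field. Output-side callers in this patch build the key with a designated initializer instead; a sketch modeled on the inet_rtm_getroute() hunk further down, where dst, src, tos, oif and mark stand for values the caller already parsed:

        struct flowi4 fl4 = {
                .daddr          = dst,
                .saddr          = src,
                .flowi4_tos     = tos,
                .flowi4_oif     = oif,
                .flowi4_mark    = mark,
        };
        rt = ip_route_output_key(net, &fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);     /* new-style error propagation */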
@@ -2168,7 +2166,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2168 if (res.type != RTN_UNICAST) 2166 if (res.type != RTN_UNICAST)
2169 goto martian_destination; 2167 goto martian_destination;
2170 2168
2171 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2169 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
2172out: return err; 2170out: return err;
2173 2171
2174brd_input: 2172brd_input:
@@ -2190,29 +2188,23 @@ brd_input:
2190 RT_CACHE_STAT_INC(in_brd); 2188 RT_CACHE_STAT_INC(in_brd);
2191 2189
2192local_input: 2190local_input:
2193 rth = dst_alloc(&ipv4_dst_ops); 2191 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
2194 if (!rth) 2192 if (!rth)
2195 goto e_nobufs; 2193 goto e_nobufs;
2196 2194
2197 rth->dst.output= ip_rt_bug; 2195 rth->dst.output= ip_rt_bug;
2198 rth->dst.obsolete = -1;
2199 rth->rt_genid = rt_genid(net); 2196 rth->rt_genid = rt_genid(net);
2200 2197
2201 atomic_set(&rth->dst.__refcnt, 1); 2198 rth->rt_key_dst = daddr;
2202 rth->dst.flags= DST_HOST;
2203 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2204 rth->dst.flags |= DST_NOPOLICY;
2205 rth->fl.fl4_dst = daddr;
2206 rth->rt_dst = daddr; 2199 rth->rt_dst = daddr;
2207 rth->fl.fl4_tos = tos; 2200 rth->rt_tos = tos;
2208 rth->fl.mark = skb->mark; 2201 rth->rt_mark = skb->mark;
2209 rth->fl.fl4_src = saddr; 2202 rth->rt_key_src = saddr;
2210 rth->rt_src = saddr; 2203 rth->rt_src = saddr;
2211#ifdef CONFIG_NET_CLS_ROUTE 2204#ifdef CONFIG_IP_ROUTE_CLASSID
2212 rth->dst.tclassid = itag; 2205 rth->dst.tclassid = itag;
2213#endif 2206#endif
2214 rth->rt_iif = 2207 rth->rt_iif = dev->ifindex;
2215 rth->fl.iif = dev->ifindex;
2216 rth->dst.dev = net->loopback_dev; 2208 rth->dst.dev = net->loopback_dev;
2217 dev_hold(rth->dst.dev); 2209 dev_hold(rth->dst.dev);
2218 rth->rt_gateway = daddr; 2210 rth->rt_gateway = daddr;
@@ -2225,8 +2217,11 @@ local_input:
2225 rth->rt_flags &= ~RTCF_LOCAL; 2217 rth->rt_flags &= ~RTCF_LOCAL;
2226 } 2218 }
2227 rth->rt_type = res.type; 2219 rth->rt_type = res.type;
2228 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); 2220 hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
2229 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); 2221 rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
2222 err = 0;
2223 if (IS_ERR(rth))
2224 err = PTR_ERR(rth);
2230 goto out; 2225 goto out;
2231 2226
2232no_route: 2227no_route:
@@ -2288,12 +2283,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2288 2283
2289 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2284 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2290 rth = rcu_dereference(rth->dst.rt_next)) { 2285 rth = rcu_dereference(rth->dst.rt_next)) {
2291 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | 2286 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
2292 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | 2287 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
2293 (rth->fl.iif ^ iif) | 2288 (rth->rt_iif ^ iif) |
2294 rth->fl.oif | 2289 rth->rt_oif |
2295 (rth->fl.fl4_tos ^ tos)) == 0 && 2290 (rth->rt_tos ^ tos)) == 0 &&
2296 rth->fl.mark == skb->mark && 2291 rth->rt_mark == skb->mark &&
2297 net_eq(dev_net(rth->dst.dev), net) && 2292 net_eq(dev_net(rth->dst.dev), net) &&
2298 !rt_is_expired(rth)) { 2293 !rt_is_expired(rth)) {
2299 if (noref) { 2294 if (noref) {
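
The cache hit test above ORs together the XORs of every key field, so the whole comparison compiles to straight-line arithmetic with a single final branch; the OR is zero exactly when all fields match (rth->rt_oif appears bare because it must itself be 0 for an input route). Reduced to its essence:

#include <stdint.h>

static int demo_keys_match(uint32_t a_dst, uint32_t a_src, int a_iif,
                           uint32_t b_dst, uint32_t b_src, int b_iif)
{
        /* zero iff every field is equal; no early-out branches */
        return ((a_dst ^ b_dst) |
                (a_src ^ b_src) |
                (uint32_t)(a_iif ^ b_iif)) == 0;
}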
@@ -2326,8 +2321,8 @@ skip_cache:
2326 struct in_device *in_dev = __in_dev_get_rcu(dev); 2321 struct in_device *in_dev = __in_dev_get_rcu(dev);
2327 2322
2328 if (in_dev) { 2323 if (in_dev) {
2329 int our = ip_check_mc(in_dev, daddr, saddr, 2324 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
2330 ip_hdr(skb)->protocol); 2325 ip_hdr(skb)->protocol);
2331 if (our 2326 if (our
2332#ifdef CONFIG_IP_MROUTE 2327#ifdef CONFIG_IP_MROUTE
2333 || 2328 ||
@@ -2351,98 +2346,91 @@ skip_cache:
2351EXPORT_SYMBOL(ip_route_input_common); 2346EXPORT_SYMBOL(ip_route_input_common);
2352 2347
2353/* called with rcu_read_lock() */ 2348/* called with rcu_read_lock() */
2354static int __mkroute_output(struct rtable **result, 2349static struct rtable *__mkroute_output(const struct fib_result *res,
2355 struct fib_result *res, 2350 const struct flowi4 *fl4,
2356 const struct flowi *fl, 2351 const struct flowi4 *oldflp4,
2357 const struct flowi *oldflp, 2352 struct net_device *dev_out,
2358 struct net_device *dev_out, 2353 unsigned int flags)
2359 unsigned flags)
2360{ 2354{
2361 struct rtable *rth; 2355 struct fib_info *fi = res->fi;
2356 u32 tos = RT_FL_TOS(oldflp4);
2362 struct in_device *in_dev; 2357 struct in_device *in_dev;
2363 u32 tos = RT_FL_TOS(oldflp); 2358 u16 type = res->type;
2359 struct rtable *rth;
2364 2360
2365 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) 2361 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
2366 return -EINVAL; 2362 return ERR_PTR(-EINVAL);
2367 2363
2368 if (ipv4_is_lbcast(fl->fl4_dst)) 2364 if (ipv4_is_lbcast(fl4->daddr))
2369 res->type = RTN_BROADCAST; 2365 type = RTN_BROADCAST;
2370 else if (ipv4_is_multicast(fl->fl4_dst)) 2366 else if (ipv4_is_multicast(fl4->daddr))
2371 res->type = RTN_MULTICAST; 2367 type = RTN_MULTICAST;
2372 else if (ipv4_is_zeronet(fl->fl4_dst)) 2368 else if (ipv4_is_zeronet(fl4->daddr))
2373 return -EINVAL; 2369 return ERR_PTR(-EINVAL);
2374 2370
2375 if (dev_out->flags & IFF_LOOPBACK) 2371 if (dev_out->flags & IFF_LOOPBACK)
2376 flags |= RTCF_LOCAL; 2372 flags |= RTCF_LOCAL;
2377 2373
2378 in_dev = __in_dev_get_rcu(dev_out); 2374 in_dev = __in_dev_get_rcu(dev_out);
2379 if (!in_dev) 2375 if (!in_dev)
2380 return -EINVAL; 2376 return ERR_PTR(-EINVAL);
2381 2377
2382 if (res->type == RTN_BROADCAST) { 2378 if (type == RTN_BROADCAST) {
2383 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2379 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2384 res->fi = NULL; 2380 fi = NULL;
2385 } else if (res->type == RTN_MULTICAST) { 2381 } else if (type == RTN_MULTICAST) {
2386 flags |= RTCF_MULTICAST | RTCF_LOCAL; 2382 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2387 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2383 if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr,
2388 oldflp->proto)) 2384 oldflp4->flowi4_proto))
2389 flags &= ~RTCF_LOCAL; 2385 flags &= ~RTCF_LOCAL;
2390 /* If a multicast route does not exist, use the 2386 /* If a multicast route does not exist, use the
2391 * default one, but do not set a gateway in this case. 2387 * default one, but do not set a gateway in this case.
2392 * Yes, it is a hack. 2388 * Yes, it is a hack.
2393 */ 2389 */
2394 if (res->fi && res->prefixlen < 4) 2390 if (fi && res->prefixlen < 4)
2395 res->fi = NULL; 2391 fi = NULL;
2396 } 2392 }
2397 2393
2398 2394 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
2399 rth = dst_alloc(&ipv4_dst_ops); 2395 IN_DEV_CONF_GET(in_dev, NOXFRM));
2400 if (!rth) 2396 if (!rth)
2401 return -ENOBUFS; 2397 return ERR_PTR(-ENOBUFS);
2402 2398
2403 atomic_set(&rth->dst.__refcnt, 1); 2399 rth->rt_key_dst = oldflp4->daddr;
2404 rth->dst.flags= DST_HOST; 2400 rth->rt_tos = tos;
2405 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2401 rth->rt_key_src = oldflp4->saddr;
2406 rth->dst.flags |= DST_NOXFRM; 2402 rth->rt_oif = oldflp4->flowi4_oif;
2407 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2403 rth->rt_mark = oldflp4->flowi4_mark;
2408 rth->dst.flags |= DST_NOPOLICY; 2404 rth->rt_dst = fl4->daddr;
2409 2405 rth->rt_src = fl4->saddr;
2410 rth->fl.fl4_dst = oldflp->fl4_dst; 2406 rth->rt_iif = 0;
2411 rth->fl.fl4_tos = tos;
2412 rth->fl.fl4_src = oldflp->fl4_src;
2413 rth->fl.oif = oldflp->oif;
2414 rth->fl.mark = oldflp->mark;
2415 rth->rt_dst = fl->fl4_dst;
2416 rth->rt_src = fl->fl4_src;
2417 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2418 /* get references to the devices that are to be held by the routing 2407 /* get references to the devices that are to be held by the routing
2419 cache entry */ 2408 cache entry */
2420 rth->dst.dev = dev_out; 2409 rth->dst.dev = dev_out;
2421 dev_hold(dev_out); 2410 dev_hold(dev_out);
2422 rth->rt_gateway = fl->fl4_dst; 2411 rth->rt_gateway = fl4->daddr;
2423 rth->rt_spec_dst= fl->fl4_src; 2412 rth->rt_spec_dst= fl4->saddr;
2424 2413
2425 rth->dst.output=ip_output; 2414 rth->dst.output=ip_output;
2426 rth->dst.obsolete = -1;
2427 rth->rt_genid = rt_genid(dev_net(dev_out)); 2415 rth->rt_genid = rt_genid(dev_net(dev_out));
2428 2416
2429 RT_CACHE_STAT_INC(out_slow_tot); 2417 RT_CACHE_STAT_INC(out_slow_tot);
2430 2418
2431 if (flags & RTCF_LOCAL) { 2419 if (flags & RTCF_LOCAL) {
2432 rth->dst.input = ip_local_deliver; 2420 rth->dst.input = ip_local_deliver;
2433 rth->rt_spec_dst = fl->fl4_dst; 2421 rth->rt_spec_dst = fl4->daddr;
2434 } 2422 }
2435 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2423 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2436 rth->rt_spec_dst = fl->fl4_src; 2424 rth->rt_spec_dst = fl4->saddr;
2437 if (flags & RTCF_LOCAL && 2425 if (flags & RTCF_LOCAL &&
2438 !(dev_out->flags & IFF_LOOPBACK)) { 2426 !(dev_out->flags & IFF_LOOPBACK)) {
2439 rth->dst.output = ip_mc_output; 2427 rth->dst.output = ip_mc_output;
2440 RT_CACHE_STAT_INC(out_slow_mc); 2428 RT_CACHE_STAT_INC(out_slow_mc);
2441 } 2429 }
2442#ifdef CONFIG_IP_MROUTE 2430#ifdef CONFIG_IP_MROUTE
2443 if (res->type == RTN_MULTICAST) { 2431 if (type == RTN_MULTICAST) {
2444 if (IN_DEV_MFORWARD(in_dev) && 2432 if (IN_DEV_MFORWARD(in_dev) &&
2445 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2433 !ipv4_is_local_multicast(oldflp4->daddr)) {
2446 rth->dst.input = ip_mr_input; 2434 rth->dst.input = ip_mr_input;
2447 rth->dst.output = ip_mc_output; 2435 rth->dst.output = ip_mc_output;
2448 } 2436 }
@@ -2450,31 +2438,10 @@ static int __mkroute_output(struct rtable **result,
2450#endif 2438#endif
2451 } 2439 }
2452 2440
2453 rt_set_nexthop(rth, res, 0); 2441 rt_set_nexthop(rth, oldflp4, res, fi, type, 0);
2454 2442
2455 rth->rt_flags = flags; 2443 rth->rt_flags = flags;
2456 *result = rth; 2444 return rth;
2457 return 0;
2458}
2459
2460/* called with rcu_read_lock() */
2461static int ip_mkroute_output(struct rtable **rp,
2462 struct fib_result *res,
2463 const struct flowi *fl,
2464 const struct flowi *oldflp,
2465 struct net_device *dev_out,
2466 unsigned flags)
2467{
2468 struct rtable *rth = NULL;
2469 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2470 unsigned hash;
2471 if (err == 0) {
2472 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2473 rt_genid(dev_net(dev_out)));
2474 err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
2475 }
2476
2477 return err;
2478} 2445}
2479 2446
2480/* 2447/*
@@ -2482,34 +2449,36 @@ static int ip_mkroute_output(struct rtable **rp,
2482 * called with rcu_read_lock(); 2449 * called with rcu_read_lock();
2483 */ 2450 */
2484 2451
2485static int ip_route_output_slow(struct net *net, struct rtable **rp, 2452static struct rtable *ip_route_output_slow(struct net *net,
2486 const struct flowi *oldflp) 2453 const struct flowi4 *oldflp4)
2487{ 2454{
2488 u32 tos = RT_FL_TOS(oldflp); 2455 u32 tos = RT_FL_TOS(oldflp4);
2489 struct flowi fl = { .fl4_dst = oldflp->fl4_dst, 2456 struct flowi4 fl4;
2490 .fl4_src = oldflp->fl4_src,
2491 .fl4_tos = tos & IPTOS_RT_MASK,
2492 .fl4_scope = ((tos & RTO_ONLINK) ?
2493 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
2494 .mark = oldflp->mark,
2495 .iif = net->loopback_dev->ifindex,
2496 .oif = oldflp->oif };
2497 struct fib_result res; 2457 struct fib_result res;
2498 unsigned int flags = 0; 2458 unsigned int flags = 0;
2499 struct net_device *dev_out = NULL; 2459 struct net_device *dev_out = NULL;
2500 int err; 2460 struct rtable *rth;
2501
2502 2461
2503 res.fi = NULL; 2462 res.fi = NULL;
2504#ifdef CONFIG_IP_MULTIPLE_TABLES 2463#ifdef CONFIG_IP_MULTIPLE_TABLES
2505 res.r = NULL; 2464 res.r = NULL;
2506#endif 2465#endif
2507 2466
2508 if (oldflp->fl4_src) { 2467 fl4.flowi4_oif = oldflp4->flowi4_oif;
2509 err = -EINVAL; 2468 fl4.flowi4_iif = net->loopback_dev->ifindex;
2510 if (ipv4_is_multicast(oldflp->fl4_src) || 2469 fl4.flowi4_mark = oldflp4->flowi4_mark;
2511 ipv4_is_lbcast(oldflp->fl4_src) || 2470 fl4.daddr = oldflp4->daddr;
2512 ipv4_is_zeronet(oldflp->fl4_src)) 2471 fl4.saddr = oldflp4->saddr;
2472 fl4.flowi4_tos = tos & IPTOS_RT_MASK;
2473 fl4.flowi4_scope = ((tos & RTO_ONLINK) ?
2474 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
2475
2476 rcu_read_lock();
2477 if (oldflp4->saddr) {
2478 rth = ERR_PTR(-EINVAL);
2479 if (ipv4_is_multicast(oldflp4->saddr) ||
2480 ipv4_is_lbcast(oldflp4->saddr) ||
2481 ipv4_is_zeronet(oldflp4->saddr))
2513 goto out; 2482 goto out;
2514 2483
2515 /* I removed check for oif == dev_out->oif here. 2484 /* I removed check for oif == dev_out->oif here.
@@ -2520,11 +2489,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2520 of another iface. --ANK 2489 of another iface. --ANK
2521 */ 2490 */
2522 2491
2523 if (oldflp->oif == 0 && 2492 if (oldflp4->flowi4_oif == 0 &&
2524 (ipv4_is_multicast(oldflp->fl4_dst) || 2493 (ipv4_is_multicast(oldflp4->daddr) ||
2525 ipv4_is_lbcast(oldflp->fl4_dst))) { 2494 ipv4_is_lbcast(oldflp4->daddr))) {
2526 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2495 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2527 dev_out = __ip_dev_find(net, oldflp->fl4_src, false); 2496 dev_out = __ip_dev_find(net, oldflp4->saddr, false);
2528 if (dev_out == NULL) 2497 if (dev_out == NULL)
2529 goto out; 2498 goto out;
2530 2499
@@ -2543,60 +2512,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2543 Luckily, this hack is a good workaround. 2512 Luckily, this hack is a good workaround.
2544 */ 2513 */
2545 2514
2546 fl.oif = dev_out->ifindex; 2515 fl4.flowi4_oif = dev_out->ifindex;
2547 goto make_route; 2516 goto make_route;
2548 } 2517 }
2549 2518
2550 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { 2519 if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2551 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2520 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2552 if (!__ip_dev_find(net, oldflp->fl4_src, false)) 2521 if (!__ip_dev_find(net, oldflp4->saddr, false))
2553 goto out; 2522 goto out;
2554 } 2523 }
2555 } 2524 }
2556 2525
2557 2526
2558 if (oldflp->oif) { 2527 if (oldflp4->flowi4_oif) {
2559 dev_out = dev_get_by_index_rcu(net, oldflp->oif); 2528 dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif);
2560 err = -ENODEV; 2529 rth = ERR_PTR(-ENODEV);
2561 if (dev_out == NULL) 2530 if (dev_out == NULL)
2562 goto out; 2531 goto out;
2563 2532
2564 /* RACE: Check return value of inet_select_addr instead. */ 2533 /* RACE: Check return value of inet_select_addr instead. */
2565 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { 2534 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
2566 err = -ENETUNREACH; 2535 rth = ERR_PTR(-ENETUNREACH);
2567 goto out; 2536 goto out;
2568 } 2537 }
2569 if (ipv4_is_local_multicast(oldflp->fl4_dst) || 2538 if (ipv4_is_local_multicast(oldflp4->daddr) ||
2570 ipv4_is_lbcast(oldflp->fl4_dst)) { 2539 ipv4_is_lbcast(oldflp4->daddr)) {
2571 if (!fl.fl4_src) 2540 if (!fl4.saddr)
2572 fl.fl4_src = inet_select_addr(dev_out, 0, 2541 fl4.saddr = inet_select_addr(dev_out, 0,
2573 RT_SCOPE_LINK); 2542 RT_SCOPE_LINK);
2574 goto make_route; 2543 goto make_route;
2575 } 2544 }
2576 if (!fl.fl4_src) { 2545 if (!fl4.saddr) {
2577 if (ipv4_is_multicast(oldflp->fl4_dst)) 2546 if (ipv4_is_multicast(oldflp4->daddr))
2578 fl.fl4_src = inet_select_addr(dev_out, 0, 2547 fl4.saddr = inet_select_addr(dev_out, 0,
2579 fl.fl4_scope); 2548 fl4.flowi4_scope);
2580 else if (!oldflp->fl4_dst) 2549 else if (!oldflp4->daddr)
2581 fl.fl4_src = inet_select_addr(dev_out, 0, 2550 fl4.saddr = inet_select_addr(dev_out, 0,
2582 RT_SCOPE_HOST); 2551 RT_SCOPE_HOST);
2583 } 2552 }
2584 } 2553 }
2585 2554
2586 if (!fl.fl4_dst) { 2555 if (!fl4.daddr) {
2587 fl.fl4_dst = fl.fl4_src; 2556 fl4.daddr = fl4.saddr;
2588 if (!fl.fl4_dst) 2557 if (!fl4.daddr)
2589 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2558 fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK);
2590 dev_out = net->loopback_dev; 2559 dev_out = net->loopback_dev;
2591 fl.oif = net->loopback_dev->ifindex; 2560 fl4.flowi4_oif = net->loopback_dev->ifindex;
2592 res.type = RTN_LOCAL; 2561 res.type = RTN_LOCAL;
2593 flags |= RTCF_LOCAL; 2562 flags |= RTCF_LOCAL;
2594 goto make_route; 2563 goto make_route;
2595 } 2564 }
2596 2565
2597 if (fib_lookup(net, &fl, &res)) { 2566 if (fib_lookup(net, &fl4, &res)) {
2598 res.fi = NULL; 2567 res.fi = NULL;
2599 if (oldflp->oif) { 2568 if (oldflp4->flowi4_oif) {
2600 /* Apparently, routing tables are wrong. Assume 2569 /* Apparently, routing tables are wrong. Assume
2601 that the destination is on link. 2570 that the destination is on link.
2602 2571
@@ -2615,90 +2584,93 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2615 likely IPv6, but we do not. 2584 likely IPv6, but we do not.
2616 */ 2585 */
2617 2586
2618 if (fl.fl4_src == 0) 2587 if (fl4.saddr == 0)
2619 fl.fl4_src = inet_select_addr(dev_out, 0, 2588 fl4.saddr = inet_select_addr(dev_out, 0,
2620 RT_SCOPE_LINK); 2589 RT_SCOPE_LINK);
2621 res.type = RTN_UNICAST; 2590 res.type = RTN_UNICAST;
2622 goto make_route; 2591 goto make_route;
2623 } 2592 }
2624 err = -ENETUNREACH; 2593 rth = ERR_PTR(-ENETUNREACH);
2625 goto out; 2594 goto out;
2626 } 2595 }
2627 2596
2628 if (res.type == RTN_LOCAL) { 2597 if (res.type == RTN_LOCAL) {
2629 if (!fl.fl4_src) { 2598 if (!fl4.saddr) {
2630 if (res.fi->fib_prefsrc) 2599 if (res.fi->fib_prefsrc)
2631 fl.fl4_src = res.fi->fib_prefsrc; 2600 fl4.saddr = res.fi->fib_prefsrc;
2632 else 2601 else
2633 fl.fl4_src = fl.fl4_dst; 2602 fl4.saddr = fl4.daddr;
2634 } 2603 }
2635 dev_out = net->loopback_dev; 2604 dev_out = net->loopback_dev;
2636 fl.oif = dev_out->ifindex; 2605 fl4.flowi4_oif = dev_out->ifindex;
2637 res.fi = NULL; 2606 res.fi = NULL;
2638 flags |= RTCF_LOCAL; 2607 flags |= RTCF_LOCAL;
2639 goto make_route; 2608 goto make_route;
2640 } 2609 }
2641 2610
2642#ifdef CONFIG_IP_ROUTE_MULTIPATH 2611#ifdef CONFIG_IP_ROUTE_MULTIPATH
2643 if (res.fi->fib_nhs > 1 && fl.oif == 0) 2612 if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0)
2644 fib_select_multipath(&fl, &res); 2613 fib_select_multipath(&res);
2645 else 2614 else
2646#endif 2615#endif
2647 if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) 2616 if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif)
2648 fib_select_default(net, &fl, &res); 2617 fib_select_default(&res);
2649 2618
2650 if (!fl.fl4_src) 2619 if (!fl4.saddr)
2651 fl.fl4_src = FIB_RES_PREFSRC(res); 2620 fl4.saddr = FIB_RES_PREFSRC(res);
2652 2621
2653 dev_out = FIB_RES_DEV(res); 2622 dev_out = FIB_RES_DEV(res);
2654 fl.oif = dev_out->ifindex; 2623 fl4.flowi4_oif = dev_out->ifindex;
2655 2624
2656 2625
2657make_route: 2626make_route:
2658 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2627 rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags);
2628 if (!IS_ERR(rth)) {
2629 unsigned int hash;
2659 2630
2660out: return err; 2631 hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif,
2632 rt_genid(dev_net(dev_out)));
2633 rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif);
2634 }
2635
2636out:
2637 rcu_read_unlock();
2638 return rth;
2661} 2639}
2662 2640
2663int __ip_route_output_key(struct net *net, struct rtable **rp, 2641struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4)
2664 const struct flowi *flp)
2665{ 2642{
2666 unsigned int hash;
2667 int res;
2668 struct rtable *rth; 2643 struct rtable *rth;
2644 unsigned int hash;
2669 2645
2670 if (!rt_caching(net)) 2646 if (!rt_caching(net))
2671 goto slow_output; 2647 goto slow_output;
2672 2648
2673 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); 2649 hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));
2674 2650
2675 rcu_read_lock_bh(); 2651 rcu_read_lock_bh();
2676 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; 2652 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2677 rth = rcu_dereference_bh(rth->dst.rt_next)) { 2653 rth = rcu_dereference_bh(rth->dst.rt_next)) {
2678 if (rth->fl.fl4_dst == flp->fl4_dst && 2654 if (rth->rt_key_dst == flp4->daddr &&
2679 rth->fl.fl4_src == flp->fl4_src && 2655 rth->rt_key_src == flp4->saddr &&
2680 rt_is_output_route(rth) && 2656 rt_is_output_route(rth) &&
2681 rth->fl.oif == flp->oif && 2657 rth->rt_oif == flp4->flowi4_oif &&
2682 rth->fl.mark == flp->mark && 2658 rth->rt_mark == flp4->flowi4_mark &&
2683 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2659 !((rth->rt_tos ^ flp4->flowi4_tos) &
2684 (IPTOS_RT_MASK | RTO_ONLINK)) && 2660 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2685 net_eq(dev_net(rth->dst.dev), net) && 2661 net_eq(dev_net(rth->dst.dev), net) &&
2686 !rt_is_expired(rth)) { 2662 !rt_is_expired(rth)) {
2687 dst_use(&rth->dst, jiffies); 2663 dst_use(&rth->dst, jiffies);
2688 RT_CACHE_STAT_INC(out_hit); 2664 RT_CACHE_STAT_INC(out_hit);
2689 rcu_read_unlock_bh(); 2665 rcu_read_unlock_bh();
2690 *rp = rth; 2666 return rth;
2691 return 0;
2692 } 2667 }
2693 RT_CACHE_STAT_INC(out_hlist_search); 2668 RT_CACHE_STAT_INC(out_hlist_search);
2694 } 2669 }
2695 rcu_read_unlock_bh(); 2670 rcu_read_unlock_bh();
2696 2671
2697slow_output: 2672slow_output:
2698 rcu_read_lock(); 2673 return ip_route_output_slow(net, flp4);
2699 res = ip_route_output_slow(net, rp, flp);
2700 rcu_read_unlock();
2701 return res;
2702} 2674}
2703EXPORT_SYMBOL_GPL(__ip_route_output_key); 2675EXPORT_SYMBOL_GPL(__ip_route_output_key);
2704 2676
@@ -2726,17 +2698,14 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2726 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2698 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2727}; 2699};
2728 2700
2729 2701struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2730static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
2731{ 2702{
2732 struct rtable *ort = *rp; 2703 struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1);
2733 struct rtable *rt = (struct rtable *) 2704 struct rtable *ort = (struct rtable *) dst_orig;
2734 dst_alloc(&ipv4_dst_blackhole_ops);
2735 2705
2736 if (rt) { 2706 if (rt) {
2737 struct dst_entry *new = &rt->dst; 2707 struct dst_entry *new = &rt->dst;
2738 2708
2739 atomic_set(&new->__refcnt, 1);
2740 new->__use = 1; 2709 new->__use = 1;
2741 new->input = dst_discard; 2710 new->input = dst_discard;
2742 new->output = dst_discard; 2711 new->output = dst_discard;
@@ -2746,7 +2715,12 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2746 if (new->dev) 2715 if (new->dev)
2747 dev_hold(new->dev); 2716 dev_hold(new->dev);
2748 2717
2749 rt->fl = ort->fl; 2718 rt->rt_key_dst = ort->rt_key_dst;
2719 rt->rt_key_src = ort->rt_key_src;
2720 rt->rt_tos = ort->rt_tos;
2721 rt->rt_iif = ort->rt_iif;
2722 rt->rt_oif = ort->rt_oif;
2723 rt->rt_mark = ort->rt_mark;
2750 2724
2751 rt->rt_genid = rt_genid(net); 2725 rt->rt_genid = rt_genid(net);
2752 rt->rt_flags = ort->rt_flags; 2726 rt->rt_flags = ort->rt_flags;
@@ -2759,46 +2733,40 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2759 rt->peer = ort->peer; 2733 rt->peer = ort->peer;
2760 if (rt->peer) 2734 if (rt->peer)
2761 atomic_inc(&rt->peer->refcnt); 2735 atomic_inc(&rt->peer->refcnt);
2736 rt->fi = ort->fi;
2737 if (rt->fi)
2738 atomic_inc(&rt->fi->fib_clntref);
2762 2739
2763 dst_free(new); 2740 dst_free(new);
2764 } 2741 }
2765 2742
2766 dst_release(&(*rp)->dst); 2743 dst_release(dst_orig);
2767 *rp = rt; 2744
2768 return rt ? 0 : -ENOMEM; 2745 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
2769} 2746}
2770 2747
2771int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, 2748struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
2772 struct sock *sk, int flags) 2749 struct sock *sk)
2773{ 2750{
2774 int err; 2751 struct rtable *rt = __ip_route_output_key(net, flp4);
2775 2752
2776 if ((err = __ip_route_output_key(net, rp, flp)) != 0) 2753 if (IS_ERR(rt))
2777 return err; 2754 return rt;
2778 2755
2779 if (flp->proto) { 2756 if (flp4->flowi4_proto) {
2780 if (!flp->fl4_src) 2757 if (!flp4->saddr)
2781 flp->fl4_src = (*rp)->rt_src; 2758 flp4->saddr = rt->rt_src;
2782 if (!flp->fl4_dst) 2759 if (!flp4->daddr)
2783 flp->fl4_dst = (*rp)->rt_dst; 2760 flp4->daddr = rt->rt_dst;
2784 err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, 2761 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2785 flags ? XFRM_LOOKUP_WAIT : 0); 2762 flowi4_to_flowi(flp4),
2786 if (err == -EREMOTE) 2763 sk, 0);
2787 err = ipv4_dst_blackhole(net, rp, flp);
2788
2789 return err;
2790 } 2764 }
2791 2765
2792 return 0; 2766 return rt;
2793} 2767}
2794EXPORT_SYMBOL_GPL(ip_route_output_flow); 2768EXPORT_SYMBOL_GPL(ip_route_output_flow);
2795 2769
2796int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2797{
2798 return ip_route_output_flow(net, rp, flp, NULL, 0);
2799}
2800EXPORT_SYMBOL(ip_route_output_key);
2801
2802static int rt_fill_info(struct net *net, 2770static int rt_fill_info(struct net *net,
2803 struct sk_buff *skb, u32 pid, u32 seq, int event, 2771 struct sk_buff *skb, u32 pid, u32 seq, int event,
2804 int nowait, unsigned int flags) 2772 int nowait, unsigned int flags)
@@ -2817,7 +2785,7 @@ static int rt_fill_info(struct net *net,
2817 r->rtm_family = AF_INET; 2785 r->rtm_family = AF_INET;
2818 r->rtm_dst_len = 32; 2786 r->rtm_dst_len = 32;
2819 r->rtm_src_len = 0; 2787 r->rtm_src_len = 0;
2820 r->rtm_tos = rt->fl.fl4_tos; 2788 r->rtm_tos = rt->rt_tos;
2821 r->rtm_table = RT_TABLE_MAIN; 2789 r->rtm_table = RT_TABLE_MAIN;
2822 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 2790 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2823 r->rtm_type = rt->rt_type; 2791 r->rtm_type = rt->rt_type;
@@ -2829,19 +2797,19 @@ static int rt_fill_info(struct net *net,
2829 2797
2830 NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); 2798 NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
2831 2799
2832 if (rt->fl.fl4_src) { 2800 if (rt->rt_key_src) {
2833 r->rtm_src_len = 32; 2801 r->rtm_src_len = 32;
2834 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); 2802 NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
2835 } 2803 }
2836 if (rt->dst.dev) 2804 if (rt->dst.dev)
2837 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); 2805 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2838#ifdef CONFIG_NET_CLS_ROUTE 2806#ifdef CONFIG_IP_ROUTE_CLASSID
2839 if (rt->dst.tclassid) 2807 if (rt->dst.tclassid)
2840 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); 2808 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2841#endif 2809#endif
2842 if (rt_is_input_route(rt)) 2810 if (rt_is_input_route(rt))
2843 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2811 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2844 else if (rt->rt_src != rt->fl.fl4_src) 2812 else if (rt->rt_src != rt->rt_key_src)
2845 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); 2813 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
2846 2814
2847 if (rt->rt_dst != rt->rt_gateway) 2815 if (rt->rt_dst != rt->rt_gateway)
@@ -2850,11 +2818,12 @@ static int rt_fill_info(struct net *net,
2850 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2818 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2851 goto nla_put_failure; 2819 goto nla_put_failure;
2852 2820
2853 if (rt->fl.mark) 2821 if (rt->rt_mark)
2854 NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); 2822 NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
2855 2823
2856 error = rt->dst.error; 2824 error = rt->dst.error;
2857 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; 2825 expires = (rt->peer && rt->peer->pmtu_expires) ?
2826 rt->peer->pmtu_expires - jiffies : 0;
2858 if (rt->peer) { 2827 if (rt->peer) {
2859 inet_peer_refcheck(rt->peer); 2828 inet_peer_refcheck(rt->peer);
2860 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2829 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
@@ -2884,7 +2853,7 @@ static int rt_fill_info(struct net *net,
2884 } 2853 }
2885 } else 2854 } else
2886#endif 2855#endif
2887 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); 2856 NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
2888 } 2857 }
2889 2858
2890 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, 2859 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
@@ -2958,14 +2927,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2958 if (err == 0 && rt->dst.error) 2927 if (err == 0 && rt->dst.error)
2959 err = -rt->dst.error; 2928 err = -rt->dst.error;
2960 } else { 2929 } else {
2961 struct flowi fl = { 2930 struct flowi4 fl4 = {
2962 .fl4_dst = dst, 2931 .daddr = dst,
2963 .fl4_src = src, 2932 .saddr = src,
2964 .fl4_tos = rtm->rtm_tos, 2933 .flowi4_tos = rtm->rtm_tos,
2965 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2934 .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2966 .mark = mark, 2935 .flowi4_mark = mark,
2967 }; 2936 };
2968 err = ip_route_output_key(net, &rt, &fl); 2937 rt = ip_route_output_key(net, &fl4);
2938
2939 err = 0;
2940 if (IS_ERR(rt))
2941 err = PTR_ERR(rt);
2969 } 2942 }
2970 2943
2971 if (err) 2944 if (err)
@@ -3256,9 +3229,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
3256}; 3229};
3257 3230
3258 3231
3259#ifdef CONFIG_NET_CLS_ROUTE 3232#ifdef CONFIG_IP_ROUTE_CLASSID
3260struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3233struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3261#endif /* CONFIG_NET_CLS_ROUTE */ 3234#endif /* CONFIG_IP_ROUTE_CLASSID */
3262 3235
3263static __initdata unsigned long rhash_entries; 3236static __initdata unsigned long rhash_entries;
3264static int __init set_rhash_entries(char *str) 3237static int __init set_rhash_entries(char *str)
@@ -3274,7 +3247,7 @@ int __init ip_rt_init(void)
3274{ 3247{
3275 int rc = 0; 3248 int rc = 0;
3276 3249
3277#ifdef CONFIG_NET_CLS_ROUTE 3250#ifdef CONFIG_IP_ROUTE_CLASSID
3278 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 3251 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
3279 if (!ip_rt_acct) 3252 if (!ip_rt_acct)
3280 panic("IP: failed to allocate ip_rt_acct\n"); 3253 panic("IP: failed to allocate ip_rt_acct\n");
@@ -3311,14 +3284,6 @@ int __init ip_rt_init(void)
3311 devinet_init(); 3284 devinet_init();
3312 ip_fib_init(); 3285 ip_fib_init();
3313 3286
3314 /* All the timers, started at system startup tend
3315 to synchronize. Perturb it a bit.
3316 */
3317 INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
3318 expires_ljiffies = jiffies;
3319 schedule_delayed_work(&expires_work,
3320 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3321
3322 if (ip_rt_proc_init()) 3287 if (ip_rt_proc_init())
3323 printk(KERN_ERR "Unable to create route proc files\n"); 3288 printk(KERN_ERR "Unable to create route proc files\n");
3324#ifdef CONFIG_XFRM 3289#ifdef CONFIG_XFRM
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 47519205a01..8b44c6d2a79 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -345,17 +345,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
345 * no easy way to do this. 345 * no easy way to do this.
346 */ 346 */
347 { 347 {
348 struct flowi fl = { .mark = sk->sk_mark, 348 struct flowi4 fl4 = {
349 .fl4_dst = ((opt && opt->srr) ? 349 .flowi4_mark = sk->sk_mark,
350 opt->faddr : ireq->rmt_addr), 350 .daddr = ((opt && opt->srr) ?
351 .fl4_src = ireq->loc_addr, 351 opt->faddr : ireq->rmt_addr),
352 .fl4_tos = RT_CONN_FLAGS(sk), 352 .saddr = ireq->loc_addr,
353 .proto = IPPROTO_TCP, 353 .flowi4_tos = RT_CONN_FLAGS(sk),
354 .flags = inet_sk_flowi_flags(sk), 354 .flowi4_proto = IPPROTO_TCP,
355 .fl_ip_sport = th->dest, 355 .flowi4_flags = inet_sk_flowi_flags(sk),
356 .fl_ip_dport = th->source }; 356 .fl4_sport = th->dest,
357 security_req_classify_flow(req, &fl); 357 .fl4_dport = th->source,
358 if (ip_route_output_key(sock_net(sk), &rt, &fl)) { 358 };
359 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
360 rt = ip_route_output_key(sock_net(sk), &fl4);
361 if (IS_ERR(rt)) {
359 reqsk_free(req); 362 reqsk_free(req);
360 goto out; 363 goto out;
361 } 364 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6c11eece262..b22d4501054 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
505 else 505 else
506 answ = tp->write_seq - tp->snd_una; 506 answ = tp->write_seq - tp->snd_una;
507 break; 507 break;
508 case SIOCOUTQNSD:
509 if (sk->sk_state == TCP_LISTEN)
510 return -EINVAL;
511
512 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
513 answ = 0;
514 else
515 answ = tp->write_seq - tp->snd_nxt;
516 break;
508 default: 517 default:
509 return -ENOIOCTLCMD; 518 return -ENOIOCTLCMD;
510 } 519 }
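
SIOCOUTQNSD complements SIOCOUTQ: the latter reports everything not yet acknowledged (write_seq - snd_una), the new ioctl only what is queued but not yet sent (write_seq - snd_nxt). A hypothetical user-space probe, assuming a connected TCP socket and a kernel carrying this patch:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

static void demo_print_queue(int fd)
{
        int unacked = 0, unsent = 0;

        if (ioctl(fd, SIOCOUTQ, &unacked) == 0 &&    /* write_seq - snd_una */
            ioctl(fd, SIOCOUTQNSD, &unsent) == 0)    /* write_seq - snd_nxt */
                printf("in flight: %d bytes, still queued: %d bytes\n",
                       unacked - unsent, unsent);
}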
@@ -873,9 +882,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
873 flags); 882 flags);
874 883
875 lock_sock(sk); 884 lock_sock(sk);
876 TCP_CHECK_TIMER(sk);
877 res = do_tcp_sendpages(sk, &page, offset, size, flags); 885 res = do_tcp_sendpages(sk, &page, offset, size, flags);
878 TCP_CHECK_TIMER(sk);
879 release_sock(sk); 886 release_sock(sk);
880 return res; 887 return res;
881} 888}
@@ -916,7 +923,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
916 long timeo; 923 long timeo;
917 924
918 lock_sock(sk); 925 lock_sock(sk);
919 TCP_CHECK_TIMER(sk);
920 926
921 flags = msg->msg_flags; 927 flags = msg->msg_flags;
922 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 928 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -1104,7 +1110,6 @@ wait_for_memory:
1104out: 1110out:
1105 if (copied) 1111 if (copied)
1106 tcp_push(sk, flags, mss_now, tp->nonagle); 1112 tcp_push(sk, flags, mss_now, tp->nonagle);
1107 TCP_CHECK_TIMER(sk);
1108 release_sock(sk); 1113 release_sock(sk);
1109 return copied; 1114 return copied;
1110 1115
@@ -1123,7 +1128,6 @@ do_error:
1123 goto out; 1128 goto out;
1124out_err: 1129out_err:
1125 err = sk_stream_error(sk, flags, err); 1130 err = sk_stream_error(sk, flags, err);
1126 TCP_CHECK_TIMER(sk);
1127 release_sock(sk); 1131 release_sock(sk);
1128 return err; 1132 return err;
1129} 1133}
@@ -1415,8 +1419,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1415 1419
1416 lock_sock(sk); 1420 lock_sock(sk);
1417 1421
1418 TCP_CHECK_TIMER(sk);
1419
1420 err = -ENOTCONN; 1422 err = -ENOTCONN;
1421 if (sk->sk_state == TCP_LISTEN) 1423 if (sk->sk_state == TCP_LISTEN)
1422 goto out; 1424 goto out;
@@ -1767,12 +1769,10 @@ skip_copy:
1767 /* Clean up data we have read: This will do ACK frames. */ 1769 /* Clean up data we have read: This will do ACK frames. */
1768 tcp_cleanup_rbuf(sk, copied); 1770 tcp_cleanup_rbuf(sk, copied);
1769 1771
1770 TCP_CHECK_TIMER(sk);
1771 release_sock(sk); 1772 release_sock(sk);
1772 return copied; 1773 return copied;
1773 1774
1774out: 1775out:
1775 TCP_CHECK_TIMER(sk);
1776 release_sock(sk); 1776 release_sock(sk);
1777 return err; 1777 return err;
1778 1778
@@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2653EXPORT_SYMBOL(compat_tcp_getsockopt); 2653EXPORT_SYMBOL(compat_tcp_getsockopt);
2654#endif 2654#endif
2655 2655
2656struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) 2656struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
2657{ 2657{
2658 struct sk_buff *segs = ERR_PTR(-EINVAL); 2658 struct sk_buff *segs = ERR_PTR(-EINVAL);
2659 struct tcphdr *th; 2659 struct tcphdr *th;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 3b53fd1af23..6187eb4d1dc 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
209} 209}
210 210
211 211
212static struct tcp_congestion_ops bictcp = { 212static struct tcp_congestion_ops bictcp __read_mostly = {
213 .init = bictcp_init, 213 .init = bictcp_init,
214 .ssthresh = bictcp_recalc_ssthresh, 214 .ssthresh = bictcp_recalc_ssthresh,
215 .cong_avoid = bictcp_cong_avoid, 215 .cong_avoid = bictcp_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 71d5f2f29fa..34340c9c95f 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -39,7 +39,7 @@
 
 /* Number of delay samples for detecting the increase of delay */
 #define HYSTART_MIN_SAMPLES	8
-#define HYSTART_DELAY_MIN	(2U<<3)
+#define HYSTART_DELAY_MIN	(4U<<3)
 #define HYSTART_DELAY_MAX	(16U<<3)
 #define HYSTART_DELAY_THRESH(x)	clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
 
@@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1;
 static int hystart __read_mostly = 1;
 static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
 static int hystart_low_window __read_mostly = 16;
+static int hystart_ack_delta __read_mostly = 2;
 
 static u32 cube_rtt_scale __read_mostly;
 static u32 beta_scale __read_mostly;
@@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
		 " 1: packet-train 2: delay 3: both packet-train and delay");
 module_param(hystart_low_window, int, 0644);
 MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
+module_param(hystart_ack_delta, int, 0644);
+MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
 
 /* BIC TCP Parameters */
 struct bictcp {
@@ -85,7 +88,7 @@ struct bictcp {
 	u32	last_time;	/* time when updated last_cwnd */
 	u32	bic_origin_point;/* origin point of bic function */
 	u32	bic_K;		/* time to origin point from the beginning of the current epoch */
-	u32	delay_min;	/* min delay */
+	u32	delay_min;	/* min delay (msec << 3) */
 	u32	epoch_start;	/* beginning of an epoch */
 	u32	ack_cnt;	/* number of acks */
 	u32	tcp_cwnd;	/* estimated tcp cwnd */
@@ -95,7 +98,7 @@ struct bictcp {
 	u8	found;		/* the exit point is found? */
 	u32	round_start;	/* beginning of each round */
 	u32	end_seq;	/* end_seq of the round */
-	u32	last_jiffies;	/* last time when the ACK spacing is close */
+	u32	last_ack;	/* last time when the ACK spacing is close */
 	u32	curr_rtt;	/* the minimum rtt of current round */
 };
 
@@ -116,12 +119,21 @@ static inline void bictcp_reset(struct bictcp *ca)
 	ca->found = 0;
 }
 
+static inline u32 bictcp_clock(void)
+{
+#if HZ < 1000
+	return ktime_to_ms(ktime_get_real());
+#else
+	return jiffies_to_msecs(jiffies);
+#endif
+}
+
 static inline void bictcp_hystart_reset(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	ca->round_start = ca->last_jiffies = jiffies;
+	ca->round_start = ca->last_ack = bictcp_clock();
 	ca->end_seq = tp->snd_nxt;
 	ca->curr_rtt = 0;
 	ca->sample_cnt = 0;
@@ -236,8 +248,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	 */
 
 	/* change the unit from HZ to bictcp_HZ */
-	t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
-	     << BICTCP_HZ) / HZ;
+	t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
+	      - ca->epoch_start) << BICTCP_HZ) / HZ;
 
 	if (t < ca->bic_K)		/* t - K */
 		offs = ca->bic_K - t;
@@ -258,6 +270,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 100 * cwnd;              /* very small increment*/
 	}
 
+	/*
+	 * The initial growth of cubic function may be too conservative
+	 * when the available bandwidth is still unknown.
+	 */
+	if (ca->loss_cwnd == 0 && ca->cnt > 20)
+		ca->cnt = 20;   /* increase cwnd 5% per RTT */
+
 	/* TCP Friendly */
 	if (tcp_friendliness) {
 		u32 scale = beta_scale;
@@ -339,12 +358,12 @@ static void hystart_update(struct sock *sk, u32 delay)
 	struct bictcp *ca = inet_csk_ca(sk);
 
 	if (!(ca->found & hystart_detect)) {
-		u32 curr_jiffies = jiffies;
+		u32 now = bictcp_clock();
 
 		/* first detection parameter - ack-train detection */
-		if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) {
-			ca->last_jiffies = curr_jiffies;
-			if (curr_jiffies - ca->round_start >= ca->delay_min>>4)
+		if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
+			ca->last_ack = now;
+			if ((s32)(now - ca->round_start) > ca->delay_min >> 4)
 				ca->found |= HYSTART_ACK_TRAIN;
 		}
 
@@ -391,7 +410,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 	if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
 		return;
 
-	delay = usecs_to_jiffies(rtt_us) << 3;
+	delay = (rtt_us << 3) / USEC_PER_MSEC;
 	if (delay == 0)
 		delay = 1;
 
@@ -405,7 +424,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 		hystart_update(sk, delay);
 }
 
-static struct tcp_congestion_ops cubictcp = {
+static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.init		= bictcp_init,
 	.ssthresh	= bictcp_recalc_ssthresh,
 	.cong_avoid	= bictcp_cong_avoid,
@@ -447,6 +466,10 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
+	/* hystart needs ms clock resolution */
+	if (hystart && HZ < 1000)
+		cubictcp.flags |= TCP_CONG_RTT_STAMP;
+
 	return tcp_register_congestion_control(&cubictcp);
 }
 
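
The hystart rework above swaps jiffies for a millisecond clock (bictcp_clock()) and compares timestamps through a signed cast, so a wrapped 32-bit clock still orders correctly. A userspace sketch of the same wrap-safe ack-train test; now_ms() here is a stand-in for bictcp_clock(), and 2 ms mirrors the hystart_ack_delta default:

	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	/* Millisecond clock, analogous to bictcp_clock() when HZ < 1000. */
	static uint32_t now_ms(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint32_t)(ts.tv_sec * 1000 + ts.tv_nsec / 1000000);
	}

	int main(void)
	{
		uint32_t last_ack = now_ms();
		int32_t ack_delta = 2;		/* hystart_ack_delta (msecs) */
		uint32_t now = now_ms();

		/* The signed cast keeps the comparison correct even if the
		 * 32-bit clock wrapped between the two samples. */
		if ((int32_t)(now - last_ack) <= ack_delta)
			printf("ACKs still form a train\n");
		return 0;
	}
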
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b6caaf75bb..30f27f6b365 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
 }
 
 
-static struct tcp_congestion_ops tcp_highspeed = {
+static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
 	.init		= hstcp_init,
 	.ssthresh	= hstcp_ssthresh,
 	.cong_avoid	= hstcp_cong_avoid,
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 7c94a495541..c1a8175361e 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state)
 	}
 }
 
-static struct tcp_congestion_ops htcp = {
+static struct tcp_congestion_ops htcp __read_mostly = {
 	.init		= htcp_init,
 	.ssthresh	= htcp_recalc_ssthresh,
 	.cong_avoid	= htcp_cong_avoid,
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 377bc934937..fe3ecf484b4 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
 }
 
-static struct tcp_congestion_ops tcp_hybla = {
+static struct tcp_congestion_ops tcp_hybla __read_mostly = {
 	.init		= hybla_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.min_cwnd	= tcp_reno_min_cwnd,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 00ca688d896..813b43a76fe 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 	}
 }
 
-static struct tcp_congestion_ops tcp_illinois = {
+static struct tcp_congestion_ops tcp_illinois __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65f6c040624..da782e7ab16 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
 	if (!cwnd)
-		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
+		cwnd = TCP_INIT_CWND;
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
@@ -3350,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				      net_invalid_timestamp()))
 				rtt_us = ktime_us_delta(ktime_get_real(),
 							last_ackt);
-			else if (ca_seq_rtt > 0)
+			else if (ca_seq_rtt >= 0)
 				rtt_us = jiffies_to_usecs(ca_seq_rtt);
 		}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 02f583b3744..f7e6c2c2d2b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -149,9 +149,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+	__be16 orig_sport, orig_dport;
 	struct rtable *rt;
 	__be32 daddr, nexthop;
-	int tmp;
 	int err;
 
 	if (addr_len < sizeof(struct sockaddr_in))
@@ -167,14 +167,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		nexthop = inet->opt->faddr;
 	}
 
-	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
-			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
-			       IPPROTO_TCP,
-			       inet->inet_sport, usin->sin_port, sk, 1);
-	if (tmp < 0) {
-		if (tmp == -ENETUNREACH)
+	orig_sport = inet->inet_sport;
+	orig_dport = usin->sin_port;
+	rt = ip_route_connect(nexthop, inet->inet_saddr,
+			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+			      IPPROTO_TCP,
+			      orig_sport, orig_dport, sk, true);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
-		return tmp;
+		return err;
 	}
 
 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
@@ -233,11 +236,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		goto failure;
 
-	err = ip_route_newports(&rt, IPPROTO_TCP,
-				inet->inet_sport, inet->inet_dport, sk);
-	if (err)
+	rt = ip_route_newports(rt, IPPROTO_TCP,
+			       orig_sport, orig_dport,
+			       inet->inet_sport, inet->inet_dport, sk);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		rt = NULL;
 		goto failure;
-
+	}
 	/* OK, now commit destination to socket. */
 	sk->sk_gso_type = SKB_GSO_TCPV4;
 	sk_setup_caps(sk, &rt->dst);
@@ -1341,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	    tcp_death_row.sysctl_tw_recycle &&
 	    (dst = inet_csk_route_req(sk, req)) != NULL &&
 	    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
-	    peer->daddr.a4 == saddr) {
+	    peer->daddr.addr.a4 == saddr) {
 		inet_peer_refcheck(peer);
 		if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 		    (s32)(peer->tcp_ts - req->ts_recent) >
@@ -1556,12 +1562,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		sock_rps_save_rxhash(sk, skb->rxhash);
-		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
 		}
-		TCP_CHECK_TIMER(sk);
 		return 0;
 	}
 
@@ -1583,13 +1587,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	} else
 		sock_rps_save_rxhash(sk, skb->rxhash);
 
-
-	TCP_CHECK_TIMER(sk);
 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
 		rsk = sk;
 		goto reset;
 	}
-	TCP_CHECK_TIMER(sk);
 	return 0;
 
 reset:
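
tcp_v4_connect() now gets the route straight from ip_route_connect() and ip_route_newports(), with failures encoded in the returned pointer rather than an int plus an output parameter. The ERR_PTR convention stores a small negative errno in the top range of the address space; a self-contained sketch of the idiom:

	#include <errno.h>
	#include <stdio.h>

	/* Userspace versions of the kernel's ERR_PTR/IS_ERR/PTR_ERR. */
	#define MAX_ERRNO 4095

	static inline void *ERR_PTR(long error) { return (void *)error; }
	static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
	static inline int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	struct rtable { int cost; };

	static struct rtable *route_lookup(int fail)
	{
		static struct rtable rt = { .cost = 1 };

		return fail ? ERR_PTR(-ENETUNREACH) : &rt;
	}

	int main(void)
	{
		struct rtable *rt = route_lookup(1);

		if (IS_ERR(rt))		/* errno travels inside the pointer */
			printf("err = %ld\n", PTR_ERR(rt));
		return 0;
	}
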
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index de870377fbb..656d431c99a 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
 		lp->last_drop = tcp_time_stamp;
 }
 
-static struct tcp_congestion_ops tcp_lp = {
+static struct tcp_congestion_ops tcp_lp __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_lp_init,
 	.ssthresh	= tcp_reno_ssthresh,
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index a76513779e2..8ce55b8aaec 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
 }
 
 
-static struct tcp_congestion_ops tcp_scalable = {
+static struct tcp_congestion_ops tcp_scalable __read_mostly = {
 	.ssthresh	= tcp_scalable_ssthresh,
 	.cong_avoid	= tcp_scalable_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74a6aa00365..ecd44b0c45f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data)
 		tcp_send_ack(sk);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
-	TCP_CHECK_TIMER(sk);
 
 out:
 	if (tcp_memory_pressure)
@@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data)
 		tcp_probe_timer(sk);
 		break;
 	}
-	TCP_CHECK_TIMER(sk);
 
 out:
 	sk_mem_reclaim(sk);
@@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		elapsed = keepalive_time_when(tp) - elapsed;
 	}
 
-	TCP_CHECK_TIMER(sk);
 	sk_mem_reclaim(sk);
 
 resched:
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c6743eec9b7..80fa2bfd7ed 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
-static struct tcp_congestion_ops tcp_vegas = {
+static struct tcp_congestion_ops tcp_vegas __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 38bc0b52d74..ac43cd747bc 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 	return max(tp->snd_cwnd >> 1U, 2U);
 }
 
-static struct tcp_congestion_ops tcp_veno = {
+static struct tcp_congestion_ops tcp_veno __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index a534dda5456..1b91bf48e27 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
 }
 
 
-static struct tcp_congestion_ops tcp_westwood = {
+static struct tcp_congestion_ops tcp_westwood __read_mostly = {
 	.init		= tcp_westwood_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index a0f24035889..dc7f43179c9 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 	return tp->snd_cwnd - reduction;
 }
 
-static struct tcp_congestion_ops tcp_yeah = {
+static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8157b17959e..588f47af5fa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -663,75 +663,72 @@ void udp_flush_pending_frames(struct sock *sk)
 EXPORT_SYMBOL(udp_flush_pending_frames);
 
 /**
- *	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
- *	@sk:	socket we are sending on
+ *	udp4_hwcsum  -  handle outgoing HW checksumming
 *	@skb:	sk_buff containing the filled-in UDP header
 *	        (checksum field must be zeroed out)
+ *	@src:	source IP address
+ *	@dst:	destination IP address
 */
-static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
-				 __be32 src, __be32 dst, int len)
+static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
{
-	unsigned int offset;
	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
+	int offset = skb_transport_offset(skb);
+	int len = skb->len - offset;
+	int hlen = len;
	__wsum csum = 0;

-	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+	if (!frags) {
		/*
		 * Only one fragment on the socket.
		 */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
+		uh->check = ~csum_tcpudp_magic(src, dst, len,
+					       IPPROTO_UDP, 0);
	} else {
		/*
		 * HW-checksum won't work as there are two or more
		 * fragments on the socket so that all csums of sk_buffs
		 * should be together
		 */
-		offset = skb_transport_offset(skb);
-		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		do {
+			csum = csum_add(csum, frags->csum);
+			hlen -= frags->len;
+		} while ((frags = frags->next));

+		csum = skb_checksum(skb, offset, hlen, csum);
		skb->ip_summed = CHECKSUM_NONE;

-		skb_queue_walk(&sk->sk_write_queue, skb) {
-			csum = csum_add(csum, skb->csum);
-		}
-
		uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;
	}
}

-/*
- * Push out all pending data as one UDP datagram. Socket is locked.
- */
-static int udp_push_pending_frames(struct sock *sk)
+static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport)
{
-	struct udp_sock  *up = udp_sk(sk);
+	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
-	struct flowi *fl = &inet->cork.fl;
-	struct sk_buff *skb;
	struct udphdr *uh;
+	struct rtable *rt = (struct rtable *)skb_dst(skb);
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);
+	int offset = skb_transport_offset(skb);
+	int len = skb->len - offset;
	__wsum csum = 0;

-	/* Grab the skbuff where UDP header space exists. */
-	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
-		goto out;
-
	/*
	 * Create a UDP header
	 */
	uh = udp_hdr(skb);
-	uh->source = fl->fl_ip_sport;
-	uh->dest = fl->fl_ip_dport;
-	uh->len = htons(up->len);
+	uh->source = inet->inet_sport;
+	uh->dest = dport;
+	uh->len = htons(len);
	uh->check = 0;

	if (is_udplite)				 /*     UDP-Lite      */
-		csum = udplite_csum_outgoing(sk, skb);
+		csum = udplite_csum(skb);

	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */

@@ -740,20 +737,20 @@ static int udp_push_pending_frames(struct sock *sk)
 
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
 
-		udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len);
+		udp4_hwcsum(skb, rt->rt_src, daddr);
 		goto send;
 
-	} else						 /*   `normal' UDP    */
-		csum = udp_csum_outgoing(sk, skb);
+	} else
+		csum = udp_csum(skb);
 
 	/* add protocol-dependent pseudo-header */
-	uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
+	uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len,
 				      sk->sk_protocol, csum);
 	if (uh->check == 0)
 		uh->check = CSUM_MANGLED_0;
 
 send:
-	err = ip_push_pending_frames(sk);
+	err = ip_send_skb(skb);
 	if (err) {
 		if (err == -ENOBUFS && !inet->recverr) {
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -763,6 +760,26 @@ send:
 	} else
 		UDP_INC_STATS_USER(sock_net(sk),
 				   UDP_MIB_OUTDATAGRAMS, is_udplite);
+	return err;
+}
+
+/*
+ * Push out all pending data as one UDP datagram. Socket is locked.
+ */
+static int udp_push_pending_frames(struct sock *sk)
+{
+	struct udp_sock *up = udp_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct flowi4 *fl4 = &inet->cork.fl.u.ip4;
+	struct sk_buff *skb;
+	int err = 0;
+
+	skb = ip_finish_skb(sk);
+	if (!skb)
+		goto out;
+
+	err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport);
+
 out:
 	up->len = 0;
 	up->pending = 0;
@@ -774,6 +791,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct udp_sock *up = udp_sk(sk);
+	struct flowi4 *fl4;
 	int ulen = len;
 	struct ipcm_cookie ipc;
 	struct rtable *rt = NULL;
@@ -785,6 +803,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int err, is_udplite = IS_UDPLITE(sk);
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+	struct sk_buff *skb;
 
 	if (len > 0xFFFF)
 		return -EMSGSIZE;
@@ -799,6 +818,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	ipc.opt = NULL;
 	ipc.tx_flags = 0;
 
+	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
+
 	if (up->pending) {
 		/*
 		 * There are pending frames.
@@ -888,20 +909,25 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		rt = (struct rtable *)sk_dst_check(sk, 0);
 
 	if (rt == NULL) {
-		struct flowi fl = { .oif = ipc.oif,
-				    .mark = sk->sk_mark,
-				    .fl4_dst = faddr,
-				    .fl4_src = saddr,
-				    .fl4_tos = tos,
-				    .proto = sk->sk_protocol,
-				    .flags = inet_sk_flowi_flags(sk),
-				    .fl_ip_sport = inet->inet_sport,
-				    .fl_ip_dport = dport };
+		struct flowi4 fl4 = {
+			.flowi4_oif = ipc.oif,
+			.flowi4_mark = sk->sk_mark,
+			.daddr = faddr,
+			.saddr = saddr,
+			.flowi4_tos = tos,
+			.flowi4_proto = sk->sk_protocol,
+			.flowi4_flags = (inet_sk_flowi_flags(sk) |
+					 FLOWI_FLAG_CAN_SLEEP),
+			.fl4_sport = inet->inet_sport,
+			.fl4_dport = dport,
+		};
 		struct net *net = sock_net(sk);
 
-		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(net, &rt, &fl, sk, 1);
-		if (err) {
+		security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
+		rt = ip_route_output_flow(net, &fl4, sk);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			rt = NULL;
 			if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 			goto out;
@@ -923,6 +949,17 @@ back_from_confirm:
 	if (!ipc.addr)
 		daddr = ipc.addr = rt->rt_dst;
 
+	/* Lockless fast path for the non-corking case. */
+	if (!corkreq) {
+		skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen,
+				  sizeof(struct udphdr), &ipc, &rt,
+				  msg->msg_flags);
+		err = PTR_ERR(skb);
+		if (skb && !IS_ERR(skb))
+			err = udp_send_skb(skb, daddr, dport);
+		goto out;
+	}
+
 	lock_sock(sk);
 	if (unlikely(up->pending)) {
 		/* The socket is already corked while preparing it. */
@@ -936,15 +973,15 @@ back_from_confirm:
 	/*
 	 *	Now cork the socket to pend data.
 	 */
-	inet->cork.fl.fl4_dst = daddr;
-	inet->cork.fl.fl_ip_dport = dport;
-	inet->cork.fl.fl4_src = saddr;
-	inet->cork.fl.fl_ip_sport = inet->inet_sport;
+	fl4 = &inet->cork.fl.u.ip4;
+	fl4->daddr = daddr;
+	fl4->saddr = saddr;
+	fl4->fl4_dport = dport;
+	fl4->fl4_sport = inet->inet_sport;
 	up->pending = AF_INET;
 
do_append_data:
	up->len += ulen;
-	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
	err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
			     sizeof(struct udphdr), &ipc, &rt,
			     corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
@@ -2199,7 +2236,7 @@ int udp4_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
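
The reworked udp4_hwcsum() folds each frag_list member's precomputed ->csum into a running sum with csum_add(), shrinks hlen accordingly, and lets skb_checksum() cover only the head. The folding is ones'-complement addition with the carry wrapped back in, which is what allows the per-fragment sums to combine in any order; a userspace stand-in for csum_add():

	#include <stdint.h>
	#include <stdio.h>

	/* Ones'-complement add with end-around carry, as in csum_add(). */
	static uint32_t csum_add(uint32_t csum, uint32_t addend)
	{
		uint32_t res = csum + addend;

		return res + (res < addend);	/* fold the carry back in */
	}

	int main(void)
	{
		uint32_t frag_csums[] = { 0x1a2b3c4d, 0x99999999, 0xffff0001 };
		uint32_t total = 0;

		for (int i = 0; i < 3; i++)
			total = csum_add(total, frag_csums[i]);
		printf("combined csum = 0x%08x\n", total);
		return 0;
	}
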
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b057d40adde..13e0e7f659f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -19,25 +19,23 @@
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
 static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
-					  xfrm_address_t *saddr,
-					  xfrm_address_t *daddr)
+					  const xfrm_address_t *saddr,
+					  const xfrm_address_t *daddr)
 {
-	struct flowi fl = {
-		.fl4_dst = daddr->a4,
-		.fl4_tos = tos,
+	struct flowi4 fl4 = {
+		.daddr = daddr->a4,
+		.flowi4_tos = tos,
 	};
-	struct dst_entry *dst;
 	struct rtable *rt;
-	int err;
 
 	if (saddr)
-		fl.fl4_src = saddr->a4;
+		fl4.saddr = saddr->a4;
+
+	rt = __ip_route_output_key(net, &fl4);
+	if (!IS_ERR(rt))
+		return &rt->dst;
 
-	err = __ip_route_output_key(net, &rt, &fl);
-	dst = &rt->dst;
-	if (err)
-		dst = ERR_PTR(err);
-	return dst;
+	return ERR_CAST(rt);
 }
 
 static int xfrm4_get_saddr(struct net *net,
@@ -56,9 +54,9 @@ static int xfrm4_get_saddr(struct net *net,
 	return 0;
 }
 
-static int xfrm4_get_tos(struct flowi *fl)
+static int xfrm4_get_tos(const struct flowi *fl)
 {
-	return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */
+	return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */
 }
 
 static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
@@ -68,11 +66,17 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 }
 
 static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
-			  struct flowi *fl)
+			  const struct flowi *fl)
 {
 	struct rtable *rt = (struct rtable *)xdst->route;
+	const struct flowi4 *fl4 = &fl->u.ip4;
 
-	xdst->u.rt.fl = *fl;
+	rt->rt_key_dst = fl4->daddr;
+	rt->rt_key_src = fl4->saddr;
+	rt->rt_tos = fl4->flowi4_tos;
+	rt->rt_iif = fl4->flowi4_iif;
+	rt->rt_oif = fl4->flowi4_oif;
+	rt->rt_mark = fl4->flowi4_mark;
 
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
@@ -99,9 +103,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
+	struct flowi4 *fl4 = &fl->u.ip4;
 
-	memset(fl, 0, sizeof(struct flowi));
-	fl->mark = skb->mark;
+	memset(fl4, 0, sizeof(struct flowi4));
+	fl4->flowi4_mark = skb->mark;
 
 	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
 		switch (iph->protocol) {
@@ -114,8 +119,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				__be16 *ports = (__be16 *)xprth;
 
-				fl->fl_ip_sport = ports[!!reverse];
-				fl->fl_ip_dport = ports[!reverse];
+				fl4->fl4_sport = ports[!!reverse];
+				fl4->fl4_dport = ports[!reverse];
 			}
 			break;
 
@@ -123,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
 				u8 *icmp = xprth;
 
-				fl->fl_icmp_type = icmp[0];
-				fl->fl_icmp_code = icmp[1];
+				fl4->fl4_icmp_type = icmp[0];
+				fl4->fl4_icmp_code = icmp[1];
 			}
 			break;
 
@@ -132,7 +137,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				__be32 *ehdr = (__be32 *)xprth;
 
-				fl->fl_ipsec_spi = ehdr[0];
+				fl4->fl4_ipsec_spi = ehdr[0];
 			}
 			break;
 
@@ -140,7 +145,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
 				__be32 *ah_hdr = (__be32*)xprth;
 
-				fl->fl_ipsec_spi = ah_hdr[1];
+				fl4->fl4_ipsec_spi = ah_hdr[1];
 			}
 			break;
 
@@ -148,7 +153,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				__be16 *ipcomp_hdr = (__be16 *)xprth;
 
-				fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
+				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 			}
 			break;
 
@@ -160,20 +165,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			if (greflags[0] & GRE_KEY) {
 				if (greflags[0] & GRE_CSUM)
 					gre_hdr++;
-				fl->fl_gre_key = gre_hdr[1];
+				fl4->fl4_gre_key = gre_hdr[1];
 			}
 		}
 		break;
 
 		default:
-			fl->fl_ipsec_spi = 0;
+			fl4->fl4_ipsec_spi = 0;
 			break;
 		}
 	}
-	fl->proto = iph->protocol;
-	fl->fl4_dst = reverse ? iph->saddr : iph->daddr;
-	fl->fl4_src = reverse ? iph->daddr : iph->saddr;
-	fl->fl4_tos = iph->tos;
+	fl4->flowi4_proto = iph->protocol;
+	fl4->daddr = reverse ? iph->saddr : iph->daddr;
+	fl4->saddr = reverse ? iph->daddr : iph->saddr;
+	fl4->flowi4_tos = iph->tos;
 }
 
 static inline int xfrm4_garbage_collect(struct dst_ops *ops)
@@ -196,8 +201,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
+	dst_destroy_metrics_generic(dst);
+
 	if (likely(xdst->u.rt.peer))
 		inet_putpeer(xdst->u.rt.peer);
+
 	xfrm_dst_destroy(xdst);
 }
 
@@ -215,6 +223,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
+	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
@@ -230,6 +239,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 	.get_tos =		xfrm4_get_tos,
 	.init_path =		xfrm4_init_path,
 	.fill_dst =		xfrm4_fill_dst,
+	.blackhole_route =	ipv4_blackhole_route,
 };
 
 #ifdef CONFIG_SYSCTL
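
In _decode_session4() the same two bytes serve both lookup directions: ports[!!reverse] picks the wire source port for a forward lookup and the wire destination for a reverse one, and ports[!reverse] the opposite. A compilable sketch of the indexing trick:

	#include <stdint.h>
	#include <stdio.h>

	static void decode_ports(const uint16_t ports[2], int reverse,
				 uint16_t *sport, uint16_t *dport)
	{
		/* ports[0] is the wire source, ports[1] the destination;
		 * !!reverse normalizes any non-zero flag to 1. */
		*sport = ports[!!reverse];
		*dport = ports[!reverse];
	}

	int main(void)
	{
		uint16_t ports[2] = { 12345, 80 };
		uint16_t s, d;

		decode_ports(ports, 0, &s, &d);
		printf("forward: sport=%u dport=%u\n", s, d);
		decode_ports(ports, 1, &s, &d);
		printf("reverse: sport=%u dport=%u\n", s, d);
		return 0;
	}
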
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 47947624ecc..1717c64628d 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -21,24 +21,26 @@ static int xfrm4_init_flags(struct xfrm_state *x)
 }
 
 static void
-__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
+__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 {
-	sel->daddr.a4 = fl->fl4_dst;
-	sel->saddr.a4 = fl->fl4_src;
-	sel->dport = xfrm_flowi_dport(fl);
+	const struct flowi4 *fl4 = &fl->u.ip4;
+
+	sel->daddr.a4 = fl4->daddr;
+	sel->saddr.a4 = fl4->saddr;
+	sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
 	sel->dport_mask = htons(0xffff);
-	sel->sport = xfrm_flowi_sport(fl);
+	sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
 	sel->sport_mask = htons(0xffff);
 	sel->family = AF_INET;
 	sel->prefixlen_d = 32;
 	sel->prefixlen_s = 32;
-	sel->proto = fl->proto;
-	sel->ifindex = fl->oif;
+	sel->proto = fl4->flowi4_proto;
+	sel->ifindex = fl4->flowi4_oif;
 }
 
 static void
-xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
-		   xfrm_address_t *daddr, xfrm_address_t *saddr)
+xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
+		   const xfrm_address_t *daddr, const xfrm_address_t *saddr)
 {
 	x->id = tmpl->id;
 	if (x->id.daddr.a4 == 0)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd6782e3a03..3daaf3c7703 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -718,12 +718,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	struct inet6_ifaddr *ifa, *ifn;
 	struct inet6_dev *idev = ifp->idev;
 	int state;
-	int hash;
 	int deleted = 0, onlink = 0;
 	unsigned long expires = jiffies;
 
-	hash = ipv6_addr_hash(&ifp->addr);
-
 	spin_lock_bh(&ifp->state_lock);
 	state = ifp->state;
 	ifp->state = INET6_IFADDR_STATE_DEAD;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 978e80e2c4a..4b13d5d8890 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -644,41 +644,34 @@ EXPORT_SYMBOL(inet6_unregister_protosw);
 
 int inet6_sk_rebuild_header(struct sock *sk)
 {
-	int err;
-	struct dst_entry *dst;
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct dst_entry *dst;
 
 	dst = __sk_dst_check(sk, np->dst_cookie);
 
 	if (dst == NULL) {
 		struct inet_sock *inet = inet_sk(sk);
 		struct in6_addr *final_p, final;
-		struct flowi fl;
+		struct flowi6 fl6;
 
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = sk->sk_protocol;
-		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-		fl.fl6_flowlabel = np->flow_label;
-		fl.oif = sk->sk_bound_dev_if;
-		fl.mark = sk->sk_mark;
-		fl.fl_ip_dport = inet->inet_dport;
-		fl.fl_ip_sport = inet->inet_sport;
-		security_sk_classify_flow(sk, &fl);
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = sk->sk_protocol;
+		ipv6_addr_copy(&fl6.daddr, &np->daddr);
+		ipv6_addr_copy(&fl6.saddr, &np->saddr);
+		fl6.flowlabel = np->flow_label;
+		fl6.flowi6_oif = sk->sk_bound_dev_if;
+		fl6.flowi6_mark = sk->sk_mark;
+		fl6.fl6_dport = inet->inet_dport;
+		fl6.fl6_sport = inet->inet_sport;
+		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-		final_p = fl6_update_dst(&fl, np->opt, &final);
+		final_p = fl6_update_dst(&fl6, np->opt, &final);
 
-		err = ip6_dst_lookup(sk, &dst, &fl);
-		if (err) {
+		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+		if (IS_ERR(dst)) {
 			sk->sk_route_caps = 0;
-			return err;
-		}
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
-			sk->sk_err_soft = -err;
-			return err;
+			sk->sk_err_soft = -PTR_ERR(dst);
+			return PTR_ERR(dst);
 		}
 
 		__ip6_dst_store(sk, dst, NULL, NULL);
@@ -772,7 +765,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct ipv6hdr *ipv6h;
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 1aba54ae53c..2195ae65192 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -409,7 +409,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
-	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
+	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg, 0, skb->len);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 320bdb877ee..16560336eb7 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,7 +40,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *daddr, *final_p, final;
 	struct dst_entry *dst;
-	struct flowi fl;
+	struct flowi6 fl6;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct ipv6_txoptions *opt;
 	int addr_type;
@@ -59,11 +59,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (usin->sin6_family != AF_INET6)
 		return -EAFNOSUPPORT;
 
-	memset(&fl, 0, sizeof(fl));
+	memset(&fl6, 0, sizeof(fl6));
 	if (np->sndflow) {
-		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
 			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
@@ -137,7 +137,7 @@ ipv4_connected:
 	}
 
 	ipv6_addr_copy(&np->daddr, daddr);
-	np->flow_label = fl.fl6_flowlabel;
+	np->flow_label = fl6.flowlabel;
 
 	inet->inet_dport = usin->sin6_port;
 
@@ -146,53 +146,46 @@ ipv4_connected:
 	 *	destination cache for it.
 	 */
 
-	fl.proto = sk->sk_protocol;
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.oif = sk->sk_bound_dev_if;
-	fl.mark = sk->sk_mark;
-	fl.fl_ip_dport = inet->inet_dport;
-	fl.fl_ip_sport = inet->inet_sport;
+	fl6.flowi6_proto = sk->sk_protocol;
+	ipv6_addr_copy(&fl6.daddr, &np->daddr);
+	ipv6_addr_copy(&fl6.saddr, &np->saddr);
+	fl6.flowi6_oif = sk->sk_bound_dev_if;
+	fl6.flowi6_mark = sk->sk_mark;
+	fl6.fl6_dport = inet->inet_dport;
+	fl6.fl6_sport = inet->inet_sport;
 
-	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
-		fl.oif = np->mcast_oif;
+	if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
+		fl6.flowi6_oif = np->mcast_oif;
 
-	security_sk_classify_flow(sk, &fl);
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
 	opt = flowlabel ? flowlabel->opt : np->opt;
-	final_p = fl6_update_dst(&fl, opt, &final);
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+	err = 0;
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto out;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
-	if (err < 0) {
-		if (err == -EREMOTE)
-			err = ip6_dst_blackhole(sk, &dst, &fl);
-		if (err < 0)
-			goto out;
 	}
 
 	/* source address lookup done in ip6_dst_lookup */
 
 	if (ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+		ipv6_addr_copy(&np->saddr, &fl6.saddr);
 
 	if (ipv6_addr_any(&np->rcv_saddr)) {
-		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
+		ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr);
 		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 		if (sk->sk_prot->rehash)
 			sk->sk_prot->rehash(sk);
 	}
 
 	ip6_dst_store(sk, dst,
-		      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
+		      ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
 		      &np->daddr : NULL,
 #ifdef CONFIG_IPV6_SUBTREES
-		      ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
+		      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
 		      &np->saddr :
 #endif
 		      NULL);
@@ -238,7 +231,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	kfree_skb(skb);
 }
 
-void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
+void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sock_exterr_skb *serr;
@@ -257,7 +250,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	skb_put(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
-	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+	ipv6_addr_copy(&iph->daddr, &fl6->daddr);
 
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
@@ -268,7 +261,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
 	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
-	serr->port = fl->fl_ip_dport;
+	serr->port = fl6->fl6_dport;
 
 	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 	skb_reset_transport_header(skb);
@@ -277,7 +270,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	kfree_skb(skb);
 }
 
-void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *iph;
@@ -294,7 +287,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
 	skb_put(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
-	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+	ipv6_addr_copy(&iph->daddr, &fl6->daddr);
 
 	mtu_info = IP6CBMTU(skb);
 	if (!mtu_info) {
@@ -306,7 +299,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
 	mtu_info->ip6m_addr.sin6_family = AF_INET6;
 	mtu_info->ip6m_addr.sin6_port = 0;
 	mtu_info->ip6m_addr.sin6_flowinfo = 0;
-	mtu_info->ip6m_addr.sin6_scope_id = fl->oif;
+	mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
 	ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
 
 	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
@@ -600,7 +593,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 }
 
 int datagram_send_ctl(struct net *net,
-		      struct msghdr *msg, struct flowi *fl,
+		      struct msghdr *msg, struct flowi6 *fl6,
 		      struct ipv6_txoptions *opt,
 		      int *hlimit, int *tclass, int *dontfrag)
 {
@@ -636,16 +629,17 @@ int datagram_send_ctl(struct net *net,
 			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 
 			if (src_info->ipi6_ifindex) {
-				if (fl->oif && src_info->ipi6_ifindex != fl->oif)
+				if (fl6->flowi6_oif &&
+				    src_info->ipi6_ifindex != fl6->flowi6_oif)
 					return -EINVAL;
-				fl->oif = src_info->ipi6_ifindex;
+				fl6->flowi6_oif = src_info->ipi6_ifindex;
 			}
 
 			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
 			rcu_read_lock();
-			if (fl->oif) {
-				dev = dev_get_by_index_rcu(net, fl->oif);
+			if (fl6->flowi6_oif) {
+				dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
 				if (!dev) {
 					rcu_read_unlock();
 					return -ENODEV;
@@ -661,7 +655,7 @@ int datagram_send_ctl(struct net *net,
 					    strict ? dev : NULL, 0))
 				err = -EINVAL;
 			else
-				ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
+				ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr);
 		}
 
 		rcu_read_unlock();
@@ -678,13 +672,13 @@ int datagram_send_ctl(struct net *net,
 				goto exit_f;
 			}
 
-			if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
-				if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+			if (fl6->flowlabel&IPV6_FLOWINFO_MASK) {
+				if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
 					err = -EINVAL;
 					goto exit_f;
 				}
 			}
-			fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
+			fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
 			break;
 
 		case IPV6_2292HOPOPTS:
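
The datagram.c hunks are a mechanical rename from the shared struct flowi to the typed struct flowi6; the field correspondence used throughout this series is:

	fl.proto         ->  fl6.flowi6_proto
	fl.oif           ->  fl6.flowi6_oif
	fl.mark          ->  fl6.flowi6_mark
	fl.fl6_dst       ->  fl6.daddr
	fl.fl6_src       ->  fl6.saddr
	fl.fl6_flowlabel ->  fl6.flowlabel
	fl.fl_ip_sport   ->  fl6.fl6_sport
	fl.fl_ip_dport   ->  fl6.fl6_dport
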
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1b5c9825743..5aa8ec88f19 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -54,16 +54,20 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
 /*
  * Allocate an AEAD request structure with extra space for SG and IV.
  *
- * For alignment considerations the IV is placed at the front, followed
- * by the request and finally the SG list.
+ * For alignment considerations the upper 32 bits of the sequence number are
+ * placed at the front, if present. Followed by the IV, the request and finally
+ * the SG list.
  *
  * TODO: Use spare space in skb for this where possible.
  */
-static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
 {
 	unsigned int len;
 
-	len = crypto_aead_ivsize(aead);
+	len = seqihlen;
+
+	len += crypto_aead_ivsize(aead);
+
 	if (len) {
 		len += crypto_aead_alignmask(aead) &
 		       ~(crypto_tfm_ctx_alignment() - 1);
@@ -78,10 +82,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
 	return kmalloc(len, GFP_ATOMIC);
 }
 
-static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
+static inline __be32 *esp_tmp_seqhi(void *tmp)
+{
+	return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+}
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
 {
 	return crypto_aead_ivsize(aead) ?
-	       PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
+	       PTR_ALIGN((u8 *)tmp + seqhilen,
+			 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
 }
 
 static inline struct aead_givcrypt_request *esp_tmp_givreq(
@@ -145,8 +155,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	int plen;
 	int tfclen;
 	int nfrags;
+	int assoclen;
+	int sglists;
+	int seqhilen;
 	u8 *iv;
 	u8 *tail;
+	__be32 *seqhi;
 	struct esp_data *esp = x->data;
 
 	/* skb is pure payload to encrypt */
@@ -175,14 +189,25 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto error;
 	nfrags = err;
 
-	tmp = esp_alloc_tmp(aead, nfrags + 1);
+	assoclen = sizeof(*esph);
+	sglists = 1;
+	seqhilen = 0;
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists += 2;
+		seqhilen += sizeof(__be32);
+		assoclen += seqhilen;
+	}
+
+	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
 	if (!tmp)
 		goto error;
 
-	iv = esp_tmp_iv(aead, tmp);
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_givreq(aead, iv);
 	asg = esp_givreq_sg(aead, req);
-	sg = asg + 1;
+	sg = asg + sglists;
 
 	/* Fill padding... */
 	tail = skb_tail_pointer(trailer);
@@ -204,19 +229,27 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	*skb_mac_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
-	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
 		     esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
 		     clen + alen);
-	sg_init_one(asg, esph, sizeof(*esph));
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		sg_init_table(asg, 3);
+		sg_set_buf(asg, &esph->spi, sizeof(__be32));
+		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+		sg_set_buf(asg + 1, seqhi, seqhilen);
+		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+	} else
+		sg_init_one(asg, esph, sizeof(*esph));
 
 	aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
 	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
-	aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
+	aead_givcrypt_set_assoc(req, asg, assoclen);
 	aead_givcrypt_set_giv(req, esph->enc_data,
-			      XFRM_SKB_CB(skb)->seq.output);
+			      XFRM_SKB_CB(skb)->seq.output.low);
 
 	ESP_SKB_CB(skb)->tmp = tmp;
 	err = crypto_aead_givencrypt(req);
@@ -292,8 +325,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct sk_buff *trailer;
 	int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
 	int nfrags;
+	int assoclen;
+	int sglists;
+	int seqhilen;
 	int ret = 0;
 	void *tmp;
+	__be32 *seqhi;
 	u8 *iv;
 	struct scatterlist *sg;
 	struct scatterlist *asg;
@@ -314,12 +351,24 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	ret = -ENOMEM;
-	tmp = esp_alloc_tmp(aead, nfrags + 1);
+
+	assoclen = sizeof(*esph);
+	sglists = 1;
+	seqhilen = 0;
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists += 2;
+		seqhilen += sizeof(__be32);
+		assoclen += seqhilen;
+	}
+
+	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
 	if (!tmp)
 		goto out;
 
 	ESP_SKB_CB(skb)->tmp = tmp;
-	iv = esp_tmp_iv(aead, tmp);
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_req(aead, iv);
 	asg = esp_req_sg(aead, req);
 	sg = asg + 1;
@@ -333,11 +382,19 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
-	sg_init_one(asg, esph, sizeof(*esph));
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		sg_init_table(asg, 3);
+		sg_set_buf(asg, &esph->spi, sizeof(__be32));
+		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+		sg_set_buf(asg + 1, seqhi, seqhilen);
+		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+	} else
+		sg_init_one(asg, esph, sizeof(*esph));
 
 	aead_request_set_callback(req, 0, esp_input_done, skb);
 	aead_request_set_crypt(req, sg, sg, elen, iv);
-	aead_request_set_assoc(req, asg, sizeof(*esph));
+	aead_request_set_assoc(req, asg, assoclen);
 
 	ret = crypto_aead_decrypt(req);
 	if (ret == -EINPROGRESS)
@@ -443,10 +500,20 @@ static int esp_init_authenc(struct xfrm_state *x)
 		goto error;
 
 	err = -ENAMETOOLONG;
-	if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
-		     x->aalg ? x->aalg->alg_name : "digest_null",
-		     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
-		goto error;
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+			     "authencesn(%s,%s)",
+			     x->aalg ? x->aalg->alg_name : "digest_null",
+			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			goto error;
+	} else {
+		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+			     "authenc(%s,%s)",
+			     x->aalg ? x->aalg->alg_name : "digest_null",
+			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			goto error;
+	}
 
 	aead = crypto_alloc_aead(authenc_name, 0, 0);
 	err = PTR_ERR(aead);
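
The esp6 hunks above implement extended sequence numbers (ESN): with XFRM_STATE_ESN set, the authenticated associated data grows from the plain ESP header (SPI + 32-bit sequence number) to SPI, the upper 32 sequence bits and the lower 32 bits, carried as a three-entry scatterlist, with the upper bits parked at the front of the per-packet temp buffer ahead of the IV. A minimal user-space sketch of that buffer layout, assuming a hypothetical 8-byte IV and simplified alignment versus the kernel's crypto_aead_alignmask() handling:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Round p up to an a-byte boundary (a must be a power of two). */
#define PTR_ALIGN(p, a) ((void *)(((uintptr_t)(p) + ((a) - 1)) & ~((uintptr_t)(a) - 1)))

int main(void)
{
	size_t seqhilen = sizeof(uint32_t); /* nonzero only with ESN */
	size_t ivsize = 8;                  /* hypothetical cipher IV size */
	unsigned char *tmp = malloc(seqhilen + ivsize + 64);

	/* The seq_hi word sits at the front of the buffer... */
	uint32_t *seqhi = PTR_ALIGN(tmp, __alignof__(uint32_t));
	/* ...and the IV starts seqhilen bytes in, aligned for the cipher. */
	uint8_t *iv = PTR_ALIGN(tmp + seqhilen, 16);

	printf("tmp=%p seqhi=%p iv=%p\n", (void *)tmp, (void *)seqhi, (void *)iv);
	free(tmp);
	return 0;
}

This mirrors why esp_tmp_iv() gained a seqhilen argument: the IV is simply displaced by the space reserved for the high sequence bits.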
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 262f105d23b..79a485e8a70 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -876,22 +876,22 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
  * fl6_update_dst - update flowi destination address with info given
  * by srcrt option, if any.
  *
- * @fl: flowi for which fl6_dst is to be updated
+ * @fl6: flowi6 for which daddr is to be updated
  * @opt: struct ipv6_txoptions in which to look for srcrt opt
- * @orig: copy of original fl6_dst address if modified
+ * @orig: copy of original daddr address if modified
  *
  * Returns NULL if no txoptions or no srcrt, otherwise returns orig
- * and initial value of fl->fl6_dst set in orig
+ * and initial value of fl6->daddr set in orig
  */
-struct in6_addr *fl6_update_dst(struct flowi *fl,
+struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
 				const struct ipv6_txoptions *opt,
 				struct in6_addr *orig)
 {
 	if (!opt || !opt->srcrt)
 		return NULL;
 
-	ipv6_addr_copy(orig, &fl->fl6_dst);
-	ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
+	ipv6_addr_copy(orig, &fl6->daddr);
+	ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr);
 	return orig;
 }
 
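
fl6_update_dst() temporarily retargets the flow at the first hop of a source routing header; the caller routes toward that hop and restores the true destination afterwards. A kernel-style fragment of the caller pattern (a sketch, not standalone; it mirrors what inet6_csk_route_req() does further down, with error handling elided):

	struct in6_addr final, *final_p;

	/* If opt carries a srcrt, fl6.daddr becomes the first hop and
	 * final holds the real destination; otherwise final_p is NULL. */
	final_p = fl6_update_dst(&fl6, np->opt, &final);

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
	/* ip6_dst_lookup_flow() copies final_p back into fl6.daddr before
	 * the xfrm pass, so headers carry the true destination. */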
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d829874d894..34d244df907 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -29,7 +29,7 @@ struct fib6_rule
 	u8 tclass;
 };
 
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
 	struct fib_lookup_arg arg = {
@@ -37,7 +37,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
 		.flags = FIB_LOOKUP_NOREF,
 	};
 
-	fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg);
+	fib_rules_lookup(net->ipv6.fib6_rules_ops,
+			 flowi6_to_flowi(fl6), flags, &arg);
 
 	if (arg.result)
 		return arg.result;
@@ -49,6 +50,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			    int flags, struct fib_lookup_arg *arg)
 {
+	struct flowi6 *flp6 = &flp->u.ip6;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
 	struct net *net = rule->fr_net;
@@ -71,7 +73,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 
 	table = fib6_get_table(net, rule->table);
 	if (table)
-		rt = lookup(net, table, flp, flags);
+		rt = lookup(net, table, flp6, flags);
 
 	if (rt != net->ipv6.ip6_null_entry) {
 		struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -86,14 +88,14 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 
 			if (ipv6_dev_get_saddr(net,
 					       ip6_dst_idev(&rt->dst)->dev,
-					       &flp->fl6_dst,
+					       &flp6->daddr,
 					       rt6_flags2srcprefs(flags),
 					       &saddr))
 				goto again;
 			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
 					       r->src.plen))
 				goto again;
-			ipv6_addr_copy(&flp->fl6_src, &saddr);
+			ipv6_addr_copy(&flp6->saddr, &saddr);
 		}
 		goto out;
 	}
@@ -113,9 +115,10 @@ out:
 static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
 	struct fib6_rule *r = (struct fib6_rule *) rule;
+	struct flowi6 *fl6 = &fl->u.ip6;
 
 	if (r->dst.plen &&
-	    !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
+	    !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen))
 		return 0;
 
 	/*
@@ -125,14 +128,14 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	 */
 	if (r->src.plen) {
 		if (flags & RT6_LOOKUP_F_HAS_SADDR) {
-			if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr,
+			if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr,
 					       r->src.plen))
 				return 0;
 		} else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
 			return 0;
 	}
 
-	if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
+	if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff))
 		return 0;
 
 	return 1;
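
The fl/flp to fl6/flp6 churn in fib6_rules.c comes from this series' new flow layout: struct flowi is now a protocol-neutral container with the per-family flows in a union, so generic fib_rules code keeps taking struct flowi * while IPv6 code addresses the flowi6 member. A trimmed-down, compilable sketch of that shape (the real definitions live in include/net/flow.h, where flowi6_to_flowi() is implemented with container_of(); the field set here is illustrative only):

#include <stdio.h>

struct flowi6 { int flowi6_oif; unsigned char flowi6_proto; };
struct flowi4 { int flowi4_oif; unsigned char flowi4_proto; };

struct flowi {
	union {
		struct flowi4 ip4;
		struct flowi6 ip6;
	} u;
};

/* Works here because the union is the first member; the kernel uses
 * container_of() and does not depend on that. */
static struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
{
	return (struct flowi *)fl6;
}

int main(void)
{
	struct flowi fl = { .u.ip6 = { .flowi6_oif = 2, .flowi6_proto = 58 } };
	struct flowi6 *fl6 = &fl.u.ip6;	/* what fib6_rule_action() does */

	printf("oif=%d proto=%d\n", fl6->flowi6_oif,
	       flowi6_to_flowi(fl6)->u.ip6.flowi6_proto);
	return 0;
}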
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 03e62f94ff8..83cb4f9add8 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -157,32 +157,32 @@ static int is_ineligible(struct sk_buff *skb)
 /*
  * Check the ICMP output rate limit
  */
-static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
-				     struct flowi *fl)
+static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+				      struct flowi6 *fl6)
 {
 	struct dst_entry *dst;
 	struct net *net = sock_net(sk);
-	int res = 0;
+	bool res = false;
 
 	/* Informational messages are not limited. */
 	if (type & ICMPV6_INFOMSG_MASK)
-		return 1;
+		return true;
 
 	/* Do not limit pmtu discovery, it would break it. */
 	if (type == ICMPV6_PKT_TOOBIG)
-		return 1;
+		return true;
 
 	/*
 	 * Look up the output route.
 	 * XXX: perhaps the expire for routing entries cloned by
 	 * this lookup should be more aggressive (not longer than timeout).
 	 */
-	dst = ip6_route_output(net, sk, fl);
+	dst = ip6_route_output(net, sk, fl6);
 	if (dst->error) {
 		IP6_INC_STATS(net, ip6_dst_idev(dst),
 			      IPSTATS_MIB_OUTNOROUTES);
 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
-		res = 1;
+		res = true;
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
 		int tmo = net->ipv6.sysctl.icmpv6_time;
@@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		res = xrlim_allow(dst, tmo);
+		if (!rt->rt6i_peer)
+			rt6_bind_peer(rt, 1);
+		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
 	}
 	dst_release(dst);
 	return res;
@@ -215,7 +217,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
 	return (*op & 0xC0) == 0x80;
 }
 
-static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
+static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
 {
 	struct sk_buff *skb;
 	struct icmp6hdr *icmp6h;
@@ -231,9 +233,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
 		skb->csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), skb->csum);
-		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
-						      &fl->fl6_dst,
-						      len, fl->proto,
+		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+						      &fl6->daddr,
+						      len, fl6->flowi6_proto,
 						      skb->csum);
 	} else {
 		__wsum tmp_csum = 0;
@@ -244,9 +246,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 
 		tmp_csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), tmp_csum);
-		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
-						      &fl->fl6_dst,
-						      len, fl->proto,
+		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+						      &fl6->daddr,
+						      len, fl6->flowi6_proto,
 						      tmp_csum);
 	}
 	ip6_push_pending_frames(sk);
@@ -298,6 +300,68 @@ static void mip6_addr_swap(struct sk_buff *skb)
 static inline void mip6_addr_swap(struct sk_buff *skb) {}
 #endif
 
+static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+					     struct sock *sk, struct flowi6 *fl6)
+{
+	struct dst_entry *dst, *dst2;
+	struct flowi6 fl2;
+	int err;
+
+	err = ip6_dst_lookup(sk, &dst, fl6);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * We won't send icmp if the destination is known
+	 * anycast.
+	 */
+	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+		dst_release(dst);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* No need to clone since we're just using its address. */
+	dst2 = dst;
+
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
+	if (!IS_ERR(dst)) {
+		if (dst != dst2)
+			return dst;
+	} else {
+		if (PTR_ERR(dst) == -EPERM)
+			dst = NULL;
+		else
+			return dst;
+	}
+
+	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+	if (err)
+		goto relookup_failed;
+
+	err = ip6_dst_lookup(sk, &dst2, &fl2);
+	if (err)
+		goto relookup_failed;
+
+	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
+	if (!IS_ERR(dst2)) {
+		dst_release(dst);
+		dst = dst2;
+	} else {
+		err = PTR_ERR(dst2);
+		if (err == -EPERM) {
+			dst_release(dst);
+			return dst2;
+		} else
+			goto relookup_failed;
+	}
+
+relookup_failed:
+	if (dst)
+		return dst;
+	return ERR_PTR(err);
+}
+
 /*
  * Send an ICMP message in response to a packet in error
  */
@@ -310,10 +374,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
 	struct dst_entry *dst;
-	struct dst_entry *dst2;
 	struct icmp6hdr tmp_hdr;
-	struct flowi fl;
-	struct flowi fl2;
+	struct flowi6 fl6;
 	struct icmpv6_msg msg;
 	int iif = 0;
 	int addr_type = 0;
@@ -380,22 +442,22 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 
 	mip6_addr_swap(skb);
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_ICMPV6;
+	ipv6_addr_copy(&fl6.daddr, &hdr->saddr);
 	if (saddr)
-		ipv6_addr_copy(&fl.fl6_src, saddr);
-	fl.oif = iif;
-	fl.fl_icmp_type = type;
-	fl.fl_icmp_code = code;
-	security_skb_classify_flow(skb, &fl);
+		ipv6_addr_copy(&fl6.saddr, saddr);
+	fl6.flowi6_oif = iif;
+	fl6.fl6_icmp_type = type;
+	fl6.fl6_icmp_code = code;
+	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
 	np = inet6_sk(sk);
 
-	if (!icmpv6_xrlim_allow(sk, type, &fl))
+	if (!icmpv6_xrlim_allow(sk, type, &fl6))
 		goto out;
 
 	tmp_hdr.icmp6_type = type;
@@ -403,61 +465,14 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	tmp_hdr.icmp6_cksum = 0;
 	tmp_hdr.icmp6_pointer = htonl(info);
 
-	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
-		fl.oif = np->mcast_oif;
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
+	if (IS_ERR(dst))
 		goto out;
 
-	/*
-	 * We won't send icmp if the destination is known
-	 * anycast.
-	 */
-	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
-		goto out_dst_release;
-	}
-
-	/* No need to clone since we're just using its address. */
-	dst2 = dst;
-
-	err = xfrm_lookup(net, &dst, &fl, sk, 0);
-	switch (err) {
-	case 0:
-		if (dst != dst2)
-			goto route_done;
-		break;
-	case -EPERM:
-		dst = NULL;
-		break;
-	default:
-		goto out;
-	}
-
-	if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
-		goto relookup_failed;
-
-	if (ip6_dst_lookup(sk, &dst2, &fl2))
-		goto relookup_failed;
-
-	err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
-	switch (err) {
-	case 0:
-		dst_release(dst);
-		dst = dst2;
-		break;
-	case -EPERM:
-		goto out_dst_release;
-	default:
-relookup_failed:
-		if (!dst)
-			goto out;
-		break;
-	}
-
-route_done:
-	if (ipv6_addr_is_multicast(&fl.fl6_dst))
+	if (ipv6_addr_is_multicast(&fl6.daddr))
 		hlimit = np->mcast_hops;
 	else
 		hlimit = np->hop_limit;
@@ -480,14 +495,14 @@ route_done:
 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
 			      len + sizeof(struct icmp6hdr),
 			      sizeof(struct icmp6hdr), hlimit,
-			      np->tclass, NULL, &fl, (struct rt6_info*)dst,
+			      np->tclass, NULL, &fl6, (struct rt6_info*)dst,
 			      MSG_DONTWAIT, np->dontfrag);
 	if (err) {
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
-	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
+	err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr));
 
 out_put:
 	if (likely(idev != NULL))
@@ -509,7 +524,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	struct in6_addr *saddr = NULL;
 	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct icmp6hdr tmp_hdr;
-	struct flowi fl;
+	struct flowi6 fl6;
 	struct icmpv6_msg msg;
 	struct dst_entry *dst;
 	int err = 0;
@@ -523,30 +538,31 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_ICMPV6;
+	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
 	if (saddr)
-		ipv6_addr_copy(&fl.fl6_src, saddr);
-	fl.oif = skb->dev->ifindex;
-	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
-	security_skb_classify_flow(skb, &fl);
+		ipv6_addr_copy(&fl6.saddr, saddr);
+	fl6.flowi6_oif = skb->dev->ifindex;
+	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
 	np = inet6_sk(sk);
 
-	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
-		fl.oif = np->mcast_oif;
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
+	err = ip6_dst_lookup(sk, &dst, &fl6);
 	if (err)
 		goto out;
-	if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0)
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
+	if (IS_ERR(dst))
 		goto out;
 
-	if (ipv6_addr_is_multicast(&fl.fl6_dst))
+	if (ipv6_addr_is_multicast(&fl6.daddr))
 		hlimit = np->mcast_hops;
 	else
 		hlimit = np->hop_limit;
@@ -560,7 +576,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	msg.type = ICMPV6_ECHO_REPLY;
 
 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
-				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
+				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
 				(struct rt6_info*)dst, MSG_DONTWAIT,
 				np->dontfrag);
 
@@ -569,7 +585,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
-	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
+	err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
 
 out_put:
 	if (likely(idev != NULL))
@@ -768,20 +784,20 @@ drop_no_count:
 	return 0;
 }
 
-void icmpv6_flow_init(struct sock *sk, struct flowi *fl,
+void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 		      u8 type,
 		      const struct in6_addr *saddr,
 		      const struct in6_addr *daddr,
 		      int oif)
 {
-	memset(fl, 0, sizeof(*fl));
-	ipv6_addr_copy(&fl->fl6_src, saddr);
-	ipv6_addr_copy(&fl->fl6_dst, daddr);
-	fl->proto = IPPROTO_ICMPV6;
-	fl->fl_icmp_type = type;
-	fl->fl_icmp_code = 0;
-	fl->oif = oif;
-	security_sk_classify_flow(sk, fl);
+	memset(fl6, 0, sizeof(*fl6));
+	ipv6_addr_copy(&fl6->saddr, saddr);
+	ipv6_addr_copy(&fl6->daddr, daddr);
+	fl6->flowi6_proto = IPPROTO_ICMPV6;
+	fl6->fl6_icmp_type = type;
+	fl6->fl6_icmp_code = 0;
+	fl6->flowi6_oif = oif;
+	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 }
 
 /*
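
The icmp.c conversion also switches to the pointer-encoded error convention: xfrm_lookup() and the new icmpv6_route_lookup() return either a valid dst or ERR_PTR(-errno), replacing the old out-parameter-plus-int style. A small user-space sketch of the helpers involved (simplified copies of the kernel's err.h macros, for illustration only):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	/* errors occupy the top 4095 values of the address space */
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static void *route_lookup(int fail)
{
	static int dst;	/* stand-in for a struct dst_entry */
	return fail ? ERR_PTR(-EPERM) : &dst;
}

int main(void)
{
	void *dst = route_lookup(1);

	if (IS_ERR(dst))
		printf("lookup failed: %ld\n", PTR_ERR(dst));
	return 0;
}

Folding the error into the returned pointer lets callers like icmpv6_send() test a single value with IS_ERR() instead of tracking a separate err variable and a dst out-parameter.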
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d144e629d2b..16605465046 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -61,26 +61,21 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *final_p, final;
 	struct dst_entry *dst;
-	struct flowi fl;
+	struct flowi6 fl6;
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-	final_p = fl6_update_dst(&fl, np->opt, &final);
-	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
-	fl.oif = sk->sk_bound_dev_if;
-	fl.mark = sk->sk_mark;
-	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_rsk(req)->loc_port;
-	security_req_classify_flow(req, &fl);
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_TCP;
+	ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
+	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
+	fl6.flowi6_oif = sk->sk_bound_dev_if;
+	fl6.flowi6_mark = sk->sk_mark;
+	fl6.fl6_dport = inet_rsk(req)->rmt_port;
+	fl6.fl6_sport = inet_rsk(req)->loc_port;
+	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
-	if (ip6_dst_lookup(sk, &dst, &fl))
-		return NULL;
-
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+	if (IS_ERR(dst))
 		return NULL;
 
 	return dst;
@@ -213,42 +208,34 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flowi fl;
+	struct flowi6 fl6;
 	struct dst_entry *dst;
 	struct in6_addr *final_p, final;
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = sk->sk_protocol;
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.fl6_flowlabel = np->flow_label;
-	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
-	fl.oif = sk->sk_bound_dev_if;
-	fl.mark = sk->sk_mark;
-	fl.fl_ip_sport = inet->inet_sport;
-	fl.fl_ip_dport = inet->inet_dport;
-	security_sk_classify_flow(sk, &fl);
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = sk->sk_protocol;
+	ipv6_addr_copy(&fl6.daddr, &np->daddr);
+	ipv6_addr_copy(&fl6.saddr, &np->saddr);
+	fl6.flowlabel = np->flow_label;
+	IP6_ECN_flow_xmit(sk, fl6.flowlabel);
+	fl6.flowi6_oif = sk->sk_bound_dev_if;
+	fl6.flowi6_mark = sk->sk_mark;
+	fl6.fl6_sport = inet->inet_sport;
+	fl6.fl6_dport = inet->inet_dport;
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	final_p = fl6_update_dst(&fl, np->opt, &final);
+	final_p = fl6_update_dst(&fl6, np->opt, &final);
 
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 
 	if (dst == NULL) {
-		int err = ip6_dst_lookup(sk, &dst, &fl);
-
-		if (err) {
-			sk->sk_err_soft = -err;
-			kfree_skb(skb);
-			return err;
-		}
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
+		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
 
-		if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
+		if (IS_ERR(dst)) {
+			sk->sk_err_soft = -PTR_ERR(dst);
 			sk->sk_route_caps = 0;
 			kfree_skb(skb);
-			return err;
+			return PTR_ERR(dst);
 		}
 
 		__inet6_csk_dst_store(sk, dst, NULL, NULL);
@@ -257,9 +244,9 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	skb_dst_set(skb, dst_clone(dst));
 
 	/* Restore final destination back after routing done */
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl6.daddr, &np->daddr);
 
-	return ip6_xmit(sk, skb, &fl, np->opt);
+	return ip6_xmit(sk, skb, &fl6, np->opt);
 }
 
 EXPORT_SYMBOL_GPL(inet6_csk_xmit);
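
inet6_csk_route_req() and inet6_csk_xmit() shrink because ip6_dst_lookup_flow(), added in the ip6_output.c part of this diff, folds three caller-side steps into one call. A before/after fragment in kernel style (a sketch; error labels elided):

	/* Old pattern (struct flowi, dst passed by reference): */
	if (ip6_dst_lookup(sk, &dst, &fl))
		return NULL;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);
	if (xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0) < 0)
		return NULL;

	/* New pattern: route lookup, final-destination fixup and the xfrm
	 * pass collapse into a single call returning a dst or ERR_PTR(). */
	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
	if (IS_ERR(dst))
		return NULL;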
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 633a6c26613..b5319723370 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -124,7 +124,7 @@ out:
 }
 EXPORT_SYMBOL(__inet6_lookup_established);
 
-static int inline compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, struct net *net,
 				const unsigned short hnum,
 				const struct in6_addr *daddr,
 				const int dif)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index de382114609..7548905e79e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -260,10 +260,10 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 		return net->ipv6.fib6_main_tbl;
 }
 
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-	return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags);
+	return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
 }
 
 static void __net_init fib6_tables_init(struct net *net)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 13654686aea..f3caf1b8d57 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -342,7 +342,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 
 	if (olen > 0) {
 		struct msghdr msg;
-		struct flowi flowi;
+		struct flowi6 flowi6;
 		int junk;
 
 		err = -ENOMEM;
@@ -358,9 +358,9 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 
 		msg.msg_controllen = olen;
 		msg.msg_control = (void*)(fl->opt+1);
-		flowi.oif = 0;
+		memset(&flowi6, 0, sizeof(flowi6));
 
-		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk,
+		err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk,
 					&junk, &junk);
 		if (err)
 			goto done;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5f8d242be3f..18208876aa8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -174,15 +174,15 @@ int ip6_output(struct sk_buff *skb)
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  */
 
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	     struct ipv6_txoptions *opt)
 {
 	struct net *net = sock_net(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct in6_addr *first_hop = &fl->fl6_dst;
+	struct in6_addr *first_hop = &fl6->daddr;
 	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr;
-	u8 proto = fl->proto;
+	u8 proto = fl6->flowi6_proto;
 	int seg_len = skb->len;
 	int hlimit = -1;
 	int tclass = 0;
@@ -230,13 +230,13 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 
-	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
+	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
 	hdr->hop_limit = hlimit;
 
-	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
 	ipv6_addr_copy(&hdr->daddr, first_hop);
 
 	skb->priority = sk->sk_priority;
@@ -274,13 +274,10 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *hdr;
-	int totlen;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
 
-	totlen = len + sizeof(struct ipv6hdr);
-
 	skb_reset_network_header(skb);
 	skb_put(skb, sizeof(struct ipv6hdr));
 	hdr = ipv6_hdr(skb);
@@ -479,10 +476,13 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
+		if (!rt->rt6i_peer)
+			rt6_bind_peer(rt, 1);
+
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
 		 */
-		if (xrlim_allow(dst, 1*HZ))
+		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
 			ndisc_send_redirect(skb, n, target);
 	} else {
 		int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -879,7 +879,7 @@ static inline int ip6_rt_check(struct rt6key *rt_key,
 
 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 					  struct dst_entry *dst,
-					  struct flowi *fl)
+					  struct flowi6 *fl6)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct rt6_info *rt = (struct rt6_info *)dst;
@@ -904,11 +904,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 	 * sockets.
 	 * 2. oif also should be the same.
 	 */
-	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
+	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 #ifdef CONFIG_IPV6_SUBTREES
-	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
+	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 #endif
-	    (fl->oif && fl->oif != dst->dev->ifindex)) {
+	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
 		dst_release(dst);
 		dst = NULL;
 	}
@@ -918,22 +918,22 @@ out:
 }
 
 static int ip6_dst_lookup_tail(struct sock *sk,
-			       struct dst_entry **dst, struct flowi *fl)
+			       struct dst_entry **dst, struct flowi6 *fl6)
 {
 	int err;
 	struct net *net = sock_net(sk);
 
 	if (*dst == NULL)
-		*dst = ip6_route_output(net, sk, fl);
+		*dst = ip6_route_output(net, sk, fl6);
 
 	if ((err = (*dst)->error))
 		goto out_err_release;
 
-	if (ipv6_addr_any(&fl->fl6_src)) {
+	if (ipv6_addr_any(&fl6->saddr)) {
 		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
-					 &fl->fl6_dst,
+					 &fl6->daddr,
 					 sk ? inet6_sk(sk)->srcprefs : 0,
-					 &fl->fl6_src);
+					 &fl6->saddr);
 		if (err)
 			goto out_err_release;
 	}
@@ -949,10 +949,10 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	 */
 	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
 		struct inet6_ifaddr *ifp;
-		struct flowi fl_gw;
+		struct flowi6 fl_gw6;
 		int redirect;
 
-		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
+		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
 				      (*dst)->dev, 1);
 
 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
@@ -965,9 +965,9 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 			 * default router instead
 			 */
 			dst_release(*dst);
-			memcpy(&fl_gw, fl, sizeof(struct flowi));
-			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
-			*dst = ip6_route_output(net, sk, &fl_gw);
+			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
+			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
+			*dst = ip6_route_output(net, sk, &fl_gw6);
 			if ((err = (*dst)->error))
 				goto out_err_release;
 		}
@@ -988,43 +988,85 @@ out_err_release:
  * ip6_dst_lookup - perform route lookup on flow
  * @sk: socket which provides route info
  * @dst: pointer to dst_entry * for result
- * @fl: flow to lookup
+ * @fl6: flow to lookup
  *
  * This function performs a route lookup on the given flow.
  *
 
 * It returns zero on success, or a standard errno code on error.
  */
-int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
 {
 	*dst = NULL;
-	return ip6_dst_lookup_tail(sk, dst, fl);
+	return ip6_dst_lookup_tail(sk, dst, fl6);
 }
 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 
 /**
- * ip6_sk_dst_lookup - perform socket cached route lookup on flow
+ * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
+ * @sk: socket which provides route info
+ * @fl6: flow to lookup
+ * @final_dst: final destination address for ipsec lookup
+ * @can_sleep: we are in a sleepable context
+ *
+ * This function performs a route lookup on the given flow.
+ *
+ * It returns a valid dst pointer on success, or a pointer encoded
+ * error code.
+ */
+struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+				      const struct in6_addr *final_dst,
+				      bool can_sleep)
+{
+	struct dst_entry *dst = NULL;
+	int err;
+
+	err = ip6_dst_lookup_tail(sk, &dst, fl6);
+	if (err)
+		return ERR_PTR(err);
+	if (final_dst)
+		ipv6_addr_copy(&fl6->daddr, final_dst);
+	if (can_sleep)
+		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
+
+	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
+
+/**
+ * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
  * @sk: socket which provides the dst cache and route info
- * @dst: pointer to dst_entry * for result
- * @fl: flow to lookup
+ * @fl6: flow to lookup
+ * @final_dst: final destination address for ipsec lookup
+ * @can_sleep: we are in a sleepable context
  *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
- * It returns zero on success, or a standard errno code on error.
+ * It returns a valid dst pointer on success, or a pointer encoded
+ * error code.
 */
-int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+					 const struct in6_addr *final_dst,
+					 bool can_sleep)
 {
-	*dst = NULL;
-	if (sk) {
-		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
-		*dst = ip6_sk_dst_check(sk, *dst, fl);
-	}
+	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
+	int err;
+
+	dst = ip6_sk_dst_check(sk, dst, fl6);
+
+	err = ip6_dst_lookup_tail(sk, &dst, fl6);
+	if (err)
+		return ERR_PTR(err);
+	if (final_dst)
+		ipv6_addr_copy(&fl6->daddr, final_dst);
+	if (can_sleep)
+		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
 
-	return ip6_dst_lookup_tail(sk, dst, fl);
+	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
-EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
+EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
 static inline int ip6_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
@@ -1061,7 +1103,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
-		sk->sk_sndmsg_off = 0;
 	}
 
 	err = skb_append_datato_frags(sk,skb, getfrag, from,
@@ -1104,7 +1145,7 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int offset, int len, int odd, struct sk_buff *skb),
 	void *from, int length, int transhdrlen,
-	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
+	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
 	struct rt6_info *rt, unsigned int flags, int dontfrag)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -1118,6 +1159,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int err;
 	int offset = 0;
 	int csummode = CHECKSUM_NONE;
+	__u8 tx_flags = 0;
 
 	if (flags&MSG_PROBE)
 		return 0;
@@ -1161,7 +1203,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		}
 		dst_hold(&rt->dst);
 		inet->cork.dst = &rt->dst;
-		inet->cork.fl = *fl;
+		inet->cork.fl.u.ip6 = *fl6;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
 		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
@@ -1182,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		transhdrlen += exthdrlen;
 	} else {
 		rt = (struct rt6_info *)inet->cork.dst;
-		fl = &inet->cork.fl;
+		fl6 = &inet->cork.fl.u.ip6;
 		opt = np->cork.opt;
 		transhdrlen = 0;
 		exthdrlen = 0;
@@ -1197,11 +1239,18 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
 		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
-			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
 			return -EMSGSIZE;
 		}
 	}
 
+	/* For UDP, check if TX timestamp is enabled */
+	if (sk->sk_type == SOCK_DGRAM) {
+		err = sock_tx_timestamp(sk, &tx_flags);
+		if (err)
+			goto error;
+	}
+
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1222,7 +1271,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	if (length > mtu) {
 		int proto = sk->sk_protocol;
 		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
-			ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
+			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
 			return -EMSGSIZE;
 		}
 
@@ -1306,6 +1355,12 @@ alloc_new_skb:
 					   sk->sk_allocation);
 				if (unlikely(skb == NULL))
 					err = -ENOBUFS;
+				else {
+					/* Only the initial fragment
+					 * is time stamped.
+					 */
+					tx_flags = 0;
+				}
 			}
 			if (skb == NULL)
 				goto error;
@@ -1317,6 +1372,9 @@ alloc_new_skb:
 			/* reserve for fragmentation */
 			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
 
+			if (sk->sk_type == SOCK_DGRAM)
+				skb_shinfo(skb)->tx_flags = tx_flags;
+
 			/*
 			 * Find where to start putting bytes
 			 */
@@ -1458,8 +1516,8 @@ int ip6_push_pending_frames(struct sock *sk)
 	struct ipv6hdr *hdr;
 	struct ipv6_txoptions *opt = np->cork.opt;
 	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
-	struct flowi *fl = &inet->cork.fl;
-	unsigned char proto = fl->proto;
+	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+	unsigned char proto = fl6->flowi6_proto;
 	int err = 0;
 
 	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
@@ -1484,7 +1542,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	if (np->pmtudisc < IPV6_PMTUDISC_DO)
 		skb->local_df = 1;
 
-	ipv6_addr_copy(final_dst, &fl->fl6_dst);
+	ipv6_addr_copy(final_dst, &fl6->daddr);
 	__skb_pull(skb, skb_network_header_len(skb));
 	if (opt && opt->opt_flen)
 		ipv6_push_frag_opts(skb, opt, &proto);
@@ -1495,12 +1553,12 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb_reset_network_header(skb);
 	hdr = ipv6_hdr(skb);
 
-	*(__be32*)hdr = fl->fl6_flowlabel |
+	*(__be32*)hdr = fl6->flowlabel |
 		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
 
 	hdr->hop_limit = np->cork.hop_limit;
 	hdr->nexthdr = proto;
-	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
 	ipv6_addr_copy(&hdr->daddr, final_dst);
 
 	skb->priority = sk->sk_priority;
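
The tx_flags plumbing added to ip6_append_data() wires IPv6 UDP up to the SO_TIMESTAMPING machinery: sock_tx_timestamp() snapshots the socket's timestamp flags, and only the first fragment of a datagram gets stamped. The user-space side this enables, as a sketch (flag names are the standard uapi ones; error handling trimmed, and SO_TIMESTAMPING is defined as a fallback for older libc headers):

#include <linux/net_tstamp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37	/* value on most architectures */
#endif

int main(void)
{
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
	int flags = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags)))
		perror("SO_TIMESTAMPING");

	/* After sendmsg(), the TX timestamp comes back on the error queue
	 * as an SCM_TIMESTAMPING cmsg: recvmsg(fd, ..., MSG_ERRQUEUE). */
	close(fd);
	return 0;
}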
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697b..c1b1bd312df 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
 MODULE_AUTHOR("Ville Nuorvala");
 MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETDEV("ip6tnl0");
 
 #ifdef IP6_TNL_DEBUG
 #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
@@ -535,7 +536,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	int err;
 	struct sk_buff *skb2;
 	struct iphdr *eiph;
-	struct flowi fl;
 	struct rtable *rt;
 
 	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
@@ -577,11 +577,11 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	eiph = ip_hdr(skb2);
 
 	/* Try to guess incoming interface */
-	memset(&fl, 0, sizeof(fl));
-	fl.fl4_dst = eiph->saddr;
-	fl.fl4_tos = RT_TOS(eiph->tos);
-	fl.proto = IPPROTO_IPIP;
-	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
+	rt = ip_route_output_ports(dev_net(skb->dev), NULL,
+				   eiph->saddr, 0,
+				   0, 0,
+				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+	if (IS_ERR(rt))
 		goto out;
 
 	skb2->dev = rt->dst.dev;
@@ -590,15 +590,18 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (rt->rt_flags & RTCF_LOCAL) {
 		ip_rt_put(rt);
 		rt = NULL;
-		fl.fl4_dst = eiph->daddr;
-		fl.fl4_src = eiph->saddr;
-		fl.fl4_tos = eiph->tos;
-		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
+		rt = ip_route_output_ports(dev_net(skb->dev), NULL,
+					   eiph->daddr, eiph->saddr,
+					   0, 0,
+					   IPPROTO_IPIP,
+					   RT_TOS(eiph->tos), 0);
+		if (IS_ERR(rt) ||
 		    rt->dst.dev->type != ARPHRD_TUNNEL) {
-			ip_rt_put(rt);
+			if (!IS_ERR(rt))
+				ip_rt_put(rt);
 			goto out;
 		}
-		skb_dst_set(skb2, (struct dst_entry *)rt);
+		skb_dst_set(skb2, &rt->dst);
 	} else {
 		ip_rt_put(rt);
 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
@@ -881,7 +884,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 static int ip6_tnl_xmit2(struct sk_buff *skb,
 			 struct net_device *dev,
 			 __u8 dsfield,
-			 struct flowi *fl,
+			 struct flowi6 *fl6,
 			 int encap_limit,
 			 __u32 *pmtu)
 {
@@ -901,10 +904,16 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	if ((dst = ip6_tnl_dst_check(t)) != NULL)
 		dst_hold(dst);
 	else {
-		dst = ip6_route_output(net, NULL, fl);
+		dst = ip6_route_output(net, NULL, fl6);
 
-		if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
+		if (dst->error)
 			goto tx_err_link_failure;
+		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			dst = NULL;
+			goto tx_err_link_failure;
+		}
 	}
 
 	tdev = dst->dev;
@@ -954,7 +963,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 
 	skb->transport_header = skb->network_header;
 
-	proto = fl->proto;
+	proto = fl6->flowi6_proto;
 	if (encap_limit >= 0) {
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
@@ -962,13 +971,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);
-	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
+	*(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000);
 	dsfield = INET_ECN_encapsulate(0, dsfield);
 	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
-	ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
-	ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
+	ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr);
+	ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr);
 	nf_reset(skb);
 	pkt_len = skb->len;
 	err = ip6_local_out(skb);
@@ -998,7 +1007,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct iphdr *iph = ip_hdr(skb);
 	int encap_limit = -1;
-	struct flowi fl;
+	struct flowi6 fl6;
 	__u8 dsfield;
 	__u32 mtu;
 	int err;
@@ -1010,16 +1019,16 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		encap_limit = t->parms.encap_limit;
 
-	memcpy(&fl, &t->fl, sizeof (fl));
-	fl.proto = IPPROTO_IPIP;
+	memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+	fl6.flowi6_proto = IPPROTO_IPIP;
 
 	dsfield = ipv4_get_dsfield(iph);
 
 	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
-		fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
 					  & IPV6_TCLASS_MASK;
 
-	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
 	if (err != 0) {
 		/* XXX: send ICMP error even if DF is not set. */
 		if (err == -EMSGSIZE)
@@ -1038,7 +1047,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	int encap_limit = -1;
 	__u16 offset;
-	struct flowi fl;
+	struct flowi6 fl6;
 	__u8 dsfield;
 	__u32 mtu;
 	int err;
@@ -1060,16 +1069,16 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		encap_limit = t->parms.encap_limit;
 
-	memcpy(&fl, &t->fl, sizeof (fl));
-	fl.proto = IPPROTO_IPV6;
+	memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+	fl6.flowi6_proto = IPPROTO_IPV6;
 
 	dsfield = ipv6_get_dsfield(ipv6h);
 	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
-		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
 	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
-		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
 
-	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
 	if (err != 0) {
 		if (err == -EMSGSIZE)
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1132,21 +1141,21 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
 	struct ip6_tnl_parm *p = &t->parms;
-	struct flowi *fl = &t->fl;
+	struct flowi6 *fl6 = &t->fl.u.ip6;
 
 	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1138 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); 1147 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1139 1148
1140 /* Set up flowi template */ 1149 /* Set up flowi template */
1141 ipv6_addr_copy(&fl->fl6_src, &p->laddr); 1150 ipv6_addr_copy(&fl6->saddr, &p->laddr);
1142 ipv6_addr_copy(&fl->fl6_dst, &p->raddr); 1151 ipv6_addr_copy(&fl6->daddr, &p->raddr);
1143 fl->oif = p->link; 1152 fl6->flowi6_oif = p->link;
1144 fl->fl6_flowlabel = 0; 1153 fl6->flowlabel = 0;
1145 1154
1146 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) 1155 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1147 fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; 1156 fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1148 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1157 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1149 fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; 1158 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1150 1159
1151 ip6_tnl_set_cap(t); 1160 ip6_tnl_set_cap(t);
1152 1161
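The link-config hunk is a compact map of the struct flowi6 renames applied everywhere in this diff: the generic members gain a flowi6_ prefix, the IPv6-specific ones drop theirs, and the tunnel's flow template moves into the u.ip6 arm of the flowi union. A sketch of a template filled with the new names (dev, skb, local and remote are placeholders assumed in scope):

	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));		/* zero the whole key, not one field */
	fl6.flowi6_oif = dev->ifindex;		/* was fl->oif */
	fl6.flowi6_mark = skb->mark;		/* was fl->mark */
	fl6.flowi6_proto = IPPROTO_IPV6;	/* was fl->proto */
	fl6.flowlabel = 0;			/* was fl->fl6_flowlabel */
	ipv6_addr_copy(&fl6.saddr, &local);	/* was fl->fl6_src */
	ipv6_addr_copy(&fl6.daddr, &remote);	/* was fl->fl6_dst */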
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0e1d53bcf1e..7ff0343e05c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -135,14 +135,15 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
135 return NULL; 135 return NULL;
136} 136}
137 137
138static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, 138static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
139 struct mr6_table **mrt) 139 struct mr6_table **mrt)
140{ 140{
141 struct ip6mr_result res; 141 struct ip6mr_result res;
142 struct fib_lookup_arg arg = { .result = &res, }; 142 struct fib_lookup_arg arg = { .result = &res, };
143 int err; 143 int err;
144 144
145 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); 145 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
146 flowi6_to_flowi(flp6), 0, &arg);
146 if (err < 0) 147 if (err < 0)
147 return err; 148 return err;
148 *mrt = res.mrt; 149 *mrt = res.mrt;
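fib_rules_lookup() still takes a struct flowi *, so code that now builds a struct flowi6 converts with flowi6_to_flowi(). The conversion is free: struct flowi has become a union over the per-family keys, and the helper just recovers the containing union. The shape, reconstructed from include/net/flow.h in this series (treat as a sketch):

	struct flowi {
		union {
			struct flowi_common	__fl_common;
			struct flowi4		ip4;
			struct flowi6		ip6;
			struct flowidn		dn;
		} u;
	};

	static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
	{
		return container_of(fl6, struct flowi, u.ip6);
	}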
@@ -270,7 +271,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
270 return net->ipv6.mrt6; 271 return net->ipv6.mrt6;
271} 272}
272 273
273static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, 274static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
274 struct mr6_table **mrt) 275 struct mr6_table **mrt)
275{ 276{
276 *mrt = net->ipv6.mrt6; 277 *mrt = net->ipv6.mrt6;
@@ -617,9 +618,9 @@ static int pim6_rcv(struct sk_buff *skb)
617 struct net_device *reg_dev = NULL; 618 struct net_device *reg_dev = NULL;
618 struct net *net = dev_net(skb->dev); 619 struct net *net = dev_net(skb->dev);
619 struct mr6_table *mrt; 620 struct mr6_table *mrt;
620 struct flowi fl = { 621 struct flowi6 fl6 = {
621 .iif = skb->dev->ifindex, 622 .flowi6_iif = skb->dev->ifindex,
622 .mark = skb->mark, 623 .flowi6_mark = skb->mark,
623 }; 624 };
624 int reg_vif_num; 625 int reg_vif_num;
625 626
@@ -644,7 +645,7 @@ static int pim6_rcv(struct sk_buff *skb)
644 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 645 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
645 goto drop; 646 goto drop;
646 647
647 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) 648 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
648 goto drop; 649 goto drop;
649 reg_vif_num = mrt->mroute_reg_vif_num; 650 reg_vif_num = mrt->mroute_reg_vif_num;
650 651
@@ -687,14 +688,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
687{ 688{
688 struct net *net = dev_net(dev); 689 struct net *net = dev_net(dev);
689 struct mr6_table *mrt; 690 struct mr6_table *mrt;
690 struct flowi fl = { 691 struct flowi6 fl6 = {
691 .oif = dev->ifindex, 692 .flowi6_oif = dev->ifindex,
692 .iif = skb->skb_iif, 693 .flowi6_iif = skb->skb_iif,
693 .mark = skb->mark, 694 .flowi6_mark = skb->mark,
694 }; 695 };
695 int err; 696 int err;
696 697
697 err = ip6mr_fib_lookup(net, &fl, &mrt); 698 err = ip6mr_fib_lookup(net, &fl6, &mrt);
698 if (err < 0) 699 if (err < 0)
699 return err; 700 return err;
700 701
@@ -1039,7 +1040,6 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1039 1040
1040 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 1041 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1041 if (ipv6_hdr(skb)->version == 0) { 1042 if (ipv6_hdr(skb)->version == 0) {
1042 int err;
1043 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); 1043 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1044 1044
1045 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 1045 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -1050,7 +1050,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1050 skb_trim(skb, nlh->nlmsg_len); 1050 skb_trim(skb, nlh->nlmsg_len);
1051 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; 1051 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1052 } 1052 }
1053 err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 1053 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1054 } else 1054 } else
1055 ip6_mr_forward(net, mrt, skb, c); 1055 ip6_mr_forward(net, mrt, skb, c);
1056 } 1056 }
@@ -1548,13 +1548,13 @@ int ip6mr_sk_done(struct sock *sk)
1548struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) 1548struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1549{ 1549{
1550 struct mr6_table *mrt; 1550 struct mr6_table *mrt;
1551 struct flowi fl = { 1551 struct flowi6 fl6 = {
1552 .iif = skb->skb_iif, 1552 .flowi6_iif = skb->skb_iif,
1553 .oif = skb->dev->ifindex, 1553 .flowi6_oif = skb->dev->ifindex,
1554 .mark = skb->mark, 1554 .flowi6_mark = skb->mark,
1555 }; 1555 };
1556 1556
1557 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) 1557 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1558 return NULL; 1558 return NULL;
1559 1559
1560 return mrt->mroute6_sk; 1560 return mrt->mroute6_sk;
@@ -1898,7 +1898,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1898 struct mif_device *vif = &mrt->vif6_table[vifi]; 1898 struct mif_device *vif = &mrt->vif6_table[vifi];
1899 struct net_device *dev; 1899 struct net_device *dev;
1900 struct dst_entry *dst; 1900 struct dst_entry *dst;
1901 struct flowi fl; 1901 struct flowi6 fl6;
1902 1902
1903 if (vif->dev == NULL) 1903 if (vif->dev == NULL)
1904 goto out_free; 1904 goto out_free;
@@ -1916,12 +1916,12 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1916 1916
1917 ipv6h = ipv6_hdr(skb); 1917 ipv6h = ipv6_hdr(skb);
1918 1918
1919 fl = (struct flowi) { 1919 fl6 = (struct flowi6) {
1920 .oif = vif->link, 1920 .flowi6_oif = vif->link,
1921 .fl6_dst = ipv6h->daddr, 1921 .daddr = ipv6h->daddr,
1922 }; 1922 };
1923 1923
1924 dst = ip6_route_output(net, NULL, &fl); 1924 dst = ip6_route_output(net, NULL, &fl6);
1925 if (!dst) 1925 if (!dst)
1926 goto out_free; 1926 goto out_free;
1927 1927
@@ -2044,13 +2044,13 @@ int ip6_mr_input(struct sk_buff *skb)
2044 struct mfc6_cache *cache; 2044 struct mfc6_cache *cache;
2045 struct net *net = dev_net(skb->dev); 2045 struct net *net = dev_net(skb->dev);
2046 struct mr6_table *mrt; 2046 struct mr6_table *mrt;
2047 struct flowi fl = { 2047 struct flowi6 fl6 = {
2048 .iif = skb->dev->ifindex, 2048 .flowi6_iif = skb->dev->ifindex,
2049 .mark = skb->mark, 2049 .flowi6_mark = skb->mark,
2050 }; 2050 };
2051 int err; 2051 int err;
2052 2052
2053 err = ip6mr_fib_lookup(net, &fl, &mrt); 2053 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2054 if (err < 0) 2054 if (err < 0)
2055 return err; 2055 return err;
2056 2056
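The same caller pattern repeats through pim6_rcv(), reg_vif_xmit(), mroute6_socket() and ip6_mr_input(): a stack flowi6 is filled with designated initializers (so unnamed members are zeroed) and handed to ip6mr_fib_lookup() to pick the mr6_table. The ip6mr_cache_resolve() hunk is a drive-by cleanup — rtnl_unicast()'s return value was never used, so the err local goes away. The recurring lookup, condensed:

	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif  = skb->dev->ifindex,
		.flowi6_mark = skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;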
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d1770e061c0..9cb191ecaba 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -444,12 +444,12 @@ sticky_done:
444 { 444 {
445 struct ipv6_txoptions *opt = NULL; 445 struct ipv6_txoptions *opt = NULL;
446 struct msghdr msg; 446 struct msghdr msg;
447 struct flowi fl; 447 struct flowi6 fl6;
448 int junk; 448 int junk;
449 449
450 fl.fl6_flowlabel = 0; 450 memset(&fl6, 0, sizeof(fl6));
451 fl.oif = sk->sk_bound_dev_if; 451 fl6.flowi6_oif = sk->sk_bound_dev_if;
452 fl.mark = sk->sk_mark; 452 fl6.flowi6_mark = sk->sk_mark;
453 453
454 if (optlen == 0) 454 if (optlen == 0)
455 goto update; 455 goto update;
@@ -475,7 +475,7 @@ sticky_done:
475 msg.msg_controllen = optlen; 475 msg.msg_controllen = optlen;
476 msg.msg_control = (void*)(opt+1); 476 msg.msg_control = (void*)(opt+1);
477 477
478 retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, 478 retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk,
479 &junk); 479 &junk);
480 if (retv) 480 if (retv)
481 goto done; 481 goto done;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 49f986d626a..76b893771e6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -319,7 +319,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
319{ 319{
320 struct in6_addr *source, *group; 320 struct in6_addr *source, *group;
321 struct ipv6_mc_socklist *pmc; 321 struct ipv6_mc_socklist *pmc;
322 struct net_device *dev;
323 struct inet6_dev *idev; 322 struct inet6_dev *idev;
324 struct ipv6_pinfo *inet6 = inet6_sk(sk); 323 struct ipv6_pinfo *inet6 = inet6_sk(sk);
325 struct ip6_sf_socklist *psl; 324 struct ip6_sf_socklist *psl;
@@ -341,7 +340,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
341 rcu_read_unlock(); 340 rcu_read_unlock();
342 return -ENODEV; 341 return -ENODEV;
343 } 342 }
344 dev = idev->dev;
345 343
346 err = -EADDRNOTAVAIL; 344 err = -EADDRNOTAVAIL;
347 345
@@ -455,7 +453,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
455{ 453{
456 struct in6_addr *group; 454 struct in6_addr *group;
457 struct ipv6_mc_socklist *pmc; 455 struct ipv6_mc_socklist *pmc;
458 struct net_device *dev;
459 struct inet6_dev *idev; 456 struct inet6_dev *idev;
460 struct ipv6_pinfo *inet6 = inet6_sk(sk); 457 struct ipv6_pinfo *inet6 = inet6_sk(sk);
461 struct ip6_sf_socklist *newpsl, *psl; 458 struct ip6_sf_socklist *newpsl, *psl;
@@ -478,7 +475,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
478 rcu_read_unlock(); 475 rcu_read_unlock();
479 return -ENODEV; 476 return -ENODEV;
480 } 477 }
481 dev = idev->dev;
482 478
483 err = 0; 479 err = 0;
484 480
@@ -549,7 +545,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
549 struct in6_addr *group; 545 struct in6_addr *group;
550 struct ipv6_mc_socklist *pmc; 546 struct ipv6_mc_socklist *pmc;
551 struct inet6_dev *idev; 547 struct inet6_dev *idev;
552 struct net_device *dev;
553 struct ipv6_pinfo *inet6 = inet6_sk(sk); 548 struct ipv6_pinfo *inet6 = inet6_sk(sk);
554 struct ip6_sf_socklist *psl; 549 struct ip6_sf_socklist *psl;
555 struct net *net = sock_net(sk); 550 struct net *net = sock_net(sk);
@@ -566,7 +561,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
566 rcu_read_unlock(); 561 rcu_read_unlock();
567 return -ENODEV; 562 return -ENODEV;
568 } 563 }
569 dev = idev->dev;
570 564
571 err = -EADDRNOTAVAIL; 565 err = -EADDRNOTAVAIL;
572 /* 566 /*
@@ -1402,7 +1396,7 @@ static void mld_sendpack(struct sk_buff *skb)
1402 struct inet6_dev *idev; 1396 struct inet6_dev *idev;
1403 struct net *net = dev_net(skb->dev); 1397 struct net *net = dev_net(skb->dev);
1404 int err; 1398 int err;
1405 struct flowi fl; 1399 struct flowi6 fl6;
1406 struct dst_entry *dst; 1400 struct dst_entry *dst;
1407 1401
1408 rcu_read_lock(); 1402 rcu_read_lock();
@@ -1425,11 +1419,16 @@ static void mld_sendpack(struct sk_buff *skb)
1425 goto err_out; 1419 goto err_out;
1426 } 1420 }
1427 1421
1428 icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT, 1422 icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
1429 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 1423 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
1430 skb->dev->ifindex); 1424 skb->dev->ifindex);
1431 1425
1432 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 1426 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1427 err = 0;
1428 if (IS_ERR(dst)) {
1429 err = PTR_ERR(dst);
1430 dst = NULL;
1431 }
1433 skb_dst_set(skb, dst); 1432 skb_dst_set(skb, dst);
1434 if (err) 1433 if (err)
1435 goto err_out; 1434 goto err_out;
@@ -1732,7 +1731,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1732 u8 ra[8] = { IPPROTO_ICMPV6, 0, 1731 u8 ra[8] = { IPPROTO_ICMPV6, 0,
1733 IPV6_TLV_ROUTERALERT, 2, 0, 0, 1732 IPV6_TLV_ROUTERALERT, 2, 0, 0,
1734 IPV6_TLV_PADN, 0 }; 1733 IPV6_TLV_PADN, 0 };
1735 struct flowi fl; 1734 struct flowi6 fl6;
1736 struct dst_entry *dst; 1735 struct dst_entry *dst;
1737 1736
1738 if (type == ICMPV6_MGM_REDUCTION) 1737 if (type == ICMPV6_MGM_REDUCTION)
@@ -1792,13 +1791,15 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1792 goto err_out; 1791 goto err_out;
1793 } 1792 }
1794 1793
1795 icmpv6_flow_init(sk, &fl, type, 1794 icmpv6_flow_init(sk, &fl6, type,
1796 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 1795 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
1797 skb->dev->ifindex); 1796 skb->dev->ifindex);
1798 1797
1799 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 1798 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1800 if (err) 1799 if (IS_ERR(dst)) {
1800 err = PTR_ERR(dst);
1801 goto err_out; 1801 goto err_out;
1802 }
1802 1803
1803 skb_dst_set(skb, dst); 1804 skb_dst_set(skb, dst);
1804 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, 1805 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index d6e9599d070..9b210482fb0 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -203,18 +203,20 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
203 return allow; 203 return allow;
204} 204}
205 205
206static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) 206static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
207 const struct flowi *fl)
207{ 208{
208 struct net *net = xs_net(x); 209 struct net *net = xs_net(x);
209 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; 210 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
211 const struct flowi6 *fl6 = &fl->u.ip6;
210 struct ipv6_destopt_hao *hao = NULL; 212 struct ipv6_destopt_hao *hao = NULL;
211 struct xfrm_selector sel; 213 struct xfrm_selector sel;
212 int offset; 214 int offset;
213 struct timeval stamp; 215 struct timeval stamp;
214 int err = 0; 216 int err = 0;
215 217
216 if (unlikely(fl->proto == IPPROTO_MH && 218 if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
217 fl->fl_mh_type <= IP6_MH_TYPE_MAX)) 219 fl6->fl6_mh_type <= IP6_MH_TYPE_MAX))
218 goto out; 220 goto out;
219 221
220 if (likely(opt->dsthao)) { 222 if (likely(opt->dsthao)) {
@@ -239,14 +241,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
239 sizeof(sel.saddr)); 241 sizeof(sel.saddr));
240 sel.prefixlen_s = 128; 242 sel.prefixlen_s = 128;
241 sel.family = AF_INET6; 243 sel.family = AF_INET6;
242 sel.proto = fl->proto; 244 sel.proto = fl6->flowi6_proto;
243 sel.dport = xfrm_flowi_dport(fl); 245 sel.dport = xfrm_flowi_dport(fl, &fl6->uli);
244 if (sel.dport) 246 if (sel.dport)
245 sel.dport_mask = htons(~0); 247 sel.dport_mask = htons(~0);
246 sel.sport = xfrm_flowi_sport(fl); 248 sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
247 if (sel.sport) 249 if (sel.sport)
248 sel.sport_mask = htons(~0); 250 sel.sport_mask = htons(~0);
249 sel.ifindex = fl->oif; 251 sel.ifindex = fl6->flowi6_oif;
250 252
251 err = km_report(net, IPPROTO_DSTOPTS, &sel, 253 err = km_report(net, IPPROTO_DSTOPTS, &sel,
252 (hao ? (xfrm_address_t *)&hao->addr : NULL)); 254 (hao ? (xfrm_address_t *)&hao->addr : NULL));
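Two things happen in the mip6 hunk: the xfrm_type ->reject() hook now receives a const struct flowi *, from which the IPv6 key is recovered via &fl->u.ip6, and the port/type helpers take the upper-layer-info union explicitly instead of digging it out of the old flat flowi. The selector-building lines, condensed:

	const struct flowi6 *fl6 = &fl->u.ip6;

	sel.proto = fl6->flowi6_proto;
	sel.dport = xfrm_flowi_dport(fl, &fl6->uli);	/* uli passed explicitly now */
	if (sel.dport)
		sel.dport_mask = htons(~0);
	sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
	if (sel.sport)
		sel.sport_mask = htons(~0);
	sel.ifindex = fl6->flowi6_oif;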
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2342545a5ee..0e49c9db3c9 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -511,7 +511,7 @@ void ndisc_send_skb(struct sk_buff *skb,
511 const struct in6_addr *saddr, 511 const struct in6_addr *saddr,
512 struct icmp6hdr *icmp6h) 512 struct icmp6hdr *icmp6h)
513{ 513{
514 struct flowi fl; 514 struct flowi6 fl6;
515 struct dst_entry *dst; 515 struct dst_entry *dst;
516 struct net *net = dev_net(dev); 516 struct net *net = dev_net(dev);
517 struct sock *sk = net->ipv6.ndisc_sk; 517 struct sock *sk = net->ipv6.ndisc_sk;
@@ -521,7 +521,7 @@ void ndisc_send_skb(struct sk_buff *skb,
521 521
522 type = icmp6h->icmp6_type; 522 type = icmp6h->icmp6_type;
523 523
524 icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); 524 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
525 525
526 dst = icmp6_dst_alloc(dev, neigh, daddr); 526 dst = icmp6_dst_alloc(dev, neigh, daddr);
527 if (!dst) { 527 if (!dst) {
@@ -529,8 +529,8 @@ void ndisc_send_skb(struct sk_buff *skb,
529 return; 529 return;
530 } 530 }
531 531
532 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 532 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
533 if (err < 0) { 533 if (IS_ERR(dst)) {
534 kfree_skb(skb); 534 kfree_skb(skb);
535 return; 535 return;
536 } 536 }
@@ -1515,7 +1515,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1515 struct rt6_info *rt; 1515 struct rt6_info *rt;
1516 struct dst_entry *dst; 1516 struct dst_entry *dst;
1517 struct inet6_dev *idev; 1517 struct inet6_dev *idev;
1518 struct flowi fl; 1518 struct flowi6 fl6;
1519 u8 *opt; 1519 u8 *opt;
1520 int rd_len; 1520 int rd_len;
1521 int err; 1521 int err;
@@ -1535,15 +1535,15 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1535 return; 1535 return;
1536 } 1536 }
1537 1537
1538 icmpv6_flow_init(sk, &fl, NDISC_REDIRECT, 1538 icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1539 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); 1539 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1540 1540
1541 dst = ip6_route_output(net, NULL, &fl); 1541 dst = ip6_route_output(net, NULL, &fl6);
1542 if (dst == NULL) 1542 if (dst == NULL)
1543 return; 1543 return;
1544 1544
1545 err = xfrm_lookup(net, &dst, &fl, NULL, 0); 1545 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1546 if (err) 1546 if (IS_ERR(dst))
1547 return; 1547 return;
1548 1548
1549 rt = (struct rt6_info *) dst; 1549 rt = (struct rt6_info *) dst;
@@ -1553,7 +1553,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1553 "ICMPv6 Redirect: destination is not a neighbour.\n"); 1553 "ICMPv6 Redirect: destination is not a neighbour.\n");
1554 goto release; 1554 goto release;
1555 } 1555 }
1556 if (!xrlim_allow(dst, 1*HZ)) 1556 if (!rt->rt6i_peer)
1557 rt6_bind_peer(rt, 1);
1558 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1557 goto release; 1559 goto release;
1558 1560
1559 if (dev->addr_len) { 1561 if (dev->addr_len) {
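ndisc_send_redirect() moves its rate limiting from the dst-based xrlim_allow() to the inet_peer cache, binding a peer on demand. Note that the old code bailed out when !xrlim_allow() (sending not allowed), while the test as committed skips the redirect when inet_peer_xrlim_allow() returns true — which reads as inverted relative to the old logic. The expected shape of the check would be (an observation about the diff, not the committed code):

	if (!rt->rt6i_peer)
		rt6_bind_peer(rt, 1);
	if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
		goto release;		/* rate limited: drop the redirect */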
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 35915e8617f..39aaca2b4fd 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -15,14 +15,14 @@ int ip6_route_me_harder(struct sk_buff *skb)
15 struct net *net = dev_net(skb_dst(skb)->dev); 15 struct net *net = dev_net(skb_dst(skb)->dev);
16 struct ipv6hdr *iph = ipv6_hdr(skb); 16 struct ipv6hdr *iph = ipv6_hdr(skb);
17 struct dst_entry *dst; 17 struct dst_entry *dst;
18 struct flowi fl = { 18 struct flowi6 fl6 = {
19 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 19 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
20 .mark = skb->mark, 20 .flowi6_mark = skb->mark,
21 .fl6_dst = iph->daddr, 21 .daddr = iph->daddr,
22 .fl6_src = iph->saddr, 22 .saddr = iph->saddr,
23 }; 23 };
24 24
25 dst = ip6_route_output(net, skb->sk, &fl); 25 dst = ip6_route_output(net, skb->sk, &fl6);
26 if (dst->error) { 26 if (dst->error) {
27 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 27 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
28 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); 28 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
@@ -37,9 +37,10 @@ int ip6_route_me_harder(struct sk_buff *skb)
37 37
38#ifdef CONFIG_XFRM 38#ifdef CONFIG_XFRM
39 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 39 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
40 xfrm_decode_session(skb, &fl, AF_INET6) == 0) { 40 xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
41 skb_dst_set(skb, NULL); 41 skb_dst_set(skb, NULL);
42 if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) 42 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
43 if (IS_ERR(dst))
43 return -1; 44 return -1;
44 skb_dst_set(skb, dst); 45 skb_dst_set(skb, dst);
45 } 46 }
@@ -91,7 +92,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
91 92
92static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) 93static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
93{ 94{
94 *dst = ip6_route_output(&init_net, NULL, fl); 95 *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6);
95 return (*dst)->error; 96 return (*dst)->error;
96} 97}
97 98
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d227c644f7..0b2af9b85ce 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -410,7 +410,7 @@ ip6t_do_table(struct sk_buff *skb,
410 verdict = (unsigned)(-v) - 1; 410 verdict = (unsigned)(-v) - 1;
411 break; 411 break;
412 } 412 }
413 if (*stackptr == 0) 413 if (*stackptr <= origptr)
414 e = get_entry(table_base, 414 e = get_entry(table_base,
415 private->underflow[hook]); 415 private->underflow[hook]);
416 else 416 else
@@ -441,8 +441,8 @@ ip6t_do_table(struct sk_buff *skb,
441 break; 441 break;
442 } while (!acpar.hotdrop); 442 } while (!acpar.hotdrop);
443 443
444 xt_info_rdunlock_bh();
445 *stackptr = origptr; 444 *stackptr = origptr;
445 xt_info_rdunlock_bh();
446 446
447#ifdef DEBUG_ALLOW_ALL 447#ifdef DEBUG_ALLOW_ALL
448 return NF_ACCEPT; 448 return NF_ACCEPT;
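Two reentrancy details in ip6t_do_table(): the underflow test now compares the jump-stack cursor against origptr rather than zero, so a nested invocation (e.g. a target that re-enters the same table on this CPU) unwinds to its own starting depth; and the cursor is restored before xt_info_rdunlock_bh(), closing the window in which a softirq could have observed a stale cursor. The unwind order, isolated:

	/* restore our caller's stack depth while the per-cpu jumpstack
	 * is still protected by the read lock */
	*stackptr = origptr;
	xt_info_rdunlock_bh();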
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
1076 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1076 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1077 newinfo->initial_entries = 0; 1077 newinfo->initial_entries = 0;
1078 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1078 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1079 xt_compat_init_offsets(AF_INET6, info->number);
1079 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1080 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1080 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1081 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1081 if (ret != 0) 1082 if (ret != 0)
@@ -1274,6 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1274 /* overflow check */ 1275 /* overflow check */
1275 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1276 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1276 return -ENOMEM; 1277 return -ENOMEM;
1278 tmp.name[sizeof(tmp.name)-1] = 0;
1277 1279
1278 newinfo = xt_alloc_table_info(tmp.size); 1280 newinfo = xt_alloc_table_info(tmp.size);
1279 if (!newinfo) 1281 if (!newinfo)
@@ -1679,6 +1681,7 @@ translate_compat_table(struct net *net,
1679 duprintf("translate_compat_table: size %u\n", info->size); 1681 duprintf("translate_compat_table: size %u\n", info->size);
1680 j = 0; 1682 j = 0;
1681 xt_compat_lock(AF_INET6); 1683 xt_compat_lock(AF_INET6);
1684 xt_compat_init_offsets(AF_INET6, number);
1682 /* Walk through entries, checking offsets. */ 1685 /* Walk through entries, checking offsets. */
1683 xt_entry_foreach(iter0, entry0, total_size) { 1686 xt_entry_foreach(iter0, entry0, total_size) {
1684 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1687 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
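Both compat paths now call xt_compat_init_offsets() with the expected rule count before walking the entries, letting the xt core size its offset-translation table up front instead of growing it per rule. Sketched in context:

	xt_compat_lock(AF_INET6);
	xt_compat_init_offsets(AF_INET6, number);	/* pre-size the offset table */
	xt_entry_foreach(iter0, entry0, total_size) {
		/* per-entry checks record compat offsets as they walk */
	}
	xt_compat_unlock(AF_INET6);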
@@ -1820,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1820 return -ENOMEM; 1823 return -ENOMEM;
1821 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1824 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1822 return -ENOMEM; 1825 return -ENOMEM;
1826 tmp.name[sizeof(tmp.name)-1] = 0;
1823 1827
1824 newinfo = xt_alloc_table_info(tmp.size); 1828 newinfo = xt_alloc_table_info(tmp.size);
1825 if (!newinfo) 1829 if (!newinfo)
@@ -2049,6 +2053,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2049 ret = -EFAULT; 2053 ret = -EFAULT;
2050 break; 2054 break;
2051 } 2055 }
2056 rev.name[sizeof(rev.name)-1] = 0;
2052 2057
2053 if (cmd == IP6T_SO_GET_REVISION_TARGET) 2058 if (cmd == IP6T_SO_GET_REVISION_TARGET)
2054 target = 1; 2059 target = 1;
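The three one-line additions in this file are the same hardening fix: table and revision names arrive from userspace in fixed-size buffers with no guarantee of NUL termination, so they are terminated before being used as strings. The pattern:

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;
	tmp.name[sizeof(tmp.name) - 1] = 0;	/* never trust userspace termination */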
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index de338037a73..e6af8d72f26 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
452 in ? in->name : "", 452 in ? in->name : "",
453 out ? out->name : ""); 453 out ? out->name : "");
454 454
455 /* MAC logging for input path only. */ 455 if (in != NULL)
456 if (in && !out)
457 dump_mac_header(m, loginfo, skb); 456 dump_mac_header(m, loginfo, skb);
458 457
459 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); 458 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index bf998feac14..28e74488a32 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -47,7 +47,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
47 struct ipv6hdr *ip6h; 47 struct ipv6hdr *ip6h;
48 struct dst_entry *dst = NULL; 48 struct dst_entry *dst = NULL;
49 u8 proto; 49 u8 proto;
50 struct flowi fl; 50 struct flowi6 fl6;
51 51
52 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || 52 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
53 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { 53 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
@@ -89,19 +89,20 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
89 return; 89 return;
90 } 90 }
91 91
92 memset(&fl, 0, sizeof(fl)); 92 memset(&fl6, 0, sizeof(fl6));
93 fl.proto = IPPROTO_TCP; 93 fl6.flowi6_proto = IPPROTO_TCP;
94 ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); 94 ipv6_addr_copy(&fl6.saddr, &oip6h->daddr);
95 ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); 95 ipv6_addr_copy(&fl6.daddr, &oip6h->saddr);
96 fl.fl_ip_sport = otcph.dest; 96 fl6.fl6_sport = otcph.dest;
97 fl.fl_ip_dport = otcph.source; 97 fl6.fl6_dport = otcph.source;
98 security_skb_classify_flow(oldskb, &fl); 98 security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
99 dst = ip6_route_output(net, NULL, &fl); 99 dst = ip6_route_output(net, NULL, &fl6);
100 if (dst == NULL || dst->error) { 100 if (dst == NULL || dst->error) {
101 dst_release(dst); 101 dst_release(dst);
102 return; 102 return;
103 } 103 }
104 if (xfrm_lookup(net, &dst, &fl, NULL, 0)) 104 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
105 if (IS_ERR(dst))
105 return; 106 return;
106 107
107 hh_len = (dst->dev->hard_header_len + 15)&~15; 108 hh_len = (dst->dev->hard_header_len + 15)&~15;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 79d43aa8fa8..08572726381 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -45,6 +45,7 @@
45#include <linux/netfilter_ipv6.h> 45#include <linux/netfilter_ipv6.h>
46#include <linux/kernel.h> 46#include <linux/kernel.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
48 49
49 50
50struct nf_ct_frag6_skb_cb 51struct nf_ct_frag6_skb_cb
@@ -73,7 +74,7 @@ static struct inet_frags nf_frags;
73static struct netns_frags nf_init_frags; 74static struct netns_frags nf_init_frags;
74 75
75#ifdef CONFIG_SYSCTL 76#ifdef CONFIG_SYSCTL
76struct ctl_table nf_ct_frag6_sysctl_table[] = { 77static struct ctl_table nf_ct_frag6_sysctl_table[] = {
77 { 78 {
78 .procname = "nf_conntrack_frag6_timeout", 79 .procname = "nf_conntrack_frag6_timeout",
79 .data = &nf_init_frags.timeout, 80 .data = &nf_init_frags.timeout,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index c5b0915d106..4a1c3b46c56 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -124,18 +124,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
124} 124}
125 125
126#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 126#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
127static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); 127typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
128 128
129int rawv6_mh_filter_register(int (*filter)(struct sock *sock, 129static mh_filter_t __rcu *mh_filter __read_mostly;
130 struct sk_buff *skb)) 130
131int rawv6_mh_filter_register(mh_filter_t filter)
131{ 132{
132 rcu_assign_pointer(mh_filter, filter); 133 rcu_assign_pointer(mh_filter, filter);
133 return 0; 134 return 0;
134} 135}
135EXPORT_SYMBOL(rawv6_mh_filter_register); 136EXPORT_SYMBOL(rawv6_mh_filter_register);
136 137
137int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, 138int rawv6_mh_filter_unregister(mh_filter_t filter)
138 struct sk_buff *skb))
139{ 139{
140 rcu_assign_pointer(mh_filter, NULL); 140 rcu_assign_pointer(mh_filter, NULL);
141 synchronize_rcu(); 141 synchronize_rcu();
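The raw.c hunk is a sparse-annotation cleanup: giving the MH filter hook a function typedef lets the pointer itself carry __rcu, which the bare function-pointer declaration could not express cleanly. The resulting publish/consume pair:

	typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
	static mh_filter_t __rcu *mh_filter __read_mostly;

	/* writer */
	rcu_assign_pointer(mh_filter, filter);

	/* reader, under rcu_read_lock() */
	mh_filter_t *filter = rcu_dereference(mh_filter);
	int filtered = filter ? (*filter)(sk, skb) : 0;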
@@ -193,10 +193,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
193 * policy is placed in rawv6_rcv() because it is 193 * policy is placed in rawv6_rcv() because it is
194 * required for each socket. 194 * required for each socket.
195 */ 195 */
196 int (*filter)(struct sock *sock, struct sk_buff *skb); 196 mh_filter_t *filter;
197 197
198 filter = rcu_dereference(mh_filter); 198 filter = rcu_dereference(mh_filter);
199 filtered = filter ? filter(sk, skb) : 0; 199 filtered = filter ? (*filter)(sk, skb) : 0;
200 break; 200 break;
201 } 201 }
202#endif 202#endif
@@ -524,7 +524,7 @@ csum_copy_err:
524 goto out; 524 goto out;
525} 525}
526 526
527static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, 527static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
528 struct raw6_sock *rp) 528 struct raw6_sock *rp)
529{ 529{
530 struct sk_buff *skb; 530 struct sk_buff *skb;
@@ -586,11 +586,10 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
586 if (unlikely(csum)) 586 if (unlikely(csum))
587 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); 587 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
588 588
589 csum = csum_ipv6_magic(&fl->fl6_src, 589 csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
590 &fl->fl6_dst, 590 total_len, fl6->flowi6_proto, tmp_csum);
591 total_len, fl->proto, tmp_csum);
592 591
593 if (csum == 0 && fl->proto == IPPROTO_UDP) 592 if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
594 csum = CSUM_MANGLED_0; 593 csum = CSUM_MANGLED_0;
595 594
596 if (skb_store_bits(skb, offset, &csum, 2)) 595 if (skb_store_bits(skb, offset, &csum, 2))
@@ -603,7 +602,7 @@ out:
603} 602}
604 603
605static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, 604static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
606 struct flowi *fl, struct dst_entry **dstp, 605 struct flowi6 *fl6, struct dst_entry **dstp,
607 unsigned int flags) 606 unsigned int flags)
608{ 607{
609 struct ipv6_pinfo *np = inet6_sk(sk); 608 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -613,7 +612,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
613 struct rt6_info *rt = (struct rt6_info *)*dstp; 612 struct rt6_info *rt = (struct rt6_info *)*dstp;
614 613
615 if (length > rt->dst.dev->mtu) { 614 if (length > rt->dst.dev->mtu) {
616 ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); 615 ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
617 return -EMSGSIZE; 616 return -EMSGSIZE;
618 } 617 }
619 if (flags&MSG_PROBE) 618 if (flags&MSG_PROBE)
@@ -662,7 +661,7 @@ error:
662 return err; 661 return err;
663} 662}
664 663
665static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) 664static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
666{ 665{
667 struct iovec *iov; 666 struct iovec *iov;
668 u8 __user *type = NULL; 667 u8 __user *type = NULL;
@@ -679,7 +678,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
679 if (!iov) 678 if (!iov)
680 continue; 679 continue;
681 680
682 switch (fl->proto) { 681 switch (fl6->flowi6_proto) {
683 case IPPROTO_ICMPV6: 682 case IPPROTO_ICMPV6:
684 /* check if one-byte field is readable or not. */ 683 /* check if one-byte field is readable or not. */
685 if (iov->iov_base && iov->iov_len < 1) 684 if (iov->iov_base && iov->iov_len < 1)
@@ -694,8 +693,8 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
694 code = iov->iov_base; 693 code = iov->iov_base;
695 694
696 if (type && code) { 695 if (type && code) {
697 if (get_user(fl->fl_icmp_type, type) || 696 if (get_user(fl6->fl6_icmp_type, type) ||
698 get_user(fl->fl_icmp_code, code)) 697 get_user(fl6->fl6_icmp_code, code))
699 return -EFAULT; 698 return -EFAULT;
700 probed = 1; 699 probed = 1;
701 } 700 }
@@ -706,7 +705,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
706 /* check if type field is readable or not. */ 705 /* check if type field is readable or not. */
707 if (iov->iov_len > 2 - len) { 706 if (iov->iov_len > 2 - len) {
708 u8 __user *p = iov->iov_base; 707 u8 __user *p = iov->iov_base;
709 if (get_user(fl->fl_mh_type, &p[2 - len])) 708 if (get_user(fl6->fl6_mh_type, &p[2 - len]))
710 return -EFAULT; 709 return -EFAULT;
711 probed = 1; 710 probed = 1;
712 } else 711 } else
@@ -735,7 +734,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
735 struct ipv6_txoptions *opt = NULL; 734 struct ipv6_txoptions *opt = NULL;
736 struct ip6_flowlabel *flowlabel = NULL; 735 struct ip6_flowlabel *flowlabel = NULL;
737 struct dst_entry *dst = NULL; 736 struct dst_entry *dst = NULL;
738 struct flowi fl; 737 struct flowi6 fl6;
739 int addr_len = msg->msg_namelen; 738 int addr_len = msg->msg_namelen;
740 int hlimit = -1; 739 int hlimit = -1;
741 int tclass = -1; 740 int tclass = -1;
@@ -756,9 +755,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
756 /* 755 /*
757 * Get and verify the address. 756 * Get and verify the address.
758 */ 757 */
759 memset(&fl, 0, sizeof(fl)); 758 memset(&fl6, 0, sizeof(fl6));
760 759
761 fl.mark = sk->sk_mark; 760 fl6.flowi6_mark = sk->sk_mark;
762 761
763 if (sin6) { 762 if (sin6) {
764 if (addr_len < SIN6_LEN_RFC2133) 763 if (addr_len < SIN6_LEN_RFC2133)
@@ -780,9 +779,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
780 779
781 daddr = &sin6->sin6_addr; 780 daddr = &sin6->sin6_addr;
782 if (np->sndflow) { 781 if (np->sndflow) {
783 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; 782 fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
784 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 783 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
785 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 784 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
786 if (flowlabel == NULL) 785 if (flowlabel == NULL)
787 return -EINVAL; 786 return -EINVAL;
788 daddr = &flowlabel->dst; 787 daddr = &flowlabel->dst;
@@ -800,32 +799,32 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
800 if (addr_len >= sizeof(struct sockaddr_in6) && 799 if (addr_len >= sizeof(struct sockaddr_in6) &&
801 sin6->sin6_scope_id && 800 sin6->sin6_scope_id &&
802 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 801 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
803 fl.oif = sin6->sin6_scope_id; 802 fl6.flowi6_oif = sin6->sin6_scope_id;
804 } else { 803 } else {
805 if (sk->sk_state != TCP_ESTABLISHED) 804 if (sk->sk_state != TCP_ESTABLISHED)
806 return -EDESTADDRREQ; 805 return -EDESTADDRREQ;
807 806
808 proto = inet->inet_num; 807 proto = inet->inet_num;
809 daddr = &np->daddr; 808 daddr = &np->daddr;
810 fl.fl6_flowlabel = np->flow_label; 809 fl6.flowlabel = np->flow_label;
811 } 810 }
812 811
813 if (fl.oif == 0) 812 if (fl6.flowi6_oif == 0)
814 fl.oif = sk->sk_bound_dev_if; 813 fl6.flowi6_oif = sk->sk_bound_dev_if;
815 814
816 if (msg->msg_controllen) { 815 if (msg->msg_controllen) {
817 opt = &opt_space; 816 opt = &opt_space;
818 memset(opt, 0, sizeof(struct ipv6_txoptions)); 817 memset(opt, 0, sizeof(struct ipv6_txoptions));
819 opt->tot_len = sizeof(struct ipv6_txoptions); 818 opt->tot_len = sizeof(struct ipv6_txoptions);
820 819
821 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, 820 err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
822 &tclass, &dontfrag); 821 &tclass, &dontfrag);
823 if (err < 0) { 822 if (err < 0) {
824 fl6_sock_release(flowlabel); 823 fl6_sock_release(flowlabel);
825 return err; 824 return err;
826 } 825 }
827 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { 826 if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
828 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 827 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
829 if (flowlabel == NULL) 828 if (flowlabel == NULL)
830 return -EINVAL; 829 return -EINVAL;
831 } 830 }
@@ -838,40 +837,31 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
838 opt = fl6_merge_options(&opt_space, flowlabel, opt); 837 opt = fl6_merge_options(&opt_space, flowlabel, opt);
839 opt = ipv6_fixup_options(&opt_space, opt); 838 opt = ipv6_fixup_options(&opt_space, opt);
840 839
841 fl.proto = proto; 840 fl6.flowi6_proto = proto;
842 err = rawv6_probe_proto_opt(&fl, msg); 841 err = rawv6_probe_proto_opt(&fl6, msg);
843 if (err) 842 if (err)
844 goto out; 843 goto out;
845 844
846 if (!ipv6_addr_any(daddr)) 845 if (!ipv6_addr_any(daddr))
847 ipv6_addr_copy(&fl.fl6_dst, daddr); 846 ipv6_addr_copy(&fl6.daddr, daddr);
848 else 847 else
849 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 848 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
850 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 849 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
851 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 850 ipv6_addr_copy(&fl6.saddr, &np->saddr);
852 851
853 final_p = fl6_update_dst(&fl, opt, &final); 852 final_p = fl6_update_dst(&fl6, opt, &final);
854 853
855 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 854 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
856 fl.oif = np->mcast_oif; 855 fl6.flowi6_oif = np->mcast_oif;
857 security_sk_classify_flow(sk, &fl); 856 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
858 857
859 err = ip6_dst_lookup(sk, &dst, &fl); 858 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
860 if (err) 859 if (IS_ERR(dst)) {
860 err = PTR_ERR(dst);
861 goto out; 861 goto out;
862 if (final_p)
863 ipv6_addr_copy(&fl.fl6_dst, final_p);
864
865 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
866 if (err < 0) {
867 if (err == -EREMOTE)
868 err = ip6_dst_blackhole(sk, &dst, &fl);
869 if (err < 0)
870 goto out;
871 } 862 }
872
873 if (hlimit < 0) { 863 if (hlimit < 0) {
874 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 864 if (ipv6_addr_is_multicast(&fl6.daddr))
875 hlimit = np->mcast_hops; 865 hlimit = np->mcast_hops;
876 else 866 else
877 hlimit = np->hop_limit; 867 hlimit = np->hop_limit;
@@ -890,17 +880,17 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
890 880
891back_from_confirm: 881back_from_confirm:
892 if (inet->hdrincl) 882 if (inet->hdrincl)
893 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); 883 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
894 else { 884 else {
895 lock_sock(sk); 885 lock_sock(sk);
896 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, 886 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
897 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, 887 len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst,
898 msg->msg_flags, dontfrag); 888 msg->msg_flags, dontfrag);
899 889
900 if (err) 890 if (err)
901 ip6_flush_pending_frames(sk); 891 ip6_flush_pending_frames(sk);
902 else if (!(msg->msg_flags & MSG_MORE)) 892 else if (!(msg->msg_flags & MSG_MORE))
903 err = rawv6_push_pending_frames(sk, &fl, rp); 893 err = rawv6_push_pending_frames(sk, &fl6, rp);
904 release_sock(sk); 894 release_sock(sk);
905 } 895 }
906done: 896done:
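rawv6_sendmsg() loses the old three-step routing dance — ip6_dst_lookup(), the final_p destination fixup, then __xfrm_lookup() with an explicit -EREMOTE blackhole fallback. ip6_dst_lookup_flow() folds all of that in and reports failure as an ERR_PTR; the trailing bool says whether the xfrm lookup may sleep. Condensed:

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto out;
	}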
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 904312e25a3..6814c8722fa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -97,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
97 struct in6_addr *gwaddr, int ifindex); 97 struct in6_addr *gwaddr, int ifindex);
98#endif 98#endif
99 99
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
100static struct dst_ops ip6_dst_ops_template = { 130static struct dst_ops ip6_dst_ops_template = {
101 .family = AF_INET6, 131 .family = AF_INET6,
102 .protocol = cpu_to_be16(ETH_P_IPV6), 132 .protocol = cpu_to_be16(ETH_P_IPV6),
@@ -105,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = {
105 .check = ip6_dst_check, 135 .check = ip6_dst_check,
106 .default_advmss = ip6_default_advmss, 136 .default_advmss = ip6_default_advmss,
107 .default_mtu = ip6_default_mtu, 137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
108 .destroy = ip6_dst_destroy, 139 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown, 140 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice, 141 .negative_advice = ip6_negative_advice,
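ipv6_cow_metrics() above is the IPv6 half of moving writable route metrics into the inet_peer cache. Reads keep using the dst's current metrics pointer; the first write to a dst still pointing at the shared read-only template fires the ->cow_metrics hook, which claims the peer's metrics array with cmpxchg so concurrent writers converge on a single copy (a losing racer re-reads the winner's pointer, or backs off if it is read-only). From the caller's side nothing changes (new_mtu is a placeholder):

	u32 mtu = dst_metric_raw(dst, RTAX_MTU);	/* read: no COW */
	dst_metric_set(dst, RTAX_MTU, new_mtu);		/* first write: ->cow_metrics */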
@@ -132,6 +163,10 @@ static struct dst_ops ip6_dst_blackhole_ops = {
132 .update_pmtu = ip6_rt_blackhole_update_pmtu, 163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
133}; 164};
134 165
166static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168};
169
135static struct rt6_info ip6_null_entry_template = { 170static struct rt6_info ip6_null_entry_template = {
136 .dst = { 171 .dst = {
137 .__refcnt = ATOMIC_INIT(1), 172 .__refcnt = ATOMIC_INIT(1),
@@ -187,7 +222,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
187/* allocate dst with ip6_dst_ops */ 222/* allocate dst with ip6_dst_ops */
188static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) 223static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
189{ 224{
190 return (struct rt6_info *)dst_alloc(ops); 225 return (struct rt6_info *)dst_alloc(ops, 0);
191} 226}
192 227
193static void ip6_dst_destroy(struct dst_entry *dst) 228static void ip6_dst_destroy(struct dst_entry *dst)
@@ -206,6 +241,13 @@ static void ip6_dst_destroy(struct dst_entry *dst)
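dst_alloc() now takes the initial reference count as a second argument. Cached IPv6 routes start at zero (insertion into the FIB takes its own reference), while the blackhole clone later in this file passes 1 and drops the manual atomic_set() on __refcnt. A sketch of both call sites:

	/* ordinary route: refcount starts at 0 */
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops, 0);

	/* blackhole clone: born holding one reference */
	struct rt6_info *bh = dst_alloc(&ip6_dst_blackhole_ops, 1);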
206 } 241 }
207} 242}
208 243
244static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246static u32 rt6_peer_genid(void)
247{
248 return atomic_read(&__rt6_peer_genid);
249}
250
209void rt6_bind_peer(struct rt6_info *rt, int create) 251void rt6_bind_peer(struct rt6_info *rt, int create)
210{ 252{
211 struct inet_peer *peer; 253 struct inet_peer *peer;
@@ -213,6 +255,8 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
213 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); 255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
214 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) 256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
215 inet_putpeer(peer); 257 inet_putpeer(peer);
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
216} 260}
217 261
218static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 262static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -555,17 +599,17 @@ do { \
555 599
556static struct rt6_info *ip6_pol_route_lookup(struct net *net, 600static struct rt6_info *ip6_pol_route_lookup(struct net *net,
557 struct fib6_table *table, 601 struct fib6_table *table,
558 struct flowi *fl, int flags) 602 struct flowi6 *fl6, int flags)
559{ 603{
560 struct fib6_node *fn; 604 struct fib6_node *fn;
561 struct rt6_info *rt; 605 struct rt6_info *rt;
562 606
563 read_lock_bh(&table->tb6_lock); 607 read_lock_bh(&table->tb6_lock);
564 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 608 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
565restart: 609restart:
566 rt = fn->leaf; 610 rt = fn->leaf;
567 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); 611 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
568 BACKTRACK(net, &fl->fl6_src); 612 BACKTRACK(net, &fl6->saddr);
569out: 613out:
570 dst_use(&rt->dst, jiffies); 614 dst_use(&rt->dst, jiffies);
571 read_unlock_bh(&table->tb6_lock); 615 read_unlock_bh(&table->tb6_lock);
@@ -576,19 +620,19 @@ out:
576struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 620struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
577 const struct in6_addr *saddr, int oif, int strict) 621 const struct in6_addr *saddr, int oif, int strict)
578{ 622{
579 struct flowi fl = { 623 struct flowi6 fl6 = {
580 .oif = oif, 624 .flowi6_oif = oif,
581 .fl6_dst = *daddr, 625 .daddr = *daddr,
582 }; 626 };
583 struct dst_entry *dst; 627 struct dst_entry *dst;
584 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
585 629
586 if (saddr) { 630 if (saddr) {
587 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 631 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
588 flags |= RT6_LOOKUP_F_HAS_SADDR; 632 flags |= RT6_LOOKUP_F_HAS_SADDR;
589 } 633 }
590 634
591 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); 635 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
592 if (dst->error == 0) 636 if (dst->error == 0)
593 return (struct rt6_info *) dst; 637 return (struct rt6_info *) dst;
594 638
@@ -709,7 +753,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
709} 753}
710 754
711static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 755static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
712 struct flowi *fl, int flags) 756 struct flowi6 *fl6, int flags)
713{ 757{
714 struct fib6_node *fn; 758 struct fib6_node *fn;
715 struct rt6_info *rt, *nrt; 759 struct rt6_info *rt, *nrt;
@@ -724,12 +768,12 @@ relookup:
724 read_lock_bh(&table->tb6_lock); 768 read_lock_bh(&table->tb6_lock);
725 769
726restart_2: 770restart_2:
727 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 771 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
728 772
729restart: 773restart:
730 rt = rt6_select(fn, oif, strict | reachable); 774 rt = rt6_select(fn, oif, strict | reachable);
731 775
732 BACKTRACK(net, &fl->fl6_src); 776 BACKTRACK(net, &fl6->saddr);
733 if (rt == net->ipv6.ip6_null_entry || 777 if (rt == net->ipv6.ip6_null_entry ||
734 rt->rt6i_flags & RTF_CACHE) 778 rt->rt6i_flags & RTF_CACHE)
735 goto out; 779 goto out;
@@ -738,9 +782,11 @@ restart:
738 read_unlock_bh(&table->tb6_lock); 782 read_unlock_bh(&table->tb6_lock);
739 783
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 785 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
786 else if (!(rt->dst.flags & DST_HOST))
787 nrt = rt6_alloc_clone(rt, &fl6->daddr);
742 else 788 else
743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 789 goto out2;
744 790
745 dst_release(&rt->dst); 791 dst_release(&rt->dst);
746 rt = nrt ? : net->ipv6.ip6_null_entry; 792 rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -777,9 +823,9 @@ out2:
777} 823}
778 824
779static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 825static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
780 struct flowi *fl, int flags) 826 struct flowi6 *fl6, int flags)
781{ 827{
782 return ip6_pol_route(net, table, fl->iif, fl, flags); 828 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
783} 829}
784 830
785void ip6_route_input(struct sk_buff *skb) 831void ip6_route_input(struct sk_buff *skb)
@@ -787,56 +833,54 @@ void ip6_route_input(struct sk_buff *skb)
787 struct ipv6hdr *iph = ipv6_hdr(skb); 833 struct ipv6hdr *iph = ipv6_hdr(skb);
788 struct net *net = dev_net(skb->dev); 834 struct net *net = dev_net(skb->dev);
789 int flags = RT6_LOOKUP_F_HAS_SADDR; 835 int flags = RT6_LOOKUP_F_HAS_SADDR;
790 struct flowi fl = { 836 struct flowi6 fl6 = {
791 .iif = skb->dev->ifindex, 837 .flowi6_iif = skb->dev->ifindex,
792 .fl6_dst = iph->daddr, 838 .daddr = iph->daddr,
793 .fl6_src = iph->saddr, 839 .saddr = iph->saddr,
794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 840 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
795 .mark = skb->mark, 841 .flowi6_mark = skb->mark,
796 .proto = iph->nexthdr, 842 .flowi6_proto = iph->nexthdr,
797 }; 843 };
798 844
799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 845 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
800 flags |= RT6_LOOKUP_F_IFACE; 846 flags |= RT6_LOOKUP_F_IFACE;
801 847
802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); 848 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
803} 849}
804 850
805static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 851static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
806 struct flowi *fl, int flags) 852 struct flowi6 *fl6, int flags)
807{ 853{
808 return ip6_pol_route(net, table, fl->oif, fl, flags); 854 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
809} 855}
810 856
811struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 857struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl) 858 struct flowi6 *fl6)
813{ 859{
814 int flags = 0; 860 int flags = 0;
815 861
816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) 862 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
817 flags |= RT6_LOOKUP_F_IFACE; 863 flags |= RT6_LOOKUP_F_IFACE;
818 864
819 if (!ipv6_addr_any(&fl->fl6_src)) 865 if (!ipv6_addr_any(&fl6->saddr))
820 flags |= RT6_LOOKUP_F_HAS_SADDR; 866 flags |= RT6_LOOKUP_F_HAS_SADDR;
821 else if (sk) 867 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 868 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
823 869
824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 870 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
825} 871}
826 872
827EXPORT_SYMBOL(ip6_route_output); 873EXPORT_SYMBOL(ip6_route_output);
828 874
829int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 875struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
830{ 876{
831 struct rt6_info *ort = (struct rt6_info *) *dstp; 877 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
832 struct rt6_info *rt = (struct rt6_info *) 878 struct rt6_info *ort = (struct rt6_info *) dst_orig;
833 dst_alloc(&ip6_dst_blackhole_ops);
834 struct dst_entry *new = NULL; 879 struct dst_entry *new = NULL;
835 880
836 if (rt) { 881 if (rt) {
837 new = &rt->dst; 882 new = &rt->dst;
838 883
839 atomic_set(&new->__refcnt, 1);
840 new->__use = 1; 884 new->__use = 1;
841 new->input = dst_discard; 885 new->input = dst_discard;
842 new->output = dst_discard; 886 new->output = dst_discard;
@@ -862,11 +906,9 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
862 dst_free(new); 906 dst_free(new);
863 } 907 }
864 908
865 dst_release(*dstp); 909 dst_release(dst_orig);
866 *dstp = new; 910 return new ? new : ERR_PTR(-ENOMEM);
867 return new ? 0 : -ENOMEM;
868} 911}
869EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
870 912
871/* 913/*
872 * Destination cache support functions 914 * Destination cache support functions
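ip6_dst_blackhole() becomes ip6_blackhole_route(): rather than swapping the caller's dst through a pointer-to-pointer and returning an int, it consumes dst_orig (released on every path) and returns the blackhole clone or ERR_PTR(-ENOMEM). The caller side shrinks to:

	dst = ip6_blackhole_route(net, dst);
	if (IS_ERR(dst))
		return PTR_ERR(dst);	/* original dst already released */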
@@ -878,9 +920,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
878 920
879 rt = (struct rt6_info *) dst; 921 rt = (struct rt6_info *) dst;
880 922
881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 923 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
924 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
925 if (!rt->rt6i_peer)
926 rt6_bind_peer(rt, 0);
927 rt->rt6i_peer_genid = rt6_peer_genid();
928 }
882 return dst; 929 return dst;
883 930 }
884 return NULL; 931 return NULL;
885} 932}
886 933
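ip6_dst_check() now validates two generations: the fib serial number as before, plus a global peer generation. If __rt6_peer_genid has moved since this route last checked, the route (re)binds its inet_peer and records the new value, so flushing the peer cache invalidates stale peer pointers lazily instead of walking every cached route. A hypothetical invalidation helper (not part of this diff) would simply be:

	/* hypothetical: force every cached rt6_info to refresh its peer
	 * binding on its next dst_check() */
	static inline void rt6_peer_genid_bump(void)
	{
		atomic_inc(&__rt6_peer_genid);
	}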
@@ -931,7 +978,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
931 dst_metric_set(dst, RTAX_FEATURES, features); 978 dst_metric_set(dst, RTAX_FEATURES, features);
932 } 979 }
933 dst_metric_set(dst, RTAX_MTU, mtu); 980 dst_metric_set(dst, RTAX_MTU, mtu);
934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
935 } 981 }
936} 982}
937 983
@@ -1028,11 +1074,9 @@ out:
1028 1074
1029int icmp6_dst_gc(void) 1075int icmp6_dst_gc(void)
1030{ 1076{
1031 struct dst_entry *dst, *next, **pprev; 1077 struct dst_entry *dst, **pprev;
1032 int more = 0; 1078 int more = 0;
1033 1079
1034 next = NULL;
1035
1036 spin_lock_bh(&icmp6_dst_lock); 1080 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list; 1081 pprev = &icmp6_dst_gc_list;
1038 1082
@@ -1400,16 +1444,16 @@ static int ip6_route_del(struct fib6_config *cfg)
1400 * Handle redirects 1444 * Handle redirects
1401 */ 1445 */
1402struct ip6rd_flowi { 1446struct ip6rd_flowi {
1403 struct flowi fl; 1447 struct flowi6 fl6;
1404 struct in6_addr gateway; 1448 struct in6_addr gateway;
1405}; 1449};
1406 1450
1407static struct rt6_info *__ip6_route_redirect(struct net *net, 1451static struct rt6_info *__ip6_route_redirect(struct net *net,
1408 struct fib6_table *table, 1452 struct fib6_table *table,
1409 struct flowi *fl, 1453 struct flowi6 *fl6,
1410 int flags) 1454 int flags)
1411{ 1455{
1412 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1456 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1413 struct rt6_info *rt; 1457 struct rt6_info *rt;
1414 struct fib6_node *fn; 1458 struct fib6_node *fn;
1415 1459
@@ -1425,7 +1469,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
1425 */ 1469 */
1426 1470
1427 read_lock_bh(&table->tb6_lock); 1471 read_lock_bh(&table->tb6_lock);
1428 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1472 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1429restart: 1473restart:
1430 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1474 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1431 /* 1475 /*
@@ -1440,7 +1484,7 @@ restart:
1440 continue; 1484 continue;
1441 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1485 if (!(rt->rt6i_flags & RTF_GATEWAY))
1442 continue; 1486 continue;
1443 if (fl->oif != rt->rt6i_dev->ifindex) 1487 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1444 continue; 1488 continue;
1445 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1489 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1446 continue; 1490 continue;
@@ -1449,7 +1493,7 @@ restart:
1449 1493
1450 if (!rt) 1494 if (!rt)
1451 rt = net->ipv6.ip6_null_entry; 1495 rt = net->ipv6.ip6_null_entry;
1452 BACKTRACK(net, &fl->fl6_src); 1496 BACKTRACK(net, &fl6->saddr);
1453out: 1497out:
1454 dst_hold(&rt->dst); 1498 dst_hold(&rt->dst);
1455 1499
@@ -1466,10 +1510,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1466 int flags = RT6_LOOKUP_F_HAS_SADDR; 1510 int flags = RT6_LOOKUP_F_HAS_SADDR;
1467 struct net *net = dev_net(dev); 1511 struct net *net = dev_net(dev);
1468 struct ip6rd_flowi rdfl = { 1512 struct ip6rd_flowi rdfl = {
1469 .fl = { 1513 .fl6 = {
1470 .oif = dev->ifindex, 1514 .flowi6_oif = dev->ifindex,
1471 .fl6_dst = *dest, 1515 .daddr = *dest,
1472 .fl6_src = *src, 1516 .saddr = *src,
1473 }, 1517 },
1474 }; 1518 };
1475 1519
@@ -1478,7 +1522,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1478 if (rt6_need_strict(dest)) 1522 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE; 1523 flags |= RT6_LOOKUP_F_IFACE;
1480 1524
1481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, 1525 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1482 flags, __ip6_route_redirect); 1526 flags, __ip6_route_redirect);
1483} 1527}
1484 1528
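__ip6_route_redirect() recovers its extra gateway argument by casting the flowi6 pointer back to the enclosing ip6rd_flowi, which is only valid while fl6 remains the first member. A compilable miniature of the idiom; fields are abbreviated, and the real gateway is an in6_addr:

#include <stdio.h>

struct flowi6 { int flowi6_oif; };

struct ip6rd_flowi {
	struct flowi6 fl6;	/* must stay the first member */
	int gateway;		/* abbreviated: really an in6_addr */
};

static void lookup_cb(struct flowi6 *fl6)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;

	printf("oif=%d gateway=%d\n", fl6->flowi6_oif, rdfl->gateway);
}

int main(void)
{
	struct ip6rd_flowi rdfl = { .fl6 = { .flowi6_oif = 3 }, .gateway = 7 };

	lookup_cb(&rdfl.fl6);
	return 0;
}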
@@ -1980,12 +2024,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1980 if (IS_ERR(neigh)) { 2024 if (IS_ERR(neigh)) {
1981 dst_free(&rt->dst); 2025 dst_free(&rt->dst);
1982 2026
1983 /* We are casting this because that is the return 2027 return ERR_CAST(neigh);
1984 * value type. But an errno encoded pointer is the
1985 * same regardless of the underlying pointer type,
1986 * and that's what we are returning. So this is OK.
1987 */
1988 return (struct rt6_info *) neigh;
1989 } 2028 }
1990 rt->rt6i_nexthop = neigh; 2029 rt->rt6i_nexthop = neigh;
1991 2030
@@ -2346,7 +2385,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2346 struct rt6_info *rt; 2385 struct rt6_info *rt;
2347 struct sk_buff *skb; 2386 struct sk_buff *skb;
2348 struct rtmsg *rtm; 2387 struct rtmsg *rtm;
2349 struct flowi fl; 2388 struct flowi6 fl6;
2350 int err, iif = 0; 2389 int err, iif = 0;
2351 2390
2352 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2391 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
@@ -2354,27 +2393,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2354 goto errout; 2393 goto errout;
2355 2394
2356 err = -EINVAL; 2395 err = -EINVAL;
2357 memset(&fl, 0, sizeof(fl)); 2396 memset(&fl6, 0, sizeof(fl6));
2358 2397
2359 if (tb[RTA_SRC]) { 2398 if (tb[RTA_SRC]) {
2360 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2399 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2361 goto errout; 2400 goto errout;
2362 2401
2363 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2402 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2364 } 2403 }
2365 2404
2366 if (tb[RTA_DST]) { 2405 if (tb[RTA_DST]) {
2367 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2406 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2368 goto errout; 2407 goto errout;
2369 2408
2370 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2409 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2371 } 2410 }
2372 2411
2373 if (tb[RTA_IIF]) 2412 if (tb[RTA_IIF])
2374 iif = nla_get_u32(tb[RTA_IIF]); 2413 iif = nla_get_u32(tb[RTA_IIF]);
2375 2414
2376 if (tb[RTA_OIF]) 2415 if (tb[RTA_OIF])
2377 fl.oif = nla_get_u32(tb[RTA_OIF]); 2416 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2378 2417
2379 if (iif) { 2418 if (iif) {
2380 struct net_device *dev; 2419 struct net_device *dev;
@@ -2397,10 +2436,10 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2397 skb_reset_mac_header(skb); 2436 skb_reset_mac_header(skb);
2398 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2437 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2399 2438
2400 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); 2439 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2401 skb_dst_set(skb, &rt->dst); 2440 skb_dst_set(skb, &rt->dst);
2402 2441
2403 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2442 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2404 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2443 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2405 nlh->nlmsg_seq, 0, 0, 0); 2444 nlh->nlmsg_seq, 0, 0, 0);
2406 if (err < 0) { 2445 if (err < 0) {
@@ -2687,7 +2726,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2687 net->ipv6.ip6_null_entry->dst.path = 2726 net->ipv6.ip6_null_entry->dst.path =
2688 (struct dst_entry *)net->ipv6.ip6_null_entry; 2727 (struct dst_entry *)net->ipv6.ip6_null_entry;
2689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2728 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2690 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); 2729 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2730 ip6_template_metrics, true);
2691 2731
2692#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2732#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2733 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2698,7 +2738,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2698 net->ipv6.ip6_prohibit_entry->dst.path = 2738 net->ipv6.ip6_prohibit_entry->dst.path =
2699 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2739 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2700 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2740 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2701 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); 2741 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2742 ip6_template_metrics, true);
2702 2743
2703 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2744 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2704 sizeof(*net->ipv6.ip6_blk_hole_entry), 2745 sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2708,7 +2749,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2708 net->ipv6.ip6_blk_hole_entry->dst.path = 2749 net->ipv6.ip6_blk_hole_entry->dst.path =
2709 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2750 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2710 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2751 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2711 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); 2752 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2753 ip6_template_metrics, true);
2712#endif 2754#endif
2713 2755
2714 net->ipv6.sysctl.flush_delay = 0; 2756 net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a54..43b33373adb 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
412 412
413 p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); 413 p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
414 do { 414 do {
415 n = p->next; 415 n = rcu_dereference_protected(p->next, 1);
416 kfree(p); 416 kfree(p);
417 p = n; 417 p = n;
418 } while (p); 418 } while (p);
@@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
421static int 421static int
422ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) 422ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
423{ 423{
424 struct ip_tunnel_prl_entry *x, **p; 424 struct ip_tunnel_prl_entry *x;
425 struct ip_tunnel_prl_entry __rcu **p;
425 int err = 0; 426 int err = 0;
426 427
427 ASSERT_RTNL(); 428 ASSERT_RTNL();
428 429
429 if (a && a->addr != htonl(INADDR_ANY)) { 430 if (a && a->addr != htonl(INADDR_ANY)) {
430 for (p = &t->prl; *p; p = &(*p)->next) { 431 for (p = &t->prl;
431 if ((*p)->addr == a->addr) { 432 (x = rtnl_dereference(*p)) != NULL;
432 x = *p; 433 p = &x->next) {
434 if (x->addr == a->addr) {
433 *p = x->next; 435 *p = x->next;
434 call_rcu(&x->rcu_head, prl_entry_destroy_rcu); 436 call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
435 t->prl_count--; 437 t->prl_count--;
@@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
438 } 440 }
439 err = -ENXIO; 441 err = -ENXIO;
440 } else { 442 } else {
441 if (t->prl) { 443 x = rtnl_dereference(t->prl);
444 if (x) {
442 t->prl_count = 0; 445 t->prl_count = 0;
443 x = t->prl;
444 call_rcu(&x->rcu_head, prl_list_destroy_rcu); 446 call_rcu(&x->rcu_head, prl_list_destroy_rcu);
445 t->prl = NULL; 447 t->prl = NULL;
446 } 448 }
@@ -730,16 +732,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
730 dst = addr6->s6_addr32[3]; 732 dst = addr6->s6_addr32[3];
731 } 733 }
732 734
733 { 735 rt = ip_route_output_ports(dev_net(dev), NULL,
734 struct flowi fl = { .fl4_dst = dst, 736 dst, tiph->saddr,
735 .fl4_src = tiph->saddr, 737 0, 0,
736 .fl4_tos = RT_TOS(tos), 738 IPPROTO_IPV6, RT_TOS(tos),
737 .oif = tunnel->parms.link, 739 tunnel->parms.link);
738 .proto = IPPROTO_IPV6 }; 740 if (IS_ERR(rt)) {
739 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 741 dev->stats.tx_carrier_errors++;
740 dev->stats.tx_carrier_errors++; 742 goto tx_error_icmp;
741 goto tx_error_icmp;
742 }
743 } 743 }
744 if (rt->rt_type != RTN_UNICAST) { 744 if (rt->rt_type != RTN_UNICAST) {
745 ip_rt_put(rt); 745 ip_rt_put(rt);
@@ -855,13 +855,14 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
855 iph = &tunnel->parms.iph; 855 iph = &tunnel->parms.iph;
856 856
857 if (iph->daddr) { 857 if (iph->daddr) {
858 struct flowi fl = { .fl4_dst = iph->daddr, 858 struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL,
859 .fl4_src = iph->saddr, 859 iph->daddr, iph->saddr,
860 .fl4_tos = RT_TOS(iph->tos), 860 0, 0,
861 .oif = tunnel->parms.link, 861 IPPROTO_IPV6,
862 .proto = IPPROTO_IPV6 }; 862 RT_TOS(iph->tos),
863 struct rtable *rt; 863 tunnel->parms.link);
864 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 864
865 if (!IS_ERR(rt)) {
865 tdev = rt->dst.dev; 866 tdev = rt->dst.dev;
866 ip_rt_put(rt); 867 ip_rt_put(rt);
867 } 868 }
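Both sit.c call sites above drop the open-coded struct flowi in favour of a helper that takes the key fields as scalars and hands back the route, or an errno-encoded pointer, as the return value. A reduced stand-in showing the shape; none of this is the kernel routing code:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct rtable { uint32_t dst; };

static inline void *ERR_PTR(long e) { return (void *)e; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-4095;
}

static struct rtable *route_output_ports(uint32_t daddr, uint32_t saddr,
					 uint8_t proto, uint8_t tos, int oif)
{
	static struct rtable rt;

	(void)saddr; (void)proto; (void)tos; (void)oif;
	if (!daddr)			/* stub failure condition */
		return ERR_PTR(-ENETUNREACH);
	rt.dst = daddr;
	return &rt;
}

int main(void)
{
	struct rtable *rt = route_output_ports(0x0a000001, 0, 41, 0, 2);

	printf("%s\n", IS_ERR(rt) ? "unreachable" : "routed");
	return 0;
}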
@@ -1179,7 +1180,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1179 if (!dev->tstats) 1180 if (!dev->tstats)
1180 return -ENOMEM; 1181 return -ENOMEM;
1181 dev_hold(dev); 1182 dev_hold(dev);
1182 sitn->tunnels_wc[0] = tunnel; 1183 rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
1183 return 0; 1184 return 0;
1184} 1185}
1185 1186
@@ -1196,11 +1197,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
1196 for (prio = 1; prio < 4; prio++) { 1197 for (prio = 1; prio < 4; prio++) {
1197 int h; 1198 int h;
1198 for (h = 0; h < HASH_SIZE; h++) { 1199 for (h = 0; h < HASH_SIZE; h++) {
1199 struct ip_tunnel *t = sitn->tunnels[prio][h]; 1200 struct ip_tunnel *t;
1200 1201
1202 t = rtnl_dereference(sitn->tunnels[prio][h]);
1201 while (t != NULL) { 1203 while (t != NULL) {
1202 unregister_netdevice_queue(t->dev, head); 1204 unregister_netdevice_queue(t->dev, head);
1203 t = t->next; 1205 t = rtnl_dereference(t->next);
1204 } 1206 }
1205 } 1207 }
1206 } 1208 }
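The sit.c pointer-chasing changes are sparse-annotation work: next pointers gain __rcu, writers publish with rcu_assign_pointer(), and walkers that already hold RTNL use rtnl_dereference() instead of a bare load. A runnable sketch with reduced stand-in macros; the kernel versions add memory ordering and lockdep assertions, which these stubs omit:

#include <stdio.h>
#include <stdlib.h>

#define __rcu				/* checked by sparse, empty for gcc */
#define rcu_assign_pointer(p, v)	((p) = (v))
#define rtnl_dereference(p)		(p)

struct tunnel {
	struct tunnel __rcu *next;
	int id;
};

int main(void)
{
	struct tunnel *a = calloc(1, sizeof(*a));
	struct tunnel __rcu *head = NULL;

	a->id = 1;
	rcu_assign_pointer(head, a);		/* writer side publish */

	for (struct tunnel *t = rtnl_dereference(head); t;
	     t = rtnl_dereference(t->next))	/* reader under RTNL */
		printf("tunnel %d\n", t->id);

	free(a);
	return 0;
}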
@@ -1290,4 +1292,4 @@ static int __init sit_init(void)
1290module_init(sit_init); 1292module_init(sit_init);
1291module_exit(sit_cleanup); 1293module_exit(sit_cleanup);
1292MODULE_LICENSE("GPL"); 1294MODULE_LICENSE("GPL");
1293MODULE_ALIAS("sit0"); 1295MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 09fd34f0dbf..352c26081f5 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -232,23 +232,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
232 */ 232 */
233 { 233 {
234 struct in6_addr *final_p, final; 234 struct in6_addr *final_p, final;
235 struct flowi fl; 235 struct flowi6 fl6;
236 memset(&fl, 0, sizeof(fl)); 236 memset(&fl6, 0, sizeof(fl6));
237 fl.proto = IPPROTO_TCP; 237 fl6.flowi6_proto = IPPROTO_TCP;
238 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 238 ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
239 final_p = fl6_update_dst(&fl, np->opt, &final); 239 final_p = fl6_update_dst(&fl6, np->opt, &final);
240 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 240 ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
241 fl.oif = sk->sk_bound_dev_if; 241 fl6.flowi6_oif = sk->sk_bound_dev_if;
242 fl.mark = sk->sk_mark; 242 fl6.flowi6_mark = sk->sk_mark;
243 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 243 fl6.fl6_dport = inet_rsk(req)->rmt_port;
244 fl.fl_ip_sport = inet_sk(sk)->inet_sport; 244 fl6.fl6_sport = inet_sk(sk)->inet_sport;
245 security_req_classify_flow(req, &fl); 245 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
246 if (ip6_dst_lookup(sk, &dst, &fl)) 246
247 goto out_free; 247 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
248 248 if (IS_ERR(dst))
249 if (final_p)
250 ipv6_addr_copy(&fl.fl6_dst, final_p);
251 if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
252 goto out_free; 249 goto out_free;
253 } 250 }
254 251
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7cb65ef79f9..6dcf5e7d661 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,16 @@
17 17
18static struct ctl_table empty[1]; 18static struct ctl_table empty[1];
19 19
20static ctl_table ipv6_static_skeleton[] = {
21 {
22 .procname = "neigh",
23 .maxlen = 0,
24 .mode = 0555,
25 .child = empty,
26 },
27 { }
28};
29
20static ctl_table ipv6_table_template[] = { 30static ctl_table ipv6_table_template[] = {
21 { 31 {
22 .procname = "route", 32 .procname = "route",
@@ -37,12 +47,6 @@ static ctl_table ipv6_table_template[] = {
37 .mode = 0644, 47 .mode = 0644,
38 .proc_handler = proc_dointvec 48 .proc_handler = proc_dointvec
39 }, 49 },
40 {
41 .procname = "neigh",
42 .maxlen = 0,
43 .mode = 0555,
44 .child = empty,
45 },
46 { } 50 { }
47}; 51};
48 52
@@ -160,7 +164,7 @@ static struct ctl_table_header *ip6_base;
160 164
161int ipv6_static_sysctl_register(void) 165int ipv6_static_sysctl_register(void)
162{ 166{
163 ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); 167 ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
164 if (ip6_base == NULL) 168 if (ip6_base == NULL)
165 return -ENOMEM; 169 return -ENOMEM;
166 return 0; 170 return 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 20aa95e3735..2b0c186862c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -131,7 +131,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
131 struct tcp_sock *tp = tcp_sk(sk); 131 struct tcp_sock *tp = tcp_sk(sk);
132 struct in6_addr *saddr = NULL, *final_p, final; 132 struct in6_addr *saddr = NULL, *final_p, final;
133 struct rt6_info *rt; 133 struct rt6_info *rt;
134 struct flowi fl; 134 struct flowi6 fl6;
135 struct dst_entry *dst; 135 struct dst_entry *dst;
136 int addr_type; 136 int addr_type;
137 int err; 137 int err;
@@ -142,14 +142,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
142 if (usin->sin6_family != AF_INET6) 142 if (usin->sin6_family != AF_INET6)
143 return -EAFNOSUPPORT; 143 return -EAFNOSUPPORT;
144 144
145 memset(&fl, 0, sizeof(fl)); 145 memset(&fl6, 0, sizeof(fl6));
146 146
147 if (np->sndflow) { 147 if (np->sndflow) {
148 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 148 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
149 IP6_ECN_flow_init(fl.fl6_flowlabel); 149 IP6_ECN_flow_init(fl6.flowlabel);
150 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 150 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
151 struct ip6_flowlabel *flowlabel; 151 struct ip6_flowlabel *flowlabel;
152 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 152 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
153 if (flowlabel == NULL) 153 if (flowlabel == NULL)
154 return -EINVAL; 154 return -EINVAL;
155 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); 155 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
@@ -195,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
195 } 195 }
196 196
197 ipv6_addr_copy(&np->daddr, &usin->sin6_addr); 197 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
198 np->flow_label = fl.fl6_flowlabel; 198 np->flow_label = fl6.flowlabel;
199 199
200 /* 200 /*
201 * TCP over IPv4 201 * TCP over IPv4
@@ -242,35 +242,27 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
242 if (!ipv6_addr_any(&np->rcv_saddr)) 242 if (!ipv6_addr_any(&np->rcv_saddr))
243 saddr = &np->rcv_saddr; 243 saddr = &np->rcv_saddr;
244 244
245 fl.proto = IPPROTO_TCP; 245 fl6.flowi6_proto = IPPROTO_TCP;
246 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 246 ipv6_addr_copy(&fl6.daddr, &np->daddr);
247 ipv6_addr_copy(&fl.fl6_src, 247 ipv6_addr_copy(&fl6.saddr,
248 (saddr ? saddr : &np->saddr)); 248 (saddr ? saddr : &np->saddr));
249 fl.oif = sk->sk_bound_dev_if; 249 fl6.flowi6_oif = sk->sk_bound_dev_if;
250 fl.mark = sk->sk_mark; 250 fl6.flowi6_mark = sk->sk_mark;
251 fl.fl_ip_dport = usin->sin6_port; 251 fl6.fl6_dport = usin->sin6_port;
252 fl.fl_ip_sport = inet->inet_sport; 252 fl6.fl6_sport = inet->inet_sport;
253 253
254 final_p = fl6_update_dst(&fl, np->opt, &final); 254 final_p = fl6_update_dst(&fl6, np->opt, &final);
255 255
256 security_sk_classify_flow(sk, &fl); 256 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
257 257
258 err = ip6_dst_lookup(sk, &dst, &fl); 258 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
259 if (err) 259 if (IS_ERR(dst)) {
260 err = PTR_ERR(dst);
260 goto failure; 261 goto failure;
261 if (final_p)
262 ipv6_addr_copy(&fl.fl6_dst, final_p);
263
264 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
265 if (err < 0) {
266 if (err == -EREMOTE)
267 err = ip6_dst_blackhole(sk, &dst, &fl);
268 if (err < 0)
269 goto failure;
270 } 262 }
271 263
272 if (saddr == NULL) { 264 if (saddr == NULL) {
273 saddr = &fl.fl6_src; 265 saddr = &fl6.saddr;
274 ipv6_addr_copy(&np->rcv_saddr, saddr); 266 ipv6_addr_copy(&np->rcv_saddr, saddr);
275 } 267 }
276 268
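The connect-path rewrite above is the recurring pattern in this series: lookup, the optional routing-header final destination, and the xfrm transform collapse from three caller-side steps into one ip6_dst_lookup_flow() call that reports failure as an errno pointer. A reduced stub model of that consolidation; all functions here are illustrative, not kernel implementations:

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct in6_addr { unsigned char s6_addr[16]; };
struct flowi6 { struct in6_addr daddr; };
struct dst_entry { int users; };

static inline void *ERR_PTR(long e) { return (void *)e; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-4095;
}

static struct dst_entry *raw_lookup(struct flowi6 *fl6)
{
	static struct dst_entry d;

	(void)fl6;
	return &d;			/* stub: routing always succeeds */
}

static int xfrm_transform(struct dst_entry **dst, struct flowi6 *fl6)
{
	(void)dst; (void)fl6;
	return 0;			/* stub: no IPsec policy applies */
}

/* One call replaces lookup + final-dst rewrite + xfrm_lookup. */
static struct dst_entry *dst_lookup_flow(struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = raw_lookup(fl6);
	int err;

	if (!dst)
		return ERR_PTR(-ENOMEM);
	if (final_dst)		/* routing header's real last hop */
		memcpy(&fl6->daddr, final_dst, sizeof(fl6->daddr));
	err = xfrm_transform(&dst, fl6);
	return err ? ERR_PTR(err) : dst;
}

int main(void)
{
	struct flowi6 fl6 = { { { 0 } } };
	struct dst_entry *dst = dst_lookup_flow(&fl6, NULL);

	printf("%s\n", IS_ERR(dst) ? "failed" : "ok");
	return 0;
}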
@@ -385,7 +377,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
385 np = inet6_sk(sk); 377 np = inet6_sk(sk);
386 378
387 if (type == ICMPV6_PKT_TOOBIG) { 379 if (type == ICMPV6_PKT_TOOBIG) {
388 struct dst_entry *dst = NULL; 380 struct dst_entry *dst;
389 381
390 if (sock_owned_by_user(sk)) 382 if (sock_owned_by_user(sk))
391 goto out; 383 goto out;
@@ -397,29 +389,25 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
397 389
398 if (dst == NULL) { 390 if (dst == NULL) {
399 struct inet_sock *inet = inet_sk(sk); 391 struct inet_sock *inet = inet_sk(sk);
400 struct flowi fl; 392 struct flowi6 fl6;
401 393
402 /* BUGGG_FUTURE: Again, it is not clear how 394 /* BUGGG_FUTURE: Again, it is not clear how
403 to handle rthdr case. Ignore this complexity 395 to handle rthdr case. Ignore this complexity
404 for now. 396 for now.
405 */ 397 */
406 memset(&fl, 0, sizeof(fl)); 398 memset(&fl6, 0, sizeof(fl6));
407 fl.proto = IPPROTO_TCP; 399 fl6.flowi6_proto = IPPROTO_TCP;
408 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 400 ipv6_addr_copy(&fl6.daddr, &np->daddr);
409 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 401 ipv6_addr_copy(&fl6.saddr, &np->saddr);
410 fl.oif = sk->sk_bound_dev_if; 402 fl6.flowi6_oif = sk->sk_bound_dev_if;
411 fl.mark = sk->sk_mark; 403 fl6.flowi6_mark = sk->sk_mark;
412 fl.fl_ip_dport = inet->inet_dport; 404 fl6.fl6_dport = inet->inet_dport;
413 fl.fl_ip_sport = inet->inet_sport; 405 fl6.fl6_sport = inet->inet_sport;
414 security_skb_classify_flow(skb, &fl); 406 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
415 407
416 if ((err = ip6_dst_lookup(sk, &dst, &fl))) { 408 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
417 sk->sk_err_soft = -err; 409 if (IS_ERR(dst)) {
418 goto out; 410 sk->sk_err_soft = -PTR_ERR(dst);
419 }
420
421 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
422 sk->sk_err_soft = -err;
423 goto out; 411 goto out;
424 } 412 }
425 413
@@ -494,38 +482,36 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
494 struct sk_buff * skb; 482 struct sk_buff * skb;
495 struct ipv6_txoptions *opt = NULL; 483 struct ipv6_txoptions *opt = NULL;
496 struct in6_addr * final_p, final; 484 struct in6_addr * final_p, final;
497 struct flowi fl; 485 struct flowi6 fl6;
498 struct dst_entry *dst; 486 struct dst_entry *dst;
499 int err = -1; 487 int err;
500 488
501 memset(&fl, 0, sizeof(fl)); 489 memset(&fl6, 0, sizeof(fl6));
502 fl.proto = IPPROTO_TCP; 490 fl6.flowi6_proto = IPPROTO_TCP;
503 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 491 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
504 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); 492 ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
505 fl.fl6_flowlabel = 0; 493 fl6.flowlabel = 0;
506 fl.oif = treq->iif; 494 fl6.flowi6_oif = treq->iif;
507 fl.mark = sk->sk_mark; 495 fl6.flowi6_mark = sk->sk_mark;
508 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 496 fl6.fl6_dport = inet_rsk(req)->rmt_port;
509 fl.fl_ip_sport = inet_rsk(req)->loc_port; 497 fl6.fl6_sport = inet_rsk(req)->loc_port;
510 security_req_classify_flow(req, &fl); 498 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
511 499
512 opt = np->opt; 500 opt = np->opt;
513 final_p = fl6_update_dst(&fl, opt, &final); 501 final_p = fl6_update_dst(&fl6, opt, &final);
514 502
515 err = ip6_dst_lookup(sk, &dst, &fl); 503 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
516 if (err) 504 if (IS_ERR(dst)) {
517 goto done; 505 err = PTR_ERR(dst);
518 if (final_p)
519 ipv6_addr_copy(&fl.fl6_dst, final_p);
520 if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
521 goto done; 506 goto done;
522 507 }
523 skb = tcp_make_synack(sk, dst, req, rvp); 508 skb = tcp_make_synack(sk, dst, req, rvp);
509 err = -ENOMEM;
524 if (skb) { 510 if (skb) {
525 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); 511 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
526 512
527 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 513 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
528 err = ip6_xmit(sk, skb, &fl, opt); 514 err = ip6_xmit(sk, skb, &fl6, opt);
529 err = net_xmit_eval(err); 515 err = net_xmit_eval(err);
530 } 516 }
531 517
@@ -1006,7 +992,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
1006{ 992{
1007 struct tcphdr *th = tcp_hdr(skb), *t1; 993 struct tcphdr *th = tcp_hdr(skb), *t1;
1008 struct sk_buff *buff; 994 struct sk_buff *buff;
1009 struct flowi fl; 995 struct flowi6 fl6;
1010 struct net *net = dev_net(skb_dst(skb)->dev); 996 struct net *net = dev_net(skb_dst(skb)->dev);
1011 struct sock *ctl_sk = net->ipv6.tcp_sk; 997 struct sock *ctl_sk = net->ipv6.tcp_sk;
1012 unsigned int tot_len = sizeof(struct tcphdr); 998 unsigned int tot_len = sizeof(struct tcphdr);
@@ -1060,34 +1046,33 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
1060 } 1046 }
1061#endif 1047#endif
1062 1048
1063 memset(&fl, 0, sizeof(fl)); 1049 memset(&fl6, 0, sizeof(fl6));
1064 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); 1050 ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
1065 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); 1051 ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);
1066 1052
1067 buff->ip_summed = CHECKSUM_PARTIAL; 1053 buff->ip_summed = CHECKSUM_PARTIAL;
1068 buff->csum = 0; 1054 buff->csum = 0;
1069 1055
1070 __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); 1056 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
1071 1057
1072 fl.proto = IPPROTO_TCP; 1058 fl6.flowi6_proto = IPPROTO_TCP;
1073 fl.oif = inet6_iif(skb); 1059 fl6.flowi6_oif = inet6_iif(skb);
1074 fl.fl_ip_dport = t1->dest; 1060 fl6.fl6_dport = t1->dest;
1075 fl.fl_ip_sport = t1->source; 1061 fl6.fl6_sport = t1->source;
1076 security_skb_classify_flow(skb, &fl); 1062 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
1077 1063
 1078 /* Pass a socket to ip6_dst_lookup even if it is for RST; the 1064 /* Pass a socket to ip6_dst_lookup even if it is for RST; the
 1079 * underlying function will use it to retrieve the network 1065 * underlying function will use it to retrieve the network
 1080 * namespace 1066 * namespace
1081 */ 1067 */
1082 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { 1068 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
1083 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { 1069 if (!IS_ERR(dst)) {
1084 skb_dst_set(buff, dst); 1070 skb_dst_set(buff, dst);
1085 ip6_xmit(ctl_sk, buff, &fl, NULL); 1071 ip6_xmit(ctl_sk, buff, &fl6, NULL);
1086 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 1072 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1087 if (rst) 1073 if (rst)
1088 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 1074 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
1089 return; 1075 return;
1090 }
1091 } 1076 }
1092 1077
1093 kfree_skb(buff); 1078 kfree_skb(buff);
@@ -1323,7 +1308,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1323 tcp_death_row.sysctl_tw_recycle && 1308 tcp_death_row.sysctl_tw_recycle &&
1324 (dst = inet6_csk_route_req(sk, req)) != NULL && 1309 (dst = inet6_csk_route_req(sk, req)) != NULL &&
1325 (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && 1310 (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
1326 ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, 1311 ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
1327 &treq->rmt_addr)) { 1312 &treq->rmt_addr)) {
1328 inet_peer_refcheck(peer); 1313 inet_peer_refcheck(peer);
1329 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1314 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
@@ -1636,10 +1621,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1636 opt_skb = skb_clone(skb, GFP_ATOMIC); 1621 opt_skb = skb_clone(skb, GFP_ATOMIC);
1637 1622
1638 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1623 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1639 TCP_CHECK_TIMER(sk);
1640 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) 1624 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1641 goto reset; 1625 goto reset;
1642 TCP_CHECK_TIMER(sk);
1643 if (opt_skb) 1626 if (opt_skb)
1644 goto ipv6_pktoptions; 1627 goto ipv6_pktoptions;
1645 return 0; 1628 return 0;
@@ -1667,10 +1650,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1667 } 1650 }
1668 } 1651 }
1669 1652
1670 TCP_CHECK_TIMER(sk);
1671 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) 1653 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1672 goto reset; 1654 goto reset;
1673 TCP_CHECK_TIMER(sk);
1674 if (opt_skb) 1655 if (opt_skb)
1675 goto ipv6_pktoptions; 1656 goto ipv6_pktoptions;
1676 return 0; 1657 return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9a009c66c8a..d7037c006e1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -886,7 +886,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
886 struct udphdr *uh; 886 struct udphdr *uh;
887 struct udp_sock *up = udp_sk(sk); 887 struct udp_sock *up = udp_sk(sk);
888 struct inet_sock *inet = inet_sk(sk); 888 struct inet_sock *inet = inet_sk(sk);
889 struct flowi *fl = &inet->cork.fl; 889 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
890 int err = 0; 890 int err = 0;
891 int is_udplite = IS_UDPLITE(sk); 891 int is_udplite = IS_UDPLITE(sk);
892 __wsum csum = 0; 892 __wsum csum = 0;
@@ -899,23 +899,23 @@ static int udp_v6_push_pending_frames(struct sock *sk)
899 * Create a UDP header 899 * Create a UDP header
900 */ 900 */
901 uh = udp_hdr(skb); 901 uh = udp_hdr(skb);
902 uh->source = fl->fl_ip_sport; 902 uh->source = fl6->fl6_sport;
903 uh->dest = fl->fl_ip_dport; 903 uh->dest = fl6->fl6_dport;
904 uh->len = htons(up->len); 904 uh->len = htons(up->len);
905 uh->check = 0; 905 uh->check = 0;
906 906
907 if (is_udplite) 907 if (is_udplite)
908 csum = udplite_csum_outgoing(sk, skb); 908 csum = udplite_csum_outgoing(sk, skb);
909 else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 909 else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
910 udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, 910 udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
911 up->len); 911 up->len);
912 goto send; 912 goto send;
913 } else 913 } else
914 csum = udp_csum_outgoing(sk, skb); 914 csum = udp_csum_outgoing(sk, skb);
915 915
916 /* add protocol-dependent pseudo-header */ 916 /* add protocol-dependent pseudo-header */
917 uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, 917 uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
918 up->len, fl->proto, csum ); 918 up->len, fl6->flowi6_proto, csum);
919 if (uh->check == 0) 919 if (uh->check == 0)
920 uh->check = CSUM_MANGLED_0; 920 uh->check = CSUM_MANGLED_0;
921 921
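The hunk above keeps feeding csum_ipv6_magic() the same pseudo-header, just via the flowi6 field names. For reference, a reduced userspace model of that pseudo-header sum, including the 0 to 0xffff mangling UDP needs because an all-zero checksum means "none":

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)			/* end-around carry */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

static uint16_t ipv6_pseudo_csum(const struct in6_addr *saddr,
				 const struct in6_addr *daddr,
				 uint32_t len, uint8_t proto,
				 uint32_t payload_sum)
{
	const uint16_t *s = (const uint16_t *)saddr;
	const uint16_t *d = (const uint16_t *)daddr;
	uint32_t sum = payload_sum;
	int i;

	for (i = 0; i < 8; i++)			/* 8 words per address */
		sum += ntohs(s[i]) + ntohs(d[i]);
	sum += len + proto;			/* upper-layer length, nexthdr */
	return csum_fold(sum);
}

int main(void)
{
	struct in6_addr s, d;
	uint16_t check;

	inet_pton(AF_INET6, "2001:db8::1", &s);
	inet_pton(AF_INET6, "2001:db8::2", &d);
	check = ipv6_pseudo_csum(&s, &d, 20, 17 /* UDP */, 0);
	if (check == 0)
		check = 0xffff;	/* CSUM_MANGLED_0: zero means "no checksum" */
	printf("check=0x%04x\n", check);
	return 0;
}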
@@ -947,7 +947,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
947 struct in6_addr *daddr, *final_p, final; 947 struct in6_addr *daddr, *final_p, final;
948 struct ipv6_txoptions *opt = NULL; 948 struct ipv6_txoptions *opt = NULL;
949 struct ip6_flowlabel *flowlabel = NULL; 949 struct ip6_flowlabel *flowlabel = NULL;
950 struct flowi fl; 950 struct flowi6 fl6;
951 struct dst_entry *dst; 951 struct dst_entry *dst;
952 int addr_len = msg->msg_namelen; 952 int addr_len = msg->msg_namelen;
953 int ulen = len; 953 int ulen = len;
@@ -1030,19 +1030,19 @@ do_udp_sendmsg:
1030 } 1030 }
1031 ulen += sizeof(struct udphdr); 1031 ulen += sizeof(struct udphdr);
1032 1032
1033 memset(&fl, 0, sizeof(fl)); 1033 memset(&fl6, 0, sizeof(fl6));
1034 1034
1035 if (sin6) { 1035 if (sin6) {
1036 if (sin6->sin6_port == 0) 1036 if (sin6->sin6_port == 0)
1037 return -EINVAL; 1037 return -EINVAL;
1038 1038
1039 fl.fl_ip_dport = sin6->sin6_port; 1039 fl6.fl6_dport = sin6->sin6_port;
1040 daddr = &sin6->sin6_addr; 1040 daddr = &sin6->sin6_addr;
1041 1041
1042 if (np->sndflow) { 1042 if (np->sndflow) {
1043 fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; 1043 fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
1044 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { 1044 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
1045 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 1045 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
1046 if (flowlabel == NULL) 1046 if (flowlabel == NULL)
1047 return -EINVAL; 1047 return -EINVAL;
1048 daddr = &flowlabel->dst; 1048 daddr = &flowlabel->dst;
@@ -1060,38 +1060,38 @@ do_udp_sendmsg:
1060 if (addr_len >= sizeof(struct sockaddr_in6) && 1060 if (addr_len >= sizeof(struct sockaddr_in6) &&
1061 sin6->sin6_scope_id && 1061 sin6->sin6_scope_id &&
1062 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 1062 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
1063 fl.oif = sin6->sin6_scope_id; 1063 fl6.flowi6_oif = sin6->sin6_scope_id;
1064 } else { 1064 } else {
1065 if (sk->sk_state != TCP_ESTABLISHED) 1065 if (sk->sk_state != TCP_ESTABLISHED)
1066 return -EDESTADDRREQ; 1066 return -EDESTADDRREQ;
1067 1067
1068 fl.fl_ip_dport = inet->inet_dport; 1068 fl6.fl6_dport = inet->inet_dport;
1069 daddr = &np->daddr; 1069 daddr = &np->daddr;
1070 fl.fl6_flowlabel = np->flow_label; 1070 fl6.flowlabel = np->flow_label;
1071 connected = 1; 1071 connected = 1;
1072 } 1072 }
1073 1073
1074 if (!fl.oif) 1074 if (!fl6.flowi6_oif)
1075 fl.oif = sk->sk_bound_dev_if; 1075 fl6.flowi6_oif = sk->sk_bound_dev_if;
1076 1076
1077 if (!fl.oif) 1077 if (!fl6.flowi6_oif)
1078 fl.oif = np->sticky_pktinfo.ipi6_ifindex; 1078 fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
1079 1079
1080 fl.mark = sk->sk_mark; 1080 fl6.flowi6_mark = sk->sk_mark;
1081 1081
1082 if (msg->msg_controllen) { 1082 if (msg->msg_controllen) {
1083 opt = &opt_space; 1083 opt = &opt_space;
1084 memset(opt, 0, sizeof(struct ipv6_txoptions)); 1084 memset(opt, 0, sizeof(struct ipv6_txoptions));
1085 opt->tot_len = sizeof(*opt); 1085 opt->tot_len = sizeof(*opt);
1086 1086
1087 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, 1087 err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
1088 &tclass, &dontfrag); 1088 &tclass, &dontfrag);
1089 if (err < 0) { 1089 if (err < 0) {
1090 fl6_sock_release(flowlabel); 1090 fl6_sock_release(flowlabel);
1091 return err; 1091 return err;
1092 } 1092 }
1093 if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { 1093 if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
1094 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); 1094 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
1095 if (flowlabel == NULL) 1095 if (flowlabel == NULL)
1096 return -EINVAL; 1096 return -EINVAL;
1097 } 1097 }
@@ -1105,42 +1105,35 @@ do_udp_sendmsg:
1105 opt = fl6_merge_options(&opt_space, flowlabel, opt); 1105 opt = fl6_merge_options(&opt_space, flowlabel, opt);
1106 opt = ipv6_fixup_options(&opt_space, opt); 1106 opt = ipv6_fixup_options(&opt_space, opt);
1107 1107
1108 fl.proto = sk->sk_protocol; 1108 fl6.flowi6_proto = sk->sk_protocol;
1109 if (!ipv6_addr_any(daddr)) 1109 if (!ipv6_addr_any(daddr))
1110 ipv6_addr_copy(&fl.fl6_dst, daddr); 1110 ipv6_addr_copy(&fl6.daddr, daddr);
1111 else 1111 else
1112 fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 1112 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
1113 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 1113 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
1114 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 1114 ipv6_addr_copy(&fl6.saddr, &np->saddr);
1115 fl.fl_ip_sport = inet->inet_sport; 1115 fl6.fl6_sport = inet->inet_sport;
1116 1116
1117 final_p = fl6_update_dst(&fl, opt, &final); 1117 final_p = fl6_update_dst(&fl6, opt, &final);
1118 if (final_p) 1118 if (final_p)
1119 connected = 0; 1119 connected = 0;
1120 1120
1121 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { 1121 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
1122 fl.oif = np->mcast_oif; 1122 fl6.flowi6_oif = np->mcast_oif;
1123 connected = 0; 1123 connected = 0;
1124 } 1124 }
1125 1125
1126 security_sk_classify_flow(sk, &fl); 1126 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
1127 1127
1128 err = ip6_sk_dst_lookup(sk, &dst, &fl); 1128 dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true);
1129 if (err) 1129 if (IS_ERR(dst)) {
1130 err = PTR_ERR(dst);
1131 dst = NULL;
1130 goto out; 1132 goto out;
1131 if (final_p)
1132 ipv6_addr_copy(&fl.fl6_dst, final_p);
1133
1134 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
1135 if (err < 0) {
1136 if (err == -EREMOTE)
1137 err = ip6_dst_blackhole(sk, &dst, &fl);
1138 if (err < 0)
1139 goto out;
1140 } 1133 }
1141 1134
1142 if (hlimit < 0) { 1135 if (hlimit < 0) {
1143 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 1136 if (ipv6_addr_is_multicast(&fl6.daddr))
1144 hlimit = np->mcast_hops; 1137 hlimit = np->mcast_hops;
1145 else 1138 else
1146 hlimit = np->hop_limit; 1139 hlimit = np->hop_limit;
@@ -1175,7 +1168,7 @@ do_append_data:
1175 up->len += ulen; 1168 up->len += ulen;
1176 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 1169 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
1177 err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, 1170 err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
1178 sizeof(struct udphdr), hlimit, tclass, opt, &fl, 1171 sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
1179 (struct rt6_info*)dst, 1172 (struct rt6_info*)dst,
1180 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); 1173 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
1181 if (err) 1174 if (err)
@@ -1188,10 +1181,10 @@ do_append_data:
1188 if (dst) { 1181 if (dst) {
1189 if (connected) { 1182 if (connected) {
1190 ip6_dst_store(sk, dst, 1183 ip6_dst_store(sk, dst,
1191 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? 1184 ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
1192 &np->daddr : NULL, 1185 &np->daddr : NULL,
1193#ifdef CONFIG_IPV6_SUBTREES 1186#ifdef CONFIG_IPV6_SUBTREES
1194 ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? 1187 ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
1195 &np->saddr : 1188 &np->saddr :
1196#endif 1189#endif
1197 NULL); 1190 NULL);
@@ -1299,7 +1292,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
1299 return 0; 1292 return 0;
1300} 1293}
1301 1294
1302static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) 1295static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
1303{ 1296{
1304 struct sk_buff *segs = ERR_PTR(-EINVAL); 1297 struct sk_buff *segs = ERR_PTR(-EINVAL);
1305 unsigned int mss; 1298 unsigned int mss;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index da87428681c..05e34c8ec91 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -27,18 +27,19 @@
27static struct xfrm_policy_afinfo xfrm6_policy_afinfo; 27static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
28 28
29static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, 29static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
30 xfrm_address_t *saddr, 30 const xfrm_address_t *saddr,
31 xfrm_address_t *daddr) 31 const xfrm_address_t *daddr)
32{ 32{
33 struct flowi fl = {}; 33 struct flowi6 fl6;
34 struct dst_entry *dst; 34 struct dst_entry *dst;
35 int err; 35 int err;
36 36
37 memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst)); 37 memset(&fl6, 0, sizeof(fl6));
38 memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
38 if (saddr) 39 if (saddr)
39 memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); 40 memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
40 41
41 dst = ip6_route_output(net, NULL, &fl); 42 dst = ip6_route_output(net, NULL, &fl6);
42 43
43 err = dst->error; 44 err = dst->error;
44 if (dst->error) { 45 if (dst->error) {
@@ -67,7 +68,7 @@ static int xfrm6_get_saddr(struct net *net,
67 return 0; 68 return 0;
68} 69}
69 70
70static int xfrm6_get_tos(struct flowi *fl) 71static int xfrm6_get_tos(const struct flowi *fl)
71{ 72{
72 return 0; 73 return 0;
73} 74}
@@ -87,7 +88,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
87} 88}
88 89
89static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 90static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
90 struct flowi *fl) 91 const struct flowi *fl)
91{ 92{
92 struct rt6_info *rt = (struct rt6_info*)xdst->route; 93 struct rt6_info *rt = (struct rt6_info*)xdst->route;
93 94
@@ -120,6 +121,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
120static inline void 121static inline void
121_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) 122_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
122{ 123{
124 struct flowi6 *fl6 = &fl->u.ip6;
123 int onlyproto = 0; 125 int onlyproto = 0;
124 u16 offset = skb_network_header_len(skb); 126 u16 offset = skb_network_header_len(skb);
125 struct ipv6hdr *hdr = ipv6_hdr(skb); 127 struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -127,11 +129,11 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
127 const unsigned char *nh = skb_network_header(skb); 129 const unsigned char *nh = skb_network_header(skb);
128 u8 nexthdr = nh[IP6CB(skb)->nhoff]; 130 u8 nexthdr = nh[IP6CB(skb)->nhoff];
129 131
130 memset(fl, 0, sizeof(struct flowi)); 132 memset(fl6, 0, sizeof(struct flowi6));
131 fl->mark = skb->mark; 133 fl6->flowi6_mark = skb->mark;
132 134
133 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); 135 ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr);
134 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); 136 ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr);
135 137
136 while (nh + offset + 1 < skb->data || 138 while (nh + offset + 1 < skb->data ||
137 pskb_may_pull(skb, nh + offset + 1 - skb->data)) { 139 pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
@@ -158,20 +160,20 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
158 pskb_may_pull(skb, nh + offset + 4 - skb->data))) { 160 pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
159 __be16 *ports = (__be16 *)exthdr; 161 __be16 *ports = (__be16 *)exthdr;
160 162
161 fl->fl_ip_sport = ports[!!reverse]; 163 fl6->fl6_sport = ports[!!reverse];
162 fl->fl_ip_dport = ports[!reverse]; 164 fl6->fl6_dport = ports[!reverse];
163 } 165 }
164 fl->proto = nexthdr; 166 fl6->flowi6_proto = nexthdr;
165 return; 167 return;
166 168
167 case IPPROTO_ICMPV6: 169 case IPPROTO_ICMPV6:
168 if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { 170 if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
169 u8 *icmp = (u8 *)exthdr; 171 u8 *icmp = (u8 *)exthdr;
170 172
171 fl->fl_icmp_type = icmp[0]; 173 fl6->fl6_icmp_type = icmp[0];
172 fl->fl_icmp_code = icmp[1]; 174 fl6->fl6_icmp_code = icmp[1];
173 } 175 }
174 fl->proto = nexthdr; 176 fl6->flowi6_proto = nexthdr;
175 return; 177 return;
176 178
177#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 179#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -180,9 +182,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
180 struct ip6_mh *mh; 182 struct ip6_mh *mh;
181 mh = (struct ip6_mh *)exthdr; 183 mh = (struct ip6_mh *)exthdr;
182 184
183 fl->fl_mh_type = mh->ip6mh_type; 185 fl6->fl6_mh_type = mh->ip6mh_type;
184 } 186 }
185 fl->proto = nexthdr; 187 fl6->flowi6_proto = nexthdr;
186 return; 188 return;
187#endif 189#endif
188 190
@@ -191,8 +193,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
191 case IPPROTO_ESP: 193 case IPPROTO_ESP:
192 case IPPROTO_COMP: 194 case IPPROTO_COMP:
193 default: 195 default:
194 fl->fl_ipsec_spi = 0; 196 fl6->fl6_ipsec_spi = 0;
195 fl->proto = nexthdr; 197 fl6->flowi6_proto = nexthdr;
196 return; 198 return;
197 } 199 }
198 } 200 }
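_decode_session6()'s ports[!!reverse]/ports[!reverse] indexing works because source and destination ports are the first two 16-bit words of every transport header, so the normalized flag simply swaps which word fills each selector field. A compilable miniature:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct sel { uint16_t sport, dport; };

static void decode_ports(const uint8_t *th, int reverse, struct sel *s)
{
	const uint16_t *ports = (const uint16_t *)th;

	s->sport = ports[!!reverse];	/* word 0 forward, word 1 reversed */
	s->dport = ports[!reverse];
}

int main(void)
{
	uint8_t hdr[4] = { 0x30, 0x39, 0x00, 0x50 }; /* sport 12345, dport 80 */
	struct sel s;

	decode_ports(hdr, 0, &s);
	printf("fwd: %u -> %u\n", ntohs(s.sport), ntohs(s.dport));
	decode_ports(hdr, 1, &s);
	printf("rev: %u -> %u\n", ntohs(s.sport), ntohs(s.dport));
	return 0;
}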
@@ -220,6 +222,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
220 222
221 if (likely(xdst->u.rt6.rt6i_idev)) 223 if (likely(xdst->u.rt6.rt6i_idev))
222 in6_dev_put(xdst->u.rt6.rt6i_idev); 224 in6_dev_put(xdst->u.rt6.rt6i_idev);
225 dst_destroy_metrics_generic(dst);
223 if (likely(xdst->u.rt6.rt6i_peer)) 226 if (likely(xdst->u.rt6.rt6i_peer))
224 inet_putpeer(xdst->u.rt6.rt6i_peer); 227 inet_putpeer(xdst->u.rt6.rt6i_peer);
225 xfrm_dst_destroy(xdst); 228 xfrm_dst_destroy(xdst);
@@ -257,6 +260,7 @@ static struct dst_ops xfrm6_dst_ops = {
257 .protocol = cpu_to_be16(ETH_P_IPV6), 260 .protocol = cpu_to_be16(ETH_P_IPV6),
258 .gc = xfrm6_garbage_collect, 261 .gc = xfrm6_garbage_collect,
259 .update_pmtu = xfrm6_update_pmtu, 262 .update_pmtu = xfrm6_update_pmtu,
263 .cow_metrics = dst_cow_metrics_generic,
260 .destroy = xfrm6_dst_destroy, 264 .destroy = xfrm6_dst_destroy,
261 .ifdown = xfrm6_dst_ifdown, 265 .ifdown = xfrm6_dst_ifdown,
262 .local_out = __ip6_local_out, 266 .local_out = __ip6_local_out,
@@ -272,6 +276,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
272 .get_tos = xfrm6_get_tos, 276 .get_tos = xfrm6_get_tos,
273 .init_path = xfrm6_init_path, 277 .init_path = xfrm6_init_path,
274 .fill_dst = xfrm6_fill_dst, 278 .fill_dst = xfrm6_fill_dst,
279 .blackhole_route = ip6_blackhole_route,
275}; 280};
276 281
277static int __init xfrm6_policy_init(void) 282static int __init xfrm6_policy_init(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index a67575d472a..afe941e9415 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -20,26 +20,28 @@
20#include <net/addrconf.h> 20#include <net/addrconf.h>
21 21
22static void 22static void
23__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) 23__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
24{ 24{
25 const struct flowi6 *fl6 = &fl->u.ip6;
26
25 /* Initialize temporary selector matching only 27 /* Initialize temporary selector matching only
26 * to current session. */ 28 * to current session. */
27 ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); 29 ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr);
28 ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); 30 ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr);
29 sel->dport = xfrm_flowi_dport(fl); 31 sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
30 sel->dport_mask = htons(0xffff); 32 sel->dport_mask = htons(0xffff);
31 sel->sport = xfrm_flowi_sport(fl); 33 sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
32 sel->sport_mask = htons(0xffff); 34 sel->sport_mask = htons(0xffff);
33 sel->family = AF_INET6; 35 sel->family = AF_INET6;
34 sel->prefixlen_d = 128; 36 sel->prefixlen_d = 128;
35 sel->prefixlen_s = 128; 37 sel->prefixlen_s = 128;
36 sel->proto = fl->proto; 38 sel->proto = fl6->flowi6_proto;
37 sel->ifindex = fl->oif; 39 sel->ifindex = fl6->flowi6_oif;
38} 40}
39 41
40static void 42static void
41xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, 43xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
42 xfrm_address_t *daddr, xfrm_address_t *saddr) 44 const xfrm_address_t *daddr, const xfrm_address_t *saddr)
43{ 45{
44 x->id = tmpl->id; 46 x->id = tmpl->id;
45 if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) 47 if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
index 02549cb2c32..e9ad0062fbb 100644
--- a/net/ipx/Kconfig
+++ b/net/ipx/Kconfig
@@ -3,7 +3,6 @@
3# 3#
4config IPX 4config IPX
5 tristate "The IPX protocol" 5 tristate "The IPX protocol"
6 depends on BKL # should be fixable
7 select LLC 6 select LLC
8 ---help--- 7 ---help---
9 This is support for the Novell networking protocol, IPX, commonly 8 This is support for the Novell networking protocol, IPX, commonly
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index da3d21c41d9..9680226640e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -42,7 +42,6 @@
42#include <linux/uio.h> 42#include <linux/uio.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/skbuff.h> 44#include <linux/skbuff.h>
45#include <linux/smp_lock.h>
46#include <linux/socket.h> 45#include <linux/socket.h>
47#include <linux/sockios.h> 46#include <linux/sockios.h>
48#include <linux/string.h> 47#include <linux/string.h>
@@ -149,7 +148,6 @@ static void ipx_destroy_socket(struct sock *sk)
149 ipx_remove_socket(sk); 148 ipx_remove_socket(sk);
150 skb_queue_purge(&sk->sk_receive_queue); 149 skb_queue_purge(&sk->sk_receive_queue);
151 sk_refcnt_debug_dec(sk); 150 sk_refcnt_debug_dec(sk);
152 sock_put(sk);
153} 151}
154 152
155/* 153/*
@@ -1299,7 +1297,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname,
1299 int opt; 1297 int opt;
1300 int rc = -EINVAL; 1298 int rc = -EINVAL;
1301 1299
1302 lock_kernel(); 1300 lock_sock(sk);
1303 if (optlen != sizeof(int)) 1301 if (optlen != sizeof(int))
1304 goto out; 1302 goto out;
1305 1303
@@ -1314,7 +1312,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname,
1314 ipx_sk(sk)->type = opt; 1312 ipx_sk(sk)->type = opt;
1315 rc = 0; 1313 rc = 0;
1316out: 1314out:
1317 unlock_kernel(); 1315 release_sock(sk);
1318 return rc; 1316 return rc;
1319} 1317}
1320 1318
@@ -1326,7 +1324,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname,
1326 int len; 1324 int len;
1327 int rc = -ENOPROTOOPT; 1325 int rc = -ENOPROTOOPT;
1328 1326
1329 lock_kernel(); 1327 lock_sock(sk);
1330 if (!(level == SOL_IPX && optname == IPX_TYPE)) 1328 if (!(level == SOL_IPX && optname == IPX_TYPE))
1331 goto out; 1329 goto out;
1332 1330
@@ -1347,7 +1345,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname,
1347 1345
1348 rc = 0; 1346 rc = 0;
1349out: 1347out:
1350 unlock_kernel(); 1348 release_sock(sk);
1351 return rc; 1349 return rc;
1352} 1350}
1353 1351
@@ -1396,7 +1394,7 @@ static int ipx_release(struct socket *sock)
1396 if (!sk) 1394 if (!sk)
1397 goto out; 1395 goto out;
1398 1396
1399 lock_kernel(); 1397 lock_sock(sk);
1400 if (!sock_flag(sk, SOCK_DEAD)) 1398 if (!sock_flag(sk, SOCK_DEAD))
1401 sk->sk_state_change(sk); 1399 sk->sk_state_change(sk);
1402 1400
@@ -1404,7 +1402,8 @@ static int ipx_release(struct socket *sock)
1404 sock->sk = NULL; 1402 sock->sk = NULL;
1405 sk_refcnt_debug_release(sk); 1403 sk_refcnt_debug_release(sk);
1406 ipx_destroy_socket(sk); 1404 ipx_destroy_socket(sk);
1407 unlock_kernel(); 1405 release_sock(sk);
1406 sock_put(sk);
1408out: 1407out:
1409 return 0; 1408 return 0;
1410} 1409}
@@ -1530,11 +1529,12 @@ out:
1530 1529
1531static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1530static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1532{ 1531{
1532 struct sock *sk = sock->sk;
1533 int rc; 1533 int rc;
1534 1534
1535 lock_kernel(); 1535 lock_sock(sk);
1536 rc = __ipx_bind(sock, uaddr, addr_len); 1536 rc = __ipx_bind(sock, uaddr, addr_len);
1537 unlock_kernel(); 1537 release_sock(sk);
1538 1538
1539 return rc; 1539 return rc;
1540} 1540}
@@ -1551,7 +1551,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
1551 sk->sk_state = TCP_CLOSE; 1551 sk->sk_state = TCP_CLOSE;
1552 sock->state = SS_UNCONNECTED; 1552 sock->state = SS_UNCONNECTED;
1553 1553
1554 lock_kernel(); 1554 lock_sock(sk);
1555 if (addr_len != sizeof(*addr)) 1555 if (addr_len != sizeof(*addr))
1556 goto out; 1556 goto out;
1557 addr = (struct sockaddr_ipx *)uaddr; 1557 addr = (struct sockaddr_ipx *)uaddr;
@@ -1598,7 +1598,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
1598 ipxrtr_put(rt); 1598 ipxrtr_put(rt);
1599 rc = 0; 1599 rc = 0;
1600out: 1600out:
1601 unlock_kernel(); 1601 release_sock(sk);
1602 return rc; 1602 return rc;
1603} 1603}
1604 1604
@@ -1614,7 +1614,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
1614 1614
1615 *uaddr_len = sizeof(struct sockaddr_ipx); 1615 *uaddr_len = sizeof(struct sockaddr_ipx);
1616 1616
1617 lock_kernel(); 1617 lock_sock(sk);
1618 if (peer) { 1618 if (peer) {
1619 rc = -ENOTCONN; 1619 rc = -ENOTCONN;
1620 if (sk->sk_state != TCP_ESTABLISHED) 1620 if (sk->sk_state != TCP_ESTABLISHED)
@@ -1649,19 +1649,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
1649 1649
1650 rc = 0; 1650 rc = 0;
1651out: 1651out:
1652 unlock_kernel(); 1652 release_sock(sk);
1653 return rc;
1654}
1655
1656static unsigned int ipx_datagram_poll(struct file *file, struct socket *sock,
1657 poll_table *wait)
1658{
1659 int rc;
1660
1661 lock_kernel();
1662 rc = datagram_poll(file, sock, wait);
1663 unlock_kernel();
1664
1665 return rc; 1653 return rc;
1666} 1654}
1667 1655
@@ -1736,7 +1724,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
1736 int rc = -EINVAL; 1724 int rc = -EINVAL;
1737 int flags = msg->msg_flags; 1725 int flags = msg->msg_flags;
1738 1726
1739 lock_kernel(); 1727 lock_sock(sk);
1740 /* Socket gets bound below anyway */ 1728 /* Socket gets bound below anyway */
1741/* if (sk->sk_zapped) 1729/* if (sk->sk_zapped)
1742 return -EIO; */ /* Socket not bound */ 1730 return -EIO; */ /* Socket not bound */
@@ -1788,7 +1776,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
1788 if (rc >= 0) 1776 if (rc >= 0)
1789 rc = len; 1777 rc = len;
1790out: 1778out:
1791 unlock_kernel(); 1779 release_sock(sk);
1792 return rc; 1780 return rc;
1793} 1781}
1794 1782
@@ -1803,7 +1791,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
1803 struct sk_buff *skb; 1791 struct sk_buff *skb;
1804 int copied, rc; 1792 int copied, rc;
1805 1793
1806 lock_kernel(); 1794 lock_sock(sk);
1807 /* put the autobinding in */ 1795 /* put the autobinding in */
1808 if (!ipxs->port) { 1796 if (!ipxs->port) {
1809 struct sockaddr_ipx uaddr; 1797 struct sockaddr_ipx uaddr;
@@ -1862,7 +1850,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
1862out_free: 1850out_free:
1863 skb_free_datagram(sk, skb); 1851 skb_free_datagram(sk, skb);
1864out: 1852out:
1865 unlock_kernel(); 1853 release_sock(sk);
1866 return rc; 1854 return rc;
1867} 1855}
1868 1856
@@ -1874,7 +1862,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1874 struct sock *sk = sock->sk; 1862 struct sock *sk = sock->sk;
1875 void __user *argp = (void __user *)arg; 1863 void __user *argp = (void __user *)arg;
1876 1864
1877 lock_kernel(); 1865 lock_sock(sk);
1878 switch (cmd) { 1866 switch (cmd) {
1879 case TIOCOUTQ: 1867 case TIOCOUTQ:
1880 amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); 1868 amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
@@ -1937,7 +1925,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1937 rc = -ENOIOCTLCMD; 1925 rc = -ENOIOCTLCMD;
1938 break; 1926 break;
1939 } 1927 }
1940 unlock_kernel(); 1928 release_sock(sk);
1941 1929
1942 return rc; 1930 return rc;
1943} 1931}
@@ -1984,7 +1972,7 @@ static const struct proto_ops ipx_dgram_ops = {
1984 .socketpair = sock_no_socketpair, 1972 .socketpair = sock_no_socketpair,
1985 .accept = sock_no_accept, 1973 .accept = sock_no_accept,
1986 .getname = ipx_getname, 1974 .getname = ipx_getname,
1987 .poll = ipx_datagram_poll, 1975 .poll = datagram_poll,
1988 .ioctl = ipx_ioctl, 1976 .ioctl = ipx_ioctl,
1989#ifdef CONFIG_COMPAT 1977#ifdef CONFIG_COMPAT
1990 .compat_ioctl = ipx_compat_ioctl, 1978 .compat_ioctl = ipx_compat_ioctl,
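The af_ipx conversion above is mechanical BKL removal: every lock_kernel()/unlock_kernel() pair becomes lock_sock()/release_sock() on the one socket being touched, and the ipx_datagram_poll() wrapper vanishes because datagram_poll() never needed the lock. A pthread-based model of the narrowed locking scope; lock_sock here is a stand-in for the kernel primitive:

#include <pthread.h>
#include <stdio.h>

struct sock { pthread_mutex_t lock; int type; };

static void lock_sock(struct sock *sk)    { pthread_mutex_lock(&sk->lock); }
static void release_sock(struct sock *sk) { pthread_mutex_unlock(&sk->lock); }

static int ipx_setsockopt_model(struct sock *sk, int opt)
{
	int rc;

	lock_sock(sk);		/* serializes only this socket, not the world */
	sk->type = opt;
	rc = 0;
	release_sock(sk);
	return rc;
}

int main(void)
{
	struct sock sk = { .lock = PTHREAD_MUTEX_INITIALIZER, .type = 0 };

	printf("rc=%d type=%d\n", ipx_setsockopt_model(&sk, 2), sk.type);
	return 0;
}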
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 24cb3aa2bbf..77c5e6499f8 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -189,12 +189,12 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
189} 189}
190 190
191/* 191/*
192 * Function ircomm_tty_tiocmget (tty, file) 192 * Function ircomm_tty_tiocmget (tty)
193 * 193 *
194 * 194 *
195 * 195 *
196 */ 196 */
197int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file) 197int ircomm_tty_tiocmget(struct tty_struct *tty)
198{ 198{
199 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; 199 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
200 unsigned int result; 200 unsigned int result;
@@ -214,12 +214,12 @@ int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file)
214} 214}
215 215
216/* 216/*
217 * Function ircomm_tty_tiocmset (tty, file, set, clear) 217 * Function ircomm_tty_tiocmset (tty, set, clear)
218 * 218 *
219 * 219 *
220 * 220 *
221 */ 221 */
222int ircomm_tty_tiocmset(struct tty_struct *tty, struct file *file, 222int ircomm_tty_tiocmset(struct tty_struct *tty,
223 unsigned int set, unsigned int clear) 223 unsigned int set, unsigned int clear)
224{ 224{
225 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; 225 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
@@ -365,12 +365,12 @@ static int ircomm_tty_set_serial_info(struct ircomm_tty_cb *self,
365} 365}
366 366
367/* 367/*
368 * Function ircomm_tty_ioctl (tty, file, cmd, arg) 368 * Function ircomm_tty_ioctl (tty, cmd, arg)
369 * 369 *
370 * 370 *
371 * 371 *
372 */ 372 */
373int ircomm_tty_ioctl(struct tty_struct *tty, struct file *file, 373int ircomm_tty_ioctl(struct tty_struct *tty,
374 unsigned int cmd, unsigned long arg) 374 unsigned int cmd, unsigned long arg)
375{ 375{
376 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; 376 struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
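These ircomm hunks track a tty-layer API change that removed the unused struct file * argument from the tiocmget, tiocmset and ioctl hooks. A hedged sketch of the post-change prototypes, assuming a kernel build context; my_tiocmget/my_tiocmset are hypothetical driver hooks:

	#include <linux/tty.h>

	static int my_tiocmget(struct tty_struct *tty)	/* file argument gone */
	{
		return TIOCM_DTR | TIOCM_RTS;	/* report asserted modem lines */
	}

	static int my_tiocmset(struct tty_struct *tty,
			       unsigned int set, unsigned int clear)
	{
		return 0;	/* apply 'set'/'clear' to the modem lines */
	}

	static const struct tty_operations my_ops = {
		.tiocmget = my_tiocmget,
		.tiocmset = my_tiocmset,
	};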
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d87c22df6f1..7db86ffcf07 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -70,7 +70,7 @@ static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
70 return (struct pfkey_sock *)sk; 70 return (struct pfkey_sock *)sk;
71} 71}
72 72
73static int pfkey_can_dump(struct sock *sk) 73static int pfkey_can_dump(const struct sock *sk)
74{ 74{
75 if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf) 75 if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf)
76 return 1; 76 return 1;
@@ -303,12 +303,13 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
303 return rc; 303 return rc;
304} 304}
305 305
306static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) 306static inline void pfkey_hdr_dup(struct sadb_msg *new,
307 const struct sadb_msg *orig)
307{ 308{
308 *new = *orig; 309 *new = *orig;
309} 310}
310 311
311static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk) 312static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
312{ 313{
313 struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); 314 struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
314 struct sadb_msg *hdr; 315 struct sadb_msg *hdr;
@@ -369,13 +370,13 @@ static u8 sadb_ext_min_len[] = {
369}; 370};
370 371
371/* Verify sadb_address_{len,prefixlen} against sa_family. */ 372/* Verify sadb_address_{len,prefixlen} against sa_family. */
372static int verify_address_len(void *p) 373static int verify_address_len(const void *p)
373{ 374{
374 struct sadb_address *sp = p; 375 const struct sadb_address *sp = p;
375 struct sockaddr *addr = (struct sockaddr *)(sp + 1); 376 const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
376 struct sockaddr_in *sin; 377 const struct sockaddr_in *sin;
377#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 378#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
378 struct sockaddr_in6 *sin6; 379 const struct sockaddr_in6 *sin6;
379#endif 380#endif
380 int len; 381 int len;
381 382
@@ -411,16 +412,16 @@ static int verify_address_len(void *p)
411 return 0; 412 return 0;
412} 413}
413 414
414static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) 415static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
415{ 416{
416 return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + 417 return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
417 sec_ctx->sadb_x_ctx_len, 418 sec_ctx->sadb_x_ctx_len,
418 sizeof(uint64_t)); 419 sizeof(uint64_t));
419} 420}
420 421
421static inline int verify_sec_ctx_len(void *p) 422static inline int verify_sec_ctx_len(const void *p)
422{ 423{
423 struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; 424 const struct sadb_x_sec_ctx *sec_ctx = p;
424 int len = sec_ctx->sadb_x_ctx_len; 425 int len = sec_ctx->sadb_x_ctx_len;
425 426
426 if (len > PAGE_SIZE) 427 if (len > PAGE_SIZE)
@@ -434,7 +435,7 @@ static inline int verify_sec_ctx_len(void *p)
434 return 0; 435 return 0;
435} 436}
436 437
437static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx) 438static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx)
438{ 439{
439 struct xfrm_user_sec_ctx *uctx = NULL; 440 struct xfrm_user_sec_ctx *uctx = NULL;
440 int ctx_size = sec_ctx->sadb_x_ctx_len; 441 int ctx_size = sec_ctx->sadb_x_ctx_len;
@@ -455,16 +456,16 @@ static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb
455 return uctx; 456 return uctx;
456} 457}
457 458
458static int present_and_same_family(struct sadb_address *src, 459static int present_and_same_family(const struct sadb_address *src,
459 struct sadb_address *dst) 460 const struct sadb_address *dst)
460{ 461{
461 struct sockaddr *s_addr, *d_addr; 462 const struct sockaddr *s_addr, *d_addr;
462 463
463 if (!src || !dst) 464 if (!src || !dst)
464 return 0; 465 return 0;
465 466
466 s_addr = (struct sockaddr *)(src + 1); 467 s_addr = (const struct sockaddr *)(src + 1);
467 d_addr = (struct sockaddr *)(dst + 1); 468 d_addr = (const struct sockaddr *)(dst + 1);
468 if (s_addr->sa_family != d_addr->sa_family) 469 if (s_addr->sa_family != d_addr->sa_family)
469 return 0; 470 return 0;
470 if (s_addr->sa_family != AF_INET 471 if (s_addr->sa_family != AF_INET
@@ -477,15 +478,15 @@ static int present_and_same_family(struct sadb_address *src,
477 return 1; 478 return 1;
478} 479}
479 480
480static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 481static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs)
481{ 482{
482 char *p = (char *) hdr; 483 const char *p = (char *) hdr;
483 int len = skb->len; 484 int len = skb->len;
484 485
485 len -= sizeof(*hdr); 486 len -= sizeof(*hdr);
486 p += sizeof(*hdr); 487 p += sizeof(*hdr);
487 while (len > 0) { 488 while (len > 0) {
488 struct sadb_ext *ehdr = (struct sadb_ext *) p; 489 const struct sadb_ext *ehdr = (const struct sadb_ext *) p;
489 uint16_t ext_type; 490 uint16_t ext_type;
490 int ext_len; 491 int ext_len;
491 492
@@ -514,7 +515,7 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h
514 if (verify_sec_ctx_len(p)) 515 if (verify_sec_ctx_len(p))
515 return -EINVAL; 516 return -EINVAL;
516 } 517 }
517 ext_hdrs[ext_type-1] = p; 518 ext_hdrs[ext_type-1] = (void *) p;
518 } 519 }
519 p += ext_len; 520 p += ext_len;
520 len -= ext_len; 521 len -= ext_len;
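The parse_exthdrs hunk keeps the walk over the message read-only and casts const away exactly once, at the point where the pointer is stored into the caller's (non-const) ext_hdrs table. A standalone sketch of that discipline, not kernel code; struct ext stands in for struct sadb_ext:

	#include <stddef.h>
	#include <stdint.h>

	struct ext { uint16_t len; uint16_t type; };

	static void parse(const void *buf, size_t len,
			  void *ext_hdrs[], size_t max_type)
	{
		const char *p = buf;

		while (len >= sizeof(struct ext)) {
			const struct ext *e = (const struct ext *)p;
			size_t ext_len = e->len;

			if (ext_len < sizeof(struct ext) || ext_len > len)
				break;	/* malformed extension, stop */
			if (e->type >= 1 && e->type <= max_type)
				ext_hdrs[e->type - 1] = (void *)p; /* lone const cast */
			p += ext_len;
			len -= ext_len;
		}
	}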
@@ -606,21 +607,21 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
606} 607}
607 608
608static 609static
609int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) 610int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr)
610{ 611{
611 return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), 612 return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1),
612 xaddr); 613 xaddr);
613} 614}
614 615
615static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_msg *hdr, void **ext_hdrs) 616static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs)
616{ 617{
617 struct sadb_sa *sa; 618 const struct sadb_sa *sa;
618 struct sadb_address *addr; 619 const struct sadb_address *addr;
619 uint16_t proto; 620 uint16_t proto;
620 unsigned short family; 621 unsigned short family;
621 xfrm_address_t *xaddr; 622 xfrm_address_t *xaddr;
622 623
623 sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; 624 sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
624 if (sa == NULL) 625 if (sa == NULL)
625 return NULL; 626 return NULL;
626 627
@@ -629,18 +630,18 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_
629 return NULL; 630 return NULL;
630 631
631 /* sadb_address_len should be checked by caller */ 632 /* sadb_address_len should be checked by caller */
632 addr = (struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1]; 633 addr = (const struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
633 if (addr == NULL) 634 if (addr == NULL)
634 return NULL; 635 return NULL;
635 636
636 family = ((struct sockaddr *)(addr + 1))->sa_family; 637 family = ((const struct sockaddr *)(addr + 1))->sa_family;
637 switch (family) { 638 switch (family) {
638 case AF_INET: 639 case AF_INET:
639 xaddr = (xfrm_address_t *)&((struct sockaddr_in *)(addr + 1))->sin_addr; 640 xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
640 break; 641 break;
641#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 642#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
642 case AF_INET6: 643 case AF_INET6:
643 xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(addr + 1))->sin6_addr; 644 xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
644 break; 645 break;
645#endif 646#endif
646 default: 647 default:
@@ -690,9 +691,9 @@ static inline int pfkey_mode_to_xfrm(int mode)
690 } 691 }
691} 692}
692 693
693static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port, 694static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port,
694 struct sockaddr *sa, 695 struct sockaddr *sa,
695 unsigned short family) 696 unsigned short family)
696{ 697{
697 switch (family) { 698 switch (family) {
698 case AF_INET: 699 case AF_INET:
@@ -720,7 +721,7 @@ static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port,
720 return 0; 721 return 0;
721} 722}
722 723
723static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, 724static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
724 int add_keys, int hsc) 725 int add_keys, int hsc)
725{ 726{
726 struct sk_buff *skb; 727 struct sk_buff *skb;
@@ -1010,7 +1011,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
1010} 1011}
1011 1012
1012 1013
1013static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) 1014static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x)
1014{ 1015{
1015 struct sk_buff *skb; 1016 struct sk_buff *skb;
1016 1017
@@ -1019,26 +1020,26 @@ static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x)
1019 return skb; 1020 return skb;
1020} 1021}
1021 1022
1022static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, 1023static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x,
1023 int hsc) 1024 int hsc)
1024{ 1025{
1025 return __pfkey_xfrm_state2msg(x, 0, hsc); 1026 return __pfkey_xfrm_state2msg(x, 0, hsc);
1026} 1027}
1027 1028
1028static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, 1029static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1029 struct sadb_msg *hdr, 1030 const struct sadb_msg *hdr,
1030 void **ext_hdrs) 1031 void * const *ext_hdrs)
1031{ 1032{
1032 struct xfrm_state *x; 1033 struct xfrm_state *x;
1033 struct sadb_lifetime *lifetime; 1034 const struct sadb_lifetime *lifetime;
1034 struct sadb_sa *sa; 1035 const struct sadb_sa *sa;
1035 struct sadb_key *key; 1036 const struct sadb_key *key;
1036 struct sadb_x_sec_ctx *sec_ctx; 1037 const struct sadb_x_sec_ctx *sec_ctx;
1037 uint16_t proto; 1038 uint16_t proto;
1038 int err; 1039 int err;
1039 1040
1040 1041
1041 sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; 1042 sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
1042 if (!sa || 1043 if (!sa ||
1043 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1044 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1044 ext_hdrs[SADB_EXT_ADDRESS_DST-1])) 1045 ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
@@ -1077,7 +1078,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1077 sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || 1078 sa->sadb_sa_encrypt > SADB_X_CALG_MAX) ||
1078 sa->sadb_sa_encrypt > SADB_EALG_MAX) 1079 sa->sadb_sa_encrypt > SADB_EALG_MAX)
1079 return ERR_PTR(-EINVAL); 1080 return ERR_PTR(-EINVAL);
1080 key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; 1081 key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
1081 if (key != NULL && 1082 if (key != NULL &&
1082 sa->sadb_sa_auth != SADB_X_AALG_NULL && 1083 sa->sadb_sa_auth != SADB_X_AALG_NULL &&
1083 ((key->sadb_key_bits+7) / 8 == 0 || 1084 ((key->sadb_key_bits+7) / 8 == 0 ||
@@ -1104,14 +1105,14 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1104 if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) 1105 if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
1105 x->props.flags |= XFRM_STATE_NOPMTUDISC; 1106 x->props.flags |= XFRM_STATE_NOPMTUDISC;
1106 1107
1107 lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; 1108 lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
1108 if (lifetime != NULL) { 1109 if (lifetime != NULL) {
1109 x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); 1110 x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
1110 x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); 1111 x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
1111 x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; 1112 x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
1112 x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; 1113 x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
1113 } 1114 }
1114 lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]; 1115 lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
1115 if (lifetime != NULL) { 1116 if (lifetime != NULL) {
1116 x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); 1117 x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
1117 x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); 1118 x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
@@ -1119,7 +1120,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1119 x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; 1120 x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
1120 } 1121 }
1121 1122
1122 sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; 1123 sec_ctx = (const struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
1123 if (sec_ctx != NULL) { 1124 if (sec_ctx != NULL) {
1124 struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); 1125 struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
1125 1126
@@ -1133,7 +1134,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1133 goto out; 1134 goto out;
1134 } 1135 }
1135 1136
1136 key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; 1137 key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
1137 if (sa->sadb_sa_auth) { 1138 if (sa->sadb_sa_auth) {
1138 int keysize = 0; 1139 int keysize = 0;
1139 struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); 1140 struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
@@ -1202,7 +1203,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1202 &x->id.daddr); 1203 &x->id.daddr);
1203 1204
1204 if (ext_hdrs[SADB_X_EXT_SA2-1]) { 1205 if (ext_hdrs[SADB_X_EXT_SA2-1]) {
1205 struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1]; 1206 const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1];
1206 int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); 1207 int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
1207 if (mode < 0) { 1208 if (mode < 0) {
1208 err = -EINVAL; 1209 err = -EINVAL;
@@ -1213,7 +1214,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1213 } 1214 }
1214 1215
1215 if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) { 1216 if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) {
1216 struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; 1217 const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];
1217 1218
1218 /* Nobody uses this, but we try. */ 1219 /* Nobody uses this, but we try. */
1219 x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr); 1220 x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr);
@@ -1224,7 +1225,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1224 x->sel.family = x->props.family; 1225 x->sel.family = x->props.family;
1225 1226
1226 if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { 1227 if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
1227 struct sadb_x_nat_t_type* n_type; 1228 const struct sadb_x_nat_t_type* n_type;
1228 struct xfrm_encap_tmpl *natt; 1229 struct xfrm_encap_tmpl *natt;
1229 1230
1230 x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); 1231 x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
@@ -1236,12 +1237,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1236 natt->encap_type = n_type->sadb_x_nat_t_type_type; 1237 natt->encap_type = n_type->sadb_x_nat_t_type_type;
1237 1238
1238 if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { 1239 if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
1239 struct sadb_x_nat_t_port* n_port = 1240 const struct sadb_x_nat_t_port *n_port =
1240 ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; 1241 ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
1241 natt->encap_sport = n_port->sadb_x_nat_t_port_port; 1242 natt->encap_sport = n_port->sadb_x_nat_t_port_port;
1242 } 1243 }
1243 if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) { 1244 if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) {
1244 struct sadb_x_nat_t_port* n_port = 1245 const struct sadb_x_nat_t_port *n_port =
1245 ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; 1246 ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
1246 natt->encap_dport = n_port->sadb_x_nat_t_port_port; 1247 natt->encap_dport = n_port->sadb_x_nat_t_port_port;
1247 } 1248 }
@@ -1261,12 +1262,12 @@ out:
1261 return ERR_PTR(err); 1262 return ERR_PTR(err);
1262} 1263}
1263 1264
1264static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1265static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1265{ 1266{
1266 return -EOPNOTSUPP; 1267 return -EOPNOTSUPP;
1267} 1268}
1268 1269
1269static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1270static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1270{ 1271{
1271 struct net *net = sock_net(sk); 1272 struct net *net = sock_net(sk);
1272 struct sk_buff *resp_skb; 1273 struct sk_buff *resp_skb;
@@ -1365,7 +1366,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1365 return 0; 1366 return 0;
1366} 1367}
1367 1368
1368static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1369static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1369{ 1370{
1370 struct net *net = sock_net(sk); 1371 struct net *net = sock_net(sk);
1371 struct xfrm_state *x; 1372 struct xfrm_state *x;
@@ -1429,7 +1430,7 @@ static inline int event2keytype(int event)
1429} 1430}
1430 1431
1431/* ADD/UPD/DEL */ 1432/* ADD/UPD/DEL */
1432static int key_notify_sa(struct xfrm_state *x, struct km_event *c) 1433static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
1433{ 1434{
1434 struct sk_buff *skb; 1435 struct sk_buff *skb;
1435 struct sadb_msg *hdr; 1436 struct sadb_msg *hdr;
@@ -1453,7 +1454,7 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
1453 return 0; 1454 return 0;
1454} 1455}
1455 1456
1456static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1457static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1457{ 1458{
1458 struct net *net = sock_net(sk); 1459 struct net *net = sock_net(sk);
1459 struct xfrm_state *x; 1460 struct xfrm_state *x;
@@ -1492,7 +1493,7 @@ out:
1492 return err; 1493 return err;
1493} 1494}
1494 1495
1495static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1496static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1496{ 1497{
1497 struct net *net = sock_net(sk); 1498 struct net *net = sock_net(sk);
1498 struct xfrm_state *x; 1499 struct xfrm_state *x;
@@ -1534,7 +1535,7 @@ out:
1534 return err; 1535 return err;
1535} 1536}
1536 1537
1537static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1538static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1538{ 1539{
1539 struct net *net = sock_net(sk); 1540 struct net *net = sock_net(sk);
1540 __u8 proto; 1541 __u8 proto;
@@ -1570,7 +1571,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1570 return 0; 1571 return 0;
1571} 1572}
1572 1573
1573static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, 1574static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
1574 gfp_t allocation) 1575 gfp_t allocation)
1575{ 1576{
1576 struct sk_buff *skb; 1577 struct sk_buff *skb;
@@ -1642,7 +1643,7 @@ out_put_algs:
1642 return skb; 1643 return skb;
1643} 1644}
1644 1645
1645static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1646static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1646{ 1647{
1647 struct pfkey_sock *pfk = pfkey_sk(sk); 1648 struct pfkey_sock *pfk = pfkey_sk(sk);
1648 struct sk_buff *supp_skb; 1649 struct sk_buff *supp_skb;
@@ -1671,7 +1672,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1671 return 0; 1672 return 0;
1672} 1673}
1673 1674
1674static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) 1675static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
1675{ 1676{
1676 struct sk_buff *skb; 1677 struct sk_buff *skb;
1677 struct sadb_msg *hdr; 1678 struct sadb_msg *hdr;
@@ -1688,7 +1689,7 @@ static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr)
1688 return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); 1689 return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
1689} 1690}
1690 1691
1691static int key_notify_sa_flush(struct km_event *c) 1692static int key_notify_sa_flush(const struct km_event *c)
1692{ 1693{
1693 struct sk_buff *skb; 1694 struct sk_buff *skb;
1694 struct sadb_msg *hdr; 1695 struct sadb_msg *hdr;
@@ -1710,7 +1711,7 @@ static int key_notify_sa_flush(struct km_event *c)
1710 return 0; 1711 return 0;
1711} 1712}
1712 1713
1713static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1714static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1714{ 1715{
1715 struct net *net = sock_net(sk); 1716 struct net *net = sock_net(sk);
1716 unsigned proto; 1717 unsigned proto;
@@ -1784,7 +1785,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
1784 xfrm_state_walk_done(&pfk->dump.u.state); 1785 xfrm_state_walk_done(&pfk->dump.u.state);
1785} 1786}
1786 1787
1787static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1788static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1788{ 1789{
1789 u8 proto; 1790 u8 proto;
1790 struct pfkey_sock *pfk = pfkey_sk(sk); 1791 struct pfkey_sock *pfk = pfkey_sk(sk);
@@ -1805,19 +1806,29 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr
1805 return pfkey_do_dump(pfk); 1806 return pfkey_do_dump(pfk);
1806} 1807}
1807 1808
1808static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1809static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
1809{ 1810{
1810 struct pfkey_sock *pfk = pfkey_sk(sk); 1811 struct pfkey_sock *pfk = pfkey_sk(sk);
1811 int satype = hdr->sadb_msg_satype; 1812 int satype = hdr->sadb_msg_satype;
1813 bool reset_errno = false;
1812 1814
1813 if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { 1815 if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) {
1814 /* XXX we mangle packet... */ 1816 reset_errno = true;
1815 hdr->sadb_msg_errno = 0;
1816 if (satype != 0 && satype != 1) 1817 if (satype != 0 && satype != 1)
1817 return -EINVAL; 1818 return -EINVAL;
1818 pfk->promisc = satype; 1819 pfk->promisc = satype;
1819 } 1820 }
1820 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); 1821 if (reset_errno && skb_cloned(skb))
1822 skb = skb_copy(skb, GFP_KERNEL);
1823 else
1824 skb = skb_clone(skb, GFP_KERNEL);
1825
1826 if (reset_errno && skb) {
1827 struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data;
1828 new_hdr->sadb_msg_errno = 0;
1829 }
1830
1831 pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
1821 return 0; 1832 return 0;
1822} 1833}
1823 1834
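The pfkey_promisc rewrite stops editing a buffer it may share with other users: when the header has to be changed and the skb's data area is shared (skb_cloned), it takes a private copy with skb_copy(); otherwise a cheap header-only skb_clone() suffices. A hedged sketch of that decision, assuming a kernel build context; fix_header() is a hypothetical stand-in for zeroing sadb_msg_errno:

	#include <linux/skbuff.h>

	static void fix_header(struct sk_buff *skb);	/* hypothetical */

	static struct sk_buff *skb_for_edit(struct sk_buff *skb, bool need_edit)
	{
		struct sk_buff *out;

		if (need_edit && skb_cloned(skb))
			out = skb_copy(skb, GFP_KERNEL);  /* private data copy */
		else
			out = skb_clone(skb, GFP_KERNEL); /* shared data is fine */

		if (need_edit && out)
			fix_header(out);
		return out;
	}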
@@ -1921,7 +1932,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
1921 return 0; 1932 return 0;
1922} 1933}
1923 1934
1924static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) 1935static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp)
1925{ 1936{
1926 struct xfrm_sec_ctx *xfrm_ctx = xp->security; 1937 struct xfrm_sec_ctx *xfrm_ctx = xp->security;
1927 1938
@@ -1933,9 +1944,9 @@ static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
1933 return 0; 1944 return 0;
1934} 1945}
1935 1946
1936static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) 1947static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp)
1937{ 1948{
1938 struct xfrm_tmpl *t; 1949 const struct xfrm_tmpl *t;
1939 int sockaddr_size = pfkey_sockaddr_size(xp->family); 1950 int sockaddr_size = pfkey_sockaddr_size(xp->family);
1940 int socklen = 0; 1951 int socklen = 0;
1941 int i; 1952 int i;
@@ -1955,7 +1966,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
1955 pfkey_xfrm_policy2sec_ctx_size(xp); 1966 pfkey_xfrm_policy2sec_ctx_size(xp);
1956} 1967}
1957 1968
1958static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) 1969static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp)
1959{ 1970{
1960 struct sk_buff *skb; 1971 struct sk_buff *skb;
1961 int size; 1972 int size;
@@ -1969,7 +1980,7 @@ static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
1969 return skb; 1980 return skb;
1970} 1981}
1971 1982
1972static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir) 1983static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir)
1973{ 1984{
1974 struct sadb_msg *hdr; 1985 struct sadb_msg *hdr;
1975 struct sadb_address *addr; 1986 struct sadb_address *addr;
@@ -2065,8 +2076,8 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
2065 pol->sadb_x_policy_priority = xp->priority; 2076 pol->sadb_x_policy_priority = xp->priority;
2066 2077
2067 for (i=0; i<xp->xfrm_nr; i++) { 2078 for (i=0; i<xp->xfrm_nr; i++) {
2079 const struct xfrm_tmpl *t = xp->xfrm_vec + i;
2068 struct sadb_x_ipsecrequest *rq; 2080 struct sadb_x_ipsecrequest *rq;
2069 struct xfrm_tmpl *t = xp->xfrm_vec + i;
2070 int req_size; 2081 int req_size;
2071 int mode; 2082 int mode;
2072 2083
@@ -2123,7 +2134,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
2123 return 0; 2134 return 0;
2124} 2135}
2125 2136
2126static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) 2137static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
2127{ 2138{
2128 struct sk_buff *out_skb; 2139 struct sk_buff *out_skb;
2129 struct sadb_msg *out_hdr; 2140 struct sadb_msg *out_hdr;
@@ -2152,7 +2163,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c
2152 2163
2153} 2164}
2154 2165
2155static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2166static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2156{ 2167{
2157 struct net *net = sock_net(sk); 2168 struct net *net = sock_net(sk);
2158 int err = 0; 2169 int err = 0;
@@ -2273,7 +2284,7 @@ out:
2273 return err; 2284 return err;
2274} 2285}
2275 2286
2276static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2287static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2277{ 2288{
2278 struct net *net = sock_net(sk); 2289 struct net *net = sock_net(sk);
2279 int err; 2290 int err;
@@ -2350,7 +2361,7 @@ out:
2350 return err; 2361 return err;
2351} 2362}
2352 2363
2353static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir) 2364static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir)
2354{ 2365{
2355 int err; 2366 int err;
2356 struct sk_buff *out_skb; 2367 struct sk_buff *out_skb;
@@ -2458,7 +2469,7 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
2458} 2469}
2459 2470
2460static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, 2471static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
2461 struct sadb_msg *hdr, void **ext_hdrs) 2472 const struct sadb_msg *hdr, void * const *ext_hdrs)
2462{ 2473{
2463 int i, len, ret, err = -EINVAL; 2474 int i, len, ret, err = -EINVAL;
2464 u8 dir; 2475 u8 dir;
@@ -2549,14 +2560,14 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
2549} 2560}
2550#else 2561#else
2551static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, 2562static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
2552 struct sadb_msg *hdr, void **ext_hdrs) 2563 const struct sadb_msg *hdr, void * const *ext_hdrs)
2553{ 2564{
2554 return -ENOPROTOOPT; 2565 return -ENOPROTOOPT;
2555} 2566}
2556#endif 2567#endif
2557 2568
2558 2569
2559static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2570static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2560{ 2571{
2561 struct net *net = sock_net(sk); 2572 struct net *net = sock_net(sk);
2562 unsigned int dir; 2573 unsigned int dir;
@@ -2644,7 +2655,7 @@ static void pfkey_dump_sp_done(struct pfkey_sock *pfk)
2644 xfrm_policy_walk_done(&pfk->dump.u.policy); 2655 xfrm_policy_walk_done(&pfk->dump.u.policy);
2645} 2656}
2646 2657
2647static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2658static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2648{ 2659{
2649 struct pfkey_sock *pfk = pfkey_sk(sk); 2660 struct pfkey_sock *pfk = pfkey_sk(sk);
2650 2661
@@ -2660,7 +2671,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
2660 return pfkey_do_dump(pfk); 2671 return pfkey_do_dump(pfk);
2661} 2672}
2662 2673
2663static int key_notify_policy_flush(struct km_event *c) 2674static int key_notify_policy_flush(const struct km_event *c)
2664{ 2675{
2665 struct sk_buff *skb_out; 2676 struct sk_buff *skb_out;
2666 struct sadb_msg *hdr; 2677 struct sadb_msg *hdr;
@@ -2680,7 +2691,7 @@ static int key_notify_policy_flush(struct km_event *c)
2680 2691
2681} 2692}
2682 2693
2683static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2694static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
2684{ 2695{
2685 struct net *net = sock_net(sk); 2696 struct net *net = sock_net(sk);
2686 struct km_event c; 2697 struct km_event c;
@@ -2709,7 +2720,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2709} 2720}
2710 2721
2711typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, 2722typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
2712 struct sadb_msg *hdr, void **ext_hdrs); 2723 const struct sadb_msg *hdr, void * const *ext_hdrs);
2713static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { 2724static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
2714 [SADB_RESERVED] = pfkey_reserved, 2725 [SADB_RESERVED] = pfkey_reserved,
2715 [SADB_GETSPI] = pfkey_getspi, 2726 [SADB_GETSPI] = pfkey_getspi,
@@ -2736,7 +2747,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
2736 [SADB_X_MIGRATE] = pfkey_migrate, 2747 [SADB_X_MIGRATE] = pfkey_migrate,
2737}; 2748};
2738 2749
2739static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr) 2750static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr)
2740{ 2751{
2741 void *ext_hdrs[SADB_EXT_MAX]; 2752 void *ext_hdrs[SADB_EXT_MAX];
2742 int err; 2753 int err;
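The typedef change above threads const through the whole pfkey_funcs[] dispatch table: every handler now receives the message header as const struct sadb_msg * and the extension table as void * const *. A standalone sketch of that table shape; struct msg and the handlers are stand-ins, not the patch's types:

	#include <stddef.h>

	struct msg { unsigned char type; };

	typedef int (*handler)(const struct msg *hdr, void *const *ext_hdrs);

	static int h_reserved(const struct msg *hdr, void *const *ext) { return -1; }
	static int h_getspi(const struct msg *hdr, void *const *ext) { return 0; }

	static const handler funcs[] = {
		[0] = h_reserved,	/* indexed by message type */
		[1] = h_getspi,
	};

	static int process(const struct msg *hdr, void *const *ext)
	{
		if (hdr->type < sizeof(funcs) / sizeof(funcs[0]) && funcs[hdr->type])
			return funcs[hdr->type](hdr, ext);
		return -1;
	}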
@@ -2781,7 +2792,8 @@ static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp)
2781 return hdr; 2792 return hdr;
2782} 2793}
2783 2794
2784static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) 2795static inline int aalg_tmpl_set(const struct xfrm_tmpl *t,
2796 const struct xfrm_algo_desc *d)
2785{ 2797{
2786 unsigned int id = d->desc.sadb_alg_id; 2798 unsigned int id = d->desc.sadb_alg_id;
2787 2799
@@ -2791,7 +2803,8 @@ static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
2791 return (t->aalgos >> id) & 1; 2803 return (t->aalgos >> id) & 1;
2792} 2804}
2793 2805
2794static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) 2806static inline int ealg_tmpl_set(const struct xfrm_tmpl *t,
2807 const struct xfrm_algo_desc *d)
2795{ 2808{
2796 unsigned int id = d->desc.sadb_alg_id; 2809 unsigned int id = d->desc.sadb_alg_id;
2797 2810
@@ -2801,12 +2814,12 @@ static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
2801 return (t->ealgos >> id) & 1; 2814 return (t->ealgos >> id) & 1;
2802} 2815}
2803 2816
2804static int count_ah_combs(struct xfrm_tmpl *t) 2817static int count_ah_combs(const struct xfrm_tmpl *t)
2805{ 2818{
2806 int i, sz = 0; 2819 int i, sz = 0;
2807 2820
2808 for (i = 0; ; i++) { 2821 for (i = 0; ; i++) {
2809 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); 2822 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
2810 if (!aalg) 2823 if (!aalg)
2811 break; 2824 break;
2812 if (aalg_tmpl_set(t, aalg) && aalg->available) 2825 if (aalg_tmpl_set(t, aalg) && aalg->available)
@@ -2815,12 +2828,12 @@ static int count_ah_combs(struct xfrm_tmpl *t)
2815 return sz + sizeof(struct sadb_prop); 2828 return sz + sizeof(struct sadb_prop);
2816} 2829}
2817 2830
2818static int count_esp_combs(struct xfrm_tmpl *t) 2831static int count_esp_combs(const struct xfrm_tmpl *t)
2819{ 2832{
2820 int i, k, sz = 0; 2833 int i, k, sz = 0;
2821 2834
2822 for (i = 0; ; i++) { 2835 for (i = 0; ; i++) {
2823 struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); 2836 const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
2824 if (!ealg) 2837 if (!ealg)
2825 break; 2838 break;
2826 2839
@@ -2828,7 +2841,7 @@ static int count_esp_combs(struct xfrm_tmpl *t)
2828 continue; 2841 continue;
2829 2842
2830 for (k = 1; ; k++) { 2843 for (k = 1; ; k++) {
2831 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); 2844 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
2832 if (!aalg) 2845 if (!aalg)
2833 break; 2846 break;
2834 2847
@@ -2839,7 +2852,7 @@ static int count_esp_combs(struct xfrm_tmpl *t)
2839 return sz + sizeof(struct sadb_prop); 2852 return sz + sizeof(struct sadb_prop);
2840} 2853}
2841 2854
2842static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) 2855static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
2843{ 2856{
2844 struct sadb_prop *p; 2857 struct sadb_prop *p;
2845 int i; 2858 int i;
@@ -2851,7 +2864,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2851 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); 2864 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
2852 2865
2853 for (i = 0; ; i++) { 2866 for (i = 0; ; i++) {
2854 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); 2867 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
2855 if (!aalg) 2868 if (!aalg)
2856 break; 2869 break;
2857 2870
@@ -2871,7 +2884,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2871 } 2884 }
2872} 2885}
2873 2886
2874static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) 2887static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
2875{ 2888{
2876 struct sadb_prop *p; 2889 struct sadb_prop *p;
2877 int i, k; 2890 int i, k;
@@ -2883,7 +2896,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2883 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); 2896 memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
2884 2897
2885 for (i=0; ; i++) { 2898 for (i=0; ; i++) {
2886 struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); 2899 const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
2887 if (!ealg) 2900 if (!ealg)
2888 break; 2901 break;
2889 2902
@@ -2892,7 +2905,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2892 2905
2893 for (k = 1; ; k++) { 2906 for (k = 1; ; k++) {
2894 struct sadb_comb *c; 2907 struct sadb_comb *c;
2895 struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); 2908 const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
2896 if (!aalg) 2909 if (!aalg)
2897 break; 2910 break;
2898 if (!(aalg_tmpl_set(t, aalg) && aalg->available)) 2911 if (!(aalg_tmpl_set(t, aalg) && aalg->available))
@@ -2914,12 +2927,12 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2914 } 2927 }
2915} 2928}
2916 2929
2917static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c) 2930static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c)
2918{ 2931{
2919 return 0; 2932 return 0;
2920} 2933}
2921 2934
2922static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) 2935static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
2923{ 2936{
2924 struct sk_buff *out_skb; 2937 struct sk_buff *out_skb;
2925 struct sadb_msg *out_hdr; 2938 struct sadb_msg *out_hdr;
@@ -2949,7 +2962,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
2949 return 0; 2962 return 0;
2950} 2963}
2951 2964
2952static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) 2965static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c)
2953{ 2966{
2954 struct net *net = x ? xs_net(x) : c->net; 2967 struct net *net = x ? xs_net(x) : c->net;
2955 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); 2968 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -2976,7 +2989,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
2976 return 0; 2989 return 0;
2977} 2990}
2978 2991
2979static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) 2992static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
2980{ 2993{
2981 if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) 2994 if (xp && xp->type != XFRM_POLICY_TYPE_MAIN)
2982 return 0; 2995 return 0;
@@ -3318,7 +3331,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
3318 3331
3319#ifdef CONFIG_NET_KEY_MIGRATE 3332#ifdef CONFIG_NET_KEY_MIGRATE
3320static int set_sadb_address(struct sk_buff *skb, int sasize, int type, 3333static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
3321 struct xfrm_selector *sel) 3334 const struct xfrm_selector *sel)
3322{ 3335{
3323 struct sadb_address *addr; 3336 struct sadb_address *addr;
3324 addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); 3337 addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize);
@@ -3348,7 +3361,7 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
3348} 3361}
3349 3362
3350 3363
3351static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) 3364static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k)
3352{ 3365{
3353 struct sadb_x_kmaddress *kma; 3366 struct sadb_x_kmaddress *kma;
3354 u8 *sa; 3367 u8 *sa;
@@ -3376,7 +3389,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k)
3376static int set_ipsecrequest(struct sk_buff *skb, 3389static int set_ipsecrequest(struct sk_buff *skb,
3377 uint8_t proto, uint8_t mode, int level, 3390 uint8_t proto, uint8_t mode, int level,
3378 uint32_t reqid, uint8_t family, 3391 uint32_t reqid, uint8_t family,
3379 xfrm_address_t *src, xfrm_address_t *dst) 3392 const xfrm_address_t *src, const xfrm_address_t *dst)
3380{ 3393{
3381 struct sadb_x_ipsecrequest *rq; 3394 struct sadb_x_ipsecrequest *rq;
3382 u8 *sa; 3395 u8 *sa;
@@ -3404,9 +3417,9 @@ static int set_ipsecrequest(struct sk_buff *skb,
3404#endif 3417#endif
3405 3418
3406#ifdef CONFIG_NET_KEY_MIGRATE 3419#ifdef CONFIG_NET_KEY_MIGRATE
3407static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 3420static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3408 struct xfrm_migrate *m, int num_bundles, 3421 const struct xfrm_migrate *m, int num_bundles,
3409 struct xfrm_kmaddress *k) 3422 const struct xfrm_kmaddress *k)
3410{ 3423{
3411 int i; 3424 int i;
3412 int sasize_sel; 3425 int sasize_sel;
@@ -3415,7 +3428,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
3415 struct sk_buff *skb; 3428 struct sk_buff *skb;
3416 struct sadb_msg *hdr; 3429 struct sadb_msg *hdr;
3417 struct sadb_x_policy *pol; 3430 struct sadb_x_policy *pol;
3418 struct xfrm_migrate *mp; 3431 const struct xfrm_migrate *mp;
3419 3432
3420 if (type != XFRM_POLICY_TYPE_MAIN) 3433 if (type != XFRM_POLICY_TYPE_MAIN)
3421 return 0; 3434 return 0;
@@ -3513,9 +3526,9 @@ err:
3513 return -EINVAL; 3526 return -EINVAL;
3514} 3527}
3515#else 3528#else
3516static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 3529static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3517 struct xfrm_migrate *m, int num_bundles, 3530 const struct xfrm_migrate *m, int num_bundles,
3518 struct xfrm_kmaddress *k) 3531 const struct xfrm_kmaddress *k)
3519{ 3532{
3520 return -ENOPROTOOPT; 3533 return -ENOPROTOOPT;
3521} 3534}
@@ -3655,6 +3668,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
3655} 3668}
3656 3669
3657static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) 3670static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos)
3671 __acquires(rcu)
3658{ 3672{
3659 struct net *net = seq_file_net(f); 3673 struct net *net = seq_file_net(f);
3660 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); 3674 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -3672,6 +3686,7 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos)
3672} 3686}
3673 3687
3674static void pfkey_seq_stop(struct seq_file *f, void *v) 3688static void pfkey_seq_stop(struct seq_file *f, void *v)
3689 __releases(rcu)
3675{ 3690{
3676 rcu_read_unlock(); 3691 rcu_read_unlock();
3677} 3692}
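The last two af_key hunks only add sparse annotations: __acquires(rcu) on the seq_file start routine and __releases(rcu) on stop, so static analysis can check that the RCU read lock taken in one is dropped in the other. A hedged sketch, assuming a kernel build context (the annotations compile to nothing at runtime):

	#include <linux/rcupdate.h>
	#include <linux/seq_file.h>

	static void *my_seq_start(struct seq_file *f, loff_t *ppos)
		__acquires(rcu)
	{
		rcu_read_lock();	/* held across the whole iteration */
		return NULL;		/* iteration elided in this sketch */
	}

	static void my_seq_stop(struct seq_file *f, void *v)
		__releases(rcu)
	{
		rcu_read_unlock();
	}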
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8d9ce0accc9..a8193f52c13 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -283,7 +283,7 @@ static __net_init int l2tp_eth_init_net(struct net *net)
283 return 0; 283 return 0;
284} 284}
285 285
286static __net_initdata struct pernet_operations l2tp_eth_net_ops = { 286static struct pernet_operations l2tp_eth_net_ops = {
287 .init = l2tp_eth_init_net, 287 .init = l2tp_eth_init_net,
288 .id = &l2tp_eth_net_id, 288 .id = &l2tp_eth_net_id,
289 .size = sizeof(struct l2tp_eth_net), 289 .size = sizeof(struct l2tp_eth_net),
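Dropping __net_initdata here matters because the pernet_operations structure is referenced again at module unload; marking it init-only data would let the kernel discard it after boot on CONFIG_NET_NS=n builds. A hedged sketch of the resident registration shape, assuming a kernel build context; the my_net_* names are hypothetical:

	#include <net/net_namespace.h>

	struct my_net { int dummy; };	/* hypothetical per-net state */
	static int my_net_id;

	static __net_init int my_init_net(struct net *net)
	{
		return 0;	/* the init function may stay __net_init ... */
	}

	static struct pernet_operations my_net_ops = {	/* ... the ops must not */
		.init = my_init_net,
		.id   = &my_net_id,
		.size = sizeof(struct my_net),
	};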
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 110efb704c9..fce9bd3bd3f 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -320,11 +320,12 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
320 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) 320 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
321 goto out; 321 goto out;
322 322
323 rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr, 323 rt = ip_route_connect(lsa->l2tp_addr.s_addr, saddr,
324 RT_CONN_FLAGS(sk), oif, 324 RT_CONN_FLAGS(sk), oif,
325 IPPROTO_L2TP, 325 IPPROTO_L2TP,
326 0, 0, sk, 1); 326 0, 0, sk, true);
327 if (rc) { 327 if (IS_ERR(rt)) {
328 rc = PTR_ERR(rt);
328 if (rc == -ENETUNREACH) 329 if (rc == -ENETUNREACH)
329 IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); 330 IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
330 goto out; 331 goto out;
@@ -474,24 +475,17 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
474 if (opt && opt->srr) 475 if (opt && opt->srr)
475 daddr = opt->faddr; 476 daddr = opt->faddr;
476 477
477 { 478 /* If this fails, the transport layer's retransmit mechanism will
478 struct flowi fl = { .oif = sk->sk_bound_dev_if, 479 * keep trying until a route appears or the connection
479 .fl4_dst = daddr, 480 * itself times out.
480 .fl4_src = inet->inet_saddr, 481 */
481 .fl4_tos = RT_CONN_FLAGS(sk), 482 rt = ip_route_output_ports(sock_net(sk), sk,
482 .proto = sk->sk_protocol, 483 daddr, inet->inet_saddr,
483 .flags = inet_sk_flowi_flags(sk), 484 inet->inet_dport, inet->inet_sport,
484 .fl_ip_sport = inet->inet_sport, 485 sk->sk_protocol, RT_CONN_FLAGS(sk),
485 .fl_ip_dport = inet->inet_dport }; 486 sk->sk_bound_dev_if);
486 487 if (IS_ERR(rt))
487 /* If this fails, retransmit mechanism of transport layer will 488 goto no_route;
488 * keep trying until route appears or the connection times
489 * itself out.
490 */
491 security_sk_classify_flow(sk, &fl);
492 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
493 goto no_route;
494 }
495 sk_setup_caps(sk, &rt->dst); 489 sk_setup_caps(sk, &rt->dst);
496 } 490 }
497 skb_dst_set(skb, dst_clone(&rt->dst)); 491 skb_dst_set(skb, dst_clone(&rt->dst));
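The l2tp_ip hunks move to the reworked routing API: ip_route_connect() and ip_route_output_ports() now return the struct rtable * directly, with failure encoded in the pointer via ERR_PTR, instead of filling an output parameter and returning an int. A hedged sketch of the error-pointer convention, assuming a kernel build context; lookup() is hypothetical:

	#include <linux/err.h>
	#include <net/route.h>

	static struct rtable *lookup(void);	/* hypothetical route lookup */

	static int use_route(void)
	{
		struct rtable *rt = lookup();

		if (IS_ERR(rt))
			return PTR_ERR(rt);	/* e.g. -ENETUNREACH */
		/* ... use rt, then release it ... */
		return 0;
	}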
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index f9968743913..058f1e9a912 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -181,25 +181,26 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
181 * LLC functionality 181 * LLC functionality
182 */ 182 */
183 rcv = rcu_dereference(sap->rcv_func); 183 rcv = rcu_dereference(sap->rcv_func);
184 if (rcv) {
185 struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);
186 if (cskb)
187 rcv(cskb, dev, pt, orig_dev);
188 }
189 dest = llc_pdu_type(skb); 184 dest = llc_pdu_type(skb);
190 if (unlikely(!dest || !llc_type_handlers[dest - 1])) 185 if (unlikely(!dest || !llc_type_handlers[dest - 1])) {
191 goto drop_put; 186 if (rcv)
192 llc_type_handlers[dest - 1](sap, skb); 187 rcv(skb, dev, pt, orig_dev);
193out_put: 188 else
189 kfree_skb(skb);
190 } else {
191 if (rcv) {
192 struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);
193 if (cskb)
194 rcv(cskb, dev, pt, orig_dev);
195 }
196 llc_type_handlers[dest - 1](sap, skb);
197 }
194 llc_sap_put(sap); 198 llc_sap_put(sap);
195out: 199out:
196 return 0; 200 return 0;
197drop: 201drop:
198 kfree_skb(skb); 202 kfree_skb(skb);
199 goto out; 203 goto out;
200drop_put:
201 kfree_skb(skb);
202 goto out_put;
203handle_station: 204handle_station:
204 if (!llc_station_handler) 205 if (!llc_station_handler)
205 goto drop; 206 goto drop;
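The llc_rcv rework avoids a gratuitous clone: when no type handler will run, the original skb goes straight to the sap's rcv_func (or is freed), and a clone is made only when both consumers need the packet. A hedged sketch of that fan-out, assuming a kernel build context; deliver_a/deliver_b are hypothetical sinks that consume the skb:

	#include <linux/skbuff.h>

	static void deliver_a(struct sk_buff *skb);	/* hypothetical */
	static void deliver_b(struct sk_buff *skb);	/* hypothetical */

	static void fan_out(struct sk_buff *skb, bool want_a, bool want_b)
	{
		if (want_a && want_b) {
			struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);

			if (cskb)
				deliver_a(cskb);	/* clone for the second consumer */
			deliver_b(skb);
		} else if (want_a) {
			deliver_a(skb);		/* single consumer: no clone */
		} else if (want_b) {
			deliver_b(skb);
		} else {
			kfree_skb(skb);		/* nobody wants it */
		}
	}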
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index c766056d048..513f85cc2ae 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -17,7 +17,7 @@ comment "CFG80211 needs to be enabled for MAC80211"
17if MAC80211 != n 17if MAC80211 != n
18 18
19config MAC80211_HAS_RC 19config MAC80211_HAS_RC
20 def_bool n 20 bool
21 21
22config MAC80211_RC_PID 22config MAC80211_RC_PID
23 bool "PID controller based rate control algorithm" if EXPERT 23 bool "PID controller based rate control algorithm" if EXPERT
@@ -78,7 +78,7 @@ config MAC80211_RC_DEFAULT
78endif 78endif
79 79
80comment "Some wireless drivers require a rate control algorithm" 80comment "Some wireless drivers require a rate control algorithm"
81 depends on MAC80211_HAS_RC=n 81 depends on MAC80211 && MAC80211_HAS_RC=n
82 82
83config MAC80211_MESH 83config MAC80211_MESH
84 bool "Enable mac80211 mesh networking (pre-802.11s) support" 84 bool "Enable mac80211 mesh networking (pre-802.11s) support"
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 227ca82eef7..0c9d0c07eae 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -76,7 +76,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
76#endif /* CONFIG_MAC80211_HT_DEBUG */ 76#endif /* CONFIG_MAC80211_HT_DEBUG */
77 77
78 if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, 78 if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP,
79 &sta->sta, tid, NULL)) 79 &sta->sta, tid, NULL, 0))
80 printk(KERN_DEBUG "HW problem - can not stop rx " 80 printk(KERN_DEBUG "HW problem - can not stop rx "
81 "aggregation for tid %d\n", tid); 81 "aggregation for tid %d\n", tid);
82 82
@@ -232,6 +232,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
232 if (buf_size == 0) 232 if (buf_size == 0)
233 buf_size = IEEE80211_MAX_AMPDU_BUF; 233 buf_size = IEEE80211_MAX_AMPDU_BUF;
234 234
235 /* make sure the size doesn't exceed the maximum supported by the hw */
236 if (buf_size > local->hw.max_rx_aggregation_subframes)
237 buf_size = local->hw.max_rx_aggregation_subframes;
235 238
236 /* examine state machine */ 239 /* examine state machine */
237 mutex_lock(&sta->ampdu_mlme.mtx); 240 mutex_lock(&sta->ampdu_mlme.mtx);
@@ -287,7 +290,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
287 } 290 }
288 291
289 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, 292 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START,
290 &sta->sta, tid, &start_seq_num); 293 &sta->sta, tid, &start_seq_num, 0);
291#ifdef CONFIG_MAC80211_HT_DEBUG 294#ifdef CONFIG_MAC80211_HT_DEBUG
292 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); 295 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
293#endif /* CONFIG_MAC80211_HT_DEBUG */ 296#endif /* CONFIG_MAC80211_HT_DEBUG */
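The agg-rx hunks clamp the peer's requested reorder-buffer size to what the hardware advertises before the driver sees it (the trailing 0 added to the drv_ampdu_action calls is the new buf_size argument). A standalone sketch of the clamp; IEEE80211_MAX_AMPDU_BUF mirrors the 802.11n maximum of 64 subframes:

	#define IEEE80211_MAX_AMPDU_BUF 64

	static unsigned int clamp_buf_size(unsigned int requested,
					   unsigned int hw_max)
	{
		if (requested == 0)		/* 0 means "no preference" */
			requested = IEEE80211_MAX_AMPDU_BUF;
		if (requested > hw_max)		/* never exceed the hardware */
			requested = hw_max;
		return requested;
	}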
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9cc472c6a6a..63d852cb4ca 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -190,7 +190,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
190 190
191 ret = drv_ampdu_action(local, sta->sdata, 191 ret = drv_ampdu_action(local, sta->sdata,
192 IEEE80211_AMPDU_TX_STOP, 192 IEEE80211_AMPDU_TX_STOP,
193 &sta->sta, tid, NULL); 193 &sta->sta, tid, NULL, 0);
194 194
195 /* HW shall not deny going back to legacy */ 195 /* HW shall not deny going back to legacy */
196 if (WARN_ON(ret)) { 196 if (WARN_ON(ret)) {
@@ -311,7 +311,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
311 start_seq_num = sta->tid_seq[tid] >> 4; 311 start_seq_num = sta->tid_seq[tid] >> 4;
312 312
313 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, 313 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
314 &sta->sta, tid, &start_seq_num); 314 &sta->sta, tid, &start_seq_num, 0);
315 if (ret) { 315 if (ret) {
316#ifdef CONFIG_MAC80211_HT_DEBUG 316#ifdef CONFIG_MAC80211_HT_DEBUG
317 printk(KERN_DEBUG "BA request denied - HW unavailable for" 317 printk(KERN_DEBUG "BA request denied - HW unavailable for"
@@ -342,7 +342,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
342 /* send AddBA request */ 342 /* send AddBA request */
343 ieee80211_send_addba_request(sdata, sta->sta.addr, tid, 343 ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
344 tid_tx->dialog_token, start_seq_num, 344 tid_tx->dialog_token, start_seq_num,
345 0x40, tid_tx->timeout); 345 local->hw.max_tx_aggregation_subframes,
346 tid_tx->timeout);
346} 347}
347 348
348int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, 349int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
@@ -487,7 +488,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
487 488
488 drv_ampdu_action(local, sta->sdata, 489 drv_ampdu_action(local, sta->sdata,
489 IEEE80211_AMPDU_TX_OPERATIONAL, 490 IEEE80211_AMPDU_TX_OPERATIONAL,
490 &sta->sta, tid, NULL); 491 &sta->sta, tid, NULL,
492 sta->ampdu_mlme.tid_tx[tid]->buf_size);
491 493
492 /* 494 /*
493 * synchronize with TX path, while splicing the TX path 495 * synchronize with TX path, while splicing the TX path
@@ -742,9 +744,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
742{ 744{
743 struct tid_ampdu_tx *tid_tx; 745 struct tid_ampdu_tx *tid_tx;
744 u16 capab, tid; 746 u16 capab, tid;
747 u8 buf_size;
745 748
746 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); 749 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
747 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; 750 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
751 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
748 752
749 mutex_lock(&sta->ampdu_mlme.mtx); 753 mutex_lock(&sta->ampdu_mlme.mtx);
750 754
@@ -767,12 +771,23 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
767 771
768 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) 772 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
769 == WLAN_STATUS_SUCCESS) { 773 == WLAN_STATUS_SUCCESS) {
774 /*
775 * IEEE 802.11-2007 7.3.1.14:
776 * In an ADDBA Response frame, when the Status Code field
777 * is set to 0, the Buffer Size subfield is set to a value
778 * of at least 1.
779 */
780 if (!buf_size)
781 goto out;
782
770 if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, 783 if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
771 &tid_tx->state)) { 784 &tid_tx->state)) {
772 /* ignore duplicate response */ 785 /* ignore duplicate response */
773 goto out; 786 goto out;
774 } 787 }
775 788
789 tid_tx->buf_size = buf_size;
790
776 if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) 791 if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
777 ieee80211_agg_tx_operational(local, sta, tid); 792 ieee80211_agg_tx_operational(local, sta, tid);
778 793
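The addba_resp hunk decodes the TID and buffer size from the 16-bit capability field and, per IEEE 802.11-2007 7.3.1.14, treats a successful response advertising buf_size == 0 as invalid. A standalone sketch of the decode; the masks mirror IEEE80211_ADDBA_PARAM_{TID,BUF_SIZE}_MASK as used above:

	#include <stdbool.h>
	#include <stdint.h>

	#define ADDBA_PARAM_TID_MASK      0x003C	/* bits 2..5 */
	#define ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0	/* bits 6..15 */

	static bool parse_addba_capab(uint16_t capab,
				      uint8_t *tid, uint16_t *buf_size)
	{
		*tid = (capab & ADDBA_PARAM_TID_MASK) >> 2;
		*buf_size = (capab & ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
		/* status 0 must come with a buffer size of at least 1 */
		return *buf_size != 0;
	}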
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9cd73b11506..334213571ad 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -316,6 +316,17 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
316 return 0; 316 return 0;
317} 317}
318 318
319static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, int idx)
320{
321 if (!(rate->flags & RATE_INFO_FLAGS_MCS)) {
322 struct ieee80211_supported_band *sband;
323 sband = sta->local->hw.wiphy->bands[
324 sta->local->hw.conf.channel->band];
325 rate->legacy = sband->bitrates[idx].bitrate;
326 } else
327 rate->mcs = idx;
328}
329
319static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) 330static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
320{ 331{
321 struct ieee80211_sub_if_data *sdata = sta->sdata; 332 struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -330,6 +341,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
330 STATION_INFO_TX_RETRIES | 341 STATION_INFO_TX_RETRIES |
331 STATION_INFO_TX_FAILED | 342 STATION_INFO_TX_FAILED |
332 STATION_INFO_TX_BITRATE | 343 STATION_INFO_TX_BITRATE |
344 STATION_INFO_RX_BITRATE |
333 STATION_INFO_RX_DROP_MISC; 345 STATION_INFO_RX_DROP_MISC;
334 346
335 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); 347 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
@@ -355,15 +367,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
 	if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI)
 		sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+	rate_idx_to_bitrate(&sinfo->txrate, sta, sta->last_tx_rate.idx);
 
-	if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) {
-		struct ieee80211_supported_band *sband;
-		sband = sta->local->hw.wiphy->bands[
-				sta->local->hw.conf.channel->band];
-		sinfo->txrate.legacy =
-			sband->bitrates[sta->last_tx_rate.idx].bitrate;
-	} else
-		sinfo->txrate.mcs = sta->last_tx_rate.idx;
+	sinfo->rxrate.flags = 0;
+	if (sta->last_rx_rate_flag & RX_FLAG_HT)
+		sinfo->rxrate.flags |= RATE_INFO_FLAGS_MCS;
+	if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
+		sinfo->rxrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
+	if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI)
+		sinfo->rxrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+	rate_idx_to_bitrate(&sinfo->rxrate, sta, sta->last_rx_rate_idx);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
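For reference when reading sta_set_sinfo() and the new helper: in cfg80211's struct rate_info, legacy carries the bitrate in units of 100 kbit/s while mcs is a bare 802.11n MCS index. A self-contained userspace sketch of rendering either form (the struct here mirrors the two cases, it is not the kernel type):

#include <stdio.h>

struct rate_sketch {
	int is_mcs;	/* stands in for RATE_INFO_FLAGS_MCS */
	int legacy;	/* units of 100 kbit/s */
	int mcs;	/* 802.11n MCS index */
};

static void print_rate(const struct rate_sketch *r)
{
	if (r->is_mcs)
		printf("MCS %d\n", r->mcs);
	else
		printf("%d.%d Mbit/s\n", r->legacy / 10, r->legacy % 10);
}

int main(void)
{
	struct rate_sketch legacy = { 0, 540, 0 };	/* 54.0 Mbit/s OFDM */
	struct rate_sketch ht = { 1, 0, 7 };

	print_rate(&legacy);
	print_rate(&ht);
	return 0;
}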
@@ -821,6 +834,10 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 
 	rcu_read_unlock();
 
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))
+		ieee80211_recalc_ps(local, -1);
+
 	return 0;
 }
 
@@ -1215,6 +1232,9 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata = NULL;
+	struct ieee80211_channel *old_oper;
+	enum nl80211_channel_type old_oper_type;
+	enum nl80211_channel_type old_vif_oper_type = NL80211_CHAN_NO_HT;
 
 	if (netdev)
 		sdata = IEEE80211_DEV_TO_SUB_IF(netdev);
@@ -1232,13 +1252,23 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
 		break;
 	}
 
-	local->oper_channel = chan;
+	if (sdata)
+		old_vif_oper_type = sdata->vif.bss_conf.channel_type;
+	old_oper_type = local->_oper_channel_type;
 
 	if (!ieee80211_set_channel_type(local, sdata, channel_type))
 		return -EBUSY;
 
-	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
-	if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR)
+	old_oper = local->oper_channel;
+	local->oper_channel = chan;
+
+	/* Update driver if changes were actually made. */
+	if ((old_oper != local->oper_channel) ||
+	    (old_oper_type != local->_oper_channel_type))
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+
+	if ((sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) &&
+	    old_vif_oper_type != sdata->vif.bss_conf.channel_type)
 		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
 
 	return 0;
@@ -1274,8 +1304,11 @@ static int ieee80211_scan(struct wiphy *wiphy,
 	case NL80211_IFTYPE_P2P_GO:
 		if (sdata->local->ops->hw_scan)
 			break;
-		/* FIXME: implement NoA while scanning in software */
-		return -EOPNOTSUPP;
+		/*
+		 * FIXME: implement NoA while scanning in software,
+		 * for now fall through to allow scanning only when
+		 * beaconing hasn't been configured yet
+		 */
 	case NL80211_IFTYPE_AP:
 		if (sdata->u.ap.beacon)
 			return -EOPNOTSUPP;
@@ -1784,6 +1817,33 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 
 	*cookie = (unsigned long) skb;
 
+	if (is_offchan && local->ops->offchannel_tx) {
+		int ret;
+
+		IEEE80211_SKB_CB(skb)->band = chan->band;
+
+		mutex_lock(&local->mtx);
+
+		if (local->hw_offchan_tx_cookie) {
+			mutex_unlock(&local->mtx);
+			return -EBUSY;
+		}
+
+		/* TODO: bitrate control, TX processing? */
+		ret = drv_offchannel_tx(local, skb, chan, channel_type, wait);
+
+		if (ret == 0)
+			local->hw_offchan_tx_cookie = *cookie;
+		mutex_unlock(&local->mtx);
+
+		/*
+		 * Allow driver to return 1 to indicate it wants to have the
+		 * frame transmitted with a remain_on_channel + regular TX.
+		 */
+		if (ret != 1)
+			return ret;
+	}
+
 	if (is_offchan && local->ops->remain_on_channel) {
 		unsigned int duration;
 		int ret;
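Note the three-way return convention established above: an error aborts the transmit, 0 means the driver took the frame (and the cookie), and 1 asks mac80211 to fall back to the remain-on-channel plus regular TX path that follows. A hypothetical driver callback using that escape hatch (all mydrv_* names are invented for illustration):

/* Hypothetical driver: only accept off-channel TX when otherwise idle. */
struct mydrv_priv {
	bool scanning;
	struct sk_buff_head offchan_queue;
};

static int mydrv_offchannel_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
			       struct ieee80211_channel *chan,
			       enum nl80211_channel_type channel_type,
			       unsigned int wait)
{
	struct mydrv_priv *priv = hw->priv;

	if (priv->scanning)
		return 1;	/* fall back to remain-on-channel + TX */

	/* queue for the device; a real driver would also program the
	 * channel switch and start the wait timer here */
	skb_queue_tail(&priv->offchan_queue, skb);
	return 0;
}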
@@ -1847,6 +1907,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 
 	wk->type = IEEE80211_WORK_OFFCHANNEL_TX;
 	wk->chan = chan;
+	wk->chan_type = channel_type;
 	wk->sdata = sdata;
 	wk->done = ieee80211_offchan_tx_done;
 	wk->offchan_tx.frame = skb;
@@ -1869,6 +1930,18 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
 
 	mutex_lock(&local->mtx);
 
+	if (local->ops->offchannel_tx_cancel_wait &&
+	    local->hw_offchan_tx_cookie == cookie) {
+		ret = drv_offchannel_tx_cancel_wait(local);
+
+		if (!ret)
+			local->hw_offchan_tx_cookie = 0;
+
+		mutex_unlock(&local->mtx);
+
+		return ret;
+	}
+
 	if (local->ops->cancel_remain_on_channel) {
 		cookie ^= 2;
 		ret = ieee80211_cancel_remain_on_channel_hw(local, cookie);
@@ -1939,6 +2012,21 @@ static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
 	return drv_get_antenna(local, tx_ant, rx_ant);
 }
 
+static int ieee80211_set_ringparam(struct wiphy *wiphy, u32 tx, u32 rx)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	return drv_set_ringparam(local, tx, rx);
+}
+
+static void ieee80211_get_ringparam(struct wiphy *wiphy,
+				    u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	drv_get_ringparam(local, tx, tx_max, rx, rx_max);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1996,4 +2084,6 @@ struct cfg80211_ops mac80211_config_ops = {
 	.mgmt_frame_register = ieee80211_mgmt_frame_register,
 	.set_antenna = ieee80211_set_antenna,
 	.get_antenna = ieee80211_get_antenna,
+	.set_ringparam = ieee80211_set_ringparam,
+	.get_ringparam = ieee80211_get_ringparam,
 };
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5b24740fc0b..889c3e93e0f 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -77,6 +77,9 @@ bool ieee80211_set_channel_type(struct ieee80211_local *local,
 		switch (tmp->vif.bss_conf.channel_type) {
 		case NL80211_CHAN_NO_HT:
 		case NL80211_CHAN_HT20:
+			if (superchan > tmp->vif.bss_conf.channel_type)
+				break;
+
 			superchan = tmp->vif.bss_conf.channel_type;
 			break;
 		case NL80211_CHAN_HT40PLUS:
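The early break added above leans on nl80211's channel-type values being ordered from narrowest to widest within the cases handled here, so a plain integer compare keeps the widest type already seen. The ordering the comparison assumes, sketched with stand-in names rather than restating nl80211.h:

/* numeric order makes "superchan > channel_type" mean "already wider" */
enum chan_type_sketch {
	SKETCH_CHAN_NO_HT,	/* 0: 20 MHz, no HT */
	SKETCH_CHAN_HT20,	/* 1: 20 MHz, HT */
	SKETCH_CHAN_HT40MINUS,	/* 2: 40 MHz, secondary channel below */
	SKETCH_CHAN_HT40PLUS,	/* 3: 40 MHz, secondary channel above */
};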
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1f02e599a31..51f0d780daf 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -60,6 +60,10 @@ static const struct file_operations name## _ops = { \
 	debugfs_create_file(#name, mode, phyd, local, &name## _ops);
 
 
+DEBUGFS_READONLY_FILE(user_power, "%d",
+		      local->user_power_level);
+DEBUGFS_READONLY_FILE(power, "%d",
+		      local->hw.conf.power_level);
 DEBUGFS_READONLY_FILE(frequency, "%d",
 		      local->hw.conf.channel->center_freq);
 DEBUGFS_READONLY_FILE(total_ps_buffered, "%d",
@@ -391,6 +395,8 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	DEBUGFS_ADD(uapsd_queues);
 	DEBUGFS_ADD(uapsd_max_sp_len);
 	DEBUGFS_ADD(channel_type);
+	DEBUGFS_ADD(user_power);
+	DEBUGFS_ADD(power);
 
 	statsd = debugfs_create_dir("statistics", phyd);
 
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 2dabdf7680d..dacace6b139 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -36,7 +36,7 @@ static ssize_t ieee80211_if_read(
 	ret = (*format)(sdata, buf, sizeof(buf));
 	read_unlock(&dev_base_lock);
 
-	if (ret != -EINVAL)
+	if (ret >= 0)
 		ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
 
 	return ret;
@@ -81,6 +81,8 @@ static ssize_t ieee80211_if_fmt_##name( \
 	IEEE80211_IF_FMT(name, field, "%d\n")
 #define IEEE80211_IF_FMT_HEX(name, field) \
 	IEEE80211_IF_FMT(name, field, "%#x\n")
+#define IEEE80211_IF_FMT_LHEX(name, field) \
+	IEEE80211_IF_FMT(name, field, "%#lx\n")
 #define IEEE80211_IF_FMT_SIZE(name, field) \
 	IEEE80211_IF_FMT(name, field, "%zd\n")
 
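For readers following the macro plumbing: IEEE80211_IF_FMT (defined earlier in this file) emits a one-line show function, so the new LHEX flavour ends up as an scnprintf() with a %#lx conversion. Roughly, and only as a sketch of the expansion rather than a copy from the tree:

/* approximate expansion of IEEE80211_IF_FILE(state, state, LHEX) */
static ssize_t ieee80211_if_fmt_state(
	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
{
	/* sdata->state is an unsigned long bitmap, hence %#lx */
	return scnprintf(buf, buflen, "%#lx\n", sdata->state);
}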
@@ -145,6 +147,9 @@ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ],
 		  HEX);
 IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ],
 		  HEX);
+IEEE80211_IF_FILE(flags, flags, HEX);
+IEEE80211_IF_FILE(state, state, LHEX);
+IEEE80211_IF_FILE(channel_type, vif.bss_conf.channel_type, DEC);
 
 /* STA attributes */
 IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
@@ -216,6 +221,104 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata,
 
 __IEEE80211_IF_FILE_W(smps);
 
+static ssize_t ieee80211_if_fmt_tkip_mic_test(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hwaddr_aton(const char *txt, u8 *addr)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		int a, b;
+
+		a = hex_to_bin(*txt++);
+		if (a < 0)
+			return -1;
+		b = hex_to_bin(*txt++);
+		if (b < 0)
+			return -1;
+		*addr++ = (a << 4) | b;
+		if (i < 5 && *txt++ != ':')
+			return -1;
+	}
+
+	return 0;
+}
+
+static ssize_t ieee80211_if_parse_tkip_mic_test(
+	struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
+{
+	struct ieee80211_local *local = sdata->local;
+	u8 addr[ETH_ALEN];
+	struct sk_buff *skb;
+	struct ieee80211_hdr *hdr;
+	__le16 fc;
+
+	/*
+	 * Assume colon-delimited MAC address with possible white space
+	 * following.
+	 */
+	if (buflen < 3 * ETH_ALEN - 1)
+		return -EINVAL;
+	if (hwaddr_aton(buf, addr) < 0)
+		return -EINVAL;
+
+	if (!ieee80211_sdata_running(sdata))
+		return -ENOTCONN;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 100);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	hdr = (struct ieee80211_hdr *) skb_put(skb, 24);
+	memset(hdr, 0, 24);
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP:
+		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
+		/* DA BSSID SA */
+		memcpy(hdr->addr1, addr, ETH_ALEN);
+		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		memcpy(hdr->addr3, sdata->vif.addr, ETH_ALEN);
+		break;
+	case NL80211_IFTYPE_STATION:
+		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
+		/* BSSID SA DA */
+		if (sdata->vif.bss_conf.bssid == NULL) {
+			dev_kfree_skb(skb);
+			return -ENOTCONN;
+		}
+		memcpy(hdr->addr1, sdata->vif.bss_conf.bssid, ETH_ALEN);
+		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		memcpy(hdr->addr3, addr, ETH_ALEN);
+		break;
+	default:
+		dev_kfree_skb(skb);
+		return -EOPNOTSUPP;
+	}
+	hdr->frame_control = fc;
+
+	/*
+	 * Add some length to the test frame to make it look a bit more
+	 * valid.  The exact contents do not matter since the recipient is
+	 * required to drop this because of the Michael MIC failure.
+	 */
+	memset(skb_put(skb, 50), 0, 50);
+
+	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_TKIP_MIC_FAILURE;
+
+	ieee80211_tx_skb(sdata, skb);
+
+	return buflen;
+}
+
+__IEEE80211_IF_FILE_W(tkip_mic_test);
+
 /* AP attributes */
 IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
 IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
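hwaddr_aton() above is a deliberately strict parser: exactly six hex octets, colon separators, nothing before them. A standalone userspace harness is convenient for checking a string before echoing it into the tkip_mic_test file; this sketch swaps in a local digit helper for the kernel's hex_to_bin():

#include <stdio.h>

#define ETH_ALEN 6

static int hex_digit(char c)	/* stand-in for kernel hex_to_bin() */
{
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	return -1;
}

static int hwaddr_aton(const char *txt, unsigned char *addr)
{
	int i;

	for (i = 0; i < ETH_ALEN; i++) {
		int a = hex_digit(*txt++), b;

		if (a < 0)
			return -1;
		b = hex_digit(*txt++);
		if (b < 0)
			return -1;
		*addr++ = (a << 4) | b;
		if (i < 5 && *txt++ != ':')
			return -1;
	}
	return 0;
}

int main(void)
{
	unsigned char mac[ETH_ALEN];

	if (hwaddr_aton("00:11:22:33:44:55", mac))
		return 1;
	printf("parsed, last octet %02x\n", mac[5]);
	return 0;
}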
@@ -283,6 +386,9 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode,
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 
@@ -291,22 +397,30 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_ADD(last_beacon);
 	DEBUGFS_ADD(ave_beacon);
 	DEBUGFS_ADD_MODE(smps, 0600);
+	DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
 }
 
 static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 
 	DEBUGFS_ADD(num_sta_ps);
 	DEBUGFS_ADD(dtim_count);
 	DEBUGFS_ADD(num_buffered_multicast);
+	DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
 }
 
 static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 
@@ -316,12 +430,18 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 }
 
 static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
 {
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 }
 
 #ifdef CONFIG_MAC80211_MESH
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 98d589960a4..9c0d62bb0ea 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -5,9 +5,9 @@
5#include "ieee80211_i.h" 5#include "ieee80211_i.h"
6#include "driver-trace.h" 6#include "driver-trace.h"
7 7
8static inline int drv_tx(struct ieee80211_local *local, struct sk_buff *skb) 8static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb)
9{ 9{
10 return local->ops->tx(&local->hw, skb); 10 local->ops->tx(&local->hw, skb);
11} 11}
12 12
13static inline int drv_start(struct ieee80211_local *local) 13static inline int drv_start(struct ieee80211_local *local)
@@ -382,17 +382,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 				   struct ieee80211_sub_if_data *sdata,
 				   enum ieee80211_ampdu_mlme_action action,
 				   struct ieee80211_sta *sta, u16 tid,
-				   u16 *ssn)
+				   u16 *ssn, u8 buf_size)
 {
 	int ret = -EOPNOTSUPP;
 
 	might_sleep();
 
-	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn);
+	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
 
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
-					       sta, tid, ssn);
+					       sta, tid, ssn, buf_size);
 
 	trace_drv_return_int(local, ret);
 
@@ -495,4 +495,61 @@ static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local)
 	return ret;
 }
 
+static inline int drv_offchannel_tx(struct ieee80211_local *local,
+				    struct sk_buff *skb,
+				    struct ieee80211_channel *chan,
+				    enum nl80211_channel_type channel_type,
+				    unsigned int wait)
+{
+	int ret;
+
+	might_sleep();
+
+	trace_drv_offchannel_tx(local, skb, chan, channel_type, wait);
+	ret = local->ops->offchannel_tx(&local->hw, skb, chan,
+					channel_type, wait);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local)
+{
+	int ret;
+
+	might_sleep();
+
+	trace_drv_offchannel_tx_cancel_wait(local);
+	ret = local->ops->offchannel_tx_cancel_wait(&local->hw);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline int drv_set_ringparam(struct ieee80211_local *local,
+				    u32 tx, u32 rx)
+{
+	int ret = -ENOTSUPP;
+
+	might_sleep();
+
+	trace_drv_set_ringparam(local, tx, rx);
+	if (local->ops->set_ringparam)
+		ret = local->ops->set_ringparam(&local->hw, tx, rx);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline void drv_get_ringparam(struct ieee80211_local *local,
+				     u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max)
+{
+	might_sleep();
+
+	trace_drv_get_ringparam(local, tx, tx_max, rx, rx_max);
+	if (local->ops->get_ringparam)
+		local->ops->get_ringparam(&local->hw, tx, tx_max, rx, rx_max);
+	trace_drv_return_void(local);
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
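Both ringparam wrappers above treat the driver op as optional: drv_set_ringparam() reports -ENOTSUPP when the hook is absent, and drv_get_ringparam() simply leaves the outputs untouched. A hypothetical driver-side pairing, matching the op signatures as called above (struct, limit, and names invented for illustration):

/* Hypothetical driver exposing its DMA ring sizes. */
struct mydrv_priv {
	u32 tx_ring_len;
	u32 rx_ring_len;
};

#define MYDRV_MAX_RING	512

static int mydrv_set_ringparam(struct ieee80211_hw *hw, u32 tx, u32 rx)
{
	struct mydrv_priv *priv = hw->priv;

	if (!tx || tx > MYDRV_MAX_RING || !rx || rx > MYDRV_MAX_RING)
		return -EINVAL;

	priv->tx_ring_len = tx;
	priv->rx_ring_len = rx;
	/* a real driver would reallocate and restart its rings here */
	return 0;
}

static void mydrv_get_ringparam(struct ieee80211_hw *hw,
				u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max)
{
	struct mydrv_priv *priv = hw->priv;

	*tx = priv->tx_ring_len;
	*tx_max = MYDRV_MAX_RING;
	*rx = priv->rx_ring_len;
	*rx_max = MYDRV_MAX_RING;
}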
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 49c84218b2f..45aab80738e 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -9,6 +9,11 @@
 #undef TRACE_EVENT
 #define TRACE_EVENT(name, proto, ...) \
 static inline void trace_ ## name(proto) {}
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(...)
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(evt_class, name, proto, ...) \
+static inline void trace_ ## name(proto) {}
 #endif
 
 #undef TRACE_SYSTEM
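With tracing compiled out, the stubs above make class-based definitions vanish just like plain TRACE_EVENTs, so callers never need #ifdefs. For example, once these macros are in effect, DEFINE_EVENT(local_only_evt, drv_start, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local)) reduces to exactly:

static inline void trace_drv_start(struct ieee80211_local *local) {}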
@@ -38,7 +43,7 @@ static inline void trace_ ## name(proto) {}
  * Tracing for driver callbacks.
  */
 
-TRACE_EVENT(drv_return_void,
+DECLARE_EVENT_CLASS(local_only_evt,
 	TP_PROTO(struct ieee80211_local *local),
 	TP_ARGS(local),
 	TP_STRUCT__entry(
@@ -50,6 +55,11 @@ TRACE_EVENT(drv_return_void,
 	TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
 );
 
+DEFINE_EVENT(local_only_evt, drv_return_void,
+	TP_PROTO(struct ieee80211_local *local),
+	TP_ARGS(local)
+);
+
 TRACE_EVENT(drv_return_int,
 	TP_PROTO(struct ieee80211_local *local, int ret),
 	TP_ARGS(local, ret),
@@ -78,40 +88,14 @@ TRACE_EVENT(drv_return_u64,
 	TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
 );
 
-TRACE_EVENT(drv_start,
+DEFINE_EVENT(local_only_evt, drv_start,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_stop,
+DEFINE_EVENT(local_only_evt, drv_stop,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_add_interface,
@@ -439,40 +423,14 @@ TRACE_EVENT(drv_hw_scan,
 	)
 );
 
-TRACE_EVENT(drv_sw_scan_start,
+DEFINE_EVENT(local_only_evt, drv_sw_scan_start,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_sw_scan_complete,
+DEFINE_EVENT(local_only_evt, drv_sw_scan_complete,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_get_stats,
@@ -702,23 +660,9 @@ TRACE_EVENT(drv_conf_tx,
 	)
 );
 
-TRACE_EVENT(drv_get_tsf,
+DEFINE_EVENT(local_only_evt, drv_get_tsf,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_set_tsf,
@@ -742,41 +686,14 @@ TRACE_EVENT(drv_set_tsf,
 	)
 );
 
-TRACE_EVENT(drv_reset_tsf,
+DEFINE_EVENT(local_only_evt, drv_reset_tsf,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_tx_last_beacon,
+DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_ampdu_action,
@@ -784,9 +701,9 @@ TRACE_EVENT(drv_ampdu_action,
 		 struct ieee80211_sub_if_data *sdata,
 		 enum ieee80211_ampdu_mlme_action action,
 		 struct ieee80211_sta *sta, u16 tid,
-		 u16 *ssn),
+		 u16 *ssn, u8 buf_size),
 
-	TP_ARGS(local, sdata, action, sta, tid, ssn),
+	TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
@@ -794,6 +711,7 @@ TRACE_EVENT(drv_ampdu_action,
 		__field(u32, action)
 		__field(u16, tid)
 		__field(u16, ssn)
+		__field(u8, buf_size)
 		VIF_ENTRY
 	),
 
@@ -804,11 +722,13 @@ TRACE_EVENT(drv_ampdu_action,
 		__entry->action = action;
 		__entry->tid = tid;
 		__entry->ssn = ssn ? *ssn : 0;
+		__entry->buf_size = buf_size;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d",
-		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid
+		LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d",
+		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
+		__entry->tid, __entry->buf_size
 	)
 );
 
@@ -959,24 +879,96 @@ TRACE_EVENT(drv_remain_on_channel,
 	)
 );
 
-TRACE_EVENT(drv_cancel_remain_on_channel,
+DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
 	TP_PROTO(struct ieee80211_local *local),
+	TP_ARGS(local)
+);
 
-	TP_ARGS(local),
+TRACE_EVENT(drv_offchannel_tx,
+	TP_PROTO(struct ieee80211_local *local, struct sk_buff *skb,
+		 struct ieee80211_channel *chan,
+		 enum nl80211_channel_type channel_type,
+		 unsigned int wait),
+
+	TP_ARGS(local, skb, chan, channel_type, wait),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
+		__field(int, center_freq)
+		__field(int, channel_type)
+		__field(unsigned int, wait)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
+		__entry->center_freq = chan->center_freq;
+		__entry->channel_type = channel_type;
+		__entry->wait = wait;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
+		LOCAL_PR_FMT " freq:%dMHz, wait:%dms",
+		LOCAL_PR_ARG, __entry->center_freq, __entry->wait
+	)
+);
+
+TRACE_EVENT(drv_set_ringparam,
+	TP_PROTO(struct ieee80211_local *local, u32 tx, u32 rx),
+
+	TP_ARGS(local, tx, rx),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u32, tx)
+		__field(u32, rx)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->tx = tx;
+		__entry->rx = rx;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " tx:%d rx %d",
+		LOCAL_PR_ARG, __entry->tx, __entry->rx
+	)
+);
+
+TRACE_EVENT(drv_get_ringparam,
+	TP_PROTO(struct ieee80211_local *local, u32 *tx, u32 *tx_max,
+		 u32 *rx, u32 *rx_max),
+
+	TP_ARGS(local, tx, tx_max, rx, rx_max),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u32, tx)
+		__field(u32, tx_max)
+		__field(u32, rx)
+		__field(u32, rx_max)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->tx = *tx;
+		__entry->tx_max = *tx_max;
+		__entry->rx = *rx;
+		__entry->rx_max = *rx_max;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " tx:%d tx_max %d rx %d rx_max %d",
+		LOCAL_PR_ARG,
+		__entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max
 	)
 );
 
+DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait,
+	TP_PROTO(struct ieee80211_local *local),
+	TP_ARGS(local)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
@@ -1069,23 +1061,9 @@ TRACE_EVENT(api_stop_tx_ba_cb,
 	)
 );
 
-TRACE_EVENT(api_restart_hw,
+DEFINE_EVENT(local_only_evt, api_restart_hw,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(api_beacon_loss,
@@ -1214,40 +1192,14 @@ TRACE_EVENT(api_chswitch_done,
 	)
 );
 
-TRACE_EVENT(api_ready_on_channel,
+DEFINE_EVENT(local_only_evt, api_ready_on_channel,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(api_remain_on_channel_expired,
+DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 /*
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 75d679d75e6..b9e4b9bd217 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -66,6 +66,9 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 	/* own MCS TX capabilities */
 	tx_mcs_set_cap = sband->ht_cap.mcs.tx_params;
 
+	/* Copy peer MCS TX capabilities, the driver might need them. */
+	ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params;
+
 	/* can we TX with MCS rates? */
 	if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED))
 		return;
@@ -79,7 +82,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 	max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS;
 
 	/*
-	 * 802.11n D5.0 20.3.5 / 20.6 says:
+	 * 802.11n-2009 20.3.5 / 20.6 says:
 	 * - indices 0 to 7 and 32 are single spatial stream
 	 * - 8 to 31 are multiple spatial streams using equal modulation
 	 *   [8..15 for two streams, 16..23 for three and 24..31 for four]
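The comment above is the contract this code relies on; restated as arithmetic, the stream count for the equal-modulation range is just the index divided into groups of eight. A small sketch mirroring the comment (not a mac80211 helper):

/* 802.11n-2009 20.3.5 / 20.6 MCS index layout, per the comment above */
static int mcs_to_streams(int mcs)
{
	if (mcs == 32)			/* 40 MHz duplicate: one stream */
		return 1;
	if (mcs >= 0 && mcs <= 31)	/* 0-7:1, 8-15:2, 16-23:3, 24-31:4 */
		return mcs / 8 + 1;
	return -1;			/* 33..76: unequal modulation */
}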
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 53c7077ffd4..3e81af1fce5 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -31,7 +31,6 @@
 #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
 
 #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
-#define IEEE80211_IBSS_MERGE_DELAY 0x400000
 #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
 
 #define IEEE80211_IBSS_MAX_STA_ENTRIES 128
@@ -270,7 +269,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	enum ieee80211_band band = rx_status->band;
 
 	if (elems->ds_params && elems->ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
+						      band);
 	else
 		freq = rx_status->freq;
 
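This is one of several hunks in the series adding a band argument to ieee80211_channel_to_frequency(): the same DS-parameter channel number means different frequencies in different bands, so the conversion is ambiguous without it. The arithmetic involved, as a standalone sketch (the real helper lives in cfg80211):

/* channel number -> centre frequency in MHz, band-qualified */
static int chan_to_freq(int chan, int is_2ghz)
{
	if (is_2ghz) {
		if (chan == 14)
			return 2484;		/* Japan-only special case */
		return 2407 + chan * 5;		/* channels 1..13 */
	}
	return 5000 + chan * 5;			/* 5 GHz channels */
}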
@@ -354,7 +354,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	if (memcmp(cbss->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0)
 		goto put_bss;
 
-	if (rx_status->flag & RX_FLAG_TSFT) {
+	if (rx_status->flag & RX_FLAG_MACTIME_MPDU) {
 		/*
 		 * For correct IBSS merging we need mactime; since mactime is
 		 * defined as the time the first data symbol of the frame hits
@@ -396,10 +396,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			jiffies);
 #endif
 
-	/* give slow hardware some time to do the TSF sync */
-	if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY)
-		goto put_bss;
-
 	if (beacon_timestamp > rx_timestamp) {
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 		printk(KERN_DEBUG "%s: beacon TSF higher than "
@@ -663,12 +659,13 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
 }
 
 static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
-					struct ieee80211_mgmt *mgmt,
-					size_t len)
+					struct sk_buff *req)
 {
+	struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(req);
+	struct ieee80211_mgmt *mgmt = (void *)req->data;
 	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 	struct ieee80211_local *local = sdata->local;
-	int tx_last_beacon;
+	int tx_last_beacon, len = req->len;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *resp;
 	u8 *pos, *end;
@@ -688,7 +685,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 		       mgmt->bssid, tx_last_beacon);
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 
-	if (!tx_last_beacon)
+	if (!tx_last_beacon && !(rx_status->rx_flags & IEEE80211_RX_RA_MATCH))
 		return;
 
 	if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 &&
@@ -785,7 +782,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 
 	switch (fc & IEEE80211_FCTL_STYPE) {
 	case IEEE80211_STYPE_PROBE_REQ:
-		ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
+		ieee80211_rx_mgmt_probe_req(sdata, skb);
 		break;
 	case IEEE80211_STYPE_PROBE_RESP:
 		ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 533fd32f49f..a4040170142 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -225,6 +225,7 @@ struct ieee80211_if_ap {
 	struct sk_buff_head ps_bc_buf;
 	atomic_t num_sta_ps; /* number of stations in PS mode */
 	int dtim_count;
+	bool dtim_bc_mc;
 };
 
 struct ieee80211_if_wds {
@@ -654,8 +655,6 @@ struct tpt_led_trigger {
  *	well be on the operating channel
  * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to
  *	determine if we are on the operating channel or not
- * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
- *	gets only set in conjunction with SCAN_SW_SCANNING
  * @SCAN_COMPLETED: Set for our scan work function when the driver reported
  *	that the scan completed.
  * @SCAN_ABORTED: Set for our scan work function when the driver reported
@@ -664,7 +663,6 @@ struct tpt_led_trigger {
 enum {
 	SCAN_SW_SCANNING,
 	SCAN_HW_SCANNING,
-	SCAN_OFF_CHANNEL,
 	SCAN_COMPLETED,
 	SCAN_ABORTED,
 };
@@ -959,6 +957,7 @@ struct ieee80211_local {
 	unsigned int hw_roc_duration;
 	u32 hw_roc_cookie;
 	bool hw_roc_for_tx;
+	unsigned long hw_offchan_tx_cookie;
 
 	/* dummy netdev for use w/ NAPI */
 	struct net_device napi_dev;
@@ -1068,8 +1067,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 void ieee80211_configure_filter(struct ieee80211_local *local);
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
 
-extern bool ieee80211_disable_40mhz_24ghz;
-
 /* STA code */
 void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
 int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
@@ -1147,10 +1144,14 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local,
 			  struct ieee80211_bss *bss);
 
 /* off-channel helpers */
-void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local);
-void ieee80211_offchannel_stop_station(struct ieee80211_local *local);
+bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local);
+void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local,
+					bool tell_ap);
+void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
+				    bool offchannel_ps_enable);
 void ieee80211_offchannel_return(struct ieee80211_local *local,
-				 bool enable_beaconing);
+				 bool enable_beaconing,
+				 bool offchannel_ps_disable);
 void ieee80211_hw_roc_setup(struct ieee80211_local *local);
 
 /* interface handling */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7a10a8d1b2d..4054399be90 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -382,6 +382,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	struct sk_buff *skb, *tmp;
 	u32 hw_reconf_flags = 0;
 	int i;
+	enum nl80211_channel_type orig_ct;
 
 	if (local->scan_sdata == sdata)
 		ieee80211_scan_cancel(local);
@@ -542,8 +543,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		hw_reconf_flags = 0;
 	}
 
+	/* Re-calculate channel-type, in case there are multiple vifs
+	 * on different channel types.
+	 */
+	orig_ct = local->_oper_channel_type;
+	ieee80211_set_channel_type(local, NULL, NL80211_CHAN_NO_HT);
+
 	/* do after stop to avoid reconfiguring when we stop anyway */
-	if (hw_reconf_flags)
+	if (hw_reconf_flags || (orig_ct != local->_oper_channel_type))
 		ieee80211_hw_config(local, hw_reconf_flags);
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 8106aa1b746..4ddbe27eb57 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -21,7 +21,6 @@
 
 #define WEP_IV_LEN		4
 #define WEP_ICV_LEN		4
-#define ALG_TKIP_KEY_LEN	32
 #define ALG_CCMP_KEY_LEN	16
 #define CCMP_HDR_LEN		8
 #define CCMP_MIC_LEN		8
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a46ff06d7cb..562d2984c48 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -34,7 +34,7 @@
34#include "debugfs.h" 34#include "debugfs.h"
35 35
36 36
37bool ieee80211_disable_40mhz_24ghz; 37static bool ieee80211_disable_40mhz_24ghz;
38module_param(ieee80211_disable_40mhz_24ghz, bool, 0644); 38module_param(ieee80211_disable_40mhz_24ghz, bool, 0644);
39MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz, 39MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz,
40 "Disable 40MHz support in the 2.4GHz band"); 40 "Disable 40MHz support in the 2.4GHz band");
@@ -98,6 +98,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work)
 	ieee80211_configure_filter(local);
 }
 
+/*
+ * Returns true if we are logically configured to be on
+ * the operating channel AND the hardware-conf is currently
+ * configured on the operating channel.  Compares channel-type
+ * as well.
+ */
+bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local)
+{
+	struct ieee80211_channel *chan, *scan_chan;
+	enum nl80211_channel_type channel_type;
+
+	/* This logic needs to match logic in ieee80211_hw_config */
+	if (local->scan_channel) {
+		chan = local->scan_channel;
+		/* If scanning on oper channel, use whatever channel-type
+		 * is currently in use.
+		 */
+		if (chan == local->oper_channel)
+			channel_type = local->_oper_channel_type;
+		else
+			channel_type = NL80211_CHAN_NO_HT;
+	} else if (local->tmp_channel) {
+		chan = scan_chan = local->tmp_channel;
+		channel_type = local->tmp_channel_type;
+	} else {
+		chan = local->oper_channel;
+		channel_type = local->_oper_channel_type;
+	}
+
+	if (chan != local->oper_channel ||
+	    channel_type != local->_oper_channel_type)
+		return false;
+
+	/* Check current hardware-config against oper_channel. */
+	if ((local->oper_channel != local->hw.conf.channel) ||
+	    (local->_oper_channel_type != local->hw.conf.channel_type))
+		return false;
+
+	return true;
+}
+
 int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 {
 	struct ieee80211_channel *chan, *scan_chan;
@@ -110,21 +151,33 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 
 	scan_chan = local->scan_channel;
 
+	/* If this off-channel logic ever changes, ieee80211_cfg_on_oper_channel
+	 * may need to change as well.
+	 */
 	offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
 	if (scan_chan) {
 		chan = scan_chan;
-		channel_type = NL80211_CHAN_NO_HT;
-		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
-	} else if (local->tmp_channel &&
-		   local->oper_channel != local->tmp_channel) {
+		/* If scanning on oper channel, use whatever channel-type
+		 * is currently in use.
+		 */
+		if (chan == local->oper_channel)
+			channel_type = local->_oper_channel_type;
+		else
+			channel_type = NL80211_CHAN_NO_HT;
+	} else if (local->tmp_channel) {
 		chan = scan_chan = local->tmp_channel;
 		channel_type = local->tmp_channel_type;
-		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
 	} else {
 		chan = local->oper_channel;
 		channel_type = local->_oper_channel_type;
-		local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
 	}
+
+	if (chan != local->oper_channel ||
+	    channel_type != local->_oper_channel_type)
+		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
+	else
+		local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
+
 	offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
 
 	if (offchannel_flag || chan != local->hw.conf.channel ||
@@ -146,7 +199,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 		changed |= IEEE80211_CONF_CHANGE_SMPS;
 	}
 
-	if (scan_chan)
+	if ((local->scanning & SCAN_SW_SCANNING) ||
+	    (local->scanning & SCAN_HW_SCANNING))
 		power = chan->max_power;
 	else
 		power = local->power_constr_level ?
@@ -231,7 +285,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 
 	if (changed & BSS_CHANGED_BEACON_ENABLED) {
 		if (local->quiescing || !ieee80211_sdata_running(sdata) ||
-		    test_bit(SCAN_SW_SCANNING, &local->scanning)) {
+		    test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) {
 			sdata->vif.bss_conf.enable_beacon = false;
 		} else {
 			/*
@@ -326,6 +380,9 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
 
 	trace_api_restart_hw(local);
 
+	wiphy_info(hw->wiphy,
+		   "Hardware restart was requested\n");
+
 	/* use this reason, ieee80211_reconfig will unblock it */
 	ieee80211_stop_queues_by_reason(hw,
 		IEEE80211_QUEUE_STOP_REASON_SUSPEND);
@@ -554,6 +611,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
 	local->hw.max_report_rates = 0;
+	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
 	local->user_power_level = -1;
@@ -668,6 +726,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 		channels += sband->n_channels;
 
+		/*
+		 * Since ieee80211_disable_40mhz_24ghz is global, we can
+		 * modify the sband's ht data even if the driver uses a
+		 * global structure for that.
+		 */
+		if (ieee80211_disable_40mhz_24ghz &&
+		    band == IEEE80211_BAND_2GHZ &&
+		    sband->ht_cap.ht_supported) {
+			sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+			sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
+		}
+
 		if (max_bitrates < sband->n_bitrates)
 			max_bitrates = sband->n_bitrates;
 		supp_ht = supp_ht || sband->ht_cap.ht_supported;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index ca3af4685b0..2a57cc02c61 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
 			       &elems);
 
 	if (elems.ds_params && elems.ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
 	else
 		freq = rx_status->freq;
 
@@ -645,7 +645,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
 	if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags))
 		mesh_mpath_table_grow();
 
-	if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags))
+	if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags))
 		mesh_mpp_table_grow();
 
 	if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c9ceb4d57ab..64d92d5a7f4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -28,8 +28,15 @@
28#include "rate.h" 28#include "rate.h"
29#include "led.h" 29#include "led.h"
30 30
31#define IEEE80211_MAX_NULLFUNC_TRIES 2 31static int max_nullfunc_tries = 2;
32#define IEEE80211_MAX_PROBE_TRIES 5 32module_param(max_nullfunc_tries, int, 0644);
33MODULE_PARM_DESC(max_nullfunc_tries,
34 "Maximum nullfunc tx tries before disconnecting (reason 4).");
35
36static int max_probe_tries = 5;
37module_param(max_probe_tries, int, 0644);
38MODULE_PARM_DESC(max_probe_tries,
39 "Maximum probe tries before disconnecting (reason 4).");
33 40
34/* 41/*
35 * Beacon loss timeout is calculated as N frames times the 42 * Beacon loss timeout is calculated as N frames times the
@@ -51,7 +58,11 @@
  * a probe request because of beacon loss or for
  * checking the connection still works.
  */
-#define IEEE80211_PROBE_WAIT (HZ / 2)
+static int probe_wait_ms = 500;
+module_param(probe_wait_ms, int, 0644);
+MODULE_PARM_DESC(probe_wait_ms,
+		 "Maximum time (ms) to wait for probe response"
+		 " before disconnecting (reason 4).");
 
 /*
  * Weight given to the latest Beacon frame when calculating average signal
@@ -134,6 +145,9 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
+	if (unlikely(!sdata->u.mgd.associated))
+		return;
+
 	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
 		return;
 
@@ -161,6 +175,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_supported_band *sband;
 	struct sta_info *sta;
 	u32 changed = 0;
+	int hti_cfreq;
 	u16 ht_opmode;
 	bool enable_ht = true;
 	enum nl80211_channel_type prev_chantype;
@@ -174,10 +189,27 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	if (!sband->ht_cap.ht_supported)
 		enable_ht = false;
 
-	/* check that channel matches the right operating channel */
-	if (local->hw.conf.channel->center_freq !=
-	    ieee80211_channel_to_frequency(hti->control_chan))
-		enable_ht = false;
+	if (enable_ht) {
+		hti_cfreq = ieee80211_channel_to_frequency(hti->control_chan,
+							   sband->band);
+		/* check that channel matches the right operating channel */
+		if (local->hw.conf.channel->center_freq != hti_cfreq) {
+			/* Some APs mess this up, evidently.
+			 * Netgear WNDR3700 sometimes reports 4 higher than
+			 * the actual channel, for instance.
+			 */
+			printk(KERN_DEBUG
+			       "%s: Wrong control channel in association"
+			       " response: configured center-freq: %d"
+			       " hti-cfreq: %d  hti->control_chan: %d"
+			       " band: %d.  Disabling HT.\n",
+			       sdata->name,
+			       local->hw.conf.channel->center_freq,
+			       hti_cfreq, hti->control_chan,
+			       sband->band);
+			enable_ht = false;
+		}
+	}
 
 	if (enable_ht) {
 		channel_type = NL80211_CHAN_HT20;
@@ -429,7 +461,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		container_of((void *)bss, struct cfg80211_bss, priv);
 	struct ieee80211_channel *new_ch;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num);
+	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num,
+						      cbss->channel->band);
 
 	ASSERT_MGD_MTX(ifmgd);
 
@@ -580,6 +613,37 @@ static void ieee80211_change_ps(struct ieee80211_local *local)
 	}
 }
 
+static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_managed *mgd = &sdata->u.mgd;
+	struct sta_info *sta = NULL;
+	u32 sta_flags = 0;
+
+	if (!mgd->powersave)
+		return false;
+
+	if (!mgd->associated)
+		return false;
+
+	if (!mgd->associated->beacon_ies)
+		return false;
+
+	if (mgd->flags & (IEEE80211_STA_BEACON_POLL |
+			  IEEE80211_STA_CONNECTION_POLL))
+		return false;
+
+	rcu_read_lock();
+	sta = sta_info_get(sdata, mgd->bssid);
+	if (sta)
+		sta_flags = get_sta_flags(sta);
+	rcu_read_unlock();
+
+	if (!(sta_flags & WLAN_STA_AUTHORIZED))
+		return false;
+
+	return true;
+}
+
 /* need to hold RTNL or interface lock */
 void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 {
@@ -600,17 +664,21 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		if (!ieee80211_sdata_running(sdata))
 			continue;
+		if (sdata->vif.type == NL80211_IFTYPE_AP) {
+			/* If an AP vif is found, then disable PS
+			 * by setting the count to zero thereby setting
+			 * ps_sdata to NULL.
+			 */
+			count = 0;
+			break;
+		}
 		if (sdata->vif.type != NL80211_IFTYPE_STATION)
 			continue;
 		found = sdata;
 		count++;
 	}
 
-	if (count == 1 && found->u.mgd.powersave &&
-	    found->u.mgd.associated &&
-	    found->u.mgd.associated->beacon_ies &&
-	    !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL |
-				    IEEE80211_STA_CONNECTION_POLL))) {
+	if (count == 1 && ieee80211_powersave_allowed(found)) {
 		struct ieee80211_conf *conf = &local->hw.conf;
 		s32 beaconint_us;
 
@@ -700,9 +768,19 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
 		return;
 
 	if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
-	    (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)))
+	    (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) {
+		netif_tx_stop_all_queues(sdata->dev);
+		/*
+		 * Flush all the frames queued in the driver before
+		 * going to power save
+		 */
+		drv_flush(local, false);
 		ieee80211_send_nullfunc(local, sdata, 1);
 
+		/* Flush once again to get the tx status of nullfunc frame */
+		drv_flush(local, false);
+	}
+
 	if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) &&
 	      (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) ||
 	    (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
@@ -710,6 +788,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
710 local->hw.conf.flags |= IEEE80211_CONF_PS; 788 local->hw.conf.flags |= IEEE80211_CONF_PS;
711 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 789 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
712 } 790 }
791
792 netif_tx_start_all_queues(sdata->dev);
713} 793}
714 794
715void ieee80211_dynamic_ps_timer(unsigned long data) 795void ieee80211_dynamic_ps_timer(unsigned long data)
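
The dynamic-PS enable path above now brackets the nullfunc with two
driver flushes for hardware that relies on the stack's nullfunc
handling: the first drains data queued before the AP is told we sleep,
the second makes the nullfunc's own TX status visible before PS is
committed. The ordering, condensed from the hunk (sketch only, error
handling omitted):

	netif_tx_stop_all_queues(sdata->dev);
	drv_flush(local, false);        /* drain pre-PS data frames     */
	ieee80211_send_nullfunc(local, sdata, 1);
	drv_flush(local, false);        /* learn the nullfunc TX status */
	/* IEEE80211_CONF_PS is set once the nullfunc is ACKed */
	netif_tx_start_all_queues(sdata->dev);
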
@@ -1033,12 +1113,6 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1033 if (is_multicast_ether_addr(hdr->addr1)) 1113 if (is_multicast_ether_addr(hdr->addr1))
1034 return; 1114 return;
1035 1115
1036 /*
1037 * In case we receive frames after disassociation.
1038 */
1039 if (!sdata->u.mgd.associated)
1040 return;
1041
1042 ieee80211_sta_reset_conn_monitor(sdata); 1116 ieee80211_sta_reset_conn_monitor(sdata);
1043} 1117}
1044 1118
@@ -1095,7 +1169,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1095 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1169 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1096 const u8 *ssid; 1170 const u8 *ssid;
1097 u8 *dst = ifmgd->associated->bssid; 1171 u8 *dst = ifmgd->associated->bssid;
1098 u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3); 1172 u8 unicast_limit = max(1, max_probe_tries - 3);
1099 1173
1100 /* 1174 /*
1101 * Try sending broadcast probe requests for the last three 1175 * Try sending broadcast probe requests for the last three
@@ -1121,7 +1195,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1121 } 1195 }
1122 1196
1123 ifmgd->probe_send_count++; 1197 ifmgd->probe_send_count++;
1124 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; 1198 ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
1125 run_again(ifmgd, ifmgd->probe_timeout); 1199 run_again(ifmgd, ifmgd->probe_timeout);
1126} 1200}
1127 1201
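
With the compile-time constants replaced by module parameters, the
unicast/broadcast probe split is computed the same way as before.
Assuming the default max_probe_tries of 5 (an assumption about the
module default, which this hunk does not show):

	/* unicast_limit = max(1, 5 - 3) = 2: the first two probe
	 * requests are unicast to the AP, the last three broadcast. */
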
@@ -1222,7 +1296,8 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
1222 1296
1223 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); 1297 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
1224 1298
1225 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); 1299 printk(KERN_DEBUG "%s: Connection to AP %pM lost.\n",
1300 sdata->name, bssid);
1226 1301
1227 ieee80211_set_disassoc(sdata, true, true); 1302 ieee80211_set_disassoc(sdata, true, true);
1228 mutex_unlock(&ifmgd->mtx); 1303 mutex_unlock(&ifmgd->mtx);
@@ -1525,7 +1600,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1525 } 1600 }
1526 1601
1527 if (elems->ds_params && elems->ds_params_len == 1) 1602 if (elems->ds_params && elems->ds_params_len == 1)
1528 freq = ieee80211_channel_to_frequency(elems->ds_params[0]); 1603 freq = ieee80211_channel_to_frequency(elems->ds_params[0],
1604 rx_status->band);
1529 else 1605 else
1530 freq = rx_status->freq; 1606 freq = rx_status->freq;
1531 1607
@@ -1966,9 +2042,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1966 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); 2042 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
1967 2043
1968 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 2044 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
1969 max_tries = IEEE80211_MAX_NULLFUNC_TRIES; 2045 max_tries = max_nullfunc_tries;
1970 else 2046 else
1971 max_tries = IEEE80211_MAX_PROBE_TRIES; 2047 max_tries = max_probe_tries;
1972 2048
1973 /* ACK received for nullfunc probing frame */ 2049 /* ACK received for nullfunc probing frame */
1974 if (!ifmgd->probe_send_count) 2050 if (!ifmgd->probe_send_count)
@@ -1978,9 +2054,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1978#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 2054#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1979 wiphy_debug(local->hw.wiphy, 2055 wiphy_debug(local->hw.wiphy,
1980 "%s: No ack for nullfunc frame to" 2056 "%s: No ack for nullfunc frame to"
1981 " AP %pM, try %d\n", 2057 " AP %pM, try %d/%i\n",
1982 sdata->name, bssid, 2058 sdata->name, bssid,
1983 ifmgd->probe_send_count); 2059 ifmgd->probe_send_count, max_tries);
1984#endif 2060#endif
1985 ieee80211_mgd_probe_ap_send(sdata); 2061 ieee80211_mgd_probe_ap_send(sdata);
1986 } else { 2062 } else {
@@ -2000,17 +2076,17 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
2000 "%s: Failed to send nullfunc to AP %pM" 2076 "%s: Failed to send nullfunc to AP %pM"
2001 " after %dms, disconnecting.\n", 2077 " after %dms, disconnecting.\n",
2002 sdata->name, 2078 sdata->name,
2003 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 2079 bssid, probe_wait_ms);
2004#endif 2080#endif
2005 ieee80211_sta_connection_lost(sdata, bssid); 2081 ieee80211_sta_connection_lost(sdata, bssid);
2006 } else if (ifmgd->probe_send_count < max_tries) { 2082 } else if (ifmgd->probe_send_count < max_tries) {
2007#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 2083#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
2008 wiphy_debug(local->hw.wiphy, 2084 wiphy_debug(local->hw.wiphy,
2009 "%s: No probe response from AP %pM" 2085 "%s: No probe response from AP %pM"
2010 " after %dms, try %d\n", 2086 " after %dms, try %d/%i\n",
2011 sdata->name, 2087 sdata->name,
2012 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ, 2088 bssid, probe_wait_ms,
2013 ifmgd->probe_send_count); 2089 ifmgd->probe_send_count, max_tries);
2014#endif 2090#endif
2015 ieee80211_mgd_probe_ap_send(sdata); 2091 ieee80211_mgd_probe_ap_send(sdata);
2016 } else { 2092 } else {
@@ -2022,7 +2098,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
2022 "%s: No probe response from AP %pM" 2098 "%s: No probe response from AP %pM"
2023 " after %dms, disconnecting.\n", 2099 " after %dms, disconnecting.\n",
2024 sdata->name, 2100 sdata->name,
2025 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 2101 bssid, probe_wait_ms);
2026 2102
2027 ieee80211_sta_connection_lost(sdata, bssid); 2103 ieee80211_sta_connection_lost(sdata, bssid);
2028 } 2104 }
@@ -2260,6 +2336,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
2260 else 2336 else
2261 wk->type = IEEE80211_WORK_DIRECT_PROBE; 2337 wk->type = IEEE80211_WORK_DIRECT_PROBE;
2262 wk->chan = req->bss->channel; 2338 wk->chan = req->bss->channel;
2339 wk->chan_type = NL80211_CHAN_NO_HT;
2263 wk->sdata = sdata; 2340 wk->sdata = sdata;
2264 wk->done = ieee80211_probe_auth_done; 2341 wk->done = ieee80211_probe_auth_done;
2265 2342
@@ -2409,6 +2486,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2409 memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); 2486 memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN);
2410 2487
2411 wk->chan = req->bss->channel; 2488 wk->chan = req->bss->channel;
2489 wk->chan_type = NL80211_CHAN_NO_HT;
2412 wk->sdata = sdata; 2490 wk->sdata = sdata;
2413 wk->done = ieee80211_assoc_done; 2491 wk->done = ieee80211_assoc_done;
2414 if (!bss->dtim_period && 2492 if (!bss->dtim_period &&
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index b4e52676f3f..13427b194ce 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -17,10 +17,14 @@
17#include "driver-trace.h" 17#include "driver-trace.h"
18 18
19/* 19/*
20 * inform AP that we will go to sleep so that it will buffer the frames 20 * Tell our hardware to disable PS.
21 * while we scan 21 * Optionally inform AP that we will go to sleep so that it will buffer
22 * the frames while we are doing off-channel work. This is optional
23 * because we *may* be doing work on the operating channel, and want our
24 * hardware unconditionally awake, but still let the AP send us normal frames.
22 */ 25 */
23static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) 26static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata,
27 bool tell_ap)
24{ 28{
25 struct ieee80211_local *local = sdata->local; 29 struct ieee80211_local *local = sdata->local;
26 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 30 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -41,8 +45,8 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
41 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 45 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
42 } 46 }
43 47
44 if (!(local->offchannel_ps_enabled) || 48 if (tell_ap && (!local->offchannel_ps_enabled ||
45 !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) 49 !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)))
46 /* 50 /*
47 * If power save was enabled, no need to send a nullfunc 51 * If power save was enabled, no need to send a nullfunc
48 * frame because AP knows that we are sleeping. But if the 52 * frame because AP knows that we are sleeping. But if the
@@ -77,6 +81,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
77 * we are sleeping, let's just enable power save mode in 81 * we are sleeping, let's just enable power save mode in
78 * hardware. 82 * hardware.
79 */ 83 */
84 /* TODO: Only set hardware if CONF_PS changed?
85 * TODO: Should we set offchannel_ps_enabled to false?
86 */
80 local->hw.conf.flags |= IEEE80211_CONF_PS; 87 local->hw.conf.flags |= IEEE80211_CONF_PS;
81 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 88 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
82 } else if (local->hw.conf.dynamic_ps_timeout > 0) { 89 } else if (local->hw.conf.dynamic_ps_timeout > 0) {
@@ -95,63 +102,61 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
95 ieee80211_sta_reset_conn_monitor(sdata); 102 ieee80211_sta_reset_conn_monitor(sdata);
96} 103}
97 104
98void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) 105void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
106 bool offchannel_ps_enable)
99{ 107{
100 struct ieee80211_sub_if_data *sdata; 108 struct ieee80211_sub_if_data *sdata;
101 109
110 /*
111 * notify the AP about us leaving the channel and stop all
112 * STA interfaces.
113 */
102 mutex_lock(&local->iflist_mtx); 114 mutex_lock(&local->iflist_mtx);
103 list_for_each_entry(sdata, &local->interfaces, list) { 115 list_for_each_entry(sdata, &local->interfaces, list) {
104 if (!ieee80211_sdata_running(sdata)) 116 if (!ieee80211_sdata_running(sdata))
105 continue; 117 continue;
106 118
107 /* disable beaconing */ 119 if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
120 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
121
122 /* Check to see if we should disable beaconing. */
108 if (sdata->vif.type == NL80211_IFTYPE_AP || 123 if (sdata->vif.type == NL80211_IFTYPE_AP ||
109 sdata->vif.type == NL80211_IFTYPE_ADHOC || 124 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
110 sdata->vif.type == NL80211_IFTYPE_MESH_POINT) 125 sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
111 ieee80211_bss_info_change_notify( 126 ieee80211_bss_info_change_notify(
112 sdata, BSS_CHANGED_BEACON_ENABLED); 127 sdata, BSS_CHANGED_BEACON_ENABLED);
113 128
114 /* 129 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
115 * only handle non-STA interfaces here, STA interfaces
116 * are handled in ieee80211_offchannel_stop_station(),
117 * e.g., from the background scan state machine.
118 *
119 * In addition, do not stop monitor interface to allow it to be
120 * used from user space controlled off-channel operations.
121 */
122 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
123 sdata->vif.type != NL80211_IFTYPE_MONITOR) {
124 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
125 netif_tx_stop_all_queues(sdata->dev); 130 netif_tx_stop_all_queues(sdata->dev);
131 if (offchannel_ps_enable &&
132 (sdata->vif.type == NL80211_IFTYPE_STATION) &&
133 sdata->u.mgd.associated)
134 ieee80211_offchannel_ps_enable(sdata, true);
126 } 135 }
127 } 136 }
128 mutex_unlock(&local->iflist_mtx); 137 mutex_unlock(&local->iflist_mtx);
129} 138}
130 139
131void ieee80211_offchannel_stop_station(struct ieee80211_local *local) 140void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local,
141 bool tell_ap)
132{ 142{
133 struct ieee80211_sub_if_data *sdata; 143 struct ieee80211_sub_if_data *sdata;
134 144
135 /*
136 * notify the AP about us leaving the channel and stop all STA interfaces
137 */
138 mutex_lock(&local->iflist_mtx); 145 mutex_lock(&local->iflist_mtx);
139 list_for_each_entry(sdata, &local->interfaces, list) { 146 list_for_each_entry(sdata, &local->interfaces, list) {
140 if (!ieee80211_sdata_running(sdata)) 147 if (!ieee80211_sdata_running(sdata))
141 continue; 148 continue;
142 149
143 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 150 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
144 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); 151 sdata->u.mgd.associated)
145 netif_tx_stop_all_queues(sdata->dev); 152 ieee80211_offchannel_ps_enable(sdata, tell_ap);
146 if (sdata->u.mgd.associated)
147 ieee80211_offchannel_ps_enable(sdata);
148 }
149 } 153 }
150 mutex_unlock(&local->iflist_mtx); 154 mutex_unlock(&local->iflist_mtx);
151} 155}
152 156
153void ieee80211_offchannel_return(struct ieee80211_local *local, 157void ieee80211_offchannel_return(struct ieee80211_local *local,
154 bool enable_beaconing) 158 bool enable_beaconing,
159 bool offchannel_ps_disable)
155{ 160{
156 struct ieee80211_sub_if_data *sdata; 161 struct ieee80211_sub_if_data *sdata;
157 162
@@ -161,7 +166,8 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
161 continue; 166 continue;
162 167
163 /* Tell AP we're back */ 168 /* Tell AP we're back */
164 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 169 if (offchannel_ps_disable &&
170 sdata->vif.type == NL80211_IFTYPE_STATION) {
165 if (sdata->u.mgd.associated) 171 if (sdata->u.mgd.associated)
166 ieee80211_offchannel_ps_disable(sdata); 172 ieee80211_offchannel_ps_disable(sdata);
167 } 173 }
@@ -181,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
181 netif_tx_wake_all_queues(sdata->dev); 187 netif_tx_wake_all_queues(sdata->dev);
182 } 188 }
183 189
184 /* re-enable beaconing */ 190 /* Check to see if we should re-enable beaconing */
185 if (enable_beaconing && 191 if (enable_beaconing &&
186 (sdata->vif.type == NL80211_IFTYPE_AP || 192 (sdata->vif.type == NL80211_IFTYPE_AP ||
187 sdata->vif.type == NL80211_IFTYPE_ADHOC || 193 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
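
The offchannel helpers are now three orthogonal operations: stopping
vifs (queues and beaconing), enabling PS on all associated station vifs
with or without a nullfunc to the AP, and returning with independent
control over beaconing and PS. A sketch of how a software scan pairs
them, with the flag meanings taken from the hunks above (illustrative
only, not a verbatim call sequence from scan.c):

	ieee80211_offchannel_enable_all_ps(local, false); /* PS on, AP not told */
	ieee80211_offchannel_stop_vifs(local, false);     /* queues/beacons off */
	/* ... off-channel work ... */
	ieee80211_offchannel_return(local, true, false);  /* beacons on, PS kept */
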
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 165a4518bb4..8212a8bebf0 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -415,10 +415,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
415 mi->sample_count--; 415 mi->sample_count--;
416 } 416 }
417 417
418 if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { 418 if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
419 mi->sample_packets += info->status.ampdu_len; 419 mi->sample_packets += info->status.ampdu_len;
420 minstrel_next_sample_idx(mi);
421 }
422 420
423 for (i = 0; !last; i++) { 421 for (i = 0; !last; i++) {
424 last = (i == IEEE80211_TX_MAX_RATES - 1) || 422 last = (i == IEEE80211_TX_MAX_RATES - 1) ||
@@ -519,9 +517,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
519 rate->count = mr->retry_count; 517 rate->count = mr->retry_count;
520 518
521 rate->flags = IEEE80211_TX_RC_MCS | group->flags; 519 rate->flags = IEEE80211_TX_RC_MCS | group->flags;
522 if (txrc->short_preamble) 520 if (rtscts)
523 rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
524 if (txrc->rts || rtscts)
525 rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; 521 rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS;
526 rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; 522 rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES;
527} 523}
@@ -553,13 +549,14 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
553 sample_idx = sample_table[mg->column][mg->index]; 549 sample_idx = sample_table[mg->column][mg->index];
554 mr = &mg->rates[sample_idx]; 550 mr = &mg->rates[sample_idx];
555 sample_idx += mi->sample_group * MCS_GROUP_RATES; 551 sample_idx += mi->sample_group * MCS_GROUP_RATES;
552 minstrel_next_sample_idx(mi);
556 553
557 /* 554 /*
558 * When not using MRR, do not sample if the probability is already 555 * When not using MRR, do not sample if the probability is already
559 * higher than 95% to avoid wasting airtime 556 * higher than 95% to avoid wasting airtime
560 */ 557 */
561 if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) 558 if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100)))
562 goto next; 559 return -1;
563 560
564 /* 561 /*
565 * Make sure that lower rates get sampled only occasionally, 562 * Make sure that lower rates get sampled only occasionally,
@@ -568,17 +565,13 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
568 if (minstrel_get_duration(sample_idx) > 565 if (minstrel_get_duration(sample_idx) >
569 minstrel_get_duration(mi->max_tp_rate)) { 566 minstrel_get_duration(mi->max_tp_rate)) {
570 if (mr->sample_skipped < 20) 567 if (mr->sample_skipped < 20)
571 goto next; 568 return -1;
572 569
573 if (mi->sample_slow++ > 2) 570 if (mi->sample_slow++ > 2)
574 goto next; 571 return -1;
575 } 572 }
576 573
577 return sample_idx; 574 return sample_idx;
578
579next:
580 minstrel_next_sample_idx(mi);
581 return -1;
582} 575}
583 576
584static void 577static void
@@ -605,19 +598,46 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
605 sample = true; 598 sample = true;
606 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, 599 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx,
607 txrc, true, false); 600 txrc, true, false);
608 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
609 txrc, false, false);
610 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; 601 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
611 } else { 602 } else {
612 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, 603 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate,
613 txrc, false, false); 604 txrc, false, false);
614 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
615 txrc, false, true);
616 } 605 }
617 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample);
618 606
619 ar[3].count = 0; 607 if (mp->hw->max_rates >= 3) {
620 ar[3].idx = -1; 608 /*
609 * At least 3 tx rates supported, use
610 * sample_rate -> max_tp_rate -> max_prob_rate for sampling and
611 * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default.
612 */
613 if (sample_idx >= 0)
614 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
615 txrc, false, false);
616 else
617 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
618 txrc, false, true);
619
620 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate,
621 txrc, false, !sample);
622
623 ar[3].count = 0;
624 ar[3].idx = -1;
625 } else if (mp->hw->max_rates == 2) {
626 /*
627 * Only 2 tx rates supported, use
628 * sample_rate -> max_prob_rate for sampling and
629 * max_tp_rate -> max_prob_rate by default.
630 */
631 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate,
632 txrc, false, !sample);
633
634 ar[2].count = 0;
635 ar[2].idx = -1;
636 } else {
637 /* Not using MRR, only use the first rate */
638 ar[1].count = 0;
639 ar[1].idx = -1;
640 }
621 641
622 mi->total_packets++; 642 mi->total_packets++;
623 643
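
minstrel_ht now sizes the multi-rate-retry chain to the hardware: three
or more stages get sample/max_tp_rate(2)/max_prob_rate, exactly two
stages drop the secondary throughput rate in favor of the robust probe
rate, and a single stage disables fallback entirely. The chain is
terminated by idx = -1 on the first unused slot; a condensed sketch of
the fill rule (hypothetical helper, names from the hunk):

	static void fill_mrr(struct ieee80211_tx_rate *ar, int max_rates)
	{
		/* ar[0] is already set: sample rate or max_tp_rate */
		if (max_rates >= 3) {
			/* ar[1]: max_tp_rate or max_tp_rate2,
			 * ar[2]: max_prob_rate */
			ar[3].idx = -1;
			ar[3].count = 0;
		} else if (max_rates == 2) {
			/* ar[1]: max_prob_rate */
			ar[2].idx = -1;
			ar[2].count = 0;
		} else {
			ar[1].idx = -1;	/* no MRR: single rate only */
			ar[1].count = 0;
		}
	}
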
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 1a873f00691..6510f8ee738 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -24,9 +24,6 @@
24/* Fixed point arithmetic shifting amount. */ 24/* Fixed point arithmetic shifting amount. */
25#define RC_PID_ARITH_SHIFT 8 25#define RC_PID_ARITH_SHIFT 8
26 26
27/* Fixed point arithmetic factor. */
28#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
29
30/* Proportional PID component coefficient. */ 27/* Proportional PID component coefficient. */
31#define RC_PID_COEFF_P 15 28#define RC_PID_COEFF_P 15
32/* Integral PID component coefficient. */ 29/* Integral PID component coefficient. */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a6701ed87f0..5c1930ba8eb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -77,7 +77,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
77 /* always present fields */ 77 /* always present fields */
78 len = sizeof(struct ieee80211_radiotap_header) + 9; 78 len = sizeof(struct ieee80211_radiotap_header) + 9;
79 79
80 if (status->flag & RX_FLAG_TSFT) 80 if (status->flag & RX_FLAG_MACTIME_MPDU)
81 len += 8; 81 len += 8;
82 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 82 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
83 len += 1; 83 len += 1;
@@ -85,6 +85,9 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
85 if (len & 1) /* padding for RX_FLAGS if necessary */ 85 if (len & 1) /* padding for RX_FLAGS if necessary */
86 len++; 86 len++;
87 87
88 if (status->flag & RX_FLAG_HT) /* HT info */
89 len += 3;
90
88 return len; 91 return len;
89} 92}
90 93
@@ -120,7 +123,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
120 /* the order of the following fields is important */ 123 /* the order of the following fields is important */
121 124
122 /* IEEE80211_RADIOTAP_TSFT */ 125 /* IEEE80211_RADIOTAP_TSFT */
123 if (status->flag & RX_FLAG_TSFT) { 126 if (status->flag & RX_FLAG_MACTIME_MPDU) {
124 put_unaligned_le64(status->mactime, pos); 127 put_unaligned_le64(status->mactime, pos);
125 rthdr->it_present |= 128 rthdr->it_present |=
126 cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); 129 cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
@@ -139,11 +142,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
139 /* IEEE80211_RADIOTAP_RATE */ 142 /* IEEE80211_RADIOTAP_RATE */
140 if (status->flag & RX_FLAG_HT) { 143 if (status->flag & RX_FLAG_HT) {
141 /* 144 /*
142 * TODO: add following information into radiotap header once 145 * MCS information is a separate field in radiotap,
143 * suitable fields are defined for it: 146 * added below.
144 * - MCS index (status->rate_idx)
145 * - HT40 (status->flag & RX_FLAG_40MHZ)
146 * - short-GI (status->flag & RX_FLAG_SHORT_GI)
147 */ 147 */
148 *pos = 0; 148 *pos = 0;
149 } else { 149 } else {
@@ -193,6 +193,20 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
193 rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; 193 rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP;
194 put_unaligned_le16(rx_flags, pos); 194 put_unaligned_le16(rx_flags, pos);
195 pos += 2; 195 pos += 2;
196
197 if (status->flag & RX_FLAG_HT) {
198 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
199 *pos++ = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
200 IEEE80211_RADIOTAP_MCS_HAVE_GI |
201 IEEE80211_RADIOTAP_MCS_HAVE_BW;
202 *pos = 0;
203 if (status->flag & RX_FLAG_SHORT_GI)
204 *pos |= IEEE80211_RADIOTAP_MCS_SGI;
205 if (status->flag & RX_FLAG_40MHZ)
206 *pos |= IEEE80211_RADIOTAP_MCS_BW_40;
207 pos++;
208 *pos++ = status->rate_idx;
209 }
196} 210}
197 211
198/* 212/*
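
The receive path now emits the proper radiotap MCS field for HT frames
instead of zeroing the legacy rate byte. The field is three bytes: a
"known" bitmap, a flags byte (guard interval, bandwidth), and the MCS
index. A reader-side sketch under the standard radiotap layout
(hypothetical parser, not mac80211 code):

	struct mcs_info { bool sgi, bw40; u8 index; };

	static struct mcs_info parse_radiotap_mcs(const u8 *pos)
	{
		struct mcs_info mi = {
			.sgi   = pos[1] & IEEE80211_RADIOTAP_MCS_SGI,
			.bw40  = pos[1] & IEEE80211_RADIOTAP_MCS_BW_40,
			.index = pos[2],  /* from status->rate_idx */
		};
		/* pos[0] is the "known" bitmap; check HAVE_GI and
		 * HAVE_BW there before trusting the flag bits. */
		return mi;
	}
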
@@ -392,16 +406,10 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
392 if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN))) 406 if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN)))
393 return RX_CONTINUE; 407 return RX_CONTINUE;
394 408
395 if (test_bit(SCAN_HW_SCANNING, &local->scanning)) 409 if (test_bit(SCAN_HW_SCANNING, &local->scanning) ||
410 test_bit(SCAN_SW_SCANNING, &local->scanning))
396 return ieee80211_scan_rx(rx->sdata, skb); 411 return ieee80211_scan_rx(rx->sdata, skb);
397 412
398 if (test_bit(SCAN_SW_SCANNING, &local->scanning)) {
399 /* drop all the other packets during a software scan anyway */
400 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED)
401 dev_kfree_skb(skb);
402 return RX_QUEUED;
403 }
404
405 /* scanning finished during invoking of handlers */ 413 /* scanning finished during invoking of handlers */
406 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); 414 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan);
407 return RX_DROP_UNUSABLE; 415 return RX_DROP_UNUSABLE;
@@ -798,7 +806,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
798 rx->local->dot11FrameDuplicateCount++; 806 rx->local->dot11FrameDuplicateCount++;
799 rx->sta->num_duplicates++; 807 rx->sta->num_duplicates++;
800 } 808 }
801 return RX_DROP_MONITOR; 809 return RX_DROP_UNUSABLE;
802 } else 810 } else
803 rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl; 811 rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl;
804 } 812 }
@@ -824,18 +832,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
824 ieee80211_is_pspoll(hdr->frame_control)) && 832 ieee80211_is_pspoll(hdr->frame_control)) &&
825 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && 833 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC &&
826 rx->sdata->vif.type != NL80211_IFTYPE_WDS && 834 rx->sdata->vif.type != NL80211_IFTYPE_WDS &&
827 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { 835 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC))))
828 if ((!ieee80211_has_fromds(hdr->frame_control) &&
829 !ieee80211_has_tods(hdr->frame_control) &&
830 ieee80211_is_data(hdr->frame_control)) ||
831 !(status->rx_flags & IEEE80211_RX_RA_MATCH)) {
832 /* Drop IBSS frames and frames for other hosts
833 * silently. */
834 return RX_DROP_MONITOR;
835 }
836
837 return RX_DROP_MONITOR; 836 return RX_DROP_MONITOR;
838 }
839 837
840 return RX_CONTINUE; 838 return RX_CONTINUE;
841} 839}
@@ -1088,7 +1086,8 @@ static void ap_sta_ps_start(struct sta_info *sta)
1088 1086
1089 atomic_inc(&sdata->bss->num_sta_ps); 1087 atomic_inc(&sdata->bss->num_sta_ps);
1090 set_sta_flags(sta, WLAN_STA_PS_STA); 1088 set_sta_flags(sta, WLAN_STA_PS_STA);
1091 drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); 1089 if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
1090 drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
1092#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 1091#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
1093 printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", 1092 printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n",
1094 sdata->name, sta->sta.addr, sta->sta.aid); 1093 sdata->name, sta->sta.addr, sta->sta.aid);
@@ -1117,6 +1116,27 @@ static void ap_sta_ps_end(struct sta_info *sta)
1117 ieee80211_sta_ps_deliver_wakeup(sta); 1116 ieee80211_sta_ps_deliver_wakeup(sta);
1118} 1117}
1119 1118
1119int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start)
1120{
1121 struct sta_info *sta_inf = container_of(sta, struct sta_info, sta);
1122 bool in_ps;
1123
1124 WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS));
1125
1126 /* Don't let the same PS state be set twice */
1127 in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA);
1128 if ((start && in_ps) || (!start && !in_ps))
1129 return -EINVAL;
1130
1131 if (start)
1132 ap_sta_ps_start(sta_inf);
1133 else
1134 ap_sta_ps_end(sta_inf);
1135
1136 return 0;
1137}
1138EXPORT_SYMBOL(ieee80211_sta_ps_transition);
1139
1120static ieee80211_rx_result debug_noinline 1140static ieee80211_rx_result debug_noinline
1121ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) 1141ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1122{ 1142{
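
ieee80211_sta_ps_transition() is the entry point for drivers that set
IEEE80211_HW_AP_LINK_PS and therefore learn client power-save state
from firmware or the link layer rather than from frames mac80211 sees.
A hypothetical driver-side caller (only the exported function comes
from the hunk above; the surrounding driver is an assumption):

	static void mydrv_report_ps(struct ieee80211_sta *sta, bool sleep)
	{
		/* -EINVAL: mac80211 already has this state recorded */
		if (ieee80211_sta_ps_transition(sta, sleep) == -EINVAL)
			pr_debug("PS state for %pM unchanged\n", sta->addr);
	}
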
@@ -1136,14 +1156,23 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1136 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { 1156 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) {
1137 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, 1157 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
1138 NL80211_IFTYPE_ADHOC); 1158 NL80211_IFTYPE_ADHOC);
1139 if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) 1159 if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) {
1140 sta->last_rx = jiffies; 1160 sta->last_rx = jiffies;
1161 if (ieee80211_is_data(hdr->frame_control)) {
1162 sta->last_rx_rate_idx = status->rate_idx;
1163 sta->last_rx_rate_flag = status->flag;
1164 }
1165 }
1141 } else if (!is_multicast_ether_addr(hdr->addr1)) { 1166 } else if (!is_multicast_ether_addr(hdr->addr1)) {
1142 /* 1167 /*
1143 * Mesh beacons will update last_rx if they are found to 1168
1144 * match the current local configuration when processed. 1169 * match the current local configuration when processed.
1145 */ 1170 */
1146 sta->last_rx = jiffies; 1171 sta->last_rx = jiffies;
1172 if (ieee80211_is_data(hdr->frame_control)) {
1173 sta->last_rx_rate_idx = status->rate_idx;
1174 sta->last_rx_rate_flag = status->flag;
1175 }
1147 } 1176 }
1148 1177
1149 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) 1178 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
@@ -1161,7 +1190,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1161 * Change STA power saving mode only at the end of a frame 1190 * Change STA power saving mode only at the end of a frame
1162 * exchange sequence. 1191 * exchange sequence.
1163 */ 1192 */
1164 if (!ieee80211_has_morefrags(hdr->frame_control) && 1193 if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) &&
1194 !ieee80211_has_morefrags(hdr->frame_control) &&
1165 !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && 1195 !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
1166 (rx->sdata->vif.type == NL80211_IFTYPE_AP || 1196 (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
1167 rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { 1197 rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
@@ -1556,17 +1586,36 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1556{ 1586{
1557 struct ieee80211_sub_if_data *sdata = rx->sdata; 1587 struct ieee80211_sub_if_data *sdata = rx->sdata;
1558 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 1588 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
1589 bool check_port_control = false;
1590 struct ethhdr *ehdr;
1591 int ret;
1559 1592
1560 if (ieee80211_has_a4(hdr->frame_control) && 1593 if (ieee80211_has_a4(hdr->frame_control) &&
1561 sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) 1594 sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta)
1562 return -1; 1595 return -1;
1563 1596
1597 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1598 !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) {
1599
1600 if (!sdata->u.mgd.use_4addr)
1601 return -1;
1602 else
1603 check_port_control = true;
1604 }
1605
1564 if (is_multicast_ether_addr(hdr->addr1) && 1606 if (is_multicast_ether_addr(hdr->addr1) &&
1565 ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) || 1607 sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta)
1566 (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr)))
1567 return -1; 1608 return -1;
1568 1609
1569 return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); 1610 ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type);
1611 if (ret < 0 || !check_port_control)
1612 return ret;
1613
1614 ehdr = (struct ethhdr *) rx->skb->data;
1615 if (ehdr->h_proto != rx->sdata->control_port_protocol)
1616 return -1;
1617
1618 return 0;
1570} 1619}
1571 1620
1572/* 1621/*
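
The rewritten __ieee80211_data_to_8023() adds a 4-address consistency
rule for station interfaces: a 3-address frame arriving on a 4-addr STA
is only tolerated if, after conversion to 802.3, it carries the
configured control port protocol (normally EAPOL). The tail check,
condensed from the hunk above (sketch only):

	ehdr = (struct ethhdr *)rx->skb->data;
	if (check_port_control &&
	    ehdr->h_proto != rx->sdata->control_port_protocol)
		return -1;	/* drop: only control-port frames pass */
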
@@ -1893,7 +1942,10 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
1893 dev->stats.rx_bytes += rx->skb->len; 1942 dev->stats.rx_bytes += rx->skb->len;
1894 1943
1895 if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && 1944 if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 &&
1896 !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) { 1945 !is_multicast_ether_addr(
1946 ((struct ethhdr *)rx->skb->data)->h_dest) &&
1947 (!local->scanning &&
1948 !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) {
1897 mod_timer(&local->dynamic_ps_timer, jiffies + 1949 mod_timer(&local->dynamic_ps_timer, jiffies +
1898 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 1950 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
1899 } 1951 }
@@ -2590,7 +2642,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
2590 return 0; 2642 return 0;
2591 if (!multicast && 2643 if (!multicast &&
2592 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { 2644 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) {
2593 if (!(sdata->dev->flags & IFF_PROMISC)) 2645 if (!(sdata->dev->flags & IFF_PROMISC) ||
2646 sdata->u.mgd.use_4addr)
2594 return 0; 2647 return 0;
2595 status->rx_flags &= ~IEEE80211_RX_RA_MATCH; 2648 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2596 } 2649 }
@@ -2639,7 +2692,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
2639 return 0; 2692 return 0;
2640 } else if (!ieee80211_bssid_match(bssid, 2693 } else if (!ieee80211_bssid_match(bssid,
2641 sdata->vif.addr)) { 2694 sdata->vif.addr)) {
2642 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN)) 2695 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
2696 !ieee80211_is_beacon(hdr->frame_control))
2643 return 0; 2697 return 0;
2644 status->rx_flags &= ~IEEE80211_RX_RA_MATCH; 2698 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2645 } 2699 }
@@ -2692,7 +2746,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
2692 if (!skb) { 2746 if (!skb) {
2693 if (net_ratelimit()) 2747 if (net_ratelimit())
2694 wiphy_debug(local->hw.wiphy, 2748 wiphy_debug(local->hw.wiphy,
2695 "failed to copy multicast frame for %s\n", 2749 "failed to copy skb for %s\n",
2696 sdata->name); 2750 sdata->name);
2697 return true; 2751 return true;
2698 } 2752 }
@@ -2730,7 +2784,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2730 local->dot11ReceivedFragmentCount++; 2784 local->dot11ReceivedFragmentCount++;
2731 2785
2732 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || 2786 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
2733 test_bit(SCAN_OFF_CHANNEL, &local->scanning))) 2787 test_bit(SCAN_SW_SCANNING, &local->scanning)))
2734 status->rx_flags |= IEEE80211_RX_IN_SCAN; 2788 status->rx_flags |= IEEE80211_RX_IN_SCAN;
2735 2789
2736 if (ieee80211_is_mgmt(fc)) 2790 if (ieee80211_is_mgmt(fc))
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index fb274db77e3..489b6ad200d 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -196,7 +196,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
196 ieee802_11_parse_elems(elements, skb->len - baselen, &elems); 196 ieee802_11_parse_elems(elements, skb->len - baselen, &elems);
197 197
198 if (elems.ds_params && elems.ds_params_len == 1) 198 if (elems.ds_params && elems.ds_params_len == 1)
199 freq = ieee80211_channel_to_frequency(elems.ds_params[0]); 199 freq = ieee80211_channel_to_frequency(elems.ds_params[0],
200 rx_status->band);
200 else 201 else
201 freq = rx_status->freq; 202 freq = rx_status->freq;
202 203
@@ -211,6 +212,14 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
211 if (bss) 212 if (bss)
212 ieee80211_rx_bss_put(sdata->local, bss); 213 ieee80211_rx_bss_put(sdata->local, bss);
213 214
215 /* If we are on the operating channel, and this packet is for the
216 * current channel, pass the packet up the stack so that
217 * the rest of the stack can make use of it.
218 */
219 if (ieee80211_cfg_on_oper_channel(sdata->local)
220 && (channel == sdata->local->oper_channel))
221 return RX_CONTINUE;
222
214 dev_kfree_skb(skb); 223 dev_kfree_skb(skb);
215 return RX_QUEUED; 224 return RX_QUEUED;
216} 225}
@@ -249,10 +258,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
249 return true; 258 return true;
250} 259}
251 260
252static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, 261static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
253 bool was_hw_scan) 262 bool was_hw_scan)
254{ 263{
255 struct ieee80211_local *local = hw_to_local(hw); 264 struct ieee80211_local *local = hw_to_local(hw);
265 bool on_oper_chan;
266 bool enable_beacons = false;
256 267
257 lockdep_assert_held(&local->mtx); 268 lockdep_assert_held(&local->mtx);
258 269
@@ -266,12 +277,12 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
266 aborted = true; 277 aborted = true;
267 278
268 if (WARN_ON(!local->scan_req)) 279 if (WARN_ON(!local->scan_req))
269 return false; 280 return;
270 281
271 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { 282 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
272 int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req); 283 int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req);
273 if (rc == 0) 284 if (rc == 0)
274 return false; 285 return;
275 } 286 }
276 287
277 kfree(local->hw_scan_req); 288 kfree(local->hw_scan_req);
@@ -285,24 +296,28 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
285 local->scanning = 0; 296 local->scanning = 0;
286 local->scan_channel = NULL; 297 local->scan_channel = NULL;
287 298
288 return true; 299 on_oper_chan = ieee80211_cfg_on_oper_channel(local);
289}
290 300
291static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, 301 if (was_hw_scan || !on_oper_chan)
292 bool was_hw_scan) 302 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
293{ 303 else
294 struct ieee80211_local *local = hw_to_local(hw); 304 /* Set power back to normal operating levels. */
305 ieee80211_hw_config(local, 0);
295 306
296 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
297 if (!was_hw_scan) { 307 if (!was_hw_scan) {
308 bool on_oper_chan2;
298 ieee80211_configure_filter(local); 309 ieee80211_configure_filter(local);
299 drv_sw_scan_complete(local); 310 drv_sw_scan_complete(local);
300 ieee80211_offchannel_return(local, true); 311 on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
312 /* We should always be on-channel at this point. */
313 WARN_ON(!on_oper_chan2);
314 if (on_oper_chan2 && (on_oper_chan != on_oper_chan2))
315 enable_beacons = true;
316
317 ieee80211_offchannel_return(local, enable_beacons, true);
301 } 318 }
302 319
303 mutex_lock(&local->mtx);
304 ieee80211_recalc_idle(local); 320 ieee80211_recalc_idle(local);
305 mutex_unlock(&local->mtx);
306 321
307 ieee80211_mlme_notify_scan_completed(local); 322 ieee80211_mlme_notify_scan_completed(local);
308 ieee80211_ibss_notify_scan_completed(local); 323 ieee80211_ibss_notify_scan_completed(local);
@@ -340,16 +355,21 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
340 */ 355 */
341 drv_sw_scan_start(local); 356 drv_sw_scan_start(local);
342 357
343 ieee80211_offchannel_stop_beaconing(local);
344
345 local->leave_oper_channel_time = 0; 358 local->leave_oper_channel_time = 0;
346 local->next_scan_state = SCAN_DECISION; 359 local->next_scan_state = SCAN_DECISION;
347 local->scan_channel_idx = 0; 360 local->scan_channel_idx = 0;
348 361
349 drv_flush(local, false); 362 /* We always want to use off-channel PS, even if we
363 * are not really leaving oper-channel. Don't
364 * tell the AP though, as long as we are on-channel.
365 */
366 ieee80211_offchannel_enable_all_ps(local, false);
350 367
351 ieee80211_configure_filter(local); 368 ieee80211_configure_filter(local);
352 369
370 /* We need to set the power level to the maximum allowed for scanning. */
371 ieee80211_hw_config(local, 0);
372
353 ieee80211_queue_delayed_work(&local->hw, 373 ieee80211_queue_delayed_work(&local->hw,
354 &local->scan_work, 374 &local->scan_work,
355 IEEE80211_CHANNEL_TIME); 375 IEEE80211_CHANNEL_TIME);
@@ -486,7 +506,20 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
486 } 506 }
487 mutex_unlock(&local->iflist_mtx); 507 mutex_unlock(&local->iflist_mtx);
488 508
489 if (local->scan_channel) { 509 next_chan = local->scan_req->channels[local->scan_channel_idx];
510
511 if (ieee80211_cfg_on_oper_channel(local)) {
512 /* We're currently on operating channel. */
513 if (next_chan == local->oper_channel)
514 /* We don't need to move off of operating channel. */
515 local->next_scan_state = SCAN_SET_CHANNEL;
516 else
517 /*
518 * We do need to leave operating channel, as next
519 * scan is somewhere else.
520 */
521 local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL;
522 } else {
490 /* 523 /*
491 * we're currently scanning a different channel, let's 524 * we're currently scanning a different channel, let's
492 * see if we can scan another channel without interfering 525 * see if we can scan another channel without interfering
@@ -502,7 +535,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
502 * 535 *
503 * Otherwise switch back to the operating channel. 536 * Otherwise switch back to the operating channel.
504 */ 537 */
505 next_chan = local->scan_req->channels[local->scan_channel_idx];
506 538
507 bad_latency = time_after(jiffies + 539 bad_latency = time_after(jiffies +
508 ieee80211_scan_get_channel_time(next_chan), 540 ieee80211_scan_get_channel_time(next_chan),
@@ -520,12 +552,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
520 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; 552 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL;
521 else 553 else
522 local->next_scan_state = SCAN_SET_CHANNEL; 554 local->next_scan_state = SCAN_SET_CHANNEL;
523 } else {
524 /*
525 * we're on the operating channel currently, let's
526 * leave that channel now to scan another one
527 */
528 local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL;
529 } 555 }
530 556
531 *next_delay = 0; 557 *next_delay = 0;
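
The scan decision step now keys first off whether the radio is already
on the operating channel: stay put when the next scan channel is the
operating channel, leave otherwise, and only apply the latency and
bandwidth heuristics when already off-channel. Condensed view (names
from the hunks above, illustrative only):

	if (ieee80211_cfg_on_oper_channel(local)) {
		if (next_chan == local->oper_channel)
			local->next_scan_state = SCAN_SET_CHANNEL;
		else
			local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL;
	} else {
		/* already off-channel: pick SCAN_ENTER_OPER_CHANNEL or
		 * SCAN_SET_CHANNEL via the bad_latency/listen_int checks */
	}
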
@@ -534,9 +560,10 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
534static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, 560static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local,
535 unsigned long *next_delay) 561 unsigned long *next_delay)
536{ 562{
537 ieee80211_offchannel_stop_station(local); 563 /* PS will already be in off-channel mode,
538 564 * we do that once at the beginning of scanning.
539 __set_bit(SCAN_OFF_CHANNEL, &local->scanning); 565 */
566 ieee80211_offchannel_stop_vifs(local, false);
540 567
541 /* 568 /*
542 * What if the nullfunc frames didn't arrive? 569 * What if the nullfunc frames didn't arrive?
@@ -559,15 +586,15 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca
559{ 586{
560 /* switch back to the operating channel */ 587 /* switch back to the operating channel */
561 local->scan_channel = NULL; 588 local->scan_channel = NULL;
562 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 589 if (!ieee80211_cfg_on_oper_channel(local))
590 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
563 591
564 /* 592 /*
565 * Only re-enable station mode interface now; beaconing will be 593 * Re-enable vifs and beaconing. Leave PS
566 * re-enabled once the full scan has been completed. 594 * in off-channel state..will put that back
595 * on-channel at the end of scanning.
567 */ 596 */
568 ieee80211_offchannel_return(local, false); 597 ieee80211_offchannel_return(local, true, false);
569
570 __clear_bit(SCAN_OFF_CHANNEL, &local->scanning);
571 598
572 *next_delay = HZ / 5; 599 *next_delay = HZ / 5;
573 local->next_scan_state = SCAN_DECISION; 600 local->next_scan_state = SCAN_DECISION;
@@ -583,8 +610,11 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
583 chan = local->scan_req->channels[local->scan_channel_idx]; 610 chan = local->scan_req->channels[local->scan_channel_idx];
584 611
585 local->scan_channel = chan; 612 local->scan_channel = chan;
586 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) 613
587 skip = 1; 614 /* Only call hw-config if we really need to change channels. */
615 if (chan != local->hw.conf.channel)
616 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
617 skip = 1;
588 618
589 /* advance state machine to next channel/band */ 619 /* advance state machine to next channel/band */
590 local->scan_channel_idx++; 620 local->scan_channel_idx++;
@@ -642,12 +672,14 @@ void ieee80211_scan_work(struct work_struct *work)
642{ 672{
643 struct ieee80211_local *local = 673 struct ieee80211_local *local =
644 container_of(work, struct ieee80211_local, scan_work.work); 674 container_of(work, struct ieee80211_local, scan_work.work);
645 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 675 struct ieee80211_sub_if_data *sdata;
646 unsigned long next_delay = 0; 676 unsigned long next_delay = 0;
647 bool aborted, hw_scan, finish; 677 bool aborted, hw_scan;
648 678
649 mutex_lock(&local->mtx); 679 mutex_lock(&local->mtx);
650 680
681 sdata = local->scan_sdata;
682
651 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { 683 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) {
652 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); 684 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning);
653 goto out_complete; 685 goto out_complete;
@@ -711,17 +743,11 @@ void ieee80211_scan_work(struct work_struct *work)
711 } while (next_delay == 0); 743 } while (next_delay == 0);
712 744
713 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); 745 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay);
714 mutex_unlock(&local->mtx); 746 goto out;
715 return;
716 747
717out_complete: 748out_complete:
718 hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); 749 hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
719 finish = __ieee80211_scan_completed(&local->hw, aborted, hw_scan); 750 __ieee80211_scan_completed(&local->hw, aborted, hw_scan);
720 mutex_unlock(&local->mtx);
721 if (finish)
722 __ieee80211_scan_completed_finish(&local->hw, hw_scan);
723 return;
724
725out: 751out:
726 mutex_unlock(&local->mtx); 752 mutex_unlock(&local->mtx);
727} 753}
@@ -791,7 +817,6 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
791void ieee80211_scan_cancel(struct ieee80211_local *local) 817void ieee80211_scan_cancel(struct ieee80211_local *local)
792{ 818{
793 bool abortscan; 819 bool abortscan;
794 bool finish = false;
795 820
796 /* 821 /*
797 * We are only canceling software scan, or deferred scan that was not 822 * We are only canceling software scan, or deferred scan that was not
@@ -811,14 +836,17 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
811 836
812 mutex_lock(&local->mtx); 837 mutex_lock(&local->mtx);
813 abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning); 838 abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning);
814 if (abortscan)
815 finish = __ieee80211_scan_completed(&local->hw, true, false);
816 mutex_unlock(&local->mtx);
817
818 if (abortscan) { 839 if (abortscan) {
819 /* The scan is canceled, but stop work from being pending */ 840 /*
820 cancel_delayed_work_sync(&local->scan_work); 841 * The scan is canceled, but also keep the work item from remaining pending.
842 *
843 * If the work is currently running, it must be blocked on
844 * the mutex, but we'll set scan_sdata = NULL and it'll
845 * simply exit once it acquires the mutex.
846 */
847 cancel_delayed_work(&local->scan_work);
848 /* and clean up */
849 __ieee80211_scan_completed(&local->hw, true, false);
821 } 850 }
822 if (finish) 851 mutex_unlock(&local->mtx);
823 __ieee80211_scan_completed_finish(&local->hw, false);
824} 852}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index c426504ed1c..5a11078827a 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -899,7 +899,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
899 struct ieee80211_local *local = sdata->local; 899 struct ieee80211_local *local = sdata->local;
900 int sent, buffered; 900 int sent, buffered;
901 901
902 drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); 902 if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
903 drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
903 904
904 if (!skb_queue_empty(&sta->ps_tx_buf)) 905 if (!skb_queue_empty(&sta->ps_tx_buf))
905 sta_info_clear_tim_bit(sta); 906 sta_info_clear_tim_bit(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index bbdd2a86a94..57681149e37 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -82,6 +82,7 @@ enum ieee80211_sta_info_flags {
82 * @state: session state (see above) 82 * @state: session state (see above)
83 * @stop_initiator: initiator of a session stop 83 * @stop_initiator: initiator of a session stop
84 * @tx_stop: TX DelBA frame when stopping 84 * @tx_stop: TX DelBA frame when stopping
85 * @buf_size: reorder buffer size at receiver
85 * 86 *
86 * This structure's lifetime is managed by RCU, assignments to 87 * This structure's lifetime is managed by RCU, assignments to
87 * the array holding it must hold the aggregation mutex. 88 * the array holding it must hold the aggregation mutex.
@@ -101,6 +102,7 @@ struct tid_ampdu_tx {
101 u8 dialog_token; 102 u8 dialog_token;
102 u8 stop_initiator; 103 u8 stop_initiator;
103 bool tx_stop; 104 bool tx_stop;
105 u8 buf_size;
104}; 106};
105 107
106/** 108/**
@@ -207,6 +209,8 @@ enum plink_state {
207 * @rate_ctrl_priv: rate control private per-STA pointer 209 * @rate_ctrl_priv: rate control private per-STA pointer
208 * @last_tx_rate: rate used for last transmit, to report to userspace as 210 * @last_tx_rate: rate used for last transmit, to report to userspace as
209 * "the" transmit rate 211 * "the" transmit rate
212 * @last_rx_rate_idx: rx status rate index of the last data packet
213 * @last_rx_rate_flag: rx status flag of the last data packet
210 * @lock: used for locking all fields that require locking, see comments 214 * @lock: used for locking all fields that require locking, see comments
211 * in the header file. 215 * in the header file.
212 * @flaglock: spinlock for flags accesses 216 * @flaglock: spinlock for flags accesses
@@ -309,6 +313,8 @@ struct sta_info {
309 unsigned long tx_bytes; 313 unsigned long tx_bytes;
310 unsigned long tx_fragments; 314 unsigned long tx_fragments;
311 struct ieee80211_tx_rate last_tx_rate; 315 struct ieee80211_tx_rate last_tx_rate;
316 int last_rx_rate_idx;
317 int last_rx_rate_flag;
312 u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; 318 u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
313 319
314 /* 320 /*
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 071ac95c4aa..b936dd29e92 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -98,6 +98,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
98 * (b) always process RX events before TX status events if ordering 98 * (b) always process RX events before TX status events if ordering
99 * can be unknown, for example with different interrupt status 99 * can be unknown, for example with different interrupt status
100 * bits. 100 * bits.
101 * (c) if PS mode transitions are manual (i.e. the flag
102 * %IEEE80211_HW_AP_LINK_PS is set), always process PS state
103 * changes before calling TX status events if ordering can be
104 * unknown.
101 */ 105 */
102 if (test_sta_flags(sta, WLAN_STA_PS_STA) && 106 if (test_sta_flags(sta, WLAN_STA_PS_STA) &&
103 skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { 107 skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) {
@@ -314,8 +318,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
314 if (info->flags & IEEE80211_TX_STAT_ACK) { 318 if (info->flags & IEEE80211_TX_STAT_ACK) {
315 local->ps_sdata->u.mgd.flags |= 319 local->ps_sdata->u.mgd.flags |=
316 IEEE80211_STA_NULLFUNC_ACKED; 320 IEEE80211_STA_NULLFUNC_ACKED;
317 ieee80211_queue_work(&local->hw,
318 &local->dynamic_ps_enable_work);
319 } else 321 } else
320 mod_timer(&local->dynamic_ps_timer, jiffies + 322 mod_timer(&local->dynamic_ps_timer, jiffies +
321 msecs_to_jiffies(10)); 323 msecs_to_jiffies(10));
@@ -339,6 +341,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
339 cookie = local->hw_roc_cookie ^ 2; 341 cookie = local->hw_roc_cookie ^ 2;
340 local->hw_roc_skb_for_status = NULL; 342 local->hw_roc_skb_for_status = NULL;
341 } 343 }
344
345 if (cookie == local->hw_offchan_tx_cookie)
346 local->hw_offchan_tx_cookie = 0;
347
342 cfg80211_mgmt_tx_status( 348 cfg80211_mgmt_tx_status(
343 skb->dev, cookie, skb->data, skb->len, 349 skb->dev, cookie, skb->data, skb->len,
344 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); 350 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b0beaa58246..ce4596ed126 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -33,10 +33,6 @@
33#include "wme.h" 33#include "wme.h"
34#include "rate.h" 34#include "rate.h"
35 35
36#define IEEE80211_TX_OK 0
37#define IEEE80211_TX_AGAIN 1
38#define IEEE80211_TX_PENDING 2
39
40/* misc utils */ 36/* misc utils */
41 37
42static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, 38static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
@@ -173,7 +169,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
173 return cpu_to_le16(dur); 169 return cpu_to_le16(dur);
174} 170}
175 171
176static int inline is_ieee80211_device(struct ieee80211_local *local, 172static inline int is_ieee80211_device(struct ieee80211_local *local,
177 struct net_device *dev) 173 struct net_device *dev)
178{ 174{
179 return local == wdev_priv(dev->ieee80211_ptr); 175 return local == wdev_priv(dev->ieee80211_ptr);
@@ -236,6 +232,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
236 if (local->hw.conf.flags & IEEE80211_CONF_PS) { 232 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
237 ieee80211_stop_queues_by_reason(&local->hw, 233 ieee80211_stop_queues_by_reason(&local->hw,
238 IEEE80211_QUEUE_STOP_REASON_PS); 234 IEEE80211_QUEUE_STOP_REASON_PS);
235 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
239 ieee80211_queue_work(&local->hw, 236 ieee80211_queue_work(&local->hw,
240 &local->dynamic_ps_disable_work); 237 &local->dynamic_ps_disable_work);
241 } 238 }
@@ -257,7 +254,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
257 if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) 254 if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED))
258 return TX_CONTINUE; 255 return TX_CONTINUE;
259 256
260 if (unlikely(test_bit(SCAN_OFF_CHANNEL, &tx->local->scanning)) && 257 if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) &&
258 test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) &&
261 !ieee80211_is_probe_req(hdr->frame_control) && 259 !ieee80211_is_probe_req(hdr->frame_control) &&
262 !ieee80211_is_nullfunc(hdr->frame_control)) 260 !ieee80211_is_nullfunc(hdr->frame_control))
263 /* 261 /*
@@ -1283,16 +1281,17 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
1283 return TX_CONTINUE; 1281 return TX_CONTINUE;
1284} 1282}
1285 1283
1286static int __ieee80211_tx(struct ieee80211_local *local, 1284/*
1287 struct sk_buff **skbp, 1285 * Returns false if the frame couldn't be transmitted but was queued instead.
1288 struct sta_info *sta, 1286 */
1289 bool txpending) 1287static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp,
1288 struct sta_info *sta, bool txpending)
1290{ 1289{
1291 struct sk_buff *skb = *skbp, *next; 1290 struct sk_buff *skb = *skbp, *next;
1292 struct ieee80211_tx_info *info; 1291 struct ieee80211_tx_info *info;
1293 struct ieee80211_sub_if_data *sdata; 1292 struct ieee80211_sub_if_data *sdata;
1294 unsigned long flags; 1293 unsigned long flags;
1295 int ret, len; 1294 int len;
1296 bool fragm = false; 1295 bool fragm = false;
1297 1296
1298 while (skb) { 1297 while (skb) {
@@ -1300,13 +1299,37 @@ static int __ieee80211_tx(struct ieee80211_local *local,
1300 __le16 fc; 1299 __le16 fc;
1301 1300
1302 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 1301 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1303 ret = IEEE80211_TX_OK;
1304 if (local->queue_stop_reasons[q] || 1302 if (local->queue_stop_reasons[q] ||
1305 (!txpending && !skb_queue_empty(&local->pending[q]))) 1303 (!txpending && !skb_queue_empty(&local->pending[q]))) {
1306 ret = IEEE80211_TX_PENDING; 1304 /*
1305 * Since queue is stopped, queue up frames for later
1306 * transmission from the tx-pending tasklet when the
1307 * queue is woken again.
1308 */
1309
1310 do {
1311 next = skb->next;
1312 skb->next = NULL;
1313 /*
1314 * NB: If txpending is true, next must already
1315 * be NULL since we must've gone through this
1316 * loop before already; therefore we can just
1317 * queue the frame to the head without worrying
1318 * about reordering of fragments.
1319 */
1320 if (unlikely(txpending))
1321 __skb_queue_head(&local->pending[q],
1322 skb);
1323 else
1324 __skb_queue_tail(&local->pending[q],
1325 skb);
1326 } while ((skb = next));
1327
1328 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1329 flags);
1330 return false;
1331 }
1307 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 1332 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
1308 if (ret != IEEE80211_TX_OK)
1309 return ret;
1310 1333
1311 info = IEEE80211_SKB_CB(skb); 1334 info = IEEE80211_SKB_CB(skb);
1312 1335
@@ -1341,15 +1364,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
1341 info->control.sta = NULL; 1364 info->control.sta = NULL;
1342 1365
1343 fc = ((struct ieee80211_hdr *)skb->data)->frame_control; 1366 fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
1344 ret = drv_tx(local, skb); 1367 drv_tx(local, skb);
1345 if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) {
1346 dev_kfree_skb(skb);
1347 ret = NETDEV_TX_OK;
1348 }
1349 if (ret != NETDEV_TX_OK) {
1350 info->control.vif = &sdata->vif;
1351 return IEEE80211_TX_AGAIN;
1352 }
1353 1368
1354 ieee80211_tpt_led_trig_tx(local, fc, len); 1369 ieee80211_tpt_led_trig_tx(local, fc, len);
1355 *skbp = skb = next; 1370 *skbp = skb = next;
@@ -1357,7 +1372,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
1357 fragm = true; 1372 fragm = true;
1358 } 1373 }
1359 1374
1360 return IEEE80211_TX_OK; 1375 return true;
1361} 1376}
1362 1377
1363/* 1378/*
@@ -1394,7 +1409,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1394 /* handlers after fragment must be aware of tx info fragmentation! */ 1409 /* handlers after fragment must be aware of tx info fragmentation! */
1395 CALL_TXH(ieee80211_tx_h_stats); 1410 CALL_TXH(ieee80211_tx_h_stats);
1396 CALL_TXH(ieee80211_tx_h_encrypt); 1411 CALL_TXH(ieee80211_tx_h_encrypt);
1397 CALL_TXH(ieee80211_tx_h_calculate_duration); 1412 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
1413 CALL_TXH(ieee80211_tx_h_calculate_duration);
1398#undef CALL_TXH 1414#undef CALL_TXH
1399 1415
1400 txh_done: 1416 txh_done:
@@ -1416,23 +1432,24 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1416 return 0; 1432 return 0;
1417} 1433}
1418 1434
1419static void ieee80211_tx(struct ieee80211_sub_if_data *sdata, 1435/*
1436 * Returns false if the frame couldn't be transmitted but was queued instead.
1437 */
1438static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
1420 struct sk_buff *skb, bool txpending) 1439 struct sk_buff *skb, bool txpending)
1421{ 1440{
1422 struct ieee80211_local *local = sdata->local; 1441 struct ieee80211_local *local = sdata->local;
1423 struct ieee80211_tx_data tx; 1442 struct ieee80211_tx_data tx;
1424 ieee80211_tx_result res_prepare; 1443 ieee80211_tx_result res_prepare;
1425 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1444 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1426 struct sk_buff *next;
1427 unsigned long flags;
1428 int ret, retries;
1429 u16 queue; 1445 u16 queue;
1446 bool result = true;
1430 1447
1431 queue = skb_get_queue_mapping(skb); 1448 queue = skb_get_queue_mapping(skb);
1432 1449
1433 if (unlikely(skb->len < 10)) { 1450 if (unlikely(skb->len < 10)) {
1434 dev_kfree_skb(skb); 1451 dev_kfree_skb(skb);
1435 return; 1452 return true;
1436 } 1453 }
1437 1454
1438 rcu_read_lock(); 1455 rcu_read_lock();
@@ -1442,85 +1459,19 @@ static void ieee80211_tx(struct ieee80211_sub_if_data *sdata,
1442 1459
1443 if (unlikely(res_prepare == TX_DROP)) { 1460 if (unlikely(res_prepare == TX_DROP)) {
1444 dev_kfree_skb(skb); 1461 dev_kfree_skb(skb);
1445 rcu_read_unlock(); 1462 goto out;
1446 return;
1447 } else if (unlikely(res_prepare == TX_QUEUED)) { 1463 } else if (unlikely(res_prepare == TX_QUEUED)) {
1448 rcu_read_unlock(); 1464 goto out;
1449 return;
1450 } 1465 }
1451 1466
1452 tx.channel = local->hw.conf.channel; 1467 tx.channel = local->hw.conf.channel;
1453 info->band = tx.channel->band; 1468 info->band = tx.channel->band;
1454 1469
1455 if (invoke_tx_handlers(&tx)) 1470 if (!invoke_tx_handlers(&tx))
1456 goto out; 1471 result = __ieee80211_tx(local, &tx.skb, tx.sta, txpending);
1457
1458 retries = 0;
1459 retry:
1460 ret = __ieee80211_tx(local, &tx.skb, tx.sta, txpending);
1461 switch (ret) {
1462 case IEEE80211_TX_OK:
1463 break;
1464 case IEEE80211_TX_AGAIN:
1465 /*
1466 * Since there are no fragmented frames on A-MPDU
1467 * queues, there's no reason for a driver to reject
1468 * a frame there, warn and drop it.
1469 */
1470 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
1471 goto drop;
1472 /* fall through */
1473 case IEEE80211_TX_PENDING:
1474 skb = tx.skb;
1475
1476 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1477
1478 if (local->queue_stop_reasons[queue] ||
1479 !skb_queue_empty(&local->pending[queue])) {
1480 /*
1481 * if queue is stopped, queue up frames for later
1482 * transmission from the tasklet
1483 */
1484 do {
1485 next = skb->next;
1486 skb->next = NULL;
1487 if (unlikely(txpending))
1488 __skb_queue_head(&local->pending[queue],
1489 skb);
1490 else
1491 __skb_queue_tail(&local->pending[queue],
1492 skb);
1493 } while ((skb = next));
1494
1495 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1496 flags);
1497 } else {
1498 /*
1499 * otherwise retry, but this is a race condition or
1500 * a driver bug (which we warn about if it persists)
1501 */
1502 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1503 flags);
1504
1505 retries++;
1506 if (WARN(retries > 10, "tx refused but queue active\n"))
1507 goto drop;
1508 goto retry;
1509 }
1510 }
1511 out: 1472 out:
1512 rcu_read_unlock(); 1473 rcu_read_unlock();
1513 return; 1474 return result;
1514
1515 drop:
1516 rcu_read_unlock();
1517
1518 skb = tx.skb;
1519 while (skb) {
1520 next = skb->next;
1521 dev_kfree_skb(skb);
1522 skb = next;
1523 }
1524} 1475}
1525 1476
1526/* device xmit handlers */ 1477/* device xmit handlers */
@@ -1750,7 +1701,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1750 __le16 fc; 1701 __le16 fc;
1751 struct ieee80211_hdr hdr; 1702 struct ieee80211_hdr hdr;
1752 struct ieee80211s_hdr mesh_hdr __maybe_unused; 1703 struct ieee80211s_hdr mesh_hdr __maybe_unused;
1753 struct mesh_path *mppath = NULL; 1704 struct mesh_path __maybe_unused *mppath = NULL;
1754 const u8 *encaps_data; 1705 const u8 *encaps_data;
1755 int encaps_len, skip_header_bytes; 1706 int encaps_len, skip_header_bytes;
1756 int nh_pos, h_pos; 1707 int nh_pos, h_pos;
@@ -1815,19 +1766,19 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1815 mppath = mpp_path_lookup(skb->data, sdata); 1766 mppath = mpp_path_lookup(skb->data, sdata);
1816 1767
1817 /* 1768 /*
1818 * Do not use address extension, if it is a packet from 1769 * Use address extension if it is a packet from
1819 * the same interface and the destination is not being 1770 * another interface or if we know the destination
1820 * proxied by any other mest point. 1771 * is being proxied by a portal (i.e. portal address
1772 * differs from proxied address)
1821 */ 1773 */
1822 if (compare_ether_addr(sdata->vif.addr, 1774 if (compare_ether_addr(sdata->vif.addr,
1823 skb->data + ETH_ALEN) == 0 && 1775 skb->data + ETH_ALEN) == 0 &&
1824 (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) { 1776 !(mppath && compare_ether_addr(mppath->mpp, skb->data))) {
1825 hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, 1777 hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
1826 skb->data, skb->data + ETH_ALEN); 1778 skb->data, skb->data + ETH_ALEN);
1827 meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, 1779 meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
1828 sdata, NULL, NULL); 1780 sdata, NULL, NULL);
1829 } else { 1781 } else {
1830 /* packet from other interface */
1831 int is_mesh_mcast = 1; 1782 int is_mesh_mcast = 1;
1832 const u8 *mesh_da; 1783 const u8 *mesh_da;
1833 1784
@@ -2067,6 +2018,11 @@ void ieee80211_clear_tx_pending(struct ieee80211_local *local)
2067 skb_queue_purge(&local->pending[i]); 2018 skb_queue_purge(&local->pending[i]);
2068} 2019}
2069 2020
2021/*
2022 * Returns false if the frame couldn't be transmitted but was queued instead,
2023 * which in this case means re-queued -- take as an indication to stop sending
2024 * more pending frames.
2025 */
2070static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, 2026static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
2071 struct sk_buff *skb) 2027 struct sk_buff *skb)
2072{ 2028{
@@ -2074,20 +2030,17 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
2074 struct ieee80211_sub_if_data *sdata; 2030 struct ieee80211_sub_if_data *sdata;
2075 struct sta_info *sta; 2031 struct sta_info *sta;
2076 struct ieee80211_hdr *hdr; 2032 struct ieee80211_hdr *hdr;
2077 int ret; 2033 bool result;
2078 bool result = true;
2079 2034
2080 sdata = vif_to_sdata(info->control.vif); 2035 sdata = vif_to_sdata(info->control.vif);
2081 2036
2082 if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) { 2037 if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) {
2083 ieee80211_tx(sdata, skb, true); 2038 result = ieee80211_tx(sdata, skb, true);
2084 } else { 2039 } else {
2085 hdr = (struct ieee80211_hdr *)skb->data; 2040 hdr = (struct ieee80211_hdr *)skb->data;
2086 sta = sta_info_get(sdata, hdr->addr1); 2041 sta = sta_info_get(sdata, hdr->addr1);
2087 2042
2088 ret = __ieee80211_tx(local, &skb, sta, true); 2043 result = __ieee80211_tx(local, &skb, sta, true);
2089 if (ret != IEEE80211_TX_OK)
2090 result = false;
2091 } 2044 }
2092 2045
2093 return result; 2046 return result;
@@ -2129,8 +2082,6 @@ void ieee80211_tx_pending(unsigned long data)
2129 flags); 2082 flags);
2130 2083
2131 txok = ieee80211_tx_pending_skb(local, skb); 2084 txok = ieee80211_tx_pending_skb(local, skb);
2132 if (!txok)
2133 __skb_queue_head(&local->pending[i], skb);
2134 spin_lock_irqsave(&local->queue_stop_reason_lock, 2085 spin_lock_irqsave(&local->queue_stop_reason_lock,
2135 flags); 2086 flags);
2136 if (!txok) 2087 if (!txok)
@@ -2178,6 +2129,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss,
2178 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) 2129 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf))
2179 aid0 = 1; 2130 aid0 = 1;
2180 2131
2132 bss->dtim_bc_mc = aid0 == 1;
2133
2181 if (have_bits) { 2134 if (have_bits) {
2182 /* Find largest even number N1 so that bits numbered 1 through 2135 /* Find largest even number N1 so that bits numbered 1 through
2183 * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits 2136 * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits
@@ -2241,7 +2194,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2241 if (sdata->vif.type == NL80211_IFTYPE_AP) { 2194 if (sdata->vif.type == NL80211_IFTYPE_AP) {
2242 ap = &sdata->u.ap; 2195 ap = &sdata->u.ap;
2243 beacon = rcu_dereference(ap->beacon); 2196 beacon = rcu_dereference(ap->beacon);
2244 if (ap && beacon) { 2197 if (beacon) {
2245 /* 2198 /*
2246 * headroom, head length, 2199 * headroom, head length,
2247 * tail length and maximum TIM length 2200 * tail length and maximum TIM length
@@ -2302,6 +2255,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2302 struct ieee80211_mgmt *mgmt; 2255 struct ieee80211_mgmt *mgmt;
2303 u8 *pos; 2256 u8 *pos;
2304 2257
2258#ifdef CONFIG_MAC80211_MESH
2259 if (!sdata->u.mesh.mesh_id_len)
2260 goto out;
2261#endif
2262
2305 /* headroom, head length, tail length and maximum TIM length */ 2263 /* headroom, head length, tail length and maximum TIM length */
2306 skb = dev_alloc_skb(local->tx_headroom + 400 + 2264 skb = dev_alloc_skb(local->tx_headroom + 400 +
2307 sdata->u.mesh.vendor_ie_len); 2265 sdata->u.mesh.vendor_ie_len);
@@ -2543,7 +2501,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
2543 if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head) 2501 if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head)
2544 goto out; 2502 goto out;
2545 2503
2546 if (bss->dtim_count != 0) 2504 if (bss->dtim_count != 0 || !bss->dtim_bc_mc)
2547 goto out; /* send buffered bc/mc only after DTIM beacon */ 2505 goto out; /* send buffered bc/mc only after DTIM beacon */
2548 2506
2549 while (1) { 2507 while (1) {
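The tx.c refactor above collapses the old tri-state return of __ieee80211_tx() (IEEE80211_TX_OK/AGAIN/PENDING) into a bool: the function now requeues frames onto local->pending[q] itself when the queue is stopped, and because drv_tx() no longer reports failure, the retry loop in ieee80211_tx() disappears. A minimal sketch of the resulting contract, with queue_stopped()/requeue()/hw_tx() as hypothetical stand-ins for the local->pending[] and drv_tx() handling shown in the diff:

	/* Hedged sketch of the post-refactor contract; not kernel code. */
	static bool tx_one(struct txq *q, struct frame *f, bool txpending)
	{
		if (queue_stopped(q)) {
			/* Preserve fragment order: a re-tried pending frame
			 * goes back to the head, a fresh frame to the tail. */
			requeue(q, f, txpending ? AT_HEAD : AT_TAIL);
			return false;	/* caller must stop feeding frames */
		}
		hw_tx(f);		/* cannot fail after this refactor */
		return true;
	}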
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d036597aabb..556647a910a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -986,12 +986,6 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
986 u16 cap = sband->ht_cap.cap; 986 u16 cap = sband->ht_cap.cap;
987 __le16 tmp; 987 __le16 tmp;
988 988
989 if (ieee80211_disable_40mhz_24ghz &&
990 sband->band == IEEE80211_BAND_2GHZ) {
991 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
992 cap &= ~IEEE80211_HT_CAP_SGI_40;
993 }
994
995 *pos++ = WLAN_EID_HT_CAPABILITY; 989 *pos++ = WLAN_EID_HT_CAPABILITY;
996 *pos++ = sizeof(struct ieee80211_ht_cap); 990 *pos++ = sizeof(struct ieee80211_ht_cap);
997 memset(pos, 0, sizeof(struct ieee80211_ht_cap)); 991 memset(pos, 0, sizeof(struct ieee80211_ht_cap));
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 36305e0d06e..e73c8cae036 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -30,7 +30,6 @@
30#define IEEE80211_AUTH_MAX_TRIES 3 30#define IEEE80211_AUTH_MAX_TRIES 3
31#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) 31#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
32#define IEEE80211_ASSOC_MAX_TRIES 3 32#define IEEE80211_ASSOC_MAX_TRIES 3
33#define IEEE80211_MAX_PROBE_TRIES 5
34 33
35enum work_action { 34enum work_action {
36 WORK_ACT_MISMATCH, 35 WORK_ACT_MISMATCH,
@@ -126,12 +125,6 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie,
126 125
127 /* determine capability flags */ 126 /* determine capability flags */
128 127
129 if (ieee80211_disable_40mhz_24ghz &&
130 sband->band == IEEE80211_BAND_2GHZ) {
131 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
132 cap &= ~IEEE80211_HT_CAP_SGI_40;
133 }
134
135 switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { 128 switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
136 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: 129 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
137 if (flags & IEEE80211_CHAN_NO_HT40PLUS) { 130 if (flags & IEEE80211_CHAN_NO_HT40PLUS) {
@@ -874,6 +867,44 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
874 kfree_skb(skb); 867 kfree_skb(skb);
875} 868}
876 869
870static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct,
871 enum nl80211_channel_type oper_ct)
872{
873 switch (wk_ct) {
874 case NL80211_CHAN_NO_HT:
875 return true;
876 case NL80211_CHAN_HT20:
877 if (oper_ct != NL80211_CHAN_NO_HT)
878 return true;
879 return false;
880 case NL80211_CHAN_HT40MINUS:
881 case NL80211_CHAN_HT40PLUS:
882 return (wk_ct == oper_ct);
883 }
884 WARN_ON(1); /* shouldn't get here */
885 return false;
886}
887
888static enum nl80211_channel_type
889ieee80211_calc_ct(enum nl80211_channel_type wk_ct,
890 enum nl80211_channel_type oper_ct)
891{
892 switch (wk_ct) {
893 case NL80211_CHAN_NO_HT:
894 return oper_ct;
895 case NL80211_CHAN_HT20:
896 if (oper_ct != NL80211_CHAN_NO_HT)
897 return oper_ct;
898 return wk_ct;
899 case NL80211_CHAN_HT40MINUS:
900 case NL80211_CHAN_HT40PLUS:
901 return wk_ct;
902 }
903 WARN_ON(1); /* shouldn't get here */
904 return wk_ct;
905}
906
907
877static void ieee80211_work_timer(unsigned long data) 908static void ieee80211_work_timer(unsigned long data)
878{ 909{
879 struct ieee80211_local *local = (void *) data; 910 struct ieee80211_local *local = (void *) data;
@@ -924,18 +955,52 @@ static void ieee80211_work_work(struct work_struct *work)
924 } 955 }
925 956
926 if (!started && !local->tmp_channel) { 957 if (!started && !local->tmp_channel) {
958 bool on_oper_chan;
959 bool tmp_chan_changed = false;
960 bool on_oper_chan2;
961 enum nl80211_channel_type wk_ct;
962 on_oper_chan = ieee80211_cfg_on_oper_channel(local);
963
964 /* Work with existing channel type if possible. */
965 wk_ct = wk->chan_type;
966 if (wk->chan == local->hw.conf.channel)
967 wk_ct = ieee80211_calc_ct(wk->chan_type,
968 local->hw.conf.channel_type);
969
970 if (local->tmp_channel)
971 if ((local->tmp_channel != wk->chan) ||
972 (local->tmp_channel_type != wk_ct))
973 tmp_chan_changed = true;
974
975 local->tmp_channel = wk->chan;
976 local->tmp_channel_type = wk_ct;
927 /* 977 /*
928 * TODO: could optimize this by leaving the 978 * Leave the station vifs in awake mode if they
929 * station vifs in awake mode if they 979 * happen to be on the same channel as
930 * happen to be on the same channel as 980 * the requested channel.
931 * the requested channel
932 */ 981 */
933 ieee80211_offchannel_stop_beaconing(local); 982 on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
934 ieee80211_offchannel_stop_station(local); 983 if (on_oper_chan != on_oper_chan2) {
984 if (on_oper_chan2) {
985 /* going off oper channel, PS too */
986 ieee80211_offchannel_stop_vifs(local,
987 true);
988 ieee80211_hw_config(local, 0);
989 } else {
990 /* going on channel, but leave PS
991 * off-channel. */
992 ieee80211_hw_config(local, 0);
993 ieee80211_offchannel_return(local,
994 true,
995 false);
996 }
997 } else if (tmp_chan_changed)
998 /* Still off-channel, but on some other
999 * channel, so update hardware.
1000 * PS should already be off-channel.
1001 */
1002 ieee80211_hw_config(local, 0);
935 1003
936 local->tmp_channel = wk->chan;
937 local->tmp_channel_type = wk->chan_type;
938 ieee80211_hw_config(local, 0);
939 started = true; 1004 started = true;
940 wk->timeout = jiffies; 1005 wk->timeout = jiffies;
941 } 1006 }
@@ -1005,15 +1070,34 @@ static void ieee80211_work_work(struct work_struct *work)
1005 continue; 1070 continue;
1006 if (wk->chan != local->tmp_channel) 1071 if (wk->chan != local->tmp_channel)
1007 continue; 1072 continue;
1008 if (wk->chan_type != local->tmp_channel_type) 1073 if (ieee80211_work_ct_coexists(wk->chan_type,
1074 local->tmp_channel_type))
1009 continue; 1075 continue;
1010 remain_off_channel = true; 1076 remain_off_channel = true;
1011 } 1077 }
1012 1078
1013 if (!remain_off_channel && local->tmp_channel) { 1079 if (!remain_off_channel && local->tmp_channel) {
1080 bool on_oper_chan = ieee80211_cfg_on_oper_channel(local);
1014 local->tmp_channel = NULL; 1081 local->tmp_channel = NULL;
1015 ieee80211_hw_config(local, 0); 1082 /* If tmp_channel wasn't operating channel, then
1016 ieee80211_offchannel_return(local, true); 1083 * we need to go back on-channel.
1084 * NOTE: If we can ever be here while scanning,
1085 * or if the hw_config() channel config logic changes,
1086 * then we may need to do a more thorough check to see if
1087 * we still need to do a hardware config. Currently,
1088 * we cannot be here while scanning, however.
1089 */
1090 if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan)
1091 ieee80211_hw_config(local, 0);
1092
1093 /* At the least, we need to disable offchannel_ps,
1094 * so just go ahead and run the entire offchannel
1095 * return logic here. We *could* skip enabling
1096 * beaconing if we were already on-oper-channel
1097 * as a future optimization.
1098 */
1099 ieee80211_offchannel_return(local, true, true);
1100
1017 /* give connection some time to breathe */ 1101 /* give connection some time to breathe */
1018 run_again(local, jiffies + HZ/2); 1102 run_again(local, jiffies + HZ/2);
1019 } 1103 }
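The two helpers added to work.c encode a small compatibility lattice for channel types: NO_HT coexists with everything, HT20 with any HT operating type, and HT40+/HT40- only with an identical type, while ieee80211_calc_ct() widens a work item's type toward the operating type whenever the two are compatible. A hypothetical harness illustrating the expected results (illustrative only; the helpers are static in work.c):

	assert(ieee80211_work_ct_coexists(NL80211_CHAN_NO_HT,
					  NL80211_CHAN_HT40PLUS));   /* true */
	assert(ieee80211_work_ct_coexists(NL80211_CHAN_HT20,
					  NL80211_CHAN_HT40MINUS));  /* true */
	assert(!ieee80211_work_ct_coexists(NL80211_CHAN_HT40PLUS,
					   NL80211_CHAN_HT40MINUS)); /* types differ */
	/* ieee80211_calc_ct() prefers the operating type when it can: */
	assert(ieee80211_calc_ct(NL80211_CHAN_HT20, NL80211_CHAN_HT40PLUS)
	       == NL80211_CHAN_HT40PLUS);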
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index bee230d8fd1..f1765de2f4b 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -26,13 +26,12 @@
26ieee80211_tx_result 26ieee80211_tx_result
27ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) 27ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
28{ 28{
29 u8 *data, *key, *mic, key_offset; 29 u8 *data, *key, *mic;
30 size_t data_len; 30 size_t data_len;
31 unsigned int hdrlen; 31 unsigned int hdrlen;
32 struct ieee80211_hdr *hdr; 32 struct ieee80211_hdr *hdr;
33 struct sk_buff *skb = tx->skb; 33 struct sk_buff *skb = tx->skb;
34 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 34 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
35 int authenticator;
36 int tail; 35 int tail;
37 36
38 hdr = (struct ieee80211_hdr *)skb->data; 37 hdr = (struct ieee80211_hdr *)skb->data;
@@ -47,6 +46,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
47 data = skb->data + hdrlen; 46 data = skb->data + hdrlen;
48 data_len = skb->len - hdrlen; 47 data_len = skb->len - hdrlen;
49 48
49 if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) {
50 /* Need to use software crypto for the test */
51 info->control.hw_key = NULL;
52 }
53
50 if (info->control.hw_key && 54 if (info->control.hw_key &&
51 !(tx->flags & IEEE80211_TX_FRAGMENTED) && 55 !(tx->flags & IEEE80211_TX_FRAGMENTED) &&
52 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { 56 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
@@ -62,17 +66,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
62 skb_headroom(skb) < TKIP_IV_LEN)) 66 skb_headroom(skb) < TKIP_IV_LEN))
63 return TX_DROP; 67 return TX_DROP;
64 68
65#if 0 69 key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
66 authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */
67#else
68 authenticator = 1;
69#endif
70 key_offset = authenticator ?
71 NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY :
72 NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY;
73 key = &tx->key->conf.key[key_offset];
74 mic = skb_put(skb, MICHAEL_MIC_LEN); 70 mic = skb_put(skb, MICHAEL_MIC_LEN);
75 michael_mic(key, hdr, data, data_len, mic); 71 michael_mic(key, hdr, data, data_len, mic);
72 if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE))
73 mic[0]++;
76 74
77 return TX_CONTINUE; 75 return TX_CONTINUE;
78} 76}
@@ -81,14 +79,13 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
81ieee80211_rx_result 79ieee80211_rx_result
82ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) 80ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
83{ 81{
84 u8 *data, *key = NULL, key_offset; 82 u8 *data, *key = NULL;
85 size_t data_len; 83 size_t data_len;
86 unsigned int hdrlen; 84 unsigned int hdrlen;
87 u8 mic[MICHAEL_MIC_LEN]; 85 u8 mic[MICHAEL_MIC_LEN];
88 struct sk_buff *skb = rx->skb; 86 struct sk_buff *skb = rx->skb;
89 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 87 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
90 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 88 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
91 int authenticator = 1, wpa_test = 0;
92 89
93 /* No way to verify the MIC if the hardware stripped it */ 90 /* No way to verify the MIC if the hardware stripped it */
94 if (status->flag & RX_FLAG_MMIC_STRIPPED) 91 if (status->flag & RX_FLAG_MMIC_STRIPPED)
@@ -106,17 +103,9 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
106 data = skb->data + hdrlen; 103 data = skb->data + hdrlen;
107 data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; 104 data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
108 105
109#if 0 106 key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
110 authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */
111#else
112 authenticator = 1;
113#endif
114 key_offset = authenticator ?
115 NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY :
116 NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY;
117 key = &rx->key->conf.key[key_offset];
118 michael_mic(key, hdr, data, data_len, mic); 107 michael_mic(key, hdr, data, data_len, mic);
119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { 108 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0) {
120 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) 109 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
121 return RX_DROP_UNUSABLE; 110 return RX_DROP_UNUSABLE;
122 111
@@ -208,7 +197,7 @@ ieee80211_rx_result
208ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) 197ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
209{ 198{
210 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; 199 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
211 int hdrlen, res, hwaccel = 0, wpa_test = 0; 200 int hdrlen, res, hwaccel = 0;
212 struct ieee80211_key *key = rx->key; 201 struct ieee80211_key *key = rx->key;
213 struct sk_buff *skb = rx->skb; 202 struct sk_buff *skb = rx->skb;
214 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 203 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
@@ -235,7 +224,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
235 hdr->addr1, hwaccel, rx->queue, 224 hdr->addr1, hwaccel, rx->queue,
236 &rx->tkip_iv32, 225 &rx->tkip_iv32,
237 &rx->tkip_iv16); 226 &rx->tkip_iv16);
238 if (res != TKIP_DECRYPT_OK || wpa_test) 227 if (res != TKIP_DECRYPT_OK)
239 return RX_DROP_UNUSABLE; 228 return RX_DROP_UNUSABLE;
240 229
241 /* Trim ICV */ 230 /* Trim ICV */
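The wpa.c hunks remove the never-taken authenticator/wpa_test branches (the TX path always used the TX MIC key offset and the RX path the RX offset) and wire up a real test hook instead: when IEEE80211_TX_INTFL_TKIP_MIC_FAILURE is set, hw_key is cleared so the Michael MIC is computed in software, and the first MIC byte is then incremented so that verification fails at the peer, exercising the TKIP countermeasures path. Condensed from the diff, the injection is:

	key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
	mic = skb_put(skb, MICHAEL_MIC_LEN);
	michael_mic(key, hdr, data, data_len, mic);
	if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE))
		mic[0]++;	/* receiver's 8-byte memcmp() now fails */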
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1534f2b44ca..c3f988aa115 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
85 85
86 If unsure, say `N'. 86 If unsure, say `N'.
87 87
88config NF_CONNTRACK_TIMESTAMP
89 bool 'Connection tracking timestamping'
90 depends on NETFILTER_ADVANCED
91 help
92 This option enables support for connection tracking timestamping.
93 This allows you to store the flow start-time and to obtain
94 the flow-stop time (once it has been destroyed) via Connection
95 tracking events.
96
97 If unsure, say `N'.
98
88config NF_CT_PROTO_DCCP 99config NF_CT_PROTO_DCCP
89 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' 100 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
90 depends on EXPERIMENTAL 101 depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
185 196
186 To compile it as a module, choose M here. If unsure, say N. 197 To compile it as a module, choose M here. If unsure, say N.
187 198
199config NF_CONNTRACK_BROADCAST
200 tristate
201
188config NF_CONNTRACK_NETBIOS_NS 202config NF_CONNTRACK_NETBIOS_NS
189 tristate "NetBIOS name service protocol support" 203 tristate "NetBIOS name service protocol support"
190 depends on NETFILTER_ADVANCED 204 depends on NETFILTER_ADVANCED
205 select NF_CONNTRACK_BROADCAST
191 help 206 help
192 NetBIOS name service requests are sent as broadcast messages from an 207 NetBIOS name service requests are sent as broadcast messages from an
193 unprivileged port and responded to with unicast messages to the 208 unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
204 219
205 To compile it as a module, choose M here. If unsure, say N. 220 To compile it as a module, choose M here. If unsure, say N.
206 221
222config NF_CONNTRACK_SNMP
223 tristate "SNMP service protocol support"
224 depends on NETFILTER_ADVANCED
225 select NF_CONNTRACK_BROADCAST
226 help
227 SNMP service requests are sent as broadcast messages from an
228 unprivileged port and responded to with unicast messages to the
229 same port. This make them hard to firewall properly because connection
230 tracking doesn't deal with broadcasts. This helper tracks locally
231 originating SNMP service requests and the corresponding
232 responses. It relies on correct IP address configuration, specifically
233 netmask and broadcast address.
234
235 To compile it as a module, choose M here. If unsure, say N.
236
207config NF_CONNTRACK_PPTP 237config NF_CONNTRACK_PPTP
208 tristate "PPtP protocol support" 238 tristate "PPtP protocol support"
209 depends on NETFILTER_ADVANCED 239 depends on NETFILTER_ADVANCED
@@ -322,10 +352,32 @@ config NETFILTER_XT_CONNMARK
322 ctmark), similarly to the packet mark (nfmark). Using this 352 ctmark), similarly to the packet mark (nfmark). Using this
323 target and match, you can set and match on this mark. 353 target and match, you can set and match on this mark.
324 354
355config NETFILTER_XT_SET
356 tristate 'set target and match support'
357 depends on IP_SET
358 depends on NETFILTER_ADVANCED
359 help
360 This option adds the "SET" target and "set" match.
361
362 Using this target and match, you can add/delete and match
363 elements in the sets created by ipset(8).
364
365 To compile it as a module, choose M here. If unsure, say N.
366
325# alphabetically ordered list of targets 367# alphabetically ordered list of targets
326 368
327comment "Xtables targets" 369comment "Xtables targets"
328 370
371config NETFILTER_XT_TARGET_AUDIT
372 tristate "AUDIT target support"
373 depends on AUDIT
374 depends on NETFILTER_ADVANCED
375 ---help---
376 This option adds an 'AUDIT' target, which can be used to create
377 audit records for packets dropped/accepted.
378
379 To compile it as a module, choose M here. If unsure, say N.
380
329config NETFILTER_XT_TARGET_CHECKSUM 381config NETFILTER_XT_TARGET_CHECKSUM
330 tristate "CHECKSUM target support" 382 tristate "CHECKSUM target support"
331 depends on IP_NF_MANGLE || IP6_NF_MANGLE 383 depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +529,7 @@ config NETFILTER_XT_TARGET_NFLOG
477config NETFILTER_XT_TARGET_NFQUEUE 529config NETFILTER_XT_TARGET_NFQUEUE
478 tristate '"NFQUEUE" target Support' 530 tristate '"NFQUEUE" target Support'
479 depends on NETFILTER_ADVANCED 531 depends on NETFILTER_ADVANCED
532 select NETFILTER_NETLINK_QUEUE
480 help 533 help
481 This target replaced the old obsolete QUEUE target. 534 This target replaced the old obsolete QUEUE target.
482 535
@@ -596,6 +649,17 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
596 649
597comment "Xtables matches" 650comment "Xtables matches"
598 651
652config NETFILTER_XT_MATCH_ADDRTYPE
653 tristate '"addrtype" address type match support'
654 depends on NETFILTER_ADVANCED
655 depends on (IPV6 || IPV6=n)
656 ---help---
657 This option allows you to match what routing thinks of an address,
658 e.g. UNICAST, LOCAL, BROADCAST, ...
659
660 If you want to compile it as a module, say M here and read
661 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
662
599config NETFILTER_XT_MATCH_CLUSTER 663config NETFILTER_XT_MATCH_CLUSTER
600 tristate '"cluster" match support' 664 tristate '"cluster" match support'
601 depends on NF_CONNTRACK 665 depends on NF_CONNTRACK
@@ -685,6 +749,15 @@ config NETFILTER_XT_MATCH_DCCP
685 If you want to compile it as a module, say M here and read 749 If you want to compile it as a module, say M here and read
686 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 750 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
687 751
752config NETFILTER_XT_MATCH_DEVGROUP
753 tristate '"devgroup" match support'
754 depends on NETFILTER_ADVANCED
755 help
756 This option adds a `devgroup' match, which allows matching on the
757 device group a network device is assigned to.
758
759 To compile it as a module, choose M here. If unsure, say N.
760
688config NETFILTER_XT_MATCH_DSCP 761config NETFILTER_XT_MATCH_DSCP
689 tristate '"dscp" and "tos" match support' 762 tristate '"dscp" and "tos" match support'
690 depends on NETFILTER_ADVANCED 763 depends on NETFILTER_ADVANCED
@@ -886,7 +959,7 @@ config NETFILTER_XT_MATCH_RATEEST
886config NETFILTER_XT_MATCH_REALM 959config NETFILTER_XT_MATCH_REALM
887 tristate '"realm" match support' 960 tristate '"realm" match support'
888 depends on NETFILTER_ADVANCED 961 depends on NETFILTER_ADVANCED
889 select NET_CLS_ROUTE 962 select IP_ROUTE_CLASSID
890 help 963 help
891 This option adds a `realm' match, which allows you to use the realm 964 This option adds a `realm' match, which allows you to use the realm
892 key from the routing subsystem inside iptables. 965 key from the routing subsystem inside iptables.
@@ -1011,4 +1084,6 @@ endif # NETFILTER_XTABLES
1011 1084
1012endmenu 1085endmenu
1013 1086
1087source "net/netfilter/ipset/Kconfig"
1088
1014source "net/netfilter/ipvs/Kconfig" 1089source "net/netfilter/ipvs/Kconfig"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 441050f3111..1a02853df86 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
2 2
3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o 3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 5nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
5 6
6obj-$(CONFIG_NETFILTER) = netfilter.o 7obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
28obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o 29obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
29obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o 30obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
30obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o 31obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
32obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
31obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o 33obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
34obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
32obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o 35obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
33obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o 36obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
34obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o 37obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -43,8 +46,10 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
43# combos 46# combos
44obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o 47obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
45obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o 48obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
49obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
46 50
47# targets 51# targets
52obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
48obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o 53obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
49obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 54obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
50obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o 55obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
@@ -65,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
65obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o 70obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
66 71
67# matches 72# matches
73obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
68obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o 74obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
69obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o 75obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
70obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o 76obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
@@ -72,6 +78,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
72obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o 78obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
73obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o 79obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
74obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o 80obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
81obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
75obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o 82obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
76obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o 83obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
77obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o 84obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
@@ -101,5 +108,8 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
101obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o 108obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
102obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o 109obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
103 110
111# ipset
112obj-$(CONFIG_IP_SET) += ipset/
113
104# IPVS 114# IPVS
105obj-$(CONFIG_IP_VS) += ipvs/ 115obj-$(CONFIG_IP_VS) += ipvs/
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4aa614b8a96..899b71c0ff5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -176,13 +176,21 @@ next_hook:
176 ret = 1; 176 ret = 1;
177 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { 177 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
178 kfree_skb(skb); 178 kfree_skb(skb);
179 ret = -(verdict >> NF_VERDICT_BITS); 179 ret = NF_DROP_GETERR(verdict);
180 if (ret == 0) 180 if (ret == 0)
181 ret = -EPERM; 181 ret = -EPERM;
182 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { 182 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
183 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 183 ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
184 verdict >> NF_VERDICT_BITS)) 184 verdict >> NF_VERDICT_QBITS);
185 goto next_hook; 185 if (ret < 0) {
186 if (ret == -ECANCELED)
187 goto next_hook;
188 if (ret == -ESRCH &&
189 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
190 goto next_hook;
191 kfree_skb(skb);
192 }
193 ret = 0;
186 } 194 }
187 rcu_read_unlock(); 195 rcu_read_unlock();
188 return ret; 196 return ret;
@@ -215,7 +223,7 @@ EXPORT_SYMBOL(skb_make_writable);
215/* This does not belong here, but locally generated errors need it if connection 223/* This does not belong here, but locally generated errors need it if connection
216 tracking in use: without this, connection may not be in hash table, and hence 224 tracking in use: without this, connection may not be in hash table, and hence
217 manufactured ICMP or RST packets will not be associated with it. */ 225 manufactured ICMP or RST packets will not be associated with it. */
218void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); 226void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
219EXPORT_SYMBOL(ip_ct_attach); 227EXPORT_SYMBOL(ip_ct_attach);
220 228
221void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) 229void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -232,7 +240,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
232} 240}
233EXPORT_SYMBOL(nf_ct_attach); 241EXPORT_SYMBOL(nf_ct_attach);
234 242
235void (*nf_ct_destroy)(struct nf_conntrack *); 243void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
236EXPORT_SYMBOL(nf_ct_destroy); 244EXPORT_SYMBOL(nf_ct_destroy);
237 245
238void nf_conntrack_destroy(struct nf_conntrack *nfct) 246void nf_conntrack_destroy(struct nf_conntrack *nfct)
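In nf_hook_slow(), the NF_QUEUE case now distinguishes the nf_queue() error codes instead of a bare success flag: -ECANCELED means the verdict was withdrawn and the hook chain re-runs, -ESRCH (no userspace queue listener) also falls through to the next hook, but only when the verdict carries NF_VERDICT_FLAG_QUEUE_BYPASS; any other failure drops the skb. Condensed from the hunk above:

	ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
		       verdict >> NF_VERDICT_QBITS);
	if (ret < 0) {
		if (ret == -ECANCELED)
			goto next_hook;		/* verdict withdrawn, retry */
		if (ret == -ESRCH &&
		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
			goto next_hook;		/* no listener: bypass queue */
		kfree_skb(skb);			/* otherwise drop */
	}
	ret = 0;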
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
new file mode 100644
index 00000000000..2c5b348eb3a
--- /dev/null
+++ b/net/netfilter/ipset/Kconfig
@@ -0,0 +1,122 @@
1menuconfig IP_SET
2 tristate "IP set support"
3 depends on INET && NETFILTER
4 depends on NETFILTER_NETLINK
5 help
6 This option adds IP set support to the kernel.
7 In order to define and use the sets, you need the userspace utility
8 ipset(8). You can use the sets in netfilter via the "set" match
9 and "SET" target.
10
11 To compile it as a module, choose M here. If unsure, say N.
12
13if IP_SET
14
15config IP_SET_MAX
16 int "Maximum number of IP sets"
17 default 256
18 range 2 65534
19 depends on IP_SET
20 help
21 You can define here the default value of the maximum number
22 of IP sets for the kernel.
23
24 The value can be overridden by the 'max_sets' module
25 parameter of the 'ip_set' module.
26
27config IP_SET_BITMAP_IP
28 tristate "bitmap:ip set support"
29 depends on IP_SET
30 help
31 This option adds the bitmap:ip set type support, by which one
32 can store IPv4 addresses (or network addresses) from a range.
33
34 To compile it as a module, choose M here. If unsure, say N.
35
36config IP_SET_BITMAP_IPMAC
37 tristate "bitmap:ip,mac set support"
38 depends on IP_SET
39 help
40 This option adds the bitmap:ip,mac set type support, by which one
41 can store IPv4 address and (source) MAC address pairs from a range.
42
43 To compile it as a module, choose M here. If unsure, say N.
44
45config IP_SET_BITMAP_PORT
46 tristate "bitmap:port set support"
47 depends on IP_SET
48 help
49 This option adds the bitmap:port set type support, by which one
50 can store TCP/UDP port numbers from a range.
51
52 To compile it as a module, choose M here. If unsure, say N.
53
54config IP_SET_HASH_IP
55 tristate "hash:ip set support"
56 depends on IP_SET
57 help
58 This option adds the hash:ip set type support, by which one
59 can store arbitrary IPv4 or IPv6 addresses (or network addresses)
60 in a set.
61
62 To compile it as a module, choose M here. If unsure, say N.
63
64config IP_SET_HASH_IPPORT
65 tristate "hash:ip,port set support"
66 depends on IP_SET
67 help
68 This option adds the hash:ip,port set type support, by which one
69 can store IPv4/IPv6 address and protocol/port pairs.
70
71 To compile it as a module, choose M here. If unsure, say N.
72
73config IP_SET_HASH_IPPORTIP
74 tristate "hash:ip,port,ip set support"
75 depends on IP_SET
76 help
77 This option adds the hash:ip,port,ip set type support, by which
78 one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
79 address triples in a set.
80
81 To compile it as a module, choose M here. If unsure, say N.
82
83config IP_SET_HASH_IPPORTNET
84 tristate "hash:ip,port,net set support"
85 depends on IP_SET
86 help
87 This option adds the hash:ip,port,net set type support, by which
88 one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
89 network address/prefix triples in a set.
90
91 To compile it as a module, choose M here. If unsure, say N.
92
93config IP_SET_HASH_NET
94 tristate "hash:net set support"
95 depends on IP_SET
96 help
97 This option adds the hash:net set type support, by which
98 one can store IPv4/IPv6 network address/prefix elements in a set.
99
100 To compile it as a module, choose M here. If unsure, say N.
101
102config IP_SET_HASH_NETPORT
103 tristate "hash:net,port set support"
104 depends on IP_SET
105 help
106 This option adds the hash:net,port set type support, by which
107 one can store IPv4/IPv6 network address/prefix and
108 protocol/port pairs as elements in a set.
109
110 To compile it as a module, choose M here. If unsure, say N.
111
112config IP_SET_LIST_SET
113 tristate "list:set set support"
114 depends on IP_SET
115 help
116 This option adds the list:set set type support. In this
117 kind of set one can store the names of other sets and it forms
118 an ordered union of the member sets.
119
120 To compile it as a module, choose M here. If unsure, say N.
121
122endif # IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
new file mode 100644
index 00000000000..5adbdab67bd
--- /dev/null
+++ b/net/netfilter/ipset/Makefile
@@ -0,0 +1,24 @@
1#
2# Makefile for the ipset modules
3#
4
5ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o
6
7# ipset core
8obj-$(CONFIG_IP_SET) += ip_set.o
9
10# bitmap types
11obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o
12obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o
13obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
14
15# hash types
16obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
17obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
18obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
19obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
20obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
21obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
22
23# list types
24obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
new file mode 100644
index 00000000000..bca96990218
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -0,0 +1,587 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module implementing an IP set type: the bitmap:ip type */
11
12#include <linux/module.h>
13#include <linux/ip.h>
14#include <linux/skbuff.h>
15#include <linux/errno.h>
16#include <linux/bitops.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/jiffies.h>
20#include <linux/timer.h>
21#include <net/netlink.h>
22#include <net/tcp.h>
23
24#include <linux/netfilter/ipset/pfxlen.h>
25#include <linux/netfilter/ipset/ip_set.h>
26#include <linux/netfilter/ipset/ip_set_bitmap.h>
27#define IP_SET_BITMAP_TIMEOUT
28#include <linux/netfilter/ipset/ip_set_timeout.h>
29
30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
32MODULE_DESCRIPTION("bitmap:ip type of IP sets");
33MODULE_ALIAS("ip_set_bitmap:ip");
34
35/* Type structure */
36struct bitmap_ip {
37 void *members; /* the set members */
38 u32 first_ip; /* host byte order, included in range */
39 u32 last_ip; /* host byte order, included in range */
40 u32 elements; /* number of max elements in the set */
41 u32 hosts; /* number of hosts in a subnet */
42 size_t memsize; /* members size */
43 u8 netmask; /* subnet netmask */
44 u32 timeout; /* timeout parameter */
45 struct timer_list gc; /* garbage collection */
46};
47
48/* Base variant */
49
50static inline u32
51ip_to_id(const struct bitmap_ip *m, u32 ip)
52{
53 return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
54}
55
56static int
57bitmap_ip_test(struct ip_set *set, void *value, u32 timeout)
58{
59 const struct bitmap_ip *map = set->data;
60 u16 id = *(u16 *)value;
61
62 return !!test_bit(id, map->members);
63}
64
65static int
66bitmap_ip_add(struct ip_set *set, void *value, u32 timeout)
67{
68 struct bitmap_ip *map = set->data;
69 u16 id = *(u16 *)value;
70
71 if (test_and_set_bit(id, map->members))
72 return -IPSET_ERR_EXIST;
73
74 return 0;
75}
76
77static int
78bitmap_ip_del(struct ip_set *set, void *value, u32 timeout)
79{
80 struct bitmap_ip *map = set->data;
81 u16 id = *(u16 *)value;
82
83 if (!test_and_clear_bit(id, map->members))
84 return -IPSET_ERR_EXIST;
85
86 return 0;
87}
88
89static int
90bitmap_ip_list(const struct ip_set *set,
91 struct sk_buff *skb, struct netlink_callback *cb)
92{
93 const struct bitmap_ip *map = set->data;
94 struct nlattr *atd, *nested;
95 u32 id, first = cb->args[2];
96
97 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
98 if (!atd)
99 return -EMSGSIZE;
100 for (; cb->args[2] < map->elements; cb->args[2]++) {
101 id = cb->args[2];
102 if (!test_bit(id, map->members))
103 continue;
104 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
105 if (!nested) {
106 if (id == first) {
107 nla_nest_cancel(skb, atd);
108 return -EMSGSIZE;
109 } else
110 goto nla_put_failure;
111 }
112 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
113 htonl(map->first_ip + id * map->hosts));
114 ipset_nest_end(skb, nested);
115 }
116 ipset_nest_end(skb, atd);
117 /* Set listing finished */
118 cb->args[2] = 0;
119 return 0;
120
121nla_put_failure:
122 nla_nest_cancel(skb, nested);
123 ipset_nest_end(skb, atd);
124 if (unlikely(id == first)) {
125 cb->args[2] = 0;
126 return -EMSGSIZE;
127 }
128 return 0;
129}
130
131/* Timeout variant */
132
133static int
134bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout)
135{
136 const struct bitmap_ip *map = set->data;
137 const unsigned long *members = map->members;
138 u16 id = *(u16 *)value;
139
140 return ip_set_timeout_test(members[id]);
141}
142
143static int
144bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout)
145{
146 struct bitmap_ip *map = set->data;
147 unsigned long *members = map->members;
148 u16 id = *(u16 *)value;
149
150 if (ip_set_timeout_test(members[id]))
151 return -IPSET_ERR_EXIST;
152
153 members[id] = ip_set_timeout_set(timeout);
154
155 return 0;
156}
157
158static int
159bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout)
160{
161 struct bitmap_ip *map = set->data;
162 unsigned long *members = map->members;
163 u16 id = *(u16 *)value;
164 int ret = -IPSET_ERR_EXIST;
165
166 if (ip_set_timeout_test(members[id]))
167 ret = 0;
168
169 members[id] = IPSET_ELEM_UNSET;
170 return ret;
171}
172
173static int
174bitmap_ip_tlist(const struct ip_set *set,
175 struct sk_buff *skb, struct netlink_callback *cb)
176{
177 const struct bitmap_ip *map = set->data;
178 struct nlattr *adt, *nested;
179 u32 id, first = cb->args[2];
180 const unsigned long *members = map->members;
181
182 adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
183 if (!adt)
184 return -EMSGSIZE;
185 for (; cb->args[2] < map->elements; cb->args[2]++) {
186 id = cb->args[2];
187 if (!ip_set_timeout_test(members[id]))
188 continue;
189 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
190 if (!nested) {
191 if (id == first) {
192 nla_nest_cancel(skb, adt);
193 return -EMSGSIZE;
194 } else
195 goto nla_put_failure;
196 }
197 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
198 htonl(map->first_ip + id * map->hosts));
199 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
200 htonl(ip_set_timeout_get(members[id])));
201 ipset_nest_end(skb, nested);
202 }
203 ipset_nest_end(skb, adt);
204
205 /* Set listing finished */
206 cb->args[2] = 0;
207
208 return 0;
209
210nla_put_failure:
211 nla_nest_cancel(skb, nested);
212 ipset_nest_end(skb, adt);
213 if (unlikely(id == first)) {
214 cb->args[2] = 0;
215 return -EMSGSIZE;
216 }
217 return 0;
218}
219
220static int
221bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
222 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
223{
224 struct bitmap_ip *map = set->data;
225 ipset_adtfn adtfn = set->variant->adt[adt];
226 u32 ip;
227
228 ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
229 if (ip < map->first_ip || ip > map->last_ip)
230 return -IPSET_ERR_BITMAP_RANGE;
231
232 ip = ip_to_id(map, ip);
233
234 return adtfn(set, &ip, map->timeout);
235}
236
237static int
238bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
239 enum ipset_adt adt, u32 *lineno, u32 flags)
240{
241 struct bitmap_ip *map = set->data;
242 ipset_adtfn adtfn = set->variant->adt[adt];
243 u32 timeout = map->timeout;
244 u32 ip, ip_to, id;
245 int ret = 0;
246
247 if (unlikely(!tb[IPSET_ATTR_IP] ||
248 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
249 return -IPSET_ERR_PROTOCOL;
250
251 if (tb[IPSET_ATTR_LINENO])
252 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
253
254 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
255 if (ret)
256 return ret;
257
258 if (ip < map->first_ip || ip > map->last_ip)
259 return -IPSET_ERR_BITMAP_RANGE;
260
261 if (tb[IPSET_ATTR_TIMEOUT]) {
262 if (!with_timeout(map->timeout))
263 return -IPSET_ERR_TIMEOUT;
264 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
265 }
266
267 if (adt == IPSET_TEST) {
268 id = ip_to_id(map, ip);
269 return adtfn(set, &id, timeout);
270 }
271
272 if (tb[IPSET_ATTR_IP_TO]) {
273 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
274 if (ret)
275 return ret;
276 if (ip > ip_to) {
277 swap(ip, ip_to);
278 if (ip < map->first_ip)
279 return -IPSET_ERR_BITMAP_RANGE;
280 }
281 } else if (tb[IPSET_ATTR_CIDR]) {
282 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
283
284 if (cidr > 32)
285 return -IPSET_ERR_INVALID_CIDR;
286 ip &= ip_set_hostmask(cidr);
287 ip_to = ip | ~ip_set_hostmask(cidr);
288 } else
289 ip_to = ip;
290
291 if (ip_to > map->last_ip)
292 return -IPSET_ERR_BITMAP_RANGE;
293
294 for (; !before(ip_to, ip); ip += map->hosts) {
295 id = ip_to_id(map, ip);
296 ret = adtfn(set, &id, timeout);
297
298 if (ret && !ip_set_eexist(ret, flags))
299 return ret;
300 else
301 ret = 0;
302 }
303 return ret;
304}
305
306static void
307bitmap_ip_destroy(struct ip_set *set)
308{
309 struct bitmap_ip *map = set->data;
310
311 if (with_timeout(map->timeout))
312 del_timer_sync(&map->gc);
313
314 ip_set_free(map->members);
315 kfree(map);
316
317 set->data = NULL;
318}
319
320static void
321bitmap_ip_flush(struct ip_set *set)
322{
323 struct bitmap_ip *map = set->data;
324
325 memset(map->members, 0, map->memsize);
326}
327
328static int
329bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
330{
331 const struct bitmap_ip *map = set->data;
332 struct nlattr *nested;
333
334 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
335 if (!nested)
336 goto nla_put_failure;
337 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
338 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
339 if (map->netmask != 32)
340 NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
341 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
342 htonl(atomic_read(&set->ref) - 1));
343 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
344 htonl(sizeof(*map) + map->memsize));
345 if (with_timeout(map->timeout))
346 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
347 ipset_nest_end(skb, nested);
348
349 return 0;
350nla_put_failure:
351 return -EMSGSIZE;
352}
353
354static bool
355bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
356{
357 const struct bitmap_ip *x = a->data;
358 const struct bitmap_ip *y = b->data;
359
360 return x->first_ip == y->first_ip &&
361 x->last_ip == y->last_ip &&
362 x->netmask == y->netmask &&
363 x->timeout == y->timeout;
364}
365
366static const struct ip_set_type_variant bitmap_ip = {
367 .kadt = bitmap_ip_kadt,
368 .uadt = bitmap_ip_uadt,
369 .adt = {
370 [IPSET_ADD] = bitmap_ip_add,
371 [IPSET_DEL] = bitmap_ip_del,
372 [IPSET_TEST] = bitmap_ip_test,
373 },
374 .destroy = bitmap_ip_destroy,
375 .flush = bitmap_ip_flush,
376 .head = bitmap_ip_head,
377 .list = bitmap_ip_list,
378 .same_set = bitmap_ip_same_set,
379};
380
381static const struct ip_set_type_variant bitmap_tip = {
382 .kadt = bitmap_ip_kadt,
383 .uadt = bitmap_ip_uadt,
384 .adt = {
385 [IPSET_ADD] = bitmap_ip_tadd,
386 [IPSET_DEL] = bitmap_ip_tdel,
387 [IPSET_TEST] = bitmap_ip_ttest,
388 },
389 .destroy = bitmap_ip_destroy,
390 .flush = bitmap_ip_flush,
391 .head = bitmap_ip_head,
392 .list = bitmap_ip_tlist,
393 .same_set = bitmap_ip_same_set,
394};
395
396static void
397bitmap_ip_gc(unsigned long ul_set)
398{
399 struct ip_set *set = (struct ip_set *) ul_set;
400 struct bitmap_ip *map = set->data;
401 unsigned long *table = map->members;
402 u32 id;
403
404 /* We run parallel with other readers (test element)
405 * but adding/deleting new entries is locked out */
406 read_lock_bh(&set->lock);
407 for (id = 0; id < map->elements; id++)
408 if (ip_set_timeout_expired(table[id]))
409 table[id] = IPSET_ELEM_UNSET;
410 read_unlock_bh(&set->lock);
411
412 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
413 add_timer(&map->gc);
414}
415
416static void
417bitmap_ip_gc_init(struct ip_set *set)
418{
419 struct bitmap_ip *map = set->data;
420
421 init_timer(&map->gc);
422 map->gc.data = (unsigned long) set;
423 map->gc.function = bitmap_ip_gc;
424 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
425 add_timer(&map->gc);
426}
427
428/* Create bitmap:ip type of sets */
429
430static bool
431init_map_ip(struct ip_set *set, struct bitmap_ip *map,
432 u32 first_ip, u32 last_ip,
433 u32 elements, u32 hosts, u8 netmask)
434{
435 map->members = ip_set_alloc(map->memsize);
436 if (!map->members)
437 return false;
438 map->first_ip = first_ip;
439 map->last_ip = last_ip;
440 map->elements = elements;
441 map->hosts = hosts;
442 map->netmask = netmask;
443 map->timeout = IPSET_NO_TIMEOUT;
444
445 set->data = map;
446 set->family = AF_INET;
447
448 return true;
449}
450
451static int
452bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
453{
454 struct bitmap_ip *map;
455 u32 first_ip, last_ip, hosts, elements;
456 u8 netmask = 32;
457 int ret;
458
459 if (unlikely(!tb[IPSET_ATTR_IP] ||
460 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
461 return -IPSET_ERR_PROTOCOL;
462
463 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
464 if (ret)
465 return ret;
466
467 if (tb[IPSET_ATTR_IP_TO]) {
468 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
469 if (ret)
470 return ret;
471 if (first_ip > last_ip) {
472 u32 tmp = first_ip;
473
474 first_ip = last_ip;
475 last_ip = tmp;
476 }
477 } else if (tb[IPSET_ATTR_CIDR]) {
478 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
479
480 if (cidr >= 32)
481 return -IPSET_ERR_INVALID_CIDR;
482 last_ip = first_ip | ~ip_set_hostmask(cidr);
483 } else
484 return -IPSET_ERR_PROTOCOL;
485
486 if (tb[IPSET_ATTR_NETMASK]) {
487 netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
488
489 if (netmask > 32)
490 return -IPSET_ERR_INVALID_NETMASK;
491
492 first_ip &= ip_set_hostmask(netmask);
493 last_ip |= ~ip_set_hostmask(netmask);
494 }
495
496 if (netmask == 32) {
497 hosts = 1;
498 elements = last_ip - first_ip + 1;
499 } else {
500 u8 mask_bits;
501 u32 mask;
502
503 mask = range_to_mask(first_ip, last_ip, &mask_bits);
504
505 if ((!mask && (first_ip || last_ip != 0xFFFFFFFF)) ||
506 netmask <= mask_bits)
507 return -IPSET_ERR_BITMAP_RANGE;
508
509 pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
510 hosts = 2 << (32 - netmask - 1);
511 elements = 2 << (netmask - mask_bits - 1);
512 }
513 if (elements > IPSET_BITMAP_MAX_RANGE + 1)
514 return -IPSET_ERR_BITMAP_RANGE_SIZE;
515
516 pr_debug("hosts %u, elements %u\n", hosts, elements);
517
518 map = kzalloc(sizeof(*map), GFP_KERNEL);
519 if (!map)
520 return -ENOMEM;
521
522 if (tb[IPSET_ATTR_TIMEOUT]) {
523 map->memsize = elements * sizeof(unsigned long);
524
525 if (!init_map_ip(set, map, first_ip, last_ip,
526 elements, hosts, netmask)) {
527 kfree(map);
528 return -ENOMEM;
529 }
530
531 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
532 set->variant = &bitmap_tip;
533
534 bitmap_ip_gc_init(set);
535 } else {
536 map->memsize = bitmap_bytes(0, elements - 1);
537
538 if (!init_map_ip(set, map, first_ip, last_ip,
539 elements, hosts, netmask)) {
540 kfree(map);
541 return -ENOMEM;
542 }
543
544 set->variant = &bitmap_ip;
545 }
546 return 0;
547}
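
A worked example for the arithmetic above (editorial aside, not part of the patch): creating a set over 192.168.0.0-192.168.3.255 (a /22, so mask_bits = 22) with netmask 24 yields hosts = 2 << (32 - 24 - 1) = 256, the number of addresses each /24 block covers, and elements = 2 << (24 - 22 - 1) = 4, the four /24 blocks inside the /22 that the bitmap actually tracks. Writing 2 << (n - 1) instead of the equivalent 1 << n keeps the shift count below 32 even in the extreme cases.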
548
549static struct ip_set_type bitmap_ip_type __read_mostly = {
550 .name = "bitmap:ip",
551 .protocol = IPSET_PROTOCOL,
552 .features = IPSET_TYPE_IP,
553 .dimension = IPSET_DIM_ONE,
554 .family = AF_INET,
555 .revision = 0,
556 .create = bitmap_ip_create,
557 .create_policy = {
558 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
559 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
560 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
561 [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
562 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
563 },
564 .adt_policy = {
565 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
566 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
567 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
568 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
569 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
570 },
571 .me = THIS_MODULE,
572};
573
574static int __init
575bitmap_ip_init(void)
576{
577 return ip_set_type_register(&bitmap_ip_type);
578}
579
580static void __exit
581bitmap_ip_fini(void)
582{
583 ip_set_type_unregister(&bitmap_ip_type);
584}
585
586module_init(bitmap_ip_init);
587module_exit(bitmap_ip_fini);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
new file mode 100644
index 00000000000..5e790172def
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -0,0 +1,652 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Martin Josefsson <gandalf@wlug.westbo.se>
4 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/* Kernel module implementing an IP set type: the bitmap:ip,mac type */
12
13#include <linux/module.h>
14#include <linux/ip.h>
15#include <linux/etherdevice.h>
16#include <linux/skbuff.h>
17#include <linux/errno.h>
18#include <linux/if_ether.h>
19#include <linux/netlink.h>
20#include <linux/jiffies.h>
21#include <linux/timer.h>
22#include <net/netlink.h>
23
24#include <linux/netfilter/ipset/pfxlen.h>
25#include <linux/netfilter/ipset/ip_set.h>
26#include <linux/netfilter/ipset/ip_set_timeout.h>
27#include <linux/netfilter/ipset/ip_set_bitmap.h>
28
29MODULE_LICENSE("GPL");
30MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
31MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets");
32MODULE_ALIAS("ip_set_bitmap:ip,mac");
33
34enum {
35 MAC_EMPTY, /* element is not set */
36 MAC_FILLED, /* element is set with MAC */
37 MAC_UNSET, /* element is set, without MAC */
38};
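
An illustrative summary of how the add/del handlers below move an element between these three states (editorial aside, not part of the patch):

	/*
	 *	MAC_EMPTY  --- add(ip)     --->  MAC_UNSET
	 *	MAC_EMPTY  --- add(ip,mac) --->  MAC_FILLED
	 *	MAC_UNSET  --- add(ip,mac) --->  MAC_FILLED  (the MAC may be
	 *	                                  filled in by the kernel on match)
	 *	any state  --- del         --->  MAC_EMPTY
	 */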
39
40/* Type structure */
41struct bitmap_ipmac {
42 void *members; /* the set members */
43 u32 first_ip; /* host byte order, included in range */
44 u32 last_ip; /* host byte order, included in range */
45 u32 timeout; /* timeout value */
46 struct timer_list gc; /* garbage collector */
47 size_t dsize; /* size of element */
48};
49
50/* ADT structure for generic function args */
51struct ipmac {
52 u32 id; /* id in array */
53 unsigned char *ether; /* ethernet address */
54};
55
56/* Member element without and with timeout */
57
58struct ipmac_elem {
59 unsigned char ether[ETH_ALEN];
60 unsigned char match;
61} __attribute__ ((aligned));
62
63struct ipmac_telem {
64 unsigned char ether[ETH_ALEN];
65 unsigned char match;
66 unsigned long timeout;
67} __attribute__ ((aligned));
68
69static inline void *
70bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id)
71{
72 return (void *)((char *)map->members + id * map->dsize);
73}
74
75static inline bool
76bitmap_timeout(const struct bitmap_ipmac *map, u32 id)
77{
78 const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
79
80 return ip_set_timeout_test(elem->timeout);
81}
82
83static inline bool
84bitmap_expired(const struct bitmap_ipmac *map, u32 id)
85{
86 const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
87
88 return ip_set_timeout_expired(elem->timeout);
89}
90
91static inline int
92bitmap_ipmac_exist(const struct ipmac_telem *elem)
93{
94 return elem->match == MAC_UNSET ||
95 (elem->match == MAC_FILLED &&
96 !ip_set_timeout_expired(elem->timeout));
97}
98
99/* Base variant */
100
101static int
102bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout)
103{
104 const struct bitmap_ipmac *map = set->data;
105 const struct ipmac *data = value;
106 const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
107
108 switch (elem->match) {
109 case MAC_UNSET:
110		/* Trigger the kernel to fill in the ethernet address */
111 return -EAGAIN;
112 case MAC_FILLED:
113 return data->ether == NULL ||
114 compare_ether_addr(data->ether, elem->ether) == 0;
115 }
116 return 0;
117}
118
119static int
120bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout)
121{
122 struct bitmap_ipmac *map = set->data;
123 const struct ipmac *data = value;
124 struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
125
126 switch (elem->match) {
127 case MAC_UNSET:
128 if (!data->ether)
129 /* Already added without ethernet address */
130 return -IPSET_ERR_EXIST;
131 /* Fill the MAC address */
132 memcpy(elem->ether, data->ether, ETH_ALEN);
133 elem->match = MAC_FILLED;
134 break;
135 case MAC_FILLED:
136 return -IPSET_ERR_EXIST;
137 case MAC_EMPTY:
138 if (data->ether) {
139 memcpy(elem->ether, data->ether, ETH_ALEN);
140 elem->match = MAC_FILLED;
141 } else
142 elem->match = MAC_UNSET;
143 }
144
145 return 0;
146}
147
148static int
149bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout)
150{
151 struct bitmap_ipmac *map = set->data;
152 const struct ipmac *data = value;
153 struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
154
155 if (elem->match == MAC_EMPTY)
156 return -IPSET_ERR_EXIST;
157
158 elem->match = MAC_EMPTY;
159
160 return 0;
161}
162
163static int
164bitmap_ipmac_list(const struct ip_set *set,
165 struct sk_buff *skb, struct netlink_callback *cb)
166{
167 const struct bitmap_ipmac *map = set->data;
168 const struct ipmac_elem *elem;
169 struct nlattr *atd, *nested;
170 u32 id, first = cb->args[2];
171 u32 last = map->last_ip - map->first_ip;
172
173 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
174 if (!atd)
175 return -EMSGSIZE;
176 for (; cb->args[2] <= last; cb->args[2]++) {
177 id = cb->args[2];
178 elem = bitmap_ipmac_elem(map, id);
179 if (elem->match == MAC_EMPTY)
180 continue;
181 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
182 if (!nested) {
183 if (id == first) {
184 nla_nest_cancel(skb, atd);
185 return -EMSGSIZE;
186 } else
187 goto nla_put_failure;
188 }
189 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
190 htonl(map->first_ip + id));
191 if (elem->match == MAC_FILLED)
192 NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
193 elem->ether);
194 ipset_nest_end(skb, nested);
195 }
196 ipset_nest_end(skb, atd);
197 /* Set listing finished */
198 cb->args[2] = 0;
199
200 return 0;
201
202nla_put_failure:
203 nla_nest_cancel(skb, nested);
204 ipset_nest_end(skb, atd);
205 if (unlikely(id == first)) {
206 cb->args[2] = 0;
207 return -EMSGSIZE;
208 }
209 return 0;
210}
211
212/* Timeout variant */
213
214static int
215bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout)
216{
217 const struct bitmap_ipmac *map = set->data;
218 const struct ipmac *data = value;
219 const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
220
221 switch (elem->match) {
222 case MAC_UNSET:
223		/* Trigger the kernel to fill in the ethernet address */
224 return -EAGAIN;
225 case MAC_FILLED:
226 return (data->ether == NULL ||
227 compare_ether_addr(data->ether, elem->ether) == 0) &&
228 !bitmap_expired(map, data->id);
229 }
230 return 0;
231}
232
233static int
234bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout)
235{
236 struct bitmap_ipmac *map = set->data;
237 const struct ipmac *data = value;
238 struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
239
240 switch (elem->match) {
241 case MAC_UNSET:
242 if (!data->ether)
243 /* Already added without ethernet address */
244 return -IPSET_ERR_EXIST;
245 /* Fill the MAC address and activate the timer */
246 memcpy(elem->ether, data->ether, ETH_ALEN);
247 elem->match = MAC_FILLED;
248 if (timeout == map->timeout)
249 /* Timeout was not specified, get stored one */
250 timeout = elem->timeout;
251 elem->timeout = ip_set_timeout_set(timeout);
252 break;
253 case MAC_FILLED:
254 if (!bitmap_expired(map, data->id))
255 return -IPSET_ERR_EXIST;
256 /* Fall through */
257 case MAC_EMPTY:
258 if (data->ether) {
259 memcpy(elem->ether, data->ether, ETH_ALEN);
260 elem->match = MAC_FILLED;
261 } else
262 elem->match = MAC_UNSET;
263		/* If the MAC is still unset, we store the plain timeout
264		 * value, because the timer is not activated yet; it can be
265		 * reused later, when the MAC is filled out, possibly by
266		 * the kernel */
267 elem->timeout = data->ether ? ip_set_timeout_set(timeout)
268 : timeout;
269 break;
270 }
271
272 return 0;
273}
274
275static int
276bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout)
277{
278 struct bitmap_ipmac *map = set->data;
279 const struct ipmac *data = value;
280 struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
281
282 if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id))
283 return -IPSET_ERR_EXIST;
284
285 elem->match = MAC_EMPTY;
286
287 return 0;
288}
289
290static int
291bitmap_ipmac_tlist(const struct ip_set *set,
292 struct sk_buff *skb, struct netlink_callback *cb)
293{
294 const struct bitmap_ipmac *map = set->data;
295 const struct ipmac_telem *elem;
296 struct nlattr *atd, *nested;
297 u32 id, first = cb->args[2];
298 u32 timeout, last = map->last_ip - map->first_ip;
299
300 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
301 if (!atd)
302 return -EMSGSIZE;
303 for (; cb->args[2] <= last; cb->args[2]++) {
304 id = cb->args[2];
305 elem = bitmap_ipmac_elem(map, id);
306 if (!bitmap_ipmac_exist(elem))
307 continue;
308 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
309 if (!nested) {
310 if (id == first) {
311 nla_nest_cancel(skb, atd);
312 return -EMSGSIZE;
313 } else
314 goto nla_put_failure;
315 }
316 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
317 htonl(map->first_ip + id));
318 if (elem->match == MAC_FILLED)
319 NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
320 elem->ether);
321 timeout = elem->match == MAC_UNSET ? elem->timeout
322 : ip_set_timeout_get(elem->timeout);
323 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout));
324 ipset_nest_end(skb, nested);
325 }
326 ipset_nest_end(skb, atd);
327 /* Set listing finished */
328 cb->args[2] = 0;
329
330 return 0;
331
332nla_put_failure:
333 nla_nest_cancel(skb, nested);
334 ipset_nest_end(skb, atd);
335 return -EMSGSIZE;
336}
337
338static int
339bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
340 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
341{
342 struct bitmap_ipmac *map = set->data;
343 ipset_adtfn adtfn = set->variant->adt[adt];
344 struct ipmac data;
345
346 data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
347 if (data.id < map->first_ip || data.id > map->last_ip)
348 return -IPSET_ERR_BITMAP_RANGE;
349
350 /* Backward compatibility: we don't check the second flag */
351 if (skb_mac_header(skb) < skb->head ||
352 (skb_mac_header(skb) + ETH_HLEN) > skb->data)
353 return -EINVAL;
354
355 data.id -= map->first_ip;
356 data.ether = eth_hdr(skb)->h_source;
357
358 return adtfn(set, &data, map->timeout);
359}
360
361static int
362bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
363 enum ipset_adt adt, u32 *lineno, u32 flags)
364{
365 const struct bitmap_ipmac *map = set->data;
366 ipset_adtfn adtfn = set->variant->adt[adt];
367 struct ipmac data;
368 u32 timeout = map->timeout;
369 int ret = 0;
370
371 if (unlikely(!tb[IPSET_ATTR_IP] ||
372 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
373 return -IPSET_ERR_PROTOCOL;
374
375 if (tb[IPSET_ATTR_LINENO])
376 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
377
378 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id);
379 if (ret)
380 return ret;
381
382 if (data.id < map->first_ip || data.id > map->last_ip)
383 return -IPSET_ERR_BITMAP_RANGE;
384
385 if (tb[IPSET_ATTR_ETHER])
386 data.ether = nla_data(tb[IPSET_ATTR_ETHER]);
387 else
388 data.ether = NULL;
389
390 if (tb[IPSET_ATTR_TIMEOUT]) {
391 if (!with_timeout(map->timeout))
392 return -IPSET_ERR_TIMEOUT;
393 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
394 }
395
396 data.id -= map->first_ip;
397
398 ret = adtfn(set, &data, timeout);
399
400 return ip_set_eexist(ret, flags) ? 0 : ret;
401}
402
403static void
404bitmap_ipmac_destroy(struct ip_set *set)
405{
406 struct bitmap_ipmac *map = set->data;
407
408 if (with_timeout(map->timeout))
409 del_timer_sync(&map->gc);
410
411 ip_set_free(map->members);
412 kfree(map);
413
414 set->data = NULL;
415}
416
417static void
418bitmap_ipmac_flush(struct ip_set *set)
419{
420 struct bitmap_ipmac *map = set->data;
421
422 memset(map->members, 0,
423 (map->last_ip - map->first_ip + 1) * map->dsize);
424}
425
426static int
427bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
428{
429 const struct bitmap_ipmac *map = set->data;
430 struct nlattr *nested;
431
432 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
433 if (!nested)
434 goto nla_put_failure;
435 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
436 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
437 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
438 htonl(atomic_read(&set->ref) - 1));
439 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
440 htonl(sizeof(*map)
441 + (map->last_ip - map->first_ip + 1) * map->dsize));
442 if (with_timeout(map->timeout))
443 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
444 ipset_nest_end(skb, nested);
445
446 return 0;
447nla_put_failure:
448 return -EMSGSIZE;
449}
450
451static bool
452bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
453{
454 const struct bitmap_ipmac *x = a->data;
455 const struct bitmap_ipmac *y = b->data;
456
457 return x->first_ip == y->first_ip &&
458 x->last_ip == y->last_ip &&
459 x->timeout == y->timeout;
460}
461
462static const struct ip_set_type_variant bitmap_ipmac = {
463 .kadt = bitmap_ipmac_kadt,
464 .uadt = bitmap_ipmac_uadt,
465 .adt = {
466 [IPSET_ADD] = bitmap_ipmac_add,
467 [IPSET_DEL] = bitmap_ipmac_del,
468 [IPSET_TEST] = bitmap_ipmac_test,
469 },
470 .destroy = bitmap_ipmac_destroy,
471 .flush = bitmap_ipmac_flush,
472 .head = bitmap_ipmac_head,
473 .list = bitmap_ipmac_list,
474 .same_set = bitmap_ipmac_same_set,
475};
476
477static const struct ip_set_type_variant bitmap_tipmac = {
478 .kadt = bitmap_ipmac_kadt,
479 .uadt = bitmap_ipmac_uadt,
480 .adt = {
481 [IPSET_ADD] = bitmap_ipmac_tadd,
482 [IPSET_DEL] = bitmap_ipmac_tdel,
483 [IPSET_TEST] = bitmap_ipmac_ttest,
484 },
485 .destroy = bitmap_ipmac_destroy,
486 .flush = bitmap_ipmac_flush,
487 .head = bitmap_ipmac_head,
488 .list = bitmap_ipmac_tlist,
489 .same_set = bitmap_ipmac_same_set,
490};
491
492static void
493bitmap_ipmac_gc(unsigned long ul_set)
494{
495 struct ip_set *set = (struct ip_set *) ul_set;
496 struct bitmap_ipmac *map = set->data;
497 struct ipmac_telem *elem;
498 u32 id, last = map->last_ip - map->first_ip;
499
500	/* We run in parallel with other readers (test element)
501 * but adding/deleting new entries is locked out */
502 read_lock_bh(&set->lock);
503 for (id = 0; id <= last; id++) {
504 elem = bitmap_ipmac_elem(map, id);
505 if (elem->match == MAC_FILLED &&
506 ip_set_timeout_expired(elem->timeout))
507 elem->match = MAC_EMPTY;
508 }
509 read_unlock_bh(&set->lock);
510
511 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
512 add_timer(&map->gc);
513}
514
515static void
516bitmap_ipmac_gc_init(struct ip_set *set)
517{
518 struct bitmap_ipmac *map = set->data;
519
520 init_timer(&map->gc);
521 map->gc.data = (unsigned long) set;
522 map->gc.function = bitmap_ipmac_gc;
523 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
524 add_timer(&map->gc);
525}
526
527/* Create bitmap:ip,mac type of sets */
528
529static bool
530init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
531 u32 first_ip, u32 last_ip)
532{
533 map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
534 if (!map->members)
535 return false;
536 map->first_ip = first_ip;
537 map->last_ip = last_ip;
538 map->timeout = IPSET_NO_TIMEOUT;
539
540 set->data = map;
541 set->family = AF_INET;
542
543 return true;
544}
545
546static int
547bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
548 u32 flags)
549{
550 u32 first_ip, last_ip, elements;
551 struct bitmap_ipmac *map;
552 int ret;
553
554 if (unlikely(!tb[IPSET_ATTR_IP] ||
555 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
556 return -IPSET_ERR_PROTOCOL;
557
558 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
559 if (ret)
560 return ret;
561
562 if (tb[IPSET_ATTR_IP_TO]) {
563 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
564 if (ret)
565 return ret;
566 if (first_ip > last_ip) {
567 u32 tmp = first_ip;
568
569 first_ip = last_ip;
570 last_ip = tmp;
571 }
572 } else if (tb[IPSET_ATTR_CIDR]) {
573 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
574
575 if (cidr >= 32)
576 return -IPSET_ERR_INVALID_CIDR;
577 last_ip = first_ip | ~ip_set_hostmask(cidr);
578 } else
579 return -IPSET_ERR_PROTOCOL;
580
581 elements = last_ip - first_ip + 1;
582
583 if (elements > IPSET_BITMAP_MAX_RANGE + 1)
584 return -IPSET_ERR_BITMAP_RANGE_SIZE;
585
586 map = kzalloc(sizeof(*map), GFP_KERNEL);
587 if (!map)
588 return -ENOMEM;
589
590 if (tb[IPSET_ATTR_TIMEOUT]) {
591 map->dsize = sizeof(struct ipmac_telem);
592
593 if (!init_map_ipmac(set, map, first_ip, last_ip)) {
594 kfree(map);
595 return -ENOMEM;
596 }
597
598 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
599
600 set->variant = &bitmap_tipmac;
601
602 bitmap_ipmac_gc_init(set);
603 } else {
604 map->dsize = sizeof(struct ipmac_elem);
605
606 if (!init_map_ipmac(set, map, first_ip, last_ip)) {
607 kfree(map);
608 return -ENOMEM;
609 }
610 set->variant = &bitmap_ipmac;
611
612 }
613 return 0;
614}
615
616static struct ip_set_type bitmap_ipmac_type = {
617 .name = "bitmap:ip,mac",
618 .protocol = IPSET_PROTOCOL,
619 .features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
620 .dimension = IPSET_DIM_TWO,
621 .family = AF_INET,
622 .revision = 0,
623 .create = bitmap_ipmac_create,
624 .create_policy = {
625 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
626 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
627 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
628 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
629 },
630 .adt_policy = {
631 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
632 [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN },
633 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
634 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
635 },
636 .me = THIS_MODULE,
637};
638
639static int __init
640bitmap_ipmac_init(void)
641{
642 return ip_set_type_register(&bitmap_ipmac_type);
643}
644
645static void __exit
646bitmap_ipmac_fini(void)
647{
648 ip_set_type_unregister(&bitmap_ipmac_type);
649}
650
651module_init(bitmap_ipmac_init);
652module_exit(bitmap_ipmac_fini);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
new file mode 100644
index 00000000000..165f09b1a9c
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -0,0 +1,515 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the bitmap:port type */
9
10#include <linux/module.h>
11#include <linux/ip.h>
12#include <linux/skbuff.h>
13#include <linux/errno.h>
14#include <linux/netlink.h>
15#include <linux/jiffies.h>
16#include <linux/timer.h>
17#include <net/netlink.h>
18
19#include <linux/netfilter/ipset/ip_set.h>
20#include <linux/netfilter/ipset/ip_set_bitmap.h>
21#include <linux/netfilter/ipset/ip_set_getport.h>
22#define IP_SET_BITMAP_TIMEOUT
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24
25MODULE_LICENSE("GPL");
26MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
27MODULE_DESCRIPTION("bitmap:port type of IP sets");
28MODULE_ALIAS("ip_set_bitmap:port");
29
30/* Type structure */
31struct bitmap_port {
32 void *members; /* the set members */
33 u16 first_port; /* host byte order, included in range */
34 u16 last_port; /* host byte order, included in range */
35 size_t memsize; /* members size */
36 u32 timeout; /* timeout parameter */
37 struct timer_list gc; /* garbage collection */
38};
39
40/* Base variant */
41
42static int
43bitmap_port_test(struct ip_set *set, void *value, u32 timeout)
44{
45 const struct bitmap_port *map = set->data;
46 u16 id = *(u16 *)value;
47
48 return !!test_bit(id, map->members);
49}
50
51static int
52bitmap_port_add(struct ip_set *set, void *value, u32 timeout)
53{
54 struct bitmap_port *map = set->data;
55 u16 id = *(u16 *)value;
56
57 if (test_and_set_bit(id, map->members))
58 return -IPSET_ERR_EXIST;
59
60 return 0;
61}
62
63static int
64bitmap_port_del(struct ip_set *set, void *value, u32 timeout)
65{
66 struct bitmap_port *map = set->data;
67 u16 id = *(u16 *)value;
68
69 if (!test_and_clear_bit(id, map->members))
70 return -IPSET_ERR_EXIST;
71
72 return 0;
73}
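
The three base handlers above are thin wrappers around the kernel bitops API. A toy, self-contained illustration of the same calls on a hypothetical bitmap (editorial aside, not part of the patch):

	#include <linux/kernel.h>
	#include <linux/bitops.h>

	static DECLARE_BITMAP(ports, 1024);	/* 1024-bit map, all zero */

	static void demo(void)
	{
		set_bit(80, ports);				/* add */
		WARN_ON(!test_bit(80, ports));			/* test -> set */
		WARN_ON(test_and_set_bit(80, ports) == 0);	/* re-add -> already set */
		WARN_ON(!test_and_clear_bit(80, ports));	/* del -> was set */
		WARN_ON(test_and_clear_bit(80, ports));		/* del again -> was clear */
	}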
74
75static int
76bitmap_port_list(const struct ip_set *set,
77 struct sk_buff *skb, struct netlink_callback *cb)
78{
79 const struct bitmap_port *map = set->data;
80 struct nlattr *atd, *nested;
81 u16 id, first = cb->args[2];
82 u16 last = map->last_port - map->first_port;
83
84 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
85 if (!atd)
86 return -EMSGSIZE;
87 for (; cb->args[2] <= last; cb->args[2]++) {
88 id = cb->args[2];
89 if (!test_bit(id, map->members))
90 continue;
91 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
92 if (!nested) {
93 if (id == first) {
94 nla_nest_cancel(skb, atd);
95 return -EMSGSIZE;
96 } else
97 goto nla_put_failure;
98 }
99 NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
100 htons(map->first_port + id));
101 ipset_nest_end(skb, nested);
102 }
103 ipset_nest_end(skb, atd);
104 /* Set listing finished */
105 cb->args[2] = 0;
106
107 return 0;
108
109nla_put_failure:
110 nla_nest_cancel(skb, nested);
111 ipset_nest_end(skb, atd);
112 if (unlikely(id == first)) {
113 cb->args[2] = 0;
114 return -EMSGSIZE;
115 }
116 return 0;
117}
118
119/* Timeout variant */
120
121static int
122bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout)
123{
124 const struct bitmap_port *map = set->data;
125 const unsigned long *members = map->members;
126 u16 id = *(u16 *)value;
127
128 return ip_set_timeout_test(members[id]);
129}
130
131static int
132bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout)
133{
134 struct bitmap_port *map = set->data;
135 unsigned long *members = map->members;
136 u16 id = *(u16 *)value;
137
138 if (ip_set_timeout_test(members[id]))
139 return -IPSET_ERR_EXIST;
140
141 members[id] = ip_set_timeout_set(timeout);
142
143 return 0;
144}
145
146static int
147bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout)
148{
149 struct bitmap_port *map = set->data;
150 unsigned long *members = map->members;
151 u16 id = *(u16 *)value;
152 int ret = -IPSET_ERR_EXIST;
153
154 if (ip_set_timeout_test(members[id]))
155 ret = 0;
156
157 members[id] = IPSET_ELEM_UNSET;
158 return ret;
159}
160
161static int
162bitmap_port_tlist(const struct ip_set *set,
163 struct sk_buff *skb, struct netlink_callback *cb)
164{
165 const struct bitmap_port *map = set->data;
166 struct nlattr *adt, *nested;
167 u16 id, first = cb->args[2];
168 u16 last = map->last_port - map->first_port;
169 const unsigned long *members = map->members;
170
171 adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
172 if (!adt)
173 return -EMSGSIZE;
174 for (; cb->args[2] <= last; cb->args[2]++) {
175 id = cb->args[2];
176 if (!ip_set_timeout_test(members[id]))
177 continue;
178 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
179 if (!nested) {
180 if (id == first) {
181 nla_nest_cancel(skb, adt);
182 return -EMSGSIZE;
183 } else
184 goto nla_put_failure;
185 }
186 NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
187 htons(map->first_port + id));
188 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
189 htonl(ip_set_timeout_get(members[id])));
190 ipset_nest_end(skb, nested);
191 }
192 ipset_nest_end(skb, adt);
193
194 /* Set listing finished */
195 cb->args[2] = 0;
196
197 return 0;
198
199nla_put_failure:
200 nla_nest_cancel(skb, nested);
201 ipset_nest_end(skb, adt);
202 if (unlikely(id == first)) {
203 cb->args[2] = 0;
204 return -EMSGSIZE;
205 }
206 return 0;
207}
208
209static int
210bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
211 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
212{
213 struct bitmap_port *map = set->data;
214 ipset_adtfn adtfn = set->variant->adt[adt];
215 __be16 __port;
216 u16 port = 0;
217
218 if (!ip_set_get_ip_port(skb, pf, flags & IPSET_DIM_ONE_SRC, &__port))
219 return -EINVAL;
220
221 port = ntohs(__port);
222
223 if (port < map->first_port || port > map->last_port)
224 return -IPSET_ERR_BITMAP_RANGE;
225
226 port -= map->first_port;
227
228 return adtfn(set, &port, map->timeout);
229}
230
231static int
232bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
233 enum ipset_adt adt, u32 *lineno, u32 flags)
234{
235 struct bitmap_port *map = set->data;
236 ipset_adtfn adtfn = set->variant->adt[adt];
237 u32 timeout = map->timeout;
238	u32 port;	/* u32 to avoid u16 wraparound in the range loop */
239 u16 id, port_to;
240 int ret = 0;
241
242 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
243 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
244 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
245 return -IPSET_ERR_PROTOCOL;
246
247 if (tb[IPSET_ATTR_LINENO])
248 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
249
250 port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
251 if (port < map->first_port || port > map->last_port)
252 return -IPSET_ERR_BITMAP_RANGE;
253
254 if (tb[IPSET_ATTR_TIMEOUT]) {
255 if (!with_timeout(map->timeout))
256 return -IPSET_ERR_TIMEOUT;
257 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
258 }
259
260 if (adt == IPSET_TEST) {
261 id = port - map->first_port;
262 return adtfn(set, &id, timeout);
263 }
264
265 if (tb[IPSET_ATTR_PORT_TO]) {
266 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
267 if (port > port_to) {
268 swap(port, port_to);
269 if (port < map->first_port)
270 return -IPSET_ERR_BITMAP_RANGE;
271 }
272 } else
273 port_to = port;
274
275 if (port_to > map->last_port)
276 return -IPSET_ERR_BITMAP_RANGE;
277
278 for (; port <= port_to; port++) {
279 id = port - map->first_port;
280 ret = adtfn(set, &id, timeout);
281
282 if (ret && !ip_set_eexist(ret, flags))
283 return ret;
284 else
285 ret = 0;
286 }
287 return ret;
288}
289
290static void
291bitmap_port_destroy(struct ip_set *set)
292{
293 struct bitmap_port *map = set->data;
294
295 if (with_timeout(map->timeout))
296 del_timer_sync(&map->gc);
297
298 ip_set_free(map->members);
299 kfree(map);
300
301 set->data = NULL;
302}
303
304static void
305bitmap_port_flush(struct ip_set *set)
306{
307 struct bitmap_port *map = set->data;
308
309 memset(map->members, 0, map->memsize);
310}
311
312static int
313bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
314{
315 const struct bitmap_port *map = set->data;
316 struct nlattr *nested;
317
318 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
319 if (!nested)
320 goto nla_put_failure;
321 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port));
322 NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
323 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
324 htonl(atomic_read(&set->ref) - 1));
325 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
326 htonl(sizeof(*map) + map->memsize));
327 if (with_timeout(map->timeout))
328 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
329 ipset_nest_end(skb, nested);
330
331 return 0;
332nla_put_failure:
333 return -EMSGSIZE;
334}
335
336static bool
337bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
338{
339 const struct bitmap_port *x = a->data;
340 const struct bitmap_port *y = b->data;
341
342 return x->first_port == y->first_port &&
343 x->last_port == y->last_port &&
344 x->timeout == y->timeout;
345}
346
347static const struct ip_set_type_variant bitmap_port = {
348 .kadt = bitmap_port_kadt,
349 .uadt = bitmap_port_uadt,
350 .adt = {
351 [IPSET_ADD] = bitmap_port_add,
352 [IPSET_DEL] = bitmap_port_del,
353 [IPSET_TEST] = bitmap_port_test,
354 },
355 .destroy = bitmap_port_destroy,
356 .flush = bitmap_port_flush,
357 .head = bitmap_port_head,
358 .list = bitmap_port_list,
359 .same_set = bitmap_port_same_set,
360};
361
362static const struct ip_set_type_variant bitmap_tport = {
363 .kadt = bitmap_port_kadt,
364 .uadt = bitmap_port_uadt,
365 .adt = {
366 [IPSET_ADD] = bitmap_port_tadd,
367 [IPSET_DEL] = bitmap_port_tdel,
368 [IPSET_TEST] = bitmap_port_ttest,
369 },
370 .destroy = bitmap_port_destroy,
371 .flush = bitmap_port_flush,
372 .head = bitmap_port_head,
373 .list = bitmap_port_tlist,
374 .same_set = bitmap_port_same_set,
375};
376
377static void
378bitmap_port_gc(unsigned long ul_set)
379{
380 struct ip_set *set = (struct ip_set *) ul_set;
381 struct bitmap_port *map = set->data;
382 unsigned long *table = map->members;
383	u32 id;	/* u32 to avoid u16 wraparound in the loop */
384 u16 last = map->last_port - map->first_port;
385
386	/* We run in parallel with other readers (test element)
387 * but adding/deleting new entries is locked out */
388 read_lock_bh(&set->lock);
389 for (id = 0; id <= last; id++)
390 if (ip_set_timeout_expired(table[id]))
391 table[id] = IPSET_ELEM_UNSET;
392 read_unlock_bh(&set->lock);
393
394 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
395 add_timer(&map->gc);
396}
397
398static void
399bitmap_port_gc_init(struct ip_set *set)
400{
401 struct bitmap_port *map = set->data;
402
403 init_timer(&map->gc);
404 map->gc.data = (unsigned long) set;
405 map->gc.function = bitmap_port_gc;
406 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
407 add_timer(&map->gc);
408}
409
410/* Create bitmap:port type of sets */
411
412static bool
413init_map_port(struct ip_set *set, struct bitmap_port *map,
414 u16 first_port, u16 last_port)
415{
416 map->members = ip_set_alloc(map->memsize);
417 if (!map->members)
418 return false;
419 map->first_port = first_port;
420 map->last_port = last_port;
421 map->timeout = IPSET_NO_TIMEOUT;
422
423 set->data = map;
424 set->family = AF_UNSPEC;
425
426 return true;
427}
428
429static int
430bitmap_port_create(struct ip_set *set, struct nlattr *tb[],
431 u32 flags)
432{
433 struct bitmap_port *map;
434 u16 first_port, last_port;
435
436 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
437 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
438 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
439 return -IPSET_ERR_PROTOCOL;
440
441 first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
442 last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
443 if (first_port > last_port) {
444 u16 tmp = first_port;
445
446 first_port = last_port;
447 last_port = tmp;
448 }
449
450 map = kzalloc(sizeof(*map), GFP_KERNEL);
451 if (!map)
452 return -ENOMEM;
453
454 if (tb[IPSET_ATTR_TIMEOUT]) {
455 map->memsize = (last_port - first_port + 1)
456 * sizeof(unsigned long);
457
458 if (!init_map_port(set, map, first_port, last_port)) {
459 kfree(map);
460 return -ENOMEM;
461 }
462
463 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
464 set->variant = &bitmap_tport;
465
466 bitmap_port_gc_init(set);
467 } else {
468 map->memsize = bitmap_bytes(0, last_port - first_port);
469 pr_debug("memsize: %zu\n", map->memsize);
470 if (!init_map_port(set, map, first_port, last_port)) {
471 kfree(map);
472 return -ENOMEM;
473 }
474
475 set->variant = &bitmap_port;
476 }
477 return 0;
478}
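
A note on the two memsize computations above (editorial aside, not part of the patch): the plain variant packs one bit per port via bitmap_bytes(), while the timeout variant stores a full unsigned long expiry stamp per port. For the maximal 0-65535 range on a 64-bit machine that is 8 KiB versus 512 KiB, which is why the timeout-capable layout is used only when IPSET_ATTR_TIMEOUT is actually requested.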
479
480static struct ip_set_type bitmap_port_type = {
481 .name = "bitmap:port",
482 .protocol = IPSET_PROTOCOL,
483 .features = IPSET_TYPE_PORT,
484 .dimension = IPSET_DIM_ONE,
485 .family = AF_UNSPEC,
486 .revision = 0,
487 .create = bitmap_port_create,
488 .create_policy = {
489 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
490 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
491 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
492 },
493 .adt_policy = {
494 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
495 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
496 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
497 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
498 },
499 .me = THIS_MODULE,
500};
501
502static int __init
503bitmap_port_init(void)
504{
505 return ip_set_type_register(&bitmap_port_type);
506}
507
508static void __exit
509bitmap_port_fini(void)
510{
511 ip_set_type_unregister(&bitmap_port_type);
512}
513
514module_init(bitmap_port_init);
515module_exit(bitmap_port_fini);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
new file mode 100644
index 00000000000..d6b48230a54
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -0,0 +1,1683 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h>
20#include <linux/version.h>
21#include <net/netlink.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/nfnetlink.h>
25#include <linux/netfilter/ipset/ip_set.h>
26
27static LIST_HEAD(ip_set_type_list); /* all registered set types */
28static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
29
30static struct ip_set **ip_set_list; /* all individual sets */
31static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
32
33#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
34
35static unsigned int max_sets;
36
37module_param(max_sets, int, 0600);
38MODULE_PARM_DESC(max_sets, "maximum number of sets");
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
41MODULE_DESCRIPTION("core IP set support");
42MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
43
44/*
45 * The set types are implemented in modules and registered set types
46 * can be found in ip_set_type_list. Adding/deleting types is
47 * serialized by ip_set_type_mutex.
48 */
49
50static inline void
51ip_set_type_lock(void)
52{
53 mutex_lock(&ip_set_type_mutex);
54}
55
56static inline void
57ip_set_type_unlock(void)
58{
59 mutex_unlock(&ip_set_type_mutex);
60}
61
62/* Register and deregister settype */
63
64static struct ip_set_type *
65find_set_type(const char *name, u8 family, u8 revision)
66{
67 struct ip_set_type *type;
68
69 list_for_each_entry_rcu(type, &ip_set_type_list, list)
70 if (STREQ(type->name, name) &&
71 (type->family == family || type->family == AF_UNSPEC) &&
72 type->revision == revision)
73 return type;
74 return NULL;
75}
76
77/* Unlock, try to load a set type module and lock again */
78static int
79try_to_load_type(const char *name)
80{
81 nfnl_unlock();
82 pr_debug("try to load ip_set_%s\n", name);
83 if (request_module("ip_set_%s", name) < 0) {
84 pr_warning("Can't find ip_set type %s\n", name);
85 nfnl_lock();
86 return -IPSET_ERR_FIND_TYPE;
87 }
88 nfnl_lock();
89 return -EAGAIN;
90}
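
Why try_to_load_type() drops and re-takes the lock (editorial note, not part of the patch): request_module() synchronously waits for the modprobe usermode helper, and sleeping there while holding the nfnl mutex would be deadlock-prone if anything on the module-load path needed that same mutex. So the function unlocks, loads, relocks and returns -EAGAIN, which tells the caller to simply retry the lookup now that the type module should be registered.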
91
92/* Find a set type and reference it */
93static int
94find_set_type_get(const char *name, u8 family, u8 revision,
95 struct ip_set_type **found)
96{
97 struct ip_set_type *type;
98 int err;
99
100 rcu_read_lock();
101 *found = find_set_type(name, family, revision);
102 if (*found) {
103 err = !try_module_get((*found)->me) ? -EFAULT : 0;
104 goto unlock;
105 }
106	/* The type is already loaded but the requested revision is not supported */
107 list_for_each_entry_rcu(type, &ip_set_type_list, list)
108 if (STREQ(type->name, name)) {
109 err = -IPSET_ERR_FIND_TYPE;
110 goto unlock;
111 }
112 rcu_read_unlock();
113
114 return try_to_load_type(name);
115
116unlock:
117 rcu_read_unlock();
118 return err;
119}
120
121/* Find a given set type by name and family.
122 * If found, the supported minimum and maximum revisions are
123 * filled out.
124 */
125static int
126find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
127{
128 struct ip_set_type *type;
129 bool found = false;
130
131 *min = 255; *max = 0;
132 rcu_read_lock();
133 list_for_each_entry_rcu(type, &ip_set_type_list, list)
134 if (STREQ(type->name, name) &&
135 (type->family == family || type->family == AF_UNSPEC)) {
136 found = true;
137 if (type->revision < *min)
138 *min = type->revision;
139 if (type->revision > *max)
140 *max = type->revision;
141 }
142 rcu_read_unlock();
143 if (found)
144 return 0;
145
146 return try_to_load_type(name);
147}
148
149#define family_name(f) ((f) == AF_INET ? "inet" : \
150 (f) == AF_INET6 ? "inet6" : "any")
151
152/* Register a set type structure. The type is identified by
153 * the unique triple of name, family and revision.
154 */
155int
156ip_set_type_register(struct ip_set_type *type)
157{
158 int ret = 0;
159
160 if (type->protocol != IPSET_PROTOCOL) {
161 pr_warning("ip_set type %s, family %s, revision %u uses "
162 "wrong protocol version %u (want %u)\n",
163 type->name, family_name(type->family),
164 type->revision, type->protocol, IPSET_PROTOCOL);
165 return -EINVAL;
166 }
167
168 ip_set_type_lock();
169 if (find_set_type(type->name, type->family, type->revision)) {
170 /* Duplicate! */
171 pr_warning("ip_set type %s, family %s, revision %u "
172 "already registered!\n", type->name,
173 family_name(type->family), type->revision);
174 ret = -EINVAL;
175 goto unlock;
176 }
177 list_add_rcu(&type->list, &ip_set_type_list);
178 pr_debug("type %s, family %s, revision %u registered.\n",
179 type->name, family_name(type->family), type->revision);
180unlock:
181 ip_set_type_unlock();
182 return ret;
183}
184EXPORT_SYMBOL_GPL(ip_set_type_register);
185
186/* Unregister a set type. There's a small race with ip_set_create */
187void
188ip_set_type_unregister(struct ip_set_type *type)
189{
190 ip_set_type_lock();
191 if (!find_set_type(type->name, type->family, type->revision)) {
192 pr_warning("ip_set type %s, family %s, revision %u "
193 "not registered\n", type->name,
194 family_name(type->family), type->revision);
195 goto unlock;
196 }
197 list_del_rcu(&type->list);
198 pr_debug("type %s, family %s, revision %u unregistered.\n",
199 type->name, family_name(type->family), type->revision);
200unlock:
201 ip_set_type_unlock();
202
203 synchronize_rcu();
204}
205EXPORT_SYMBOL_GPL(ip_set_type_unregister);
206
207/* Utility functions */
208void *
209ip_set_alloc(size_t size)
210{
211 void *members = NULL;
212
213 if (size < KMALLOC_MAX_SIZE)
214 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
215
216 if (members) {
217 pr_debug("%p: allocated with kmalloc\n", members);
218 return members;
219 }
220
221 members = vzalloc(size);
222 if (!members)
223 return NULL;
224 pr_debug("%p: allocated with vmalloc\n", members);
225
226 return members;
227}
228EXPORT_SYMBOL_GPL(ip_set_alloc);
229
230void
231ip_set_free(void *members)
232{
233 pr_debug("%p: free with %s\n", members,
234 is_vmalloc_addr(members) ? "vfree" : "kfree");
235 if (is_vmalloc_addr(members))
236 vfree(members);
237 else
238 kfree(members);
239}
240EXPORT_SYMBOL_GPL(ip_set_free);
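
A minimal usage sketch for the allocator pair above (editorial aside, not part of the patch; size is caller-supplied): because ip_set_alloc() transparently falls back from kzalloc to vzalloc for large bitmaps, callers must release with ip_set_free(), which probes is_vmalloc_addr() to pick vfree() or kfree():

	void *members = ip_set_alloc(size);

	if (!members)
		return -ENOMEM;
	/* ... use members ... */
	ip_set_free(members);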
241
242static inline bool
243flag_nested(const struct nlattr *nla)
244{
245 return nla->nla_type & NLA_F_NESTED;
246}
247
248static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
249 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
250 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
251 .len = sizeof(struct in6_addr) },
252};
253
254int
255ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
256{
257 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
258
259 if (unlikely(!flag_nested(nla)))
260 return -IPSET_ERR_PROTOCOL;
261 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
262 return -IPSET_ERR_PROTOCOL;
263 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
264 return -IPSET_ERR_PROTOCOL;
265
266 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
267 return 0;
268}
269EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
270
271int
272ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
273{
274 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
275
276 if (unlikely(!flag_nested(nla)))
277 return -IPSET_ERR_PROTOCOL;
278
279 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
280 return -IPSET_ERR_PROTOCOL;
281 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
282 return -IPSET_ERR_PROTOCOL;
283
284 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
285 sizeof(struct in6_addr));
286 return 0;
287}
288EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
289
290/*
291 * Creating/destroying/renaming/swapping affect the existence and
292 * the properties of a set. All of these can be executed from userspace
293 * only and serialized by the nfnl mutex indirectly from nfnetlink.
294 *
295 * Sets are identified by their index in ip_set_list and the index
296 * is used by the external references (set/SET netfilter modules).
297 *
298 * The set behind an index may change by swapping only, from userspace.
299 */
300
301static inline void
302__ip_set_get(ip_set_id_t index)
303{
304 atomic_inc(&ip_set_list[index]->ref);
305}
306
307static inline void
308__ip_set_put(ip_set_id_t index)
309{
310 atomic_dec(&ip_set_list[index]->ref);
311}
312
313/*
314 * Add, del and test set entries from kernel.
315 *
316 * The set behind the index must exist and must be referenced
317 * so it can't be destroyed (or changed) under our feet.
318 */
319
320int
321ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
322 u8 family, u8 dim, u8 flags)
323{
324 struct ip_set *set = ip_set_list[index];
325 int ret = 0;
326
327 BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
328 pr_debug("set %s, index %u\n", set->name, index);
329
330 if (dim < set->type->dimension ||
331 !(family == set->family || set->family == AF_UNSPEC))
332 return 0;
333
334 read_lock_bh(&set->lock);
335 ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags);
336 read_unlock_bh(&set->lock);
337
338 if (ret == -EAGAIN) {
339 /* Type requests element to be completed */
340 pr_debug("element must be competed, ADD is triggered\n");
341 write_lock_bh(&set->lock);
342 set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
343 write_unlock_bh(&set->lock);
344 ret = 1;
345 }
346
347 /* Convert error codes to nomatch */
348 return (ret < 0 ? 0 : ret);
349}
350EXPORT_SYMBOL_GPL(ip_set_test);
351
352int
353ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
354 u8 family, u8 dim, u8 flags)
355{
356 struct ip_set *set = ip_set_list[index];
357 int ret;
358
359 BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
360 pr_debug("set %s, index %u\n", set->name, index);
361
362 if (dim < set->type->dimension ||
363 !(family == set->family || set->family == AF_UNSPEC))
364 return 0;
365
366 write_lock_bh(&set->lock);
367 ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
368 write_unlock_bh(&set->lock);
369
370 return ret;
371}
372EXPORT_SYMBOL_GPL(ip_set_add);
373
374int
375ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
376 u8 family, u8 dim, u8 flags)
377{
378 struct ip_set *set = ip_set_list[index];
379 int ret = 0;
380
381 BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
382 pr_debug("set %s, index %u\n", set->name, index);
383
384 if (dim < set->type->dimension ||
385 !(family == set->family || set->family == AF_UNSPEC))
386 return 0;
387
388 write_lock_bh(&set->lock);
389 ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags);
390 write_unlock_bh(&set->lock);
391
392 return ret;
393}
394EXPORT_SYMBOL_GPL(ip_set_del);
395
396/*
397 * Find a set by name and reference it once. The reference makes
398 * sure the thing pointed to does not go away under our feet.
399 *
400 * The nfnl mutex must already be held.
401 */
402ip_set_id_t
403ip_set_get_byname(const char *name, struct ip_set **set)
404{
405 ip_set_id_t i, index = IPSET_INVALID_ID;
406 struct ip_set *s;
407
408 for (i = 0; i < ip_set_max; i++) {
409 s = ip_set_list[i];
410 if (s != NULL && STREQ(s->name, name)) {
411 __ip_set_get(i);
412 index = i;
413 *set = s;
414 }
415 }
416
417 return index;
418}
419EXPORT_SYMBOL_GPL(ip_set_get_byname);
420
421/*
422 * If the given set pointer points to a valid set, decrement
423 * its reference count by 1. The caller shall not assume the index
424 * to be valid after calling this function.
425 *
426 * The nfnl mutex must already be held.
427 */
428void
429ip_set_put_byindex(ip_set_id_t index)
430{
431 if (ip_set_list[index] != NULL) {
432 BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
433 __ip_set_put(index);
434 }
435}
436EXPORT_SYMBOL_GPL(ip_set_put_byindex);
437
438/*
439 * Get the name of a set behind a set index.
440 * We assume the set is referenced, so it does exist and
441 * can't be destroyed; being referenced, it can't be renamed
442 * either.
443 *
444 * The nfnl mutex must already be held.
445 */
446const char *
447ip_set_name_byindex(ip_set_id_t index)
448{
449 const struct ip_set *set = ip_set_list[index];
450
451 BUG_ON(set == NULL);
452 BUG_ON(atomic_read(&set->ref) == 0);
453
454 /* Referenced, so it's safe */
455 return set->name;
456}
457EXPORT_SYMBOL_GPL(ip_set_name_byindex);
458
459/*
460 * Routines to be called by external subsystems, which do not
461 * take the nfnl mutex for us.
462 */
463
464/*
465 * Find a set by name and reference it once. The reference makes
466 * sure the thing pointed to does not go away under our feet.
467 *
468 * The nfnl mutex is taken inside the function.
469 */
470ip_set_id_t
471ip_set_nfnl_get(const char *name)
472{
473 struct ip_set *s;
474 ip_set_id_t index;
475
476 nfnl_lock();
477 index = ip_set_get_byname(name, &s);
478 nfnl_unlock();
479
480 return index;
481}
482EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
483
484/*
485 * Find a set by index and reference it once. The reference makes
486 * sure the thing pointed to does not go away under our feet.
487 *
488 * The nfnl mutex is taken inside the function.
489 */
490ip_set_id_t
491ip_set_nfnl_get_byindex(ip_set_id_t index)
492{
493	if (index >= ip_set_max)
494 return IPSET_INVALID_ID;
495
496 nfnl_lock();
497 if (ip_set_list[index])
498 __ip_set_get(index);
499 else
500 index = IPSET_INVALID_ID;
501 nfnl_unlock();
502
503 return index;
504}
505EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
506
507/*
508 * If the given set pointer points to a valid set, decrement
509 * its reference count by 1. The caller shall not assume the index
510 * to be valid after calling this function.
511 *
512 * The nfnl mutex is taken inside the function.
513 */
514void
515ip_set_nfnl_put(ip_set_id_t index)
516{
517 nfnl_lock();
518 if (ip_set_list[index] != NULL) {
519 BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
520 __ip_set_put(index);
521 }
522 nfnl_unlock();
523}
524EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
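
A hedged sketch of how an external kernel module (e.g. an iptables "set" match) would consume the reference API exported above; the my_* names are hypothetical and error handling is minimal (editorial aside, not part of the patch):

	#include <linux/errno.h>
	#include <linux/skbuff.h>
	#include <linux/netfilter/ipset/ip_set.h>

	static ip_set_id_t my_index = IPSET_INVALID_ID;

	static int my_attach(const char *setname)
	{
		my_index = ip_set_nfnl_get(setname);	/* takes the nfnl mutex */
		return my_index == IPSET_INVALID_ID ? -ENOENT : 0;
	}

	static bool my_match_packet(const struct sk_buff *skb)
	{
		/* family/dim must fit the set type, e.g. a bitmap:ip set */
		return ip_set_test(my_index, skb, AF_INET, IPSET_DIM_ONE, 0) > 0;
	}

	static void my_detach(void)
	{
		if (my_index != IPSET_INVALID_ID)
			ip_set_nfnl_put(my_index);	/* drop our reference */
	}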
525
526/*
527 * Communication protocol with userspace over netlink.
528 *
529 * We are already serialized by the nfnl mutex.
530 */
531
532static inline bool
533protocol_failed(const struct nlattr * const tb[])
534{
535 return !tb[IPSET_ATTR_PROTOCOL] ||
536 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
537}
538
539static inline u32
540flag_exist(const struct nlmsghdr *nlh)
541{
542 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
543}
544
545static struct nlmsghdr *
546start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
547 enum ipset_cmd cmd)
548{
549 struct nlmsghdr *nlh;
550 struct nfgenmsg *nfmsg;
551
552 nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
553 sizeof(*nfmsg), flags);
554 if (nlh == NULL)
555 return NULL;
556
557 nfmsg = nlmsg_data(nlh);
558 nfmsg->nfgen_family = AF_INET;
559 nfmsg->version = NFNETLINK_V0;
560 nfmsg->res_id = 0;
561
562 return nlh;
563}
564
565/* Create a set */
566
567static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
568 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
569 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
570 .len = IPSET_MAXNAMELEN - 1 },
571 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
572 .len = IPSET_MAXNAMELEN - 1},
573 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
574 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
575 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
576};
577
578static ip_set_id_t
579find_set_id(const char *name)
580{
581 ip_set_id_t i, index = IPSET_INVALID_ID;
582 const struct ip_set *set;
583
584 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
585 set = ip_set_list[i];
586 if (set != NULL && STREQ(set->name, name))
587 index = i;
588 }
589 return index;
590}
591
592static inline struct ip_set *
593find_set(const char *name)
594{
595 ip_set_id_t index = find_set_id(name);
596
597 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
598}
599
600static int
601find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
602{
603 ip_set_id_t i;
604
605 *index = IPSET_INVALID_ID;
606 for (i = 0; i < ip_set_max; i++) {
607 if (ip_set_list[i] == NULL) {
608 if (*index == IPSET_INVALID_ID)
609 *index = i;
610 } else if (STREQ(name, ip_set_list[i]->name)) {
611 /* Name clash */
612 *set = ip_set_list[i];
613 return -EEXIST;
614 }
615 }
616 if (*index == IPSET_INVALID_ID)
617		/* No free slot remains */
618 return -IPSET_ERR_MAX_SETS;
619 return 0;
620}
621
622static int
623ip_set_create(struct sock *ctnl, struct sk_buff *skb,
624 const struct nlmsghdr *nlh,
625 const struct nlattr * const attr[])
626{
627 struct ip_set *set, *clash = NULL;
628 ip_set_id_t index = IPSET_INVALID_ID;
629 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
630 const char *name, *typename;
631 u8 family, revision;
632 u32 flags = flag_exist(nlh);
633 int ret = 0;
634
635 if (unlikely(protocol_failed(attr) ||
636 attr[IPSET_ATTR_SETNAME] == NULL ||
637 attr[IPSET_ATTR_TYPENAME] == NULL ||
638 attr[IPSET_ATTR_REVISION] == NULL ||
639 attr[IPSET_ATTR_FAMILY] == NULL ||
640 (attr[IPSET_ATTR_DATA] != NULL &&
641 !flag_nested(attr[IPSET_ATTR_DATA]))))
642 return -IPSET_ERR_PROTOCOL;
643
644 name = nla_data(attr[IPSET_ATTR_SETNAME]);
645 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
646 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
647 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
648 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
649 name, typename, family_name(family), revision);
650
651 /*
652 * First, and without any locks, allocate and initialize
653 * a normal base set structure.
654 */
655 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
656 if (!set)
657 return -ENOMEM;
658 rwlock_init(&set->lock);
659 strlcpy(set->name, name, IPSET_MAXNAMELEN);
660 atomic_set(&set->ref, 0);
661 set->family = family;
662
663 /*
664 * Next, check that we know the type, and take
665 * a reference on the type, to make sure it stays available
666 * while constructing our new set.
667 *
668 * After referencing the type, we try to create the type
669 * specific part of the set without holding any locks.
670 */
671 ret = find_set_type_get(typename, family, revision, &(set->type));
672 if (ret)
673 goto out;
674
675 /*
676 * Without holding any locks, create private part.
677 */
678 if (attr[IPSET_ATTR_DATA] &&
679 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
680 set->type->create_policy)) {
681 ret = -IPSET_ERR_PROTOCOL;
682 goto put_out;
683 }
684
685 ret = set->type->create(set, tb, flags);
686 if (ret != 0)
687 goto put_out;
688
689 /* BTW, ret==0 here. */
690
691 /*
692 * Here, we have a valid, constructed set and we are protected
693 * by nfnl_lock. Find the first free index in ip_set_list and
694 * check clashing.
695 */
696 if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
697 /* If this is the same set and requested, ignore error */
698 if (ret == -EEXIST &&
699 (flags & IPSET_FLAG_EXIST) &&
700 STREQ(set->type->name, clash->type->name) &&
701 set->type->family == clash->type->family &&
702 set->type->revision == clash->type->revision &&
703 set->variant->same_set(set, clash))
704 ret = 0;
705 goto cleanup;
706 }
707
708 /*
709 * Finally! Add our shiny new set to the list, and be done.
710 */
711 pr_debug("create: '%s' created with index %u!\n", set->name, index);
712 ip_set_list[index] = set;
713
714 return ret;
715
716cleanup:
717 set->variant->destroy(set);
718put_out:
719 module_put(set->type->me);
720out:
721 kfree(set);
722 return ret;
723}
724
725/* Destroy sets */
726
727static const struct nla_policy
728ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
729 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
730 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
731 .len = IPSET_MAXNAMELEN - 1 },
732};
733
734static void
735ip_set_destroy_set(ip_set_id_t index)
736{
737 struct ip_set *set = ip_set_list[index];
738
739 pr_debug("set: %s\n", set->name);
740 ip_set_list[index] = NULL;
741
742 /* Must call it without holding any lock */
743 set->variant->destroy(set);
744 module_put(set->type->me);
745 kfree(set);
746}
747
748static int
749ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
750 const struct nlmsghdr *nlh,
751 const struct nlattr * const attr[])
752{
753 ip_set_id_t i;
754
755 if (unlikely(protocol_failed(attr)))
756 return -IPSET_ERR_PROTOCOL;
757
758 /* References are protected by the nfnl mutex */
759 if (!attr[IPSET_ATTR_SETNAME]) {
760 for (i = 0; i < ip_set_max; i++) {
761 if (ip_set_list[i] != NULL &&
762 (atomic_read(&ip_set_list[i]->ref)))
763 return -IPSET_ERR_BUSY;
764 }
765 for (i = 0; i < ip_set_max; i++) {
766 if (ip_set_list[i] != NULL)
767 ip_set_destroy_set(i);
768 }
769 } else {
770 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
771 if (i == IPSET_INVALID_ID)
772 return -ENOENT;
773 else if (atomic_read(&ip_set_list[i]->ref))
774 return -IPSET_ERR_BUSY;
775
776 ip_set_destroy_set(i);
777 }
778 return 0;
779}
780
781/* Flush sets */
782
783static void
784ip_set_flush_set(struct ip_set *set)
785{
786 pr_debug("set: %s\n", set->name);
787
788 write_lock_bh(&set->lock);
789 set->variant->flush(set);
790 write_unlock_bh(&set->lock);
791}
792
793static int
794ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
795 const struct nlmsghdr *nlh,
796 const struct nlattr * const attr[])
797{
798 ip_set_id_t i;
799
800 if (unlikely(protocol_failed(attr)))
801		return -IPSET_ERR_PROTOCOL;
802
803 if (!attr[IPSET_ATTR_SETNAME]) {
804 for (i = 0; i < ip_set_max; i++)
805 if (ip_set_list[i] != NULL)
806 ip_set_flush_set(ip_set_list[i]);
807 } else {
808 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
809 if (i == IPSET_INVALID_ID)
810 return -ENOENT;
811
812 ip_set_flush_set(ip_set_list[i]);
813 }
814
815 return 0;
816}
817
818/* Rename a set */
819
820static const struct nla_policy
821ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
822 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
823 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
824 .len = IPSET_MAXNAMELEN - 1 },
825 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
826 .len = IPSET_MAXNAMELEN - 1 },
827};
828
829static int
830ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
831 const struct nlmsghdr *nlh,
832 const struct nlattr * const attr[])
833{
834 struct ip_set *set;
835 const char *name2;
836 ip_set_id_t i;
837
838 if (unlikely(protocol_failed(attr) ||
839 attr[IPSET_ATTR_SETNAME] == NULL ||
840 attr[IPSET_ATTR_SETNAME2] == NULL))
841 return -IPSET_ERR_PROTOCOL;
842
843 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
844 if (set == NULL)
845 return -ENOENT;
846 if (atomic_read(&set->ref) != 0)
847 return -IPSET_ERR_REFERENCED;
848
849 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
850 for (i = 0; i < ip_set_max; i++) {
851 if (ip_set_list[i] != NULL &&
852 STREQ(ip_set_list[i]->name, name2))
853 return -IPSET_ERR_EXIST_SETNAME2;
854 }
855 strncpy(set->name, name2, IPSET_MAXNAMELEN);
856
857 return 0;
858}
859
860/* Swap two sets so that name/index points to the other.
861 * References and set names are also swapped.
862 *
863 * We are protected by the nfnl mutex and references are
864 * manipulated only by holding the mutex. The kernel interfaces
865 * do not hold the mutex but the pointer settings are atomic
866 * so the ip_set_list always contains valid pointers to the sets.
867 */
868
869static int
870ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
871 const struct nlmsghdr *nlh,
872 const struct nlattr * const attr[])
873{
874 struct ip_set *from, *to;
875 ip_set_id_t from_id, to_id;
876 char from_name[IPSET_MAXNAMELEN];
877 u32 from_ref;
878
879 if (unlikely(protocol_failed(attr) ||
880 attr[IPSET_ATTR_SETNAME] == NULL ||
881 attr[IPSET_ATTR_SETNAME2] == NULL))
882 return -IPSET_ERR_PROTOCOL;
883
884 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
885 if (from_id == IPSET_INVALID_ID)
886 return -ENOENT;
887
888 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
889 if (to_id == IPSET_INVALID_ID)
890 return -IPSET_ERR_EXIST_SETNAME2;
891
892 from = ip_set_list[from_id];
893 to = ip_set_list[to_id];
894
895 /* Features must not change.
896	 * Not an artificial restriction anymore, as we must prevent
897 * possible loops created by swapping in setlist type of sets. */
898 if (!(from->type->features == to->type->features &&
899 from->type->family == to->type->family))
900 return -IPSET_ERR_TYPE_MISMATCH;
901
902 /* No magic here: ref munging protected by the nfnl_lock */
903 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
904 from_ref = atomic_read(&from->ref);
905
906 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
907 atomic_set(&from->ref, atomic_read(&to->ref));
908 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
909 atomic_set(&to->ref, from_ref);
910
911 ip_set_list[from_id] = to;
912 ip_set_list[to_id] = from;
913
914 return 0;
915}
916
917/* List/save set data */
918
919#define DUMP_INIT 0L
920#define DUMP_ALL 1L
921#define DUMP_ONE 2L
922#define DUMP_LAST 3L
923
924static int
925ip_set_dump_done(struct netlink_callback *cb)
926{
927 if (cb->args[2]) {
928 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
929 __ip_set_put((ip_set_id_t) cb->args[1]);
930 }
931 return 0;
932}
933
934static inline void
935dump_attrs(struct nlmsghdr *nlh)
936{
937 const struct nlattr *attr;
938 int rem;
939
940 pr_debug("dump nlmsg\n");
941 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
942 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
943 }
944}
945
946static int
947dump_init(struct netlink_callback *cb)
948{
949 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
950 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
951 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
952 struct nlattr *attr = (void *)nlh + min_len;
953 ip_set_id_t index;
954
955 /* Second pass, so parser can't fail */
956 nla_parse(cda, IPSET_ATTR_CMD_MAX,
957 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
958
959 /* cb->args[0] : dump single set/all sets
960 * [1] : set index
961 * [..]: type specific
962 */
963
964 if (!cda[IPSET_ATTR_SETNAME]) {
965 cb->args[0] = DUMP_ALL;
966 return 0;
967 }
968
969 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
970 if (index == IPSET_INVALID_ID)
971 return -ENOENT;
972
973 cb->args[0] = DUMP_ONE;
974 cb->args[1] = index;
975 return 0;
976}
977
978static int
979ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
980{
981 ip_set_id_t index = IPSET_INVALID_ID, max;
982 struct ip_set *set = NULL;
983 struct nlmsghdr *nlh = NULL;
984 unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
985 int ret = 0;
986
987 if (cb->args[0] == DUMP_INIT) {
988 ret = dump_init(cb);
989 if (ret < 0) {
990 nlh = nlmsg_hdr(cb->skb);
991 /* We have to create and send the error message
992 * manually :-( */
993 if (nlh->nlmsg_flags & NLM_F_ACK)
994 netlink_ack(cb->skb, nlh, ret);
995 return ret;
996 }
997 }
998
999 if (cb->args[1] >= ip_set_max)
1000 goto out;
1001
1002 pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
1003 max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
1004 for (; cb->args[1] < max; cb->args[1]++) {
1005 index = (ip_set_id_t) cb->args[1];
1006 set = ip_set_list[index];
1007 if (set == NULL) {
1008 if (cb->args[0] == DUMP_ONE) {
1009 ret = -ENOENT;
1010 goto out;
1011 }
1012 continue;
1013 }
1014 /* When dumping all sets, we must dump "sorted"
1015 * so that lists (unions of sets) are dumped last.
1016 */
1017 if (cb->args[0] != DUMP_ONE &&
1018 !((cb->args[0] == DUMP_ALL) ^
1019		      !!(set->type->features & IPSET_DUMP_LAST)))
1020 continue;
1021 pr_debug("List set: %s\n", set->name);
1022 if (!cb->args[2]) {
1023 /* Start listing: make sure set won't be destroyed */
1024 pr_debug("reference set\n");
1025 __ip_set_get(index);
1026 }
1027 nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
1028 cb->nlh->nlmsg_seq, flags,
1029 IPSET_CMD_LIST);
1030 if (!nlh) {
1031 ret = -EMSGSIZE;
1032 goto release_refcount;
1033 }
1034 NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1035 NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
1036 switch (cb->args[2]) {
1037 case 0:
1038 /* Core header data */
1039 NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME,
1040 set->type->name);
1041 NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
1042 set->family);
1043 NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
1044 set->type->revision);
1045 ret = set->variant->head(set, skb);
1046 if (ret < 0)
1047 goto release_refcount;
1048 /* Fall through and add elements */
1049 default:
1050 read_lock_bh(&set->lock);
1051 ret = set->variant->list(set, skb, cb);
1052 read_unlock_bh(&set->lock);
1053 if (!cb->args[2]) {
1054 /* Set is done, proceed with next one */
1055 if (cb->args[0] == DUMP_ONE)
1056 cb->args[1] = IPSET_INVALID_ID;
1057 else
1058 cb->args[1]++;
1059 }
1060 goto release_refcount;
1061 }
1062 }
1063 goto out;
1064
1065nla_put_failure:
1066 ret = -EFAULT;
1067release_refcount:
1068 /* If there was an error or set is done, release set */
1069 if (ret || !cb->args[2]) {
1070 pr_debug("release set %s\n", ip_set_list[index]->name);
1071 __ip_set_put(index);
1072 }
1073
1074 /* If we dump all sets, continue with dumping last ones */
1075 if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2])
1076 cb->args[0] = DUMP_LAST;
1077
1078out:
1079 if (nlh) {
1080 nlmsg_end(skb, nlh);
1081 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1082 dump_attrs(nlh);
1083 }
1084
1085 return ret < 0 ? ret : skb->len;
1086}
1087
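The dump above is a small state machine driven by cb->args[0]: DUMP_INIT parses the request, then either DUMP_ONE walks a single index or DUMP_ALL walks the whole array while deferring sets flagged IPSET_DUMP_LAST (such as set-of-sets types) to a final DUMP_LAST pass. A standalone sketch of the two-pass ordering, with an illustrative flag value:

/* Sketch of the two-pass dump ordering in ip_set_dump_start():
 * the DUMP_ALL pass skips sets flagged IPSET_DUMP_LAST, and the
 * DUMP_LAST pass emits only those. Names and the feature flag
 * value here are illustrative. */
#include <stdio.h>

#define DUMP_ALL    1L
#define DUMP_LAST   3L
#define F_DUMP_LAST 0x1   /* stands in for IPSET_DUMP_LAST */

struct set { const char *name; unsigned features; };

int main(void)
{
	struct set sets[] = {
		{ "clients",  0 },
		{ "servers",  0 },
		{ "combined", F_DUMP_LAST },   /* a list:set-like set */
	};
	long pass;

	for (pass = DUMP_ALL; pass <= DUMP_LAST; pass += DUMP_LAST - DUMP_ALL) {
		size_t i;
		for (i = 0; i < sizeof(sets) / sizeof(sets[0]); i++) {
			/* Same filter as above: skip when the pass and
			 * the flag disagree. */
			if (!((pass == DUMP_ALL) ^
			      !!(sets[i].features & F_DUMP_LAST)))
				continue;
			printf("pass %ld: dump %s\n", pass, sets[i].name);
		}
	}
	return 0;
}
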
1088static int
1089ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1090 const struct nlmsghdr *nlh,
1091 const struct nlattr * const attr[])
1092{
1093 if (unlikely(protocol_failed(attr)))
1094 return -IPSET_ERR_PROTOCOL;
1095
1096 return netlink_dump_start(ctnl, skb, nlh,
1097 ip_set_dump_start,
1098 ip_set_dump_done);
1099}
1100
1101/* Add, del and test */
1102
1103static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1104 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1105 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1106 .len = IPSET_MAXNAMELEN - 1 },
1107 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
1108 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
1109 [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
1110};
1111
1112static int
1113call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1114 struct nlattr *tb[], enum ipset_adt adt,
1115 u32 flags, bool use_lineno)
1116{
1117 int ret, retried = 0;
1118 u32 lineno = 0;
1119 bool eexist = flags & IPSET_FLAG_EXIST;
1120
1121 do {
1122 write_lock_bh(&set->lock);
1123 ret = set->variant->uadt(set, tb, adt, &lineno, flags);
1124 write_unlock_bh(&set->lock);
1125 } while (ret == -EAGAIN &&
1126 set->variant->resize &&
1127 (ret = set->variant->resize(set, retried++)) == 0);
1128
1129 if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1130 return 0;
1131 if (lineno && use_lineno) {
1132 /* Error in restore/batch mode: send back lineno */
1133 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1134 struct sk_buff *skb2;
1135 struct nlmsgerr *errmsg;
1136 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1137 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1138 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1139 struct nlattr *cmdattr;
1140 u32 *errline;
1141
1142 skb2 = nlmsg_new(payload, GFP_KERNEL);
1143 if (skb2 == NULL)
1144 return -ENOMEM;
1145 rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
1146 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1147 errmsg = nlmsg_data(rep);
1148 errmsg->error = ret;
1149 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1150 cmdattr = (void *)&errmsg->msg + min_len;
1151
1152 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1153 cmdattr, nlh->nlmsg_len - min_len,
1154 ip_set_adt_policy);
1155
1156 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1157
1158 *errline = lineno;
1159
1160 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1161 /* Signal netlink not to send its ACK/errmsg. */
1162 return -EINTR;
1163 }
1164
1165 return ret;
1166}
1167
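The retry loop at the top of call_ad() is the interesting part: the add runs under the set lock, a full hash table reports -EAGAIN, and the table is then resized outside the lock before the add is retried. A userspace sketch of that control flow, with a hypothetical table in place of the set type:

/* Sketch of the resize-and-retry loop in call_ad(). The table
 * stand-in is hypothetical; in the kernel, do_add() runs under
 * write_lock_bh(&set->lock) and do_resize() runs without it. */
#include <stdio.h>
#include <errno.h>

static int capacity = 2, used = 2;

static int do_add(void)              /* like set->variant->uadt() */
{
	if (used >= capacity)
		return -EAGAIN;      /* table full */
	used++;
	return 0;
}

static int do_resize(int retried)    /* like set->variant->resize() */
{
	if (retried > 3)
		return -ENOMEM;      /* give up eventually */
	capacity *= 2;
	printf("resized to %d buckets\n", capacity);
	return 0;
}

int main(void)
{
	int ret, retried = 0;

	do {
		ret = do_add();
	} while (ret == -EAGAIN && (ret = do_resize(retried++)) == 0);

	printf("add: %d, used %d/%d\n", ret, used, capacity);
	return 0;
}
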
1168static int
1169ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1170 const struct nlmsghdr *nlh,
1171 const struct nlattr * const attr[])
1172{
1173 struct ip_set *set;
1174 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1175 const struct nlattr *nla;
1176 u32 flags = flag_exist(nlh);
1177 bool use_lineno;
1178 int ret = 0;
1179
1180 if (unlikely(protocol_failed(attr) ||
1181 attr[IPSET_ATTR_SETNAME] == NULL ||
1182 !((attr[IPSET_ATTR_DATA] != NULL) ^
1183 (attr[IPSET_ATTR_ADT] != NULL)) ||
1184 (attr[IPSET_ATTR_DATA] != NULL &&
1185 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1186 (attr[IPSET_ATTR_ADT] != NULL &&
1187 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1188 attr[IPSET_ATTR_LINENO] == NULL))))
1189 return -IPSET_ERR_PROTOCOL;
1190
1191 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1192 if (set == NULL)
1193 return -ENOENT;
1194
1195 use_lineno = !!attr[IPSET_ATTR_LINENO];
1196 if (attr[IPSET_ATTR_DATA]) {
1197 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1198 attr[IPSET_ATTR_DATA],
1199 set->type->adt_policy))
1200 return -IPSET_ERR_PROTOCOL;
1201 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1202 use_lineno);
1203 } else {
1204 int nla_rem;
1205
1206 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1207 memset(tb, 0, sizeof(tb));
1208 if (nla_type(nla) != IPSET_ATTR_DATA ||
1209 !flag_nested(nla) ||
1210 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1211 set->type->adt_policy))
1212 return -IPSET_ERR_PROTOCOL;
1213 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
1214 flags, use_lineno);
1215 if (ret < 0)
1216 return ret;
1217 }
1218 }
1219 return ret;
1220}
1221
1222static int
1223ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1224 const struct nlmsghdr *nlh,
1225 const struct nlattr * const attr[])
1226{
1227 struct ip_set *set;
1228 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1229 const struct nlattr *nla;
1230 u32 flags = flag_exist(nlh);
1231 bool use_lineno;
1232 int ret = 0;
1233
1234 if (unlikely(protocol_failed(attr) ||
1235 attr[IPSET_ATTR_SETNAME] == NULL ||
1236 !((attr[IPSET_ATTR_DATA] != NULL) ^
1237 (attr[IPSET_ATTR_ADT] != NULL)) ||
1238 (attr[IPSET_ATTR_DATA] != NULL &&
1239 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1240 (attr[IPSET_ATTR_ADT] != NULL &&
1241 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1242 attr[IPSET_ATTR_LINENO] == NULL))))
1243 return -IPSET_ERR_PROTOCOL;
1244
1245 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1246 if (set == NULL)
1247 return -ENOENT;
1248
1249 use_lineno = !!attr[IPSET_ATTR_LINENO];
1250 if (attr[IPSET_ATTR_DATA]) {
1251 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1252 attr[IPSET_ATTR_DATA],
1253 set->type->adt_policy))
1254 return -IPSET_ERR_PROTOCOL;
1255 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1256 use_lineno);
1257 } else {
1258 int nla_rem;
1259
1260 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1261			memset(tb, 0, sizeof(tb));
1262 if (nla_type(nla) != IPSET_ATTR_DATA ||
1263 !flag_nested(nla) ||
1264 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1265 set->type->adt_policy))
1266 return -IPSET_ERR_PROTOCOL;
1267 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
1268 flags, use_lineno);
1269 if (ret < 0)
1270 return ret;
1271 }
1272 }
1273 return ret;
1274}
1275
1276static int
1277ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1278 const struct nlmsghdr *nlh,
1279 const struct nlattr * const attr[])
1280{
1281 struct ip_set *set;
1282 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1283 int ret = 0;
1284
1285 if (unlikely(protocol_failed(attr) ||
1286 attr[IPSET_ATTR_SETNAME] == NULL ||
1287 attr[IPSET_ATTR_DATA] == NULL ||
1288 !flag_nested(attr[IPSET_ATTR_DATA])))
1289 return -IPSET_ERR_PROTOCOL;
1290
1291 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1292 if (set == NULL)
1293 return -ENOENT;
1294
1295 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1296 set->type->adt_policy))
1297 return -IPSET_ERR_PROTOCOL;
1298
1299 read_lock_bh(&set->lock);
1300 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
1301 read_unlock_bh(&set->lock);
1302 /* Userspace can't trigger element to be re-added */
1303 if (ret == -EAGAIN)
1304 ret = 1;
1305
1306 return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1307}
1308
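The final expression in ip_set_utest() maps the type's tri-state result onto netlink semantics: negative errors pass through, a positive "found" becomes success, and "not found" becomes -IPSET_ERR_EXIST. Spelled out (the error's numeric value below is assumed):

/* The TEST result mapping at the end of ip_set_utest(). */
#include <stdio.h>

#define IPSET_ERR_EXIST 4103	/* assumed numeric value */

static int map_test_result(int ret)
{
	return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
}

int main(void)
{
	printf("%d %d %d\n",
	       map_test_result(-22),	/* error passes through */
	       map_test_result(1),	/* found -> 0 */
	       map_test_result(0));	/* not found -> -IPSET_ERR_EXIST */
	return 0;
}
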
1309/* Get header data of a set */
1310
1311static int
1312ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1313 const struct nlmsghdr *nlh,
1314 const struct nlattr * const attr[])
1315{
1316 const struct ip_set *set;
1317 struct sk_buff *skb2;
1318 struct nlmsghdr *nlh2;
1319 ip_set_id_t index;
1320 int ret = 0;
1321
1322 if (unlikely(protocol_failed(attr) ||
1323 attr[IPSET_ATTR_SETNAME] == NULL))
1324 return -IPSET_ERR_PROTOCOL;
1325
1326 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1327 if (index == IPSET_INVALID_ID)
1328 return -ENOENT;
1329 set = ip_set_list[index];
1330
1331 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1332 if (skb2 == NULL)
1333 return -ENOMEM;
1334
1335 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1336 IPSET_CMD_HEADER);
1337 if (!nlh2)
1338 goto nlmsg_failure;
1339 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1340 NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
1341 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
1342 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family);
1343 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
1344 nlmsg_end(skb2, nlh2);
1345
1346 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1347 if (ret < 0)
1348 return ret;
1349
1350 return 0;
1351
1352nla_put_failure:
1353 nlmsg_cancel(skb2, nlh2);
1354nlmsg_failure:
1355 kfree_skb(skb2);
1356 return -EMSGSIZE;
1357}
1358
1359/* Get type data */
1360
1361static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1362 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1363 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
1364 .len = IPSET_MAXNAMELEN - 1 },
1365 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
1366};
1367
1368static int
1369ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1370 const struct nlmsghdr *nlh,
1371 const struct nlattr * const attr[])
1372{
1373 struct sk_buff *skb2;
1374 struct nlmsghdr *nlh2;
1375 u8 family, min, max;
1376 const char *typename;
1377 int ret = 0;
1378
1379 if (unlikely(protocol_failed(attr) ||
1380 attr[IPSET_ATTR_TYPENAME] == NULL ||
1381 attr[IPSET_ATTR_FAMILY] == NULL))
1382 return -IPSET_ERR_PROTOCOL;
1383
1384 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1385 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1386 ret = find_set_type_minmax(typename, family, &min, &max);
1387 if (ret)
1388 return ret;
1389
1390 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1391 if (skb2 == NULL)
1392 return -ENOMEM;
1393
1394 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1395 IPSET_CMD_TYPE);
1396 if (!nlh2)
1397 goto nlmsg_failure;
1398 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1399 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename);
1400 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family);
1401 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max);
1402 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min);
1403 nlmsg_end(skb2, nlh2);
1404
1405 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1406 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1407 if (ret < 0)
1408 return ret;
1409
1410 return 0;
1411
1412nla_put_failure:
1413 nlmsg_cancel(skb2, nlh2);
1414nlmsg_failure:
1415 kfree_skb(skb2);
1416 return -EMSGSIZE;
1417}
1418
1419/* Get protocol version */
1420
1421static const struct nla_policy
1422ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1423 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1424};
1425
1426static int
1427ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1428 const struct nlmsghdr *nlh,
1429 const struct nlattr * const attr[])
1430{
1431 struct sk_buff *skb2;
1432 struct nlmsghdr *nlh2;
1433 int ret = 0;
1434
1435 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1436 return -IPSET_ERR_PROTOCOL;
1437
1438 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1439 if (skb2 == NULL)
1440 return -ENOMEM;
1441
1442 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1443 IPSET_CMD_PROTOCOL);
1444 if (!nlh2)
1445 goto nlmsg_failure;
1446 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1447 nlmsg_end(skb2, nlh2);
1448
1449 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1450 if (ret < 0)
1451 return ret;
1452
1453 return 0;
1454
1455nla_put_failure:
1456 nlmsg_cancel(skb2, nlh2);
1457nlmsg_failure:
1458 kfree_skb(skb2);
1459 return -EMSGSIZE;
1460}
1461
1462static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1463 [IPSET_CMD_CREATE] = {
1464 .call = ip_set_create,
1465 .attr_count = IPSET_ATTR_CMD_MAX,
1466 .policy = ip_set_create_policy,
1467 },
1468 [IPSET_CMD_DESTROY] = {
1469 .call = ip_set_destroy,
1470 .attr_count = IPSET_ATTR_CMD_MAX,
1471 .policy = ip_set_setname_policy,
1472 },
1473 [IPSET_CMD_FLUSH] = {
1474 .call = ip_set_flush,
1475 .attr_count = IPSET_ATTR_CMD_MAX,
1476 .policy = ip_set_setname_policy,
1477 },
1478 [IPSET_CMD_RENAME] = {
1479 .call = ip_set_rename,
1480 .attr_count = IPSET_ATTR_CMD_MAX,
1481 .policy = ip_set_setname2_policy,
1482 },
1483 [IPSET_CMD_SWAP] = {
1484 .call = ip_set_swap,
1485 .attr_count = IPSET_ATTR_CMD_MAX,
1486 .policy = ip_set_setname2_policy,
1487 },
1488 [IPSET_CMD_LIST] = {
1489 .call = ip_set_dump,
1490 .attr_count = IPSET_ATTR_CMD_MAX,
1491 .policy = ip_set_setname_policy,
1492 },
1493 [IPSET_CMD_SAVE] = {
1494 .call = ip_set_dump,
1495 .attr_count = IPSET_ATTR_CMD_MAX,
1496 .policy = ip_set_setname_policy,
1497 },
1498 [IPSET_CMD_ADD] = {
1499 .call = ip_set_uadd,
1500 .attr_count = IPSET_ATTR_CMD_MAX,
1501 .policy = ip_set_adt_policy,
1502 },
1503 [IPSET_CMD_DEL] = {
1504 .call = ip_set_udel,
1505 .attr_count = IPSET_ATTR_CMD_MAX,
1506 .policy = ip_set_adt_policy,
1507 },
1508 [IPSET_CMD_TEST] = {
1509 .call = ip_set_utest,
1510 .attr_count = IPSET_ATTR_CMD_MAX,
1511 .policy = ip_set_adt_policy,
1512 },
1513 [IPSET_CMD_HEADER] = {
1514 .call = ip_set_header,
1515 .attr_count = IPSET_ATTR_CMD_MAX,
1516 .policy = ip_set_setname_policy,
1517 },
1518 [IPSET_CMD_TYPE] = {
1519 .call = ip_set_type,
1520 .attr_count = IPSET_ATTR_CMD_MAX,
1521 .policy = ip_set_type_policy,
1522 },
1523 [IPSET_CMD_PROTOCOL] = {
1524 .call = ip_set_protocol,
1525 .attr_count = IPSET_ATTR_CMD_MAX,
1526 .policy = ip_set_protocol_policy,
1527 },
1528};
1529
1530static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1531 .name = "ip_set",
1532 .subsys_id = NFNL_SUBSYS_IPSET,
1533 .cb_count = IPSET_MSG_MAX,
1534 .cb = ip_set_netlink_subsys_cb,
1535};
1536
1537/* Interface to iptables/ip6tables */
1538
1539static int
1540ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1541{
1542 unsigned *op;
1543 void *data;
1544 int copylen = *len, ret = 0;
1545
1546 if (!capable(CAP_NET_ADMIN))
1547 return -EPERM;
1548 if (optval != SO_IP_SET)
1549 return -EBADF;
1550 if (*len < sizeof(unsigned))
1551 return -EINVAL;
1552
1553 data = vmalloc(*len);
1554 if (!data)
1555 return -ENOMEM;
1556 if (copy_from_user(data, user, *len) != 0) {
1557 ret = -EFAULT;
1558 goto done;
1559 }
1560 op = (unsigned *) data;
1561
1562 if (*op < IP_SET_OP_VERSION) {
1563 /* Check the version at the beginning of operations */
1564 struct ip_set_req_version *req_version = data;
1565 if (req_version->version != IPSET_PROTOCOL) {
1566 ret = -EPROTO;
1567 goto done;
1568 }
1569 }
1570
1571 switch (*op) {
1572 case IP_SET_OP_VERSION: {
1573 struct ip_set_req_version *req_version = data;
1574
1575 if (*len != sizeof(struct ip_set_req_version)) {
1576 ret = -EINVAL;
1577 goto done;
1578 }
1579
1580 req_version->version = IPSET_PROTOCOL;
1581 ret = copy_to_user(user, req_version,
1582 sizeof(struct ip_set_req_version));
1583 goto done;
1584 }
1585 case IP_SET_OP_GET_BYNAME: {
1586 struct ip_set_req_get_set *req_get = data;
1587
1588 if (*len != sizeof(struct ip_set_req_get_set)) {
1589 ret = -EINVAL;
1590 goto done;
1591 }
1592 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1593 nfnl_lock();
1594 req_get->set.index = find_set_id(req_get->set.name);
1595 nfnl_unlock();
1596 goto copy;
1597 }
1598 case IP_SET_OP_GET_BYINDEX: {
1599 struct ip_set_req_get_set *req_get = data;
1600
1601 if (*len != sizeof(struct ip_set_req_get_set) ||
1602 req_get->set.index >= ip_set_max) {
1603 ret = -EINVAL;
1604 goto done;
1605 }
1606 nfnl_lock();
1607 strncpy(req_get->set.name,
1608 ip_set_list[req_get->set.index]
1609 ? ip_set_list[req_get->set.index]->name : "",
1610 IPSET_MAXNAMELEN);
1611 nfnl_unlock();
1612 goto copy;
1613 }
1614 default:
1615 ret = -EBADMSG;
1616 goto done;
1617 } /* end of switch(op) */
1618
1619copy:
1620 ret = copy_to_user(user, data, copylen);
1621
1622done:
1623 vfree(data);
1624 if (ret > 0)
1625 ret = 0;
1626 return ret;
1627}
1628
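For context, this is roughly how a userspace tool such as iptables would use the getsockopt interface above to resolve a set name to its index. The request layout and the SO_IP_SET, op and protocol constants are mirrored locally here as assumptions for illustration; the authoritative definitions live in the ipset userspace headers:

/* Hedged sketch of a SO_IP_SET name-to-index lookup. The struct
 * mirrors ip_set_req_get_set as used above; all constants are
 * assumed values, not taken from a header. Needs CAP_NET_ADMIN. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

#define SO_IP_SET		83		/* assumed */
#define IP_SET_OP_GET_BYNAME	0x00000006	/* assumed */
#define IPSET_MAXNAMELEN	32

struct req_get_set {			/* mirrors ip_set_req_get_set */
	unsigned int op;
	unsigned int version;
	union {
		char name[IPSET_MAXNAMELEN];
		unsigned short index;	/* assumed ip_set_id_t width */
	} set;
};

int main(void)
{
	struct req_get_set req;
	socklen_t len = sizeof(req);
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&req, 0, sizeof(req));
	req.op = IP_SET_OP_GET_BYNAME;
	req.version = 6;		/* IPSET_PROTOCOL as of this code */
	strncpy(req.set.name, "myset", IPSET_MAXNAMELEN - 1);

	if (getsockopt(fd, SOL_IP, SO_IP_SET, &req, &len) < 0)
		perror("getsockopt");
	else
		printf("set index: %u\n", req.set.index);
	return 0;
}
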
1629static struct nf_sockopt_ops so_set __read_mostly = {
1630 .pf = PF_INET,
1631 .get_optmin = SO_IP_SET,
1632 .get_optmax = SO_IP_SET + 1,
1633 .get = &ip_set_sockfn_get,
1634 .owner = THIS_MODULE,
1635};
1636
1637static int __init
1638ip_set_init(void)
1639{
1640 int ret;
1641
1642 if (max_sets)
1643 ip_set_max = max_sets;
1644 if (ip_set_max >= IPSET_INVALID_ID)
1645 ip_set_max = IPSET_INVALID_ID - 1;
1646
1647 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1648 GFP_KERNEL);
1649 if (!ip_set_list) {
1650 pr_err("ip_set: Unable to create ip_set_list\n");
1651 return -ENOMEM;
1652 }
1653
1654 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1655 if (ret != 0) {
1656 pr_err("ip_set: cannot register with nfnetlink.\n");
1657 kfree(ip_set_list);
1658 return ret;
1659 }
1660 ret = nf_register_sockopt(&so_set);
1661 if (ret != 0) {
1662 pr_err("SO_SET registry failed: %d\n", ret);
1663 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1664 kfree(ip_set_list);
1665 return ret;
1666 }
1667
1668 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1669 return 0;
1670}
1671
1672static void __exit
1673ip_set_fini(void)
1674{
1675 /* There can't be any existing set */
1676 nf_unregister_sockopt(&so_set);
1677 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1678 kfree(ip_set_list);
1679 pr_debug("these are the famous last words\n");
1680}
1681
1682module_init(ip_set_init);
1683module_exit(ip_set_fini);
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
new file mode 100644
index 00000000000..8d522721268
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -0,0 +1,141 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Get Layer-4 data from the packets */
9
10#include <linux/ip.h>
11#include <linux/skbuff.h>
12#include <linux/icmp.h>
13#include <linux/icmpv6.h>
14#include <linux/netfilter_ipv6/ip6_tables.h>
15#include <net/ip.h>
16#include <net/ipv6.h>
17
18#include <linux/netfilter/ipset/ip_set_getport.h>
19
20/* We must handle non-linear skbs */
21static bool
22get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
23 bool src, __be16 *port, u8 *proto)
24{
25 switch (protocol) {
26 case IPPROTO_TCP: {
27 struct tcphdr _tcph;
28 const struct tcphdr *th;
29
30 th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
31 if (th == NULL)
32 /* No choice either */
33 return false;
34
35 *port = src ? th->source : th->dest;
36 break;
37 }
38 case IPPROTO_UDP: {
39 struct udphdr _udph;
40 const struct udphdr *uh;
41
42 uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
43 if (uh == NULL)
44 /* No choice either */
45 return false;
46
47 *port = src ? uh->source : uh->dest;
48 break;
49 }
50 case IPPROTO_ICMP: {
51 struct icmphdr _ich;
52 const struct icmphdr *ic;
53
54 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
55 if (ic == NULL)
56 return false;
57
58 *port = (__force __be16)htons((ic->type << 8) | ic->code);
59 break;
60 }
61 case IPPROTO_ICMPV6: {
62 struct icmp6hdr _ich;
63 const struct icmp6hdr *ic;
64
65 ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
66 if (ic == NULL)
67 return false;
68
69 *port = (__force __be16)
70 htons((ic->icmp6_type << 8) | ic->icmp6_code);
71 break;
72 }
73 default:
74 break;
75 }
76 *proto = protocol;
77
78 return true;
79}
80
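Since ICMP and ICMPv6 have no ports, get_port() above packs type and code into the 16-bit port slot as htons(type << 8 | code). A small round-trip demonstration:

/* Encode and decode the ICMP type/code pair the way get_port()
 * stores it in the __be16 port field. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint8_t type = 8, code = 0;          /* ICMP echo request */
	uint16_t port = htons((uint16_t)(type << 8 | code));

	printf("wire port: 0x%04x\n", ntohs(port));
	printf("type %u code %u\n", ntohs(port) >> 8, ntohs(port) & 0xff);
	return 0;
}
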
81bool
82ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
83 __be16 *port, u8 *proto)
84{
85 const struct iphdr *iph = ip_hdr(skb);
86 unsigned int protooff = ip_hdrlen(skb);
87 int protocol = iph->protocol;
88
89 /* See comments at tcp_match in ip_tables.c */
90 if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET))
91 return false;
92
93 return get_port(skb, protocol, protooff, src, port, proto);
94}
95EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
96
97#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
98bool
99ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
100 __be16 *port, u8 *proto)
101{
102 int protoff;
103 u8 nexthdr;
104
105 nexthdr = ipv6_hdr(skb)->nexthdr;
106 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
107 if (protoff < 0)
108 return false;
109
110 return get_port(skb, nexthdr, protoff, src, port, proto);
111}
112EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
113#endif
114
115bool
116ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
117{
118 bool ret;
119 u8 proto;
120
121 switch (pf) {
122 case AF_INET:
123 ret = ip_set_get_ip4_port(skb, src, port, &proto);
124 break;
125 case AF_INET6:
126 ret = ip_set_get_ip6_port(skb, src, port, &proto);
127 break;
128 default:
129 return false;
130 }
131 if (!ret)
132 return ret;
133 switch (proto) {
134 case IPPROTO_TCP:
135 case IPPROTO_UDP:
136 return true;
137 default:
138 return false;
139 }
140}
141EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
new file mode 100644
index 00000000000..43bcce20012
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -0,0 +1,464 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_hash.h>
26
27MODULE_LICENSE("GPL");
28MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
29MODULE_DESCRIPTION("hash:ip type of IP sets");
30MODULE_ALIAS("ip_set_hash:ip");
31
32/* Type specific function prefix */
33#define TYPE hash_ip
34
35static bool
36hash_ip_same_set(const struct ip_set *a, const struct ip_set *b);
37
38#define hash_ip4_same_set hash_ip_same_set
39#define hash_ip6_same_set hash_ip_same_set
40
41/* The type variant functions: IPv4 */
42
43/* Member elements without timeout */
44struct hash_ip4_elem {
45 __be32 ip;
46};
47
48/* Member elements with timeout support */
49struct hash_ip4_telem {
50 __be32 ip;
51 unsigned long timeout;
52};
53
54static inline bool
55hash_ip4_data_equal(const struct hash_ip4_elem *ip1,
56 const struct hash_ip4_elem *ip2)
57{
58 return ip1->ip == ip2->ip;
59}
60
61static inline bool
62hash_ip4_data_isnull(const struct hash_ip4_elem *elem)
63{
64 return elem->ip == 0;
65}
66
67static inline void
68hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src)
69{
70 dst->ip = src->ip;
71}
72
73/* Zero valued IP addresses cannot be stored */
74static inline void
75hash_ip4_data_zero_out(struct hash_ip4_elem *elem)
76{
77 elem->ip = 0;
78}
79
80static inline bool
81hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data)
82{
83 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
84 return 0;
85
86nla_put_failure:
87 return 1;
88}
89
90static bool
91hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data)
92{
93 const struct hash_ip4_telem *tdata =
94 (const struct hash_ip4_telem *)data;
95
96 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
97 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
98 htonl(ip_set_timeout_get(tdata->timeout)));
99
100 return 0;
101
102nla_put_failure:
103 return 1;
104}
105
106#define IP_SET_HASH_WITH_NETMASK
107#define PF 4
108#define HOST_MASK 32
109#include <linux/netfilter/ipset/ip_set_ahash.h>
110
111static int
112hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
113 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
114{
115 const struct ip_set_hash *h = set->data;
116 ipset_adtfn adtfn = set->variant->adt[adt];
117 __be32 ip;
118
119 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip);
120 ip &= ip_set_netmask(h->netmask);
121 if (ip == 0)
122 return -EINVAL;
123
124 return adtfn(set, &ip, h->timeout);
125}
126
127static int
128hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
129 enum ipset_adt adt, u32 *lineno, u32 flags)
130{
131 const struct ip_set_hash *h = set->data;
132 ipset_adtfn adtfn = set->variant->adt[adt];
133 u32 ip, ip_to, hosts, timeout = h->timeout;
134 __be32 nip;
135 int ret = 0;
136
137 if (unlikely(!tb[IPSET_ATTR_IP] ||
138 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
139 return -IPSET_ERR_PROTOCOL;
140
141 if (tb[IPSET_ATTR_LINENO])
142 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
143
144 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
145 if (ret)
146 return ret;
147
148 ip &= ip_set_hostmask(h->netmask);
149
150 if (tb[IPSET_ATTR_TIMEOUT]) {
151 if (!with_timeout(h->timeout))
152 return -IPSET_ERR_TIMEOUT;
153 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
154 }
155
156 if (adt == IPSET_TEST) {
157 nip = htonl(ip);
158 if (nip == 0)
159 return -IPSET_ERR_HASH_ELEM;
160 return adtfn(set, &nip, timeout);
161 }
162
163 if (tb[IPSET_ATTR_IP_TO]) {
164 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
165 if (ret)
166 return ret;
167 if (ip > ip_to)
168 swap(ip, ip_to);
169 } else if (tb[IPSET_ATTR_CIDR]) {
170 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
171
172 if (cidr > 32)
173 return -IPSET_ERR_INVALID_CIDR;
174 ip &= ip_set_hostmask(cidr);
175 ip_to = ip | ~ip_set_hostmask(cidr);
176 } else
177 ip_to = ip;
178
179 hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
180
181 for (; !before(ip_to, ip); ip += hosts) {
182 nip = htonl(ip);
183 if (nip == 0)
184 return -IPSET_ERR_HASH_ELEM;
185 ret = adtfn(set, &nip, timeout);
186
187 if (ret && !ip_set_eexist(ret, flags))
188 return ret;
189 else
190 ret = 0;
191 }
192 return ret;
193}
194
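When the set was created with a netmask m < 32, each stored element stands for a whole /m network, so the range loop in hash_ip4_uadt() advances by hosts = 2^(32-m) addresses per iteration (written 2 << (32 - m - 1) above). A sketch with host-order addresses; the kernel uses before() rather than a plain comparison to stay safe across wraparound:

/* Walk an address range with the same /m stride as above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t  m = 30;
	uint32_t ip = 0xc0a80000;            /* 192.168.0.0 */
	uint32_t ip_to = 0xc0a8000f;         /* 192.168.0.15 */
	uint32_t hosts = m == 32 ? 1U : 2U << (32 - m - 1);
	uint32_t n;

	for (n = ip; n <= ip_to; n += hosts)  /* 4 networks for a /30 */
		printf("add %u.%u.%u.%u/%u\n",
		       n >> 24, (n >> 16) & 0xff, (n >> 8) & 0xff,
		       n & 0xff, m);
	return 0;
}
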
195static bool
196hash_ip_same_set(const struct ip_set *a, const struct ip_set *b)
197{
198 const struct ip_set_hash *x = a->data;
199 const struct ip_set_hash *y = b->data;
200
201 /* Resizing changes htable_bits, so we ignore it */
202 return x->maxelem == y->maxelem &&
203 x->timeout == y->timeout &&
204 x->netmask == y->netmask;
205}
206
207/* The type variant functions: IPv6 */
208
209struct hash_ip6_elem {
210 union nf_inet_addr ip;
211};
212
213struct hash_ip6_telem {
214 union nf_inet_addr ip;
215 unsigned long timeout;
216};
217
218static inline bool
219hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
220 const struct hash_ip6_elem *ip2)
221{
222 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0;
223}
224
225static inline bool
226hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
227{
228 return ipv6_addr_any(&elem->ip.in6);
229}
230
231static inline void
232hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
233{
234 ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
235}
236
237static inline void
238hash_ip6_data_zero_out(struct hash_ip6_elem *elem)
239{
240 ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0);
241}
242
243static inline void
244ip6_netmask(union nf_inet_addr *ip, u8 prefix)
245{
246 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
247 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
248 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
249 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
250}
251
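ip6_netmask() above applies a /prefix mask one 32-bit word at a time using the kernel's precomputed ip_set_netmask6() table. A self-contained version that computes each word's mask instead:

/* Mask an IPv6 address to a prefix, word by word, like
 * ip6_netmask() but without the precomputed table. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static void mask6(uint32_t ip6[4], unsigned prefix)
{
	unsigned w;

	for (w = 0; w < 4; w++) {
		unsigned bits = prefix > 32 * w ?
			(prefix - 32 * w > 32 ? 32 : prefix - 32 * w) : 0;
		/* words are in network byte order, like ip->ip6[] */
		uint32_t m = bits ? htonl(~0U << (32 - bits)) : 0;

		ip6[w] &= m;
	}
}

int main(void)
{
	uint32_t a[4];
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET6, "2001:db8::dead:beef", a);
	mask6(a, 64);
	inet_ntop(AF_INET6, a, buf, sizeof(buf));
	printf("masked: %s\n", buf);	/* 2001:db8:: */
	return 0;
}
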
252static bool
253hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data)
254{
255 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
256 return 0;
257
258nla_put_failure:
259 return 1;
260}
261
262static bool
263hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data)
264{
265 const struct hash_ip6_telem *e =
266 (const struct hash_ip6_telem *)data;
267
268 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
269 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
270 htonl(ip_set_timeout_get(e->timeout)));
271 return 0;
272
273nla_put_failure:
274 return 1;
275}
276
277#undef PF
278#undef HOST_MASK
279
280#define PF 6
281#define HOST_MASK 128
282#include <linux/netfilter/ipset/ip_set_ahash.h>
283
284static int
285hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
286 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
287{
288 const struct ip_set_hash *h = set->data;
289 ipset_adtfn adtfn = set->variant->adt[adt];
290 union nf_inet_addr ip;
291
292 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6);
293 ip6_netmask(&ip, h->netmask);
294 if (ipv6_addr_any(&ip.in6))
295 return -EINVAL;
296
297 return adtfn(set, &ip, h->timeout);
298}
299
300static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
301 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
302 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
303 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
304};
305
306static int
307hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
308 enum ipset_adt adt, u32 *lineno, u32 flags)
309{
310 const struct ip_set_hash *h = set->data;
311 ipset_adtfn adtfn = set->variant->adt[adt];
312 union nf_inet_addr ip;
313 u32 timeout = h->timeout;
314 int ret;
315
316 if (unlikely(!tb[IPSET_ATTR_IP] ||
317 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
318 tb[IPSET_ATTR_IP_TO] ||
319 tb[IPSET_ATTR_CIDR]))
320 return -IPSET_ERR_PROTOCOL;
321
322 if (tb[IPSET_ATTR_LINENO])
323 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
324
325 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip);
326 if (ret)
327 return ret;
328
329 ip6_netmask(&ip, h->netmask);
330 if (ipv6_addr_any(&ip.in6))
331 return -IPSET_ERR_HASH_ELEM;
332
333 if (tb[IPSET_ATTR_TIMEOUT]) {
334 if (!with_timeout(h->timeout))
335 return -IPSET_ERR_TIMEOUT;
336 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
337 }
338
339 ret = adtfn(set, &ip, timeout);
340
341 return ip_set_eexist(ret, flags) ? 0 : ret;
342}
343
344/* Create hash:ip type of sets */
345
346static int
347hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
348{
349 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
350 u8 netmask, hbits;
351 struct ip_set_hash *h;
352
353 if (!(set->family == AF_INET || set->family == AF_INET6))
354 return -IPSET_ERR_INVALID_FAMILY;
355 netmask = set->family == AF_INET ? 32 : 128;
356 pr_debug("Create set %s with family %s\n",
357 set->name, set->family == AF_INET ? "inet" : "inet6");
358
359 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
360 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
361 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
362 return -IPSET_ERR_PROTOCOL;
363
364 if (tb[IPSET_ATTR_HASHSIZE]) {
365 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
366 if (hashsize < IPSET_MIMINAL_HASHSIZE)
367 hashsize = IPSET_MIMINAL_HASHSIZE;
368 }
369
370 if (tb[IPSET_ATTR_MAXELEM])
371 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
372
373 if (tb[IPSET_ATTR_NETMASK]) {
374 netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
375
376 if ((set->family == AF_INET && netmask > 32) ||
377 (set->family == AF_INET6 && netmask > 128) ||
378 netmask == 0)
379 return -IPSET_ERR_INVALID_NETMASK;
380 }
381
382 h = kzalloc(sizeof(*h), GFP_KERNEL);
383 if (!h)
384 return -ENOMEM;
385
386 h->maxelem = maxelem;
387 h->netmask = netmask;
388 get_random_bytes(&h->initval, sizeof(h->initval));
389 h->timeout = IPSET_NO_TIMEOUT;
390
391 hbits = htable_bits(hashsize);
392 h->table = ip_set_alloc(
393 sizeof(struct htable)
394 + jhash_size(hbits) * sizeof(struct hbucket));
395 if (!h->table) {
396 kfree(h);
397 return -ENOMEM;
398 }
399 h->table->htable_bits = hbits;
400
401 set->data = h;
402
403 if (tb[IPSET_ATTR_TIMEOUT]) {
404 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
405
406 set->variant = set->family == AF_INET
407 ? &hash_ip4_tvariant : &hash_ip6_tvariant;
408
409 if (set->family == AF_INET)
410 hash_ip4_gc_init(set);
411 else
412 hash_ip6_gc_init(set);
413 } else {
414 set->variant = set->family == AF_INET
415 ? &hash_ip4_variant : &hash_ip6_variant;
416 }
417
418 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
419 set->name, jhash_size(h->table->htable_bits),
420 h->table->htable_bits, h->maxelem, set->data, h->table);
421
422 return 0;
423}
424
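hash_ip_create() converts the requested hashsize into a power-of-two bucket count: htable_bits() yields the exponent and jhash_size(bits) is 2^bits. A hypothetical stand-in showing the rounding (my_htable_bits() is not the kernel helper):

/* Round a requested hash size up to a power-of-two bucket count,
 * as hash_ip_create() does via htable_bits()/jhash_size(). */
#include <stdio.h>

static unsigned char my_htable_bits(unsigned int hashsize)
{
	unsigned char bits = 0;

	while ((1U << bits) < hashsize)   /* round up to a power of two */
		bits++;
	return bits;
}

int main(void)
{
	unsigned int hashsize = 1000;     /* e.g. user-requested */
	unsigned char bits = my_htable_bits(hashsize);

	printf("hashsize %u -> %u bits -> %u buckets\n",
	       hashsize, bits, 1U << bits);   /* like jhash_size(bits) */
	return 0;
}
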
425static struct ip_set_type hash_ip_type __read_mostly = {
426 .name = "hash:ip",
427 .protocol = IPSET_PROTOCOL,
428 .features = IPSET_TYPE_IP,
429 .dimension = IPSET_DIM_ONE,
430 .family = AF_UNSPEC,
431 .revision = 0,
432 .create = hash_ip_create,
433 .create_policy = {
434 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
435 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
436 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
437 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
438 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
439 [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
440 },
441 .adt_policy = {
442 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
443 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
444 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
445 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
446 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
447 },
448 .me = THIS_MODULE,
449};
450
451static int __init
452hash_ip_init(void)
453{
454 return ip_set_type_register(&hash_ip_type);
455}
456
457static void __exit
458hash_ip_fini(void)
459{
460 ip_set_type_unregister(&hash_ip_type);
461}
462
463module_init(hash_ip_init);
464module_exit(hash_ip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
new file mode 100644
index 00000000000..b9214145d35
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -0,0 +1,530 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip,port type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_getport.h>
26#include <linux/netfilter/ipset/ip_set_hash.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
30MODULE_DESCRIPTION("hash:ip,port type of IP sets");
31MODULE_ALIAS("ip_set_hash:ip,port");
32
33/* Type specific function prefix */
34#define TYPE hash_ipport
35
36static bool
37hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b);
38
39#define hash_ipport4_same_set hash_ipport_same_set
40#define hash_ipport6_same_set hash_ipport_same_set
41
42/* The type variant functions: IPv4 */
43
44/* Member elements without timeout */
45struct hash_ipport4_elem {
46 __be32 ip;
47 __be16 port;
48 u8 proto;
49 u8 padding;
50};
51
52/* Member elements with timeout support */
53struct hash_ipport4_telem {
54 __be32 ip;
55 __be16 port;
56 u8 proto;
57 u8 padding;
58 unsigned long timeout;
59};
60
61static inline bool
62hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
63 const struct hash_ipport4_elem *ip2)
64{
65 return ip1->ip == ip2->ip &&
66 ip1->port == ip2->port &&
67 ip1->proto == ip2->proto;
68}
69
70static inline bool
71hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem)
72{
73 return elem->proto == 0;
74}
75
76static inline void
77hash_ipport4_data_copy(struct hash_ipport4_elem *dst,
78 const struct hash_ipport4_elem *src)
79{
80 dst->ip = src->ip;
81 dst->port = src->port;
82 dst->proto = src->proto;
83}
84
85static inline void
86hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem)
87{
88 elem->proto = 0;
89}
90
91static bool
92hash_ipport4_data_list(struct sk_buff *skb,
93 const struct hash_ipport4_elem *data)
94{
95 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
96 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
97 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
98 return 0;
99
100nla_put_failure:
101 return 1;
102}
103
104static bool
105hash_ipport4_data_tlist(struct sk_buff *skb,
106 const struct hash_ipport4_elem *data)
107{
108 const struct hash_ipport4_telem *tdata =
109 (const struct hash_ipport4_telem *)data;
110
111 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
112 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
113 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
114 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
115 htonl(ip_set_timeout_get(tdata->timeout)));
116
117 return 0;
118
119nla_put_failure:
120 return 1;
121}
122
123#define PF 4
124#define HOST_MASK 32
125#include <linux/netfilter/ipset/ip_set_ahash.h>
126
127static int
128hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
129 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
130{
131 const struct ip_set_hash *h = set->data;
132 ipset_adtfn adtfn = set->variant->adt[adt];
133 struct hash_ipport4_elem data = { };
134
135 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
136 &data.port, &data.proto))
137 return -EINVAL;
138
139 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
140
141 return adtfn(set, &data, h->timeout);
142}
143
144static int
145hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
146 enum ipset_adt adt, u32 *lineno, u32 flags)
147{
148 const struct ip_set_hash *h = set->data;
149 ipset_adtfn adtfn = set->variant->adt[adt];
150 struct hash_ipport4_elem data = { };
151 u32 ip, ip_to, p, port, port_to;
152 u32 timeout = h->timeout;
153 bool with_ports = false;
154 int ret;
155
156 if (unlikely(!tb[IPSET_ATTR_IP] ||
157 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
158 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
159 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
160 return -IPSET_ERR_PROTOCOL;
161
162 if (tb[IPSET_ATTR_LINENO])
163 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
164
165 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
166 if (ret)
167 return ret;
168
169 if (tb[IPSET_ATTR_PORT])
170 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
171 else
172 return -IPSET_ERR_PROTOCOL;
173
174 if (tb[IPSET_ATTR_PROTO]) {
175 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
176 with_ports = ip_set_proto_with_ports(data.proto);
177
178 if (data.proto == 0)
179 return -IPSET_ERR_INVALID_PROTO;
180 } else
181 return -IPSET_ERR_MISSING_PROTO;
182
183 if (!(with_ports || data.proto == IPPROTO_ICMP))
184 data.port = 0;
185
186 if (tb[IPSET_ATTR_TIMEOUT]) {
187 if (!with_timeout(h->timeout))
188 return -IPSET_ERR_TIMEOUT;
189 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
190 }
191
192 if (adt == IPSET_TEST ||
193 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
194 tb[IPSET_ATTR_PORT_TO])) {
195 ret = adtfn(set, &data, timeout);
196 return ip_set_eexist(ret, flags) ? 0 : ret;
197 }
198
199 ip = ntohl(data.ip);
200 if (tb[IPSET_ATTR_IP_TO]) {
201 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
202 if (ret)
203 return ret;
204 if (ip > ip_to)
205 swap(ip, ip_to);
206 } else if (tb[IPSET_ATTR_CIDR]) {
207 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
208
209 if (cidr > 32)
210 return -IPSET_ERR_INVALID_CIDR;
211 ip &= ip_set_hostmask(cidr);
212 ip_to = ip | ~ip_set_hostmask(cidr);
213 } else
214 ip_to = ip;
215
216 port_to = port = ntohs(data.port);
217 if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
218 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
219 if (port > port_to)
220 swap(port, port_to);
221 }
222
223 for (; !before(ip_to, ip); ip++)
224 for (p = port; p <= port_to; p++) {
225 data.ip = htonl(ip);
226 data.port = htons(p);
227 ret = adtfn(set, &data, timeout);
228
229 if (ret && !ip_set_eexist(ret, flags))
230 return ret;
231 else
232 ret = 0;
233 }
234 return ret;
235}
236
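With both an address range and a port range given, the nested loops above expand their cross product into individual elements, which is why a careless range can explode the set; maxelem caps the growth. A toy expansion with made-up bounds:

/* Expand an (ip range) x (port range) cross product into elements,
 * as the add loop in hash_ipport4_uadt() does. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ip = 1, ip_to = 2;        /* two addresses */
	uint16_t port = 80, port_to = 82;  /* three ports */
	uint32_t n;
	uint16_t p;

	for (n = ip; n <= ip_to; n++)
		for (p = port; p <= port_to; p++)
			printf("add element ip=%u port=%u\n", n, p);
	/* 2 x 3 = 6 elements; large ranges multiply quickly. */
	return 0;
}
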
237static bool
238hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b)
239{
240 const struct ip_set_hash *x = a->data;
241 const struct ip_set_hash *y = b->data;
242
243 /* Resizing changes htable_bits, so we ignore it */
244 return x->maxelem == y->maxelem &&
245 x->timeout == y->timeout;
246}
247
248/* The type variant functions: IPv6 */
249
250struct hash_ipport6_elem {
251 union nf_inet_addr ip;
252 __be16 port;
253 u8 proto;
254 u8 padding;
255};
256
257struct hash_ipport6_telem {
258 union nf_inet_addr ip;
259 __be16 port;
260 u8 proto;
261 u8 padding;
262 unsigned long timeout;
263};
264
265static inline bool
266hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
267 const struct hash_ipport6_elem *ip2)
268{
269 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
270 ip1->port == ip2->port &&
271 ip1->proto == ip2->proto;
272}
273
274static inline bool
275hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem)
276{
277 return elem->proto == 0;
278}
279
280static inline void
281hash_ipport6_data_copy(struct hash_ipport6_elem *dst,
282 const struct hash_ipport6_elem *src)
283{
284 memcpy(dst, src, sizeof(*dst));
285}
286
287static inline void
288hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem)
289{
290 elem->proto = 0;
291}
292
293static bool
294hash_ipport6_data_list(struct sk_buff *skb,
295 const struct hash_ipport6_elem *data)
296{
297 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
298 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
299 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
300 return 0;
301
302nla_put_failure:
303 return 1;
304}
305
306static bool
307hash_ipport6_data_tlist(struct sk_buff *skb,
308 const struct hash_ipport6_elem *data)
309{
310 const struct hash_ipport6_telem *e =
311 (const struct hash_ipport6_telem *)data;
312
313 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
314 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
315 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
316 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
317 htonl(ip_set_timeout_get(e->timeout)));
318 return 0;
319
320nla_put_failure:
321 return 1;
322}
323
324#undef PF
325#undef HOST_MASK
326
327#define PF 6
328#define HOST_MASK 128
329#include <linux/netfilter/ipset/ip_set_ahash.h>
330
331static int
332hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
333 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
334{
335 const struct ip_set_hash *h = set->data;
336 ipset_adtfn adtfn = set->variant->adt[adt];
337 struct hash_ipport6_elem data = { };
338
339 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
340 &data.port, &data.proto))
341 return -EINVAL;
342
343 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
344
345 return adtfn(set, &data, h->timeout);
346}
347
348static int
349hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
350 enum ipset_adt adt, u32 *lineno, u32 flags)
351{
352 const struct ip_set_hash *h = set->data;
353 ipset_adtfn adtfn = set->variant->adt[adt];
354 struct hash_ipport6_elem data = { };
355 u32 port, port_to;
356 u32 timeout = h->timeout;
357 bool with_ports = false;
358 int ret;
359
360 if (unlikely(!tb[IPSET_ATTR_IP] ||
361 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
362 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
363 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
364 tb[IPSET_ATTR_IP_TO] ||
365 tb[IPSET_ATTR_CIDR]))
366 return -IPSET_ERR_PROTOCOL;
367
368 if (tb[IPSET_ATTR_LINENO])
369 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
370
371 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
372 if (ret)
373 return ret;
374
375 if (tb[IPSET_ATTR_PORT])
376 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
377 else
378 return -IPSET_ERR_PROTOCOL;
379
380 if (tb[IPSET_ATTR_PROTO]) {
381 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
382 with_ports = ip_set_proto_with_ports(data.proto);
383
384 if (data.proto == 0)
385 return -IPSET_ERR_INVALID_PROTO;
386 } else
387 return -IPSET_ERR_MISSING_PROTO;
388
389 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
390 data.port = 0;
391
392 if (tb[IPSET_ATTR_TIMEOUT]) {
393 if (!with_timeout(h->timeout))
394 return -IPSET_ERR_TIMEOUT;
395 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
396 }
397
398 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
399 ret = adtfn(set, &data, timeout);
400 return ip_set_eexist(ret, flags) ? 0 : ret;
401 }
402
403 port = ntohs(data.port);
404 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
405 if (port > port_to)
406 swap(port, port_to);
407
408 for (; port <= port_to; port++) {
409 data.port = htons(port);
410 ret = adtfn(set, &data, timeout);
411
412 if (ret && !ip_set_eexist(ret, flags))
413 return ret;
414 else
415 ret = 0;
416 }
417 return ret;
418}
419
420/* Create hash:ip type of sets */
421
422static int
423hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
424{
425 struct ip_set_hash *h;
426 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
427 u8 hbits;
428
429 if (!(set->family == AF_INET || set->family == AF_INET6))
430 return -IPSET_ERR_INVALID_FAMILY;
431
432 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
433 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
434 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
435 return -IPSET_ERR_PROTOCOL;
436
437 if (tb[IPSET_ATTR_HASHSIZE]) {
438 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
439 if (hashsize < IPSET_MIMINAL_HASHSIZE)
440 hashsize = IPSET_MIMINAL_HASHSIZE;
441 }
442
443 if (tb[IPSET_ATTR_MAXELEM])
444 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
445
446 h = kzalloc(sizeof(*h), GFP_KERNEL);
447 if (!h)
448 return -ENOMEM;
449
450 h->maxelem = maxelem;
451 get_random_bytes(&h->initval, sizeof(h->initval));
452 h->timeout = IPSET_NO_TIMEOUT;
453
454 hbits = htable_bits(hashsize);
455 h->table = ip_set_alloc(
456 sizeof(struct htable)
457 + jhash_size(hbits) * sizeof(struct hbucket));
458 if (!h->table) {
459 kfree(h);
460 return -ENOMEM;
461 }
462 h->table->htable_bits = hbits;
463
464 set->data = h;
465
466 if (tb[IPSET_ATTR_TIMEOUT]) {
467 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
468
469 set->variant = set->family == AF_INET
470 ? &hash_ipport4_tvariant : &hash_ipport6_tvariant;
471
472 if (set->family == AF_INET)
473 hash_ipport4_gc_init(set);
474 else
475 hash_ipport6_gc_init(set);
476 } else {
477 set->variant = set->family == AF_INET
478 ? &hash_ipport4_variant : &hash_ipport6_variant;
479 }
480
481 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
482 set->name, jhash_size(h->table->htable_bits),
483 h->table->htable_bits, h->maxelem, set->data, h->table);
484
485 return 0;
486}
487
488static struct ip_set_type hash_ipport_type __read_mostly = {
489 .name = "hash:ip,port",
490 .protocol = IPSET_PROTOCOL,
491 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
492 .dimension = IPSET_DIM_TWO,
493 .family = AF_UNSPEC,
494 .revision = 0,
495 .create = hash_ipport_create,
496 .create_policy = {
497 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
498 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
499 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
500 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
501 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
502 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
503 },
504 .adt_policy = {
505 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
506 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
507 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
508 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
509 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
510 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
511 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
512 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
513 },
514 .me = THIS_MODULE,
515};
516
517static int __init
518hash_ipport_init(void)
519{
520 return ip_set_type_register(&hash_ipport_type);
521}
522
523static void __exit
524hash_ipport_fini(void)
525{
526 ip_set_type_unregister(&hash_ipport_type);
527}
528
529module_init(hash_ipport_init);
530module_exit(hash_ipport_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
new file mode 100644
index 00000000000..4642872df6e
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -0,0 +1,548 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip,port,ip type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_getport.h>
26#include <linux/netfilter/ipset/ip_set_hash.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
30MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets");
31MODULE_ALIAS("ip_set_hash:ip,port,ip");
32
33/* Type specific function prefix */
34#define TYPE hash_ipportip
35
36static bool
37hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b);
38
39#define hash_ipportip4_same_set hash_ipportip_same_set
40#define hash_ipportip6_same_set hash_ipportip_same_set
41
42/* The type variant functions: IPv4 */
43
44/* Member elements without timeout */
45struct hash_ipportip4_elem {
46 __be32 ip;
47 __be32 ip2;
48 __be16 port;
49 u8 proto;
50 u8 padding;
51};
52
53/* Member elements with timeout support */
54struct hash_ipportip4_telem {
55 __be32 ip;
56 __be32 ip2;
57 __be16 port;
58 u8 proto;
59 u8 padding;
60 unsigned long timeout;
61};
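/* The timeout variant must keep the plain element as a layout prefix:
 * the *_tlist() dump routines cast a stored element to the telem type
 * to reach the trailing timeout field.
 */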
62
63static inline bool
64hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
65 const struct hash_ipportip4_elem *ip2)
66{
67 return ip1->ip == ip2->ip &&
68 ip1->ip2 == ip2->ip2 &&
69 ip1->port == ip2->port &&
70 ip1->proto == ip2->proto;
71}
72
73static inline bool
74hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem)
75{
76 return elem->proto == 0;
77}
78
79static inline void
80hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst,
81 const struct hash_ipportip4_elem *src)
82{
83 memcpy(dst, src, sizeof(*dst));
84}
85
86static inline void
87hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem)
88{
89 elem->proto = 0;
90}
91
92static bool
93hash_ipportip4_data_list(struct sk_buff *skb,
94 const struct hash_ipportip4_elem *data)
95{
96 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
97 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2);
98 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
99 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
100 return 0;
101
102nla_put_failure:
103 return 1;
104}
105
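/* The NLA_PUT_* helpers jump to nla_put_failure when the netlink
 * message runs out of room; the nonzero return tells the generic dump
 * code to stop here and continue in a later dump message.
 */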
106static bool
107hash_ipportip4_data_tlist(struct sk_buff *skb,
108 const struct hash_ipportip4_elem *data)
109{
110 const struct hash_ipportip4_telem *tdata =
111 (const struct hash_ipportip4_telem *)data;
112
113 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
114 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2);
115 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
116	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, tdata->proto);
117 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
118 htonl(ip_set_timeout_get(tdata->timeout)));
119
120 return 0;
121
122nla_put_failure:
123 return 1;
124}
125
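/* Instantiate the generic ahash engine for IPv4: with TYPE, PF and
 * HOST_MASK set, ip_set_ahash.h expands into the hash_ipportip4_*
 * variants and their add/del/test, resize and gc handlers.
 */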
126#define PF 4
127#define HOST_MASK 32
128#include <linux/netfilter/ipset/ip_set_ahash.h>
129
130static int
131hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
132 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
133{
134 const struct ip_set_hash *h = set->data;
135 ipset_adtfn adtfn = set->variant->adt[adt];
136 struct hash_ipportip4_elem data = { };
137
138 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
139 &data.port, &data.proto))
140 return -EINVAL;
141
142 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
143 ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
144
145 return adtfn(set, &data, h->timeout);
146}
147
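/* kadt above serves the packet path: the dimension flags select source
 * or destination for each of ip, port and ip2.  uadt below handles
 * add/del/test requests arriving over netlink from userspace.
 */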
148static int
149hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
150 enum ipset_adt adt, u32 *lineno, u32 flags)
151{
152 const struct ip_set_hash *h = set->data;
153 ipset_adtfn adtfn = set->variant->adt[adt];
154 struct hash_ipportip4_elem data = { };
155 u32 ip, ip_to, p, port, port_to;
156 u32 timeout = h->timeout;
157 bool with_ports = false;
158 int ret;
159
160 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
161 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
162 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
163 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
164 return -IPSET_ERR_PROTOCOL;
165
166 if (tb[IPSET_ATTR_LINENO])
167 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
168
169 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
170 if (ret)
171 return ret;
172
173 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
174 if (ret)
175 return ret;
176
177 if (tb[IPSET_ATTR_PORT])
178 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
179 else
180 return -IPSET_ERR_PROTOCOL;
181
182 if (tb[IPSET_ATTR_PROTO]) {
183 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
184 with_ports = ip_set_proto_with_ports(data.proto);
185
186 if (data.proto == 0)
187 return -IPSET_ERR_INVALID_PROTO;
188 } else
189 return -IPSET_ERR_MISSING_PROTO;
190
191 if (!(with_ports || data.proto == IPPROTO_ICMP))
192 data.port = 0;
193
194 if (tb[IPSET_ATTR_TIMEOUT]) {
195 if (!with_timeout(h->timeout))
196 return -IPSET_ERR_TIMEOUT;
197 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
198 }
199
200 if (adt == IPSET_TEST ||
201 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
202 tb[IPSET_ATTR_PORT_TO])) {
203 ret = adtfn(set, &data, timeout);
204 return ip_set_eexist(ret, flags) ? 0 : ret;
205 }
206
207 ip = ntohl(data.ip);
208 if (tb[IPSET_ATTR_IP_TO]) {
209 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
210 if (ret)
211 return ret;
212 if (ip > ip_to)
213 swap(ip, ip_to);
214 } else if (tb[IPSET_ATTR_CIDR]) {
215 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
216
217 if (cidr > 32)
218 return -IPSET_ERR_INVALID_CIDR;
219 ip &= ip_set_hostmask(cidr);
220 ip_to = ip | ~ip_set_hostmask(cidr);
221 } else
222 ip_to = ip;
223
224 port_to = port = ntohs(data.port);
225 if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
226 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
227 if (port > port_to)
228 swap(port, port_to);
229 }
230
231 for (; !before(ip_to, ip); ip++)
232 for (p = port; p <= port_to; p++) {
233 data.ip = htonl(ip);
234 data.port = htons(p);
235 ret = adtfn(set, &data, timeout);
236
237 if (ret && !ip_set_eexist(ret, flags))
238 return ret;
239 else
240 ret = 0;
241 }
242 return ret;
243}
244
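/* Used by the core (e.g. on swap) to check that two sets of this type
 * were created with compatible parameters.
 */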
245static bool
246hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b)
247{
248 const struct ip_set_hash *x = a->data;
249 const struct ip_set_hash *y = b->data;
250
251 /* Resizing changes htable_bits, so we ignore it */
252 return x->maxelem == y->maxelem &&
253 x->timeout == y->timeout;
254}
255
256/* The type variant functions: IPv6 */
257
258struct hash_ipportip6_elem {
259 union nf_inet_addr ip;
260 union nf_inet_addr ip2;
261 __be16 port;
262 u8 proto;
263 u8 padding;
264};
265
266struct hash_ipportip6_telem {
267 union nf_inet_addr ip;
268 union nf_inet_addr ip2;
269 __be16 port;
270 u8 proto;
271 u8 padding;
272 unsigned long timeout;
273};
274
275static inline bool
276hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
277 const struct hash_ipportip6_elem *ip2)
278{
279 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
280 ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
281 ip1->port == ip2->port &&
282 ip1->proto == ip2->proto;
283}
284
285static inline bool
286hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem)
287{
288 return elem->proto == 0;
289}
290
291static inline void
292hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst,
293 const struct hash_ipportip6_elem *src)
294{
295 memcpy(dst, src, sizeof(*dst));
296}
297
298static inline void
299hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem)
300{
301 elem->proto = 0;
302}
303
304static bool
305hash_ipportip6_data_list(struct sk_buff *skb,
306 const struct hash_ipportip6_elem *data)
307{
308 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
309 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
310 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
311 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
312 return 0;
313
314nla_put_failure:
315 return 1;
316}
317
318static bool
319hash_ipportip6_data_tlist(struct sk_buff *skb,
320 const struct hash_ipportip6_elem *data)
321{
322 const struct hash_ipportip6_telem *e =
323 (const struct hash_ipportip6_telem *)data;
324
325 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
326	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &e->ip2);
327	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, e->port);
328	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, e->proto);
329 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
330 htonl(ip_set_timeout_get(e->timeout)));
331 return 0;
332
333nla_put_failure:
334 return 1;
335}
336
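/* Re-instantiate the generic engine for IPv6: drop the IPv4
 * parameters, then include ip_set_ahash.h again with PF 6 and a
 * 128 bit host mask.
 */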
337#undef PF
338#undef HOST_MASK
339
340#define PF 6
341#define HOST_MASK 128
342#include <linux/netfilter/ipset/ip_set_ahash.h>
343
344static int
345hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
346 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
347{
348 const struct ip_set_hash *h = set->data;
349 ipset_adtfn adtfn = set->variant->adt[adt];
350 struct hash_ipportip6_elem data = { };
351
352 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
353 &data.port, &data.proto))
354 return -EINVAL;
355
356 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
357 ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
358
359 return adtfn(set, &data, h->timeout);
360}
361
362static int
363hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
364 enum ipset_adt adt, u32 *lineno, u32 flags)
365{
366 const struct ip_set_hash *h = set->data;
367 ipset_adtfn adtfn = set->variant->adt[adt];
368 struct hash_ipportip6_elem data = { };
369 u32 port, port_to;
370 u32 timeout = h->timeout;
371 bool with_ports = false;
372 int ret;
373
374 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
375 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
376 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
377 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
378 tb[IPSET_ATTR_IP_TO] ||
379 tb[IPSET_ATTR_CIDR]))
380 return -IPSET_ERR_PROTOCOL;
381
382 if (tb[IPSET_ATTR_LINENO])
383 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
384
385 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
386 if (ret)
387 return ret;
388
389 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
390 if (ret)
391 return ret;
392
393 if (tb[IPSET_ATTR_PORT])
394 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
395 else
396 return -IPSET_ERR_PROTOCOL;
397
398 if (tb[IPSET_ATTR_PROTO]) {
399 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
400 with_ports = ip_set_proto_with_ports(data.proto);
401
402 if (data.proto == 0)
403 return -IPSET_ERR_INVALID_PROTO;
404 } else
405 return -IPSET_ERR_MISSING_PROTO;
406
407 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
408 data.port = 0;
409
410 if (tb[IPSET_ATTR_TIMEOUT]) {
411 if (!with_timeout(h->timeout))
412 return -IPSET_ERR_TIMEOUT;
413 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
414 }
415
416 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
417 ret = adtfn(set, &data, timeout);
418 return ip_set_eexist(ret, flags) ? 0 : ret;
419 }
420
421 port = ntohs(data.port);
422 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
423 if (port > port_to)
424 swap(port, port_to);
425
426 for (; port <= port_to; port++) {
427 data.port = htons(port);
428 ret = adtfn(set, &data, timeout);
429
430 if (ret && !ip_set_eexist(ret, flags))
431 return ret;
432 else
433 ret = 0;
434 }
435 return ret;
436}
437
438/* Create hash:ip,port,ip type of sets */
439
440static int
441hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
442{
443 struct ip_set_hash *h;
444 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
445 u8 hbits;
446
447 if (!(set->family == AF_INET || set->family == AF_INET6))
448 return -IPSET_ERR_INVALID_FAMILY;
449
450 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
451 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
452 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
453 return -IPSET_ERR_PROTOCOL;
454
455 if (tb[IPSET_ATTR_HASHSIZE]) {
456 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
457 if (hashsize < IPSET_MIMINAL_HASHSIZE)
458 hashsize = IPSET_MIMINAL_HASHSIZE;
459 }
460
461 if (tb[IPSET_ATTR_MAXELEM])
462 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
463
464 h = kzalloc(sizeof(*h), GFP_KERNEL);
465 if (!h)
466 return -ENOMEM;
467
468 h->maxelem = maxelem;
469 get_random_bytes(&h->initval, sizeof(h->initval));
470 h->timeout = IPSET_NO_TIMEOUT;
471
472 hbits = htable_bits(hashsize);
473 h->table = ip_set_alloc(
474 sizeof(struct htable)
475 + jhash_size(hbits) * sizeof(struct hbucket));
476 if (!h->table) {
477 kfree(h);
478 return -ENOMEM;
479 }
480 h->table->htable_bits = hbits;
481
482 set->data = h;
483
484 if (tb[IPSET_ATTR_TIMEOUT]) {
485 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
486
487 set->variant = set->family == AF_INET
488 ? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant;
489
490 if (set->family == AF_INET)
491 hash_ipportip4_gc_init(set);
492 else
493 hash_ipportip6_gc_init(set);
494 } else {
495 set->variant = set->family == AF_INET
496 ? &hash_ipportip4_variant : &hash_ipportip6_variant;
497 }
498
499 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
500 set->name, jhash_size(h->table->htable_bits),
501 h->table->htable_bits, h->maxelem, set->data, h->table);
502
503 return 0;
504}
505
506static struct ip_set_type hash_ipportip_type __read_mostly = {
507 .name = "hash:ip,port,ip",
508 .protocol = IPSET_PROTOCOL,
509 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
510 .dimension = IPSET_DIM_THREE,
511 .family = AF_UNSPEC,
512 .revision = 0,
513 .create = hash_ipportip_create,
514 .create_policy = {
515 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
516 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
517 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
518 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
519 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
520 },
521 .adt_policy = {
522 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
523 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
524 [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
525 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
526 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
527 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
528 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
529 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
530 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
531 },
532 .me = THIS_MODULE,
533};
534
535static int __init
536hash_ipportip_init(void)
537{
538 return ip_set_type_register(&hash_ipportip_type);
539}
540
541static void __exit
542hash_ipportip_fini(void)
543{
544 ip_set_type_unregister(&hash_ipportip_type);
545}
546
547module_init(hash_ipportip_init);
548module_exit(hash_ipportip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
new file mode 100644
index 00000000000..2cb84a54b7a
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -0,0 +1,614 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip,port,net type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19#include <net/tcp.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_getport.h>
26#include <linux/netfilter/ipset/ip_set_hash.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
30MODULE_DESCRIPTION("hash:ip,port,net type of IP sets");
31MODULE_ALIAS("ip_set_hash:ip,port,net");
32
33/* Type specific function prefix */
34#define TYPE hash_ipportnet
35
36static bool
37hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b);
38
39#define hash_ipportnet4_same_set hash_ipportnet_same_set
40#define hash_ipportnet6_same_set hash_ipportnet_same_set
41
42/* The type variant functions: IPv4 */
43
44/* Member elements without timeout */
45struct hash_ipportnet4_elem {
46 __be32 ip;
47 __be32 ip2;
48 __be16 port;
49 u8 cidr;
50 u8 proto;
51};
52
53/* Member elements with timeout support */
54struct hash_ipportnet4_telem {
55 __be32 ip;
56 __be32 ip2;
57 __be16 port;
58 u8 cidr;
59 u8 proto;
60 unsigned long timeout;
61};
62
63static inline bool
64hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
65 const struct hash_ipportnet4_elem *ip2)
66{
67 return ip1->ip == ip2->ip &&
68 ip1->ip2 == ip2->ip2 &&
69 ip1->cidr == ip2->cidr &&
70 ip1->port == ip2->port &&
71 ip1->proto == ip2->proto;
72}
73
74static inline bool
75hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem)
76{
77 return elem->proto == 0;
78}
79
80static inline void
81hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst,
82 const struct hash_ipportnet4_elem *src)
83{
84 memcpy(dst, src, sizeof(*dst));
85}
86
87static inline void
88hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
89{
90 elem->ip2 &= ip_set_netmask(cidr);
91 elem->cidr = cidr;
92}
93
94static inline void
95hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem)
96{
97 elem->proto = 0;
98}
99
100static bool
101hash_ipportnet4_data_list(struct sk_buff *skb,
102 const struct hash_ipportnet4_elem *data)
103{
104 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
105 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2);
106 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
107 NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
108 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
109 return 0;
110
111nla_put_failure:
112 return 1;
113}
114
115static bool
116hash_ipportnet4_data_tlist(struct sk_buff *skb,
117 const struct hash_ipportnet4_elem *data)
118{
119 const struct hash_ipportnet4_telem *tdata =
120 (const struct hash_ipportnet4_telem *)data;
121
122 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
123 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2);
124 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
125	NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, tdata->cidr);
126	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, tdata->proto);
127 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
128 htonl(ip_set_timeout_get(tdata->timeout)));
129
130 return 0;
131
132nla_put_failure:
133 return 1;
134}
135
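/* IP_SET_HASH_WITH_NETS makes the generic code track which prefix
 * lengths are in use, so a lookup can probe each stored CIDR in turn;
 * IP_SET_HASH_WITH_PROTO tells it the element carries a protocol
 * field as well.
 */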
136#define IP_SET_HASH_WITH_PROTO
137#define IP_SET_HASH_WITH_NETS
138
139#define PF 4
140#define HOST_MASK 32
141#include <linux/netfilter/ipset/ip_set_ahash.h>
142
143static int
144hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
145 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
146{
147 const struct ip_set_hash *h = set->data;
148 ipset_adtfn adtfn = set->variant->adt[adt];
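	/* Seed the lookup with the first stored prefix length; an
	 * empty set falls back to the full host mask.
	 */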
149	struct hash_ipportnet4_elem data =
150		{ .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
151
152 if (data.cidr == 0)
153 return -EINVAL;
154 if (adt == IPSET_TEST)
155 data.cidr = HOST_MASK;
156
157 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
158 &data.port, &data.proto))
159 return -EINVAL;
160
161 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
162 ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
163 data.ip2 &= ip_set_netmask(data.cidr);
164
165 return adtfn(set, &data, h->timeout);
166}
167
168static int
169hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
170 enum ipset_adt adt, u32 *lineno, u32 flags)
171{
172 const struct ip_set_hash *h = set->data;
173 ipset_adtfn adtfn = set->variant->adt[adt];
174 struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
175 u32 ip, ip_to, p, port, port_to;
176 u32 timeout = h->timeout;
177 bool with_ports = false;
178 int ret;
179
180 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
181 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
182 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
183 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
184 return -IPSET_ERR_PROTOCOL;
185
186 if (tb[IPSET_ATTR_LINENO])
187 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
188
189 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
190 if (ret)
191 return ret;
192
193 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
194 if (ret)
195 return ret;
196
197 if (tb[IPSET_ATTR_CIDR2])
198 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
199
200	if (!data.cidr || data.cidr > HOST_MASK)
201 return -IPSET_ERR_INVALID_CIDR;
202
203 data.ip2 &= ip_set_netmask(data.cidr);
204
205 if (tb[IPSET_ATTR_PORT])
206 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
207 else
208 return -IPSET_ERR_PROTOCOL;
209
210 if (tb[IPSET_ATTR_PROTO]) {
211 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
212 with_ports = ip_set_proto_with_ports(data.proto);
213
214 if (data.proto == 0)
215 return -IPSET_ERR_INVALID_PROTO;
216 } else
217 return -IPSET_ERR_MISSING_PROTO;
218
219 if (!(with_ports || data.proto == IPPROTO_ICMP))
220 data.port = 0;
221
222 if (tb[IPSET_ATTR_TIMEOUT]) {
223 if (!with_timeout(h->timeout))
224 return -IPSET_ERR_TIMEOUT;
225 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
226 }
227
228 if (adt == IPSET_TEST ||
229 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
230 tb[IPSET_ATTR_PORT_TO])) {
231 ret = adtfn(set, &data, timeout);
232 return ip_set_eexist(ret, flags) ? 0 : ret;
233 }
234
235 ip = ntohl(data.ip);
236 if (tb[IPSET_ATTR_IP_TO]) {
237 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
238 if (ret)
239 return ret;
240 if (ip > ip_to)
241 swap(ip, ip_to);
242 } else if (tb[IPSET_ATTR_CIDR]) {
243 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
244
245 if (cidr > 32)
246 return -IPSET_ERR_INVALID_CIDR;
247 ip &= ip_set_hostmask(cidr);
248 ip_to = ip | ~ip_set_hostmask(cidr);
249 } else
250 ip_to = ip;
251
252 port_to = port = ntohs(data.port);
253 if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
254 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
255 if (port > port_to)
256 swap(port, port_to);
257 }
258
259 for (; !before(ip_to, ip); ip++)
260 for (p = port; p <= port_to; p++) {
261 data.ip = htonl(ip);
262 data.port = htons(p);
263 ret = adtfn(set, &data, timeout);
264
265 if (ret && !ip_set_eexist(ret, flags))
266 return ret;
267 else
268 ret = 0;
269 }
270 return ret;
271}
272
273static bool
274hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b)
275{
276 const struct ip_set_hash *x = a->data;
277 const struct ip_set_hash *y = b->data;
278
279 /* Resizing changes htable_bits, so we ignore it */
280 return x->maxelem == y->maxelem &&
281 x->timeout == y->timeout;
282}
283
284/* The type variant functions: IPv6 */
285
286struct hash_ipportnet6_elem {
287 union nf_inet_addr ip;
288 union nf_inet_addr ip2;
289 __be16 port;
290 u8 cidr;
291 u8 proto;
292};
293
294struct hash_ipportnet6_telem {
295 union nf_inet_addr ip;
296 union nf_inet_addr ip2;
297 __be16 port;
298 u8 cidr;
299 u8 proto;
300 unsigned long timeout;
301};
302
303static inline bool
304hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
305 const struct hash_ipportnet6_elem *ip2)
306{
307 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
308 ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
309 ip1->cidr == ip2->cidr &&
310 ip1->port == ip2->port &&
311 ip1->proto == ip2->proto;
312}
313
314static inline bool
315hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem)
316{
317 return elem->proto == 0;
318}
319
320static inline void
321hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst,
322 const struct hash_ipportnet6_elem *src)
323{
324 memcpy(dst, src, sizeof(*dst));
325}
326
327static inline void
328hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem)
329{
330 elem->proto = 0;
331}
332
333static inline void
334ip6_netmask(union nf_inet_addr *ip, u8 prefix)
335{
336 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
337 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
338 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
339 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
340}
341
342static inline void
343hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
344{
345 ip6_netmask(&elem->ip2, cidr);
346 elem->cidr = cidr;
347}
348
349static bool
350hash_ipportnet6_data_list(struct sk_buff *skb,
351 const struct hash_ipportnet6_elem *data)
352{
353 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
354 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
355 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
356 NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
357 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
358 return 0;
359
360nla_put_failure:
361 return 1;
362}
363
364static bool
365hash_ipportnet6_data_tlist(struct sk_buff *skb,
366 const struct hash_ipportnet6_elem *data)
367{
368 const struct hash_ipportnet6_telem *e =
369 (const struct hash_ipportnet6_telem *)data;
370
371 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
372	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &e->ip2);
373	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, e->port);
374	NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, e->cidr);
375	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, e->proto);
376 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
377 htonl(ip_set_timeout_get(e->timeout)));
378 return 0;
379
380nla_put_failure:
381 return 1;
382}
383
384#undef PF
385#undef HOST_MASK
386
387#define PF 6
388#define HOST_MASK 128
389#include <linux/netfilter/ipset/ip_set_ahash.h>
390
391static int
392hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
393 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
394{
395 const struct ip_set_hash *h = set->data;
396 ipset_adtfn adtfn = set->variant->adt[adt];
397	struct hash_ipportnet6_elem data =
398		{ .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
399
400 if (data.cidr == 0)
401 return -EINVAL;
402 if (adt == IPSET_TEST)
403 data.cidr = HOST_MASK;
404
405 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
406 &data.port, &data.proto))
407 return -EINVAL;
408
409 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
410 ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
411 ip6_netmask(&data.ip2, data.cidr);
412
413 return adtfn(set, &data, h->timeout);
414}
415
416static int
417hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
418 enum ipset_adt adt, u32 *lineno, u32 flags)
419{
420 const struct ip_set_hash *h = set->data;
421 ipset_adtfn adtfn = set->variant->adt[adt];
422 struct hash_ipportnet6_elem data = { .cidr = HOST_MASK };
423 u32 port, port_to;
424 u32 timeout = h->timeout;
425 bool with_ports = false;
426 int ret;
427
428 if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
429 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
430 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
431 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
432 tb[IPSET_ATTR_IP_TO] ||
433 tb[IPSET_ATTR_CIDR]))
434 return -IPSET_ERR_PROTOCOL;
435
436 if (tb[IPSET_ATTR_LINENO])
437 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
438
439 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
440 if (ret)
441 return ret;
442
443 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
444 if (ret)
445 return ret;
446
447 if (tb[IPSET_ATTR_CIDR2])
448 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
449
450	if (!data.cidr || data.cidr > HOST_MASK)
451 return -IPSET_ERR_INVALID_CIDR;
452
453 ip6_netmask(&data.ip2, data.cidr);
454
455 if (tb[IPSET_ATTR_PORT])
456 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
457 else
458 return -IPSET_ERR_PROTOCOL;
459
460 if (tb[IPSET_ATTR_PROTO]) {
461 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
462 with_ports = ip_set_proto_with_ports(data.proto);
463
464 if (data.proto == 0)
465 return -IPSET_ERR_INVALID_PROTO;
466 } else
467 return -IPSET_ERR_MISSING_PROTO;
468
469 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
470 data.port = 0;
471
472 if (tb[IPSET_ATTR_TIMEOUT]) {
473 if (!with_timeout(h->timeout))
474 return -IPSET_ERR_TIMEOUT;
475 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
476 }
477
478 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
479 ret = adtfn(set, &data, timeout);
480 return ip_set_eexist(ret, flags) ? 0 : ret;
481 }
482
483 port = ntohs(data.port);
484 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
485 if (port > port_to)
486 swap(port, port_to);
487
488 for (; port <= port_to; port++) {
489 data.port = htons(port);
490 ret = adtfn(set, &data, timeout);
491
492 if (ret && !ip_set_eexist(ret, flags))
493 return ret;
494 else
495 ret = 0;
496 }
497 return ret;
498}
499
500/* Create hash:ip,port,net type of sets */
501
502static int
503hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
504{
505 struct ip_set_hash *h;
506 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
507 u8 hbits;
508
509 if (!(set->family == AF_INET || set->family == AF_INET6))
510 return -IPSET_ERR_INVALID_FAMILY;
511
512 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
513 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
514 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
515 return -IPSET_ERR_PROTOCOL;
516
517 if (tb[IPSET_ATTR_HASHSIZE]) {
518 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
519 if (hashsize < IPSET_MIMINAL_HASHSIZE)
520 hashsize = IPSET_MIMINAL_HASHSIZE;
521 }
522
523 if (tb[IPSET_ATTR_MAXELEM])
524 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
525
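	/* Allocate one ip_set_hash_nets slot per possible prefix
	 * length (32 for IPv4, 128 for IPv6) to count the elements
	 * stored at each CIDR.
	 */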
526 h = kzalloc(sizeof(*h)
527 + sizeof(struct ip_set_hash_nets)
528 * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
529 if (!h)
530 return -ENOMEM;
531
532 h->maxelem = maxelem;
533 get_random_bytes(&h->initval, sizeof(h->initval));
534 h->timeout = IPSET_NO_TIMEOUT;
535
536 hbits = htable_bits(hashsize);
537 h->table = ip_set_alloc(
538 sizeof(struct htable)
539 + jhash_size(hbits) * sizeof(struct hbucket));
540 if (!h->table) {
541 kfree(h);
542 return -ENOMEM;
543 }
544 h->table->htable_bits = hbits;
545
546 set->data = h;
547
548 if (tb[IPSET_ATTR_TIMEOUT]) {
549 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
550
551 set->variant = set->family == AF_INET
552 ? &hash_ipportnet4_tvariant
553 : &hash_ipportnet6_tvariant;
554
555 if (set->family == AF_INET)
556 hash_ipportnet4_gc_init(set);
557 else
558 hash_ipportnet6_gc_init(set);
559 } else {
560 set->variant = set->family == AF_INET
561 ? &hash_ipportnet4_variant : &hash_ipportnet6_variant;
562 }
563
564 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
565 set->name, jhash_size(h->table->htable_bits),
566 h->table->htable_bits, h->maxelem, set->data, h->table);
567
568 return 0;
569}
570
571static struct ip_set_type hash_ipportnet_type __read_mostly = {
572 .name = "hash:ip,port,net",
573 .protocol = IPSET_PROTOCOL,
574 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
575 .dimension = IPSET_DIM_THREE,
576 .family = AF_UNSPEC,
577 .revision = 0,
578 .create = hash_ipportnet_create,
579 .create_policy = {
580 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
581 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
582 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
583 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
584 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
585 },
586 .adt_policy = {
587 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
588 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
589 [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
590 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
591 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
592 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
593 [IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
594 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
595 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
596 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
597 },
598 .me = THIS_MODULE,
599};
600
601static int __init
602hash_ipportnet_init(void)
603{
604 return ip_set_type_register(&hash_ipportnet_type);
605}
606
607static void __exit
608hash_ipportnet_fini(void)
609{
610 ip_set_type_unregister(&hash_ipportnet_type);
611}
612
613module_init(hash_ipportnet_init);
614module_exit(hash_ipportnet_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
new file mode 100644
index 00000000000..c4db202b7da
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -0,0 +1,458 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:net type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19
20#include <linux/netfilter.h>
21#include <linux/netfilter/ipset/pfxlen.h>
22#include <linux/netfilter/ipset/ip_set.h>
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24#include <linux/netfilter/ipset/ip_set_hash.h>
25
26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
28MODULE_DESCRIPTION("hash:net type of IP sets");
29MODULE_ALIAS("ip_set_hash:net");
30
31/* Type specific function prefix */
32#define TYPE hash_net
33
34static bool
35hash_net_same_set(const struct ip_set *a, const struct ip_set *b);
36
37#define hash_net4_same_set hash_net_same_set
38#define hash_net6_same_set hash_net_same_set
39
40/* The type variant functions: IPv4 */
41
42/* Member elements without timeout */
43struct hash_net4_elem {
44 __be32 ip;
45 u16 padding0;
46 u8 padding1;
47 u8 cidr;
48};
49
50/* Member elements with timeout support */
51struct hash_net4_telem {
52 __be32 ip;
53 u16 padding0;
54 u8 padding1;
55 u8 cidr;
56 unsigned long timeout;
57};
58
59static inline bool
60hash_net4_data_equal(const struct hash_net4_elem *ip1,
61 const struct hash_net4_elem *ip2)
62{
63 return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr;
64}
65
66static inline bool
67hash_net4_data_isnull(const struct hash_net4_elem *elem)
68{
69 return elem->cidr == 0;
70}
71
72static inline void
73hash_net4_data_copy(struct hash_net4_elem *dst,
74 const struct hash_net4_elem *src)
75{
76 dst->ip = src->ip;
77 dst->cidr = src->cidr;
78}
79
80static inline void
81hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
82{
83 elem->ip &= ip_set_netmask(cidr);
84 elem->cidr = cidr;
85}
86
87/* Zero CIDR values cannot be stored */
88static inline void
89hash_net4_data_zero_out(struct hash_net4_elem *elem)
90{
91 elem->cidr = 0;
92}
93
94static bool
95hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
96{
97 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
98 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
99 return 0;
100
101nla_put_failure:
102 return 1;
103}
104
105static bool
106hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data)
107{
108 const struct hash_net4_telem *tdata =
109 (const struct hash_net4_telem *)data;
110
111 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
112 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr);
113 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
114 htonl(ip_set_timeout_get(tdata->timeout)));
115
116 return 0;
117
118nla_put_failure:
119 return 1;
120}
121
122#define IP_SET_HASH_WITH_NETS
123
124#define PF 4
125#define HOST_MASK 32
126#include <linux/netfilter/ipset/ip_set_ahash.h>
127
128static int
129hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
130 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
131{
132 const struct ip_set_hash *h = set->data;
133 ipset_adtfn adtfn = set->variant->adt[adt];
134	struct hash_net4_elem data = { .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
135
136 if (data.cidr == 0)
137 return -EINVAL;
138 if (adt == IPSET_TEST)
139 data.cidr = HOST_MASK;
140
141 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
142 data.ip &= ip_set_netmask(data.cidr);
143
144 return adtfn(set, &data, h->timeout);
145}
146
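/* Userspace add/del/test: one network given as address plus prefix;
 * the address is masked to the prefix before it is stored or looked
 * up, so stray host bits are ignored.
 */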
147static int
148hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
149 enum ipset_adt adt, u32 *lineno, u32 flags)
150{
151 const struct ip_set_hash *h = set->data;
152 ipset_adtfn adtfn = set->variant->adt[adt];
153 struct hash_net4_elem data = { .cidr = HOST_MASK };
154 u32 timeout = h->timeout;
155 int ret;
156
157 if (unlikely(!tb[IPSET_ATTR_IP] ||
158 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
159 return -IPSET_ERR_PROTOCOL;
160
161 if (tb[IPSET_ATTR_LINENO])
162 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
163
164 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
165 if (ret)
166 return ret;
167
168 if (tb[IPSET_ATTR_CIDR])
169 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
170
171	if (!data.cidr || data.cidr > HOST_MASK)
172 return -IPSET_ERR_INVALID_CIDR;
173
174 data.ip &= ip_set_netmask(data.cidr);
175
176 if (tb[IPSET_ATTR_TIMEOUT]) {
177 if (!with_timeout(h->timeout))
178 return -IPSET_ERR_TIMEOUT;
179 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
180 }
181
182 ret = adtfn(set, &data, timeout);
183
184 return ip_set_eexist(ret, flags) ? 0 : ret;
185}
186
187static bool
188hash_net_same_set(const struct ip_set *a, const struct ip_set *b)
189{
190 const struct ip_set_hash *x = a->data;
191 const struct ip_set_hash *y = b->data;
192
193 /* Resizing changes htable_bits, so we ignore it */
194 return x->maxelem == y->maxelem &&
195 x->timeout == y->timeout;
196}
197
198/* The type variant functions: IPv6 */
199
200struct hash_net6_elem {
201 union nf_inet_addr ip;
202 u16 padding0;
203 u8 padding1;
204 u8 cidr;
205};
206
207struct hash_net6_telem {
208 union nf_inet_addr ip;
209 u16 padding0;
210 u8 padding1;
211 u8 cidr;
212 unsigned long timeout;
213};
214
215static inline bool
216hash_net6_data_equal(const struct hash_net6_elem *ip1,
217 const struct hash_net6_elem *ip2)
218{
219 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
220 ip1->cidr == ip2->cidr;
221}
222
223static inline bool
224hash_net6_data_isnull(const struct hash_net6_elem *elem)
225{
226 return elem->cidr == 0;
227}
228
229static inline void
230hash_net6_data_copy(struct hash_net6_elem *dst,
231 const struct hash_net6_elem *src)
232{
233 ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
234 dst->cidr = src->cidr;
235}
236
237static inline void
238hash_net6_data_zero_out(struct hash_net6_elem *elem)
239{
240 elem->cidr = 0;
241}
242
243static inline void
244ip6_netmask(union nf_inet_addr *ip, u8 prefix)
245{
246 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
247 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
248 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
249 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
250}
251
252static inline void
253hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
254{
255 ip6_netmask(&elem->ip, cidr);
256 elem->cidr = cidr;
257}
258
259static bool
260hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
261{
262 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
263 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
264 return 0;
265
266nla_put_failure:
267 return 1;
268}
269
270static bool
271hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data)
272{
273 const struct hash_net6_telem *e =
274 (const struct hash_net6_telem *)data;
275
276 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
277 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr);
278 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
279 htonl(ip_set_timeout_get(e->timeout)));
280 return 0;
281
282nla_put_failure:
283 return 1;
284}
285
286#undef PF
287#undef HOST_MASK
288
289#define PF 6
290#define HOST_MASK 128
291#include <linux/netfilter/ipset/ip_set_ahash.h>
292
293static int
294hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
295 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
296{
297 const struct ip_set_hash *h = set->data;
298 ipset_adtfn adtfn = set->variant->adt[adt];
299	struct hash_net6_elem data = { .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
300
301 if (data.cidr == 0)
302 return -EINVAL;
303 if (adt == IPSET_TEST)
304 data.cidr = HOST_MASK;
305
306 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
307 ip6_netmask(&data.ip, data.cidr);
308
309 return adtfn(set, &data, h->timeout);
310}
311
312static int
313hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
314 enum ipset_adt adt, u32 *lineno, u32 flags)
315{
316 const struct ip_set_hash *h = set->data;
317 ipset_adtfn adtfn = set->variant->adt[adt];
318 struct hash_net6_elem data = { .cidr = HOST_MASK };
319 u32 timeout = h->timeout;
320 int ret;
321
322 if (unlikely(!tb[IPSET_ATTR_IP] ||
323 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
324 return -IPSET_ERR_PROTOCOL;
325
326 if (tb[IPSET_ATTR_LINENO])
327 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
328
329 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
330 if (ret)
331 return ret;
332
333 if (tb[IPSET_ATTR_CIDR])
334 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
335
336	if (!data.cidr || data.cidr > HOST_MASK)
337 return -IPSET_ERR_INVALID_CIDR;
338
339 ip6_netmask(&data.ip, data.cidr);
340
341 if (tb[IPSET_ATTR_TIMEOUT]) {
342 if (!with_timeout(h->timeout))
343 return -IPSET_ERR_TIMEOUT;
344 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
345 }
346
347 ret = adtfn(set, &data, timeout);
348
349 return ip_set_eexist(ret, flags) ? 0 : ret;
350}
351
352/* Create hash:net type of sets */
353
354static int
355hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
356{
357 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
358 struct ip_set_hash *h;
359 u8 hbits;
360
361 if (!(set->family == AF_INET || set->family == AF_INET6))
362 return -IPSET_ERR_INVALID_FAMILY;
363
364 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
365 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
366 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
367 return -IPSET_ERR_PROTOCOL;
368
369 if (tb[IPSET_ATTR_HASHSIZE]) {
370 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
371 if (hashsize < IPSET_MIMINAL_HASHSIZE)
372 hashsize = IPSET_MIMINAL_HASHSIZE;
373 }
374
375 if (tb[IPSET_ATTR_MAXELEM])
376 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
377
378 h = kzalloc(sizeof(*h)
379 + sizeof(struct ip_set_hash_nets)
380 * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
381 if (!h)
382 return -ENOMEM;
383
384 h->maxelem = maxelem;
385 get_random_bytes(&h->initval, sizeof(h->initval));
386 h->timeout = IPSET_NO_TIMEOUT;
387
388 hbits = htable_bits(hashsize);
389 h->table = ip_set_alloc(
390 sizeof(struct htable)
391 + jhash_size(hbits) * sizeof(struct hbucket));
392 if (!h->table) {
393 kfree(h);
394 return -ENOMEM;
395 }
396 h->table->htable_bits = hbits;
397
398 set->data = h;
399
400 if (tb[IPSET_ATTR_TIMEOUT]) {
401 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
402
403 set->variant = set->family == AF_INET
404 ? &hash_net4_tvariant : &hash_net6_tvariant;
405
406 if (set->family == AF_INET)
407 hash_net4_gc_init(set);
408 else
409 hash_net6_gc_init(set);
410 } else {
411 set->variant = set->family == AF_INET
412 ? &hash_net4_variant : &hash_net6_variant;
413 }
414
415 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
416 set->name, jhash_size(h->table->htable_bits),
417 h->table->htable_bits, h->maxelem, set->data, h->table);
418
419 return 0;
420}
421
422static struct ip_set_type hash_net_type __read_mostly = {
423 .name = "hash:net",
424 .protocol = IPSET_PROTOCOL,
425 .features = IPSET_TYPE_IP,
426 .dimension = IPSET_DIM_ONE,
427 .family = AF_UNSPEC,
428 .revision = 0,
429 .create = hash_net_create,
430 .create_policy = {
431 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
432 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
433 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
434 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
435 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
436 },
437 .adt_policy = {
438 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
439 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
440 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
441 },
442 .me = THIS_MODULE,
443};
444
445static int __init
446hash_net_init(void)
447{
448 return ip_set_type_register(&hash_net_type);
449}
450
451static void __exit
452hash_net_fini(void)
453{
454 ip_set_type_unregister(&hash_net_type);
455}
456
457module_init(hash_net_init);
458module_exit(hash_net_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
new file mode 100644
index 00000000000..8598676f2a0
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -0,0 +1,564 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:net,port type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/skbuff.h>
14#include <linux/errno.h>
15#include <linux/random.h>
16#include <net/ip.h>
17#include <net/ipv6.h>
18#include <net/netlink.h>
19
20#include <linux/netfilter.h>
21#include <linux/netfilter/ipset/pfxlen.h>
22#include <linux/netfilter/ipset/ip_set.h>
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24#include <linux/netfilter/ipset/ip_set_getport.h>
25#include <linux/netfilter/ipset/ip_set_hash.h>
26
27MODULE_LICENSE("GPL");
28MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
29MODULE_DESCRIPTION("hash:net,port type of IP sets");
30MODULE_ALIAS("ip_set_hash:net,port");
31
32/* Type specific function prefix */
33#define TYPE hash_netport
34
35static bool
36hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);
37
38#define hash_netport4_same_set hash_netport_same_set
39#define hash_netport6_same_set hash_netport_same_set
40
41/* The type variant functions: IPv4 */
42
43/* Member elements without timeout */
44struct hash_netport4_elem {
45 __be32 ip;
46 __be16 port;
47 u8 proto;
48 u8 cidr;
49};
50
51/* Member elements with timeout support */
52struct hash_netport4_telem {
53 __be32 ip;
54 __be16 port;
55 u8 proto;
56 u8 cidr;
57 unsigned long timeout;
58};
59
60static inline bool
61hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
62 const struct hash_netport4_elem *ip2)
63{
64 return ip1->ip == ip2->ip &&
65 ip1->port == ip2->port &&
66 ip1->proto == ip2->proto &&
67 ip1->cidr == ip2->cidr;
68}
69
70static inline bool
71hash_netport4_data_isnull(const struct hash_netport4_elem *elem)
72{
73 return elem->proto == 0;
74}
75
76static inline void
77hash_netport4_data_copy(struct hash_netport4_elem *dst,
78 const struct hash_netport4_elem *src)
79{
80 dst->ip = src->ip;
81 dst->port = src->port;
82 dst->proto = src->proto;
83 dst->cidr = src->cidr;
84}
85
86static inline void
87hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
88{
89 elem->ip &= ip_set_netmask(cidr);
90 elem->cidr = cidr;
91}
92
93static inline void
94hash_netport4_data_zero_out(struct hash_netport4_elem *elem)
95{
96 elem->proto = 0;
97}
98
99static bool
100hash_netport4_data_list(struct sk_buff *skb,
101 const struct hash_netport4_elem *data)
102{
103 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
104 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
105 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
106 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
107 return 0;
108
109nla_put_failure:
110 return 1;
111}
112
113static bool
114hash_netport4_data_tlist(struct sk_buff *skb,
115 const struct hash_netport4_elem *data)
116{
117 const struct hash_netport4_telem *tdata =
118 (const struct hash_netport4_telem *)data;
119
120 NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
121 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
122	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr);
123	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, tdata->proto);
124 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
125 htonl(ip_set_timeout_get(tdata->timeout)));
126
127 return 0;
128
129nla_put_failure:
130 return 1;
131}
132
133#define IP_SET_HASH_WITH_PROTO
134#define IP_SET_HASH_WITH_NETS
135
136#define PF 4
137#define HOST_MASK 32
138#include <linux/netfilter/ipset/ip_set_ahash.h>
139
140static int
141hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
142 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
143{
144 const struct ip_set_hash *h = set->data;
145 ipset_adtfn adtfn = set->variant->adt[adt];
146	struct hash_netport4_elem data = {
147		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
148
149 if (data.cidr == 0)
150 return -EINVAL;
151 if (adt == IPSET_TEST)
152 data.cidr = HOST_MASK;
153
154 if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
155 &data.port, &data.proto))
156 return -EINVAL;
157
158 ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
159 data.ip &= ip_set_netmask(data.cidr);
160
161 return adtfn(set, &data, h->timeout);
162}
163
164static int
165hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
166 enum ipset_adt adt, u32 *lineno, u32 flags)
167{
168 const struct ip_set_hash *h = set->data;
169 ipset_adtfn adtfn = set->variant->adt[adt];
170 struct hash_netport4_elem data = { .cidr = HOST_MASK };
171 u32 port, port_to;
172 u32 timeout = h->timeout;
173 bool with_ports = false;
174 int ret;
175
176 if (unlikely(!tb[IPSET_ATTR_IP] ||
177 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
178 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
179 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
180 return -IPSET_ERR_PROTOCOL;
181
182 if (tb[IPSET_ATTR_LINENO])
183 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
184
185 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
186 if (ret)
187 return ret;
188
189 if (tb[IPSET_ATTR_CIDR])
190 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
191	if (!data.cidr || data.cidr > HOST_MASK)
192 return -IPSET_ERR_INVALID_CIDR;
193 data.ip &= ip_set_netmask(data.cidr);
194
195 if (tb[IPSET_ATTR_PORT])
196 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
197 else
198 return -IPSET_ERR_PROTOCOL;
199
200 if (tb[IPSET_ATTR_PROTO]) {
201 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
202 with_ports = ip_set_proto_with_ports(data.proto);
203
204 if (data.proto == 0)
205 return -IPSET_ERR_INVALID_PROTO;
206 } else
207 return -IPSET_ERR_MISSING_PROTO;
208
209 if (!(with_ports || data.proto == IPPROTO_ICMP))
210 data.port = 0;
211
212 if (tb[IPSET_ATTR_TIMEOUT]) {
213 if (!with_timeout(h->timeout))
214 return -IPSET_ERR_TIMEOUT;
215 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
216 }
217
218 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
219 ret = adtfn(set, &data, timeout);
220 return ip_set_eexist(ret, flags) ? 0 : ret;
221 }
222
223 port = ntohs(data.port);
224 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
225 if (port > port_to)
226 swap(port, port_to);
227
228 for (; port <= port_to; port++) {
229 data.port = htons(port);
230 ret = adtfn(set, &data, timeout);
231
232 if (ret && !ip_set_eexist(ret, flags))
233 return ret;
234 else
235 ret = 0;
236 }
237 return ret;
238}
239
240static bool
241hash_netport_same_set(const struct ip_set *a, const struct ip_set *b)
242{
243 const struct ip_set_hash *x = a->data;
244 const struct ip_set_hash *y = b->data;
245
246 /* Resizing changes htable_bits, so we ignore it */
247 return x->maxelem == y->maxelem &&
248 x->timeout == y->timeout;
249}
250
251/* The type variant functions: IPv6 */
252
253struct hash_netport6_elem {
254 union nf_inet_addr ip;
255 __be16 port;
256 u8 proto;
257 u8 cidr;
258};
259
260struct hash_netport6_telem {
261 union nf_inet_addr ip;
262 __be16 port;
263 u8 proto;
264 u8 cidr;
265 unsigned long timeout;
266};
267
268static inline bool
269hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
270 const struct hash_netport6_elem *ip2)
271{
272 return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
273 ip1->port == ip2->port &&
274 ip1->proto == ip2->proto &&
275 ip1->cidr == ip2->cidr;
276}
277
278static inline bool
279hash_netport6_data_isnull(const struct hash_netport6_elem *elem)
280{
281 return elem->proto == 0;
282}
283
284static inline void
285hash_netport6_data_copy(struct hash_netport6_elem *dst,
286 const struct hash_netport6_elem *src)
287{
288 memcpy(dst, src, sizeof(*dst));
289}
290
291static inline void
292hash_netport6_data_zero_out(struct hash_netport6_elem *elem)
293{
294 elem->proto = 0;
295}
296
297static inline void
298ip6_netmask(union nf_inet_addr *ip, u8 prefix)
299{
300 ip->ip6[0] &= ip_set_netmask6(prefix)[0];
301 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
302 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
303 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
304}
305
306static inline void
307hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
308{
309 ip6_netmask(&elem->ip, cidr);
310 elem->cidr = cidr;
311}
312
313static bool
314hash_netport6_data_list(struct sk_buff *skb,
315 const struct hash_netport6_elem *data)
316{
317 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
318 NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
319 NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
320 NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
321 return 0;
322
323nla_put_failure:
324 return 1;
325}
326
327static bool
328hash_netport6_data_tlist(struct sk_buff *skb,
329 const struct hash_netport6_elem *data)
330{
331 const struct hash_netport6_telem *e =
332 (const struct hash_netport6_telem *)data;
333
334 NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
335	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, e->port);
336	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr);
337	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, e->proto);
338 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
339 htonl(ip_set_timeout_get(e->timeout)));
340 return 0;
341
342nla_put_failure:
343 return 1;
344}
345
346#undef PF
347#undef HOST_MASK
348
349#define PF 6
350#define HOST_MASK 128
351#include <linux/netfilter/ipset/ip_set_ahash.h>
352
353static int
354hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
355 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
356{
357 const struct ip_set_hash *h = set->data;
358 ipset_adtfn adtfn = set->variant->adt[adt];
359	struct hash_netport6_elem data = {
360		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK };
361
362 if (data.cidr == 0)
363 return -EINVAL;
364 if (adt == IPSET_TEST)
365 data.cidr = HOST_MASK;
366
367 if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
368 &data.port, &data.proto))
369 return -EINVAL;
370
371 ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
372 ip6_netmask(&data.ip, data.cidr);
373
374 return adtfn(set, &data, h->timeout);
375}
376
377static int
378hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
379 enum ipset_adt adt, u32 *lineno, u32 flags)
380{
381 const struct ip_set_hash *h = set->data;
382 ipset_adtfn adtfn = set->variant->adt[adt];
383 struct hash_netport6_elem data = { .cidr = HOST_MASK };
384 u32 port, port_to;
385 u32 timeout = h->timeout;
386 bool with_ports = false;
387 int ret;
388
389 if (unlikely(!tb[IPSET_ATTR_IP] ||
390 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
391 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
392 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
393 return -IPSET_ERR_PROTOCOL;
394
395 if (tb[IPSET_ATTR_LINENO])
396 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
397
398 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
399 if (ret)
400 return ret;
401
402 if (tb[IPSET_ATTR_CIDR])
403 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
404	if (!data.cidr || data.cidr > HOST_MASK)
405 return -IPSET_ERR_INVALID_CIDR;
406 ip6_netmask(&data.ip, data.cidr);
407
408 if (tb[IPSET_ATTR_PORT])
409 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
410 else
411 return -IPSET_ERR_PROTOCOL;
412
413 if (tb[IPSET_ATTR_PROTO]) {
414 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
415 with_ports = ip_set_proto_with_ports(data.proto);
416
417 if (data.proto == 0)
418 return -IPSET_ERR_INVALID_PROTO;
419 } else
420 return -IPSET_ERR_MISSING_PROTO;
421
422 if (!(with_ports || data.proto == IPPROTO_ICMPV6))
423 data.port = 0;
424
425 if (tb[IPSET_ATTR_TIMEOUT]) {
426 if (!with_timeout(h->timeout))
427 return -IPSET_ERR_TIMEOUT;
428 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
429 }
430
431 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
432 ret = adtfn(set, &data, timeout);
433 return ip_set_eexist(ret, flags) ? 0 : ret;
434 }
435
436 port = ntohs(data.port);
437 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
438 if (port > port_to)
439 swap(port, port_to);
440
441 for (; port <= port_to; port++) {
442 data.port = htons(port);
443 ret = adtfn(set, &data, timeout);
444
445 if (ret && !ip_set_eexist(ret, flags))
446 return ret;
447 else
448 ret = 0;
449 }
450 return ret;
451}
452
453/* Create hash:net,port type of sets */
454
455static int
456hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
457{
458 struct ip_set_hash *h;
459 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
460 u8 hbits;
461
462 if (!(set->family == AF_INET || set->family == AF_INET6))
463 return -IPSET_ERR_INVALID_FAMILY;
464
465 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
466 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
467 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
468 return -IPSET_ERR_PROTOCOL;
469
470 if (tb[IPSET_ATTR_HASHSIZE]) {
471 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
472 if (hashsize < IPSET_MIMINAL_HASHSIZE)
473 hashsize = IPSET_MIMINAL_HASHSIZE;
474 }
475
476 if (tb[IPSET_ATTR_MAXELEM])
477 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
478
479 h = kzalloc(sizeof(*h)
480 + sizeof(struct ip_set_hash_nets)
481 * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
482 if (!h)
483 return -ENOMEM;
484
485 h->maxelem = maxelem;
486 get_random_bytes(&h->initval, sizeof(h->initval));
487 h->timeout = IPSET_NO_TIMEOUT;
488
489 hbits = htable_bits(hashsize);
490 h->table = ip_set_alloc(
491 sizeof(struct htable)
492 + jhash_size(hbits) * sizeof(struct hbucket));
493 if (!h->table) {
494 kfree(h);
495 return -ENOMEM;
496 }
497 h->table->htable_bits = hbits;
498
499 set->data = h;
500
501 if (tb[IPSET_ATTR_TIMEOUT]) {
502 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
503
504 set->variant = set->family == AF_INET
505 ? &hash_netport4_tvariant : &hash_netport6_tvariant;
506
507 if (set->family == AF_INET)
508 hash_netport4_gc_init(set);
509 else
510 hash_netport6_gc_init(set);
511 } else {
512 set->variant = set->family == AF_INET
513 ? &hash_netport4_variant : &hash_netport6_variant;
514 }
515
516 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
517 set->name, jhash_size(h->table->htable_bits),
518 h->table->htable_bits, h->maxelem, set->data, h->table);
519
520 return 0;
521}
522
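hash_netport_create() sizes the bucket array from the requested hashsize via
htable_bits(), which is defined in the ipset headers; the table always holds
a power-of-two number of buckets so that jhash results can be masked cheaply.
A plausible standalone equivalent of that sizing step (a sketch only; the
real helper may round or clamp differently):

	#include <stdio.h>

	static unsigned char table_bits(unsigned int hashsize)
	{
		unsigned char bits = 0;

		/* smallest bits such that (1 << bits) buckets hold hashsize */
		while ((1U << bits) < hashsize && bits < 31)
			bits++;
		return bits;
	}

	int main(void)
	{
		unsigned int hashsize = 1000;

		/* 1000 -> 10 bits -> 1024 buckets */
		printf("%u -> %u bits (%u buckets)\n", hashsize,
		       table_bits(hashsize), 1U << table_bits(hashsize));
		return 0;
	}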
523static struct ip_set_type hash_netport_type __read_mostly = {
524 .name = "hash:net,port",
525 .protocol = IPSET_PROTOCOL,
526 .features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
527 .dimension = IPSET_DIM_TWO,
528 .family = AF_UNSPEC,
529 .revision = 0,
530 .create = hash_netport_create,
531 .create_policy = {
532 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
533 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
534 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
535 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
536 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
537 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
538 },
539 .adt_policy = {
540 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
541 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
542 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
543 [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
544 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
545 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
546 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
547 },
548 .me = THIS_MODULE,
549};
550
551static int __init
552hash_netport_init(void)
553{
554 return ip_set_type_register(&hash_netport_type);
555}
556
557static void __exit
558hash_netport_fini(void)
559{
560 ip_set_type_unregister(&hash_netport_type);
561}
562
563module_init(hash_netport_init);
564module_exit(hash_netport_fini);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
new file mode 100644
index 00000000000..a47c32982f0
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -0,0 +1,584 @@
1/* Copyright (C) 2008-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the list:set type */
9
10#include <linux/module.h>
11#include <linux/ip.h>
12#include <linux/skbuff.h>
13#include <linux/errno.h>
14
15#include <linux/netfilter/ipset/ip_set.h>
16#include <linux/netfilter/ipset/ip_set_timeout.h>
17#include <linux/netfilter/ipset/ip_set_list.h>
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
21MODULE_DESCRIPTION("list:set type of IP sets");
22MODULE_ALIAS("ip_set_list:set");
23
24/* Member elements without and with timeout */
25struct set_elem {
26 ip_set_id_t id;
27};
28
29struct set_telem {
30 ip_set_id_t id;
31 unsigned long timeout;
32};
33
34/* Type structure */
35struct list_set {
36 size_t dsize; /* element size */
37 u32 size; /* size of set list array */
38 u32 timeout; /* timeout value */
39 struct timer_list gc; /* garbage collection */
40 struct set_elem members[0]; /* the set members */
41};
42
43static inline struct set_elem *
44list_set_elem(const struct list_set *map, u32 id)
45{
46 return (struct set_elem *)((char *)map->members + id * map->dsize);
47}
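list_set_elem() indexes members[] by byte offset rather than by a fixed
struct stride, because dsize is sizeof(struct set_elem) for plain sets and
sizeof(struct set_telem) for sets with timeouts. A standalone sketch of the
same variable-stride indexing (illustration only, with simplified types):

	#include <stdio.h>
	#include <stdlib.h>

	struct elem  { unsigned short id; };
	struct telem { unsigned short id; unsigned long timeout; };

	static void *elem_at(void *members, size_t dsize, unsigned int i)
	{
		/* element i starts i * dsize bytes into the flat array */
		return (char *)members + (size_t)i * dsize;
	}

	int main(void)
	{
		size_t dsize = sizeof(struct telem);	/* timeout variant */
		void *members = calloc(4, dsize);
		struct telem *e;

		if (!members)
			return 1;
		e = elem_at(members, dsize, 2);
		e->id = 42;
		e->timeout = 100;
		printf("elem 2: id=%u timeout=%lu\n", e->id, e->timeout);
		free(members);
		return 0;
	}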
48
49static inline bool
50list_set_timeout(const struct list_set *map, u32 id)
51{
52 const struct set_telem *elem =
53 (const struct set_telem *) list_set_elem(map, id);
54
55 return ip_set_timeout_test(elem->timeout);
56}
57
58static inline bool
59list_set_expired(const struct list_set *map, u32 id)
60{
61 const struct set_telem *elem =
62 (const struct set_telem *) list_set_elem(map, id);
63
64 return ip_set_timeout_expired(elem->timeout);
65}
66
67static inline int
68list_set_exist(const struct set_telem *elem)
69{
70 return elem->id != IPSET_INVALID_ID &&
71 !ip_set_timeout_expired(elem->timeout);
72}
73
74/* Set list without and with timeout */
75
76static int
77list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
78 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
79{
80 struct list_set *map = set->data;
81 struct set_elem *elem;
82 u32 i;
83 int ret;
84
85 for (i = 0; i < map->size; i++) {
86 elem = list_set_elem(map, i);
87 if (elem->id == IPSET_INVALID_ID)
88 return 0;
89 if (with_timeout(map->timeout) && list_set_expired(map, i))
90 continue;
91 switch (adt) {
92 case IPSET_TEST:
93 ret = ip_set_test(elem->id, skb, pf, dim, flags);
94 if (ret > 0)
95 return ret;
96 break;
97 case IPSET_ADD:
98 ret = ip_set_add(elem->id, skb, pf, dim, flags);
99 if (ret == 0)
100 return ret;
101 break;
102 case IPSET_DEL:
103 ret = ip_set_del(elem->id, skb, pf, dim, flags);
104 if (ret == 0)
105 return ret;
106 break;
107 default:
108 break;
109 }
110 }
111 return -EINVAL;
112}
113
114static bool
115next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id)
116{
117 const struct set_elem *elem;
118
119 if (i + 1 < map->size) {
120 elem = list_set_elem(map, i + 1);
121 return !!(elem->id == id &&
122 !(with_timeout(map->timeout) &&
123 list_set_expired(map, i + 1)));
124 }
125
126	return false;
127}
128
129static void
130list_elem_add(struct list_set *map, u32 i, ip_set_id_t id)
131{
132 struct set_elem *e;
133
134 for (; i < map->size; i++) {
135 e = list_set_elem(map, i);
136 swap(e->id, id);
137 if (e->id == IPSET_INVALID_ID)
138 break;
139 }
140}
141
142static void
143list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id,
144 unsigned long timeout)
145{
146 struct set_telem *e;
147
148 for (; i < map->size; i++) {
149 e = (struct set_telem *)list_set_elem(map, i);
150 swap(e->id, id);
151 if (e->id == IPSET_INVALID_ID)
152 break;
153 swap(e->timeout, timeout);
154 }
155}
156
157static int
158list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
159 unsigned long timeout)
160{
161 const struct set_elem *e = list_set_elem(map, i);
162
163 if (i == map->size - 1 && e->id != IPSET_INVALID_ID)
164 /* Last element replaced: e.g. add new,before,last */
165 ip_set_put_byindex(e->id);
166 if (with_timeout(map->timeout))
167 list_elem_tadd(map, i, id, timeout);
168 else
169 list_elem_add(map, i, id);
170
171 return 0;
172}
173
174static int
175list_set_del(struct list_set *map, ip_set_id_t id, u32 i)
176{
177 struct set_elem *a = list_set_elem(map, i), *b;
178
179 ip_set_put_byindex(id);
180
181 for (; i < map->size - 1; i++) {
182 b = list_set_elem(map, i + 1);
183 a->id = b->id;
184 if (with_timeout(map->timeout))
185 ((struct set_telem *)a)->timeout =
186 ((struct set_telem *)b)->timeout;
187 a = b;
188 if (a->id == IPSET_INVALID_ID)
189 break;
190 }
191 /* Last element */
192 a->id = IPSET_INVALID_ID;
193 return 0;
194}
195
196static int
197list_set_uadt(struct ip_set *set, struct nlattr *tb[],
198 enum ipset_adt adt, u32 *lineno, u32 flags)
199{
200 struct list_set *map = set->data;
201 bool with_timeout = with_timeout(map->timeout);
202 int before = 0;
203 u32 timeout = map->timeout;
204 ip_set_id_t id, refid = IPSET_INVALID_ID;
205 const struct set_elem *elem;
206 struct ip_set *s;
207 u32 i;
208 int ret = 0;
209
210 if (unlikely(!tb[IPSET_ATTR_NAME] ||
211 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
212 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
213 return -IPSET_ERR_PROTOCOL;
214
215 if (tb[IPSET_ATTR_LINENO])
216 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
217
218 id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
219 if (id == IPSET_INVALID_ID)
220 return -IPSET_ERR_NAME;
221 /* "Loop detection" */
222 if (s->type->features & IPSET_TYPE_NAME) {
223 ret = -IPSET_ERR_LOOP;
224 goto finish;
225 }
226
227 if (tb[IPSET_ATTR_CADT_FLAGS]) {
228 u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
229 before = f & IPSET_FLAG_BEFORE;
230 }
231
232 if (before && !tb[IPSET_ATTR_NAMEREF]) {
233 ret = -IPSET_ERR_BEFORE;
234 goto finish;
235 }
236
237 if (tb[IPSET_ATTR_NAMEREF]) {
238 refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
239 &s);
240 if (refid == IPSET_INVALID_ID) {
241 ret = -IPSET_ERR_NAMEREF;
242 goto finish;
243 }
244 if (!before)
245 before = -1;
246 }
247 if (tb[IPSET_ATTR_TIMEOUT]) {
248 if (!with_timeout) {
249 ret = -IPSET_ERR_TIMEOUT;
250 goto finish;
251 }
252 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
253 }
254
255 switch (adt) {
256 case IPSET_TEST:
257 for (i = 0; i < map->size && !ret; i++) {
258 elem = list_set_elem(map, i);
259 if (elem->id == IPSET_INVALID_ID ||
260 (before != 0 && i + 1 >= map->size))
261 break;
262 else if (with_timeout && list_set_expired(map, i))
263 continue;
264 else if (before > 0 && elem->id == id)
265 ret = next_id_eq(map, i, refid);
266 else if (before < 0 && elem->id == refid)
267 ret = next_id_eq(map, i, id);
268 else if (before == 0 && elem->id == id)
269 ret = 1;
270 }
271 break;
272 case IPSET_ADD:
273 for (i = 0; i < map->size && !ret; i++) {
274 elem = list_set_elem(map, i);
275 if (elem->id == id &&
276 !(with_timeout && list_set_expired(map, i)))
277 ret = -IPSET_ERR_EXIST;
278 }
279 if (ret == -IPSET_ERR_EXIST)
280 break;
281 ret = -IPSET_ERR_LIST_FULL;
282 for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
283 elem = list_set_elem(map, i);
284 if (elem->id == IPSET_INVALID_ID)
285 ret = before != 0 ? -IPSET_ERR_REF_EXIST
286 : list_set_add(map, i, id, timeout);
287 else if (elem->id != refid)
288 continue;
289 else if (with_timeout && list_set_expired(map, i))
290 ret = -IPSET_ERR_REF_EXIST;
291 else if (before)
292 ret = list_set_add(map, i, id, timeout);
293 else if (i + 1 < map->size)
294 ret = list_set_add(map, i + 1, id, timeout);
295 }
296 break;
297 case IPSET_DEL:
298 ret = -IPSET_ERR_EXIST;
299 for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) {
300 elem = list_set_elem(map, i);
301 if (elem->id == IPSET_INVALID_ID) {
302 ret = before != 0 ? -IPSET_ERR_REF_EXIST
303 : -IPSET_ERR_EXIST;
304 break;
305 } else if (with_timeout && list_set_expired(map, i))
306 continue;
307 else if (elem->id == id &&
308 (before == 0 ||
309 (before > 0 &&
310 next_id_eq(map, i, refid))))
311 ret = list_set_del(map, id, i);
312 else if (before < 0 &&
313 elem->id == refid &&
314 next_id_eq(map, i, id))
315 ret = list_set_del(map, id, i + 1);
316 }
317 break;
318 default:
319 break;
320 }
321
322finish:
323 if (refid != IPSET_INVALID_ID)
324 ip_set_put_byindex(refid);
325 if (adt != IPSET_ADD || ret)
326 ip_set_put_byindex(id);
327
328 return ip_set_eexist(ret, flags) ? 0 : ret;
329}
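The tri-state before variable above encodes three placements: before > 0
("add id in front of refid", from IPSET_FLAG_BEFORE), before < 0 ("add id
right after refid", when a reference name is given without the flag), and
before == 0 (no reference; take the first free slot). Insertion shifts the
tail one slot to the right exactly as the swap() loop in list_elem_add()
does. A standalone sketch of that shift-insert (illustration only, with 0
standing in for IPSET_INVALID_ID):

	#include <stdio.h>

	#define INVALID	0
	#define SIZE	6

	static void insert_at(unsigned int *a, unsigned int pos, unsigned int id)
	{
		/* ripple the tail right until a free slot absorbs the carry */
		for (; pos < SIZE; pos++) {
			unsigned int tmp = a[pos];

			a[pos] = id;
			id = tmp;
			if (a[pos] == INVALID)
				break;
		}
	}

	int main(void)
	{
		unsigned int a[SIZE] = { 10, 20, 30, INVALID };
		unsigned int i, refid = 20;

		for (i = 0; i < SIZE; i++)
			if (a[i] == refid || a[i] == INVALID)
				break;
		insert_at(a, i, 15);		/* "add 15 before 20" */
		for (i = 0; i < SIZE; i++)
			printf("%u ", a[i]);	/* 10 15 20 30 0 0 */
		printf("\n");
		return 0;
	}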
330
331static void
332list_set_flush(struct ip_set *set)
333{
334 struct list_set *map = set->data;
335 struct set_elem *elem;
336 u32 i;
337
338 for (i = 0; i < map->size; i++) {
339 elem = list_set_elem(map, i);
340 if (elem->id != IPSET_INVALID_ID) {
341 ip_set_put_byindex(elem->id);
342 elem->id = IPSET_INVALID_ID;
343 }
344 }
345}
346
347static void
348list_set_destroy(struct ip_set *set)
349{
350 struct list_set *map = set->data;
351
352 if (with_timeout(map->timeout))
353 del_timer_sync(&map->gc);
354 list_set_flush(set);
355 kfree(map);
356
357 set->data = NULL;
358}
359
360static int
361list_set_head(struct ip_set *set, struct sk_buff *skb)
362{
363 const struct list_set *map = set->data;
364 struct nlattr *nested;
365
366 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
367 if (!nested)
368 goto nla_put_failure;
369 NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size));
370 if (with_timeout(map->timeout))
371 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
372 NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
373 htonl(atomic_read(&set->ref) - 1));
374 NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
375 htonl(sizeof(*map) + map->size * map->dsize));
376 ipset_nest_end(skb, nested);
377
378 return 0;
379nla_put_failure:
380 return -EMSGSIZE;
381}
382
383static int
384list_set_list(const struct ip_set *set,
385 struct sk_buff *skb, struct netlink_callback *cb)
386{
387 const struct list_set *map = set->data;
388 struct nlattr *atd, *nested;
389 u32 i, first = cb->args[2];
390 const struct set_elem *e;
391
392 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
393 if (!atd)
394 return -EMSGSIZE;
395 for (; cb->args[2] < map->size; cb->args[2]++) {
396 i = cb->args[2];
397 e = list_set_elem(map, i);
398 if (e->id == IPSET_INVALID_ID)
399 goto finish;
400 if (with_timeout(map->timeout) && list_set_expired(map, i))
401 continue;
402 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
403 if (!nested) {
404 if (i == first) {
405 nla_nest_cancel(skb, atd);
406 return -EMSGSIZE;
407 } else
408 goto nla_put_failure;
409 }
410 NLA_PUT_STRING(skb, IPSET_ATTR_NAME,
411 ip_set_name_byindex(e->id));
412 if (with_timeout(map->timeout)) {
413 const struct set_telem *te =
414 (const struct set_telem *) e;
415 NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
416 htonl(ip_set_timeout_get(te->timeout)));
417 }
418 ipset_nest_end(skb, nested);
419 }
420finish:
421 ipset_nest_end(skb, atd);
422 /* Set listing finished */
423 cb->args[2] = 0;
424 return 0;
425
426nla_put_failure:
427 nla_nest_cancel(skb, nested);
428 ipset_nest_end(skb, atd);
429 if (unlikely(i == first)) {
430 cb->args[2] = 0;
431 return -EMSGSIZE;
432 }
433 return 0;
434}
435
436static bool
437list_set_same_set(const struct ip_set *a, const struct ip_set *b)
438{
439 const struct list_set *x = a->data;
440 const struct list_set *y = b->data;
441
442 return x->size == y->size &&
443 x->timeout == y->timeout;
444}
445
446static const struct ip_set_type_variant list_set = {
447 .kadt = list_set_kadt,
448 .uadt = list_set_uadt,
449 .destroy = list_set_destroy,
450 .flush = list_set_flush,
451 .head = list_set_head,
452 .list = list_set_list,
453 .same_set = list_set_same_set,
454};
455
456static void
457list_set_gc(unsigned long ul_set)
458{
459 struct ip_set *set = (struct ip_set *) ul_set;
460 struct list_set *map = set->data;
461 struct set_telem *e;
462 u32 i;
463
464	/* We run in parallel with other readers (test element)
465	 * but adding/deleting new entries is locked out */
466 read_lock_bh(&set->lock);
467	for (i = map->size; i-- > 0; ) {	/* i is unsigned; avoid wrap below 0 */
468 e = (struct set_telem *) list_set_elem(map, i);
469 if (e->id != IPSET_INVALID_ID &&
470 list_set_expired(map, i))
471 list_set_del(map, e->id, i);
472 }
473 read_unlock_bh(&set->lock);
474
475 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
476 add_timer(&map->gc);
477}
478
479static void
480list_set_gc_init(struct ip_set *set)
481{
482 struct list_set *map = set->data;
483
484 init_timer(&map->gc);
485 map->gc.data = (unsigned long) set;
486 map->gc.function = list_set_gc;
487 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
488 add_timer(&map->gc);
489}
490
491/* Create list:set type of sets */
492
493static bool
494init_list_set(struct ip_set *set, u32 size, size_t dsize,
495 unsigned long timeout)
496{
497 struct list_set *map;
498 struct set_elem *e;
499 u32 i;
500
501 map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
502 if (!map)
503 return false;
504
505 map->size = size;
506 map->dsize = dsize;
507 map->timeout = timeout;
508 set->data = map;
509
510 for (i = 0; i < size; i++) {
511 e = list_set_elem(map, i);
512 e->id = IPSET_INVALID_ID;
513 }
514
515 return true;
516}
517
518static int
519list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
520{
521 u32 size = IP_SET_LIST_DEFAULT_SIZE;
522
523 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
524 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
525 return -IPSET_ERR_PROTOCOL;
526
527 if (tb[IPSET_ATTR_SIZE])
528 size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]);
529 if (size < IP_SET_LIST_MIN_SIZE)
530 size = IP_SET_LIST_MIN_SIZE;
531
532 if (tb[IPSET_ATTR_TIMEOUT]) {
533 if (!init_list_set(set, size, sizeof(struct set_telem),
534 ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT])))
535 return -ENOMEM;
536
537 list_set_gc_init(set);
538 } else {
539 if (!init_list_set(set, size, sizeof(struct set_elem),
540 IPSET_NO_TIMEOUT))
541 return -ENOMEM;
542 }
543 set->variant = &list_set;
544 return 0;
545}
546
547static struct ip_set_type list_set_type __read_mostly = {
548 .name = "list:set",
549 .protocol = IPSET_PROTOCOL,
550 .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST,
551 .dimension = IPSET_DIM_ONE,
552 .family = AF_UNSPEC,
553 .revision = 0,
554 .create = list_set_create,
555 .create_policy = {
556 [IPSET_ATTR_SIZE] = { .type = NLA_U32 },
557 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
558 },
559 .adt_policy = {
560 [IPSET_ATTR_NAME] = { .type = NLA_STRING,
561 .len = IPSET_MAXNAMELEN },
562 [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING,
563 .len = IPSET_MAXNAMELEN },
564 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
565 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
566 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
567 },
568 .me = THIS_MODULE,
569};
570
571static int __init
572list_set_init(void)
573{
574 return ip_set_type_register(&list_set_type);
575}
576
577static void __exit
578list_set_fini(void)
579{
580 ip_set_type_unregister(&list_set_type);
581}
582
583module_init(list_set_init);
584module_exit(list_set_fini);
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
new file mode 100644
index 00000000000..23f8c816221
--- /dev/null
+++ b/net/netfilter/ipset/pfxlen.c
@@ -0,0 +1,291 @@
1#include <linux/netfilter/ipset/pfxlen.h>
2
3/*
4 * Prefixlen maps for fast conversions, by Jan Engelhardt.
5 */
6
7#define E(a, b, c, d) \
8 {.ip6 = { \
9 __constant_htonl(a), __constant_htonl(b), \
10 __constant_htonl(c), __constant_htonl(d), \
11 } }
12
13/*
14 * This table works for both IPv4 and IPv6;
15 * just use ip_set_netmask_map[prefixlength].ip.
16 */
17const union nf_inet_addr ip_set_netmask_map[] = {
18 E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
19 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
20 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
21 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
22 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
23 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
24 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
25 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
26 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
27 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
28 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
29 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
30 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
31 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
32 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
33 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
34 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
35 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
36 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
37 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
38 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
39 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
40 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
41 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
42 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
43 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
44 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
45 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
46 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
47 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
48 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
49 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
50 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
51 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
52 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
53 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
54 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
55 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
56 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
57 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
58 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
59 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
60 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
61 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
62 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
63 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
64 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
65 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
66 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
67 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
68 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
69 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
70 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
71 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
72 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
73 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
74 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
75 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
76 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
77 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
78 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
79 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
80 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
81 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
82 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
83 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
84 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
85 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
86 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
87 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
88 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
89 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
90 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
91 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
92 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
93 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
94 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
95 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
96 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
97 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
98 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
99 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
100 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
101 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
102 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
103 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
104 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
105 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
106 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
107 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
108 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
109 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
110 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
111 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
112 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
113 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
114 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
115 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
116 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
117 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
118 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
119 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
120 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
121 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
122 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
123 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
124 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
125 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
126 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
127 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
128 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
129 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
130 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
131 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
132 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
133 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
134 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
135 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
136 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
137 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
138 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
139 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
140 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
141 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
142 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
143 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
144 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
145 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
146 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
147};
148EXPORT_SYMBOL_GPL(ip_set_netmask_map);
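A lookup in ip_set_netmask_map replaces per-packet mask computation: entry
[cidr].ip is the ready-made IPv4 netmask for a /cidr prefix, already in
network byte order thanks to the __constant_htonl() initializers (the same
entry read as .ip6 serves IPv6). What one table slot holds, computed by hand
(a sketch; standard headers only):

	#include <stdio.h>
	#include <arpa/inet.h>

	static unsigned int netmask(unsigned char cidr)	/* host order */
	{
		/* guard cidr == 0: shifting a u32 by 32 is undefined */
		return cidr ? 0xFFFFFFFFu << (32 - cidr) : 0;
	}

	int main(void)
	{
		unsigned char cidr = 24;
		unsigned int mask = htonl(netmask(cidr)); /* what map[24].ip holds */
		unsigned int addr = inet_addr("192.168.7.9");

		printf("/%u mask 0x%08x, network 0x%08x\n",
		       cidr, ntohl(mask), ntohl(addr & mask));
		return 0;
	}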
149
150#undef E
151#define E(a, b, c, d) \
152 {.ip6 = { (__force __be32) a, (__force __be32) b, \
153 (__force __be32) c, (__force __be32) d, \
154 } }
155
156/*
157 * This table works for both IPv4 and IPv6;
158 * just use ip_set_hostmask_map[prefixlength].ip.
159 */
160const union nf_inet_addr ip_set_hostmask_map[] = {
161 E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
162 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
163 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
164 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
165 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
166 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
167 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
168 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
169 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
170 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
171 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
172 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
173 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
174 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
175 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
176 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
177 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
178 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
179 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
180 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
181 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
182 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
183 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
184 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
185 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
186 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
187 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
188 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
189 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
190 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
191 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
192 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
193 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
194 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
195 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
196 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
197 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
198 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
199 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
200 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
201 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
202 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
203 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
204 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
205 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
206 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
207 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
208 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
209 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
210 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
211 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
212 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
213 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
214 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
215 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
216 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
217 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
218 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
219 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
220 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
221 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
222 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
223 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
224 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
225 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
226 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
227 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
228 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
229 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
230 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
231 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
232 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
233 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
234 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
235 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
236 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
237 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
238 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
239 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
240 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
241 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
242 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
243 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
244 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
245 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
246 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
247 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
248 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
249 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
250 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
251 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
252 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
253 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
254 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
255 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
256 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
257 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
258 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
259 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
260 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
261 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
262 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
263 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
264 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
265 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
266 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
267 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
268 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
269 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
270 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
271 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
272 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
273 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
274 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
275 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
276 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
277 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
278 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
279 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
280 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
281 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
282 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
283 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
284 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
285 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
286 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
287 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
288 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
289 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
290};
291EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index a475edee091..2dc6de13ac1 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app);
 EXPORT_SYMBOL(unregister_ip_vs_app);
 EXPORT_SYMBOL(register_ip_vs_app_inc);
 
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
 static DEFINE_MUTEX(__ip_vs_app_mutex);
 
-
 /*
  *	Get an ip_vs_app object
  */
@@ -67,7 +64,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
  *	Allocate/initialize app incarnation and register it in proto apps.
  */
 static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+		  __u16 port)
 {
 	struct ip_vs_protocol *pp;
 	struct ip_vs_app *inc;
@@ -98,7 +96,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
 		}
 	}
 
-	ret = pp->register_app(inc);
+	ret = pp->register_app(net, inc);
 	if (ret)
 		goto out;
 
@@ -119,7 +117,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
  *	Release app incarnation
  */
 static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_protocol *pp;
 
@@ -127,7 +125,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
 		return;
 
 	if (pp->unregister_app)
-		pp->unregister_app(inc);
+		pp->unregister_app(net, inc);
 
 	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
 		  pp->name, inc->name, ntohs(inc->port));
@@ -168,13 +166,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
  *	Register an application incarnation in protocol applications
  */
 int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+		       __u16 port)
 {
 	int result;
 
 	mutex_lock(&__ip_vs_app_mutex);
 
-	result = ip_vs_app_inc_new(app, proto, port);
+	result = ip_vs_app_inc_new(net, app, proto, port);
 
 	mutex_unlock(&__ip_vs_app_mutex);
 
@@ -185,14 +184,15 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
 /*
  *	ip_vs_app registration routine
  */
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
 	mutex_lock(&__ip_vs_app_mutex);
 
-	list_add(&app->a_list, &ip_vs_app_list);
+	list_add(&app->a_list, &ipvs->app_list);
 
 	mutex_unlock(&__ip_vs_app_mutex);
 
@@ -204,14 +204,14 @@ int register_ip_vs_app(struct ip_vs_app *app)
  *	ip_vs_app unregistration routine
  *	We are sure there are no app incarnations attached to services
  */
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
 	struct ip_vs_app *inc, *nxt;
 
 	mutex_lock(&__ip_vs_app_mutex);
 
 	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-		ip_vs_app_inc_release(inc);
+		ip_vs_app_inc_release(net, inc);
 	}
 
 	list_del(&app->a_list);
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
 /*
  *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
  */
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+		   struct ip_vs_protocol *pp)
 {
 	return pp->app_conn_bind(cp);
 }
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
  *	/proc/net/ip_vs_app entry function
  */
 
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 {
 	struct ip_vs_app *app, *inc;
 
-	list_for_each_entry(app, &ip_vs_app_list, a_list) {
+	list_for_each_entry(app, &ipvs->app_list, a_list) {
 		list_for_each_entry(inc, &app->incs_list, a_list) {
 			if (pos-- == 0)
 				return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
 
 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
 	mutex_lock(&__ip_vs_app_mutex);
 
-	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 }
 
 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_app *inc, *app;
 	struct list_head *e;
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
-		return ip_vs_app_idx(0);
+		return ip_vs_app_idx(ipvs, 0);
 
 	inc = v;
 	app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return list_entry(e, struct ip_vs_app, a_list);
 
 	/* go on to next application */
-	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
 		app = list_entry(e, struct ip_vs_app, a_list);
 		list_for_each_entry(inc, &app->incs_list, a_list) {
 			return inc;
@@ -557,7 +563,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
 
 static int ip_vs_app_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_app_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +576,35 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif
 
-int __init ip_vs_app_init(void)
+static int __net_init __ip_vs_app_init(struct net *net)
 {
-	/* we will replace it with proc_net_ipvs_create() soon */
-	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	INIT_LIST_HEAD(&ipvs->app_list);
+	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
 	return 0;
 }
 
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+	proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+	.init = __ip_vs_app_init,
+	.exit = __ip_vs_app_cleanup,
+};
+
+int __init ip_vs_app_init(void)
+{
+	int rv;
+
+	rv = register_pernet_subsys(&ip_vs_app_ops);
+	return rv;
+}
+
 
 void ip_vs_app_cleanup(void)
 {
-	proc_net_remove(&init_net, "ip_vs_app");
+	unregister_pernet_subsys(&ip_vs_app_ops);
 }
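The hunk above converts ip_vs_app from global state to the pernet pattern:
per-namespace data is set up in a .init hook and torn down in .exit, and
register_pernet_subsys() arranges for both to run in every network
namespace, existing and future. A minimal skeleton of that pattern (a
sketch; the my_subsys names are purely illustrative):

	#include <linux/module.h>
	#include <net/net_namespace.h>

	static int __net_init my_subsys_net_init(struct net *net)
	{
		/* allocate and initialize state hanging off *net here */
		return 0;
	}

	static void __net_exit my_subsys_net_exit(struct net *net)
	{
		/* free the per-namespace state here */
	}

	static struct pernet_operations my_subsys_net_ops = {
		.init = my_subsys_net_init,
		.exit = my_subsys_net_exit,
	};

	static int __init my_subsys_init(void)
	{
		return register_pernet_subsys(&my_subsys_net_ops);
	}

	static void __exit my_subsys_exit(void)
	{
		unregister_pernet_subsys(&my_subsys_net_ops);
	}

	module_init(my_subsys_init);
	module_exit(my_subsys_exit);
	MODULE_LICENSE("GPL");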
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e9adecdc8ca..f289306cbf1 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,35 +48,32 @@
48/* 48/*
49 * Connection hash size. Default is what was selected at compile time. 49 * Connection hash size. Default is what was selected at compile time.
50*/ 50*/
51int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; 51static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
52module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); 52module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
53MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); 53MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
54 54
55/* size and mask values */ 55/* size and mask values */
56int ip_vs_conn_tab_size; 56int ip_vs_conn_tab_size __read_mostly;
57int ip_vs_conn_tab_mask; 57static int ip_vs_conn_tab_mask __read_mostly;
58 58
59/* 59/*
60 * Connection hash table: for input and output packets lookups of IPVS 60 * Connection hash table: for input and output packets lookups of IPVS
61 */ 61 */
62static struct list_head *ip_vs_conn_tab; 62static struct hlist_head *ip_vs_conn_tab __read_mostly;
63 63
64/* SLAB cache for IPVS connections */ 64/* SLAB cache for IPVS connections */
65static struct kmem_cache *ip_vs_conn_cachep __read_mostly; 65static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
66 66
67/* counter for current IPVS connections */
68static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
69
70/* counter for no client port connections */ 67/* counter for no client port connections */
71static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); 68static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
72 69
73/* random value for IPVS connection hash */ 70/* random value for IPVS connection hash */
74static unsigned int ip_vs_conn_rnd; 71static unsigned int ip_vs_conn_rnd __read_mostly;
75 72
76/* 73/*
77 * Fine locking granularity for big connection hash table 74 * Fine locking granularity for big connection hash table
78 */ 75 */
79#define CT_LOCKARRAY_BITS 4 76#define CT_LOCKARRAY_BITS 5
80#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) 77#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
81#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) 78#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
82 79
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
133/* 130/*
134 * Returns hash value for IPVS connection entry 131 * Returns hash value for IPVS connection entry
135 */ 132 */
136static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, 133static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
137 const union nf_inet_addr *addr, 134 const union nf_inet_addr *addr,
138 __be16 port) 135 __be16 port)
139{ 136{
140#ifdef CONFIG_IP_VS_IPV6 137#ifdef CONFIG_IP_VS_IPV6
141 if (af == AF_INET6) 138 if (af == AF_INET6)
142 return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), 139 return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
143 (__force u32)port, proto, ip_vs_conn_rnd) 140 (__force u32)port, proto, ip_vs_conn_rnd) ^
144 & ip_vs_conn_tab_mask; 141 ((size_t)net>>8)) & ip_vs_conn_tab_mask;
145#endif 142#endif
146 return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, 143 return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
147 ip_vs_conn_rnd) 144 ip_vs_conn_rnd) ^
148 & ip_vs_conn_tab_mask; 145 ((size_t)net>>8)) & ip_vs_conn_tab_mask;
149} 146}
150 147
151static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, 148static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
166 port = p->vport; 163 port = p->vport;
167 } 164 }
168 165
169 return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); 166 return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
170} 167}
171 168
172static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) 169static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
173{ 170{
174 struct ip_vs_conn_param p; 171 struct ip_vs_conn_param p;
175 172
176 ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, 173 ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
177 NULL, 0, &p); 174 &cp->caddr, cp->cport, NULL, 0, &p);
178 175
179 if (cp->dest && cp->dest->svc->pe) { 176 if (cp->pe) {
180 p.pe = cp->dest->svc->pe; 177 p.pe = cp->pe;
181 p.pe_data = cp->pe_data; 178 p.pe_data = cp->pe_data;
182 p.pe_data_len = cp->pe_data_len; 179 p.pe_data_len = cp->pe_data_len;
183 } 180 }
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
186} 183}
187 184
188/* 185/*
189 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. 186 * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
190 * returns bool success. 187 * returns bool success.
191 */ 188 */
192static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) 189static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -204,7 +201,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
204 spin_lock(&cp->lock); 201 spin_lock(&cp->lock);
205 202
206 if (!(cp->flags & IP_VS_CONN_F_HASHED)) { 203 if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
207 list_add(&cp->c_list, &ip_vs_conn_tab[hash]); 204 hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
208 cp->flags |= IP_VS_CONN_F_HASHED; 205 cp->flags |= IP_VS_CONN_F_HASHED;
209 atomic_inc(&cp->refcnt); 206 atomic_inc(&cp->refcnt);
210 ret = 1; 207 ret = 1;
@@ -237,7 +234,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
237 spin_lock(&cp->lock); 234 spin_lock(&cp->lock);
238 235
239 if (cp->flags & IP_VS_CONN_F_HASHED) { 236 if (cp->flags & IP_VS_CONN_F_HASHED) {
240 list_del(&cp->c_list); 237 hlist_del(&cp->c_list);
241 cp->flags &= ~IP_VS_CONN_F_HASHED; 238 cp->flags &= ~IP_VS_CONN_F_HASHED;
242 atomic_dec(&cp->refcnt); 239 atomic_dec(&cp->refcnt);
243 ret = 1; 240 ret = 1;
@@ -262,18 +259,20 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
262{ 259{
263 unsigned hash; 260 unsigned hash;
264 struct ip_vs_conn *cp; 261 struct ip_vs_conn *cp;
262 struct hlist_node *n;
265 263
266 hash = ip_vs_conn_hashkey_param(p, false); 264 hash = ip_vs_conn_hashkey_param(p, false);
267 265
268 ct_read_lock(hash); 266 ct_read_lock(hash);
269 267
270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 268 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
271 if (cp->af == p->af && 269 if (cp->af == p->af &&
270 p->cport == cp->cport && p->vport == cp->vport &&
272 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && 271 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
273 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && 272 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
274 p->cport == cp->cport && p->vport == cp->vport &&
275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 273 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
276 p->protocol == cp->protocol) { 274 p->protocol == cp->protocol &&
275 ip_vs_conn_net_eq(cp, p->net)) {
277 /* HIT */ 276 /* HIT */
278 atomic_inc(&cp->refcnt); 277 atomic_inc(&cp->refcnt);
279 ct_read_unlock(hash); 278 ct_read_unlock(hash);
@@ -313,23 +312,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
313 struct ip_vs_conn_param *p) 312 struct ip_vs_conn_param *p)
314{ 313{
315 __be16 _ports[2], *pptr; 314 __be16 _ports[2], *pptr;
315 struct net *net = skb_net(skb);
316 316
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
318 if (pptr == NULL) 318 if (pptr == NULL)
319 return 1; 319 return 1;
320 320
321 if (likely(!inverse)) 321 if (likely(!inverse))
322 ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], 322 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
323 &iph->daddr, pptr[1], p); 323 pptr[0], &iph->daddr, pptr[1], p);
324 else 324 else
325 ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], 325 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
326 &iph->saddr, pptr[0], p); 326 pptr[1], &iph->saddr, pptr[0], p);
327 return 0; 327 return 0;
328} 328}
329 329
330struct ip_vs_conn * 330struct ip_vs_conn *
331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
332 struct ip_vs_protocol *pp,
333 const struct ip_vs_iphdr *iph, 332 const struct ip_vs_iphdr *iph,
334 unsigned int proto_off, int inverse) 333 unsigned int proto_off, int inverse)
335{ 334{
@@ -347,14 +346,17 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
347{ 346{
348 unsigned hash; 347 unsigned hash;
349 struct ip_vs_conn *cp; 348 struct ip_vs_conn *cp;
349 struct hlist_node *n;
350 350
351 hash = ip_vs_conn_hashkey_param(p, false); 351 hash = ip_vs_conn_hashkey_param(p, false);
352 352
353 ct_read_lock(hash); 353 ct_read_lock(hash);
354 354
355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 355 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
356 if (!ip_vs_conn_net_eq(cp, p->net))
357 continue;
356 if (p->pe_data && p->pe->ct_match) { 358 if (p->pe_data && p->pe->ct_match) {
357 if (p->pe->ct_match(p, cp)) 359 if (p->pe == cp->pe && p->pe->ct_match(p, cp))
358 goto out; 360 goto out;
359 continue; 361 continue;
360 } 362 }
@@ -394,6 +396,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
394{ 396{
395 unsigned hash; 397 unsigned hash;
396 struct ip_vs_conn *cp, *ret=NULL; 398 struct ip_vs_conn *cp, *ret=NULL;
399 struct hlist_node *n;
397 400
398 /* 401 /*
399 * Check for "full" addressed entries 402 * Check for "full" addressed entries
@@ -402,12 +405,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
402 405
403 ct_read_lock(hash); 406 ct_read_lock(hash);
404 407
405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 408 hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
406 if (cp->af == p->af && 409 if (cp->af == p->af &&
410 p->vport == cp->cport && p->cport == cp->dport &&
407 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && 411 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
408 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && 412 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
409 p->vport == cp->cport && p->cport == cp->dport && 413 p->protocol == cp->protocol &&
410 p->protocol == cp->protocol) { 414 ip_vs_conn_net_eq(cp, p->net)) {
411 /* HIT */ 415 /* HIT */
412 atomic_inc(&cp->refcnt); 416 atomic_inc(&cp->refcnt);
413 ret = cp; 417 ret = cp;
@@ -428,7 +432,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
428 432
429struct ip_vs_conn * 433struct ip_vs_conn *
430ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 434ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
431 struct ip_vs_protocol *pp,
432 const struct ip_vs_iphdr *iph, 435 const struct ip_vs_iphdr *iph,
433 unsigned int proto_off, int inverse) 436 unsigned int proto_off, int inverse)
434{ 437{
@@ -611,9 +614,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
611 struct ip_vs_dest *dest; 614 struct ip_vs_dest *dest;
612 615
613 if ((cp) && (!cp->dest)) { 616 if ((cp) && (!cp->dest)) {
614 dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, 617 dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
615 &cp->vaddr, cp->vport, 618 cp->dport, &cp->vaddr, cp->vport,
616 cp->protocol); 619 cp->protocol, cp->fwmark);
617 ip_vs_bind_dest(cp, dest); 620 ip_vs_bind_dest(cp, dest);
618 return dest; 621 return dest;
619 } else 622 } else
@@ -677,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
677 atomic_dec(&dest->refcnt); 680 atomic_dec(&dest->refcnt);
678} 681}
679 682
683static int expire_quiescent_template(struct netns_ipvs *ipvs,
684 struct ip_vs_dest *dest)
685{
686#ifdef CONFIG_SYSCTL
687 return ipvs->sysctl_expire_quiescent_template &&
688 (atomic_read(&dest->weight) == 0);
689#else
690 return 0;
691#endif
692}
680 693
681/* 694/*
682 * Checking if the destination of a connection template is available. 695 * Checking if the destination of a connection template is available.
@@ -686,14 +699,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
686int ip_vs_check_template(struct ip_vs_conn *ct) 699int ip_vs_check_template(struct ip_vs_conn *ct)
687{ 700{
688 struct ip_vs_dest *dest = ct->dest; 701 struct ip_vs_dest *dest = ct->dest;
702 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
689 703
690 /* 704 /*
691 * Checking the dest server status. 705 * Checking the dest server status.
692 */ 706 */
693 if ((dest == NULL) || 707 if ((dest == NULL) ||
694 !(dest->flags & IP_VS_DEST_F_AVAILABLE) || 708 !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
695 (sysctl_ip_vs_expire_quiescent_template && 709 expire_quiescent_template(ipvs, dest)) {
696 (atomic_read(&dest->weight) == 0))) {
697 IP_VS_DBG_BUF(9, "check_template: dest not available for " 710 IP_VS_DBG_BUF(9, "check_template: dest not available for "
698 "protocol %s s:%s:%d v:%s:%d " 711 "protocol %s s:%s:%d v:%s:%d "
699 "-> d:%s:%d\n", 712 "-> d:%s:%d\n",
@@ -730,6 +743,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
730static void ip_vs_conn_expire(unsigned long data) 743static void ip_vs_conn_expire(unsigned long data)
731{ 744{
732 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 745 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
746 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
733 747
734 cp->timeout = 60*HZ; 748 cp->timeout = 60*HZ;
735 749
@@ -765,13 +779,14 @@ static void ip_vs_conn_expire(unsigned long data)
765 if (cp->flags & IP_VS_CONN_F_NFCT) 779 if (cp->flags & IP_VS_CONN_F_NFCT)
766 ip_vs_conn_drop_conntrack(cp); 780 ip_vs_conn_drop_conntrack(cp);
767 781
782 ip_vs_pe_put(cp->pe);
768 kfree(cp->pe_data); 783 kfree(cp->pe_data);
769 if (unlikely(cp->app != NULL)) 784 if (unlikely(cp->app != NULL))
770 ip_vs_unbind_app(cp); 785 ip_vs_unbind_app(cp);
771 ip_vs_unbind_dest(cp); 786 ip_vs_unbind_dest(cp);
772 if (cp->flags & IP_VS_CONN_F_NO_CPORT) 787 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
773 atomic_dec(&ip_vs_conn_no_cport_cnt); 788 atomic_dec(&ip_vs_conn_no_cport_cnt);
774 atomic_dec(&ip_vs_conn_count); 789 atomic_dec(&ipvs->conn_count);
775 790
776 kmem_cache_free(ip_vs_conn_cachep, cp); 791 kmem_cache_free(ip_vs_conn_cachep, cp);
777 return; 792 return;
@@ -802,10 +817,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 struct ip_vs_conn *
 ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest)
+	       struct ip_vs_dest *dest, __u32 fwmark)
 {
 	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+	struct netns_ipvs *ipvs = net_ipvs(p->net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+							   p->protocol);
 
 	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
 	if (cp == NULL) {
@@ -813,8 +830,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 		return NULL;
 	}
 
-	INIT_LIST_HEAD(&cp->c_list);
+	INIT_HLIST_NODE(&cp->c_list);
 	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	ip_vs_conn_net_set(cp, p->net);
 	cp->af = p->af;
 	cp->protocol = p->protocol;
 	ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +844,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 		       &cp->daddr, daddr);
 	cp->dport = dport;
 	cp->flags = flags;
-	if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+	cp->fwmark = fwmark;
+	if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+		ip_vs_pe_get(p->pe);
+		cp->pe = p->pe;
 		cp->pe_data = p->pe_data;
 		cp->pe_data_len = p->pe_data_len;
 	}
@@ -842,7 +863,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	atomic_set(&cp->n_control, 0);
 	atomic_set(&cp->in_pkts, 0);
 
-	atomic_inc(&ip_vs_conn_count);
+	atomic_inc(&ipvs->conn_count);
 	if (flags & IP_VS_CONN_F_NO_CPORT)
 		atomic_inc(&ip_vs_conn_no_cport_cnt);
 
@@ -861,8 +882,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 #endif
 	ip_vs_bind_xmit(cp);
 
-	if (unlikely(pp && atomic_read(&pp->appcnt)))
-		ip_vs_bind_app(cp, pp);
+	if (unlikely(pd && atomic_read(&pd->appcnt)))
+		ip_vs_bind_app(cp, pd->pp);
 
 	/*
 	 * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +892,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	 * IP_VS_CONN_F_ONE_PACKET too.
 	 */
 
-	if (ip_vs_conntrack_enabled())
+	if (ip_vs_conntrack_enabled(ipvs))
 		cp->flags |= IP_VS_CONN_F_NFCT;
 
 	/* Hash it in the ip_vs_conn_tab finally */
@@ -884,18 +905,24 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
  *	/proc/net/ip_vs_conn entries
  */
 #ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+	struct seq_net_private	p;
+	struct hlist_head	*l;
+};
 
 static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct hlist_node *n;
 
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
 			if (pos-- == 0) {
-				seq->private = &ip_vs_conn_tab[idx];
+				iter->l = &ip_vs_conn_tab[idx];
 				return cp;
 			}
 		}
 		ct_read_unlock_bh(idx);
@@ -906,14 +933,18 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 
 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	seq->private = NULL;
+	struct ip_vs_iter_state *iter = seq->private;
+
+	iter->l = NULL;
 	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
 }
 
 static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_conn *cp = v;
-	struct list_head *e, *l = seq->private;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct hlist_node *e;
+	struct hlist_head *l = iter->l;
 	int idx;
 
 	++*pos;
@@ -921,27 +952,28 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return ip_vs_conn_array(seq, 0);
 
 	/* more on same hash chain? */
-	if ((e = cp->c_list.next) != l)
-		return list_entry(e, struct ip_vs_conn, c_list);
+	if ((e = cp->c_list.next))
+		return hlist_entry(e, struct ip_vs_conn, c_list);
 
 	idx = l - ip_vs_conn_tab;
 	ct_read_unlock_bh(idx);
 
 	while (++idx < ip_vs_conn_tab_size) {
 		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-			seq->private = &ip_vs_conn_tab[idx];
+		hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) {
+			iter->l = &ip_vs_conn_tab[idx];
 			return cp;
 		}
 		ct_read_unlock_bh(idx);
 	}
-	seq->private = NULL;
+	iter->l = NULL;
 	return NULL;
 }
 
 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
 {
-	struct list_head *l = seq->private;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct hlist_head *l = iter->l;
 
 	if (l)
 		ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +987,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 		   "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
 	else {
 		const struct ip_vs_conn *cp = v;
+		struct net *net = seq_file_net(seq);
 		char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
 		size_t len = 0;
 
-		if (cp->dest && cp->pe_data &&
-		    cp->dest->svc->pe->show_pe_data) {
+		if (!ip_vs_conn_net_eq(cp, net))
+			return 0;
+		if (cp->pe_data) {
 			pe_data[0] = ' ';
-			len = strlen(cp->dest->svc->pe->name);
-			memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+			len = strlen(cp->pe->name);
+			memcpy(pe_data + 1, cp->pe->name, len);
 			pe_data[len + 1] = ' ';
 			len += 2;
-			len += cp->dest->svc->pe->show_pe_data(cp,
-							       pe_data + len);
+			len += cp->pe->show_pe_data(cp, pe_data + len);
 		}
 		pe_data[len] = '\0';
 
@@ -1004,7 +1037,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
 
 static int ip_vs_conn_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+			    sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1065,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 		   "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
 	else {
 		const struct ip_vs_conn *cp = v;
+		struct net *net = seq_file_net(seq);
+
+		if (!ip_vs_conn_net_eq(cp, net))
+			return 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 		if (cp->af == AF_INET6)
@@ -1067,7 +1105,8 @@
 
 static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_sync_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+			    sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_sync_fops = {
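The /proc conversion in the hunks above follows the standard net-namespace seq_file recipe: seq_open_net() allocates seq->private with the size it is given, and because struct seq_net_private is the first member of the iterator state, seq_file_net() can recover the owning struct net inside the show callbacks. Collected from the hunks above into one sketch:

struct ip_vs_iter_state {
	struct seq_net_private	p;	/* must come first for seq_file_net() */
	struct hlist_head	*l;	/* hash bucket the iterator stopped in */
};

static int ip_vs_conn_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
			    sizeof(struct ip_vs_iter_state));
}

A show callback then skips entries belonging to other namespaces, as both seq_show functions above do:

	if (!ip_vs_conn_net_eq(cp, seq_file_net(seq)))
		return 0;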
@@ -1113,7 +1152,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 }
 
 /* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
@@ -1123,17 +1162,19 @@ void ip_vs_random_dropentry(void)
 	 */
 	for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
 		unsigned hash = net_random() & ip_vs_conn_tab_mask;
+		struct hlist_node *n;
 
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
 		ct_write_lock_bh(hash);
 
-		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
 				continue;
-
+			if (!ip_vs_conn_net_eq(cp, net))
+				continue;
 			if (cp->protocol == IPPROTO_TCP) {
 				switch(cp->state) {
 				case IP_VS_TCP_S_SYN_RECV:
@@ -1168,20 +1209,24 @@
 /*
  *	Flush all the connection entries in the ip_vs_conn_tab
  */
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-  flush_again:
+flush_again:
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
+		struct hlist_node *n;
+
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
 		ct_write_lock_bh(idx);
 
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
+			if (!ip_vs_conn_net_eq(cp, net))
+				continue;
 			IP_VS_DBG(4, "del connection\n");
 			ip_vs_conn_expire_now(cp);
 			if (cp->control) {
@@ -1194,16 +1239,41 @@ static void ip_vs_conn_flush(void)
 
 	/* the counter may be non-zero, because some conn entries may still
 	   be run by slow timer handlers, or be unhashed but still referenced */
-	if (atomic_read(&ip_vs_conn_count) != 0) {
+	if (atomic_read(&ipvs->conn_count) != 0) {
 		schedule();
 		goto flush_again;
 	}
 }
+/*
+ * per netns init and exit
+ */
+int __net_init __ip_vs_conn_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	atomic_set(&ipvs->conn_count, 0);
 
+	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	return 0;
+}
+
+static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+{
+	/* flush all the connection entries first */
+	ip_vs_conn_flush(net);
+	proc_net_remove(net, "ip_vs_conn");
+	proc_net_remove(net, "ip_vs_conn_sync");
+}
+static struct pernet_operations ipvs_conn_ops = {
+	.init = __ip_vs_conn_init,
+	.exit = __ip_vs_conn_cleanup,
+};
 
 int __init ip_vs_conn_init(void)
 {
 	int idx;
+	int retc;
 
 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1212,8 +1282,7 @@ int __init ip_vs_conn_init(void)
 	/*
 	 * Allocate the connection hash table and initialize its list heads
 	 */
-	ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size *
-				 sizeof(struct list_head));
+	ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * sizeof(*ip_vs_conn_tab));
 	if (!ip_vs_conn_tab)
 		return -ENOMEM;
 
@@ -1233,32 +1302,25 @@ int __init ip_vs_conn_init(void)
 	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
 		  sizeof(struct ip_vs_conn));
 
-	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
-		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
-	}
+	for (idx = 0; idx < ip_vs_conn_tab_size; idx++)
+		INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
 
 	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}
 
-	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	retc = register_pernet_subsys(&ipvs_conn_ops);
 
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
 
-	return 0;
+	return retc;
 }
 
-
 void ip_vs_conn_cleanup(void)
 {
-	/* flush all the connection entries first */
-	ip_vs_conn_flush();
-
+	unregister_pernet_subsys(&ipvs_conn_ops);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
-	proc_net_remove(&init_net, "ip_vs_conn");
-	proc_net_remove(&init_net, "ip_vs_conn_sync");
 	vfree(ip_vs_conn_tab);
 }
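With this, ip_vs_conn.c ends up with a textbook pernet registration: the module registers init/exit callbacks once, and the core invokes them for every network namespace (including init_net) as namespaces come and go. The generic shape, with illustrative names:

static int __net_init example_net_init(struct net *net)
{
	/* set up this namespace's private state (counters, /proc files) */
	return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
	/* tear down whatever example_net_init() created */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};

static int __init example_module_init(void)
{
	return register_pernet_subsys(&example_net_ops);
}

static void __exit example_module_exit(void)
{
	unregister_pernet_subsys(&example_net_ops);
}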
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9c5a0..07accf6b240 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
 #include <net/icmp.h>                   /* for icmp_send */
 #include <net/route.h>
 #include <net/ip6_checksum.h>
+#include <net/netns/generic.h>		/* net_generic() */
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 
+int ip_vs_net_id __read_mostly;
+#ifdef IP_VS_GENERIC_NETNS
+EXPORT_SYMBOL(ip_vs_net_id);
+#endif
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.inpkts++;
-		dest->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.inpkts++;
-		dest->svc->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.inpkts++;
-		ip_vs_stats.ustats.inbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		struct ip_vs_cpu_stats *s;
+
+		s = this_cpu_ptr(dest->stats.cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
 	}
 }
 
@@ -131,21 +145,28 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.outpkts++;
-		dest->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.outpkts++;
-		dest->svc->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.outpkts++;
-		ip_vs_stats.ustats.outbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		struct ip_vs_cpu_stats *s;
+
+		s = this_cpu_ptr(dest->stats.cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
 	}
 }
 
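The two hunks above trade three spinlocks per packet for lock-free per-cpu counters; on 32-bit SMP, torn reads of the 64-bit byte counters are prevented by the u64_stats_sync seqcount the writers bump. A consumer summing the counters would retry roughly like this (a sketch assuming the ip_vs_cpu_stats layout from the hunks above; the helper name is invented):

static u64 example_sum_inbytes(struct ip_vs_cpu_stats __percpu *cpustats)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct ip_vs_cpu_stats *s = per_cpu_ptr(cpustats, cpu);
		unsigned int start;
		u64 v;

		do {	/* retry if a writer was mid-update */
			start = u64_stats_fetch_begin(&s->syncp);
			v = s->ustats.inbytes;
		} while (u64_stats_fetch_retry(&s->syncp, start));
		sum += v;
	}
	return sum;
}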
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
-	spin_lock(&cp->dest->stats.lock);
-	cp->dest->stats.ustats.conns++;
-	spin_unlock(&cp->dest->stats.lock);
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct ip_vs_cpu_stats *s;
+
+	s = this_cpu_ptr(cp->dest->stats.cpustats);
+	s->ustats.conns++;
 
-	spin_lock(&svc->stats.lock);
-	svc->stats.ustats.conns++;
-	spin_unlock(&svc->stats.lock);
+	s = this_cpu_ptr(svc->stats.cpustats);
+	s->ustats.conns++;
 
-	spin_lock(&ip_vs_stats.lock);
-	ip_vs_stats.ustats.conns++;
-	spin_unlock(&ip_vs_stats.lock);
+	s = this_cpu_ptr(ipvs->tot_stats.cpustats);
+	s->ustats.conns++;
 }
 
 
 static inline int
 ip_vs_set_state(struct ip_vs_conn *cp, int direction,
 		const struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
+		struct ip_vs_proto_data *pd)
 {
-	if (unlikely(!pp->state_transition))
+	if (unlikely(!pd->pp->state_transition))
 		return 0;
-	return pp->state_transition(cp, direction, skb, pp);
+	return pd->pp->state_transition(cp, direction, skb, pd);
 }
 
-static inline void
+static inline int
 ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 			      struct sk_buff *skb, int protocol,
 			      const union nf_inet_addr *caddr, __be16 cport,
 			      const union nf_inet_addr *vaddr, __be16 vport,
 			      struct ip_vs_conn_param *p)
 {
-	ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+			      vport, p);
 	p->pe = svc->pe;
 	if (p->pe && p->pe->fill_param)
-		p->pe->fill_param(p, skb);
+		return p->pe->fill_param(p, skb);
+
+	return 0;
 }
 
 /*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
 		    struct sk_buff *skb,
-		    __be16 ports[2])
+		    __be16 src_port, __be16 dst_port, int *ignored)
 {
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 
 	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
 		      "mnet %s\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
		      IP_VS_DBG_ADDR(svc->af, &snet));
 
 	/*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
 		__be16 vport = 0;
 
-		if (ports[1] == svc->port) {
+		if (dst_port == svc->port) {
 			/* non-FTP template:
 			 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
 			 * FTP template:
 			 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
 			 */
 			if (svc->port != FTPPORT)
-				vport = ports[1];
+				vport = dst_port;
 		} else {
 			/* Note: persistent fwmark-based services and
 			 * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 				vaddr = &fwmark;
 			}
 		}
-		ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
-					      vaddr, vport, &param);
+		/* return *ignored = -1 so NF_DROP can be used */
+		if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+						  vaddr, vport, &param) < 0) {
+			*ignored = -1;
+			return NULL;
+		}
 	}
 
 	/* Check if a template already exists */
 	ct = ip_vs_ct_in_get(&param);
 	if (!ct || !ip_vs_check_template(ct)) {
-		/* No template found or the dest of the connection
+		/*
+		 * No template found or the dest of the connection
 		 * template is not available.
+		 * return *ignored=0 i.e. ICMP and NF_DROP
 		 */
 		dest = svc->scheduler->schedule(svc, skb);
 		if (!dest) {
 			IP_VS_DBG(1, "p-schedule: no dest found.\n");
 			kfree(param.pe_data);
+			*ignored = 0;
 			return NULL;
 		}
 
-		if (ports[1] == svc->port && svc->port != FTPPORT)
+		if (dst_port == svc->port && svc->port != FTPPORT)
 			dport = dest->port;
 
 		/* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		 * and thus param.pe_data will be destroyed
 		 * when the template expires */
 		ct = ip_vs_conn_new(&param, &dest->addr, dport,
-				    IP_VS_CONN_F_TEMPLATE, dest);
+				    IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
 		if (ct == NULL) {
 			kfree(param.pe_data);
+			*ignored = -1;
 			return NULL;
 		}
 
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			kfree(param.pe_data);
 	}
 
-	dport = ports[1];
+	dport = dst_port;
 	if (dport == svc->port && dest->port)
 		dport = dest->port;
 
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/*
 	 *    Create a new connection according to the template
 	 */
-	ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
-			      &iph.daddr, ports[1], &param);
-	cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+	ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+			      src_port, &iph.daddr, dst_port, &param);
+
+	cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
 	if (cp == NULL) {
 		ip_vs_conn_put(ct);
+		*ignored = -1;
 		return NULL;
 	}
 
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 *  It selects a server according to the virtual service, and
 *  creates a connection entry.
 *  Protocols supported: TCP, UDP
+ *
+ *  Usage of *ignored
+ *
+ * 1 :   protocol tried to schedule (e.g. on SYN), found svc but the
+ *       svc/scheduler decides that this packet should be accepted with
+ *       NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 :   scheduler cannot find destination, so try bypass or
+ *       return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 :  scheduler tried to schedule but a fatal error occurred, e.g.
+ *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ *       failure such as missing Call-ID, ENOMEM on skb_linearize
+ *       or pe_data. In this case we should return NF_DROP without
+ *       any attempts to send ICMP with ip_vs_leave.
 */
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-	       struct ip_vs_protocol *pp, int *ignored)
+	       struct ip_vs_proto_data *pd, int *ignored)
 {
+	struct ip_vs_protocol *pp = pd->pp;
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
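Following the comment block above, a protocol's scheduling path can map *ignored onto a netfilter verdict along these lines (an illustrative caller shape, not the exact code of any conn_schedule implementation):

	int ignored;
	struct ip_vs_conn *cp;

	cp = ip_vs_schedule(svc, skb, pd, &ignored);
	if (!cp) {
		if (ignored == 0)
			return ip_vs_leave(svc, skb, pd); /* bypass or ICMP, then NF_DROP */
		if (ignored < 0)
			return NF_DROP;	/* fatal error: no ICMP attempt */
		return NF_ACCEPT;	/* ignored == 1: must not be scheduled */
	}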
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	}
 
 	/*
-	 * Do not schedule replies from local real server. It is risky
-	 * for fwmark services but mostly for persistent services.
+	 * Do not schedule replies from local real server.
 	 */
 	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-	    (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
-	    (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+	    (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
 		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
 			      "Not scheduling reply for existing connection");
 		__ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	/*
 	 *    Persistent service
 	 */
-	if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
-		*ignored = 0;
-		return ip_vs_sched_persist(svc, skb, pptr);
-	}
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+	*ignored = 0;
 
 	/*
 	 *    Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 		return NULL;
 	}
 
-	*ignored = 0;
-
 	dest = svc->scheduler->schedule(svc, skb);
 	if (dest == NULL) {
 		IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	 */
 	{
 		struct ip_vs_conn_param p;
-		ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
-				      pptr[0], &iph.daddr, pptr[1], &p);
+
+		ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+				      &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+				      &p);
 		cp = ip_vs_conn_new(&p, &dest->addr,
 				    dest->port ? dest->port : pptr[1],
-				    flags, dest);
-		if (!cp)
+				    flags, dest, skb->mark);
+		if (!cp) {
+			*ignored = -1;
 			return NULL;
+		}
 	}
 
 	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 *  no destination is available for a new connection.
 */
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
+		struct ip_vs_proto_data *pd)
 {
 	__be16 _ports[2], *pptr;
 	struct ip_vs_iphdr iph;
+#ifdef CONFIG_SYSCTL
+	struct net *net;
+	struct netns_ipvs *ipvs;
 	int unicast;
+#endif
+
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -460,17 +515,21 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		return NF_DROP;
 	}
 
+#ifdef CONFIG_SYSCTL
+	net = skb_net(skb);
+
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
 		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
 	else
 #endif
-		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+		unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
 
 	/* if it is fwmark-based service, the cache_bypass sysctl is up
 	   and the destination is a non-local unicast, then create
 	   a cache_bypass connection entry */
-	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+	ipvs = net_ipvs(net);
+	if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
 		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +543,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
 		{
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(svc->af, iph.protocol,
+			ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
 					      &iph.saddr, pptr[0],
 					      &iph.daddr, pptr[1], &p);
 			cp = ip_vs_conn_new(&p, &daddr, 0,
 					    IP_VS_CONN_F_BYPASS | flags,
-					    NULL);
+					    NULL, skb->mark);
 			if (!cp)
 				return NF_DROP;
 		}
@@ -498,16 +557,17 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_in_stats(cp, skb);
 
 		/* set state */
-		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 
 		/* transmit the first SYN packet */
-		ret = cp->packet_xmit(skb, cp, pp);
+		ret = cp->packet_xmit(skb, cp, pd->pp);
 		/* do not touch skb anymore */
 
 		atomic_inc(&cp->in_pkts);
 		ip_vs_conn_put(cp);
 		return ret;
 	}
+#endif
 
 	/*
 	 * When the virtual ftp service is presented, packets destined
@@ -544,6 +604,33 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	return NF_DROP;
 }
 
+#ifdef CONFIG_SYSCTL
+
+static int sysctl_snat_reroute(struct sk_buff *skb)
+{
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+	return ipvs->sysctl_snat_reroute;
+}
+
+static int sysctl_nat_icmp_send(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	return ipvs->sysctl_nat_icmp_send;
+}
+
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_expire_nodest_conn;
+}
+
+#else
+
+static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
+static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
+
+#endif
+
 __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
 {
 	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -576,6 +663,22 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
 }
 #endif
 
+static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
+			return 1;
+	} else
+#endif
+		if ((sysctl_snat_reroute(skb) ||
+		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
+			return 1;
+
+	return 0;
+}
+
 /*
 *	Packet has been made sufficiently writable in caller
 *	- inout: 1=in->out, 0=out->in
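ip_vs_route_me_harder() exists to fold the duplicated IPv4/IPv6 rerouting blocks (removed from handle_response_icmp() and handle_response() further down in this diff) into a single line at each call site:

	if (ip_vs_route_me_harder(af, skb))
		goto out;	/* or "goto drop;" in handle_response() */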
@@ -674,7 +777,7 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
 #endif
 
 /* Handle relevant response ICMP messages - forward to the right
- * destination host. Used for NAT and local client.
+ * destination host.
 */
 static int handle_response_icmp(int af, struct sk_buff *skb,
 				union nf_inet_addr *snet,
@@ -710,16 +813,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 #endif
 	ip_vs_nat_icmp(skb, pp, cp, 1);
 
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
-			goto out;
-	} else
-#endif
-		if ((sysctl_ip_vs_snat_reroute ||
-		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
-		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
-			goto out;
+	if (ip_vs_route_me_harder(af, skb))
+		goto out;
 
 	/* do the statistics and put it back */
 	ip_vs_out_stats(cp, skb);
@@ -808,7 +903,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -885,7 +980,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
 
 	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -921,12 +1016,13 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
 }
 
 /* Handle response packets: rewrite addresses and send away...
- * Used for NAT and local client.
 */
 static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		struct ip_vs_conn *cp, int ihl)
 {
+	struct ip_vs_protocol *pp = pd->pp;
+
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
 
 	if (!skb_make_writable(skb, ihl))
@@ -961,21 +1057,13 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	 * if it came from this machine itself.  So re-compute
 	 * the routing information.
 	 */
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
-			goto drop;
-	} else
-#endif
-		if ((sysctl_ip_vs_snat_reroute ||
-		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
-		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
-			goto drop;
+	if (ip_vs_route_me_harder(af, skb))
+		goto drop;
 
 	IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
 
 	ip_vs_out_stats(cp, skb);
-	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
 	skb->ipvs_property = 1;
 	if (!(cp->flags & IP_VS_CONN_F_NFCT))
 		ip_vs_notrack(skb);
@@ -999,8 +1087,10 @@ drop:
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+	struct net *net = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
 
 	EnterFunction(11);
@@ -1022,6 +1112,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (unlikely(!skb_dst(skb)))
 		return NF_ACCEPT;
 
+	net = skb_net(skb);
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
@@ -1045,9 +1136,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 		ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 	}
 
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
+	pd = ip_vs_proto_data_get(net, iph.protocol);
+	if (unlikely(!pd))
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* reassemble IP fragments */
 #ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1165,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
 
 	if (likely(cp))
-		return handle_response(af, skb, pp, cp, iph.len);
-	if (sysctl_ip_vs_nat_icmp_send &&
+		return handle_response(af, skb, pd, cp, iph.len);
+	if (sysctl_nat_icmp_send(net) &&
 	    (pp->protocol == IPPROTO_TCP ||
 	     pp->protocol == IPPROTO_UDP ||
 	     pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1179,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 				 sizeof(_ports), _ports);
 		if (pptr == NULL)
 			return NF_ACCEPT;	/* Not for me */
-		if (ip_vs_lookup_real_service(af, iph.protocol,
+		if (ip_vs_lookup_real_service(net, af, iph.protocol,
 					      &iph.saddr,
 					      pptr[0])) {
 			/*
@@ -1202,14 +1294,15 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
 static int
 ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+	struct net *net = NULL;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	unsigned int offset, ihl, verdict;
-	union nf_inet_addr snet;
 
 	*related = 1;
 
@@ -1249,9 +1342,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-	pp = ip_vs_proto_get(cih->protocol);
-	if (!pp)
+	net = skb_net(skb);
+	pd = ip_vs_proto_data_get(net, cih->protocol);
+	if (!pd)
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* Is the embedded protocol header present? */
 	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,18 +1360,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
-	if (!cp) {
-		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
-		if (cp) {
-			snet.ip = iph->saddr;
-			return handle_response_icmp(AF_INET, skb, &snet,
-						    cih->protocol, cp, pp,
-						    offset, ihl);
-		}
+	cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
+	if (!cp)
 		return NF_ACCEPT;
-	}
 
 	verdict = NF_DROP;
 
@@ -1312,6 +1398,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 static int
 ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+	struct net *net = NULL;
 	struct ipv6hdr *iph;
 	struct icmp6hdr	_icmph, *ic;
 	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
@@ -1319,8 +1406,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	unsigned int offset, verdict;
-	union nf_inet_addr snet;
 	struct rt6_info *rt;
 
 	*related = 1;
@@ -1361,9 +1448,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-	pp = ip_vs_proto_get(cih->nexthdr);
-	if (!pp)
+	net = skb_net(skb);
+	pd = ip_vs_proto_data_get(net, cih->nexthdr);
+	if (!pd)
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* Is the embedded protocol header present? */
 	/* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,19 +1466,9 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
-	if (!cp) {
-		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
-		if (cp) {
-			ipv6_addr_copy(&snet.in6, &iph->saddr);
-			return handle_response_icmp(AF_INET6, skb, &snet,
-						    cih->nexthdr,
-						    cp, pp, offset,
-						    sizeof(struct ipv6hdr));
-		}
+	cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
+	if (!cp)
 		return NF_ACCEPT;
-	}
 
 	verdict = NF_DROP;
 
@@ -1423,10 +1502,13 @@
 static unsigned int
 ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+	struct net *net;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
 	int ret, restart, pkts;
+	struct netns_ipvs *ipvs;
 
 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1480,20 +1562,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 	}
 
+	net = skb_net(skb);
 	/* Protocol supported? */
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
+	pd = ip_vs_proto_data_get(net, iph.protocol);
+	if (unlikely(!pd))
 		return NF_ACCEPT;
-
+	pp = pd->pp;
 	/*
 	 * Check if the packet belongs to an existing connection entry
 	 */
-	cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
 
 	if (unlikely(!cp)) {
 		int v;
 
-		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+		if (!pp->conn_schedule(af, skb, pd, &v, &cp))
 			return v;
 	}
 
@@ -1505,12 +1588,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+	net = skb_net(skb);
+	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		/* the destination server is not available */
 
-		if (sysctl_ip_vs_expire_nodest_conn) {
+		if (sysctl_expire_nodest_conn(ipvs)) {
 			/* try to expire the connection immediately */
 			ip_vs_conn_expire_now(cp);
 		}
@@ -1521,7 +1605,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	ip_vs_in_stats(cp, skb);
-	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 	if (cp->packet_xmit)
 		ret = cp->packet_xmit(skb, cp, pp);
 	/* do not touch skb anymore */
@@ -1535,35 +1619,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	 *
 	 * Sync connection if it is about to close to
 	 * encourage the standby servers to update the connection timeouts
+	 *
+	 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
 	 */
-	pkts = atomic_add_return(1, &cp->in_pkts);
-	if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		pkts = sysctl_sync_threshold(ipvs);
+	else
+		pkts = atomic_add_return(1, &cp->in_pkts);
+
+	if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    cp->protocol == IPPROTO_SCTP) {
 		if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-		    (pkts % sysctl_ip_vs_sync_threshold[1]
-		     == sysctl_ip_vs_sync_threshold[0])) ||
+		    (pkts % sysctl_sync_period(ipvs)
+		     == sysctl_sync_threshold(ipvs))) ||
 		    (cp->old_state != cp->state &&
 		     ((cp->state == IP_VS_SCTP_S_CLOSED) ||
 		      (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
 		      (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-			ip_vs_sync_conn(cp);
+			ip_vs_sync_conn(net, cp);
 			goto out;
 		}
 	}
 
 	/* Keep this block last: TCP and others with pp->num_states <= 1 */
-	else if (af == AF_INET &&
-		 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 		 (((cp->protocol != IPPROTO_TCP ||
 		    cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-		  (pkts % sysctl_ip_vs_sync_threshold[1]
-		   == sysctl_ip_vs_sync_threshold[0])) ||
+		  (pkts % sysctl_sync_period(ipvs)
+		   == sysctl_sync_threshold(ipvs))) ||
 		  ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 		   ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
 		    (cp->state == IP_VS_TCP_S_CLOSE) ||
 		    (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
 		    (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-		ip_vs_sync_conn(cp);
+		ip_vs_sync_conn(net, cp);
 out:
 	cp->old_state = cp->state;
 
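A quick worked example of the sync filter above, with illustrative tunable values: if sysctl_sync_threshold(ipvs) returns 3 and sysctl_sync_period(ipvs) returns 50, then (pkts % 50 == 3) holds for pkts = 3, 53, 103, ..., so an established connection is synced to the backup once every 50 packets, starting from the third. The ONE_PACKET branch pins pkts to the threshold so that every one-packet connection matches immediately on its single packet.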
@@ -1782,7 +1872,39 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	},
 #endif
 };
+/*
+ *	Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+	struct netns_ipvs *ipvs;
+
+	ipvs = net_generic(net, ip_vs_net_id);
+	if (ipvs == NULL) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	ipvs->net = net;
+	/* Counters used for creating unique names */
+	ipvs->gen = atomic_read(&ipvs_netns_cnt);
+	atomic_inc(&ipvs_netns_cnt);
+	net->ipvs = ipvs;
+	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
+	       sizeof(struct netns_ipvs), ipvs->gen);
+	return 0;
+}
 
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+	.init = __ip_vs_init,
+	.exit = __ip_vs_cleanup,
+	.id   = &ip_vs_net_id,
+	.size = sizeof(struct netns_ipvs),
+};
 
 /*
 *	Initialize IP Virtual Server
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void)
1791{ 1913{
1792 int ret; 1914 int ret;
1793 1915
1794 ip_vs_estimator_init(); 1916 ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
1917 if (ret < 0)
1918 return ret;
1795 1919
1920 ip_vs_estimator_init();
1796 ret = ip_vs_control_init(); 1921 ret = ip_vs_control_init();
1797 if (ret < 0) { 1922 if (ret < 0) {
1798 pr_err("can't setup control.\n"); 1923 pr_err("can't setup control.\n");
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void)
1813 goto cleanup_app; 1938 goto cleanup_app;
1814 } 1939 }
1815 1940
1941 ret = ip_vs_sync_init();
1942 if (ret < 0) {
1943 pr_err("can't setup sync data.\n");
1944 goto cleanup_conn;
1945 }
1946
1816 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1947 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1817 if (ret < 0) { 1948 if (ret < 0) {
1818 pr_err("can't register hooks.\n"); 1949 pr_err("can't register hooks.\n");
1819 goto cleanup_conn; 1950 goto cleanup_sync;
1820 } 1951 }
1821 1952
1822 pr_info("ipvs loaded.\n"); 1953 pr_info("ipvs loaded.\n");
1823 return ret; 1954 return ret;
1824 1955
1956cleanup_sync:
1957 ip_vs_sync_cleanup();
1825 cleanup_conn: 1958 cleanup_conn:
1826 ip_vs_conn_cleanup(); 1959 ip_vs_conn_cleanup();
1827 cleanup_app: 1960 cleanup_app:
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void)
1831 ip_vs_control_cleanup(); 1964 ip_vs_control_cleanup();
1832 cleanup_estimator: 1965 cleanup_estimator:
1833 ip_vs_estimator_cleanup(); 1966 ip_vs_estimator_cleanup();
1967 unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
1834 return ret; 1968 return ret;
1835} 1969}
1836 1970
1837static void __exit ip_vs_cleanup(void) 1971static void __exit ip_vs_cleanup(void)
1838{ 1972{
1839 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1973 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1974 ip_vs_sync_cleanup();
1840 ip_vs_conn_cleanup(); 1975 ip_vs_conn_cleanup();
1841 ip_vs_app_cleanup(); 1976 ip_vs_app_cleanup();
1842 ip_vs_protocol_cleanup(); 1977 ip_vs_protocol_cleanup();
1843 ip_vs_control_cleanup(); 1978 ip_vs_control_cleanup();
1844 ip_vs_estimator_cleanup(); 1979 ip_vs_estimator_cleanup();
1980 unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
1845 pr_info("ipvs unloaded.\n"); 1981 pr_info("ipvs unloaded.\n");
1846} 1982}
1847 1983
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ba98e1308f3..33733c8872e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39 39
40#include <net/net_namespace.h> 40#include <net/net_namespace.h>
41#include <linux/nsproxy.h>
41#include <net/ip.h> 42#include <net/ip.h>
42#ifdef CONFIG_IP_VS_IPV6 43#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h> 44#include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
57/* lock for service table */ 58/* lock for service table */
58static DEFINE_RWLOCK(__ip_vs_svc_lock); 59static DEFINE_RWLOCK(__ip_vs_svc_lock);
59 60
60/* lock for table with the real services */
61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63/* lock for state and timeout tables */
64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69/* lock for drop packet handling */
70static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72/* 1/rate drop and drop-entry variables */
73int ip_vs_drop_rate = 0;
74int ip_vs_drop_counter = 0;
75static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77/* number of virtual services */
78static int ip_vs_num_services = 0;
79
80/* sysctl variables */ 61/* sysctl variables */
81static int sysctl_ip_vs_drop_entry = 0;
82static int sysctl_ip_vs_drop_packet = 0;
83static int sysctl_ip_vs_secure_tcp = 0;
84static int sysctl_ip_vs_amemthresh = 1024;
85static int sysctl_ip_vs_am_droprate = 10;
86int sysctl_ip_vs_cache_bypass = 0;
87int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
94int sysctl_ip_vs_snat_reroute = 1;
95
96 62
97#ifdef CONFIG_IP_VS_DEBUG 63#ifdef CONFIG_IP_VS_DEBUG
98static int sysctl_ip_vs_debug_level = 0; 64static int sysctl_ip_vs_debug_level = 0;
@@ -105,27 +71,28 @@ int ip_vs_get_debug_level(void)
105 71
106#ifdef CONFIG_IP_VS_IPV6 72#ifdef CONFIG_IP_VS_IPV6
107/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ 73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
108static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) 74static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
109{ 76{
110 struct rt6_info *rt; 77 struct rt6_info *rt;
111 struct flowi fl = { 78 struct flowi6 fl6 = {
112 .oif = 0, 79 .daddr = *addr,
113 .fl6_dst = *addr,
114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
115 }; 80 };
116 81
117 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 82 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
118 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) 83 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
119 return 1; 84 return 1;
120 85
121 return 0; 86 return 0;
122} 87}
123#endif 88#endif
89
90#ifdef CONFIG_SYSCTL
124/* 91/*
125 * update_defense_level is called from keventd and from sysctl, 92 * update_defense_level is called from keventd and from sysctl,
126 * so it needs to protect itself from softirqs 93 * so it needs to protect itself from softirqs
127 */ 94 */
128static void update_defense_level(void) 95static void update_defense_level(struct netns_ipvs *ipvs)
129{ 96{
130 struct sysinfo i; 97 struct sysinfo i;
131 static int old_secure_tcp = 0; 98 static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@ static void update_defense_level(void)
141 /* si_swapinfo(&i); */ 108 /* si_swapinfo(&i); */
142 /* availmem = availmem - (i.totalswap - i.freeswap); */ 109 /* availmem = availmem - (i.totalswap - i.freeswap); */
143 110
144 nomem = (availmem < sysctl_ip_vs_amemthresh); 111 nomem = (availmem < ipvs->sysctl_amemthresh);
145 112
146 local_bh_disable(); 113 local_bh_disable();
147 114
148 /* drop_entry */ 115 /* drop_entry */
149 spin_lock(&__ip_vs_dropentry_lock); 116 spin_lock(&ipvs->dropentry_lock);
150 switch (sysctl_ip_vs_drop_entry) { 117 switch (ipvs->sysctl_drop_entry) {
151 case 0: 118 case 0:
152 atomic_set(&ip_vs_dropentry, 0); 119 atomic_set(&ipvs->dropentry, 0);
153 break; 120 break;
154 case 1: 121 case 1:
155 if (nomem) { 122 if (nomem) {
156 atomic_set(&ip_vs_dropentry, 1); 123 atomic_set(&ipvs->dropentry, 1);
157 sysctl_ip_vs_drop_entry = 2; 124 ipvs->sysctl_drop_entry = 2;
158 } else { 125 } else {
159 atomic_set(&ip_vs_dropentry, 0); 126 atomic_set(&ipvs->dropentry, 0);
160 } 127 }
161 break; 128 break;
162 case 2: 129 case 2:
163 if (nomem) { 130 if (nomem) {
164 atomic_set(&ip_vs_dropentry, 1); 131 atomic_set(&ipvs->dropentry, 1);
165 } else { 132 } else {
166 atomic_set(&ip_vs_dropentry, 0); 133 atomic_set(&ipvs->dropentry, 0);
167 sysctl_ip_vs_drop_entry = 1; 134 ipvs->sysctl_drop_entry = 1;
168 }; 135 };
169 break; 136 break;
170 case 3: 137 case 3:
171 atomic_set(&ip_vs_dropentry, 1); 138 atomic_set(&ipvs->dropentry, 1);
172 break; 139 break;
173 } 140 }
174 spin_unlock(&__ip_vs_dropentry_lock); 141 spin_unlock(&ipvs->dropentry_lock);
175 142
176 /* drop_packet */ 143 /* drop_packet */
177 spin_lock(&__ip_vs_droppacket_lock); 144 spin_lock(&ipvs->droppacket_lock);
178 switch (sysctl_ip_vs_drop_packet) { 145 switch (ipvs->sysctl_drop_packet) {
179 case 0: 146 case 0:
180 ip_vs_drop_rate = 0; 147 ipvs->drop_rate = 0;
181 break; 148 break;
182 case 1: 149 case 1:
183 if (nomem) { 150 if (nomem) {
184 ip_vs_drop_rate = ip_vs_drop_counter 151 ipvs->drop_rate = ipvs->drop_counter
185 = sysctl_ip_vs_amemthresh / 152 = ipvs->sysctl_amemthresh /
186 (sysctl_ip_vs_amemthresh-availmem); 153 (ipvs->sysctl_amemthresh-availmem);
187 sysctl_ip_vs_drop_packet = 2; 154 ipvs->sysctl_drop_packet = 2;
188 } else { 155 } else {
189 ip_vs_drop_rate = 0; 156 ipvs->drop_rate = 0;
190 } 157 }
191 break; 158 break;
192 case 2: 159 case 2:
193 if (nomem) { 160 if (nomem) {
194 ip_vs_drop_rate = ip_vs_drop_counter 161 ipvs->drop_rate = ipvs->drop_counter
195 = sysctl_ip_vs_amemthresh / 162 = ipvs->sysctl_amemthresh /
196 (sysctl_ip_vs_amemthresh-availmem); 163 (ipvs->sysctl_amemthresh-availmem);
197 } else { 164 } else {
198 ip_vs_drop_rate = 0; 165 ipvs->drop_rate = 0;
199 sysctl_ip_vs_drop_packet = 1; 166 ipvs->sysctl_drop_packet = 1;
200 } 167 }
201 break; 168 break;
202 case 3: 169 case 3:
203 ip_vs_drop_rate = sysctl_ip_vs_am_droprate; 170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
204 break; 171 break;
205 } 172 }
206 spin_unlock(&__ip_vs_droppacket_lock); 173 spin_unlock(&ipvs->droppacket_lock);
207 174
208 /* secure_tcp */ 175 /* secure_tcp */
209 spin_lock(&ip_vs_securetcp_lock); 176 spin_lock(&ipvs->securetcp_lock);
210 switch (sysctl_ip_vs_secure_tcp) { 177 switch (ipvs->sysctl_secure_tcp) {
211 case 0: 178 case 0:
212 if (old_secure_tcp >= 2) 179 if (old_secure_tcp >= 2)
213 to_change = 0; 180 to_change = 0;
@@ -216,7 +183,7 @@ static void update_defense_level(void)
216 if (nomem) { 183 if (nomem) {
217 if (old_secure_tcp < 2) 184 if (old_secure_tcp < 2)
218 to_change = 1; 185 to_change = 1;
219 sysctl_ip_vs_secure_tcp = 2; 186 ipvs->sysctl_secure_tcp = 2;
220 } else { 187 } else {
221 if (old_secure_tcp >= 2) 188 if (old_secure_tcp >= 2)
222 to_change = 0; 189 to_change = 0;
@@ -229,7 +196,7 @@ static void update_defense_level(void)
229 } else { 196 } else {
230 if (old_secure_tcp >= 2) 197 if (old_secure_tcp >= 2)
231 to_change = 0; 198 to_change = 0;
232 sysctl_ip_vs_secure_tcp = 1; 199 ipvs->sysctl_secure_tcp = 1;
233 } 200 }
234 break; 201 break;
235 case 3: 202 case 3:
@@ -237,10 +204,11 @@ static void update_defense_level(void)
237 to_change = 1; 204 to_change = 1;
238 break; 205 break;
239 } 206 }
240 old_secure_tcp = sysctl_ip_vs_secure_tcp; 207 old_secure_tcp = ipvs->sysctl_secure_tcp;
241 if (to_change >= 0) 208 if (to_change >= 0)
242 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 209 ip_vs_protocol_timeout_change(ipvs,
243 spin_unlock(&ip_vs_securetcp_lock); 210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
244 212
245 local_bh_enable(); 213 local_bh_enable();
246} 214}
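update_defense_level() is unchanged in substance: each defense knob is a four-state machine where 0 means never, 3 means always, and 1/2 flip between each other automatically as memory pressure comes and goes; the patch only moves the state from file-scope globals into netns_ipvs so each namespace runs its own instance. A compact userspace sketch of the drop_entry state machine:

    #include <stdio.h>

    /* 0 = never drop, 1 = auto (currently off), 2 = auto (currently on),
     * 3 = always drop.  States 1 and 2 toggle on memory pressure. */
    static int update_drop_entry(int *mode, int nomem)
    {
        switch (*mode) {
        case 0:
            return 0;
        case 1:
            if (nomem) {
                *mode = 2;    /* remember the defense engaged itself */
                return 1;
            }
            return 0;
        case 2:
            if (!nomem)
                *mode = 1;    /* memory recovered: disengage */
            return nomem;
        case 3:
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        int mode = 1;

        printf("drop=%d mode=%d\n", update_drop_entry(&mode, 1), mode); /* 1 2 */
        printf("drop=%d mode=%d\n", update_drop_entry(&mode, 0), mode); /* 0 1 */
        return 0;
    }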
@@ -250,17 +218,18 @@ static void update_defense_level(void)
250 * Timer for checking the defense 218 * Timer for checking the defense
251 */ 219 */
252#define DEFENSE_TIMER_PERIOD 1*HZ 220#define DEFENSE_TIMER_PERIOD 1*HZ
253static void defense_work_handler(struct work_struct *work);
254static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
255 221
256static void defense_work_handler(struct work_struct *work) 222static void defense_work_handler(struct work_struct *work)
257{ 223{
258 update_defense_level(); 224 struct netns_ipvs *ipvs =
259 if (atomic_read(&ip_vs_dropentry)) 225 container_of(work, struct netns_ipvs, defense_work.work);
260 ip_vs_random_dropentry();
261 226
262 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 227 update_defense_level(ipvs);
228 if (atomic_read(&ipvs->dropentry))
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
263} 231}
232#endif
264 233
265int 234int
266ip_vs_use_count_inc(void) 235ip_vs_use_count_inc(void)
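Because defense_work is now embedded in netns_ipvs instead of being declared with DECLARE_DELAYED_WORK, the handler recovers its context with container_of() on the work pointer — the standard way to attach state to a (delayed) work item — and then re-arms itself. A minimal kernel-style sketch of that pattern (demo_* names are placeholders):

    #include <linux/kernel.h>
    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct demo_ctx {
        int gen;                      /* arbitrary per-object state */
        struct delayed_work dwork;    /* embedded, not a global */
    };

    static void demo_handler(struct work_struct *work)
    {
        /* 'work' is &ctx->dwork.work; walk back to the container */
        struct demo_ctx *ctx = container_of(work, struct demo_ctx, dwork.work);

        pr_info("demo: gen=%d\n", ctx->gen);
        /* self re-arming, like the per-netns defense timer */
        schedule_delayed_work(&ctx->dwork, HZ);
    }

    static struct demo_ctx *demo_start(void)
    {
        struct demo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

        if (!ctx)
            return NULL;
        INIT_DELAYED_WORK(&ctx->dwork, demo_handler);
        schedule_delayed_work(&ctx->dwork, HZ);
        return ctx;
    }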
@@ -287,33 +256,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
287/* the service table hashed by fwmark */ 256/* the service table hashed by fwmark */
288static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 257static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
289 258
290/*
291 * Hash table: for real service lookups
292 */
293#define IP_VS_RTAB_BITS 4
294#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
295#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
296
297static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
298
299/*
300 * Trash for destinations
301 */
302static LIST_HEAD(ip_vs_dest_trash);
303
304/*
305 * FTP & NULL virtual service counters
306 */
307static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
308static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
309
310 259
311/* 260/*
312 * Returns hash value for virtual service 261 * Returns hash value for virtual service
313 */ 262 */
314static __inline__ unsigned 263static inline unsigned
315ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, 264ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
316 __be16 port) 265 const union nf_inet_addr *addr, __be16 port)
317{ 266{
318 register unsigned porth = ntohs(port); 267 register unsigned porth = ntohs(port);
319 __be32 addr_fold = addr->ip; 268 __be32 addr_fold = addr->ip;
@@ -323,6 +272,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
323 addr_fold = addr->ip6[0]^addr->ip6[1]^ 272 addr_fold = addr->ip6[0]^addr->ip6[1]^
324 addr->ip6[2]^addr->ip6[3]; 273 addr->ip6[2]^addr->ip6[3];
325#endif 274#endif
275 addr_fold ^= ((size_t)net>>8);
326 276
327 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 277 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
328 & IP_VS_SVC_TAB_MASK; 278 & IP_VS_SVC_TAB_MASK;
@@ -331,13 +281,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
331/* 281/*
332 * Returns hash value of fwmark for virtual service lookup 282 * Returns hash value of fwmark for virtual service lookup
333 */ 283 */
334static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) 284static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
335{ 285{
336 return fwmark & IP_VS_SVC_TAB_MASK; 286 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
337} 287}
338 288
339/* 289/*
340 * Hashes a service in the ip_vs_svc_table by <proto,addr,port> 290 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
341 * or in the ip_vs_svc_fwm_table by fwmark. 291 * or in the ip_vs_svc_fwm_table by fwmark.
342 * Should be called with locked tables. 292 * Should be called with locked tables.
343 */ 293 */
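All namespaces still share the two global service hash tables, so the patch folds the namespace identity into the hash key — (size_t)net>>8 discards low address bits that are equal for all slab-allocated net structs — while net_eq() in the lookup keeps the final match exact; the hash only spreads entries across buckets, equality still decides hits. A small userspace sketch of the same mixing, assuming nothing about the real table sizes:

    #include <stdio.h>
    #include <stdint.h>

    #define TAB_BITS 8
    #define TAB_MASK ((1u << TAB_BITS) - 1)

    /* Stand-in for struct net: only its address participates in the key. */
    struct ns { int dummy; };

    static unsigned svc_hashkey(const struct ns *ns, unsigned proto,
                                uint32_t addr, uint16_t port)
    {
        /* >>8 skips low bits shared by all aligned allocations */
        uintptr_t nsbits = (uintptr_t)ns >> 8;

        return (proto ^ addr ^ (unsigned)nsbits ^ port) & TAB_MASK;
    }

    int main(void)
    {
        struct ns a, b;

        /* the namespace pointer perturbs bucket choice; on lookup an
         * explicit net_eq()-style comparison still decides the match */
        printf("%u %u\n",
               svc_hashkey(&a, 6, 0x0a000001, 80),
               svc_hashkey(&b, 6, 0x0a000001, 80));
        return 0;
    }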
@@ -353,16 +303,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
353 303
354 if (svc->fwmark == 0) { 304 if (svc->fwmark == 0) {
355 /* 305 /*
356 * Hash it by <protocol,addr,port> in ip_vs_svc_table 306 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
357 */ 307 */
358 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, 308 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
359 svc->port); 309 &svc->addr, svc->port);
360 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 310 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
361 } else { 311 } else {
362 /* 312 /*
363 * Hash it by fwmark in ip_vs_svc_fwm_table 313 * Hash it by fwmark in svc_fwm_table
364 */ 314 */
365 hash = ip_vs_svc_fwm_hashkey(svc->fwmark); 315 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
366 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 316 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
367 } 317 }
368 318
@@ -374,7 +324,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
374 324
375 325
376/* 326/*
377 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. 327 * Unhashes a service from svc_table / svc_fwm_table.
378 * Should be called with locked tables. 328 * Should be called with locked tables.
379 */ 329 */
380static int ip_vs_svc_unhash(struct ip_vs_service *svc) 330static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +336,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
386 } 336 }
387 337
388 if (svc->fwmark == 0) { 338 if (svc->fwmark == 0) {
389 /* Remove it from the ip_vs_svc_table table */ 339 /* Remove it from the svc_table table */
390 list_del(&svc->s_list); 340 list_del(&svc->s_list);
391 } else { 341 } else {
392 /* Remove it from the ip_vs_svc_fwm_table table */ 342 /* Remove it from the svc_fwm_table table */
393 list_del(&svc->f_list); 343 list_del(&svc->f_list);
394 } 344 }
395 345
@@ -400,23 +350,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
400 350
401 351
402/* 352/*
403 * Get service by {proto,addr,port} in the service table. 353 * Get service by {netns, proto,addr,port} in the service table.
404 */ 354 */
405static inline struct ip_vs_service * 355static inline struct ip_vs_service *
406__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, 356__ip_vs_service_find(struct net *net, int af, __u16 protocol,
407 __be16 vport) 357 const union nf_inet_addr *vaddr, __be16 vport)
408{ 358{
409 unsigned hash; 359 unsigned hash;
410 struct ip_vs_service *svc; 360 struct ip_vs_service *svc;
411 361
412 /* Check for "full" addressed entries */ 362 /* Check for "full" addressed entries */
413 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); 363 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
414 364
415 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ 365 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
416 if ((svc->af == af) 366 if ((svc->af == af)
417 && ip_vs_addr_equal(af, &svc->addr, vaddr) 367 && ip_vs_addr_equal(af, &svc->addr, vaddr)
418 && (svc->port == vport) 368 && (svc->port == vport)
419 && (svc->protocol == protocol)) { 369 && (svc->protocol == protocol)
370 && net_eq(svc->net, net)) {
420 /* HIT */ 371 /* HIT */
421 return svc; 372 return svc;
422 } 373 }
@@ -430,16 +381,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
430 * Get service by {fwmark} in the service table. 381 * Get service by {fwmark} in the service table.
431 */ 382 */
432static inline struct ip_vs_service * 383static inline struct ip_vs_service *
433__ip_vs_svc_fwm_find(int af, __u32 fwmark) 384__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
434{ 385{
435 unsigned hash; 386 unsigned hash;
436 struct ip_vs_service *svc; 387 struct ip_vs_service *svc;
437 388
438 /* Check for fwmark addressed entries */ 389 /* Check for fwmark addressed entries */
439 hash = ip_vs_svc_fwm_hashkey(fwmark); 390 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
440 391
441 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 392 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
442 if (svc->fwmark == fwmark && svc->af == af) { 393 if (svc->fwmark == fwmark && svc->af == af
394 && net_eq(svc->net, net)) {
443 /* HIT */ 395 /* HIT */
444 return svc; 396 return svc;
445 } 397 }
@@ -449,42 +401,46 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
449} 401}
450 402
451struct ip_vs_service * 403struct ip_vs_service *
452ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, 404ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
453 const union nf_inet_addr *vaddr, __be16 vport) 405 const union nf_inet_addr *vaddr, __be16 vport)
454{ 406{
455 struct ip_vs_service *svc; 407 struct ip_vs_service *svc;
408 struct netns_ipvs *ipvs = net_ipvs(net);
456 409
457 read_lock(&__ip_vs_svc_lock); 410 read_lock(&__ip_vs_svc_lock);
458 411
459 /* 412 /*
460 * Check the table hashed by fwmark first 413 * Check the table hashed by fwmark first
461 */ 414 */
462 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) 415 if (fwmark) {
463 goto out; 416 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
417 if (svc)
418 goto out;
419 }
464 420
465 /* 421 /*
466 * Check the table hashed by <protocol,addr,port> 422 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries 423 * for "full" addressed entries
468 */ 424 */
469 svc = __ip_vs_service_find(af, protocol, vaddr, vport); 425 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
470 426
471 if (svc == NULL 427 if (svc == NULL
472 && protocol == IPPROTO_TCP 428 && protocol == IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter) 429 && atomic_read(&ipvs->ftpsvc_counter)
474 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 430 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475 /* 431 /*
476 * Check if ftp service entry exists, the packet 432 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections. 433 * might belong to FTP data connections.
478 */ 434 */
479 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); 435 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
480 } 436 }
481 437
482 if (svc == NULL 438 if (svc == NULL
483 && atomic_read(&ip_vs_nullsvc_counter)) { 439 && atomic_read(&ipvs->nullsvc_counter)) {
484 /* 440 /*
485 * Check if the catch-all port (port zero) exists 441 * Check if the catch-all port (port zero) exists
486 */ 442 */
487 svc = __ip_vs_service_find(af, protocol, vaddr, 0); 443 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
488 } 444 }
489 445
490 out: 446 out:
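ip_vs_service_get() keeps its four-step fallback chain, now namespace-qualified at every step: fwmark entries win, then the exact <protocol,addr,port> tuple, then — for TCP ports that look like FTP data — the FTP control service, and finally the port-zero catch-all. A condensed userspace sketch of the decision chain (find_by_* are hypothetical stand-ins for the two hash-table lookups):

    #include <stddef.h>
    #include <stdio.h>

    #define FTPPORT   21
    #define FTPDATA   20
    #define PROT_SOCK 1024
    #define TCP       6

    struct svc { int id; };

    /* Hypothetical stand-ins for the fwmark and tuple hash lookups. */
    static struct svc *find_by_fwmark(unsigned fwmark) { return NULL; }
    static struct svc *find_by_tuple(unsigned proto, unsigned addr,
                                     unsigned port) { return NULL; }

    static struct svc *service_get(unsigned fwmark, unsigned proto,
                                   unsigned addr, unsigned port,
                                   int ftpsvc_cnt, int nullsvc_cnt)
    {
        struct svc *svc = NULL;

        if (fwmark) {                              /* 1. fwmark rules first */
            svc = find_by_fwmark(fwmark);
            if (svc)
                return svc;
        }
        svc = find_by_tuple(proto, addr, port);    /* 2. exact tuple */
        if (!svc && proto == TCP && ftpsvc_cnt &&
            (port == FTPDATA || port >= PROT_SOCK))
            svc = find_by_tuple(proto, addr, FTPPORT);  /* 3. FTP control */
        if (!svc && nullsvc_cnt)
            svc = find_by_tuple(proto, addr, 0);   /* 4. port-zero catch-all */
        return svc;
    }

    int main(void)
    {
        printf("%p\n", (void *)service_get(0, TCP, 0x0a000001, 2049, 1, 0));
        return 0;
    }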
@@ -519,6 +475,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
519 svc->fwmark, 475 svc->fwmark,
520 IP_VS_DBG_ADDR(svc->af, &svc->addr), 476 IP_VS_DBG_ADDR(svc->af, &svc->addr),
521 ntohs(svc->port), atomic_read(&svc->usecnt)); 477 ntohs(svc->port), atomic_read(&svc->usecnt));
478 free_percpu(svc->stats.cpustats);
522 kfree(svc); 479 kfree(svc);
523 } 480 }
524} 481}
@@ -545,10 +502,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
545} 502}
546 503
547/* 504/*
548 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. 505 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
549 * should be called with locked tables. 506 * should be called with locked tables.
550 */ 507 */
551static int ip_vs_rs_hash(struct ip_vs_dest *dest) 508static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
552{ 509{
553 unsigned hash; 510 unsigned hash;
554 511
@@ -562,19 +519,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
562 */ 519 */
563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 520 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564 521
565 list_add(&dest->d_list, &ip_vs_rtable[hash]); 522 list_add(&dest->d_list, &ipvs->rs_table[hash]);
566 523
567 return 1; 524 return 1;
568} 525}
569 526
570/* 527/*
571 * UNhashes ip_vs_dest from ip_vs_rtable. 528 * UNhashes ip_vs_dest from rs_table.
572 * should be called with locked tables. 529 * should be called with locked tables.
573 */ 530 */
574static int ip_vs_rs_unhash(struct ip_vs_dest *dest) 531static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575{ 532{
576 /* 533 /*
577 * Remove it from the ip_vs_rtable table. 534 * Remove it from the rs_table table.
578 */ 535 */
579 if (!list_empty(&dest->d_list)) { 536 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list); 537 list_del(&dest->d_list);
@@ -588,10 +545,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
588 * Lookup real service by <proto,addr,port> in the real service table. 545 * Lookup real service by <proto,addr,port> in the real service table.
589 */ 546 */
590struct ip_vs_dest * 547struct ip_vs_dest *
591ip_vs_lookup_real_service(int af, __u16 protocol, 548ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
592 const union nf_inet_addr *daddr, 549 const union nf_inet_addr *daddr,
593 __be16 dport) 550 __be16 dport)
594{ 551{
552 struct netns_ipvs *ipvs = net_ipvs(net);
595 unsigned hash; 553 unsigned hash;
596 struct ip_vs_dest *dest; 554 struct ip_vs_dest *dest;
597 555
@@ -601,19 +559,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
601 */ 559 */
602 hash = ip_vs_rs_hashkey(af, daddr, dport); 560 hash = ip_vs_rs_hashkey(af, daddr, dport);
603 561
604 read_lock(&__ip_vs_rs_lock); 562 read_lock(&ipvs->rs_lock);
605 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { 563 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
606 if ((dest->af == af) 564 if ((dest->af == af)
607 && ip_vs_addr_equal(af, &dest->addr, daddr) 565 && ip_vs_addr_equal(af, &dest->addr, daddr)
608 && (dest->port == dport) 566 && (dest->port == dport)
609 && ((dest->protocol == protocol) || 567 && ((dest->protocol == protocol) ||
610 dest->vfwmark)) { 568 dest->vfwmark)) {
611 /* HIT */ 569 /* HIT */
612 read_unlock(&__ip_vs_rs_lock); 570 read_unlock(&ipvs->rs_lock);
613 return dest; 571 return dest;
614 } 572 }
615 } 573 }
616 read_unlock(&__ip_vs_rs_lock); 574 read_unlock(&ipvs->rs_lock);
617 575
618 return NULL; 576 return NULL;
619} 577}
@@ -652,15 +610,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
652 * ip_vs_lookup_real_service() looked promising, but 610 * ip_vs_lookup_real_service() looked promising, but

653 * seems not working as expected. 611 * seems not working as expected.
654 */ 612 */
655struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, 613struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
614 const union nf_inet_addr *daddr,
656 __be16 dport, 615 __be16 dport,
657 const union nf_inet_addr *vaddr, 616 const union nf_inet_addr *vaddr,
658 __be16 vport, __u16 protocol) 617 __be16 vport, __u16 protocol, __u32 fwmark)
659{ 618{
660 struct ip_vs_dest *dest; 619 struct ip_vs_dest *dest;
661 struct ip_vs_service *svc; 620 struct ip_vs_service *svc;
662 621
663 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); 622 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
664 if (!svc) 623 if (!svc)
665 return NULL; 624 return NULL;
666 dest = ip_vs_lookup_dest(svc, daddr, dport); 625 dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +644,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
685 __be16 dport) 644 __be16 dport)
686{ 645{
687 struct ip_vs_dest *dest, *nxt; 646 struct ip_vs_dest *dest, *nxt;
647 struct netns_ipvs *ipvs = net_ipvs(svc->net);
688 648
689 /* 649 /*
690 * Find the destination in trash 650 * Find the destination in trash
691 */ 651 */
692 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 652 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
693 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 653 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
694 "dest->refcnt=%d\n", 654 "dest->refcnt=%d\n",
695 dest->vfwmark, 655 dest->vfwmark,
@@ -720,6 +680,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
720 list_del(&dest->n_list); 680 list_del(&dest->n_list);
721 ip_vs_dst_reset(dest); 681 ip_vs_dst_reset(dest);
722 __ip_vs_unbind_svc(dest); 682 __ip_vs_unbind_svc(dest);
683 free_percpu(dest->stats.cpustats);
723 kfree(dest); 684 kfree(dest);
724 } 685 }
725 } 686 }
@@ -737,25 +698,53 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
737 * are expired, and the refcnt of each destination in the trash must 698 * are expired, and the refcnt of each destination in the trash must
738 * be 1, so we simply release them here. 699 * be 1, so we simply release them here.
739 */ 700 */
740static void ip_vs_trash_cleanup(void) 701static void ip_vs_trash_cleanup(struct net *net)
741{ 702{
742 struct ip_vs_dest *dest, *nxt; 703 struct ip_vs_dest *dest, *nxt;
704 struct netns_ipvs *ipvs = net_ipvs(net);
743 705
744 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 706 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
745 list_del(&dest->n_list); 707 list_del(&dest->n_list);
746 ip_vs_dst_reset(dest); 708 ip_vs_dst_reset(dest);
747 __ip_vs_unbind_svc(dest); 709 __ip_vs_unbind_svc(dest);
710 free_percpu(dest->stats.cpustats);
748 kfree(dest); 711 kfree(dest);
749 } 712 }
750} 713}
751 714
715static void
716ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
717{
718#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
719
720 spin_lock_bh(&src->lock);
721
722 IP_VS_SHOW_STATS_COUNTER(conns);
723 IP_VS_SHOW_STATS_COUNTER(inpkts);
724 IP_VS_SHOW_STATS_COUNTER(outpkts);
725 IP_VS_SHOW_STATS_COUNTER(inbytes);
726 IP_VS_SHOW_STATS_COUNTER(outbytes);
727
728 ip_vs_read_estimator(dst, src);
729
730 spin_unlock_bh(&src->lock);
731}
752 732
753static void 733static void
754ip_vs_zero_stats(struct ip_vs_stats *stats) 734ip_vs_zero_stats(struct ip_vs_stats *stats)
755{ 735{
756 spin_lock_bh(&stats->lock); 736 spin_lock_bh(&stats->lock);
757 737
758 memset(&stats->ustats, 0, sizeof(stats->ustats)); 738 /* get current counters as zero point, rates are zeroed */
739
740#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
741
742 IP_VS_ZERO_STATS_COUNTER(conns);
743 IP_VS_ZERO_STATS_COUNTER(inpkts);
744 IP_VS_ZERO_STATS_COUNTER(outpkts);
745 IP_VS_ZERO_STATS_COUNTER(inbytes);
746 IP_VS_ZERO_STATS_COUNTER(outbytes);
747
759 ip_vs_zero_estimator(stats); 748 ip_vs_zero_estimator(stats);
760 749
761 spin_unlock_bh(&stats->lock); 750 spin_unlock_bh(&stats->lock);
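ip_vs_zero_stats() no longer memset()s the live counters — those are now fed by lock-free percpu updates — it just snapshots the current totals into ustats0, and ip_vs_copy_stats() reports the difference. A tiny userspace sketch of the zero-point technique:

    #include <stdio.h>

    struct stats {
        unsigned long long pkts;     /* live, monotonically increasing */
        unsigned long long pkts0;    /* snapshot taken when "zeroed" */
    };

    static void stats_zero(struct stats *s)
    {
        /* never touch the live counter: only move the zero point */
        s->pkts0 = s->pkts;
    }

    static unsigned long long stats_read(const struct stats *s)
    {
        return s->pkts - s->pkts0;
    }

    int main(void)
    {
        struct stats s = { .pkts = 100, .pkts0 = 0 };

        stats_zero(&s);
        s.pkts += 7;
        printf("%llu\n", stats_read(&s));    /* 7, not 107 */
        return 0;
    }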
@@ -768,6 +757,7 @@ static void
768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 757__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769 struct ip_vs_dest_user_kern *udest, int add) 758 struct ip_vs_dest_user_kern *udest, int add)
770{ 759{
760 struct netns_ipvs *ipvs = net_ipvs(svc->net);
771 int conn_flags; 761 int conn_flags;
772 762
773 /* set the weight and the flags */ 763 /* set the weight and the flags */
@@ -780,12 +770,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
780 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 770 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781 } else { 771 } else {
782 /* 772 /*
783 * Put the real service in ip_vs_rtable if not present. 773 * Put the real service in rs_table if not present.
784 * For now only for NAT! 774 * For now only for NAT!
785 */ 775 */
786 write_lock_bh(&__ip_vs_rs_lock); 776 write_lock_bh(&ipvs->rs_lock);
787 ip_vs_rs_hash(dest); 777 ip_vs_rs_hash(ipvs, dest);
788 write_unlock_bh(&__ip_vs_rs_lock); 778 write_unlock_bh(&ipvs->rs_lock);
789 } 779 }
790 atomic_set(&dest->conn_flags, conn_flags); 780 atomic_set(&dest->conn_flags, conn_flags);
791 781
@@ -813,7 +803,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
813 spin_unlock_bh(&dest->dst_lock); 803 spin_unlock_bh(&dest->dst_lock);
814 804
815 if (add) 805 if (add)
816 ip_vs_new_estimator(&dest->stats); 806 ip_vs_start_estimator(svc->net, &dest->stats);
817 807
818 write_lock_bh(&__ip_vs_svc_lock); 808 write_lock_bh(&__ip_vs_svc_lock);
819 809
@@ -850,12 +840,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
850 atype = ipv6_addr_type(&udest->addr.in6); 840 atype = ipv6_addr_type(&udest->addr.in6);
851 if ((!(atype & IPV6_ADDR_UNICAST) || 841 if ((!(atype & IPV6_ADDR_UNICAST) ||
852 atype & IPV6_ADDR_LINKLOCAL) && 842 atype & IPV6_ADDR_LINKLOCAL) &&
853 !__ip_vs_addr_is_local_v6(&udest->addr.in6)) 843 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
854 return -EINVAL; 844 return -EINVAL;
855 } else 845 } else
856#endif 846#endif
857 { 847 {
858 atype = inet_addr_type(&init_net, udest->addr.ip); 848 atype = inet_addr_type(svc->net, udest->addr.ip);
859 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 849 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860 return -EINVAL; 850 return -EINVAL;
861 } 851 }
@@ -865,6 +855,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
865 pr_err("%s(): no memory.\n", __func__); 855 pr_err("%s(): no memory.\n", __func__);
866 return -ENOMEM; 856 return -ENOMEM;
867 } 857 }
858 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
859 if (!dest->stats.cpustats) {
860 pr_err("%s() alloc_percpu failed\n", __func__);
861 goto err_alloc;
862 }
868 863
869 dest->af = svc->af; 864 dest->af = svc->af;
870 dest->protocol = svc->protocol; 865 dest->protocol = svc->protocol;
@@ -888,6 +883,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
888 883
889 LeaveFunction(2); 884 LeaveFunction(2);
890 return 0; 885 return 0;
886
887err_alloc:
888 kfree(dest);
889 return -ENOMEM;
891} 890}
892 891
893 892
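The dest now owns a percpu counter block, so allocation becomes two-stage and every failure or teardown path must release both pieces in reverse order — which is why free_percpu() calls appear next to each kfree(dest) elsewhere in the patch. A kernel-style sketch of the paired alloc/free, with demo_* placeholders:

    #include <linux/types.h>
    #include <linux/percpu.h>
    #include <linux/slab.h>

    struct demo_stats {
        u64 pkts;
    };

    struct demo_dest {
        struct demo_stats __percpu *cpustats;
        /* ... other fields ... */
    };

    static struct demo_dest *demo_dest_new(void)
    {
        struct demo_dest *dest = kzalloc(sizeof(*dest), GFP_KERNEL);

        if (!dest)
            return NULL;
        dest->cpustats = alloc_percpu(struct demo_stats);
        if (!dest->cpustats)
            goto err_alloc;
        return dest;

    err_alloc:
        kfree(dest);
        return NULL;
    }

    static void demo_dest_free(struct demo_dest *dest)
    {
        /* mirror of allocation, freed in reverse order */
        free_percpu(dest->cpustats);
        kfree(dest);
    }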
@@ -1006,16 +1005,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1006/* 1005/*
1007 * Delete a destination (must be already unlinked from the service) 1006 * Delete a destination (must be already unlinked from the service)
1008 */ 1007 */
1009static void __ip_vs_del_dest(struct ip_vs_dest *dest) 1008static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1010{ 1009{
1011 ip_vs_kill_estimator(&dest->stats); 1010 struct netns_ipvs *ipvs = net_ipvs(net);
1011
1012 ip_vs_stop_estimator(net, &dest->stats);
1012 1013
1013 /* 1014 /*
1014 * Remove it from the d-linked list with the real services. 1015 * Remove it from the d-linked list with the real services.
1015 */ 1016 */
1016 write_lock_bh(&__ip_vs_rs_lock); 1017 write_lock_bh(&ipvs->rs_lock);
1017 ip_vs_rs_unhash(dest); 1018 ip_vs_rs_unhash(dest);
1018 write_unlock_bh(&__ip_vs_rs_lock); 1019 write_unlock_bh(&ipvs->rs_lock);
1019 1020
1020 /* 1021 /*
1021 * Decrease the refcnt of the dest, and free the dest 1022 * Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1035,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1034 and only one user context can update virtual service at a 1035 and only one user context can update virtual service at a
1035 time, so the operation here is OK */ 1036 time, so the operation here is OK */
1036 atomic_dec(&dest->svc->refcnt); 1037 atomic_dec(&dest->svc->refcnt);
1038 free_percpu(dest->stats.cpustats);
1037 kfree(dest); 1039 kfree(dest);
1038 } else { 1040 } else {
1039 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " 1041 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1043,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1041 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1043 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1042 ntohs(dest->port), 1044 ntohs(dest->port),
1043 atomic_read(&dest->refcnt)); 1045 atomic_read(&dest->refcnt));
1044 list_add(&dest->n_list, &ip_vs_dest_trash); 1046 list_add(&dest->n_list, &ipvs->dest_trash);
1045 atomic_inc(&dest->refcnt); 1047 atomic_inc(&dest->refcnt);
1046 } 1048 }
1047} 1049}
@@ -1105,7 +1107,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1105 /* 1107 /*
1106 * Delete the destination 1108 * Delete the destination
1107 */ 1109 */
1108 __ip_vs_del_dest(dest); 1110 __ip_vs_del_dest(svc->net, dest);
1109 1111
1110 LeaveFunction(2); 1112 LeaveFunction(2);
1111 1113
@@ -1117,13 +1119,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1117 * Add a service into the service hash table 1119 * Add a service into the service hash table
1118 */ 1120 */
1119static int 1121static int
1120ip_vs_add_service(struct ip_vs_service_user_kern *u, 1122ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1121 struct ip_vs_service **svc_p) 1123 struct ip_vs_service **svc_p)
1122{ 1124{
1123 int ret = 0; 1125 int ret = 0;
1124 struct ip_vs_scheduler *sched = NULL; 1126 struct ip_vs_scheduler *sched = NULL;
1125 struct ip_vs_pe *pe = NULL; 1127 struct ip_vs_pe *pe = NULL;
1126 struct ip_vs_service *svc = NULL; 1128 struct ip_vs_service *svc = NULL;
1129 struct netns_ipvs *ipvs = net_ipvs(net);
1127 1130
1128 /* increase the module use count */ 1131 /* increase the module use count */
1129 ip_vs_use_count_inc(); 1132 ip_vs_use_count_inc();
@@ -1137,7 +1140,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1137 } 1140 }
1138 1141
1139 if (u->pe_name && *u->pe_name) { 1142 if (u->pe_name && *u->pe_name) {
1140 pe = ip_vs_pe_get(u->pe_name); 1143 pe = ip_vs_pe_getbyname(u->pe_name);
1141 if (pe == NULL) { 1144 if (pe == NULL) {
1142 pr_info("persistence engine module ip_vs_pe_%s " 1145 pr_info("persistence engine module ip_vs_pe_%s "
1143 "not found\n", u->pe_name); 1146 "not found\n", u->pe_name);
@@ -1159,6 +1162,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1159 ret = -ENOMEM; 1162 ret = -ENOMEM;
1160 goto out_err; 1163 goto out_err;
1161 } 1164 }
1165 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1166 if (!svc->stats.cpustats) {
1167 pr_err("%s() alloc_percpu failed\n", __func__);
1168 goto out_err;
1169 }
1162 1170
1163 /* I'm the first user of the service */ 1171 /* I'm the first user of the service */
1164 atomic_set(&svc->usecnt, 0); 1172 atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1180,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1172 svc->flags = u->flags; 1180 svc->flags = u->flags;
1173 svc->timeout = u->timeout * HZ; 1181 svc->timeout = u->timeout * HZ;
1174 svc->netmask = u->netmask; 1182 svc->netmask = u->netmask;
1183 svc->net = net;
1175 1184
1176 INIT_LIST_HEAD(&svc->destinations); 1185 INIT_LIST_HEAD(&svc->destinations);
1177 rwlock_init(&svc->sched_lock); 1186 rwlock_init(&svc->sched_lock);
@@ -1189,15 +1198,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1189 1198
1190 /* Update the virtual service counters */ 1199 /* Update the virtual service counters */
1191 if (svc->port == FTPPORT) 1200 if (svc->port == FTPPORT)
1192 atomic_inc(&ip_vs_ftpsvc_counter); 1201 atomic_inc(&ipvs->ftpsvc_counter);
1193 else if (svc->port == 0) 1202 else if (svc->port == 0)
1194 atomic_inc(&ip_vs_nullsvc_counter); 1203 atomic_inc(&ipvs->nullsvc_counter);
1195 1204
1196 ip_vs_new_estimator(&svc->stats); 1205 ip_vs_start_estimator(net, &svc->stats);
1197 1206
1198 /* Count only IPv4 services for old get/setsockopt interface */ 1207 /* Count only IPv4 services for old get/setsockopt interface */
1199 if (svc->af == AF_INET) 1208 if (svc->af == AF_INET)
1200 ip_vs_num_services++; 1209 ipvs->num_services++;
1201 1210
1202 /* Hash the service into the service table */ 1211 /* Hash the service into the service table */
1203 write_lock_bh(&__ip_vs_svc_lock); 1212 write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1216,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1207 *svc_p = svc; 1216 *svc_p = svc;
1208 return 0; 1217 return 0;
1209 1218
1219
1210 out_err: 1220 out_err:
1211 if (svc != NULL) { 1221 if (svc != NULL) {
1212 ip_vs_unbind_scheduler(svc); 1222 ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1225,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1215 ip_vs_app_inc_put(svc->inc); 1225 ip_vs_app_inc_put(svc->inc);
1216 local_bh_enable(); 1226 local_bh_enable();
1217 } 1227 }
1228 if (svc->stats.cpustats)
1229 free_percpu(svc->stats.cpustats);
1218 kfree(svc); 1230 kfree(svc);
1219 } 1231 }
1220 ip_vs_scheduler_put(sched); 1232 ip_vs_scheduler_put(sched);
@@ -1248,7 +1260,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1248 old_sched = sched; 1260 old_sched = sched;
1249 1261
1250 if (u->pe_name && *u->pe_name) { 1262 if (u->pe_name && *u->pe_name) {
1251 pe = ip_vs_pe_get(u->pe_name); 1263 pe = ip_vs_pe_getbyname(u->pe_name);
1252 if (pe == NULL) { 1264 if (pe == NULL) {
1253 pr_info("persistence engine module ip_vs_pe_%s " 1265 pr_info("persistence engine module ip_vs_pe_%s "
1254 "not found\n", u->pe_name); 1266 "not found\n", u->pe_name);
@@ -1334,14 +1346,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1334 struct ip_vs_dest *dest, *nxt; 1346 struct ip_vs_dest *dest, *nxt;
1335 struct ip_vs_scheduler *old_sched; 1347 struct ip_vs_scheduler *old_sched;
1336 struct ip_vs_pe *old_pe; 1348 struct ip_vs_pe *old_pe;
1349 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1337 1350
1338 pr_info("%s: enter\n", __func__); 1351 pr_info("%s: enter\n", __func__);
1339 1352
1340 /* Count only IPv4 services for old get/setsockopt interface */ 1353 /* Count only IPv4 services for old get/setsockopt interface */
1341 if (svc->af == AF_INET) 1354 if (svc->af == AF_INET)
1342 ip_vs_num_services--; 1355 ipvs->num_services--;
1343 1356
1344 ip_vs_kill_estimator(&svc->stats); 1357 ip_vs_stop_estimator(svc->net, &svc->stats);
1345 1358
1346 /* Unbind scheduler */ 1359 /* Unbind scheduler */
1347 old_sched = svc->scheduler; 1360 old_sched = svc->scheduler;
@@ -1364,16 +1377,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1364 */ 1377 */
1365 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1378 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1366 __ip_vs_unlink_dest(svc, dest, 0); 1379 __ip_vs_unlink_dest(svc, dest, 0);
1367 __ip_vs_del_dest(dest); 1380 __ip_vs_del_dest(svc->net, dest);
1368 } 1381 }
1369 1382
1370 /* 1383 /*
1371 * Update the virtual service counters 1384 * Update the virtual service counters
1372 */ 1385 */
1373 if (svc->port == FTPPORT) 1386 if (svc->port == FTPPORT)
1374 atomic_dec(&ip_vs_ftpsvc_counter); 1387 atomic_dec(&ipvs->ftpsvc_counter);
1375 else if (svc->port == 0) 1388 else if (svc->port == 0)
1376 atomic_dec(&ip_vs_nullsvc_counter); 1389 atomic_dec(&ipvs->nullsvc_counter);
1377 1390
1378 /* 1391 /*
1379 * Free the service if nobody refers to it 1392 * Free the service if nobody refers to it
@@ -1383,6 +1396,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1383 svc->fwmark, 1396 svc->fwmark,
1384 IP_VS_DBG_ADDR(svc->af, &svc->addr), 1397 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1385 ntohs(svc->port), atomic_read(&svc->usecnt)); 1398 ntohs(svc->port), atomic_read(&svc->usecnt));
1399 free_percpu(svc->stats.cpustats);
1386 kfree(svc); 1400 kfree(svc);
1387 } 1401 }
1388 1402
@@ -1428,17 +1442,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1428/* 1442/*
1429 * Flush all the virtual services 1443 * Flush all the virtual services
1430 */ 1444 */
1431static int ip_vs_flush(void) 1445static int ip_vs_flush(struct net *net)
1432{ 1446{
1433 int idx; 1447 int idx;
1434 struct ip_vs_service *svc, *nxt; 1448 struct ip_vs_service *svc, *nxt;
1435 1449
1436 /* 1450 /*
1437 * Flush the service table hashed by <protocol,addr,port> 1451 * Flush the service table hashed by <netns,protocol,addr,port>
1438 */ 1452 */
1439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1453 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1454 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1441 ip_vs_unlink_service(svc); 1455 s_list) {
1456 if (net_eq(svc->net, net))
1457 ip_vs_unlink_service(svc);
1442 } 1458 }
1443 } 1459 }
1444 1460
@@ -1448,7 +1464,8 @@ static int ip_vs_flush(void)
1448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1464 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1449 list_for_each_entry_safe(svc, nxt, 1465 list_for_each_entry_safe(svc, nxt,
1450 &ip_vs_svc_fwm_table[idx], f_list) { 1466 &ip_vs_svc_fwm_table[idx], f_list) {
1451 ip_vs_unlink_service(svc); 1467 if (net_eq(svc->net, net))
1468 ip_vs_unlink_service(svc);
1452 } 1469 }
1453 } 1470 }
1454 1471
@@ -1472,32 +1489,35 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
1472 return 0; 1489 return 0;
1473} 1490}
1474 1491
1475static int ip_vs_zero_all(void) 1492static int ip_vs_zero_all(struct net *net)
1476{ 1493{
1477 int idx; 1494 int idx;
1478 struct ip_vs_service *svc; 1495 struct ip_vs_service *svc;
1479 1496
1480 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1497 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1481 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1498 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1482 ip_vs_zero_service(svc); 1499 if (net_eq(svc->net, net))
1500 ip_vs_zero_service(svc);
1483 } 1501 }
1484 } 1502 }
1485 1503
1486 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1504 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1505 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1488 ip_vs_zero_service(svc); 1506 if (net_eq(svc->net, net))
1507 ip_vs_zero_service(svc);
1489 } 1508 }
1490 } 1509 }
1491 1510
1492 ip_vs_zero_stats(&ip_vs_stats); 1511 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1493 return 0; 1512 return 0;
1494} 1513}
1495 1514
1496 1515#ifdef CONFIG_SYSCTL
1497static int 1516static int
1498proc_do_defense_mode(ctl_table *table, int write, 1517proc_do_defense_mode(ctl_table *table, int write,
1499 void __user *buffer, size_t *lenp, loff_t *ppos) 1518 void __user *buffer, size_t *lenp, loff_t *ppos)
1500{ 1519{
1520 struct net *net = current->nsproxy->net_ns;
1501 int *valp = table->data; 1521 int *valp = table->data;
1502 int val = *valp; 1522 int val = *valp;
1503 int rc; 1523 int rc;
@@ -1508,13 +1528,12 @@ proc_do_defense_mode(ctl_table *table, int write,
1508 /* Restore the correct value */ 1528 /* Restore the correct value */
1509 *valp = val; 1529 *valp = val;
1510 } else { 1530 } else {
1511 update_defense_level(); 1531 update_defense_level(net_ipvs(net));
1512 } 1532 }
1513 } 1533 }
1514 return rc; 1534 return rc;
1515} 1535}
1516 1536
1517
1518static int 1537static int
1519proc_do_sync_threshold(ctl_table *table, int write, 1538proc_do_sync_threshold(ctl_table *table, int write,
1520 void __user *buffer, size_t *lenp, loff_t *ppos) 1539 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1534,45 +1553,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
1534 return rc; 1553 return rc;
1535} 1554}
1536 1555
1556static int
1557proc_do_sync_mode(ctl_table *table, int write,
1558 void __user *buffer, size_t *lenp, loff_t *ppos)
1559{
1560 int *valp = table->data;
1561 int val = *valp;
1562 int rc;
1563
1564 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565 if (write && (*valp != val)) {
1566 if ((*valp < 0) || (*valp > 1)) {
1567 /* Restore the correct value */
1568 *valp = val;
1569 } else {
1570 struct net *net = current->nsproxy->net_ns;
1571 ip_vs_sync_switch_mode(net, val);
1572 }
1573 }
1574 return rc;
1575}
1537 1576
1538/* 1577/*
1539 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1578 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1579 * Do not change order or insert new entries without
 1580 * aligning with netns init in __ip_vs_control_init()
1540 */ 1581 */
1541 1582
1542static struct ctl_table vs_vars[] = { 1583static struct ctl_table vs_vars[] = {
1543 { 1584 {
1544 .procname = "amemthresh", 1585 .procname = "amemthresh",
1545 .data = &sysctl_ip_vs_amemthresh,
1546 .maxlen = sizeof(int),
1547 .mode = 0644,
1548 .proc_handler = proc_dointvec,
1549 },
1550#ifdef CONFIG_IP_VS_DEBUG
1551 {
1552 .procname = "debug_level",
1553 .data = &sysctl_ip_vs_debug_level,
1554 .maxlen = sizeof(int), 1586 .maxlen = sizeof(int),
1555 .mode = 0644, 1587 .mode = 0644,
1556 .proc_handler = proc_dointvec, 1588 .proc_handler = proc_dointvec,
1557 }, 1589 },
1558#endif
1559 { 1590 {
1560 .procname = "am_droprate", 1591 .procname = "am_droprate",
1561 .data = &sysctl_ip_vs_am_droprate,
1562 .maxlen = sizeof(int), 1592 .maxlen = sizeof(int),
1563 .mode = 0644, 1593 .mode = 0644,
1564 .proc_handler = proc_dointvec, 1594 .proc_handler = proc_dointvec,
1565 }, 1595 },
1566 { 1596 {
1567 .procname = "drop_entry", 1597 .procname = "drop_entry",
1568 .data = &sysctl_ip_vs_drop_entry,
1569 .maxlen = sizeof(int), 1598 .maxlen = sizeof(int),
1570 .mode = 0644, 1599 .mode = 0644,
1571 .proc_handler = proc_do_defense_mode, 1600 .proc_handler = proc_do_defense_mode,
1572 }, 1601 },
1573 { 1602 {
1574 .procname = "drop_packet", 1603 .procname = "drop_packet",
1575 .data = &sysctl_ip_vs_drop_packet,
1576 .maxlen = sizeof(int), 1604 .maxlen = sizeof(int),
1577 .mode = 0644, 1605 .mode = 0644,
1578 .proc_handler = proc_do_defense_mode, 1606 .proc_handler = proc_do_defense_mode,
@@ -1580,7 +1608,6 @@ static struct ctl_table vs_vars[] = {
1580#ifdef CONFIG_IP_VS_NFCT 1608#ifdef CONFIG_IP_VS_NFCT
1581 { 1609 {
1582 .procname = "conntrack", 1610 .procname = "conntrack",
1583 .data = &sysctl_ip_vs_conntrack,
1584 .maxlen = sizeof(int), 1611 .maxlen = sizeof(int),
1585 .mode = 0644, 1612 .mode = 0644,
1586 .proc_handler = &proc_dointvec, 1613 .proc_handler = &proc_dointvec,
@@ -1588,18 +1615,62 @@ static struct ctl_table vs_vars[] = {
1588#endif 1615#endif
1589 { 1616 {
1590 .procname = "secure_tcp", 1617 .procname = "secure_tcp",
1591 .data = &sysctl_ip_vs_secure_tcp,
1592 .maxlen = sizeof(int), 1618 .maxlen = sizeof(int),
1593 .mode = 0644, 1619 .mode = 0644,
1594 .proc_handler = proc_do_defense_mode, 1620 .proc_handler = proc_do_defense_mode,
1595 }, 1621 },
1596 { 1622 {
1597 .procname = "snat_reroute", 1623 .procname = "snat_reroute",
1598 .data = &sysctl_ip_vs_snat_reroute,
1599 .maxlen = sizeof(int), 1624 .maxlen = sizeof(int),
1600 .mode = 0644, 1625 .mode = 0644,
1601 .proc_handler = &proc_dointvec, 1626 .proc_handler = &proc_dointvec,
1602 }, 1627 },
1628 {
1629 .procname = "sync_version",
1630 .maxlen = sizeof(int),
1631 .mode = 0644,
1632 .proc_handler = &proc_do_sync_mode,
1633 },
1634 {
1635 .procname = "cache_bypass",
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = proc_dointvec,
1639 },
1640 {
1641 .procname = "expire_nodest_conn",
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
1644 .proc_handler = proc_dointvec,
1645 },
1646 {
1647 .procname = "expire_quiescent_template",
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = proc_dointvec,
1651 },
1652 {
1653 .procname = "sync_threshold",
1654 .maxlen =
1655 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1656 .mode = 0644,
1657 .proc_handler = proc_do_sync_threshold,
1658 },
1659 {
1660 .procname = "nat_icmp_send",
1661 .maxlen = sizeof(int),
1662 .mode = 0644,
1663 .proc_handler = proc_dointvec,
1664 },
1665#ifdef CONFIG_IP_VS_DEBUG
1666 {
1667 .procname = "debug_level",
1668 .data = &sysctl_ip_vs_debug_level,
1669 .maxlen = sizeof(int),
1670 .mode = 0644,
1671 .proc_handler = proc_dointvec,
1672 },
1673#endif
1603#if 0 1674#if 0
1604 { 1675 {
1605 .procname = "timeout_established", 1676 .procname = "timeout_established",
@@ -1686,41 +1757,6 @@ static struct ctl_table vs_vars[] = {
1686 .proc_handler = proc_dointvec_jiffies, 1757 .proc_handler = proc_dointvec_jiffies,
1687 }, 1758 },
1688#endif 1759#endif
1689 {
1690 .procname = "cache_bypass",
1691 .data = &sysctl_ip_vs_cache_bypass,
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
1694 .proc_handler = proc_dointvec,
1695 },
1696 {
1697 .procname = "expire_nodest_conn",
1698 .data = &sysctl_ip_vs_expire_nodest_conn,
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
1701 .proc_handler = proc_dointvec,
1702 },
1703 {
1704 .procname = "expire_quiescent_template",
1705 .data = &sysctl_ip_vs_expire_quiescent_template,
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
1708 .proc_handler = proc_dointvec,
1709 },
1710 {
1711 .procname = "sync_threshold",
1712 .data = &sysctl_ip_vs_sync_threshold,
1713 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1714 .mode = 0644,
1715 .proc_handler = proc_do_sync_threshold,
1716 },
1717 {
1718 .procname = "nat_icmp_send",
1719 .data = &sysctl_ip_vs_nat_icmp_send,
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
1722 .proc_handler = proc_dointvec,
1723 },
1724 { } 1760 { }
1725}; 1761};
1726 1762
@@ -1731,12 +1767,12 @@ const struct ctl_path net_vs_ctl_path[] = {
1731 { } 1767 { }
1732}; 1768};
1733EXPORT_SYMBOL_GPL(net_vs_ctl_path); 1769EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1734 1770#endif
1735static struct ctl_table_header * sysctl_header;
1736 1771
1737#ifdef CONFIG_PROC_FS 1772#ifdef CONFIG_PROC_FS
1738 1773
1739struct ip_vs_iter { 1774struct ip_vs_iter {
 1775 struct seq_net_private p; /* Do not move this, netns depends upon it */
1740 struct list_head *table; 1776 struct list_head *table;
1741 int bucket; 1777 int bucket;
1742}; 1778};
@@ -1763,6 +1799,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
1763/* Get the Nth entry in the two lists */ 1799/* Get the Nth entry in the two lists */
1764static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1800static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1765{ 1801{
1802 struct net *net = seq_file_net(seq);
1766 struct ip_vs_iter *iter = seq->private; 1803 struct ip_vs_iter *iter = seq->private;
1767 int idx; 1804 int idx;
1768 struct ip_vs_service *svc; 1805 struct ip_vs_service *svc;
@@ -1770,7 +1807,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1770 /* look in hash by protocol */ 1807 /* look in hash by protocol */
1771 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1808 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1772 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1809 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1773 if (pos-- == 0){ 1810 if (net_eq(svc->net, net) && pos-- == 0) {
1774 iter->table = ip_vs_svc_table; 1811 iter->table = ip_vs_svc_table;
1775 iter->bucket = idx; 1812 iter->bucket = idx;
1776 return svc; 1813 return svc;
@@ -1781,7 +1818,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1781 /* keep looking in fwmark */ 1818 /* keep looking in fwmark */
1782 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1819 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1783 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1820 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1784 if (pos-- == 0) { 1821 if (net_eq(svc->net, net) && pos-- == 0) {
1785 iter->table = ip_vs_svc_fwm_table; 1822 iter->table = ip_vs_svc_fwm_table;
1786 iter->bucket = idx; 1823 iter->bucket = idx;
1787 return svc; 1824 return svc;
@@ -1935,7 +1972,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
1935 1972
1936static int ip_vs_info_open(struct inode *inode, struct file *file) 1973static int ip_vs_info_open(struct inode *inode, struct file *file)
1937{ 1974{
1938 return seq_open_private(file, &ip_vs_info_seq_ops, 1975 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1939 sizeof(struct ip_vs_iter)); 1976 sizeof(struct ip_vs_iter));
1940} 1977}
1941 1978
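The switch from seq_open_private() to seq_open_net() is what scopes the /proc listing to the reader's namespace: seq_open_net() stores the inode's netns at the head of the private area — which is why struct seq_net_private must stay the first member of ip_vs_iter — and seq_file_net() fetches it back inside the iterator. A minimal sketch of the pattern (demo_* names and the trivial show routine are placeholders):

    #include <linux/seq_file.h>
    #include <net/net_namespace.h>

    struct demo_iter {
        struct seq_net_private p;    /* must be first: seq_open_net()
                                      * stores the netns here */
        int bucket;                  /* iterator state follows */
    };

    static int demo_show(struct seq_file *seq, void *v)
    {
        struct net *net = seq_file_net(seq);    /* opener's namespace */
        struct demo_iter *iter = seq->private;

        /* filter entries with a net_eq(entry->net, net) check here */
        seq_printf(seq, "bucket %d in netns %p\n", iter->bucket, net);
        return 0;
    }

    static const struct seq_operations demo_seq_ops = {
        /* .start/.next/.stop omitted in this sketch */
        .show = demo_show,
    };

    static int demo_open(struct inode *inode, struct file *file)
    {
        /* allocates demo_iter and records the inode's netns in ->p */
        return seq_open_net(inode, file, &demo_seq_ops,
                            sizeof(struct demo_iter));
    }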
@@ -1949,13 +1986,11 @@ static const struct file_operations ip_vs_info_fops = {
 
 #endif
 
-struct ip_vs_stats ip_vs_stats = {
-    .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
+    struct net *net = seq_file_single_net(seq);
+    struct ip_vs_stats_user show;
 
 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
     seq_puts(seq,
@@ -1963,29 +1998,25 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
     seq_printf(seq,
            " Conns Packets Packets Bytes Bytes\n");
 
-    spin_lock_bh(&ip_vs_stats.lock);
-    seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-           ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-           (unsigned long long) ip_vs_stats.ustats.inbytes,
-           (unsigned long long) ip_vs_stats.ustats.outbytes);
+    ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
+    seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
+           show.inpkts, show.outpkts,
+           (unsigned long long) show.inbytes,
+           (unsigned long long) show.outbytes);
 
 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
     seq_puts(seq,
          " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
-    seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-            ip_vs_stats.ustats.cps,
-            ip_vs_stats.ustats.inpps,
-            ip_vs_stats.ustats.outpps,
-            ip_vs_stats.ustats.inbps,
-            ip_vs_stats.ustats.outbps);
-    spin_unlock_bh(&ip_vs_stats.lock);
+    seq_printf(seq, "%8X %8X %8X %16X %16X\n",
+           show.cps, show.inpps, show.outpps,
+           show.inbps, show.outbps);
 
     return 0;
 }
 
 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 {
-    return single_open(file, ip_vs_stats_show, NULL);
+    return single_open_net(inode, file, ip_vs_stats_show);
 }
 
 static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2027,85 @@ static const struct file_operations ip_vs_stats_fops = {
     .release = single_release,
 };
 
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+    struct net *net = seq_file_single_net(seq);
+    struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
+    struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
+    struct ip_vs_stats_user rates;
+    int i;
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+    seq_puts(seq,
+         " Total Incoming Outgoing Incoming Outgoing\n");
+    seq_printf(seq,
+           "CPU Conns Packets Packets Bytes Bytes\n");
+
+    for_each_possible_cpu(i) {
+        struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+        unsigned int start;
+        __u64 inbytes, outbytes;
+
+        do {
+            start = u64_stats_fetch_begin_bh(&u->syncp);
+            inbytes = u->ustats.inbytes;
+            outbytes = u->ustats.outbytes;
+        } while (u64_stats_fetch_retry_bh(&u->syncp, start));
+
+        seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+               i, u->ustats.conns, u->ustats.inpkts,
+               u->ustats.outpkts, (__u64)inbytes,
+               (__u64)outbytes);
+    }
+
+    spin_lock_bh(&tot_stats->lock);
+
+    seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
+           tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+           tot_stats->ustats.outpkts,
+           (unsigned long long) tot_stats->ustats.inbytes,
+           (unsigned long long) tot_stats->ustats.outbytes);
+
+    ip_vs_read_estimator(&rates, tot_stats);
+
+    spin_unlock_bh(&tot_stats->lock);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+    seq_puts(seq,
+         " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+    seq_printf(seq, " %8X %8X %8X %16X %16X\n",
+           rates.cps,
+           rates.inpps,
+           rates.outpps,
+           rates.inbps,
+           rates.outbps);
+
+    return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+    return single_open_net(inode, file, ip_vs_stats_percpu_show);
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+    .owner = THIS_MODULE,
+    .open = ip_vs_stats_percpu_seq_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = single_release,
+};
 #endif
 
 /*
  * Set timeout values for tcp tcpfin udp in the timeout_table.
  */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+    struct ip_vs_proto_data *pd;
+#endif
+
     IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
           u->tcp_timeout,
           u->tcp_fin_timeout,
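The for_each_possible_cpu() loop added above uses the u64_stats retry protocol: on 32-bit machines a 64-bit counter read can tear, so the reader samples a per-CPU seqcount before and after the copy and retries if a writer got in between. Reduced to one counter (foo_* names hypothetical; the u64_stats_* helpers are the real API):

    #include <linux/types.h>
    #include <linux/u64_stats_sync.h>

    struct foo_cpu_stats {
        u64 bytes;
        struct u64_stats_sync syncp;
    };

    static u64 foo_read_bytes(struct foo_cpu_stats *s)
    {
        unsigned int start;
        u64 bytes;

        /* retry until no writer ran between begin and retry */
        do {
            start = u64_stats_fetch_begin_bh(&s->syncp);
            bytes = s->bytes;
        } while (u64_stats_fetch_retry_bh(&s->syncp, start));

        return bytes;
    }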
@@ -2010,19 +2113,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
     if (u->tcp_timeout) {
-        ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+        pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+        pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
             = u->tcp_timeout * HZ;
     }
 
     if (u->tcp_fin_timeout) {
-        ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+        pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+        pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
             = u->tcp_fin_timeout * HZ;
     }
 #endif
 
 #ifdef CONFIG_IP_VS_PROTO_UDP
     if (u->udp_timeout) {
-        ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+        pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+        pd->timeout_table[IP_VS_UDP_S_NORMAL]
             = u->udp_timeout * HZ;
     }
 #endif
@@ -2087,6 +2193,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
+    struct net *net = sock_net(sk);
     int ret;
     unsigned char arg[MAX_ARG_LEN];
     struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2228,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
     if (cmd == IP_VS_SO_SET_FLUSH) {
         /* Flush the virtual service */
-        ret = ip_vs_flush();
+        ret = ip_vs_flush(net);
         goto out_unlock;
     } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
         /* Set timeout values for (tcp tcpfin udp) */
-        ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+        ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
         goto out_unlock;
     } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
         struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-        ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+        ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+                    dm->syncid);
         goto out_unlock;
     } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
         struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-        ret = stop_sync_thread(dm->state);
+        ret = stop_sync_thread(net, dm->state);
         goto out_unlock;
     }
 
@@ -2148,7 +2256,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
     if (cmd == IP_VS_SO_SET_ZERO) {
         /* if no service address is set, zero counters in all */
         if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-            ret = ip_vs_zero_all();
+            ret = ip_vs_zero_all(net);
             goto out_unlock;
         }
     }
@@ -2165,10 +2273,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
     /* Lookup the exact service by <protocol, addr, port> or fwmark */
     if (usvc.fwmark == 0)
-        svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+        svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
                        &usvc.addr, usvc.port);
     else
-        svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+        svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
 
     if (cmd != IP_VS_SO_SET_ADD
         && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2289,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
         if (svc != NULL)
             ret = -EEXIST;
         else
-            ret = ip_vs_add_service(&usvc, &svc);
+            ret = ip_vs_add_service(net, &usvc, &svc);
         break;
     case IP_VS_SO_SET_EDIT:
         ret = ip_vs_edit_service(svc, &usvc);
@@ -2218,14 +2326,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 
 static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
-{
-    spin_lock_bh(&src->lock);
-    memcpy(dst, &src->ustats, sizeof(*dst));
-    spin_unlock_bh(&src->lock);
-}
-
-static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
     dst->protocol = src->protocol;
@@ -2241,7 +2341,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }
 
 static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+                const struct ip_vs_get_services *get,
                 struct ip_vs_get_services __user *uptr)
 {
     int idx, count=0;
@@ -2252,7 +2353,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
     for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
         list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
             /* Only expose IPv4 entries to old interface */
-            if (svc->af != AF_INET)
+            if (svc->af != AF_INET || !net_eq(svc->net, net))
                 continue;
 
             if (count >= get->num_services)
@@ -2271,7 +2372,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
     for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
         list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
             /* Only expose IPv4 entries to old interface */
-            if (svc->af != AF_INET)
+            if (svc->af != AF_INET || !net_eq(svc->net, net))
                 continue;
 
             if (count >= get->num_services)
@@ -2291,7 +2392,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 }
 
 static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
              struct ip_vs_get_dests __user *uptr)
 {
     struct ip_vs_service *svc;
@@ -2299,9 +2400,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
     int ret = 0;
 
     if (get->fwmark)
-        svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+        svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
     else
-        svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+        svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
                        get->port);
 
     if (svc) {
@@ -2336,17 +2437,21 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 }
 
 static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+    struct ip_vs_proto_data *pd;
+#endif
+
 #ifdef CONFIG_IP_VS_PROTO_TCP
-    u->tcp_timeout =
-        ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-    u->tcp_fin_timeout =
-        ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+    pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+    u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+    u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
+    pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
     u->udp_timeout =
-        ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+            pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
 #endif
 }
 
@@ -2375,7 +2480,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
     unsigned char arg[128];
     int ret = 0;
     unsigned int copylen;
+    struct net *net = sock_net(sk);
+    struct netns_ipvs *ipvs = net_ipvs(net);
 
+    BUG_ON(!net);
     if (!capable(CAP_NET_ADMIN))
         return -EPERM;
 
@@ -2418,7 +2526,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
         struct ip_vs_getinfo info;
         info.version = IP_VS_VERSION_CODE;
         info.size = ip_vs_conn_tab_size;
-        info.num_services = ip_vs_num_services;
+        info.num_services = ipvs->num_services;
         if (copy_to_user(user, &info, sizeof(info)) != 0)
             ret = -EFAULT;
     }
@@ -2437,7 +2545,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
             ret = -EINVAL;
             goto out;
         }
-        ret = __ip_vs_get_service_entries(get, user);
+        ret = __ip_vs_get_service_entries(net, get, user);
     }
     break;
 
@@ -2450,10 +2558,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
         entry = (struct ip_vs_service_entry *)arg;
         addr.ip = entry->addr;
         if (entry->fwmark)
-            svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+            svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
         else
-            svc = __ip_vs_service_find(AF_INET, entry->protocol,
-                           &addr, entry->port);
+            svc = __ip_vs_service_find(net, AF_INET,
+                           entry->protocol, &addr,
+                           entry->port);
         if (svc) {
             ip_vs_copy_service(entry, svc);
             if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2585,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
             ret = -EINVAL;
             goto out;
         }
-        ret = __ip_vs_get_dest_entries(get, user);
+        ret = __ip_vs_get_dest_entries(net, get, user);
     }
     break;
 
@@ -2484,7 +2593,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
     {
         struct ip_vs_timeout_user t;
 
-        __ip_vs_get_timeouts(&t);
+        __ip_vs_get_timeouts(net, &t);
         if (copy_to_user(user, &t, sizeof(t)) != 0)
             ret = -EFAULT;
     }
@@ -2495,15 +2604,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
         struct ip_vs_daemon_user d[2];
 
         memset(&d, 0, sizeof(d));
-        if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+        if (ipvs->sync_state & IP_VS_STATE_MASTER) {
             d[0].state = IP_VS_STATE_MASTER;
-            strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-            d[0].syncid = ip_vs_master_syncid;
+            strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+                sizeof(d[0].mcast_ifn));
+            d[0].syncid = ipvs->master_syncid;
         }
-        if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+        if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
             d[1].state = IP_VS_STATE_BACKUP;
-            strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-            d[1].syncid = ip_vs_backup_syncid;
+            strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+                sizeof(d[1].mcast_ifn));
+            d[1].syncid = ipvs->backup_syncid;
         }
         if (copy_to_user(user, &d, sizeof(d)) != 0)
             ret = -EFAULT;
@@ -2542,6 +2653,7 @@ static struct genl_family ip_vs_genl_family = {
     .name = IPVS_GENL_NAME,
     .version = IPVS_GENL_VERSION,
     .maxattr = IPVS_CMD_MAX,
+    .netnsok = true,        /* Make ipvsadm work with netns */
 };
 
 /* Policy used for first-level command attributes */
@@ -2599,31 +2711,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
                  struct ip_vs_stats *stats)
 {
+    struct ip_vs_stats_user ustats;
     struct nlattr *nl_stats = nla_nest_start(skb, container_type);
     if (!nl_stats)
         return -EMSGSIZE;
 
-    spin_lock_bh(&stats->lock);
-
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
-    NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
-    NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
-    NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
+    ip_vs_copy_stats(&ustats, stats);
 
-    spin_unlock_bh(&stats->lock);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
+    NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
+    NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
+    NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
 
     nla_nest_end(skb, nl_stats);
 
     return 0;
 
 nla_put_failure:
-    spin_unlock_bh(&stats->lock);
     nla_nest_cancel(skb, nl_stats);
     return -EMSGSIZE;
 }
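The rewrite of ip_vs_genl_fill_stats() above is worth a remark: the old code held stats->lock across every NLA_PUT_*, which forced the nla_put_failure path to remember the unlock. Snapshotting into a stack-local struct first shrinks the critical section and leaves the error path lock-free. The shape of the pattern (foo_* and FOO_ATTR_CONNS are hypothetical; NLA_PUT_U32 is the era's real macro, which jumps to nla_put_failure when the skb is full):

    #include <net/netlink.h>
    #include <linux/spinlock.h>
    #include <linux/string.h>

    #define FOO_ATTR_CONNS 1                 /* hypothetical attribute id */

    struct foo_stats_user { __u32 conns; };  /* plain snapshot, no locks */
    struct foo_stats {
        struct foo_stats_user ustats;
        spinlock_t lock;
    };

    static int foo_fill_stats(struct sk_buff *skb, struct foo_stats *stats)
    {
        struct foo_stats_user snap;

        spin_lock_bh(&stats->lock);          /* short critical section */
        memcpy(&snap, &stats->ustats, sizeof(snap));
        spin_unlock_bh(&stats->lock);

        NLA_PUT_U32(skb, FOO_ATTR_CONNS, snap.conns);
        return 0;

    nla_put_failure:                         /* nothing to unlock here */
        return -EMSGSIZE;
    }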
@@ -2696,11 +2806,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
     int idx = 0, i;
     int start = cb->args[0];
     struct ip_vs_service *svc;
+    struct net *net = skb_sknet(skb);
 
     mutex_lock(&__ip_vs_mutex);
     for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
         list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-            if (++idx <= start)
+            if (++idx <= start || !net_eq(svc->net, net))
                 continue;
             if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                 idx--;
@@ -2711,7 +2822,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 
     for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
         list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-            if (++idx <= start)
+            if (++idx <= start || !net_eq(svc->net, net))
                 continue;
             if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                 idx--;
@@ -2727,7 +2838,8 @@ nla_put_failure:
     return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+                    struct ip_vs_service_user_kern *usvc,
                     struct nlattr *nla, int full_entry,
                     struct ip_vs_service **ret_svc)
 {
@@ -2770,9 +2882,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
     }
 
     if (usvc->fwmark)
-        svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+        svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
     else
-        svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+        svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
                        &usvc->addr, usvc->port);
     *ret_svc = svc;
 
@@ -2809,13 +2921,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
     return 0;
 }
 
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+                             struct nlattr *nla)
 {
     struct ip_vs_service_user_kern usvc;
     struct ip_vs_service *svc;
     int ret;
 
-    ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+    ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
     return ret ? ERR_PTR(ret) : svc;
 }
 
@@ -2883,6 +2996,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
     struct ip_vs_service *svc;
     struct ip_vs_dest *dest;
     struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+    struct net *net = skb_sknet(skb);
 
     mutex_lock(&__ip_vs_mutex);
 
@@ -2891,7 +3005,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
             IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
         goto out_err;
 
-    svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+
+    svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
     if (IS_ERR(svc) || svc == NULL)
         goto out_err;
 
@@ -3005,20 +3120,23 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
                    struct netlink_callback *cb)
 {
+    struct net *net = skb_net(skb);
+    struct netns_ipvs *ipvs = net_ipvs(net);
+
     mutex_lock(&__ip_vs_mutex);
-    if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+    if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
         if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-                       ip_vs_master_mcast_ifn,
-                       ip_vs_master_syncid, cb) < 0)
+                       ipvs->master_mcast_ifn,
+                       ipvs->master_syncid, cb) < 0)
             goto nla_put_failure;
 
         cb->args[0] = 1;
     }
 
-    if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+    if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
         if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-                       ip_vs_backup_mcast_ifn,
-                       ip_vs_backup_syncid, cb) < 0)
+                       ipvs->backup_mcast_ifn,
+                       ipvs->backup_syncid, cb) < 0)
             goto nla_put_failure;
 
         cb->args[1] = 1;
@@ -3030,31 +3148,33 @@ nla_put_failure:
     return skb->len;
 }
 
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 {
     if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
           attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
           attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
         return -EINVAL;
 
-    return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+    return start_sync_thread(net,
+                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
 }
 
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
 {
     if (!attrs[IPVS_DAEMON_ATTR_STATE])
         return -EINVAL;
 
-    return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+    return stop_sync_thread(net,
+                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 }
 
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 {
     struct ip_vs_timeout_user t;
 
-    __ip_vs_get_timeouts(&t);
+    __ip_vs_get_timeouts(net, &t);
 
     if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
         t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3186,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
     if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
         t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
 
-    return ip_vs_set_timeout(&t);
+    return ip_vs_set_timeout(net, &t);
 }
 
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3196,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
     struct ip_vs_dest_user_kern udest;
     int ret = 0, cmd;
     int need_full_svc = 0, need_full_dest = 0;
+    struct net *net;
+    struct netns_ipvs *ipvs;
 
+    net = skb_sknet(skb);
+    ipvs = net_ipvs(net);
     cmd = info->genlhdr->cmd;
 
     mutex_lock(&__ip_vs_mutex);
 
     if (cmd == IPVS_CMD_FLUSH) {
-        ret = ip_vs_flush();
+        ret = ip_vs_flush(net);
         goto out;
     } else if (cmd == IPVS_CMD_SET_CONFIG) {
-        ret = ip_vs_genl_set_config(info->attrs);
+        ret = ip_vs_genl_set_config(net, info->attrs);
         goto out;
     } else if (cmd == IPVS_CMD_NEW_DAEMON ||
            cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3225,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
     }
 
     if (cmd == IPVS_CMD_NEW_DAEMON)
-        ret = ip_vs_genl_new_daemon(daemon_attrs);
+        ret = ip_vs_genl_new_daemon(net, daemon_attrs);
     else
-        ret = ip_vs_genl_del_daemon(daemon_attrs);
+        ret = ip_vs_genl_del_daemon(net, daemon_attrs);
         goto out;
     } else if (cmd == IPVS_CMD_ZERO &&
            !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-        ret = ip_vs_zero_all();
+        ret = ip_vs_zero_all(net);
         goto out;
     }
 
@@ -3117,7 +3241,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
     if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
         need_full_svc = 1;
 
-    ret = ip_vs_genl_parse_service(&usvc,
+    ret = ip_vs_genl_parse_service(net, &usvc,
                        info->attrs[IPVS_CMD_ATTR_SERVICE],
                        need_full_svc, &svc);
     if (ret)
@@ -3147,7 +3271,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
     switch (cmd) {
     case IPVS_CMD_NEW_SERVICE:
         if (svc == NULL)
-            ret = ip_vs_add_service(&usvc, &svc);
+            ret = ip_vs_add_service(net, &usvc, &svc);
         else
             ret = -EEXIST;
         break;
@@ -3185,7 +3309,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
     struct sk_buff *msg;
     void *reply;
     int ret, cmd, reply_cmd;
+    struct net *net;
+    struct netns_ipvs *ipvs;
 
+    net = skb_sknet(skb);
+    ipvs = net_ipvs(net);
     cmd = info->genlhdr->cmd;
 
     if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3342,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
     {
         struct ip_vs_service *svc;
 
-        svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+        svc = ip_vs_genl_find_service(net,
+                          info->attrs[IPVS_CMD_ATTR_SERVICE]);
         if (IS_ERR(svc)) {
             ret = PTR_ERR(svc);
             goto out_err;
@@ -3234,7 +3363,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
     {
         struct ip_vs_timeout_user t;
 
-        __ip_vs_get_timeouts(&t);
+        __ip_vs_get_timeouts(net, &t);
 #ifdef CONFIG_IP_VS_PROTO_TCP
         NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
         NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3509,189 @@ static void ip_vs_genl_unregister(void)
 
 /* End of Generic Netlink interface definitions */
 
+/*
+ * per netns init/exit func.
+ */
+#ifdef CONFIG_SYSCTL
+int __net_init __ip_vs_control_init_sysctl(struct net *net)
+{
+    int idx;
+    struct netns_ipvs *ipvs = net_ipvs(net);
+    struct ctl_table *tbl;
+
+    atomic_set(&ipvs->dropentry, 0);
+    spin_lock_init(&ipvs->dropentry_lock);
+    spin_lock_init(&ipvs->droppacket_lock);
+    spin_lock_init(&ipvs->securetcp_lock);
+
+    if (!net_eq(net, &init_net)) {
+        tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+        if (tbl == NULL)
+            return -ENOMEM;
+    } else
+        tbl = vs_vars;
+    /* Initialize sysctl defaults */
+    idx = 0;
+    ipvs->sysctl_amemthresh = 1024;
+    tbl[idx++].data = &ipvs->sysctl_amemthresh;
+    ipvs->sysctl_am_droprate = 10;
+    tbl[idx++].data = &ipvs->sysctl_am_droprate;
+    tbl[idx++].data = &ipvs->sysctl_drop_entry;
+    tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+    tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+    tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+    ipvs->sysctl_snat_reroute = 1;
+    tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+    ipvs->sysctl_sync_ver = 1;
+    tbl[idx++].data = &ipvs->sysctl_sync_ver;
+    tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+    tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+    tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+    ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+    ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
+    tbl[idx].data = &ipvs->sysctl_sync_threshold;
+    tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+    tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+
+    ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
+                             tbl);
+    if (ipvs->sysctl_hdr == NULL) {
+        if (!net_eq(net, &init_net))
+            kfree(tbl);
+        return -ENOMEM;
+    }
+    ip_vs_start_estimator(net, &ipvs->tot_stats);
+    ipvs->sysctl_tbl = tbl;
+    /* Schedule defense work */
+    INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+    schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+
+    return 0;
+}
+
+void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
+{
+    struct netns_ipvs *ipvs = net_ipvs(net);
+
+    cancel_delayed_work_sync(&ipvs->defense_work);
+    cancel_work_sync(&ipvs->defense_work.work);
+    unregister_net_sysctl_table(ipvs->sysctl_hdr);
+}
+
+#else
+
+int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
+void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
+
+#endif
+
+int __net_init __ip_vs_control_init(struct net *net)
+{
+    int idx;
+    struct netns_ipvs *ipvs = net_ipvs(net);
+
+    ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+    /* Initialize rs_table */
+    for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+        INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+    INIT_LIST_HEAD(&ipvs->dest_trash);
+    atomic_set(&ipvs->ftpsvc_counter, 0);
+    atomic_set(&ipvs->nullsvc_counter, 0);
+
+    /* procfs stats */
+    ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+    if (!ipvs->tot_stats.cpustats) {
+        pr_err("%s(): alloc_percpu.\n", __func__);
+        return -ENOMEM;
+    }
+    spin_lock_init(&ipvs->tot_stats.lock);
+
+    proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+    proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+    proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+                 &ip_vs_stats_percpu_fops);
+
+    if (__ip_vs_control_init_sysctl(net))
+        goto err;
+
+    return 0;
+
+err:
+    free_percpu(ipvs->tot_stats.cpustats);
+    return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+    struct netns_ipvs *ipvs = net_ipvs(net);
+
+    ip_vs_trash_cleanup(net);
+    ip_vs_stop_estimator(net, &ipvs->tot_stats);
+    __ip_vs_control_cleanup_sysctl(net);
+    proc_net_remove(net, "ip_vs_stats_percpu");
+    proc_net_remove(net, "ip_vs_stats");
+    proc_net_remove(net, "ip_vs");
+    free_percpu(ipvs->tot_stats.cpustats);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+    .init = __ip_vs_control_init,
+    .exit = __ip_vs_control_cleanup,
+};
 
 int __init ip_vs_control_init(void)
 {
-    int ret;
     int idx;
+    int ret;
 
     EnterFunction(2);
 
-    /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+    /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
     for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
         INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
         INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
     }
-    for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
-        INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+    ret = register_pernet_subsys(&ipvs_control_ops);
+    if (ret) {
+        pr_err("cannot register namespace.\n");
+        goto err;
     }
-    smp_wmb();
+
+    smp_wmb();  /* Do we really need it now ? */
 
     ret = nf_register_sockopt(&ip_vs_sockopts);
     if (ret) {
         pr_err("cannot register sockopt.\n");
-        return ret;
+        goto err_net;
     }
 
     ret = ip_vs_genl_register();
     if (ret) {
         pr_err("cannot register Generic Netlink interface.\n");
         nf_unregister_sockopt(&ip_vs_sockopts);
-        return ret;
+        goto err_net;
     }
 
-    proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-    proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
-    sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
-    ip_vs_new_estimator(&ip_vs_stats);
-
-    /* Hook the defense timer */
-    schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
     LeaveFunction(2);
     return 0;
+
+err_net:
+    unregister_pernet_subsys(&ipvs_control_ops);
+err:
+    return ret;
 }
 
 
 void ip_vs_control_cleanup(void)
 {
     EnterFunction(2);
-    ip_vs_trash_cleanup();
-    cancel_delayed_work_sync(&defense_work);
-    cancel_work_sync(&defense_work.work);
-    ip_vs_kill_estimator(&ip_vs_stats);
-    unregister_sysctl_table(sysctl_header);
-    proc_net_remove(&init_net, "ip_vs_stats");
-    proc_net_remove(&init_net, "ip_vs");
+    unregister_pernet_subsys(&ipvs_control_ops);
     ip_vs_genl_unregister();
     nf_unregister_sockopt(&ip_vs_sockopts);
     LeaveFunction(2);
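__ip_vs_control_init_sysctl() above is the standard recipe for per-netns sysctls: keep one static ctl_table as a template, kmemdup() it for every namespace other than init_net, point each entry's .data at the namespace's own storage, and register the copy against that net. A condensed sketch of the same pattern; kmemdup(), net_generic() and register_net_sysctl_table() are real APIs of this kernel vintage, everything prefixed foo_ is hypothetical:

    #include <net/net_namespace.h>
    #include <net/netns/generic.h>
    #include <linux/sysctl.h>
    #include <linux/slab.h>

    /* hypothetical per-net state, allocated via register_pernet_subsys() */
    struct foo_pernet {
        int level;
        struct ctl_table_header *hdr;
        struct ctl_table *tbl;
    };
    static int foo_net_id __read_mostly;

    static struct ctl_path foo_ctl_path[] = {
        { .procname = "net" },
        { .procname = "foo" },              /* hypothetical subtree */
        { }
    };

    static struct ctl_table foo_vars[] = {
        {
            .procname     = "level",
            .data         = NULL,           /* pointed at per-net storage below */
            .maxlen       = sizeof(int),
            .mode         = 0644,
            .proc_handler = proc_dointvec,
        },
        { }
    };

    static int __net_init foo_init_sysctl(struct net *net)
    {
        struct foo_pernet *fp = net_generic(net, foo_net_id);
        struct ctl_table *tbl = foo_vars;

        /* init_net may keep the static table; every other net gets a copy */
        if (!net_eq(net, &init_net)) {
            tbl = kmemdup(foo_vars, sizeof(foo_vars), GFP_KERNEL);
            if (tbl == NULL)
                return -ENOMEM;
        }
        tbl[0].data = &fp->level;           /* per-net backing store */

        fp->hdr = register_net_sysctl_table(net, foo_ctl_path, tbl);
        if (fp->hdr == NULL) {
            if (!net_eq(net, &init_net))
                kfree(tbl);
            return -ENOMEM;
        }
        fp->tbl = tbl;
        return 0;
    }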
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801962e..8c8766ca56a 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,12 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * Changes:
- *
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
+ *              Global data moved to netns, i.e. struct netns_ipvs.
+ *              Affected data: est_list and est_lock.
+ *              estimation_timer() runs with a timer per netns.
+ *              get_stats() does the per-CPU summing.
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -48,11 +52,42 @@
  */
 
 
-static void estimation_timer(unsigned long arg);
+/*
+ * Make a summary from each cpu
+ */
+static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+                 struct ip_vs_cpu_stats *stats)
+{
+    int i;
+
+    for_each_possible_cpu(i) {
+        struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
+        unsigned int start;
+        __u64 inbytes, outbytes;
+        if (i) {
+            sum->conns += s->ustats.conns;
+            sum->inpkts += s->ustats.inpkts;
+            sum->outpkts += s->ustats.outpkts;
+            do {
+                start = u64_stats_fetch_begin(&s->syncp);
+                inbytes = s->ustats.inbytes;
+                outbytes = s->ustats.outbytes;
+            } while (u64_stats_fetch_retry(&s->syncp, start));
+            sum->inbytes += inbytes;
+            sum->outbytes += outbytes;
+        } else {
+            sum->conns = s->ustats.conns;
+            sum->inpkts = s->ustats.inpkts;
+            sum->outpkts = s->ustats.outpkts;
+            do {
+                start = u64_stats_fetch_begin(&s->syncp);
+                sum->inbytes = s->ustats.inbytes;
+                sum->outbytes = s->ustats.outbytes;
+            } while (u64_stats_fetch_retry(&s->syncp, start));
+        }
+    }
+}
 
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
-static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
 
 static void estimation_timer(unsigned long arg)
 {
@@ -62,12 +97,16 @@ static void estimation_timer(unsigned long arg)
     u32 n_inpkts, n_outpkts;
     u64 n_inbytes, n_outbytes;
     u32 rate;
+    struct net *net = (struct net *)arg;
+    struct netns_ipvs *ipvs;
 
-    spin_lock(&est_lock);
-    list_for_each_entry(e, &est_list, list) {
+    ipvs = net_ipvs(net);
+    spin_lock(&ipvs->est_lock);
+    list_for_each_entry(e, &ipvs->est_list, list) {
         s = container_of(e, struct ip_vs_stats, est);
 
         spin_lock(&s->lock);
+        ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
         n_conns = s->ustats.conns;
         n_inpkts = s->ustats.inpkts;
         n_outpkts = s->ustats.outpkts;
@@ -75,81 +114,64 @@ static void estimation_timer(unsigned long arg)
         n_outbytes = s->ustats.outbytes;
 
         /* scaled by 2^10, but divided by 2 seconds */
-        rate = (n_conns - e->last_conns)<<9;
+        rate = (n_conns - e->last_conns) << 9;
         e->last_conns = n_conns;
-        e->cps += ((long)rate - (long)e->cps)>>2;
-        s->ustats.cps = (e->cps+0x1FF)>>10;
+        e->cps += ((long)rate - (long)e->cps) >> 2;
 
-        rate = (n_inpkts - e->last_inpkts)<<9;
+        rate = (n_inpkts - e->last_inpkts) << 9;
         e->last_inpkts = n_inpkts;
-        e->inpps += ((long)rate - (long)e->inpps)>>2;
-        s->ustats.inpps = (e->inpps+0x1FF)>>10;
+        e->inpps += ((long)rate - (long)e->inpps) >> 2;
 
-        rate = (n_outpkts - e->last_outpkts)<<9;
+        rate = (n_outpkts - e->last_outpkts) << 9;
         e->last_outpkts = n_outpkts;
-        e->outpps += ((long)rate - (long)e->outpps)>>2;
-        s->ustats.outpps = (e->outpps+0x1FF)>>10;
+        e->outpps += ((long)rate - (long)e->outpps) >> 2;
 
-        rate = (n_inbytes - e->last_inbytes)<<4;
+        rate = (n_inbytes - e->last_inbytes) << 4;
        e->last_inbytes = n_inbytes;
-        e->inbps += ((long)rate - (long)e->inbps)>>2;
-        s->ustats.inbps = (e->inbps+0xF)>>5;
+        e->inbps += ((long)rate - (long)e->inbps) >> 2;
 
-        rate = (n_outbytes - e->last_outbytes)<<4;
+        rate = (n_outbytes - e->last_outbytes) << 4;
         e->last_outbytes = n_outbytes;
-        e->outbps += ((long)rate - (long)e->outbps)>>2;
-        s->ustats.outbps = (e->outbps+0xF)>>5;
+        e->outbps += ((long)rate - (long)e->outbps) >> 2;
         spin_unlock(&s->lock);
     }
-    spin_unlock(&est_lock);
-    mod_timer(&est_timer, jiffies + 2*HZ);
+    spin_unlock(&ipvs->est_lock);
+    mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }
 
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+    struct netns_ipvs *ipvs = net_ipvs(net);
     struct ip_vs_estimator *est = &stats->est;
 
     INIT_LIST_HEAD(&est->list);
 
-    est->last_conns = stats->ustats.conns;
-    est->cps = stats->ustats.cps<<10;
-
-    est->last_inpkts = stats->ustats.inpkts;
-    est->inpps = stats->ustats.inpps<<10;
-
-    est->last_outpkts = stats->ustats.outpkts;
-    est->outpps = stats->ustats.outpps<<10;
-
-    est->last_inbytes = stats->ustats.inbytes;
-    est->inbps = stats->ustats.inbps<<5;
-
-    est->last_outbytes = stats->ustats.outbytes;
-    est->outbps = stats->ustats.outbps<<5;
-
-    spin_lock_bh(&est_lock);
-    list_add(&est->list, &est_list);
-    spin_unlock_bh(&est_lock);
+    spin_lock_bh(&ipvs->est_lock);
+    list_add(&est->list, &ipvs->est_list);
+    spin_unlock_bh(&ipvs->est_lock);
 }
 
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+    struct netns_ipvs *ipvs = net_ipvs(net);
     struct ip_vs_estimator *est = &stats->est;
 
-    spin_lock_bh(&est_lock);
+    spin_lock_bh(&ipvs->est_lock);
     list_del(&est->list);
-    spin_unlock_bh(&est_lock);
+    spin_unlock_bh(&ipvs->est_lock);
 }
 
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 {
     struct ip_vs_estimator *est = &stats->est;
-
-    /* set counters zero, caller must hold the stats->lock lock */
-    est->last_inbytes = 0;
-    est->last_outbytes = 0;
-    est->last_conns = 0;
-    est->last_inpkts = 0;
-    est->last_outpkts = 0;
+    struct ip_vs_stats_user *u = &stats->ustats;
+
+    /* reset counters, caller must hold the stats->lock lock */
+    est->last_inbytes = u->inbytes;
+    est->last_outbytes = u->outbytes;
+    est->last_conns = u->conns;
+    est->last_inpkts = u->inpkts;
+    est->last_outpkts = u->outpkts;
     est->cps = 0;
     est->inpps = 0;
     est->outpps = 0;
@@ -157,13 +179,48 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
     est->outbps = 0;
 }
 
-int __init ip_vs_estimator_init(void)
+/* Get decoded rates */
+void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
+              struct ip_vs_stats *stats)
 {
-    mod_timer(&est_timer, jiffies + 2 * HZ);
+    struct ip_vs_estimator *e = &stats->est;
+
+    dst->cps = (e->cps + 0x1FF) >> 10;
+    dst->inpps = (e->inpps + 0x1FF) >> 10;
+    dst->outpps = (e->outpps + 0x1FF) >> 10;
+    dst->inbps = (e->inbps + 0xF) >> 5;
+    dst->outbps = (e->outbps + 0xF) >> 5;
+}
+
+static int __net_init __ip_vs_estimator_init(struct net *net)
+{
+    struct netns_ipvs *ipvs = net_ipvs(net);
+
+    INIT_LIST_HEAD(&ipvs->est_list);
+    spin_lock_init(&ipvs->est_lock);
+    setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+    mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
     return 0;
 }
 
+static void __net_exit __ip_vs_estimator_exit(struct net *net)
+{
+    del_timer_sync(&net_ipvs(net)->est_timer);
+}
+static struct pernet_operations ip_vs_app_ops = {
+    .init = __ip_vs_estimator_init,
+    .exit = __ip_vs_estimator_exit,
+};
+
+int __init ip_vs_estimator_init(void)
+{
+    int rv;
+
+    rv = register_pernet_subsys(&ip_vs_app_ops);
+    return rv;
+}
+
 void ip_vs_estimator_cleanup(void)
 {
-    del_timer_sync(&est_timer);
+    unregister_pernet_subsys(&ip_vs_app_ops);
 }
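A note on the fixed-point arithmetic that ip_vs_read_estimator() now decodes: the timer fires every 2 seconds, so rate = (delta) << 9 is the per-second delta scaled by 2^10 (shift 10 for the scale, minus 1 for the division by two); e->cps += (rate - e->cps) >> 2 folds it into an exponential average with weight 1/4; and (e->cps + 0x1FF) >> 10 rounds back to whole units on read. A stand-alone check of the math with made-up traffic:

    #include <stdio.h>

    int main(void)
    {
        long cps = 0;               /* EWMA of conns/s, scaled by 2^10 */
        long rate;
        unsigned int now = 0, last = 0;
        int tick;

        for (tick = 0; tick < 20; tick++) {
            now += 200;             /* pretend 200 new conns per 2 s tick */
            /* <<9 = divide the 2 s delta by 2, then scale by 2^10 */
            rate = (long)(now - last) << 9;
            last = now;
            cps += (rate - cps) >> 2;   /* exponential average, weight 1/4 */
        }
        /* decode: add 0x1FF so the >>10 rounds up instead of truncating */
        printf("cps = %ld\n", (cps + 0x1FF) >> 10);   /* prints 100 */
        return 0;
    }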
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 75455000ad1..6b5dd6ddaae 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
     int ret = 0;
     enum ip_conntrack_info ctinfo;
     struct nf_conn *ct;
+    struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
     /* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
      */
     {
         struct ip_vs_conn_param p;
-        ip_vs_conn_fill_param(AF_INET, iph->protocol,
-                      &from, port, &cp->caddr, 0, &p);
+        ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+                      iph->protocol, &from, port,
+                      &cp->caddr, 0, &p);
         n_cp = ip_vs_conn_out_get(&p);
     }
     if (!n_cp) {
         struct ip_vs_conn_param p;
-        ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+        ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+                      AF_INET, IPPROTO_TCP, &cp->caddr,
                       0, &cp->vaddr, port, &p);
         n_cp = ip_vs_conn_new(&p, &from, port,
                       IP_VS_CONN_F_NO_CPORT |
                       IP_VS_CONN_F_NFCT,
-                      cp->dest);
+                      cp->dest, skb->mark);
         if (!n_cp)
             return 0;
 
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
      * would be adjusted twice.
      */
 
+    net = skb_net(skb);
     cp->app_data = NULL;
-    ip_vs_tcp_conn_listen(n_cp);
+    ip_vs_tcp_conn_listen(net, n_cp);
     ip_vs_conn_put(n_cp);
     return ret;
 }
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
     union nf_inet_addr to;
     __be16 port;
     struct ip_vs_conn *n_cp;
+    struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
     /* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 
     {
         struct ip_vs_conn_param p;
-        ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
-                      &cp->vaddr, htons(ntohs(cp->vport)-1),
-                      &p);
+        ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+                      iph->protocol, &to, port, &cp->vaddr,
+                      htons(ntohs(cp->vport)-1), &p);
         n_cp = ip_vs_conn_in_get(&p);
         if (!n_cp) {
             n_cp = ip_vs_conn_new(&p, &cp->daddr,
                           htons(ntohs(cp->dport)-1),
-                          IP_VS_CONN_F_NFCT, cp->dest);
+                          IP_VS_CONN_F_NFCT, cp->dest,
+                          skb->mark);
             if (!n_cp)
                 return 0;
 
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
     /*
      * Move tunnel to listen state
      */
-    ip_vs_tcp_conn_listen(n_cp);
+    net = skb_net(skb);
+    ip_vs_tcp_conn_listen(net, n_cp);
     ip_vs_conn_put(n_cp);
 
     return 1;
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
     .pkt_in = ip_vs_ftp_in,
 };
 
-
 /*
- * ip_vs_ftp initialization
+ * per netns ip_vs_ftp initialization
  */
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
 {
     int i, ret;
     struct ip_vs_app *app = &ip_vs_ftp;
 
-    ret = register_ip_vs_app(app);
+    ret = register_ip_vs_app(net, app);
     if (ret)
         return ret;
 
     for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
         if (!ports[i])
             continue;
-        ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+        ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
         if (ret)
             break;
         pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
     }
 
     if (ret)
-        unregister_ip_vs_app(app);
+        unregister_ip_vs_app(net, app);
 
     return ret;
 }
+/*
+ * netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+    struct ip_vs_app *app = &ip_vs_ftp;
+
+    unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+    .init = __ip_vs_ftp_init,
+    .exit = __ip_vs_ftp_exit,
+};
 
+int __init ip_vs_ftp_init(void)
+{
+    int rv;
+
+    rv = register_pernet_subsys(&ip_vs_ftp_ops);
+    return rv;
+}
 
 /*
  * ip_vs_ftp finish.
  */
 static void __exit ip_vs_ftp_exit(void)
 {
-    unregister_ip_vs_app(&ip_vs_ftp);
+    unregister_pernet_subsys(&ip_vs_ftp_ops);
 }
 
 
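The ftp conversion shows the canonical pernet_operations shape that every file in this series follows: the old module init body becomes the .init hook, which the core runs once for init_net and again for every namespace created later, and the module init/exit collapse to register/unregister of the ops. A skeleton of the pattern (foo_* names hypothetical):

    #include <linux/module.h>
    #include <net/net_namespace.h>

    static int __net_init foo_net_init(struct net *net)
    {
        /* per-namespace setup; called for init_net and every new netns */
        return 0;
    }

    static void __net_exit foo_net_exit(struct net *net)
    {
        /* undo foo_net_init() when the namespace dies */
    }

    static struct pernet_operations foo_net_ops = {
        .init = foo_net_init,
        .exit = foo_net_exit,
    };

    static int __init foo_init(void)
    {
        return register_pernet_subsys(&foo_net_ops);
    }

    static void __exit foo_exit(void)
    {
        unregister_pernet_subsys(&foo_net_ops);
    }

    module_init(foo_init);
    module_exit(foo_exit);
    MODULE_LICENSE("GPL");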
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9323f894419..f276df9896b 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -63,6 +63,8 @@
63#define CHECK_EXPIRE_INTERVAL (60*HZ) 63#define CHECK_EXPIRE_INTERVAL (60*HZ)
64#define ENTRY_TIMEOUT (6*60*HZ) 64#define ENTRY_TIMEOUT (6*60*HZ)
65 65
66#define DEFAULT_EXPIRATION (24*60*60*HZ)
67
66/* 68/*
67 * It is for full expiration check. 69 * It is for full expiration check.
68 * When there is no partial expiration check (garbage collection) 70 * When there is no partial expiration check (garbage collection)
@@ -70,7 +72,6 @@
70 * entries that haven't been touched for a day. 72 * entries that haven't been touched for a day.
71 */ 73 */
72#define COUNT_FOR_FULL_EXPIRATION 30 74#define COUNT_FOR_FULL_EXPIRATION 30
73static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
74 75
75 76
76/* 77/*
@@ -113,19 +114,18 @@ struct ip_vs_lblc_table {
113/* 114/*
114 * IPVS LBLC sysctl table 115 * IPVS LBLC sysctl table
115 */ 116 */
116 117#ifdef CONFIG_SYSCTL
117static ctl_table vs_vars_table[] = { 118static ctl_table vs_vars_table[] = {
118 { 119 {
119 .procname = "lblc_expiration", 120 .procname = "lblc_expiration",
120 .data = &sysctl_ip_vs_lblc_expiration, 121 .data = NULL,
121 .maxlen = sizeof(int), 122 .maxlen = sizeof(int),
122 .mode = 0644, 123 .mode = 0644,
123 .proc_handler = proc_dointvec_jiffies, 124 .proc_handler = proc_dointvec_jiffies,
124 }, 125 },
125 { } 126 { }
126}; 127};
127 128#endif
128static struct ctl_table_header * sysctl_header;
129 129
130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
131{ 131{
@@ -241,6 +241,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
241 } 241 }
242} 242}
243 243
244static int sysctl_lblc_expiration(struct ip_vs_service *svc)
245{
246#ifdef CONFIG_SYSCTL
247 struct netns_ipvs *ipvs = net_ipvs(svc->net);
248 return ipvs->sysctl_lblc_expiration;
249#else
250 return DEFAULT_EXPIRATION;
251#endif
252}
244 253
245static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) 254static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
246{ 255{
@@ -255,7 +264,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
255 write_lock(&svc->sched_lock); 264 write_lock(&svc->sched_lock);
256 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 265 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
257 if (time_before(now, 266 if (time_before(now,
258 en->lastuse + sysctl_ip_vs_lblc_expiration)) 267 en->lastuse +
268 sysctl_lblc_expiration(svc)))
259 continue; 269 continue;
260 270
261 ip_vs_lblc_free(en); 271 ip_vs_lblc_free(en);
@@ -390,12 +400,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
390 int loh, doh; 400 int loh, doh;
391 401
392 /* 402 /*
393 * We think the overhead of processing active connections is fifty 403 * We use the following formula to estimate the load:
394 * times higher than that of inactive connections in average. (This
395 * fifty times might not be accurate, we will change it later.) We
396 * use the following formula to estimate the overhead:
397 * dest->activeconns*50 + dest->inactconns
398 * and the load:
399 * (dest overhead) / dest->weight 404 * (dest overhead) / dest->weight
400 * 405 *
401 * Remember -- no floats in kernel mode!!! 406 * Remember -- no floats in kernel mode!!!
@@ -411,8 +416,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
 			continue;
 		if (atomic_read(&dest->weight) > 0) {
 			least = dest;
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
+			loh = ip_vs_dest_conn_overhead(least);
 			goto nextstage;
 		}
 	}
@@ -426,8 +430,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if (loh * atomic_read(&dest->weight) >
 		    doh * atomic_read(&least->weight)) {
 			least = dest;
@@ -511,7 +514,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/* No cache entry or it is invalid, time to schedule */
 	dest = __ip_vs_lblc_schedule(svc);
 	if (!dest) {
-		IP_VS_ERR_RL("LBLC: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}
 
@@ -543,23 +546,77 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 	.schedule =		ip_vs_lblc_schedule,
 };
 
+/*
+ *  per netns init.
+ */
+#ifdef CONFIG_SYSCTL
+static int __net_init __ip_vs_lblc_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!net_eq(net, &init_net)) {
+		ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+						sizeof(vs_vars_table),
+						GFP_KERNEL);
+		if (ipvs->lblc_ctl_table == NULL)
+			return -ENOMEM;
+	} else
+		ipvs->lblc_ctl_table = vs_vars_table;
+	ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
+	ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+	ipvs->lblc_ctl_header =
+		register_net_sysctl_table(net, net_vs_ctl_path,
+					  ipvs->lblc_ctl_table);
+	if (!ipvs->lblc_ctl_header) {
+		if (!net_eq(net, &init_net))
+			kfree(ipvs->lblc_ctl_table);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void __net_exit __ip_vs_lblc_exit(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	unregister_net_sysctl_table(ipvs->lblc_ctl_header);
+
+	if (!net_eq(net, &init_net))
+		kfree(ipvs->lblc_ctl_table);
+}
+
+#else
+
+static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
+
+#endif
+
+static struct pernet_operations ip_vs_lblc_ops = {
+	.init = __ip_vs_lblc_init,
+	.exit = __ip_vs_lblc_exit,
+};
 
 static int __init ip_vs_lblc_init(void)
 {
 	int ret;
 
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_pernet_subsys(&ip_vs_lblc_ops);
+	if (ret)
+		return ret;
+
 	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
 	if (ret)
-		unregister_sysctl_table(sysctl_header);
+		unregister_pernet_subsys(&ip_vs_lblc_ops);
 	return ret;
 }
 
-
 static void __exit ip_vs_lblc_cleanup(void)
 {
-	unregister_sysctl_table(sysctl_header);
 	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+	unregister_pernet_subsys(&ip_vs_lblc_ops);
 }
 
 
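The comment retained above ("no floats in kernel mode") points at how __ip_vs_lblc_schedule() actually compares candidates: the load ratio overhead/weight is never computed directly; the two ratios are compared by cross-multiplication, which stays in integer arithmetic. A minimal sketch of just that comparison, using the ip_vs_dest_conn_overhead() helper this patch switches to (the wrapper function itself is illustrative, not from the patch):

/*
 * Sketch: pick the lower overhead/weight ratio without division.
 * loh/lw > doh/dw  <=>  loh*dw > doh*lw, valid because weights > 0.
 */
static struct ip_vs_dest *lower_load(struct ip_vs_dest *least,
				     struct ip_vs_dest *dest)
{
	int loh = ip_vs_dest_conn_overhead(least);
	int doh = ip_vs_dest_conn_overhead(dest);

	if (loh * atomic_read(&dest->weight) >
	    doh * atomic_read(&least->weight))
		return dest;	/* dest carries less load per unit weight */
	return least;
}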
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index dbeed8ea421..cb1c9913d38 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -63,6 +63,8 @@
 #define CHECK_EXPIRE_INTERVAL   (60*HZ)
 #define ENTRY_TIMEOUT           (6*60*HZ)
 
+#define DEFAULT_EXPIRATION      (24*60*60*HZ)
+
 /*
  * It is for full expiration check.
  * When there is no partial expiration check (garbage collection)
@@ -70,8 +72,6 @@
  * entries that haven't been touched for a day.
  */
 #define COUNT_FOR_FULL_EXPIRATION 30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
 
 /*
  * for IPVS lblcr entry hash table
@@ -180,8 +180,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 
 		if ((atomic_read(&least->weight) > 0)
 		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
+			loh = ip_vs_dest_conn_overhead(least);
 			goto nextstage;
 		}
 	}
@@ -194,8 +193,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if ((loh * atomic_read(&dest->weight) >
 		     doh * atomic_read(&least->weight))
 		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -230,8 +228,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 	list_for_each_entry(e, &set->list, list) {
 		most = e->dest;
 		if (atomic_read(&most->weight) > 0) {
-			moh = atomic_read(&most->activeconns) * 50
-				+ atomic_read(&most->inactconns);
+			moh = ip_vs_dest_conn_overhead(most);
 			goto nextstage;
 		}
 	}
@@ -241,8 +238,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
   nextstage:
 	list_for_each_entry(e, &set->list, list) {
 		dest = e->dest;
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
 		if ((moh * atomic_read(&dest->weight) <
 		     doh * atomic_read(&most->weight))
@@ -289,6 +285,7 @@ struct ip_vs_lblcr_table {
 };
 
 
+#ifdef CONFIG_SYSCTL
 /*
  * IPVS LBLCR sysctl table
  */
@@ -296,15 +293,14 @@ struct ip_vs_lblcr_table {
 static ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblcr_expiration",
-		.data		= &sysctl_ip_vs_lblcr_expiration,
+		.data		= NULL,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{ }
 };
-
-static struct ctl_table_header * sysctl_header;
+#endif
 
 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
 {
@@ -418,6 +414,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
 	}
 }
 
+static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	return ipvs->sysctl_lblcr_expiration;
+#else
+	return DEFAULT_EXPIRATION;
+#endif
+}
 
 static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 {
@@ -431,8 +436,8 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 
 		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
-				       now))
+			if (time_after(en->lastuse +
+				       sysctl_lblcr_expiration(svc), now))
 				continue;
 
 			ip_vs_lblcr_free(en);
@@ -566,12 +571,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
 	int loh, doh;
 
 	/*
-	 * We think the overhead of processing active connections is fifty
-	 * times higher than that of inactive connections in average. (This
-	 * fifty times might not be accurate, we will change it later.) We
-	 * use the following formula to estimate the overhead:
-	 *		  dest->activeconns*50 + dest->inactconns
-	 * and the load:
+	 * We use the following formula to estimate the load:
 	 *		  (dest overhead) / dest->weight
 	 *
 	 * Remember -- no floats in kernel mode!!!
@@ -588,8 +588,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
 
 		if (atomic_read(&dest->weight) > 0) {
 			least = dest;
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
+			loh = ip_vs_dest_conn_overhead(least);
 			goto nextstage;
 		}
 	}
@@ -603,8 +602,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if (loh * atomic_read(&dest->weight) >
 		    doh * atomic_read(&least->weight)) {
 			least = dest;
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/* More than one destination + enough time passed by, cleanup */
 	if (atomic_read(&en->set.size) > 1 &&
 	    time_after(jiffies, en->set.lastmod +
-		       sysctl_ip_vs_lblcr_expiration)) {
+		       sysctl_lblcr_expiration(svc))) {
 		struct ip_vs_dest *m;
 
 		write_lock(&en->set.lock);
@@ -694,7 +692,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/* The cache entry is invalid, time to schedule */
 	dest = __ip_vs_lblcr_schedule(svc);
 	if (!dest) {
-		IP_VS_ERR_RL("LBLCR: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 		read_unlock(&svc->sched_lock);
 		return NULL;
 	}
@@ -744,23 +742,77 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 	.schedule =		ip_vs_lblcr_schedule,
 };
 
+/*
+ *  per netns init.
+ */
+#ifdef CONFIG_SYSCTL
+static int __net_init __ip_vs_lblcr_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!net_eq(net, &init_net)) {
+		ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+						sizeof(vs_vars_table),
+						GFP_KERNEL);
+		if (ipvs->lblcr_ctl_table == NULL)
+			return -ENOMEM;
+	} else
+		ipvs->lblcr_ctl_table = vs_vars_table;
+	ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
+	ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+	ipvs->lblcr_ctl_header =
+		register_net_sysctl_table(net, net_vs_ctl_path,
+					  ipvs->lblcr_ctl_table);
+	if (!ipvs->lblcr_ctl_header) {
+		if (!net_eq(net, &init_net))
+			kfree(ipvs->lblcr_ctl_table);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void __net_exit __ip_vs_lblcr_exit(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
+
+	if (!net_eq(net, &init_net))
+		kfree(ipvs->lblcr_ctl_table);
+}
+
+#else
+
+static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
+
+#endif
+
+static struct pernet_operations ip_vs_lblcr_ops = {
+	.init = __ip_vs_lblcr_init,
+	.exit = __ip_vs_lblcr_exit,
+};
 
 static int __init ip_vs_lblcr_init(void)
 {
 	int ret;
 
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+	if (ret)
+		return ret;
+
 	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	if (ret)
-		unregister_sysctl_table(sysctl_header);
+		unregister_pernet_subsys(&ip_vs_lblcr_ops);
 	return ret;
 }
 
-
 static void __exit ip_vs_lblcr_cleanup(void)
 {
-	unregister_sysctl_table(sysctl_header);
 	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+	unregister_pernet_subsys(&ip_vs_lblcr_ops);
 }
 
 
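ip_vs_lblc.c and ip_vs_lblcr.c now follow the same per-netns sysctl idiom: init_net keeps the static vs_vars_table, every other namespace gets a kmemdup() copy, and in both cases entry 0's .data is pointed at the namespace-private expiration value before registration. Stripped to its skeleton (field and table names taken from the diff; registration and cleanup elided):

static int __net_init example_lblc_init(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	ctl_table *tbl = vs_vars_table;

	/* Non-init netns must own a private copy, because .data below is
	 * rewritten to point into this namespace's state. */
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(vs_vars_table, sizeof(vs_vars_table),
			      GFP_KERNEL);
		if (!tbl)
			return -ENOMEM;
	}
	ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
	tbl[0].data = &ipvs->sysctl_lblc_expiration;
	ipvs->lblc_ctl_table = tbl;
	return 0;	/* register_net_sysctl_table() omitted in this sketch */
}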
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 4f69db1fac5..f391819c0cc 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -22,22 +22,6 @@
 
 #include <net/ip_vs.h>
 
-
-static inline unsigned int
-ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
-{
-	/*
-	 * We think the overhead of processing active connections is 256
-	 * times higher than that of inactive connections in average. (This
-	 * 256 times might not be accurate, we will change it later) We
-	 * use the following formula to estimate the overhead now:
-	 *		  dest->activeconns*256 + dest->inactconns
-	 */
-	return (atomic_read(&dest->activeconns) << 8) +
-		atomic_read(&dest->inactconns);
-}
-
-
 /*
  *	Least Connection scheduling
  */
@@ -62,7 +46,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
 		    atomic_read(&dest->weight) == 0)
 			continue;
-		doh = ip_vs_lc_dest_overhead(dest);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if (!least || doh < loh) {
 			least = dest;
 			loh = doh;
@@ -70,7 +54,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	}
 
 	if (!least)
-		IP_VS_ERR_RL("LC: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 	else
 		IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
 			      "inactconns %d\n",
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647cd45..f454c80df0a 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 	struct nf_conntrack_tuple *orig, new_reply;
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = nf_ct_net(ct);
 
 	if (exp->tuple.src.l3num != PF_INET)
 		return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 
 	/* RS->CLIENT */
 	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+	ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
 			      &orig->src.u3, orig->src.u.tcp.port,
 			      &orig->dst.u3, orig->dst.u.tcp.port, &p);
 	cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
268 " for conn " FMT_CONN "\n", 269 " for conn " FMT_CONN "\n",
269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); 270 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
270 271
271 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); 272 h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
273 &tuple);
272 if (h) { 274 if (h) {
273 ct = nf_ct_tuplehash_to_ctrack(h); 275 ct = nf_ct_tuplehash_to_ctrack(h);
274 /* Show what happens instead of calling nf_ct_kill() */ 276 /* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index c413e183082..984d9c137d8 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -99,7 +99,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	}
 
 	if (!least) {
-		IP_VS_ERR_RL("NQ: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}
 
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 3414af70ee1..5cf859ccb31 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
 }
 
 /* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
 {
 	struct ip_vs_pe *pe;
 
-	IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+	IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
 		  pe_name);
 
 	spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
 }
 
 /* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
 {
 	struct ip_vs_pe *pe;
 
 	/* Search for the pe by name */
-	pe = ip_vs_pe_getbyname(name);
+	pe = __ip_vs_pe_getbyname(name);
 
 	/* If pe not found, load the module and search again */
 	if (!pe) {
 		request_module("ip_vs_pe_%s", name);
-		pe = ip_vs_pe_getbyname(name);
+		pe = __ip_vs_pe_getbyname(name);
 	}
 
 	return pe;
 }
 
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
-	if (pe && pe->module)
-		module_put(pe->module);
-}
-
 /* Register a pe in the pe list */
 int register_ip_vs_pe(struct ip_vs_pe *pe)
 {
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e9620f3..13d607ae9c5 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	struct ip_vs_iphdr iph;
 	unsigned int dataoff, datalen, matchoff, matchlen;
 	const char *dptr;
+	int retc;
 
 	ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
 
@@ -83,20 +84,21 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	if (dataoff >= skb->len)
 		return -EINVAL;
 
+	if ((retc=skb_linearize(skb)) < 0)
+		return retc;
 	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
 
 	if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
 		return -EINVAL;
 
-	p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
-	if (!p->pe_data)
-		return -ENOMEM;
-
 	/* N.B: pe_data is only set on success,
 	 * this allows fallback to the default persistence logic on failure
 	 */
-	memcpy(p->pe_data, dptr + matchoff, matchlen);
+	p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
+	if (!p->pe_data)
+		return -ENOMEM;
+
 	p->pe_data_len = matchlen;
 
 	return 0;
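Two fixes share the hunk above: skb_linearize() guarantees the SIP payload is contiguous before dptr walks skb->data, and kmalloc()+memcpy() collapses into kmemdup(), so p->pe_data is assigned only once the copy exists, preserving the documented fallback on failure. The shape of the refactor in isolation:

/* before: two steps, pe_data populated before the copy succeeds */
p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
if (!p->pe_data)
	return -ENOMEM;
memcpy(p->pe_data, dptr + matchoff, matchlen);

/* after: one step, pe_data set only on success */
p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
if (!p->pe_data)
	return -ENOMEM;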
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index c5399839087..17484a4416e 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -60,6 +60,35 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return 0;
 }
 
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \
+    defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \
+    defined(CONFIG_IP_VS_PROTO_ESP)
+/*
+ *	register an ipvs protocols netns related data
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+	struct ip_vs_proto_data *pd =
+			kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+
+	if (!pd) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	pd->pp = pp;	/* For speed issues */
+	pd->next = ipvs->proto_data_table[hash];
+	ipvs->proto_data_table[hash] = pd;
+	atomic_set(&pd->appcnt, 0);	/* Init app counter */
+
+	if (pp->init_netns != NULL)
+		pp->init_netns(net, pd);
+
+	return 0;
+}
+#endif
 
 /*
  *	unregister an ipvs protocol
@@ -82,6 +111,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return -ESRCH;
 }
 
+/*
+ *	unregister an ipvs protocols netns data
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data **pd_p;
+	unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+	pd_p = &ipvs->proto_data_table[hash];
+	for (; *pd_p; pd_p = &(*pd_p)->next) {
+		if (*pd_p == pd) {
+			*pd_p = pd->next;
+			if (pd->pp->exit_netns != NULL)
+				pd->pp->exit_netns(net, pd);
+			kfree(pd);
+			return 0;
+		}
+	}
+
+	return -ESRCH;
+}
 
 /*
  *	get ip_vs_protocol object by its proto.
@@ -100,19 +152,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
 }
 EXPORT_SYMBOL(ip_vs_proto_get);
 
+/*
+ *	get ip_vs_protocol object data by netns and proto
+ */
+struct ip_vs_proto_data *
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+{
+	struct ip_vs_proto_data *pd;
+	unsigned hash = IP_VS_PROTO_HASH(proto);
+
+	for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+		if (pd->pp->protocol == proto)
+			return pd;
+	}
+
+	return NULL;
+}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	return __ipvs_proto_data_get(ipvs, proto);
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
 
 /*
  *	Propagate event for state change to all protocols
  */
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
 {
-	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	int i;
 
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
-		for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
-			if (pp->timeout_change)
-				pp->timeout_change(pp, flags);
+		for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+			if (pd->pp->timeout_change)
+				pd->pp->timeout_change(pd, flags);
 		}
 	}
 }
@@ -236,6 +313,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
 }
 
+/*
+ * per network name-space init
+ */
+static int __net_init __ip_vs_protocol_init(struct net *net)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
+	return 0;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd;
+	int i;
+
+	/* unregister all the ipvs proto data for this netns */
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		while ((pd = ipvs->proto_data_table[i]) != NULL)
+			unregister_ip_vs_proto_netns(net, pd);
+	}
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+	.init = __ip_vs_protocol_init,
+	.exit = __ip_vs_protocol_cleanup,
+};
 
 int __init ip_vs_protocol_init(void)
 {
@@ -265,6 +382,7 @@ int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
+	return register_pernet_subsys(&ipvs_proto_ops);
 
 	return 0;
 }
@@ -275,6 +393,7 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;
 
+	unregister_pernet_subsys(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
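unregister_ip_vs_proto_netns() above walks the singly linked per-netns hash chain with a pointer-to-pointer, so head and interior nodes are unlinked by the same code path. The idiom in isolation (hypothetical node type, illustrative only):

struct node {
	struct node *next;
};

/* Unlink victim from the chain rooted at *head; -ESRCH if absent. */
static int unlink_node(struct node **head, struct node *victim)
{
	struct node **pp;

	for (pp = head; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;	/* same code for head and interior */
			return 0;
		}
	}
	return -ESRCH;
}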
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a0461117d3..5b8eb8b12c3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,28 +41,30 @@ struct isakmp_hdr {
 #define PORT_ISAKMP	500
 
 static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
-			     int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+			     const struct ip_vs_iphdr *iph, int inverse,
+			     struct ip_vs_conn_param *p)
 {
 	if (likely(!inverse))
-		ip_vs_conn_fill_param(af, IPPROTO_UDP,
+		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
 				      &iph->saddr, htons(PORT_ISAKMP),
 				      &iph->daddr, htons(PORT_ISAKMP), p);
 	else
-		ip_vs_conn_fill_param(af, IPPROTO_UDP,
+		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
 				      &iph->daddr, htons(PORT_ISAKMP),
 				      &iph->saddr, htons(PORT_ISAKMP), p);
 }
 
 static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
 		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
 		   int inverse)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
 	cp = ip_vs_conn_in_get(&p);
 	if (!cp) {
 		/*
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
 			      "%s%s %s->%s\n",
 			      inverse ? "ICMP+" : "",
-			      pp->name,
+			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 static struct ip_vs_conn *
 ah_esp_conn_out_get(int af, const struct sk_buff *skb,
-		    struct ip_vs_protocol *pp,
 		    const struct ip_vs_iphdr *iph,
 		    unsigned int proto_off,
 		    int inverse)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
 	cp = ip_vs_conn_out_get(&p);
 	if (!cp) {
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
 			      "%s%s %s->%s\n",
 			      inverse ? "ICMP+" : "",
-			      pp->name,
+			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
 
 
 static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		     int *verdict, struct ip_vs_conn **cpp)
 {
 	/*
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	return 0;
 }
 
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
 #ifdef CONFIG_IP_VS_PROTO_AH
 struct ip_vs_protocol ip_vs_protocol_ah = {
 	.name =			"AH",
 	.protocol =		IPPROTO_AH,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
 	.app_conn_bind =	NULL,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	NULL,		/* ISAKMP */
-	.set_state_timeout =	NULL,
 };
 #endif
 
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
 	.protocol =		IPPROTO_ESP,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bcd342..b027ccc49f4 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,9 +9,10 @@
 #include <net/ip_vs.h>
 
 static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		   int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	sctp_chunkhdr_t _schunkh, *sch;
 	sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 			 sizeof(_schunkh), &_schunkh);
 	if (sch == NULL)
 		return 0;
-
+	net = skb_net(skb);
 	if ((sch->type == SCTP_CID_INIT) &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
 				     &iph.daddr, sh->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
-
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
 /*
  *	Timeout table[state]
  */
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
 	[IP_VS_SCTP_S_NONE]         = 2 * HZ,
 	[IP_VS_SCTP_S_INIT_CLI]     = 1 * 60 * HZ,
 	[IP_VS_SCTP_S_INIT_SER]     = 1 * 60 * HZ,
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
900 return "?"; 906 return "?";
901} 907}
902 908
903static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
904{
905}
906
907static int
908sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
909{
910
911return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
912 sctp_state_name_table, sname, to);
913}
914
915static inline int 909static inline int
916set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, 910set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
917 int direction, const struct sk_buff *skb) 911 int direction, const struct sk_buff *skb)
918{ 912{
919 sctp_chunkhdr_t _sctpch, *sch; 913 sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
 		IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
 				"%s:%d state: %s->%s conn->refcnt:%d\n",
-				pp->name,
+				pd->pp->name,
 				((direction == IP_VS_DIR_OUTPUT) ?
 				 "output " : "input "),
 				IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 			}
 		}
 	}
+	if (likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = next_state];
+	else	/* What to do ? */
+		cp->timeout = sctp_timeouts[cp->state = next_state];
 
-	cp->timeout = pp->timeout_table[cp->state = next_state];
-
 	return 1;
 }
 
 static int
 sctp_state_transition(struct ip_vs_conn *cp, int direction,
-		      const struct sk_buff *skb, struct ip_vs_protocol *pp)
+		      const struct sk_buff *skb, struct ip_vs_proto_data *pd)
 {
 	int ret = 0;
 
 	spin_lock(&cp->lock);
-	ret = set_sctp_state(pp, cp, direction, skb);
+	ret = set_sctp_state(pd, cp, direction, skb);
 	spin_unlock(&cp->lock);
 
 	return ret;
 }
 
-/*
- *	Hash table for SCTP application incarnations
- */
-#define	SCTP_APP_TAB_BITS	4
-#define	SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
-#define	SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
 static inline __u16 sctp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
 		& SCTP_APP_TAB_MASK;
 }
 
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
 
 	hash = sctp_app_hashkey(port);
 
-	spin_lock_bh(&sctp_app_lock);
-	list_for_each_entry(i, &sctp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->sctp_app_lock);
+	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &sctp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_sctp.appcnt);
+	list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 out:
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&ipvs->sctp_app_lock);
 
 	return ret;
 }
 
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&sctp_app_lock);
-	atomic_dec(&ip_vs_protocol_sctp.appcnt);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+	spin_lock_bh(&ipvs->sctp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&ipvs->sctp_app_lock);
 }
 
 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = sctp_app_hashkey(cp->vport);
 
-	spin_lock(&sctp_app_lock);
-	list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+	spin_lock(&ipvs->sctp_app_lock);
+	list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&sctp_app_lock);
+			spin_unlock(&ipvs->sctp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&sctp_app_lock);
+	spin_unlock(&ipvs->sctp_app_lock);
 out:
 	return result;
 }
 
-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(sctp_apps);
-	pp->timeout_table = sctp_timeouts;
-}
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+	spin_lock_init(&ipvs->sctp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
+							sizeof(sctp_timeouts));
+}
 
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
-
+	kfree(pd->timeout_table);
 }
 
 struct ip_vs_protocol ip_vs_protocol_sctp = {
 	.name =			"SCTP",
 	.protocol =		IPPROTO_SCTP,
 	.num_states =		IP_VS_SCTP_S_LAST,
 	.dont_defrag =		0,
-	.appcnt =		ATOMIC_INIT(0),
-	.init =			ip_vs_sctp_init,
-	.exit =			ip_vs_sctp_exit,
+	.init =			NULL,
+	.exit =			NULL,
+	.init_netns =		__ip_vs_sctp_init,
+	.exit_netns =		__ip_vs_sctp_exit,
 	.register_app =		sctp_register_app,
 	.unregister_app =	sctp_unregister_app,
 	.conn_schedule =	sctp_conn_schedule,
 	.conn_in_get =		ip_vs_conn_in_get_proto,
 	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		sctp_snat_handler,
 	.dnat_handler =		sctp_dnat_handler,
 	.csum_check =		sctp_csum_check,
 	.state_name =		sctp_state_name,
 	.state_transition =	sctp_state_transition,
 	.app_conn_bind =	sctp_app_conn_bind,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
-	.timeout_change =	sctp_timeout_change,
-	.set_state_timeout =	sctp_set_state_timeout,
+	.timeout_change =	NULL,
 };
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200e214..c0cc341b840 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              tcp_timeouts table has copy per netns in a hash table per
+ *              protocol ip_vs_proto_data and is handled by netns
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -28,9 +32,10 @@
 #include <net/ip_vs.h>
 
 static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
 	struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		*verdict = NF_DROP;
 		return 0;
 	}
-
+	net = skb_net(skb);
 	/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
 	if (th->syn &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
-				     th->dest))) {
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+				     &iph.daddr, th->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
 /*
  *	Timeout table[state]
  */
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_NONE]		=	2*HZ,
 	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
 	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
 {
 	int on = (flags & 1);		/* secure_tcp */
 
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
 	** for most if not for all of the applications. Something
 	** like "capabilities" (flags) for each object.
 	*/
-	tcp_state_table = (on? tcp_states_dos : tcp_states);
-}
-
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
-				       tcp_state_name_table, sname, to);
+	pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
 }
 
 static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th)
 }
 
 static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	      int direction, struct tcphdr *th)
 {
 	int state_idx;
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		goto tcp_state_out;
 	}
 
-	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+	new_state =
+		pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
 
   tcp_state_out:
 	if (new_state != cp->state) {
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
 		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
 			      "%s:%d state: %s->%s conn->refcnt:%d\n",
-			      pp->name,
+			      pd->pp->name,
 			      ((state_off == TCP_DIR_OUTPUT) ?
 			       "output " : "input "),
 			      th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		}
 	}
 
-	cp->timeout = pp->timeout_table[cp->state = new_state];
+	if (likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = new_state];
+	else	/* What to do ? */
+		cp->timeout = tcp_timeouts[cp->state = new_state];
 }
 
-
 /*
  *	Handle state transitions
  */
 static int
 tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		     const struct sk_buff *skb,
-		     struct ip_vs_protocol *pp)
+		     struct ip_vs_proto_data *pd)
 {
 	struct tcphdr _tcph, *th;
 
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		return 0;
 
 	spin_lock(&cp->lock);
-	set_tcp_state(pp, cp, direction, th);
+	set_tcp_state(pd, cp, direction, th);
 	spin_unlock(&cp->lock);
 
 	return 1;
 }
 
-
-/*
- *	Hash table for TCP application incarnations
- */
-#define	TCP_APP_TAB_BITS	4
-#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
-#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
 static inline __u16 tcp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port)
 }
 
 
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
 
 	hash = tcp_app_hashkey(port);
 
-	spin_lock_bh(&tcp_app_lock);
-	list_for_each_entry(i, &tcp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->tcp_app_lock);
+	list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &tcp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_tcp.appcnt);
+	list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 
  out:
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&ipvs->tcp_app_lock);
 	return ret;
 }
 
 
 static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&tcp_app_lock);
-	atomic_dec(&ip_vs_protocol_tcp.appcnt);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+	spin_lock_bh(&ipvs->tcp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&ipvs->tcp_app_lock);
 }
 
 
 static int
 tcp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = tcp_app_hashkey(cp->vport);
 
-	spin_lock(&tcp_app_lock);
-	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+	spin_lock(&ipvs->tcp_app_lock);
+	list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&tcp_app_lock);
+			spin_unlock(&ipvs->tcp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&tcp_app_lock);
+	spin_unlock(&ipvs->tcp_app_lock);
 
  out:
 	return result;
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
665/* 664/*
666 * Set LISTEN timeout. (ip_vs_conn_put will setup timer) 665 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
667 */ 666 */
668void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) 667void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
669{ 668{
669 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
670
670 spin_lock(&cp->lock); 671 spin_lock(&cp->lock);
671 cp->state = IP_VS_TCP_S_LISTEN; 672 cp->state = IP_VS_TCP_S_LISTEN;
672 cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; 673 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
674 : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
673 spin_unlock(&cp->lock); 675 spin_unlock(&cp->lock);
674} 676}
675 677
676 678/* ---------------------------------------------
677static void ip_vs_tcp_init(struct ip_vs_protocol *pp) 679 * timeouts is netns related now.
680 * ---------------------------------------------
681 */
682static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
678{ 683{
679 IP_VS_INIT_HASH_TABLE(tcp_apps); 684 struct netns_ipvs *ipvs = net_ipvs(net);
680 pp->timeout_table = tcp_timeouts;
681}
682 685
686 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
687 spin_lock_init(&ipvs->tcp_app_lock);
688 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
689 sizeof(tcp_timeouts));
690 pd->tcp_state_table = tcp_states;
691}
683 692
684static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) 693static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
685{ 694{
695 kfree(pd->timeout_table);
686} 696}
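
With .init/.exit now NULL and the work moved to the init_netns/exit_netns pair, every namespace gets its own writable copy of the const timeout template, so tuning one namespace cannot leak into another. A user-space sketch of the copy/free pattern; ip_vs_create_timeout_table is assumed here to be essentially a kmemdup of the template:

#include <stdlib.h>
#include <string.h>

/* Duplicate a shared const template so one instance can tune its
 * timeouts without affecting any other; illustrative stand-in for
 * the kmemdup()-style helper used above. */
static int *create_timeout_table(const int *tmpl, size_t size)
{
	int *copy = malloc(size);

	if (copy)
		memcpy(copy, tmpl, size);
	return copy;	/* the matching exit_netns hook frees it */
}
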
687 697
688 698
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
691 .protocol = IPPROTO_TCP, 701 .protocol = IPPROTO_TCP,
692 .num_states = IP_VS_TCP_S_LAST, 702 .num_states = IP_VS_TCP_S_LAST,
693 .dont_defrag = 0, 703 .dont_defrag = 0,
694 .appcnt = ATOMIC_INIT(0), 704 .init = NULL,
695 .init = ip_vs_tcp_init, 705 .exit = NULL,
696 .exit = ip_vs_tcp_exit, 706 .init_netns = __ip_vs_tcp_init,
707 .exit_netns = __ip_vs_tcp_exit,
697 .register_app = tcp_register_app, 708 .register_app = tcp_register_app,
698 .unregister_app = tcp_unregister_app, 709 .unregister_app = tcp_unregister_app,
699 .conn_schedule = tcp_conn_schedule, 710 .conn_schedule = tcp_conn_schedule,
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
707 .app_conn_bind = tcp_app_conn_bind, 718 .app_conn_bind = tcp_app_conn_bind,
708 .debug_packet = ip_vs_tcpudp_debug_packet, 719 .debug_packet = ip_vs_tcpudp_debug_packet,
709 .timeout_change = tcp_timeout_change, 720 .timeout_change = tcp_timeout_change,
710 .set_state_timeout = tcp_set_state_timeout,
711}; 721};
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a06bb0..f1282cbe6fe 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
13 * Network name space (netns) aware.
13 * 14 *
14 */ 15 */
15 16
@@ -28,9 +29,10 @@
28#include <net/ip6_checksum.h> 29#include <net/ip6_checksum.h>
29 30
30static int 31static int
31udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
32 int *verdict, struct ip_vs_conn **cpp) 33 int *verdict, struct ip_vs_conn **cpp)
33{ 34{
35 struct net *net;
34 struct ip_vs_service *svc; 36 struct ip_vs_service *svc;
35 struct udphdr _udph, *uh; 37 struct udphdr _udph, *uh;
36 struct ip_vs_iphdr iph; 38 struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
42 *verdict = NF_DROP; 44 *verdict = NF_DROP;
43 return 0; 45 return 0;
44 } 46 }
45 47 net = skb_net(skb);
46 svc = ip_vs_service_get(af, skb->mark, iph.protocol, 48 svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
47 &iph.daddr, uh->dest); 49 &iph.daddr, uh->dest);
48 if (svc) { 50 if (svc) {
49 int ignored; 51 int ignored;
50 52
51 if (ip_vs_todrop()) { 53 if (ip_vs_todrop(net_ipvs(net))) {
52 /* 54 /*
53 * It seems that we are very loaded. 55 * It seems that we are very loaded.
54 * We have to drop this packet :( 56 * We have to drop this packet :(
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
62 * Let the virtual server select a real server for the 64 * Let the virtual server select a real server for the
63 * incoming connection, and create a connection entry. 65 * incoming connection, and create a connection entry.
64 */ 66 */
65 *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
66 if (!*cpp && !ignored) { 68 if (!*cpp && ignored <= 0) {
67 *verdict = ip_vs_leave(svc, skb, pp); 69 if (!ignored)
70 *verdict = ip_vs_leave(svc, skb, pd);
71 else {
72 ip_vs_service_put(svc);
73 *verdict = NF_DROP;
74 }
68 return 0; 75 return 0;
69 } 76 }
70 ip_vs_service_put(svc); 77 ip_vs_service_put(svc);
71 } 78 }
79 /* NF_ACCEPT */
72 return 1; 80 return 1;
73} 81}
74 82
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
338 return 1; 346 return 1;
339} 347}
340 348
341
342/*
343 * Note: the caller guarantees that only one of register_app,
344 * unregister_app or app_conn_bind is called each time.
345 */
346
347#define UDP_APP_TAB_BITS 4
348#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
349#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
350
351static struct list_head udp_apps[UDP_APP_TAB_SIZE];
352static DEFINE_SPINLOCK(udp_app_lock);
353
354static inline __u16 udp_app_hashkey(__be16 port) 349static inline __u16 udp_app_hashkey(__be16 port)
355{ 350{
356 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) 351 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port)
358} 353}
359 354
360 355
361static int udp_register_app(struct ip_vs_app *inc) 356static int udp_register_app(struct net *net, struct ip_vs_app *inc)
362{ 357{
363 struct ip_vs_app *i; 358 struct ip_vs_app *i;
364 __u16 hash; 359 __u16 hash;
365 __be16 port = inc->port; 360 __be16 port = inc->port;
366 int ret = 0; 361 int ret = 0;
362 struct netns_ipvs *ipvs = net_ipvs(net);
363 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
367 364
368 hash = udp_app_hashkey(port); 365 hash = udp_app_hashkey(port);
369 366
370 367
371 spin_lock_bh(&udp_app_lock); 368 spin_lock_bh(&ipvs->udp_app_lock);
372 list_for_each_entry(i, &udp_apps[hash], p_list) { 369 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
373 if (i->port == port) { 370 if (i->port == port) {
374 ret = -EEXIST; 371 ret = -EEXIST;
375 goto out; 372 goto out;
376 } 373 }
377 } 374 }
378 list_add(&inc->p_list, &udp_apps[hash]); 375 list_add(&inc->p_list, &ipvs->udp_apps[hash]);
379 atomic_inc(&ip_vs_protocol_udp.appcnt); 376 atomic_inc(&pd->appcnt);
380 377
381 out: 378 out:
382 spin_unlock_bh(&udp_app_lock); 379 spin_unlock_bh(&ipvs->udp_app_lock);
383 return ret; 380 return ret;
384} 381}
385 382
386 383
387static void 384static void
388udp_unregister_app(struct ip_vs_app *inc) 385udp_unregister_app(struct net *net, struct ip_vs_app *inc)
389{ 386{
390 spin_lock_bh(&udp_app_lock); 387 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
391 atomic_dec(&ip_vs_protocol_udp.appcnt); 388 struct netns_ipvs *ipvs = net_ipvs(net);
389
390 spin_lock_bh(&ipvs->udp_app_lock);
391 atomic_dec(&pd->appcnt);
392 list_del(&inc->p_list); 392 list_del(&inc->p_list);
393 spin_unlock_bh(&udp_app_lock); 393 spin_unlock_bh(&ipvs->udp_app_lock);
394} 394}
395 395
396 396
397static int udp_app_conn_bind(struct ip_vs_conn *cp) 397static int udp_app_conn_bind(struct ip_vs_conn *cp)
398{ 398{
399 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
399 int hash; 400 int hash;
400 struct ip_vs_app *inc; 401 struct ip_vs_app *inc;
401 int result = 0; 402 int result = 0;
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
407 /* Lookup application incarnations and bind the right one */ 408 /* Lookup application incarnations and bind the right one */
408 hash = udp_app_hashkey(cp->vport); 409 hash = udp_app_hashkey(cp->vport);
409 410
410 spin_lock(&udp_app_lock); 411 spin_lock(&ipvs->udp_app_lock);
411 list_for_each_entry(inc, &udp_apps[hash], p_list) { 412 list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
412 if (inc->port == cp->vport) { 413 if (inc->port == cp->vport) {
413 if (unlikely(!ip_vs_app_inc_get(inc))) 414 if (unlikely(!ip_vs_app_inc_get(inc)))
414 break; 415 break;
415 spin_unlock(&udp_app_lock); 416 spin_unlock(&ipvs->udp_app_lock);
416 417
417 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 418 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
418 "%s:%u to app %s on port %u\n", 419 "%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
429 goto out; 430 goto out;
430 } 431 }
431 } 432 }
432 spin_unlock(&udp_app_lock); 433 spin_unlock(&ipvs->udp_app_lock);
433 434
434 out: 435 out:
435 return result; 436 return result;
436} 437}
437 438
438 439
439static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 440static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
440 [IP_VS_UDP_S_NORMAL] = 5*60*HZ, 441 [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
441 [IP_VS_UDP_S_LAST] = 2*HZ, 442 [IP_VS_UDP_S_LAST] = 2*HZ,
442}; 443};
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
446 [IP_VS_UDP_S_LAST] = "BUG!", 447 [IP_VS_UDP_S_LAST] = "BUG!",
447}; 448};
448 449
449
450static int
451udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
452{
453 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
454 udp_state_name_table, sname, to);
455}
456
457static const char * udp_state_name(int state) 450static const char * udp_state_name(int state)
458{ 451{
459 if (state >= IP_VS_UDP_S_LAST) 452 if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state)
464static int 457static int
465udp_state_transition(struct ip_vs_conn *cp, int direction, 458udp_state_transition(struct ip_vs_conn *cp, int direction,
466 const struct sk_buff *skb, 459 const struct sk_buff *skb,
467 struct ip_vs_protocol *pp) 460 struct ip_vs_proto_data *pd)
468{ 461{
469 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; 462 if (unlikely(!pd)) {
463 pr_err("UDP no ns data\n");
464 return 0;
465 }
466
467 cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
470 return 1; 468 return 1;
471} 469}
472 470
473static void udp_init(struct ip_vs_protocol *pp) 471static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
474{ 472{
475 IP_VS_INIT_HASH_TABLE(udp_apps); 473 struct netns_ipvs *ipvs = net_ipvs(net);
476 pp->timeout_table = udp_timeouts; 474
475 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
476 spin_lock_init(&ipvs->udp_app_lock);
477 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
478 sizeof(udp_timeouts));
477} 479}
478 480
479static void udp_exit(struct ip_vs_protocol *pp) 481static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
480{ 482{
483 kfree(pd->timeout_table);
481} 484}
482 485
483 486
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
486 .protocol = IPPROTO_UDP, 489 .protocol = IPPROTO_UDP,
487 .num_states = IP_VS_UDP_S_LAST, 490 .num_states = IP_VS_UDP_S_LAST,
488 .dont_defrag = 0, 491 .dont_defrag = 0,
489 .init = udp_init, 492 .init = NULL,
490 .exit = udp_exit, 493 .exit = NULL,
494 .init_netns = __udp_init,
495 .exit_netns = __udp_exit,
491 .conn_schedule = udp_conn_schedule, 496 .conn_schedule = udp_conn_schedule,
492 .conn_in_get = ip_vs_conn_in_get_proto, 497 .conn_in_get = ip_vs_conn_in_get_proto,
493 .conn_out_get = ip_vs_conn_out_get_proto, 498 .conn_out_get = ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
501 .app_conn_bind = udp_app_conn_bind, 506 .app_conn_bind = udp_app_conn_bind,
502 .debug_packet = ip_vs_tcpudp_debug_packet, 507 .debug_packet = ip_vs_tcpudp_debug_packet,
503 .timeout_change = NULL, 508 .timeout_change = NULL,
504 .set_state_timeout = udp_set_state_timeout,
505}; 509};
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index e210f37d8ea..c49b388d108 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -72,7 +72,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
72 q = q->next; 72 q = q->next;
73 } while (q != p); 73 } while (q != p);
74 write_unlock(&svc->sched_lock); 74 write_unlock(&svc->sched_lock);
75 IP_VS_ERR_RL("RR: no destination available\n"); 75 ip_vs_scheduler_err(svc, "no destination available");
76 return NULL; 76 return NULL;
77 77
78 out: 78 out:
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 076ebe00435..08dbdd5bc18 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -29,6 +29,7 @@
29 29
30#include <net/ip_vs.h> 30#include <net/ip_vs.h>
31 31
32EXPORT_SYMBOL(ip_vs_scheduler_err);
32/* 33/*
33 * IPVS scheduler list 34 * IPVS scheduler list
34 */ 35 */
@@ -146,6 +147,30 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
146 module_put(scheduler->module); 147 module_put(scheduler->module);
147} 148}
148 149
150/*
151 * Common error output helper for schedulers
152 */
153
154void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
155{
156 if (svc->fwmark) {
157 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
158 svc->scheduler->name, svc->fwmark,
159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n",
163 svc->scheduler->name,
164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg);
166#endif
167 } else {
168 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
169 svc->scheduler->name,
170 ip_vs_proto_name(svc->protocol),
171 &svc->addr.ip, ntohs(svc->port), msg);
172 }
173}
149 174
150/* 175/*
151 * Register a scheduler in the scheduler list 176 * Register a scheduler in the scheduler list
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 1ab75a9dc40..89ead246ed3 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -87,7 +87,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
87 goto nextstage; 87 goto nextstage;
88 } 88 }
89 } 89 }
90 IP_VS_ERR_RL("SED: no destination available\n"); 90 ip_vs_scheduler_err(svc, "no destination available");
91 return NULL; 91 return NULL;
92 92
93 /* 93 /*
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e6cc174fbc0..b5e2556c581 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -223,7 +223,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
223 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 223 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
224 || atomic_read(&dest->weight) <= 0 224 || atomic_read(&dest->weight) <= 0
225 || is_overloaded(dest)) { 225 || is_overloaded(dest)) {
226 IP_VS_ERR_RL("SH: no destination available\n"); 226 ip_vs_scheduler_err(svc, "no destination available");
227 return NULL; 227 return NULL;
228 } 228 }
229 229
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index ab85aedea17..3e7961e85e9 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version 1 is capable of handling both version 0 and 1 messages.
9 * Version 0 is the plain old format.
10 * Note that Version 0 receivers will just drop Ver 1 messages.
11 * Version 1 is capable of handling IPv6, persistence data,
12 * time-outs, and firewall marks.
13 * In ver. 1 "ip_vs_sync_conn_options" is sent in network order.
14 * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
15 *
16 * Definitions Message: is a complete datagram
17 * Sync_conn: is a part of a Message
18 * Param Data is an option to a Sync_conn.
19 *
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 20 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * 21 *
10 * ip_vs_sync: sync connection info from master load balancer to backups 22 * ip_vs_sync: sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
15 * Alexandre Cassen : Added SyncID support for incoming sync 27 * Alexandre Cassen : Added SyncID support for incoming sync
16 * messages filtering. 28 * messages filtering.
17 * Justin Ossevoort : Fix endian problem on sync message size. 29 * Justin Ossevoort : Fix endian problem on sync message size.
30 * Hans Schillstrom : Added Version 1: i.e. IPv6,
31 * Persistence support, fwmark and time-out.
18 */ 32 */
19 33
20#define KMSG_COMPONENT "IPVS" 34#define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
35#include <linux/wait.h> 49#include <linux/wait.h>
36#include <linux/kernel.h> 50#include <linux/kernel.h>
37 51
52#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
53
38#include <net/ip.h> 54#include <net/ip.h>
39#include <net/sock.h> 55#include <net/sock.h>
40 56
@@ -43,11 +59,13 @@
43#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ 59#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
44#define IP_VS_SYNC_PORT 8848 /* multicast port */ 60#define IP_VS_SYNC_PORT 8848 /* multicast port */
45 61
62#define SYNC_PROTO_VER 1 /* Protocol version in header */
46 63
47/* 64/*
48 * IPVS sync connection entry 65 * IPVS sync connection entry
66 * Version 0, i.e. original version.
49 */ 67 */
50struct ip_vs_sync_conn { 68struct ip_vs_sync_conn_v0 {
51 __u8 reserved; 69 __u8 reserved;
52 70
53 /* Protocol, addresses and port numbers */ 71 /* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options {
71 struct ip_vs_seq out_seq; /* outgoing seq. struct */ 89 struct ip_vs_seq out_seq; /* outgoing seq. struct */
72}; 90};
73 91
92/*
93 Sync Connection format (sync_conn)
94
95 0 1 2 3
96 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
97 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
98 | Type | Protocol | Ver. | Size |
99 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 | Flags |
101 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
102 | State | cport |
103 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104 | vport | dport |
105 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106 | fwmark |
107 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
108 | timeout (in sec.) |
109 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
110 | ... |
111 | IP-Addresses (v4 or v6) |
112 | ... |
113 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
114 Optional Parameters.
115 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
116 | Param. Type | Param. Length | Param. data |
117 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
118 | ... |
119 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
120 | | Param Type | Param. Length |
121 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
122 | Param data |
123 | Last Param data should be padded for 32 bit alignment |
124 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
125*/
126
127/*
128 * Type 0, IPv4 sync connection format
129 */
130struct ip_vs_sync_v4 {
131 __u8 type;
132 __u8 protocol; /* Which protocol (TCP/UDP) */
133 __be16 ver_size; /* Version msb 4 bits */
134 /* Flags and state transition */
135 __be32 flags; /* status flags */
136 __be16 state; /* state info */
137 /* Protocol, addresses and port numbers */
138 __be16 cport;
139 __be16 vport;
140 __be16 dport;
141 __be32 fwmark; /* Firewall mark from skb */
142 __be32 timeout; /* cp timeout */
143 __be32 caddr; /* client address */
144 __be32 vaddr; /* virtual address */
145 __be32 daddr; /* destination address */
146 /* The sequence options start here */
147 /* PE data padded to 32bit alignment after seq. options */
148};
149/*
150 * Type 2 messages IPv6
151 */
152struct ip_vs_sync_v6 {
153 __u8 type;
154 __u8 protocol; /* Which protocol (TCP/UDP) */
155 __be16 ver_size; /* Version msb 4 bits */
156 /* Flags and state transition */
157 __be32 flags; /* status flags */
158 __be16 state; /* state info */
159 /* Protocol, addresses and port numbers */
160 __be16 cport;
161 __be16 vport;
162 __be16 dport;
163 __be32 fwmark; /* Firewall mark from skb */
164 __be32 timeout; /* cp timeout */
165 struct in6_addr caddr; /* client address */
166 struct in6_addr vaddr; /* virtual address */
167 struct in6_addr daddr; /* destination address */
168 /* The sequence options start here */
169 /* PE data padded to 32bit alignment after seq. options */
170};
171
172union ip_vs_sync_conn {
173 struct ip_vs_sync_v4 v4;
174 struct ip_vs_sync_v6 v6;
175};
176
177/* Bits in Type field in above */
178#define STYPE_INET6 0
179#define STYPE_F_INET6 (1 << STYPE_INET6)
180
181#define SVER_SHIFT 12 /* Shift to get version */
182#define SVER_MASK 0x0fff /* Mask to strip version */
183
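
SVER_SHIFT and SVER_MASK split ver_size exactly as the diagram shows: entry version in the top 4 bits, entry byte size in the low 12, carried in network order. A small sketch of the pack/unpack arithmetic:

#include <stdint.h>
#include <arpa/inet.h>

#define SVER_SHIFT 12		/* shift to get version */
#define SVER_MASK  0x0fff	/* mask to strip version */

static uint16_t pack_ver_size(unsigned ver, unsigned size)
{
	return htons((uint16_t)((ver << SVER_SHIFT) | (size & SVER_MASK)));
}

static unsigned unpack_size(uint16_t ver_size)	/* wire value */
{
	return ntohs(ver_size) & SVER_MASK;
}

static unsigned unpack_ver(uint16_t ver_size)
{
	return ntohs(ver_size) >> SVER_SHIFT;
}
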
184#define IPVS_OPT_SEQ_DATA 1
185#define IPVS_OPT_PE_DATA 2
186#define IPVS_OPT_PE_NAME 3
187#define IPVS_OPT_PARAM 7
188
189#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
190#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
191#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
192#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
193
74struct ip_vs_sync_thread_data { 194struct ip_vs_sync_thread_data {
195 struct net *net;
75 struct socket *sock; 196 struct socket *sock;
76 char *buf; 197 char *buf;
77}; 198};
78 199
79#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) 200/* Version 0 definition of packet sizes */
201#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
80#define FULL_CONN_SIZE \ 202#define FULL_CONN_SIZE \
81(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) 203(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
82 204
83 205
84/* 206/*
85 The master multicasts messages to the backup load balancers in the 207 The master multicasts messages (Datagrams) to the backup load balancers
86 following format. 208 in the following format.
209
210 Version 1:
211 Note: the first byte must be Zero, so ver 0 receivers will drop the packet.
87 212
88 0 1 2 3 213 0 1 2 3
89 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 214 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
90 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 215 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
91 | Count Conns | SyncID | Size | 216 | 0 | SyncID | Size |
217 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
218 | Count Conns | Version | Reserved, set to Zero |
92 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 219 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
93 | | 220 | |
94 | IPVS Sync Connection (1) | 221 | IPVS Sync Connection (1) |
95 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 222 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
96 | . | 223 | . |
97 | . | 224 ~ . ~
98 | . | 225 | . |
99 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 226 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 | | 227 | |
101 | IPVS Sync Connection (n) | 228 | IPVS Sync Connection (n) |
102 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 229 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
230
231 Version 0 Header
232 0 1 2 3
233 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
234 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
235 | Count Conns | SyncID | Size |
236 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
237 | IPVS Sync Connection (1) |
103*/ 238*/
104 239
105#define SYNC_MESG_HEADER_LEN 4 240#define SYNC_MESG_HEADER_LEN 4
106#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ 241#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
107 242
108struct ip_vs_sync_mesg { 243/* Version 0 header */
244struct ip_vs_sync_mesg_v0 {
109 __u8 nr_conns; 245 __u8 nr_conns;
110 __u8 syncid; 246 __u8 syncid;
111 __u16 size; 247 __u16 size;
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg {
113 /* ip_vs_sync_conn entries start here */ 249 /* ip_vs_sync_conn entries start here */
114}; 250};
115 251
116/* the maximum length of sync (sending/receiving) message */ 252/* Version 1 header */
117static int sync_send_mesg_maxlen; 253struct ip_vs_sync_mesg {
118static int sync_recv_mesg_maxlen; 254 __u8 reserved; /* must be zero */
255 __u8 syncid;
256 __u16 size;
257 __u8 nr_conns;
258 __s8 version; /* SYNC_PROTO_VER */
259 __u16 spare;
260 /* ip_vs_sync_conn entries start here */
261};
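
Because the first byte of the v1 header occupies the slot where v0 kept nr_conns, a version 0 backup reads a v1 datagram as carrying zero connections and drops it. A sketch of the acceptance test the receive path applies (mirroring the check in ip_vs_process_message further below):

#include <stdint.h>
#include <stdbool.h>

struct sync_mesg_v1 {		/* mirrors the wire layout above */
	uint8_t  reserved;	/* must be zero (v0 nr_conns slot) */
	uint8_t  syncid;
	uint16_t size;		/* network order on the wire */
	uint8_t  nr_conns;
	int8_t   version;	/* SYNC_PROTO_VER == 1 */
	uint16_t spare;		/* must be zero */
};

static bool is_v1_header(const struct sync_mesg_v1 *m)
{
	return m->version == 1 && m->reserved == 0 && m->spare == 0;
}
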
119 262
120struct ip_vs_sync_buff { 263struct ip_vs_sync_buff {
121 struct list_head list; 264 struct list_head list;
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff {
127 unsigned char *end; 270 unsigned char *end;
128}; 271};
129 272
130
131/* the sync_buff list head and the lock */
132static LIST_HEAD(ip_vs_sync_queue);
133static DEFINE_SPINLOCK(ip_vs_sync_lock);
134
135/* current sync_buff for accepting new conn entries */
136static struct ip_vs_sync_buff *curr_sb = NULL;
137static DEFINE_SPINLOCK(curr_sb_lock);
138
139/* ipvs sync daemon state */
140volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
141volatile int ip_vs_master_syncid = 0;
142volatile int ip_vs_backup_syncid = 0;
143
144/* multicast interface name */
145char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
146char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
147
148/* sync daemon tasks */
149static struct task_struct *sync_master_thread;
150static struct task_struct *sync_backup_thread;
151
152/* multicast addr */ 273/* multicast addr */
153static struct sockaddr_in mcast_addr = { 274static struct sockaddr_in mcast_addr = {
154 .sin_family = AF_INET, 275 .sin_family = AF_INET,
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = {
156 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), 277 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
157}; 278};
158 279
280/*
281 * Copy of struct ip_vs_seq
282 * From unaligned network order to aligned host order
283 */
284static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
285{
286 ho->init_seq = get_unaligned_be32(&no->init_seq);
287 ho->delta = get_unaligned_be32(&no->delta);
288 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
289}
290
291/*
292 * Copy of struct ip_vs_seq
293 * From Aligned host order to unaligned network order
294 */
295static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
296{
297 put_unaligned_be32(ho->init_seq, &no->init_seq);
298 put_unaligned_be32(ho->delta, &no->delta);
299 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
300}
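
The sequence options may land at any byte offset inside a sync_conn, hence the unaligned accessors. User-space equivalents, using memcpy so the compiler never dereferences a misaligned pointer:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static uint32_t get_be32(const void *p)		/* ~ get_unaligned_be32() */
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));
	return ntohl(v);
}

static void put_be32(uint32_t v, void *p)	/* ~ put_unaligned_be32() */
{
	v = htonl(v);
	memcpy(p, &v, sizeof(v));
}
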
159 301
160static inline struct ip_vs_sync_buff *sb_dequeue(void) 302static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
161{ 303{
162 struct ip_vs_sync_buff *sb; 304 struct ip_vs_sync_buff *sb;
163 305
164 spin_lock_bh(&ip_vs_sync_lock); 306 spin_lock_bh(&ipvs->sync_lock);
165 if (list_empty(&ip_vs_sync_queue)) { 307 if (list_empty(&ipvs->sync_queue)) {
166 sb = NULL; 308 sb = NULL;
167 } else { 309 } else {
168 sb = list_entry(ip_vs_sync_queue.next, 310 sb = list_entry(ipvs->sync_queue.next,
169 struct ip_vs_sync_buff, 311 struct ip_vs_sync_buff,
170 list); 312 list);
171 list_del(&sb->list); 313 list_del(&sb->list);
172 } 314 }
173 spin_unlock_bh(&ip_vs_sync_lock); 315 spin_unlock_bh(&ipvs->sync_lock);
174 316
175 return sb; 317 return sb;
176} 318}
177 319
178static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) 320/*
321 * Create a new sync buffer for Version 1 proto.
322 */
323static inline struct ip_vs_sync_buff *
324ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
179{ 325{
180 struct ip_vs_sync_buff *sb; 326 struct ip_vs_sync_buff *sb;
181 327
182 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 328 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
183 return NULL; 329 return NULL;
184 330
185 if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { 331 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
332 if (!sb->mesg) {
186 kfree(sb); 333 kfree(sb);
187 return NULL; 334 return NULL;
188 } 335 }
336 sb->mesg->reserved = 0; /* old nr_conns, i.e. must be zero now */
337 sb->mesg->version = SYNC_PROTO_VER;
338 sb->mesg->syncid = ipvs->master_syncid;
339 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
189 sb->mesg->nr_conns = 0; 340 sb->mesg->nr_conns = 0;
190 sb->mesg->syncid = ip_vs_master_syncid; 341 sb->mesg->spare = 0;
191 sb->mesg->size = 4; 342 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
192 sb->head = (unsigned char *)sb->mesg + 4; 343 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
193 sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; 344
194 sb->firstuse = jiffies; 345 sb->firstuse = jiffies;
195 return sb; 346 return sb;
196} 347}
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
201 kfree(sb); 352 kfree(sb);
202} 353}
203 354
204static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) 355static inline void sb_queue_tail(struct netns_ipvs *ipvs)
205{ 356{
206 spin_lock(&ip_vs_sync_lock); 357 struct ip_vs_sync_buff *sb = ipvs->sync_buff;
207 if (ip_vs_sync_state & IP_VS_STATE_MASTER) 358
208 list_add_tail(&sb->list, &ip_vs_sync_queue); 359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER)
361 list_add_tail(&sb->list, &ipvs->sync_queue);
209 else 362 else
210 ip_vs_sync_buff_release(sb); 363 ip_vs_sync_buff_release(sb);
211 spin_unlock(&ip_vs_sync_lock); 364 spin_unlock(&ipvs->sync_lock);
212} 365}
213 366
214/* 367/*
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
216 * than the specified time or the specified time is zero. 369 * than the specified time or the specified time is zero.
217 */ 370 */
218static inline struct ip_vs_sync_buff * 371static inline struct ip_vs_sync_buff *
219get_curr_sync_buff(unsigned long time) 372get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
220{ 373{
221 struct ip_vs_sync_buff *sb; 374 struct ip_vs_sync_buff *sb;
222 375
223 spin_lock_bh(&curr_sb_lock); 376 spin_lock_bh(&ipvs->sync_buff_lock);
224 if (curr_sb && (time == 0 || 377 if (ipvs->sync_buff &&
225 time_before(jiffies - curr_sb->firstuse, time))) { 378 time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
226 sb = curr_sb; 379 sb = ipvs->sync_buff;
227 curr_sb = NULL; 380 ipvs->sync_buff = NULL;
228 } else 381 } else
229 sb = NULL; 382 sb = NULL;
230 spin_unlock_bh(&curr_sb_lock); 383 spin_unlock_bh(&ipvs->sync_buff_lock);
231 return sb; 384 return sb;
232} 385}
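
Note the semantic change above: the current buffer is now handed out once it has been in use for at least `time` jiffies, using wrap-safe arithmetic. A sketch of that age test on a free-running 32-bit tick counter, the same idea as the kernel's time_after_eq():

#include <stdint.h>
#include <stdbool.h>

static bool ticks_after_eq(uint32_t a, uint32_t b)	/* wrap-safe a >= b */
{
	return (int32_t)(a - b) >= 0;
}

static bool buff_is_due(uint32_t now, uint32_t firstuse, uint32_t age)
{
	return ticks_after_eq(now - firstuse, age);
}
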
233 386
387/*
388 * Switch mode from sending version 0 or 1
389 * - must handle sync_buf
390 */
391void ip_vs_sync_switch_mode(struct net *net, int mode)
392{
393 struct netns_ipvs *ipvs = net_ipvs(net);
394
395 if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
396 return;
397 if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
398 return;
399
400 spin_lock_bh(&ipvs->sync_buff_lock);
401 /* Buffer empty? Then let buf_create do the job */
402 if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
403 kfree(ipvs->sync_buff);
404 ipvs->sync_buff = NULL;
405 } else {
406 spin_lock_bh(&ipvs->sync_lock);
407 if (ipvs->sync_state & IP_VS_STATE_MASTER)
408 list_add_tail(&ipvs->sync_buff->list,
409 &ipvs->sync_queue);
410 else
411 ip_vs_sync_buff_release(ipvs->sync_buff);
412 spin_unlock_bh(&ipvs->sync_lock);
413 }
414 spin_unlock_bh(&ipvs->sync_buff_lock);
415}
234 416
235/* 417/*
418 * Create a new sync buffer for Version 0 proto.
419 */
420static inline struct ip_vs_sync_buff *
421ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
422{
423 struct ip_vs_sync_buff *sb;
424 struct ip_vs_sync_mesg_v0 *mesg;
425
426 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
427 return NULL;
428
429 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
430 if (!sb->mesg) {
431 kfree(sb);
432 return NULL;
433 }
434 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
435 mesg->nr_conns = 0;
436 mesg->syncid = ipvs->master_syncid;
437 mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
438 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
439 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
440 sb->firstuse = jiffies;
441 return sb;
442}
443
444/*
445 * Version 0, can be switched in by sysctl.
236 * Add an ip_vs_conn information into the current sync_buff. 446 * Add an ip_vs_conn information into the current sync_buff.
237 * Called by ip_vs_in.
238 */ 447 */
239void ip_vs_sync_conn(struct ip_vs_conn *cp) 448void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
240{ 449{
241 struct ip_vs_sync_mesg *m; 450 struct netns_ipvs *ipvs = net_ipvs(net);
242 struct ip_vs_sync_conn *s; 451 struct ip_vs_sync_mesg_v0 *m;
452 struct ip_vs_sync_conn_v0 *s;
243 int len; 453 int len;
244 454
245 spin_lock(&curr_sb_lock); 455 if (unlikely(cp->af != AF_INET))
246 if (!curr_sb) { 456 return;
247 if (!(curr_sb=ip_vs_sync_buff_create())) { 457 /* Do not sync ONE PACKET */
248 spin_unlock(&curr_sb_lock); 458 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
459 return;
460
461 spin_lock(&ipvs->sync_buff_lock);
462 if (!ipvs->sync_buff) {
463 ipvs->sync_buff =
464 ip_vs_sync_buff_create_v0(ipvs);
465 if (!ipvs->sync_buff) {
466 spin_unlock(&ipvs->sync_buff_lock);
249 pr_err("ip_vs_sync_buff_create failed.\n"); 467 pr_err("ip_vs_sync_buff_create failed.\n");
250 return; 468 return;
251 } 469 }
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
253 471
254 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : 472 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
255 SIMPLE_CONN_SIZE; 473 SIMPLE_CONN_SIZE;
256 m = curr_sb->mesg; 474 m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
257 s = (struct ip_vs_sync_conn *)curr_sb->head; 475 s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
258 476
259 /* copy members */ 477 /* copy members */
478 s->reserved = 0;
260 s->protocol = cp->protocol; 479 s->protocol = cp->protocol;
261 s->cport = cp->cport; 480 s->cport = cp->cport;
262 s->vport = cp->vport; 481 s->vport = cp->vport;
@@ -274,83 +493,365 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
274 493
275 m->nr_conns++; 494 m->nr_conns++;
276 m->size += len; 495 m->size += len;
277 curr_sb->head += len; 496 ipvs->sync_buff->head += len;
278 497
279 /* check if there is a space for next one */ 498 /* check if there is a space for next one */
280 if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { 499 if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
281 sb_queue_tail(curr_sb); 500 sb_queue_tail(ipvs);
282 curr_sb = NULL; 501 ipvs->sync_buff = NULL;
283 } 502 }
284 spin_unlock(&curr_sb_lock); 503 spin_unlock(&ipvs->sync_buff_lock);
285 504
286 /* synchronize its controller if it has */ 505 /* synchronize its controller if it has */
287 if (cp->control) 506 if (cp->control)
288 ip_vs_sync_conn(cp->control); 507 ip_vs_sync_conn(net, cp->control);
508}
509
510/*
511 * Add an ip_vs_conn information into the current sync_buff.
512 * Called by ip_vs_in.
513 * Sending Version 1 messages
514 */
515void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
516{
517 struct netns_ipvs *ipvs = net_ipvs(net);
518 struct ip_vs_sync_mesg *m;
519 union ip_vs_sync_conn *s;
520 __u8 *p;
521 unsigned int len, pe_name_len, pad;
522
523 /* Handle old version of the protocol */
524 if (sysctl_sync_ver(ipvs) == 0) {
525 ip_vs_sync_conn_v0(net, cp);
526 return;
527 }
528 /* Do not sync ONE PACKET */
529 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
530 goto control;
531sloop:
532 /* Sanity checks */
533 pe_name_len = 0;
534 if (cp->pe_data_len) {
535 if (!cp->pe_data || !cp->dest) {
536 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
537 return;
538 }
539 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
540 }
541
542 spin_lock(&ipvs->sync_buff_lock);
543
544#ifdef CONFIG_IP_VS_IPV6
545 if (cp->af == AF_INET6)
546 len = sizeof(struct ip_vs_sync_v6);
547 else
548#endif
549 len = sizeof(struct ip_vs_sync_v4);
550
551 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
552 len += sizeof(struct ip_vs_sync_conn_options) + 2;
553
554 if (cp->pe_data_len)
555 len += cp->pe_data_len + 2; /* + Param hdr field */
556 if (pe_name_len)
557 len += pe_name_len + 2;
558
559 /* check if there is a space for this one */
560 pad = 0;
561 if (ipvs->sync_buff) {
562 pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
563 if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
564 sb_queue_tail(ipvs);
565 ipvs->sync_buff = NULL;
566 pad = 0;
567 }
568 }
569
570 if (!ipvs->sync_buff) {
571 ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
572 if (!ipvs->sync_buff) {
573 spin_unlock(&ipvs->sync_buff_lock);
574 pr_err("ip_vs_sync_buff_create failed.\n");
575 return;
576 }
577 }
578
579 m = ipvs->sync_buff->mesg;
580 p = ipvs->sync_buff->head;
581 ipvs->sync_buff->head += pad + len;
582 m->size += pad + len;
583 /* Add any padding carried over from the prev. sync_conn */
584 while (pad--)
585 *(p++) = 0;
586
587 s = (union ip_vs_sync_conn *)p;
588
589 /* Set message type & copy members */
590 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
591 s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
592 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
593 s->v4.state = htons(cp->state);
594 s->v4.protocol = cp->protocol;
595 s->v4.cport = cp->cport;
596 s->v4.vport = cp->vport;
597 s->v4.dport = cp->dport;
598 s->v4.fwmark = htonl(cp->fwmark);
599 s->v4.timeout = htonl(cp->timeout / HZ);
600 m->nr_conns++;
601
602#ifdef CONFIG_IP_VS_IPV6
603 if (cp->af == AF_INET6) {
604 p += sizeof(struct ip_vs_sync_v6);
605 ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
606 ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
607 ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
608 } else
609#endif
610 {
611 p += sizeof(struct ip_vs_sync_v4); /* options ptr */
612 s->v4.caddr = cp->caddr.ip;
613 s->v4.vaddr = cp->vaddr.ip;
614 s->v4.daddr = cp->daddr.ip;
615 }
616 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
617 *(p++) = IPVS_OPT_SEQ_DATA;
618 *(p++) = sizeof(struct ip_vs_sync_conn_options);
619 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
620 p += sizeof(struct ip_vs_seq);
621 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
622 p += sizeof(struct ip_vs_seq);
623 }
624 /* Handle pe data */
625 if (cp->pe_data_len && cp->pe_data) {
626 *(p++) = IPVS_OPT_PE_DATA;
627 *(p++) = cp->pe_data_len;
628 memcpy(p, cp->pe_data, cp->pe_data_len);
629 p += cp->pe_data_len;
630 if (pe_name_len) {
631 /* Add PE_NAME */
632 *(p++) = IPVS_OPT_PE_NAME;
633 *(p++) = pe_name_len;
634 memcpy(p, cp->pe->name, pe_name_len);
635 p += pe_name_len;
636 }
637 }
638
639 spin_unlock(&ipvs->sync_buff_lock);
640
641control:
642 /* synchronize its controller if it has */
643 cp = cp->control;
644 if (!cp)
645 return;
646 /*
647 * Reduce sync rate for templates
648 * i.e. only increment in_pkts for templates.
649 */
650 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
651 int pkts = atomic_add_return(1, &cp->in_pkts);
652
653 if (pkts % sysctl_sync_period(ipvs) != 1)
654 return;
655 }
656 goto sloop;
289} 657}
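
Each v1 entry must start on a 32-bit boundary: the sender inserts up to three zero pad bytes before an entry, and the receiver advances with the matching round-up. The arithmetic in isolation:

#include <stdint.h>

static unsigned pad_to_32bit(uintptr_t head)	/* sender side */
{
	return (4 - head) & 3;			/* 0..3 zero bytes */
}

static unsigned next_entry(unsigned off, unsigned size)	/* receiver side */
{
	return off + ((size + 3) & ~3u);
}
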
290 658
659/*
660 * fill_param used by version 1
661 */
291static inline int 662static inline int
292ip_vs_conn_fill_param_sync(int af, int protocol, 663ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
293 const union nf_inet_addr *caddr, __be16 cport, 664 struct ip_vs_conn_param *p,
294 const union nf_inet_addr *vaddr, __be16 vport, 665 __u8 *pe_data, unsigned int pe_data_len,
295 struct ip_vs_conn_param *p) 666 __u8 *pe_name, unsigned int pe_name_len)
296{ 667{
297 /* XXX: Need to take into account persistence engine */ 668#ifdef CONFIG_IP_VS_IPV6
298 ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); 669 if (af == AF_INET6)
670 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
671 (const union nf_inet_addr *)&sc->v6.caddr,
672 sc->v6.cport,
673 (const union nf_inet_addr *)&sc->v6.vaddr,
674 sc->v6.vport, p);
675 else
676#endif
677 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
678 (const union nf_inet_addr *)&sc->v4.caddr,
679 sc->v4.cport,
680 (const union nf_inet_addr *)&sc->v4.vaddr,
681 sc->v4.vport, p);
682 /* Handle pe data */
683 if (pe_data_len) {
684 if (pe_name_len) {
685 char buff[IP_VS_PENAME_MAXLEN+1];
686
687 memcpy(buff, pe_name, pe_name_len);
688 buff[pe_name_len] = 0;
689 p->pe = __ip_vs_pe_getbyname(buff);
690 if (!p->pe) {
691 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
692 buff);
693 return 1;
694 }
695 } else {
696 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
697 return 1;
698 }
699
700 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
701 if (!p->pe_data) {
702 if (p->pe->module)
703 module_put(p->pe->module);
704 return -ENOMEM;
705 }
706 p->pe_data_len = pe_data_len;
707 }
299 return 0; 708 return 0;
300} 709}
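
The PE name arrives length-prefixed and unterminated, so it is copied into a bounded local buffer and NUL-terminated before the engine lookup. A sketch of that step; PENAME_MAXLEN stands in for IP_VS_PENAME_MAXLEN and its value is an assumption:

#include <string.h>

#define PENAME_MAXLEN 16	/* assumed bound, as IP_VS_PENAME_MAXLEN */

/* dst must hold PENAME_MAXLEN + 1 bytes. */
static int copy_pe_name(char *dst, const unsigned char *src, size_t len)
{
	if (len > PENAME_MAXLEN)
		return -1;	/* reject bogus lengths */
	memcpy(dst, src, len);
	dst[len] = '\0';
	return 0;
}
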
301 710
302/* 711/*
303 * Process received multicast message and create the corresponding 712 * Connection Add / Update.
304 * ip_vs_conn entries. 713 * Common for version 0 and 1 reception of backup sync_conns.
714 * Param: ...
715 * timeout is in sec.
305 */ 716 */
306static void ip_vs_process_message(const char *buffer, const size_t buflen) 717static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
718 unsigned int flags, unsigned int state,
719 unsigned int protocol, unsigned int type,
720 const union nf_inet_addr *daddr, __be16 dport,
721 unsigned long timeout, __u32 fwmark,
722 struct ip_vs_sync_conn_options *opt)
307{ 723{
308 struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
309 struct ip_vs_sync_conn *s;
310 struct ip_vs_sync_conn_options *opt;
311 struct ip_vs_conn *cp;
312 struct ip_vs_protocol *pp;
313 struct ip_vs_dest *dest; 724 struct ip_vs_dest *dest;
314 struct ip_vs_conn_param param; 725 struct ip_vs_conn *cp;
315 char *p; 726 struct netns_ipvs *ipvs = net_ipvs(net);
316 int i;
317 727
318 if (buflen < sizeof(struct ip_vs_sync_mesg)) { 728 if (!(flags & IP_VS_CONN_F_TEMPLATE))
319 IP_VS_ERR_RL("sync message header too short\n"); 729 cp = ip_vs_conn_in_get(param);
320 return; 730 else
321 } 731 cp = ip_vs_ct_in_get(param);
322 732
323 /* Convert size back to host byte order */ 733 if (cp && param->pe_data) /* Free pe_data */
324 m->size = ntohs(m->size); 734 kfree(param->pe_data);
735 if (!cp) {
736 /*
737 * Find the appropriate destination for the connection.
738 * If it is not found the connection will remain unbound
739 * but still handled.
740 */
741 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
742 param->vport, protocol, fwmark);
325 743
326 if (buflen != m->size) { 744 /* Set the appropriate activity flag */
327 IP_VS_ERR_RL("bogus sync message size\n"); 745 if (protocol == IPPROTO_TCP) {
328 return; 746 if (state != IP_VS_TCP_S_ESTABLISHED)
747 flags |= IP_VS_CONN_F_INACTIVE;
748 else
749 flags &= ~IP_VS_CONN_F_INACTIVE;
750 } else if (protocol == IPPROTO_SCTP) {
751 if (state != IP_VS_SCTP_S_ESTABLISHED)
752 flags |= IP_VS_CONN_F_INACTIVE;
753 else
754 flags &= ~IP_VS_CONN_F_INACTIVE;
755 }
756 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
757 if (dest)
758 atomic_dec(&dest->refcnt);
759 if (!cp) {
760 if (param->pe_data)
761 kfree(param->pe_data);
762 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
763 return;
764 }
765 } else if (!cp->dest) {
766 dest = ip_vs_try_bind_dest(cp);
767 if (dest)
768 atomic_dec(&dest->refcnt);
769 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
770 (cp->state != state)) {
771 /* update active/inactive flag for the connection */
772 dest = cp->dest;
773 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
774 (state != IP_VS_TCP_S_ESTABLISHED)) {
775 atomic_dec(&dest->activeconns);
776 atomic_inc(&dest->inactconns);
777 cp->flags |= IP_VS_CONN_F_INACTIVE;
778 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
779 (state == IP_VS_TCP_S_ESTABLISHED)) {
780 atomic_inc(&dest->activeconns);
781 atomic_dec(&dest->inactconns);
782 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
783 }
784 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
785 (cp->state != state)) {
786 dest = cp->dest;
787 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
788 (state != IP_VS_SCTP_S_ESTABLISHED)) {
789 atomic_dec(&dest->activeconns);
790 atomic_inc(&dest->inactconns);
791 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
792 }
329 } 793 }
330 794
331 /* SyncID sanity check */ 795 if (opt)
332 if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { 796 memcpy(&cp->in_seq, opt, sizeof(*opt));
333 IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", 797 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
334 m->syncid); 798 cp->state = state;
335 return; 799 cp->old_state = cp->state;
800 /*
801 * For Ver 0 messages style
802 * - Not possible to recover the right timeout for templates
803 * - can not find the right fwmark
804 * virtual service. If needed, we can do it for
805 * non-fwmark persistent services.
806 * Ver 1 messages style.
807 * - No problem.
808 */
809 if (timeout) {
810 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
811 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
812 cp->timeout = timeout*HZ;
813 } else {
814 struct ip_vs_proto_data *pd;
815
816 pd = ip_vs_proto_data_get(net, protocol);
817 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
818 cp->timeout = pd->timeout_table[state];
819 else
820 cp->timeout = (3*60*HZ);
336 } 821 }
822 ip_vs_conn_put(cp);
823}
337 824
338 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 825/*
826 * Process received multicast message for Version 0
827 */
828static void ip_vs_process_message_v0(struct net *net, const char *buffer,
829 const size_t buflen)
830{
831 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
832 struct ip_vs_sync_conn_v0 *s;
833 struct ip_vs_sync_conn_options *opt;
834 struct ip_vs_protocol *pp;
835 struct ip_vs_conn_param param;
836 char *p;
837 int i;
838
839 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
339 for (i=0; i<m->nr_conns; i++) { 840 for (i=0; i<m->nr_conns; i++) {
340 unsigned flags, state; 841 unsigned flags, state;
341 842
342 if (p + SIMPLE_CONN_SIZE > buffer+buflen) { 843 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
343 IP_VS_ERR_RL("bogus conn in sync message\n"); 844 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
344 return; 845 return;
345 } 846 }
346 s = (struct ip_vs_sync_conn *) p; 847 s = (struct ip_vs_sync_conn_v0 *) p;
347 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; 848 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
348 flags &= ~IP_VS_CONN_F_HASHED; 849 flags &= ~IP_VS_CONN_F_HASHED;
349 if (flags & IP_VS_CONN_F_SEQ_MASK) { 850 if (flags & IP_VS_CONN_F_SEQ_MASK) {
350 opt = (struct ip_vs_sync_conn_options *)&s[1]; 851 opt = (struct ip_vs_sync_conn_options *)&s[1];
351 p += FULL_CONN_SIZE; 852 p += FULL_CONN_SIZE;
352 if (p > buffer+buflen) { 853 if (p > buffer+buflen) {
353 IP_VS_ERR_RL("bogus conn options in sync message\n"); 854 IP_VS_ERR_RL("BACKUP v0, Dropping buffer, bogus conn options\n");
354 return; 855 return;
355 } 856 }
356 } else { 857 } else {
@@ -362,118 +863,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
362 if (!(flags & IP_VS_CONN_F_TEMPLATE)) { 863 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
363 pp = ip_vs_proto_get(s->protocol); 864 pp = ip_vs_proto_get(s->protocol);
364 if (!pp) { 865 if (!pp) {
365 IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", 866 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
366 s->protocol); 867 s->protocol);
367 continue; 868 continue;
368 } 869 }
369 if (state >= pp->num_states) { 870 if (state >= pp->num_states) {
370 IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", 871 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
371 pp->name, state); 872 pp->name, state);
372 continue; 873 continue;
373 } 874 }
374 } else { 875 } else {
375 /* protocol in templates is not used for state/timeout */ 876 /* protocol in templates is not used for state/timeout */
376 pp = NULL;
377 if (state > 0) { 877 if (state > 0) {
378 IP_VS_DBG(2, "Invalid template state %u in sync msg\n", 878 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
379 state); 879 state);
380 state = 0; 880 state = 0;
381 } 881 }
382 } 882 }
383 883
384 { 884 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
385 if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, 885 (const union nf_inet_addr *)&s->caddr,
386 (union nf_inet_addr *)&s->caddr, 886 s->cport,
387 s->cport, 887 (const union nf_inet_addr *)&s->vaddr,
388 (union nf_inet_addr *)&s->vaddr, 888 s->vport, &param);
389 s->vport, &param)) { 889
390 pr_err("ip_vs_conn_fill_param_sync failed"); 890 /* Send timeout as Zero */
391 return; 891 ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
892 (union nf_inet_addr *)&s->daddr, s->dport,
893 0, 0, opt);
894 }
895}
896
897/*
898 * Handle options
899 */
900static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
901 __u32 *opt_flags,
902 struct ip_vs_sync_conn_options *opt)
903{
904 struct ip_vs_sync_conn_options *topt;
905
906 topt = (struct ip_vs_sync_conn_options *)p;
907
908 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
909 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
910 return -EINVAL;
911 }
912 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
913 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
914 return -EINVAL;
915 }
916 ntoh_seq(&topt->in_seq, &opt->in_seq);
917 ntoh_seq(&topt->out_seq, &opt->out_seq);
918 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
919 return 0;
920}
921
922static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
923 __u8 **data, unsigned int maxlen,
924 __u32 *opt_flags, __u32 flag)
925{
926 if (plen > maxlen) {
927 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
928 return -EINVAL;
929 }
930 if (*opt_flags & flag) {
931 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
932 return -EINVAL;
933 }
934 *data_len = plen;
935 *data = p;
936 *opt_flags |= flag;
937 return 0;
938}
939/*
940 * Process a Version 1 sync. connection
941 */
942static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
943{
944 struct ip_vs_sync_conn_options opt;
945 union ip_vs_sync_conn *s;
946 struct ip_vs_protocol *pp;
947 struct ip_vs_conn_param param;
948 __u32 flags;
949 unsigned int af, state, pe_data_len=0, pe_name_len=0;
950 __u8 *pe_data=NULL, *pe_name=NULL;
951 __u32 opt_flags=0;
952 int retc=0;
953
954 s = (union ip_vs_sync_conn *) p;
955
956 if (s->v6.type & STYPE_F_INET6) {
957#ifdef CONFIG_IP_VS_IPV6
958 af = AF_INET6;
959 p += sizeof(struct ip_vs_sync_v6);
960#else
961 IP_VS_DBG(3, "BACKUP, IPv6 msg received, but IPVS is not compiled for IPv6\n");
962 retc = 10;
963 goto out;
964#endif
965 } else if (!s->v4.type) {
966 af = AF_INET;
967 p += sizeof(struct ip_vs_sync_v4);
968 } else {
969 return -10;
970 }
971 if (p > msg_end)
972 return -20;
973
974 /* Process optional params check Type & Len. */
975 while (p < msg_end) {
976 int ptype;
977 int plen;
978
979 if (p+2 > msg_end)
980 return -30;
981 ptype = *(p++);
982 plen = *(p++);
983
984 if (!plen || ((p + plen) > msg_end))
985 return -40;
986 /* Handle seq option p = param data */
987 switch (ptype & ~IPVS_OPT_F_PARAM) {
988 case IPVS_OPT_SEQ_DATA:
989 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
990 return -50;
991 break;
992
993 case IPVS_OPT_PE_DATA:
994 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
995 IP_VS_PEDATA_MAXLEN, &opt_flags,
996 IPVS_OPT_F_PE_DATA))
997 return -60;
998 break;
999
1000 case IPVS_OPT_PE_NAME:
1001 if (ip_vs_proc_str(p, plen, &pe_name_len, &pe_name,
1002 IP_VS_PENAME_MAXLEN, &opt_flags,
1003 IPVS_OPT_F_PE_NAME))
1004 return -70;
1005 break;
1006
1007 default:
1008 /* Param data mandatory? */
1009 if (!(ptype & IPVS_OPT_F_PARAM)) {
1010 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1011 ptype & ~IPVS_OPT_F_PARAM);
1012 retc = 20;
1013 goto out;
392 } 1014 }
393 if (!(flags & IP_VS_CONN_F_TEMPLATE))
394 cp = ip_vs_conn_in_get(&param);
395 else
396 cp = ip_vs_ct_in_get(&param);
397 } 1015 }
398 if (!cp) { 1016 p += plen; /* Next option */
399 /* 1017 }
400 * Find the appropriate destination for the connection. 1018
401 * If it is not found the connection will remain unbound 1019 /* Get flags and Mask off unsupported */
402 * but still handled. 1020 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
403 */ 1021 flags |= IP_VS_CONN_F_SYNC;
404 dest = ip_vs_find_dest(AF_INET, 1022 state = ntohs(s->v4.state);
405 (union nf_inet_addr *)&s->daddr, 1023
406 s->dport, 1024 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
407 (union nf_inet_addr *)&s->vaddr, 1025 pp = ip_vs_proto_get(s->v4.protocol);
408 s->vport, 1026 if (!pp) {
409 s->protocol); 1027 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
410 /* Set the approprite ativity flag */ 1028 s->v4.protocol);
411 if (s->protocol == IPPROTO_TCP) { 1029 retc = 30;
412 if (state != IP_VS_TCP_S_ESTABLISHED) 1030 goto out;
413 flags |= IP_VS_CONN_F_INACTIVE; 1031 }
414 else 1032 if (state >= pp->num_states) {
415 flags &= ~IP_VS_CONN_F_INACTIVE; 1033 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
416 } else if (s->protocol == IPPROTO_SCTP) { 1034 pp->name, state);
417 if (state != IP_VS_SCTP_S_ESTABLISHED) 1035 retc = 40;
418 flags |= IP_VS_CONN_F_INACTIVE; 1036 goto out;
419 else 1037 }
420 flags &= ~IP_VS_CONN_F_INACTIVE; 1038 } else {
1039 /* protocol in templates is not used for state/timeout */
1040 if (state > 0) {
1041 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1042 state);
1043 state = 0;
1044 }
1045 }
1046 if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
1047 pe_data_len, pe_name, pe_name_len)) {
1048 retc = 50;
1049 goto out;
1050 }
1051 /* If only IPv4 is compiled in, just silently skip IPv6 entries */
1052 if (af == AF_INET)
1053 ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
1054 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1055 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1056 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1057 );
1058#ifdef CONFIG_IP_VS_IPV6
1059 else
1060 ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
1061 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1062 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1063 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1064 );
1065#endif
1066 return 0;
1067 /* Error exit */
1068out:
1069 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1070 return retc;
1071
1072}
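
The option loop above is a classic one-byte-type / one-byte-length TLV walk, bounds-checked against the end of the entry. Reduced to its skeleton:

#include <stdint.h>

static int walk_options(const uint8_t *p, const uint8_t *end)
{
	while (p < end) {
		unsigned type, len;

		if (p + 2 > end)
			return -1;	/* truncated type/len header */
		type = *p++;
		len  = *p++;
		if (!len || p + len > end)
			return -1;	/* truncated payload */
		(void)type;	/* dispatch on type & ~IPVS_OPT_F_PARAM here */
		p += len;
	}
	return 0;
}
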
1073/*
1074 * Process received multicast message and create the corresponding
1075 * ip_vs_conn entries.
1076 * Handles Version 0 & 1
1077 */
1078static void ip_vs_process_message(struct net *net, __u8 *buffer,
1079 const size_t buflen)
1080{
1081 struct netns_ipvs *ipvs = net_ipvs(net);
1082 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1083 __u8 *p, *msg_end;
1084 int i, nr_conns;
1085
1086 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1087 IP_VS_DBG(2, "BACKUP, message header too short\n");
1088 return;
1089 }
1090 /* Convert size back to host byte order */
1091 m2->size = ntohs(m2->size);
1092
1093 if (buflen != m2->size) {
1094 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1095 return;
1096 }
1097 /* SyncID sanity check */
1098 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1099 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1100 return;
1101 }
1102 /* Handle version 1 message */
1103 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1104 && (m2->spare == 0)) {
1105
1106 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1107 nr_conns = m2->nr_conns;
1108
1109 for (i=0; i<nr_conns; i++) {
1110 union ip_vs_sync_conn *s;
1111 unsigned size;
1112 int retc;
1113
1114 p = msg_end;
1115 if (p + sizeof(s->v4) > buffer+buflen) {
1116 IP_VS_ERR_RL("BACKUP, Dropping buffer, too small\n");
1117 return;
421 } 1118 }
422 cp = ip_vs_conn_new(&param, 1119 s = (union ip_vs_sync_conn *)p;
423 (union nf_inet_addr *)&s->daddr, 1120 size = ntohs(s->v4.ver_size) & SVER_MASK;
424 s->dport, flags, dest); 1121 msg_end = p + size;
425 if (dest) 1122 /* Basic sanity checks */
426 atomic_dec(&dest->refcnt); 1123 if (msg_end > buffer+buflen) {
427 if (!cp) { 1124 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
428 pr_err("ip_vs_conn_new failed\n");
429 return; 1125 return;
430 } 1126 }
431 } else if (!cp->dest) { 1127 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
432 dest = ip_vs_try_bind_dest(cp); 1128 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
433 if (dest) 1129 ntohs(s->v4.ver_size) >> SVER_SHIFT);
434 atomic_dec(&dest->refcnt); 1130 return;
435 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
436 (cp->state != state)) {
437 /* update active/inactive flag for the connection */
438 dest = cp->dest;
439 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
440 (state != IP_VS_TCP_S_ESTABLISHED)) {
441 atomic_dec(&dest->activeconns);
442 atomic_inc(&dest->inactconns);
443 cp->flags |= IP_VS_CONN_F_INACTIVE;
444 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
445 (state == IP_VS_TCP_S_ESTABLISHED)) {
446 atomic_inc(&dest->activeconns);
447 atomic_dec(&dest->inactconns);
448 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
449 } 1131 }
450 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && 1132 /* Process a single sync_conn */
451 (cp->state != state)) { 1133 retc = ip_vs_proc_sync_conn(net, p, msg_end);
452 dest = cp->dest; 1134 if (retc < 0) {
453 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 1135 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
454 (state != IP_VS_SCTP_S_ESTABLISHED)) { 1136 retc);
455 atomic_dec(&dest->activeconns); 1137 return;
456 atomic_inc(&dest->inactconns);
457 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
458 } 1138 }
1139 /* Make sure we have 32 bit alignment */
1140 msg_end = p + ((size + 3) & ~3);
459 } 1141 }
460 1142 } else {
461 if (opt) 1143 /* Old type of message */
462 memcpy(&cp->in_seq, opt, sizeof(*opt)); 1144 ip_vs_process_message_v0(net, buffer, buflen);
463 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); 1145 return;
464 cp->state = state;
465 cp->old_state = cp->state;
466 /*
467 * We can not recover the right timeout for templates
468 * in all cases, we can not find the right fwmark
469 * virtual service. If needed, we can do it for
470 * non-fwmark persistent services.
471 */
472 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
473 cp->timeout = pp->timeout_table[state];
474 else
475 cp->timeout = (3*60*HZ);
476 ip_vs_conn_put(cp);
477 } 1146 }
478} 1147}
479 1148
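Worth noting in the v1 parsing loop above: each variable-size connection record is padded so the next record starts on a 4-byte boundary, via msg_end = p + ((size + 3) & ~3). A minimal user-space sketch of that rounding (names are ours, not kernel symbols):

    #include <stdio.h>

    /* Round a record length up to the next multiple of 4, as the
     * v1 parser does before stepping to the following sync_conn. */
    static unsigned int align4(unsigned int size)
    {
            return (size + 3) & ~3u;
    }

    int main(void)
    {
            unsigned int sizes[] = { 36, 37, 39, 40 };
            for (unsigned int i = 0; i < 4; i++)
                    printf("size %u -> next record at offset +%u\n",
                           sizes[i], align4(sizes[i]));
            return 0;   /* 36->36, 37->40, 39->40, 40->40 */
    }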
@@ -511,8 +1180,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
511{ 1180{
512 struct net_device *dev; 1181 struct net_device *dev;
513 struct inet_sock *inet = inet_sk(sk); 1182 struct inet_sock *inet = inet_sk(sk);
1183 struct net *net = sock_net(sk);
514 1184
515 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1185 dev = __dev_get_by_name(net, ifname);
1186 if (!dev)
516 return -ENODEV; 1187 return -ENODEV;
517 1188
518 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1189 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1202,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
531 * Set the maximum length of sync message according to the 1202 * Set the maximum length of sync message according to the
532 * specified interface's MTU. 1203 * specified interface's MTU.
533 */ 1204 */
534static int set_sync_mesg_maxlen(int sync_state) 1205static int set_sync_mesg_maxlen(struct net *net, int sync_state)
535{ 1206{
1207 struct netns_ipvs *ipvs = net_ipvs(net);
536 struct net_device *dev; 1208 struct net_device *dev;
537 int num; 1209 int num;
538 1210
539 if (sync_state == IP_VS_STATE_MASTER) { 1211 if (sync_state == IP_VS_STATE_MASTER) {
540 if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) 1212 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1213 if (!dev)
541 return -ENODEV; 1214 return -ENODEV;
542 1215
543 num = (dev->mtu - sizeof(struct iphdr) - 1216 num = (dev->mtu - sizeof(struct iphdr) -
544 sizeof(struct udphdr) - 1217 sizeof(struct udphdr) -
545 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; 1218 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
546 sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + 1219 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
547 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); 1220 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
548 IP_VS_DBG(7, "setting the maximum length of sync sending " 1221 IP_VS_DBG(7, "setting the maximum length of sync sending "
549 "message %d.\n", sync_send_mesg_maxlen); 1222 "message %d.\n", ipvs->send_mesg_maxlen);
550 } else if (sync_state == IP_VS_STATE_BACKUP) { 1223 } else if (sync_state == IP_VS_STATE_BACKUP) {
551 if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) 1224 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1225 if (!dev)
552 return -ENODEV; 1226 return -ENODEV;
553 1227
554 sync_recv_mesg_maxlen = dev->mtu - 1228 ipvs->recv_mesg_maxlen = dev->mtu -
555 sizeof(struct iphdr) - sizeof(struct udphdr); 1229 sizeof(struct iphdr) - sizeof(struct udphdr);
556 IP_VS_DBG(7, "setting the maximum length of sync receiving " 1230 IP_VS_DBG(7, "setting the maximum length of sync receiving "
557 "message %d.\n", sync_recv_mesg_maxlen); 1231 "message %d.\n", ipvs->recv_mesg_maxlen);
558 } 1232 }
559 1233
560 return 0; 1234 return 0;
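To make the arithmetic in set_sync_mesg_maxlen() concrete: with an Ethernet MTU of 1500, the master side subtracts the IP and UDP headers, the sync header and a 20-byte margin, then fits as many fixed-size v0 records as possible. A compilable sketch; SYNC_MESG_HEADER_LEN = 4 and SIMPLE_CONN_SIZE = 24 are our reading of the source, so treat them as assumptions:

    #include <stdio.h>

    #define IPHDR_LEN              20  /* sizeof(struct iphdr), no options */
    #define UDPHDR_LEN              8  /* sizeof(struct udphdr) */
    #define SYNC_MESG_HEADER_LEN    4  /* assumed from the source */
    #define SIMPLE_CONN_SIZE       24  /* assumed sizeof(ip_vs_sync_conn_v0) */
    #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns is an 8-bit field */

    int main(void)
    {
            int mtu = 1500;
            int num = (mtu - IPHDR_LEN - UDPHDR_LEN -
                       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
            if (num > MAX_CONNS_PER_SYNCBUFF)
                    num = MAX_CONNS_PER_SYNCBUFF;
            printf("send_mesg_maxlen = %d (%d conns per datagram)\n",
                   SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num, num);
            return 0;   /* 1448/24 = 60 conns -> maxlen 1444 */
    }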
@@ -569,6 +1243,7 @@ static int set_sync_mesg_maxlen(int sync_state)
569static int 1243static int
570join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) 1244join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
571{ 1245{
1246 struct net *net = sock_net(sk);
572 struct ip_mreqn mreq; 1247 struct ip_mreqn mreq;
573 struct net_device *dev; 1248 struct net_device *dev;
574 int ret; 1249 int ret;
@@ -576,7 +1251,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
576 memset(&mreq, 0, sizeof(mreq)); 1251 memset(&mreq, 0, sizeof(mreq));
577 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); 1252 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
578 1253
579 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1254 dev = __dev_get_by_name(net, ifname);
1255 if (!dev)
580 return -ENODEV; 1256 return -ENODEV;
581 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1257 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
582 return -EINVAL; 1258 return -EINVAL;
@@ -593,11 +1269,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
593 1269
594static int bind_mcastif_addr(struct socket *sock, char *ifname) 1270static int bind_mcastif_addr(struct socket *sock, char *ifname)
595{ 1271{
1272 struct net *net = sock_net(sock->sk);
596 struct net_device *dev; 1273 struct net_device *dev;
597 __be32 addr; 1274 __be32 addr;
598 struct sockaddr_in sin; 1275 struct sockaddr_in sin;
599 1276
600 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1277 dev = __dev_get_by_name(net, ifname);
1278 if (!dev)
601 return -ENODEV; 1279 return -ENODEV;
602 1280
603 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 1281 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,19 +1297,20 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
619/* 1297/*
620 * Set up sending multicast socket over UDP 1298 * Set up sending multicast socket over UDP
621 */ 1299 */
622static struct socket * make_send_sock(void) 1300static struct socket *make_send_sock(struct net *net)
623{ 1301{
1302 struct netns_ipvs *ipvs = net_ipvs(net);
624 struct socket *sock; 1303 struct socket *sock;
625 int result; 1304 int result;
626 1305
627 /* First create a socket */ 1306 /* First create a socket */
628 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1307 result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
629 if (result < 0) { 1308 if (result < 0) {
630 pr_err("Error during creation of socket; terminating\n"); 1309 pr_err("Error during creation of socket; terminating\n");
631 return ERR_PTR(result); 1310 return ERR_PTR(result);
632 } 1311 }
633 1312
634 result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); 1313 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
635 if (result < 0) { 1314 if (result < 0) {
636 pr_err("Error setting outbound mcast interface\n"); 1315 pr_err("Error setting outbound mcast interface\n");
637 goto error; 1316 goto error;
@@ -640,7 +1319,7 @@ static struct socket * make_send_sock(void)
640 set_mcast_loop(sock->sk, 0); 1319 set_mcast_loop(sock->sk, 0);
641 set_mcast_ttl(sock->sk, 1); 1320 set_mcast_ttl(sock->sk, 1);
642 1321
643 result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); 1322 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
644 if (result < 0) { 1323 if (result < 0) {
645 pr_err("Error binding address of the mcast interface\n"); 1324 pr_err("Error binding address of the mcast interface\n");
646 goto error; 1325 goto error;
@@ -664,13 +1343,14 @@ static struct socket * make_send_sock(void)
664/* 1343/*
665 * Set up receiving multicast socket over UDP 1344 * Set up receiving multicast socket over UDP
666 */ 1345 */
667static struct socket * make_receive_sock(void) 1346static struct socket *make_receive_sock(struct net *net)
668{ 1347{
1348 struct netns_ipvs *ipvs = net_ipvs(net);
669 struct socket *sock; 1349 struct socket *sock;
670 int result; 1350 int result;
671 1351
672 /* First create a socket */ 1352 /* First create a socket */
673 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1353 result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
674 if (result < 0) { 1354 if (result < 0) {
675 pr_err("Error during creation of socket; terminating\n"); 1355 pr_err("Error during creation of socket; terminating\n");
676 return ERR_PTR(result); 1356 return ERR_PTR(result);
@@ -689,7 +1369,7 @@ static struct socket * make_receive_sock(void)
689 /* join the multicast group */ 1369 /* join the multicast group */
690 result = join_mcast_group(sock->sk, 1370 result = join_mcast_group(sock->sk,
691 (struct in_addr *) &mcast_addr.sin_addr, 1371 (struct in_addr *) &mcast_addr.sin_addr,
692 ip_vs_backup_mcast_ifn); 1372 ipvs->backup_mcast_ifn);
693 if (result < 0) { 1373 if (result < 0) {
694 pr_err("Error joining to the multicast group\n"); 1374 pr_err("Error joining to the multicast group\n");
695 goto error; 1375 goto error;
@@ -760,20 +1440,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
760static int sync_thread_master(void *data) 1440static int sync_thread_master(void *data)
761{ 1441{
762 struct ip_vs_sync_thread_data *tinfo = data; 1442 struct ip_vs_sync_thread_data *tinfo = data;
1443 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
763 struct ip_vs_sync_buff *sb; 1444 struct ip_vs_sync_buff *sb;
764 1445
765 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " 1446 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
766 "syncid = %d\n", 1447 "syncid = %d\n",
767 ip_vs_master_mcast_ifn, ip_vs_master_syncid); 1448 ipvs->master_mcast_ifn, ipvs->master_syncid);
768 1449
769 while (!kthread_should_stop()) { 1450 while (!kthread_should_stop()) {
770 while ((sb = sb_dequeue())) { 1451 while ((sb = sb_dequeue(ipvs))) {
771 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1452 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
772 ip_vs_sync_buff_release(sb); 1453 ip_vs_sync_buff_release(sb);
773 } 1454 }
774 1455
775 /* check if entries stay in curr_sb for 2 seconds */ 1456 /* check if entries stay in ipvs->sync_buff for 2 seconds */
776 sb = get_curr_sync_buff(2 * HZ); 1457 sb = get_curr_sync_buff(ipvs, 2 * HZ);
777 if (sb) { 1458 if (sb) {
778 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1459 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
779 ip_vs_sync_buff_release(sb); 1460 ip_vs_sync_buff_release(sb);
@@ -783,14 +1464,13 @@ static int sync_thread_master(void *data)
783 } 1464 }
784 1465
785 /* clean up the sync_buff queue */ 1466 /* clean up the sync_buff queue */
786 while ((sb=sb_dequeue())) { 1467 while ((sb = sb_dequeue(ipvs)))
787 ip_vs_sync_buff_release(sb); 1468 ip_vs_sync_buff_release(sb);
788 }
789 1469
790 /* clean up the current sync_buff */ 1470 /* clean up the current sync_buff */
791 if ((sb = get_curr_sync_buff(0))) { 1471 sb = get_curr_sync_buff(ipvs, 0);
1472 if (sb)
792 ip_vs_sync_buff_release(sb); 1473 ip_vs_sync_buff_release(sb);
793 }
794 1474
795 /* release the sending multicast socket */ 1475 /* release the sending multicast socket */
796 sock_release(tinfo->sock); 1476 sock_release(tinfo->sock);
@@ -803,11 +1483,12 @@ static int sync_thread_master(void *data)
803static int sync_thread_backup(void *data) 1483static int sync_thread_backup(void *data)
804{ 1484{
805 struct ip_vs_sync_thread_data *tinfo = data; 1485 struct ip_vs_sync_thread_data *tinfo = data;
1486 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
806 int len; 1487 int len;
807 1488
808 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " 1489 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
809 "syncid = %d\n", 1490 "syncid = %d\n",
810 ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); 1491 ipvs->backup_mcast_ifn, ipvs->backup_syncid);
811 1492
812 while (!kthread_should_stop()) { 1493 while (!kthread_should_stop()) {
813 wait_event_interruptible(*sk_sleep(tinfo->sock->sk), 1494 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1498,7 @@ static int sync_thread_backup(void *data)
817 /* do we have data now? */ 1498 /* do we have data now? */
818 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { 1499 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
819 len = ip_vs_receive(tinfo->sock, tinfo->buf, 1500 len = ip_vs_receive(tinfo->sock, tinfo->buf,
820 sync_recv_mesg_maxlen); 1501 ipvs->recv_mesg_maxlen);
821 if (len <= 0) { 1502 if (len <= 0) {
822 pr_err("receiving message error\n"); 1503 pr_err("receiving message error\n");
823 break; 1504 break;
@@ -826,7 +1507,7 @@ static int sync_thread_backup(void *data)
826 /* disable bottom half, because it accesses the data 1507 /* disable bottom half, because it accesses the data
827 shared by softirq while getting/creating conns */ 1508 shared by softirq while getting/creating conns */
828 local_bh_disable(); 1509 local_bh_disable();
829 ip_vs_process_message(tinfo->buf, len); 1510 ip_vs_process_message(tinfo->net, tinfo->buf, len);
830 local_bh_enable(); 1511 local_bh_enable();
831 } 1512 }
832 } 1513 }
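The backup thread's shape — sleep until the socket is readable, then drain every queued datagram before sleeping again — is a classic level-triggered receive loop. A user-space sketch of the same pattern (purely illustrative; MSG_DONTWAIT is Linux-specific):

    #include <poll.h>
    #include <sys/socket.h>

    /* Drain loop in the style of sync_thread_backup(): block until data
     * arrives, then consume everything currently queued. */
    static void drain_loop(int fd, char *buf, size_t maxlen, volatile int *stop)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN };

            while (!*stop) {                     /* kthread_should_stop() */
                    if (poll(&pfd, 1, 1000) <= 0)
                            continue;
                    for (;;) {
                            ssize_t len = recv(fd, buf, maxlen, MSG_DONTWAIT);
                            if (len <= 0)
                                    break;       /* queue empty (or error) */
                            /* ip_vs_process_message(net, buf, len) here */
                    }
            }
    }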
@@ -840,41 +1521,42 @@ static int sync_thread_backup(void *data)
840} 1521}
841 1522
842 1523
843int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) 1524int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
844{ 1525{
845 struct ip_vs_sync_thread_data *tinfo; 1526 struct ip_vs_sync_thread_data *tinfo;
846 struct task_struct **realtask, *task; 1527 struct task_struct **realtask, *task;
847 struct socket *sock; 1528 struct socket *sock;
1529 struct netns_ipvs *ipvs = net_ipvs(net);
848 char *name, *buf = NULL; 1530 char *name, *buf = NULL;
849 int (*threadfn)(void *data); 1531 int (*threadfn)(void *data);
850 int result = -ENOMEM; 1532 int result = -ENOMEM;
851 1533
852 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1534 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
853 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", 1535 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
854 sizeof(struct ip_vs_sync_conn)); 1536 sizeof(struct ip_vs_sync_conn_v0));
855 1537
856 if (state == IP_VS_STATE_MASTER) { 1538 if (state == IP_VS_STATE_MASTER) {
857 if (sync_master_thread) 1539 if (ipvs->master_thread)
858 return -EEXIST; 1540 return -EEXIST;
859 1541
860 strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, 1542 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
861 sizeof(ip_vs_master_mcast_ifn)); 1543 sizeof(ipvs->master_mcast_ifn));
862 ip_vs_master_syncid = syncid; 1544 ipvs->master_syncid = syncid;
863 realtask = &sync_master_thread; 1545 realtask = &ipvs->master_thread;
864 name = "ipvs_syncmaster"; 1546 name = "ipvs_master:%d";
865 threadfn = sync_thread_master; 1547 threadfn = sync_thread_master;
866 sock = make_send_sock(); 1548 sock = make_send_sock(net);
867 } else if (state == IP_VS_STATE_BACKUP) { 1549 } else if (state == IP_VS_STATE_BACKUP) {
868 if (sync_backup_thread) 1550 if (ipvs->backup_thread)
869 return -EEXIST; 1551 return -EEXIST;
870 1552
871 strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, 1553 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
872 sizeof(ip_vs_backup_mcast_ifn)); 1554 sizeof(ipvs->backup_mcast_ifn));
873 ip_vs_backup_syncid = syncid; 1555 ipvs->backup_syncid = syncid;
874 realtask = &sync_backup_thread; 1556 realtask = &ipvs->backup_thread;
875 name = "ipvs_syncbackup"; 1557 name = "ipvs_backup:%d";
876 threadfn = sync_thread_backup; 1558 threadfn = sync_thread_backup;
877 sock = make_receive_sock(); 1559 sock = make_receive_sock(net);
878 } else { 1560 } else {
879 return -EINVAL; 1561 return -EINVAL;
880 } 1562 }
@@ -884,9 +1566,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
884 goto out; 1566 goto out;
885 } 1567 }
886 1568
887 set_sync_mesg_maxlen(state); 1569 set_sync_mesg_maxlen(net, state);
888 if (state == IP_VS_STATE_BACKUP) { 1570 if (state == IP_VS_STATE_BACKUP) {
889 buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); 1571 buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
890 if (!buf) 1572 if (!buf)
891 goto outsocket; 1573 goto outsocket;
892 } 1574 }
@@ -895,10 +1577,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
895 if (!tinfo) 1577 if (!tinfo)
896 goto outbuf; 1578 goto outbuf;
897 1579
1580 tinfo->net = net;
898 tinfo->sock = sock; 1581 tinfo->sock = sock;
899 tinfo->buf = buf; 1582 tinfo->buf = buf;
900 1583
901 task = kthread_run(threadfn, tinfo, name); 1584 task = kthread_run(threadfn, tinfo, name, ipvs->gen);
902 if (IS_ERR(task)) { 1585 if (IS_ERR(task)) {
903 result = PTR_ERR(task); 1586 result = PTR_ERR(task);
904 goto outtinfo; 1587 goto outtinfo;
@@ -906,7 +1589,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
906 1589
907 /* mark as active */ 1590 /* mark as active */
908 *realtask = task; 1591 *realtask = task;
909 ip_vs_sync_state |= state; 1592 ipvs->sync_state |= state;
910 1593
911 /* increase the module use count */ 1594 /* increase the module use count */
912 ip_vs_use_count_inc(); 1595 ip_vs_use_count_inc();
@@ -924,16 +1607,18 @@ out:
924} 1607}
925 1608
926 1609
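One small but visible change above: the thread names gain a printf-style %d, so each netns gets a distinct "ipvs_master:<gen>"/"ipvs_backup:<gen>" task; kthread_run() accepts the format arguments directly. A rough user-space analogue with pthreads, offered only to illustrate the naming idea:

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <stdio.h>

    static void *sync_master(void *arg) { (void)arg; return NULL; }

    int main(void)
    {
            pthread_t task;
            char name[16];          /* Linux task names cap at 15 chars */
            int gen = 3;            /* per-netns generation, like ipvs->gen */

            snprintf(name, sizeof(name), "ipvs_master:%d", gen);
            pthread_create(&task, NULL, sync_master, NULL);
            pthread_setname_np(task, name); /* analogue of the name fmt */
            pthread_join(task, NULL);
            return 0;
    }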
927int stop_sync_thread(int state) 1610int stop_sync_thread(struct net *net, int state)
928{ 1611{
1612 struct netns_ipvs *ipvs = net_ipvs(net);
1613
929 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1614 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
930 1615
931 if (state == IP_VS_STATE_MASTER) { 1616 if (state == IP_VS_STATE_MASTER) {
932 if (!sync_master_thread) 1617 if (!ipvs->master_thread)
933 return -ESRCH; 1618 return -ESRCH;
934 1619
935 pr_info("stopping master sync thread %d ...\n", 1620 pr_info("stopping master sync thread %d ...\n",
936 task_pid_nr(sync_master_thread)); 1621 task_pid_nr(ipvs->master_thread));
937 1622
938 /* 1623 /*
939 * The lock synchronizes with sb_queue_tail(), so that we don't 1624 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +1626,21 @@ int stop_sync_thread(int state)
941 * progress of stopping the master sync daemon. 1626 * progress of stopping the master sync daemon.
942 */ 1627 */
943 1628
944 spin_lock_bh(&ip_vs_sync_lock); 1629 spin_lock_bh(&ipvs->sync_lock);
945 ip_vs_sync_state &= ~IP_VS_STATE_MASTER; 1630 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
946 spin_unlock_bh(&ip_vs_sync_lock); 1631 spin_unlock_bh(&ipvs->sync_lock);
947 kthread_stop(sync_master_thread); 1632 kthread_stop(ipvs->master_thread);
948 sync_master_thread = NULL; 1633 ipvs->master_thread = NULL;
949 } else if (state == IP_VS_STATE_BACKUP) { 1634 } else if (state == IP_VS_STATE_BACKUP) {
950 if (!sync_backup_thread) 1635 if (!ipvs->backup_thread)
951 return -ESRCH; 1636 return -ESRCH;
952 1637
953 pr_info("stopping backup sync thread %d ...\n", 1638 pr_info("stopping backup sync thread %d ...\n",
954 task_pid_nr(sync_backup_thread)); 1639 task_pid_nr(ipvs->backup_thread));
955 1640
956 ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; 1641 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
957 kthread_stop(sync_backup_thread); 1642 kthread_stop(ipvs->backup_thread);
958 sync_backup_thread = NULL; 1643 ipvs->backup_thread = NULL;
959 } else { 1644 } else {
960 return -EINVAL; 1645 return -EINVAL;
961 } 1646 }
@@ -965,3 +1650,42 @@ int stop_sync_thread(int state)
965 1650
966 return 0; 1651 return 0;
967} 1652}
1653
1654/*
1655 * Initialize data struct for each netns
1656 */
1657static int __net_init __ip_vs_sync_init(struct net *net)
1658{
1659 struct netns_ipvs *ipvs = net_ipvs(net);
1660
1661 INIT_LIST_HEAD(&ipvs->sync_queue);
1662 spin_lock_init(&ipvs->sync_lock);
1663 spin_lock_init(&ipvs->sync_buff_lock);
1664
1665 ipvs->sync_mcast_addr.sin_family = AF_INET;
1666 ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
1667 ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
1668 return 0;
1669}
1670
1671static void __ip_vs_sync_cleanup(struct net *net)
1672{
1673 stop_sync_thread(net, IP_VS_STATE_MASTER);
1674 stop_sync_thread(net, IP_VS_STATE_BACKUP);
1675}
1676
1677static struct pernet_operations ipvs_sync_ops = {
1678 .init = __ip_vs_sync_init,
1679 .exit = __ip_vs_sync_cleanup,
1680};
1681
1682
1683int __init ip_vs_sync_init(void)
1684{
1685 return register_pernet_subsys(&ipvs_sync_ops);
1686}
1687
1688void ip_vs_sync_cleanup(void)
1689{
1690 unregister_pernet_subsys(&ipvs_sync_ops);
1691}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bbddfdb10db..bc1bfc48a17 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -27,22 +27,6 @@
27 27
28#include <net/ip_vs.h> 28#include <net/ip_vs.h>
29 29
30
31static inline unsigned int
32ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
33{
34 /*
35 * We think the overhead of processing active connections is 256
36 * times higher than that of inactive connections in average. (This
37 * 256 times might not be accurate, we will change it later) We
38 * use the following formula to estimate the overhead now:
39 * dest->activeconns*256 + dest->inactconns
40 */
41 return (atomic_read(&dest->activeconns) << 8) +
42 atomic_read(&dest->inactconns);
43}
44
45
46/* 30/*
47 * Weighted Least Connection scheduling 31 * Weighted Least Connection scheduling
48 */ 32 */
@@ -71,11 +55,11 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
71 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 55 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
72 atomic_read(&dest->weight) > 0) { 56 atomic_read(&dest->weight) > 0) {
73 least = dest; 57 least = dest;
74 loh = ip_vs_wlc_dest_overhead(least); 58 loh = ip_vs_dest_conn_overhead(least);
75 goto nextstage; 59 goto nextstage;
76 } 60 }
77 } 61 }
78 IP_VS_ERR_RL("WLC: no destination available\n"); 62 ip_vs_scheduler_err(svc, "no destination available");
79 return NULL; 63 return NULL;
80 64
81 /* 65 /*
@@ -85,7 +69,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
85 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 69 list_for_each_entry_continue(dest, &svc->destinations, n_list) {
86 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 70 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
87 continue; 71 continue;
88 doh = ip_vs_wlc_dest_overhead(dest); 72 doh = ip_vs_dest_conn_overhead(dest);
89 if (loh * atomic_read(&dest->weight) > 73 if (loh * atomic_read(&dest->weight) >
90 doh * atomic_read(&least->weight)) { 74 doh * atomic_read(&least->weight)) {
91 least = dest; 75 least = dest;
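The WLC scheduler's core comparison survives the refactor: a destination's load is estimated as activeconns*256 + inactconns (now computed by the shared ip_vs_dest_conn_overhead()), and candidates are compared cross-multiplied against their weights to avoid division. A standalone sketch of that selection logic, with plain ints standing in for the atomics:

    #include <stdio.h>

    struct dest { int active, inact, weight; };

    /* activeconns*256 + inactconns, as in ip_vs_dest_conn_overhead() */
    static unsigned int overhead(const struct dest *d)
    {
            return ((unsigned int)d->active << 8) + d->inact;
    }

    int main(void)
    {
            struct dest dests[] = { {10, 50, 1}, {4, 200, 1}, {10, 50, 3} };
            struct dest *least = &dests[0];
            unsigned int loh = overhead(least);

            for (int i = 1; i < 3; i++) {
                    unsigned int doh = overhead(&dests[i]);
                    /* "loh/least->weight > doh/dest->weight", division-free */
                    if (loh * dests[i].weight > doh * least->weight) {
                            least = &dests[i];
                            loh = doh;
                    }
            }
            printf("picked dest: active=%d inact=%d weight=%d\n",
                   least->active, least->inact, least->weight);
            return 0;
    }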
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 30db633f88f..1ef41f50723 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -147,8 +147,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
147 147
148 if (mark->cl == mark->cl->next) { 148 if (mark->cl == mark->cl->next) {
149 /* no dest entry */ 149 /* no dest entry */
150 IP_VS_ERR_RL("WRR: no destination available: " 150 ip_vs_scheduler_err(svc,
151 "no destinations present\n"); 151 "no destination available: "
152 "no destinations present");
152 dest = NULL; 153 dest = NULL;
153 goto out; 154 goto out;
154 } 155 }
@@ -162,8 +163,8 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
162 */ 163 */
163 if (mark->cw == 0) { 164 if (mark->cw == 0) {
164 mark->cl = &svc->destinations; 165 mark->cl = &svc->destinations;
165 IP_VS_ERR_RL("WRR: no destination " 166 ip_vs_scheduler_err(svc,
166 "available\n"); 167 "no destination available");
167 dest = NULL; 168 dest = NULL;
168 goto out; 169 goto out;
169 } 170 }
@@ -185,8 +186,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
185 /* back to the start, and no dest is found. 186 /* back to the start, and no dest is found.
186 It is only possible when all dests are OVERLOADED */ 187 It is only possible when all dests are OVERLOADED */
187 dest = NULL; 188 dest = NULL;
188 IP_VS_ERR_RL("WRR: no destination available: " 189 ip_vs_scheduler_err(svc,
189 "all destinations are overloaded\n"); 190 "no destination available: "
191 "all destinations are overloaded");
190 goto out; 192 goto out;
191 } 193 }
192 } 194 }
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 5325a3fbe4a..6132b213edd 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -43,6 +43,13 @@
43 43
44#include <net/ip_vs.h> 44#include <net/ip_vs.h>
45 45
46enum {
47 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */
48 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */
49 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
50 * local
51 */
52};
46 53
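Replacing the magic 1|2|4 with named IP_VS_RT_MODE_* flags makes call sites like the NAT and ICMP transmitters self-describing. The check itself is a plain bitmask test; a tiny self-contained illustration (enum values mirror the diff, the helper name is ours):

    #include <stdio.h>

    enum {
            RT_MODE_LOCAL     = 1,  /* allow local dest */
            RT_MODE_NON_LOCAL = 2,  /* allow non-local dest */
            RT_MODE_RDR       = 4,  /* allow redirect to local */
    };

    /* Mirrors the test in __ip_vs_get_out_rt(): reject the route when
     * its locality is not among the modes the caller allowed. */
    static int route_allowed(int local, int rt_mode)
    {
            return ((local ? RT_MODE_LOCAL : RT_MODE_NON_LOCAL) & rt_mode) != 0;
    }

    int main(void)
    {
            /* NAT xmit passes LOCAL|NON_LOCAL|RDR; tunnel only LOCAL|NON_LOCAL */
            printf("local route, tunnel mode: %d\n",
                   route_allowed(1, RT_MODE_LOCAL | RT_MODE_NON_LOCAL)); /* 1 */
            printf("local route, bypass mode: %d\n",
                   route_allowed(1, RT_MODE_NON_LOCAL));                 /* 0 */
            return 0;
    }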
47/* 54/*
48 * Destination cache to speed up outgoing route lookup 55 * Destination cache to speed up outgoing route lookup
@@ -77,11 +84,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
77 return dst; 84 return dst;
78} 85}
79 86
80/* 87/* Get route to destination or remote server */
81 * Get route to destination or remote server
82 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
83 * &4=Allow redirect from remote daddr to local
84 */
85static struct rtable * 88static struct rtable *
86__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, 89__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
87 __be32 daddr, u32 rtos, int rt_mode) 90 __be32 daddr, u32 rtos, int rt_mode)
@@ -95,12 +98,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
95 spin_lock(&dest->dst_lock); 98 spin_lock(&dest->dst_lock);
96 if (!(rt = (struct rtable *) 99 if (!(rt = (struct rtable *)
97 __ip_vs_dst_check(dest, rtos))) { 100 __ip_vs_dst_check(dest, rtos))) {
98 struct flowi fl = { 101 rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0);
99 .fl4_dst = dest->addr.ip, 102 if (IS_ERR(rt)) {
100 .fl4_tos = rtos,
101 };
102
103 if (ip_route_output_key(net, &rt, &fl)) {
104 spin_unlock(&dest->dst_lock); 103 spin_unlock(&dest->dst_lock);
105 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 104 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
106 &dest->addr.ip); 105 &dest->addr.ip);
@@ -113,12 +112,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
113 } 112 }
114 spin_unlock(&dest->dst_lock); 113 spin_unlock(&dest->dst_lock);
115 } else { 114 } else {
116 struct flowi fl = { 115 rt = ip_route_output(net, daddr, 0, rtos, 0);
117 .fl4_dst = daddr, 116 if (IS_ERR(rt)) {
118 .fl4_tos = rtos,
119 };
120
121 if (ip_route_output_key(net, &rt, &fl)) {
122 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 117 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
123 &daddr); 118 &daddr);
124 return NULL; 119 return NULL;
@@ -126,15 +121,16 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
126 } 121 }
127 122
128 local = rt->rt_flags & RTCF_LOCAL; 123 local = rt->rt_flags & RTCF_LOCAL;
129 if (!((local ? 1 : 2) & rt_mode)) { 124 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
125 rt_mode)) {
130 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", 126 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
131 (rt->rt_flags & RTCF_LOCAL) ? 127 (rt->rt_flags & RTCF_LOCAL) ?
132 "local":"non-local", &rt->rt_dst); 128 "local":"non-local", &rt->rt_dst);
133 ip_rt_put(rt); 129 ip_rt_put(rt);
134 return NULL; 130 return NULL;
135 } 131 }
136 if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) && 132 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
137 ort->rt_flags & RTCF_LOCAL)) { 133 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
138 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " 134 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
139 "requires NAT method, dest: %pI4\n", 135 "requires NAT method, dest: %pI4\n",
140 &ip_hdr(skb)->daddr, &rt->rt_dst); 136 &ip_hdr(skb)->daddr, &rt->rt_dst);
@@ -169,15 +165,15 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
169 return 0; 165 return 0;
170 refdst_drop(orefdst); 166 refdst_drop(orefdst);
171 } else { 167 } else {
172 struct flowi fl = { 168 struct flowi4 fl4 = {
173 .fl4_dst = iph->daddr, 169 .daddr = iph->daddr,
174 .fl4_src = iph->saddr, 170 .saddr = iph->saddr,
175 .fl4_tos = RT_TOS(iph->tos), 171 .flowi4_tos = RT_TOS(iph->tos),
176 .mark = skb->mark, 172 .flowi4_mark = skb->mark,
177 }; 173 };
178 struct rtable *rt;
179 174
180 if (ip_route_output_key(net, &rt, &fl)) 175 rt = ip_route_output_key(net, &fl4);
176 if (IS_ERR(rt))
181 return 0; 177 return 0;
182 if (!(rt->rt_flags & RTCF_LOCAL)) { 178 if (!(rt->rt_flags & RTCF_LOCAL)) {
183 ip_rt_put(rt); 179 ip_rt_put(rt);
@@ -202,22 +198,27 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
202 struct in6_addr *ret_saddr, int do_xfrm) 198 struct in6_addr *ret_saddr, int do_xfrm)
203{ 199{
204 struct dst_entry *dst; 200 struct dst_entry *dst;
205 struct flowi fl = { 201 struct flowi6 fl6 = {
206 .fl6_dst = *daddr, 202 .daddr = *daddr,
207 }; 203 };
208 204
209 dst = ip6_route_output(net, NULL, &fl); 205 dst = ip6_route_output(net, NULL, &fl6);
210 if (dst->error) 206 if (dst->error)
211 goto out_err; 207 goto out_err;
212 if (!ret_saddr) 208 if (!ret_saddr)
213 return dst; 209 return dst;
214 if (ipv6_addr_any(&fl.fl6_src) && 210 if (ipv6_addr_any(&fl6.saddr) &&
215 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, 211 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
216 &fl.fl6_dst, 0, &fl.fl6_src) < 0) 212 &fl6.daddr, 0, &fl6.saddr) < 0)
217 goto out_err;
218 if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
219 goto out_err; 213 goto out_err;
220 ipv6_addr_copy(ret_saddr, &fl.fl6_src); 214 if (do_xfrm) {
215 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
216 if (IS_ERR(dst)) {
217 dst = NULL;
218 goto out_err;
219 }
220 }
221 ipv6_addr_copy(ret_saddr, &fl6.saddr);
221 return dst; 222 return dst;
222 223
223out_err: 224out_err:
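The routing conversions above all follow the same new calling convention: lookups return the object or an encoded error pointer instead of filling an out-parameter, so callers test IS_ERR() rather than a return code. The encoding trick is small enough to show in full; this is a user-space re-implementation for illustration, not the kernel's linux/err.h:

    #include <errno.h>
    #include <stdio.h>

    /* Minimal ERR_PTR/IS_ERR in the style of linux/err.h: errno values
     * live in the last page of the address space. */
    #define MAX_ERRNO 4095
    static void *err_ptr(long err)      { return (void *)err; }
    static int   is_err(const void *p)
    { return (unsigned long)p >= (unsigned long)-MAX_ERRNO; }
    static long  ptr_err(const void *p) { return (long)p; }

    static void *route_lookup(int fail)
    {
            static int fake_route = 42;
            return fail ? err_ptr(-ENETUNREACH) : &fake_route;
    }

    int main(void)
    {
            void *rt = route_lookup(1);
            if (is_err(rt))
                    printf("lookup failed: %ld\n", ptr_err(rt)); /* -101 */
            else
                    printf("got route %d\n", *(int *)rt);
            return 0;
    }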
@@ -384,13 +385,14 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
384 385
385 EnterFunction(10); 386 EnterFunction(10);
386 387
387 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, 388 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
388 RT_TOS(iph->tos), 2))) 389 IP_VS_RT_MODE_NON_LOCAL)))
389 goto tx_error_icmp; 390 goto tx_error_icmp;
390 391
391 /* MTU checking */ 392 /* MTU checking */
392 mtu = dst_mtu(&rt->dst); 393 mtu = dst_mtu(&rt->dst);
393 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 394 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
395 !skb_is_gso(skb)) {
394 ip_rt_put(rt); 396 ip_rt_put(rt);
395 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 397 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
396 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 398 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +445,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
443 445
444 /* MTU checking */ 446 /* MTU checking */
445 mtu = dst_mtu(&rt->dst); 447 mtu = dst_mtu(&rt->dst);
446 if (skb->len > mtu) { 448 if (skb->len > mtu && !skb_is_gso(skb)) {
447 if (!skb->dev) { 449 if (!skb->dev) {
448 struct net *net = dev_net(skb_dst(skb)->dev); 450 struct net *net = dev_net(skb_dst(skb)->dev);
449 451
@@ -512,7 +514,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
512 } 514 }
513 515
514 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 516 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
515 RT_TOS(iph->tos), 1|2|4))) 517 RT_TOS(iph->tos),
518 IP_VS_RT_MODE_LOCAL |
519 IP_VS_RT_MODE_NON_LOCAL |
520 IP_VS_RT_MODE_RDR)))
516 goto tx_error_icmp; 521 goto tx_error_icmp;
517 local = rt->rt_flags & RTCF_LOCAL; 522 local = rt->rt_flags & RTCF_LOCAL;
518 /* 523 /*
@@ -543,7 +548,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
543 548
544 /* MTU checking */ 549 /* MTU checking */
545 mtu = dst_mtu(&rt->dst); 550 mtu = dst_mtu(&rt->dst);
546 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 551 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
552 !skb_is_gso(skb)) {
547 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 553 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
548 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, 554 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
549 "ip_vs_nat_xmit(): frag needed for"); 555 "ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +664,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
658 664
659 /* MTU checking */ 665 /* MTU checking */
660 mtu = dst_mtu(&rt->dst); 666 mtu = dst_mtu(&rt->dst);
661 if (skb->len > mtu) { 667 if (skb->len > mtu && !skb_is_gso(skb)) {
662 if (!skb->dev) { 668 if (!skb->dev) {
663 struct net *net = dev_net(skb_dst(skb)->dev); 669 struct net *net = dev_net(skb_dst(skb)->dev);
664 670
@@ -754,7 +760,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
754 EnterFunction(10); 760 EnterFunction(10);
755 761
756 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 762 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
757 RT_TOS(tos), 1|2))) 763 RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
764 IP_VS_RT_MODE_NON_LOCAL)))
758 goto tx_error_icmp; 765 goto tx_error_icmp;
759 if (rt->rt_flags & RTCF_LOCAL) { 766 if (rt->rt_flags & RTCF_LOCAL) {
760 ip_rt_put(rt); 767 ip_rt_put(rt);
@@ -773,8 +780,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
773 780
774 df |= (old_iph->frag_off & htons(IP_DF)); 781 df |= (old_iph->frag_off & htons(IP_DF));
775 782
776 if ((old_iph->frag_off & htons(IP_DF)) 783 if ((old_iph->frag_off & htons(IP_DF) &&
777 && mtu < ntohs(old_iph->tot_len)) { 784 mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
778 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 785 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
779 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 786 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
780 goto tx_error_put; 787 goto tx_error_put;
@@ -886,7 +893,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
886 if (skb_dst(skb)) 893 if (skb_dst(skb))
887 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 894 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
888 895
889 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 896 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
897 !skb_is_gso(skb)) {
890 if (!skb->dev) { 898 if (!skb->dev) {
891 struct net *net = dev_net(skb_dst(skb)->dev); 899 struct net *net = dev_net(skb_dst(skb)->dev);
892 900
@@ -982,7 +990,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
982 EnterFunction(10); 990 EnterFunction(10);
983 991
984 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 992 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
985 RT_TOS(iph->tos), 1|2))) 993 RT_TOS(iph->tos),
994 IP_VS_RT_MODE_LOCAL |
995 IP_VS_RT_MODE_NON_LOCAL)))
986 goto tx_error_icmp; 996 goto tx_error_icmp;
987 if (rt->rt_flags & RTCF_LOCAL) { 997 if (rt->rt_flags & RTCF_LOCAL) {
988 ip_rt_put(rt); 998 ip_rt_put(rt);
@@ -991,7 +1001,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
991 1001
992 /* MTU checking */ 1002 /* MTU checking */
993 mtu = dst_mtu(&rt->dst); 1003 mtu = dst_mtu(&rt->dst);
994 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { 1004 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1005 !skb_is_gso(skb)) {
995 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 1006 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
996 ip_rt_put(rt); 1007 ip_rt_put(rt);
997 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1008 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1125,7 +1136,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1125 */ 1136 */
1126 1137
1127 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1138 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1128 RT_TOS(ip_hdr(skb)->tos), 1|2|4))) 1139 RT_TOS(ip_hdr(skb)->tos),
1140 IP_VS_RT_MODE_LOCAL |
1141 IP_VS_RT_MODE_NON_LOCAL |
1142 IP_VS_RT_MODE_RDR)))
1129 goto tx_error_icmp; 1143 goto tx_error_icmp;
1130 local = rt->rt_flags & RTCF_LOCAL; 1144 local = rt->rt_flags & RTCF_LOCAL;
1131 1145
@@ -1158,7 +1172,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1158 1172
1159 /* MTU checking */ 1173 /* MTU checking */
1160 mtu = dst_mtu(&rt->dst); 1174 mtu = dst_mtu(&rt->dst);
1161 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1175 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1176 !skb_is_gso(skb)) {
1162 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1177 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1163 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1178 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1164 goto tx_error_put; 1179 goto tx_error_put;
@@ -1272,7 +1287,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1272 1287
1273 /* MTU checking */ 1288 /* MTU checking */
1274 mtu = dst_mtu(&rt->dst); 1289 mtu = dst_mtu(&rt->dst);
1275 if (skb->len > mtu) { 1290 if (skb->len > mtu && !skb_is_gso(skb)) {
1276 if (!skb->dev) { 1291 if (!skb->dev) {
1277 struct net *net = dev_net(skb_dst(skb)->dev); 1292 struct net *net = dev_net(skb_dst(skb)->dev);
1278 1293
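The recurring one-line change in every transmitter above is the same fix: a GSO skb may legitimately exceed the route MTU, because the stack segments it later, so the frag-needed ICMP must only fire for non-GSO packets. The decision distilled into a predicate (a sketch of the condition, not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    /* Should we bounce the packet with ICMP_FRAG_NEEDED?  Mirrors the
     * updated checks: oversize AND DF set AND not a GSO super-packet. */
    static bool frag_needed(unsigned int skb_len, unsigned int mtu,
                            bool df_set, bool is_gso)
    {
            return skb_len > mtu && df_set && !is_gso;
    }

    int main(void)
    {
            printf("%d\n", frag_needed(3000, 1500, true, false)); /* 1: ICMP */
            printf("%d\n", frag_needed(3000, 1500, true, true));  /* 0: GSO */
            return 0;
    }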
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644
index 00000000000..4e99cca6161
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,82 @@
1/*
2 * broadcast connection tracking helper
3 *
4 * (c) 2005 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/ip.h>
14#include <net/route.h>
15#include <linux/inetdevice.h>
16#include <linux/skbuff.h>
17
18#include <net/netfilter/nf_conntrack.h>
19#include <net/netfilter/nf_conntrack_helper.h>
20#include <net/netfilter/nf_conntrack_expect.h>
21
22int nf_conntrack_broadcast_help(struct sk_buff *skb,
23 unsigned int protoff,
24 struct nf_conn *ct,
25 enum ip_conntrack_info ctinfo,
26 unsigned int timeout)
27{
28 struct nf_conntrack_expect *exp;
29 struct iphdr *iph = ip_hdr(skb);
30 struct rtable *rt = skb_rtable(skb);
31 struct in_device *in_dev;
32 struct nf_conn_help *help = nfct_help(ct);
33 __be32 mask = 0;
34
35 /* we're only interested in locally generated packets */
36 if (skb->sk == NULL)
37 goto out;
38 if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
39 goto out;
40 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
41 goto out;
42
43 rcu_read_lock();
44 in_dev = __in_dev_get_rcu(rt->dst.dev);
45 if (in_dev != NULL) {
46 for_primary_ifa(in_dev) {
47 if (ifa->ifa_broadcast == iph->daddr) {
48 mask = ifa->ifa_mask;
49 break;
50 }
51 } endfor_ifa(in_dev);
52 }
53 rcu_read_unlock();
54
55 if (mask == 0)
56 goto out;
57
58 exp = nf_ct_expect_alloc(ct);
59 if (exp == NULL)
60 goto out;
61
62 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
63 exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
64
65 exp->mask.src.u3.ip = mask;
66 exp->mask.src.u.udp.port = htons(0xFFFF);
67
68 exp->expectfn = NULL;
69 exp->flags = NF_CT_EXPECT_PERMANENT;
70 exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
71 exp->helper = NULL;
72
73 nf_ct_expect_related(exp);
74 nf_ct_expect_put(exp);
75
76 nf_ct_refresh(ct, skb, timeout * HZ);
77out:
78 return NF_ACCEPT;
79}
80EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
81
82MODULE_LICENSE("GPL");
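nf_conntrack_broadcast_help() is meant to be shared by protocol helpers (netbios-ns being the obvious candidate) whose job reduces to "expect the broadcast reply". The fragment below sketches the thin wrapper such a helper might register; the function name and timeout are hypothetical, and this is kernel-style code, not a standalone program:

    /* Hypothetical helper callback built on nf_conntrack_broadcast_help();
     * a real helper would hang this off its struct nf_conntrack_helper. */
    static unsigned int timeout = 3;    /* seconds to keep the expectation */

    static int example_bcast_help(struct sk_buff *skb, unsigned int protoff,
                                  struct nf_conn *ct,
                                  enum ip_conntrack_info ctinfo)
    {
            /* all broadcast/expectation logic lives in the shared helper */
            return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo,
                                               timeout);
    }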
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 84f4fcc5884..941286ca911 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
43#include <net/netfilter/nf_conntrack_acct.h> 43#include <net/netfilter/nf_conntrack_acct.h>
44#include <net/netfilter/nf_conntrack_ecache.h> 44#include <net/netfilter/nf_conntrack_ecache.h>
45#include <net/netfilter/nf_conntrack_zones.h> 45#include <net/netfilter/nf_conntrack_zones.h>
46#include <net/netfilter/nf_conntrack_timestamp.h>
46#include <net/netfilter/nf_nat.h> 47#include <net/netfilter/nf_nat.h>
47#include <net/netfilter/nf_nat_core.h> 48#include <net/netfilter/nf_nat_core.h>
48 49
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
282static void death_by_timeout(unsigned long ul_conntrack) 283static void death_by_timeout(unsigned long ul_conntrack)
283{ 284{
284 struct nf_conn *ct = (void *)ul_conntrack; 285 struct nf_conn *ct = (void *)ul_conntrack;
286 struct nf_conn_tstamp *tstamp;
287
288 tstamp = nf_conn_tstamp_find(ct);
289 if (tstamp && tstamp->stop == 0)
290 tstamp->stop = ktime_to_ns(ktime_get_real());
285 291
286 if (!test_bit(IPS_DYING_BIT, &ct->status) && 292 if (!test_bit(IPS_DYING_BIT, &ct->status) &&
287 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { 293 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
419 struct nf_conntrack_tuple_hash *h; 425 struct nf_conntrack_tuple_hash *h;
420 struct nf_conn *ct; 426 struct nf_conn *ct;
421 struct nf_conn_help *help; 427 struct nf_conn_help *help;
428 struct nf_conn_tstamp *tstamp;
422 struct hlist_nulls_node *n; 429 struct hlist_nulls_node *n;
423 enum ip_conntrack_info ctinfo; 430 enum ip_conntrack_info ctinfo;
424 struct net *net; 431 struct net *net;
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
486 ct->timeout.expires += jiffies; 493 ct->timeout.expires += jiffies;
487 add_timer(&ct->timeout); 494 add_timer(&ct->timeout);
488 atomic_inc(&ct->ct_general.use); 495 atomic_inc(&ct->ct_general.use);
489 set_bit(IPS_CONFIRMED_BIT, &ct->status); 496 ct->status |= IPS_CONFIRMED;
497
498 /* set conntrack timestamp, if enabled. */
499 tstamp = nf_conn_tstamp_find(ct);
500 if (tstamp) {
501 if (skb->tstamp.tv64 == 0)
502 __net_timestamp((struct sk_buff *)skb);
490 503
504 tstamp->start = ktime_to_ns(skb->tstamp);
505 }
491 /* Since the lookup is lockless, hash insertion must be done after 506 /* Since the lookup is lockless, hash insertion must be done after
492 * starting the timer and setting the CONFIRMED bit. The RCU barriers 507 * starting the timer and setting the CONFIRMED bit. The RCU barriers
493 * guarantee that no other CPU can find the conntrack before the above 508 * guarantee that no other CPU can find the conntrack before the above
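With the new timestamp extension, a connection's lifetime becomes stop - start in nanoseconds: start is taken from the skb timestamp at confirm time, stop is filled in at death. The bookkeeping is plain wall-clock arithmetic; a user-space analogue of the ktime calls:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t now_ns(void) /* ~ ktime_to_ns(ktime_get_real()) */
    {
            struct timespec ts;
            clock_gettime(CLOCK_REALTIME, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    int main(void)
    {
            uint64_t start = now_ns(); /* tstamp->start, set on confirm */
            /* ... connection lives here ... */
            uint64_t stop = now_ns();  /* tstamp->stop, death_by_timeout */
            printf("flow lasted %llu ns\n",
                   (unsigned long long)(stop - start));
            return 0;
    }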
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
655 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. 670 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
656 */ 671 */
657 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, 672 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
658 sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); 673 offsetof(struct nf_conn, proto) -
674 offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
659 spin_lock_init(&ct->lock); 675 spin_lock_init(&ct->lock);
660 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 676 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
661 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 677 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
745 } 761 }
746 762
747 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 763 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
764 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
748 765
749 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 766 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
750 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, 767 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1192,6 +1209,11 @@ struct __nf_ct_flush_report {
1192static int kill_report(struct nf_conn *i, void *data) 1209static int kill_report(struct nf_conn *i, void *data)
1193{ 1210{
1194 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 1211 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
1212 struct nf_conn_tstamp *tstamp;
1213
1214 tstamp = nf_conn_tstamp_find(i);
1215 if (tstamp && tstamp->stop == 0)
1216 tstamp->stop = ktime_to_ns(ktime_get_real());
1195 1217
1196 /* If we fail to deliver the event, death_by_timeout() will retry */ 1218 /* If we fail to deliver the event, death_by_timeout() will retry */
1197 if (nf_conntrack_event_report(IPCT_DESTROY, i, 1219 if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1208,9 +1230,9 @@ static int kill_all(struct nf_conn *i, void *data)
1208 return 1; 1230 return 1;
1209} 1231}
1210 1232
1211void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) 1233void nf_ct_free_hashtable(void *hash, unsigned int size)
1212{ 1234{
1213 if (vmalloced) 1235 if (is_vmalloc_addr(hash))
1214 vfree(hash); 1236 vfree(hash);
1215 else 1237 else
1216 free_pages((unsigned long)hash, 1238 free_pages((unsigned long)hash,
@@ -1277,9 +1299,9 @@ static void nf_conntrack_cleanup_net(struct net *net)
1277 goto i_see_dead_people; 1299 goto i_see_dead_people;
1278 } 1300 }
1279 1301
1280 nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1302 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1281 net->ct.htable_size);
1282 nf_conntrack_ecache_fini(net); 1303 nf_conntrack_ecache_fini(net);
1304 nf_conntrack_tstamp_fini(net);
1283 nf_conntrack_acct_fini(net); 1305 nf_conntrack_acct_fini(net);
1284 nf_conntrack_expect_fini(net); 1306 nf_conntrack_expect_fini(net);
1285 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1307 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
@@ -1307,21 +1329,18 @@ void nf_conntrack_cleanup(struct net *net)
1307 } 1329 }
1308} 1330}
1309 1331
1310void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) 1332void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
1311{ 1333{
1312 struct hlist_nulls_head *hash; 1334 struct hlist_nulls_head *hash;
1313 unsigned int nr_slots, i; 1335 unsigned int nr_slots, i;
1314 size_t sz; 1336 size_t sz;
1315 1337
1316 *vmalloced = 0;
1317
1318 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 1338 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1319 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); 1339 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1320 sz = nr_slots * sizeof(struct hlist_nulls_head); 1340 sz = nr_slots * sizeof(struct hlist_nulls_head);
1321 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1341 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1322 get_order(sz)); 1342 get_order(sz));
1323 if (!hash) { 1343 if (!hash) {
1324 *vmalloced = 1;
1325 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1344 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1326 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, 1345 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1327 PAGE_KERNEL); 1346 PAGE_KERNEL);
@@ -1337,7 +1356,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
1337 1356
1338int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1357int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1339{ 1358{
1340 int i, bucket, vmalloced, old_vmalloced; 1359 int i, bucket;
1341 unsigned int hashsize, old_size; 1360 unsigned int hashsize, old_size;
1342 struct hlist_nulls_head *hash, *old_hash; 1361 struct hlist_nulls_head *hash, *old_hash;
1343 struct nf_conntrack_tuple_hash *h; 1362 struct nf_conntrack_tuple_hash *h;
@@ -1354,7 +1373,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1354 if (!hashsize) 1373 if (!hashsize)
1355 return -EINVAL; 1374 return -EINVAL;
1356 1375
1357 hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); 1376 hash = nf_ct_alloc_hashtable(&hashsize, 1);
1358 if (!hash) 1377 if (!hash)
1359 return -ENOMEM; 1378 return -ENOMEM;
1360 1379
@@ -1376,15 +1395,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1376 } 1395 }
1377 } 1396 }
1378 old_size = init_net.ct.htable_size; 1397 old_size = init_net.ct.htable_size;
1379 old_vmalloced = init_net.ct.hash_vmalloc;
1380 old_hash = init_net.ct.hash; 1398 old_hash = init_net.ct.hash;
1381 1399
1382 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; 1400 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
1383 init_net.ct.hash_vmalloc = vmalloced;
1384 init_net.ct.hash = hash; 1401 init_net.ct.hash = hash;
1385 spin_unlock_bh(&nf_conntrack_lock); 1402 spin_unlock_bh(&nf_conntrack_lock);
1386 1403
1387 nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); 1404 nf_ct_free_hashtable(old_hash, old_size);
1388 return 0; 1405 return 0;
1389} 1406}
1390EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 1407EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1497,8 +1514,7 @@ static int nf_conntrack_init_net(struct net *net)
1497 } 1514 }
1498 1515
1499 net->ct.htable_size = nf_conntrack_htable_size; 1516 net->ct.htable_size = nf_conntrack_htable_size;
1500 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1517 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
1501 &net->ct.hash_vmalloc, 1);
1502 if (!net->ct.hash) { 1518 if (!net->ct.hash) {
1503 ret = -ENOMEM; 1519 ret = -ENOMEM;
1504 printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); 1520 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1510,6 +1526,9 @@ static int nf_conntrack_init_net(struct net *net)
1510 ret = nf_conntrack_acct_init(net); 1526 ret = nf_conntrack_acct_init(net);
1511 if (ret < 0) 1527 if (ret < 0)
1512 goto err_acct; 1528 goto err_acct;
1529 ret = nf_conntrack_tstamp_init(net);
1530 if (ret < 0)
1531 goto err_tstamp;
1513 ret = nf_conntrack_ecache_init(net); 1532 ret = nf_conntrack_ecache_init(net);
1514 if (ret < 0) 1533 if (ret < 0)
1515 goto err_ecache; 1534 goto err_ecache;
@@ -1517,12 +1536,13 @@ static int nf_conntrack_init_net(struct net *net)
1517 return 0; 1536 return 0;
1518 1537
1519err_ecache: 1538err_ecache:
1539 nf_conntrack_tstamp_fini(net);
1540err_tstamp:
1520 nf_conntrack_acct_fini(net); 1541 nf_conntrack_acct_fini(net);
1521err_acct: 1542err_acct:
1522 nf_conntrack_expect_fini(net); 1543 nf_conntrack_expect_fini(net);
1523err_expect: 1544err_expect:
1524 nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1545 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1525 net->ct.htable_size);
1526err_hash: 1546err_hash:
1527 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1547 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1528err_cache: 1548err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index a20fb0bd1ef..cd1e8e0970f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
319 const struct nf_conntrack_expect_policy *p; 319 const struct nf_conntrack_expect_policy *p;
320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); 320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
321 321
322 atomic_inc(&exp->use); 322 /* two references : one for hash insert, one for the timer */
323 atomic_add(2, &exp->use);
323 324
324 if (master_help) { 325 if (master_help) {
325 hlist_add_head(&exp->lnode, &master_help->expectations); 326 hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
333 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 334 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
334 (unsigned long)exp); 335 (unsigned long)exp);
335 if (master_help) { 336 if (master_help) {
336 p = &master_help->helper->expect_policy[exp->class]; 337 p = &rcu_dereference_protected(
338 master_help->helper,
339 lockdep_is_held(&nf_conntrack_lock)
340 )->expect_policy[exp->class];
337 exp->timeout.expires = jiffies + p->timeout * HZ; 341 exp->timeout.expires = jiffies + p->timeout * HZ;
338 } 342 }
339 add_timer(&exp->timeout); 343 add_timer(&exp->timeout);
340 344
341 atomic_inc(&exp->use);
342 NF_CT_STAT_INC(net, expect_create); 345 NF_CT_STAT_INC(net, expect_create);
343} 346}
344 347
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
369 if (!del_timer(&i->timeout)) 372 if (!del_timer(&i->timeout))
370 return 0; 373 return 0;
371 374
372 p = &master_help->helper->expect_policy[i->class]; 375 p = &rcu_dereference_protected(
376 master_help->helper,
377 lockdep_is_held(&nf_conntrack_lock)
378 )->expect_policy[i->class];
373 i->timeout.expires = jiffies + p->timeout * HZ; 379 i->timeout.expires = jiffies + p->timeout * HZ;
374 add_timer(&i->timeout); 380 add_timer(&i->timeout);
375 return 1; 381 return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
407 } 413 }
408 /* Will be over limit? */ 414 /* Will be over limit? */
409 if (master_help) { 415 if (master_help) {
410 p = &master_help->helper->expect_policy[expect->class]; 416 p = &rcu_dereference_protected(
417 master_help->helper,
418 lockdep_is_held(&nf_conntrack_lock)
419 )->expect_policy[expect->class];
411 if (p->max_expected && 420 if (p->max_expected &&
412 master_help->expecting[expect->class] >= p->max_expected) { 421 master_help->expecting[expect->class] >= p->max_expected) {
413 evict_oldest_expect(master, expect); 422 evict_oldest_expect(master, expect);
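The repeated pattern in these three hunks deserves spelling out: help->helper is an RCU-managed pointer, but all three call sites run with nf_conntrack_lock held, so plain rcu_dereference() would trip lockdep/sparse for no reason. rcu_dereference_protected() documents the locking assumption instead. Condensed into a kernel-style fragment (not standalone code; the intermediate variable is ours):

    /* Reading an RCU pointer on the update side, where the lock that
     * excludes writers is already held: */
    struct nf_conntrack_helper *helper;

    helper = rcu_dereference_protected(master_help->helper,
                                       lockdep_is_held(&nf_conntrack_lock));
    p = &helper->expect_policy[exp->class];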
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
478 struct hlist_node *n; 487 struct hlist_node *n;
479 488
480 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 489 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
481 n = rcu_dereference(net->ct.expect_hash[st->bucket].first); 490 n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
482 if (n) 491 if (n)
483 return n; 492 return n;
484 } 493 }
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
491 struct net *net = seq_file_net(seq); 500 struct net *net = seq_file_net(seq);
492 struct ct_expect_iter_state *st = seq->private; 501 struct ct_expect_iter_state *st = seq->private;
493 502
494 head = rcu_dereference(head->next); 503 head = rcu_dereference(hlist_next_rcu(head));
495 while (head == NULL) { 504 while (head == NULL) {
496 if (++st->bucket >= nf_ct_expect_hsize) 505 if (++st->bucket >= nf_ct_expect_hsize)
497 return NULL; 506 return NULL;
498 head = rcu_dereference(net->ct.expect_hash[st->bucket].first); 507 head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
499 } 508 }
500 return head; 509 return head;
501} 510}
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
630 } 639 }
631 640
632 net->ct.expect_count = 0; 641 net->ct.expect_count = 0;
633 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 642 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
634 &net->ct.expect_vmalloc, 0);
635 if (net->ct.expect_hash == NULL) 643 if (net->ct.expect_hash == NULL)
636 goto err1; 644 goto err1;
637 645
@@ -653,8 +661,7 @@ err3:
653 if (net_eq(net, &init_net)) 661 if (net_eq(net, &init_net))
654 kmem_cache_destroy(nf_ct_expect_cachep); 662 kmem_cache_destroy(nf_ct_expect_cachep);
655err2: 663err2:
656 nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, 664 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
657 nf_ct_expect_hsize);
658err1: 665err1:
659 return err; 666 return err;
660} 667}
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
666 rcu_barrier(); /* Wait for call_rcu() before destroy */ 673 rcu_barrier(); /* Wait for call_rcu() before destroy */
667 kmem_cache_destroy(nf_ct_expect_cachep); 674 kmem_cache_destroy(nf_ct_expect_cachep);
668 } 675 }
669 nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, 676 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
670 nf_ct_expect_hsize);
671} 677}
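
The expect-table conversions above replace plain reads of RCU-managed pointers with rcu_dereference_protected(), which documents (and lets lockdep verify) that the update-side lock is held instead of relying on rcu_read_lock(). A minimal sketch of the idiom under an assumed spinlock; struct foo, foo_lock and replace_foo() are illustrative names, not code from this patch:

struct foo { int val; };

static DEFINE_SPINLOCK(foo_lock);
static struct foo __rcu *global_foo;    /* writes serialized by foo_lock */

static void replace_foo(struct foo *newp)
{
        struct foo *old;

        spin_lock(&foo_lock);
        /* lock held: no rcu_read_lock() needed, and sparse stays quiet */
        old = rcu_dereference_protected(global_foo,
                                        lockdep_is_held(&foo_lock));
        rcu_assign_pointer(global_foo, newp);
        spin_unlock(&foo_lock);

        synchronize_rcu();              /* wait out readers of 'old' */
        kfree(old);
}

The same shape recurs throughout this series wherever nf_conntrack_lock or a registration mutex already guards the pointer.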
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bd82450c193..80a23ed62bb 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
140 /* This assumes that extended areas in conntrack for the types 140 /* This assumes that extended areas in conntrack for the types
 141 whose NF_CT_EXT_F_PREALLOC bit is set are allocated in order */ 141 whose NF_CT_EXT_F_PREALLOC bit is set are allocated in order */
142 for (i = min; i <= max; i++) { 142 for (i = min; i <= max; i++) {
143 t1 = nf_ct_ext_types[i]; 143 t1 = rcu_dereference_protected(nf_ct_ext_types[i],
144 lockdep_is_held(&nf_ct_ext_type_mutex));
144 if (!t1) 145 if (!t1)
145 continue; 146 continue;
146 147
147 t1->alloc_size = sizeof(struct nf_ct_ext) 148 t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
148 + ALIGN(sizeof(struct nf_ct_ext), t1->align) 149 t1->len;
149 + t1->len;
150 for (j = 0; j < NF_CT_EXT_NUM; j++) { 150 for (j = 0; j < NF_CT_EXT_NUM; j++) {
151 t2 = nf_ct_ext_types[j]; 151 t2 = rcu_dereference_protected(nf_ct_ext_types[j],
152 lockdep_is_held(&nf_ct_ext_type_mutex));
152 if (t2 == NULL || t2 == t1 || 153 if (t2 == NULL || t2 == t1 ||
153 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) 154 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
154 continue; 155 continue;
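
The alloc_size fix above removes a double-counted header: extension data begins at the aligned end of struct nf_ct_ext, so the space needed for the first preallocated extension is ALIGN(header, align) + len, not header + ALIGN(header, align) + len. A worked example with made-up sizes (ALIGN rounds up to the next multiple of a power-of-two alignment):

#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

/* Hypothetical numbers, for illustration only. */
enum {
        HDR = 12,       /* sizeof(struct nf_ct_ext), say */
        AL  = 8,        /* t1->align */
        LEN = 16,       /* t1->len */

        OLD = HDR + ALIGN(HDR, AL) + LEN,  /* 12 + 16 + 16 = 44 (header twice) */
        NEW = ALIGN(HDR, AL) + LEN,        /* 16 + 16 = 32 */
};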
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index b969025cf82..533a183e666 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -714,7 +714,6 @@ static int callforward_do_filter(const union nf_inet_addr *src,
714 u_int8_t family) 714 u_int8_t family)
715{ 715{
716 const struct nf_afinfo *afinfo; 716 const struct nf_afinfo *afinfo;
717 struct flowi fl1, fl2;
718 int ret = 0; 717 int ret = 0;
719 718
720 /* rcu_read_lock()ed by nf_hook_slow() */ 719 /* rcu_read_lock()ed by nf_hook_slow() */
@@ -722,17 +721,20 @@ static int callforward_do_filter(const union nf_inet_addr *src,
722 if (!afinfo) 721 if (!afinfo)
723 return 0; 722 return 0;
724 723
725 memset(&fl1, 0, sizeof(fl1));
726 memset(&fl2, 0, sizeof(fl2));
727
728 switch (family) { 724 switch (family) {
729 case AF_INET: { 725 case AF_INET: {
726 struct flowi4 fl1, fl2;
730 struct rtable *rt1, *rt2; 727 struct rtable *rt1, *rt2;
731 728
732 fl1.fl4_dst = src->ip; 729 memset(&fl1, 0, sizeof(fl1));
733 fl2.fl4_dst = dst->ip; 730 fl1.daddr = src->ip;
734 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { 731
735 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { 732 memset(&fl2, 0, sizeof(fl2));
733 fl2.daddr = dst->ip;
734 if (!afinfo->route((struct dst_entry **)&rt1,
735 flowi4_to_flowi(&fl1))) {
736 if (!afinfo->route((struct dst_entry **)&rt2,
737 flowi4_to_flowi(&fl2))) {
736 if (rt1->rt_gateway == rt2->rt_gateway && 738 if (rt1->rt_gateway == rt2->rt_gateway &&
737 rt1->dst.dev == rt2->dst.dev) 739 rt1->dst.dev == rt2->dst.dev)
738 ret = 1; 740 ret = 1;
@@ -745,12 +747,18 @@ static int callforward_do_filter(const union nf_inet_addr *src,
745#if defined(CONFIG_NF_CONNTRACK_IPV6) || \ 747#if defined(CONFIG_NF_CONNTRACK_IPV6) || \
746 defined(CONFIG_NF_CONNTRACK_IPV6_MODULE) 748 defined(CONFIG_NF_CONNTRACK_IPV6_MODULE)
747 case AF_INET6: { 749 case AF_INET6: {
750 struct flowi6 fl1, fl2;
748 struct rt6_info *rt1, *rt2; 751 struct rt6_info *rt1, *rt2;
749 752
750 memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); 753 memset(&fl1, 0, sizeof(fl1));
751 memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); 754 ipv6_addr_copy(&fl1.daddr, &src->in6);
752 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { 755
753 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { 756 memset(&fl2, 0, sizeof(fl2));
757 ipv6_addr_copy(&fl2.daddr, &dst->in6);
758 if (!afinfo->route((struct dst_entry **)&rt1,
759 flowi6_to_flowi(&fl1))) {
760 if (!afinfo->route((struct dst_entry **)&rt2,
761 flowi6_to_flowi(&fl2))) {
754 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, 762 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
755 sizeof(rt1->rt6i_gateway)) && 763 sizeof(rt1->rt6i_gateway)) &&
756 rt1->dst.dev == rt2->dst.dev) 764 rt1->dst.dev == rt2->dst.dev)
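
The h323 hunks follow the flowi split: rather than one catch-all struct flowi with fl4_*/fl6_* members, each family now builds its own typed key (struct flowi4 or struct flowi6) and converts it only at the call boundary with flowi4_to_flowi()/flowi6_to_flowi(). A reduced sketch of the IPv4 half; route_to() is an invented wrapper, and afinfo->route() returning 0 on success matches its use above:

static struct rtable *route_to(const struct nf_afinfo *afinfo, __be32 daddr)
{
        struct flowi4 fl4;
        struct rtable *rt;

        memset(&fl4, 0, sizeof(fl4));
        fl4.daddr = daddr;              /* only the destination is needed here */

        if (afinfo->route((struct dst_entry **)&rt, flowi4_to_flowi(&fl4)))
                return NULL;
        return rt;                      /* rt->rt_gateway / rt->dst.dev usable */
}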
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 59e1a4cd4e8..1bdfea35795 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
33static struct hlist_head *nf_ct_helper_hash __read_mostly; 33static struct hlist_head *nf_ct_helper_hash __read_mostly;
34static unsigned int nf_ct_helper_hsize __read_mostly; 34static unsigned int nf_ct_helper_hsize __read_mostly;
35static unsigned int nf_ct_helper_count __read_mostly; 35static unsigned int nf_ct_helper_count __read_mostly;
36static int nf_ct_helper_vmalloc;
37 36
38 37
39/* Stupid hash, but collision free for the default registrations of the 38/* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
158 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); 157 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
159 struct nf_conn_help *help = nfct_help(ct); 158 struct nf_conn_help *help = nfct_help(ct);
160 159
161 if (help && help->helper == me) { 160 if (help && rcu_dereference_protected(
161 help->helper,
162 lockdep_is_held(&nf_conntrack_lock)
163 ) == me) {
162 nf_conntrack_event(IPCT_HELPER, ct); 164 nf_conntrack_event(IPCT_HELPER, ct);
163 rcu_assign_pointer(help->helper, NULL); 165 rcu_assign_pointer(help->helper, NULL);
164 } 166 }
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
210 hlist_for_each_entry_safe(exp, n, next, 212 hlist_for_each_entry_safe(exp, n, next,
211 &net->ct.expect_hash[i], hnode) { 213 &net->ct.expect_hash[i], hnode) {
212 struct nf_conn_help *help = nfct_help(exp->master); 214 struct nf_conn_help *help = nfct_help(exp->master);
213 if ((help->helper == me || exp->helper == me) && 215 if ((rcu_dereference_protected(
216 help->helper,
217 lockdep_is_held(&nf_conntrack_lock)
218 ) == me || exp->helper == me) &&
214 del_timer(&exp->timeout)) { 219 del_timer(&exp->timeout)) {
215 nf_ct_unlink_expect(exp); 220 nf_ct_unlink_expect(exp);
216 nf_ct_expect_put(exp); 221 nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void)
261 int err; 266 int err;
262 267
263 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ 268 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
264 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 269 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
265 &nf_ct_helper_vmalloc, 0);
266 if (!nf_ct_helper_hash) 270 if (!nf_ct_helper_hash)
267 return -ENOMEM; 271 return -ENOMEM;
268 272
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void)
273 return 0; 277 return 0;
274 278
275err1: 279err1:
276 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 280 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
277 nf_ct_helper_hsize);
278 return err; 281 return err;
279} 282}
280 283
281void nf_conntrack_helper_fini(void) 284void nf_conntrack_helper_fini(void)
282{ 285{
283 nf_ct_extend_unregister(&helper_extend); 286 nf_ct_extend_unregister(&helper_extend);
284 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 287 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
285 nf_ct_helper_hsize);
286} 288}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index aadde018a07..4c8f30a3d6d 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -18,14 +18,7 @@
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/inetdevice.h>
24#include <linux/if_addr.h>
25#include <linux/in.h> 21#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/netfilter.h>
28#include <net/route.h>
29 22
30#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
31#include <net/netfilter/nf_conntrack_helper.h> 24#include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
40MODULE_ALIAS_NFCT_HELPER("netbios_ns"); 33MODULE_ALIAS_NFCT_HELPER("netbios_ns");
41 34
42static unsigned int timeout __read_mostly = 3; 35static unsigned int timeout __read_mostly = 3;
43module_param(timeout, uint, 0400); 36module_param(timeout, uint, S_IRUSR);
44MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 37MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
45 38
46static int help(struct sk_buff *skb, unsigned int protoff,
47 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
48{
49 struct nf_conntrack_expect *exp;
50 struct iphdr *iph = ip_hdr(skb);
51 struct rtable *rt = skb_rtable(skb);
52 struct in_device *in_dev;
53 __be32 mask = 0;
54
55 /* we're only interested in locally generated packets */
56 if (skb->sk == NULL)
57 goto out;
58 if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
59 goto out;
60 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
61 goto out;
62
63 rcu_read_lock();
64 in_dev = __in_dev_get_rcu(rt->dst.dev);
65 if (in_dev != NULL) {
66 for_primary_ifa(in_dev) {
67 if (ifa->ifa_broadcast == iph->daddr) {
68 mask = ifa->ifa_mask;
69 break;
70 }
71 } endfor_ifa(in_dev);
72 }
73 rcu_read_unlock();
74
75 if (mask == 0)
76 goto out;
77
78 exp = nf_ct_expect_alloc(ct);
79 if (exp == NULL)
80 goto out;
81
82 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
83 exp->tuple.src.u.udp.port = htons(NMBD_PORT);
84
85 exp->mask.src.u3.ip = mask;
86 exp->mask.src.u.udp.port = htons(0xFFFF);
87
88 exp->expectfn = NULL;
89 exp->flags = NF_CT_EXPECT_PERMANENT;
90 exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
91 exp->helper = NULL;
92
93 nf_ct_expect_related(exp);
94 nf_ct_expect_put(exp);
95
96 nf_ct_refresh(ct, skb, timeout * HZ);
97out:
98 return NF_ACCEPT;
99}
100
101static struct nf_conntrack_expect_policy exp_policy = { 39static struct nf_conntrack_expect_policy exp_policy = {
102 .max_expected = 1, 40 .max_expected = 1,
103}; 41};
104 42
43static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
44 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
45{
46 return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
47}
48
105static struct nf_conntrack_helper helper __read_mostly = { 49static struct nf_conntrack_helper helper __read_mostly = {
106 .name = "netbios-ns", 50 .name = "netbios-ns",
107 .tuple.src.l3num = AF_INET, 51 .tuple.src.l3num = NFPROTO_IPV4,
108 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), 52 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
109 .tuple.dst.protonum = IPPROTO_UDP, 53 .tuple.dst.protonum = IPPROTO_UDP,
110 .me = THIS_MODULE, 54 .me = THIS_MODULE,
111 .help = help, 55 .help = netbios_ns_help,
112 .expect_policy = &exp_policy, 56 .expect_policy = &exp_policy,
113}; 57};
114 58
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index eead9db6f89..30bf8a167fc 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
42#include <net/netfilter/nf_conntrack_tuple.h> 42#include <net/netfilter/nf_conntrack_tuple.h>
43#include <net/netfilter/nf_conntrack_acct.h> 43#include <net/netfilter/nf_conntrack_acct.h>
44#include <net/netfilter/nf_conntrack_zones.h> 44#include <net/netfilter/nf_conntrack_zones.h>
45#include <net/netfilter/nf_conntrack_timestamp.h>
45#ifdef CONFIG_NF_NAT_NEEDED 46#ifdef CONFIG_NF_NAT_NEEDED
46#include <net/netfilter/nf_nat_core.h> 47#include <net/netfilter/nf_nat_core.h>
47#include <net/netfilter/nf_nat_protocol.h> 48#include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
230 return -1; 231 return -1;
231} 232}
232 233
234static int
235ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
236{
237 struct nlattr *nest_count;
238 const struct nf_conn_tstamp *tstamp;
239
240 tstamp = nf_conn_tstamp_find(ct);
241 if (!tstamp)
242 return 0;
243
244 nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
245 if (!nest_count)
246 goto nla_put_failure;
247
248 NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
249 if (tstamp->stop != 0) {
250 NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
251 cpu_to_be64(tstamp->stop));
252 }
253 nla_nest_end(skb, nest_count);
254
255 return 0;
256
257nla_put_failure:
258 return -1;
259}
260
233#ifdef CONFIG_NF_CONNTRACK_MARK 261#ifdef CONFIG_NF_CONNTRACK_MARK
234static inline int 262static inline int
235ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) 263ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
404 ctnetlink_dump_timeout(skb, ct) < 0 || 432 ctnetlink_dump_timeout(skb, ct) < 0 ||
405 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 433 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
406 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || 434 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
435 ctnetlink_dump_timestamp(skb, ct) < 0 ||
407 ctnetlink_dump_protoinfo(skb, ct) < 0 || 436 ctnetlink_dump_protoinfo(skb, ct) < 0 ||
408 ctnetlink_dump_helpinfo(skb, ct) < 0 || 437 ctnetlink_dump_helpinfo(skb, ct) < 0 ||
409 ctnetlink_dump_mark(skb, ct) < 0 || 438 ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
471} 500}
472 501
473static inline size_t 502static inline size_t
503ctnetlink_timestamp_size(const struct nf_conn *ct)
504{
505#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
506 if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
507 return 0;
508 return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
509#else
510 return 0;
511#endif
512}
513
514static inline size_t
474ctnetlink_nlmsg_size(const struct nf_conn *ct) 515ctnetlink_nlmsg_size(const struct nf_conn *ct)
475{ 516{
476 return NLMSG_ALIGN(sizeof(struct nfgenmsg)) 517 return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
481 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ 522 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
482 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ 523 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
483 + ctnetlink_counters_size(ct) 524 + ctnetlink_counters_size(ct)
525 + ctnetlink_timestamp_size(ct)
484 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ 526 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
485 + nla_total_size(0) /* CTA_PROTOINFO */ 527 + nla_total_size(0) /* CTA_PROTOINFO */
486 + nla_total_size(0) /* CTA_HELP */ 528 + nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
571 613
572 if (events & (1 << IPCT_DESTROY)) { 614 if (events & (1 << IPCT_DESTROY)) {
573 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 615 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
574 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 616 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
617 ctnetlink_dump_timestamp(skb, ct) < 0)
575 goto nla_put_failure; 618 goto nla_put_failure;
576 } else { 619 } else {
577 if (ctnetlink_dump_timeout(skb, ct) < 0) 620 if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -761,7 +804,7 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
761static int 804static int
762ctnetlink_parse_tuple(const struct nlattr * const cda[], 805ctnetlink_parse_tuple(const struct nlattr * const cda[],
763 struct nf_conntrack_tuple *tuple, 806 struct nf_conntrack_tuple *tuple,
764 enum ctattr_tuple type, u_int8_t l3num) 807 enum ctattr_type type, u_int8_t l3num)
765{ 808{
766 struct nlattr *tb[CTA_TUPLE_MAX+1]; 809 struct nlattr *tb[CTA_TUPLE_MAX+1];
767 int err; 810 int err;
@@ -1358,6 +1401,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1358 } 1401 }
1359 1402
1360 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1403 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
1404 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
1361 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); 1405 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
1362 /* we must add conntrack extensions before confirmation. */ 1406 /* we must add conntrack extensions before confirmation. */
1363 ct->status |= IPS_CONFIRMED; 1407 ct->status |= IPS_CONFIRMED;
@@ -1376,6 +1420,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1376 } 1420 }
1377#endif 1421#endif
1378 1422
1423 memset(&ct->proto, 0, sizeof(ct->proto));
1379 if (cda[CTA_PROTOINFO]) { 1424 if (cda[CTA_PROTOINFO]) {
1380 err = ctnetlink_change_protoinfo(ct, cda); 1425 err = ctnetlink_change_protoinfo(ct, cda);
1381 if (err < 0) 1426 if (err < 0)
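
In the ctnetlink changes above, the new timestamp attribute has two halves that must stay in sync: ctnetlink_timestamp_size() reserves one empty nest header plus two 64-bit attributes, and ctnetlink_dump_timestamp() emits exactly that. A compressed sketch of the pairing; MY_NEST, MY_A and MY_B are placeholder attribute types, not part of the patch:

static size_t my_nest_size(void)
{
        return nla_total_size(0)                    /* nest header */
             + 2 * nla_total_size(sizeof(u64));     /* two be64 payloads */
}

static int my_nest_dump(struct sk_buff *skb, u64 a, u64 b)
{
        struct nlattr *nest = nla_nest_start(skb, MY_NEST | NLA_F_NESTED);

        if (!nest)
                goto nla_put_failure;
        NLA_PUT_BE64(skb, MY_A, cpu_to_be64(a));    /* jumps on overflow */
        NLA_PUT_BE64(skb, MY_B, cpu_to_be64(b));
        nla_nest_end(skb, nest);
        return 0;

nla_put_failure:
        return -1;
}

If the two drift apart, the skb sized from ctnetlink_nlmsg_size() comes up short and the attribute puts fail at runtime.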
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc7bb74110d..5701c8dd783 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
166int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) 166int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
167{ 167{
168 int ret = 0; 168 int ret = 0;
169 struct nf_conntrack_l3proto *old;
169 170
170 if (proto->l3proto >= AF_MAX) 171 if (proto->l3proto >= AF_MAX)
171 return -EBUSY; 172 return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
174 return -EINVAL; 175 return -EINVAL;
175 176
176 mutex_lock(&nf_ct_proto_mutex); 177 mutex_lock(&nf_ct_proto_mutex);
177 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { 178 old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
179 lockdep_is_held(&nf_ct_proto_mutex));
180 if (old != &nf_conntrack_l3proto_generic) {
178 ret = -EBUSY; 181 ret = -EBUSY;
179 goto out_unlock; 182 goto out_unlock;
180 } 183 }
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
201 BUG_ON(proto->l3proto >= AF_MAX); 204 BUG_ON(proto->l3proto >= AF_MAX);
202 205
203 mutex_lock(&nf_ct_proto_mutex); 206 mutex_lock(&nf_ct_proto_mutex);
204 BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); 207 BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
208 lockdep_is_held(&nf_ct_proto_mutex)
209 ) != proto);
205 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], 210 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
206 &nf_conntrack_l3proto_generic); 211 &nf_conntrack_l3proto_generic);
207 nf_ct_l3proto_unregister_sysctl(proto); 212 nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
279 mutex_lock(&nf_ct_proto_mutex); 284 mutex_lock(&nf_ct_proto_mutex);
280 if (!nf_ct_protos[l4proto->l3proto]) { 285 if (!nf_ct_protos[l4proto->l3proto]) {
 281 /* l3proto may be loaded later. */ 286 /* l3proto may be loaded later. */
282 struct nf_conntrack_l4proto **proto_array; 287 struct nf_conntrack_l4proto __rcu **proto_array;
283 int i; 288 int i;
284 289
285 proto_array = kmalloc(MAX_NF_CT_PROTO * 290 proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
291 } 296 }
292 297
293 for (i = 0; i < MAX_NF_CT_PROTO; i++) 298 for (i = 0; i < MAX_NF_CT_PROTO; i++)
294 proto_array[i] = &nf_conntrack_l4proto_generic; 299 RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
295 300
296 /* Before making proto_array visible to lockless readers, 301 /* Before making proto_array visible to lockless readers,
297 * we must make sure its content is committed to memory. 302 * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
299 smp_wmb(); 304 smp_wmb();
300 305
301 nf_ct_protos[l4proto->l3proto] = proto_array; 306 nf_ct_protos[l4proto->l3proto] = proto_array;
302 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != 307 } else if (rcu_dereference_protected(
303 &nf_conntrack_l4proto_generic) { 308 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
309 lockdep_is_held(&nf_ct_proto_mutex)
310 ) != &nf_conntrack_l4proto_generic) {
304 ret = -EBUSY; 311 ret = -EBUSY;
305 goto out_unlock; 312 goto out_unlock;
306 } 313 }
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
331 BUG_ON(l4proto->l3proto >= PF_MAX); 338 BUG_ON(l4proto->l3proto >= PF_MAX);
332 339
333 mutex_lock(&nf_ct_proto_mutex); 340 mutex_lock(&nf_ct_proto_mutex);
334 BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); 341 BUG_ON(rcu_dereference_protected(
342 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
343 lockdep_is_held(&nf_ct_proto_mutex)
344 ) != l4proto);
335 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 345 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
336 &nf_conntrack_l4proto_generic); 346 &nf_conntrack_l4proto_generic);
337 nf_ct_l4proto_unregister_sysctl(l4proto); 347 nf_ct_l4proto_unregister_sysctl(l4proto);
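
Note the two publication idioms in the l4proto hunk: RCU_INIT_POINTER() initializes slots of a table no reader can see yet (no barrier needed), then an explicit smp_wmb() orders those stores before the plain assignment that makes the table itself visible. Condensed, with illustrative names (table, family, N, generic_entry):

struct bar __rcu **arr;
int i;

arr = kmalloc(N * sizeof(struct bar *), GFP_KERNEL);
if (!arr)
        return -ENOMEM;

for (i = 0; i < N; i++)
        RCU_INIT_POINTER(arr[i], &generic_entry);  /* unpublished: no barrier */

smp_wmb();              /* commit contents before lockless readers can follow */
table[family] = arr;    /* publish */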
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560d6d4..9ae57c57c50 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
452 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; 452 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
453 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; 453 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
454 ct->proto.dccp.state = CT_DCCP_NONE; 454 ct->proto.dccp.state = CT_DCCP_NONE;
455 ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
456 ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
457 ct->proto.dccp.handshake_seq = 0;
455 return true; 458 return true;
456 459
457out_invalid: 460out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2d5ea..6f4ee70f460 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
413 test_bit(SCTP_CID_COOKIE_ACK, map)) 413 test_bit(SCTP_CID_COOKIE_ACK, map))
414 return false; 414 return false;
415 415
416 memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
416 new_state = SCTP_CONNTRACK_MAX; 417 new_state = SCTP_CONNTRACK_MAX;
417 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 418 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
418 /* Don't need lock here: this conntrack not in circulation yet */ 419 /* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73b24d..37bf94394be 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -227,11 +227,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
227 * sCL -> sIV 227 * sCL -> sIV
228 */ 228 */
229/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 229/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
230/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, 230/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
231/* 231/*
232 * sSS -> sSR Standard open. 232 * sSS -> sSR Standard open.
233 * sS2 -> sSR Simultaneous open 233 * sS2 -> sSR Simultaneous open
234 * sSR -> sSR Retransmitted SYN/ACK. 234 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
235 * sES -> sIG Late retransmitted SYN/ACK? 235 * sES -> sIG Late retransmitted SYN/ACK?
236 * sFW -> sIG Might be SYN/ACK answering ignored SYN 236 * sFW -> sIG Might be SYN/ACK answering ignored SYN
237 * sCW -> sIG 237 * sCW -> sIG
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1066 BUG_ON(th == NULL); 1066 BUG_ON(th == NULL);
1067 1067
1068 /* Don't need lock here: this conntrack not in circulation yet */ 1068 /* Don't need lock here: this conntrack not in circulation yet */
1069 new_state 1069 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1070 = tcp_conntracks[0][get_conntrack_index(th)]
1071 [TCP_CONNTRACK_NONE];
1072 1070
1073 /* Invalid: delete conntrack */ 1071 /* Invalid: delete conntrack */
1074 if (new_state >= TCP_CONNTRACK_MAX) { 1072 if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1077 } 1075 }
1078 1076
1079 if (new_state == TCP_CONNTRACK_SYN_SENT) { 1077 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1078 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1080 /* SYN packet */ 1079 /* SYN packet */
1081 ct->proto.tcp.seen[0].td_end = 1080 ct->proto.tcp.seen[0].td_end =
1082 segment_seq_plus_len(ntohl(th->seq), skb->len, 1081 segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1088 ct->proto.tcp.seen[0].td_end; 1087 ct->proto.tcp.seen[0].td_end;
1089 1088
1090 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); 1089 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1091 ct->proto.tcp.seen[1].flags = 0;
1092 } else if (nf_ct_tcp_loose == 0) { 1090 } else if (nf_ct_tcp_loose == 0) {
1093 /* Don't try to pick up connections. */ 1091 /* Don't try to pick up connections. */
1094 return false; 1092 return false;
1095 } else { 1093 } else {
1094 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1096 /* 1095 /*
1097 * We are in the middle of a connection, 1096 * We are in the middle of a connection,
1098 * its history is lost for us. 1097 * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1107 ct->proto.tcp.seen[0].td_maxend = 1106 ct->proto.tcp.seen[0].td_maxend =
1108 ct->proto.tcp.seen[0].td_end + 1107 ct->proto.tcp.seen[0].td_end +
1109 ct->proto.tcp.seen[0].td_maxwin; 1108 ct->proto.tcp.seen[0].td_maxwin;
1110 ct->proto.tcp.seen[0].td_scale = 0;
1111 1109
1112 /* We assume SACK and liberal window checking to handle 1110 /* We assume SACK and liberal window checking to handle
1113 * window scaling */ 1111 * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1116 IP_CT_TCP_FLAG_BE_LIBERAL; 1114 IP_CT_TCP_FLAG_BE_LIBERAL;
1117 } 1115 }
1118 1116
1119 ct->proto.tcp.seen[1].td_end = 0;
1120 ct->proto.tcp.seen[1].td_maxend = 0;
1121 ct->proto.tcp.seen[1].td_maxwin = 0;
1122 ct->proto.tcp.seen[1].td_scale = 0;
1123
1124 /* tcp_packet will set them */ 1117 /* tcp_packet will set them */
1125 ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1126 ct->proto.tcp.last_index = TCP_NONE_SET; 1118 ct->proto.tcp.last_index = TCP_NONE_SET;
1127 1119
1128 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " 1120 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644
index 00000000000..6e545e26289
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,77 @@
1/*
2 * SNMP service broadcast connection tracking helper
3 *
4 * (c) 2011 Jiri Olsa <jolsa@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/in.h>
15
16#include <net/netfilter/nf_conntrack.h>
17#include <net/netfilter/nf_conntrack_helper.h>
18#include <net/netfilter/nf_conntrack_expect.h>
19
20#define SNMP_PORT 161
21
22MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
23MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
24MODULE_LICENSE("GPL");
25MODULE_ALIAS_NFCT_HELPER("snmp");
26
27static unsigned int timeout __read_mostly = 30;
28module_param(timeout, uint, S_IRUSR);
29MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
30
31int (*nf_nat_snmp_hook)(struct sk_buff *skb,
32 unsigned int protoff,
33 struct nf_conn *ct,
34 enum ip_conntrack_info ctinfo);
35EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
36
37static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
38 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
39{
40 typeof(nf_nat_snmp_hook) nf_nat_snmp;
41
42 nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
43
44 nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
45 if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
46 return nf_nat_snmp(skb, protoff, ct, ctinfo);
47
48 return NF_ACCEPT;
49}
50
51static struct nf_conntrack_expect_policy exp_policy = {
52 .max_expected = 1,
53};
54
55static struct nf_conntrack_helper helper __read_mostly = {
56 .name = "snmp",
57 .tuple.src.l3num = NFPROTO_IPV4,
58 .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
59 .tuple.dst.protonum = IPPROTO_UDP,
60 .me = THIS_MODULE,
61 .help = snmp_conntrack_help,
62 .expect_policy = &exp_policy,
63};
64
65static int __init nf_conntrack_snmp_init(void)
66{
67 exp_policy.timeout = timeout;
68 return nf_conntrack_helper_register(&helper);
69}
70
71static void __exit nf_conntrack_snmp_fini(void)
72{
73 nf_conntrack_helper_unregister(&helper);
74}
75
76module_init(nf_conntrack_snmp_init);
77module_exit(nf_conntrack_snmp_fini);
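
nf_nat_snmp_hook is the usual RCU'd function-pointer bridge to an optional NAT module: the helper samples it with rcu_dereference() inside the hook's RCU read side, and the NAT side publishes and retracts it. The registration below is a sketch of the expected counterpart; my_snmp_translate and the function names are hypothetical, the real consumer being the SNMP NAT helper:

static int __init my_nat_init(void)
{
        rcu_assign_pointer(nf_nat_snmp_hook, my_snmp_translate);
        return 0;
}

static void __exit my_nat_fini(void)
{
        rcu_assign_pointer(nf_nat_snmp_hook, NULL);
        synchronize_rcu();      /* wait out helpers that sampled the old value */
}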
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b4d7f0f24b2..0ae14282588 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,8 @@
29#include <net/netfilter/nf_conntrack_helper.h> 29#include <net/netfilter/nf_conntrack_helper.h>
30#include <net/netfilter/nf_conntrack_acct.h> 30#include <net/netfilter/nf_conntrack_acct.h>
31#include <net/netfilter/nf_conntrack_zones.h> 31#include <net/netfilter/nf_conntrack_zones.h>
32#include <net/netfilter/nf_conntrack_timestamp.h>
33#include <linux/rculist_nulls.h>
32 34
33MODULE_LICENSE("GPL"); 35MODULE_LICENSE("GPL");
34 36
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
45struct ct_iter_state { 47struct ct_iter_state {
46 struct seq_net_private p; 48 struct seq_net_private p;
47 unsigned int bucket; 49 unsigned int bucket;
50 u_int64_t time_now;
48}; 51};
49 52
50static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 53static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
56 for (st->bucket = 0; 59 for (st->bucket = 0;
57 st->bucket < net->ct.htable_size; 60 st->bucket < net->ct.htable_size;
58 st->bucket++) { 61 st->bucket++) {
59 n = rcu_dereference(net->ct.hash[st->bucket].first); 62 n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
60 if (!is_a_nulls(n)) 63 if (!is_a_nulls(n))
61 return n; 64 return n;
62 } 65 }
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
69 struct net *net = seq_file_net(seq); 72 struct net *net = seq_file_net(seq);
70 struct ct_iter_state *st = seq->private; 73 struct ct_iter_state *st = seq->private;
71 74
72 head = rcu_dereference(head->next); 75 head = rcu_dereference(hlist_nulls_next_rcu(head));
73 while (is_a_nulls(head)) { 76 while (is_a_nulls(head)) {
74 if (likely(get_nulls_value(head) == st->bucket)) { 77 if (likely(get_nulls_value(head) == st->bucket)) {
75 if (++st->bucket >= net->ct.htable_size) 78 if (++st->bucket >= net->ct.htable_size)
76 return NULL; 79 return NULL;
77 } 80 }
78 head = rcu_dereference(net->ct.hash[st->bucket].first); 81 head = rcu_dereference(
82 hlist_nulls_first_rcu(
83 &net->ct.hash[st->bucket]));
79 } 84 }
80 return head; 85 return head;
81} 86}
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
93static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 98static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
94 __acquires(RCU) 99 __acquires(RCU)
95{ 100{
101 struct ct_iter_state *st = seq->private;
102
103 st->time_now = ktime_to_ns(ktime_get_real());
96 rcu_read_lock(); 104 rcu_read_lock();
97 return ct_get_idx(seq, *pos); 105 return ct_get_idx(seq, *pos);
98} 106}
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
132} 140}
133#endif 141#endif
134 142
143#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
144static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
145{
146 struct ct_iter_state *st = s->private;
147 struct nf_conn_tstamp *tstamp;
148 s64 delta_time;
149
150 tstamp = nf_conn_tstamp_find(ct);
151 if (tstamp) {
152 delta_time = st->time_now - tstamp->start;
153 if (delta_time > 0)
154 delta_time = div_s64(delta_time, NSEC_PER_SEC);
155 else
156 delta_time = 0;
157
158 return seq_printf(s, "delta-time=%llu ",
159 (unsigned long long)delta_time);
160 }
161 return 0;
162}
163#else
164static inline int
165ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
166{
167 return 0;
168}
169#endif
170
135/* return 0 on success, 1 in case of error */ 171/* return 0 on success, 1 in case of error */
136static int ct_seq_show(struct seq_file *s, void *v) 172static int ct_seq_show(struct seq_file *s, void *v)
137{ 173{
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
200 goto release; 236 goto release;
201#endif 237#endif
202 238
239 if (ct_show_delta_time(s, ct))
240 goto release;
241
203 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 242 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
204 goto release; 243 goto release;
205 244
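
ct_show_delta_time() derives flow age from the nanosecond start stamp captured at ct_seq_start(); div_s64() is used rather than a plain '/' because 64-bit division is not native on 32-bit architectures, and negative deltas (real-time clock stepped backwards) are clamped to zero. The core computation, reduced to a sketch with an illustrative name:

/* ns_now/ns_start are ktime_to_ns(ktime_get_real()) values. */
static u64 age_in_seconds(s64 ns_now, s64 ns_start)
{
        s64 delta = ns_now - ns_start;

        if (delta <= 0)
                return 0;               /* clock went backwards */
        return div_s64(delta, NSEC_PER_SEC);
}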
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 00000000000..af7dd31af0a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
1/*
2 * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation (or any later at your option).
7 */
8
9#include <linux/netfilter.h>
10#include <linux/slab.h>
11#include <linux/kernel.h>
12#include <linux/moduleparam.h>
13
14#include <net/netfilter/nf_conntrack.h>
15#include <net/netfilter/nf_conntrack_extend.h>
16#include <net/netfilter/nf_conntrack_timestamp.h>
17
18static int nf_ct_tstamp __read_mostly;
19
20module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
21MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
22
23#ifdef CONFIG_SYSCTL
24static struct ctl_table tstamp_sysctl_table[] = {
25 {
26 .procname = "nf_conntrack_timestamp",
27 .data = &init_net.ct.sysctl_tstamp,
28 .maxlen = sizeof(unsigned int),
29 .mode = 0644,
30 .proc_handler = proc_dointvec,
31 },
32 {}
33};
34#endif /* CONFIG_SYSCTL */
35
36static struct nf_ct_ext_type tstamp_extend __read_mostly = {
37 .len = sizeof(struct nf_conn_tstamp),
38 .align = __alignof__(struct nf_conn_tstamp),
39 .id = NF_CT_EXT_TSTAMP,
40};
41
42#ifdef CONFIG_SYSCTL
43static int nf_conntrack_tstamp_init_sysctl(struct net *net)
44{
45 struct ctl_table *table;
46
47 table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
48 GFP_KERNEL);
49 if (!table)
50 goto out;
51
52 table[0].data = &net->ct.sysctl_tstamp;
53
54 net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
55 nf_net_netfilter_sysctl_path, table);
56 if (!net->ct.tstamp_sysctl_header) {
57 printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
58 goto out_register;
59 }
60 return 0;
61
62out_register:
63 kfree(table);
64out:
65 return -ENOMEM;
66}
67
68static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
69{
70 struct ctl_table *table;
71
72 table = net->ct.tstamp_sysctl_header->ctl_table_arg;
73 unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
74 kfree(table);
75}
76#else
77static int nf_conntrack_tstamp_init_sysctl(struct net *net)
78{
79 return 0;
80}
81
82static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
83{
84}
85#endif
86
87int nf_conntrack_tstamp_init(struct net *net)
88{
89 int ret;
90
91 net->ct.sysctl_tstamp = nf_ct_tstamp;
92
93 if (net_eq(net, &init_net)) {
94 ret = nf_ct_extend_register(&tstamp_extend);
95 if (ret < 0) {
96 printk(KERN_ERR "nf_ct_tstamp: Unable to register "
97 "extension\n");
98 goto out_extend_register;
99 }
100 }
101
102 ret = nf_conntrack_tstamp_init_sysctl(net);
103 if (ret < 0)
104 goto out_sysctl;
105
106 return 0;
107
108out_sysctl:
109 if (net_eq(net, &init_net))
110 nf_ct_extend_unregister(&tstamp_extend);
111out_extend_register:
112 return ret;
113}
114
115void nf_conntrack_tstamp_fini(struct net *net)
116{
117 nf_conntrack_tstamp_fini_sysctl(net);
118 if (net_eq(net, &init_net))
119 nf_ct_extend_unregister(&tstamp_extend);
120}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 91816998ed8..20714edf6cd 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -165,7 +165,8 @@ static int seq_show(struct seq_file *s, void *v)
165 struct nf_logger *t; 165 struct nf_logger *t;
166 int ret; 166 int ret;
167 167
168 logger = nf_loggers[*pos]; 168 logger = rcu_dereference_protected(nf_loggers[*pos],
169 lockdep_is_held(&nf_log_mutex));
169 170
170 if (!logger) 171 if (!logger)
171 ret = seq_printf(s, "%2lld NONE (", *pos); 172 ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -253,7 +254,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
253 mutex_unlock(&nf_log_mutex); 254 mutex_unlock(&nf_log_mutex);
254 } else { 255 } else {
255 mutex_lock(&nf_log_mutex); 256 mutex_lock(&nf_log_mutex);
256 logger = nf_loggers[tindex]; 257 logger = rcu_dereference_protected(nf_loggers[tindex],
258 lockdep_is_held(&nf_log_mutex));
257 if (!logger) 259 if (!logger)
258 table->data = "NONE"; 260 table->data = "NONE";
259 else 261 else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 74aebed5bd2..5ab22e2bbd7 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
27int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 27int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
28{ 28{
29 int ret; 29 int ret;
30 const struct nf_queue_handler *old;
30 31
31 if (pf >= ARRAY_SIZE(queue_handler)) 32 if (pf >= ARRAY_SIZE(queue_handler))
32 return -EINVAL; 33 return -EINVAL;
33 34
34 mutex_lock(&queue_handler_mutex); 35 mutex_lock(&queue_handler_mutex);
35 if (queue_handler[pf] == qh) 36 old = rcu_dereference_protected(queue_handler[pf],
37 lockdep_is_held(&queue_handler_mutex));
38 if (old == qh)
36 ret = -EEXIST; 39 ret = -EEXIST;
37 else if (queue_handler[pf]) 40 else if (old)
38 ret = -EBUSY; 41 ret = -EBUSY;
39 else { 42 else {
40 rcu_assign_pointer(queue_handler[pf], qh); 43 rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
49/* The caller must flush their queue before this */ 52/* The caller must flush their queue before this */
50int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 53int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
51{ 54{
55 const struct nf_queue_handler *old;
56
52 if (pf >= ARRAY_SIZE(queue_handler)) 57 if (pf >= ARRAY_SIZE(queue_handler))
53 return -EINVAL; 58 return -EINVAL;
54 59
55 mutex_lock(&queue_handler_mutex); 60 mutex_lock(&queue_handler_mutex);
56 if (queue_handler[pf] && queue_handler[pf] != qh) { 61 old = rcu_dereference_protected(queue_handler[pf],
62 lockdep_is_held(&queue_handler_mutex));
63 if (old && old != qh) {
57 mutex_unlock(&queue_handler_mutex); 64 mutex_unlock(&queue_handler_mutex);
58 return -EINVAL; 65 return -EINVAL;
59 } 66 }
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
73 80
74 mutex_lock(&queue_handler_mutex); 81 mutex_lock(&queue_handler_mutex);
75 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { 82 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
76 if (queue_handler[pf] == qh) 83 if (rcu_dereference_protected(
84 queue_handler[pf],
85 lockdep_is_held(&queue_handler_mutex)
86 ) == qh)
77 rcu_assign_pointer(queue_handler[pf], NULL); 87 rcu_assign_pointer(queue_handler[pf], NULL);
78 } 88 }
79 mutex_unlock(&queue_handler_mutex); 89 mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb,
115 int (*okfn)(struct sk_buff *), 125 int (*okfn)(struct sk_buff *),
116 unsigned int queuenum) 126 unsigned int queuenum)
117{ 127{
118 int status; 128 int status = -ENOENT;
119 struct nf_queue_entry *entry = NULL; 129 struct nf_queue_entry *entry = NULL;
120#ifdef CONFIG_BRIDGE_NETFILTER 130#ifdef CONFIG_BRIDGE_NETFILTER
121 struct net_device *physindev; 131 struct net_device *physindev;
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb,
128 rcu_read_lock(); 138 rcu_read_lock();
129 139
130 qh = rcu_dereference(queue_handler[pf]); 140 qh = rcu_dereference(queue_handler[pf]);
131 if (!qh) 141 if (!qh) {
142 status = -ESRCH;
132 goto err_unlock; 143 goto err_unlock;
144 }
133 145
134 afinfo = nf_get_afinfo(pf); 146 afinfo = nf_get_afinfo(pf);
135 if (!afinfo) 147 if (!afinfo)
136 goto err_unlock; 148 goto err_unlock;
137 149
138 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); 150 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
139 if (!entry) 151 if (!entry) {
152 status = -ENOMEM;
140 goto err_unlock; 153 goto err_unlock;
154 }
141 155
142 *entry = (struct nf_queue_entry) { 156 *entry = (struct nf_queue_entry) {
143 .skb = skb, 157 .skb = skb,
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
151 165
152 /* If it's going away, ignore hook. */ 166 /* If it's going away, ignore hook. */
153 if (!try_module_get(entry->elem->owner)) { 167 if (!try_module_get(entry->elem->owner)) {
154 rcu_read_unlock(); 168 status = -ECANCELED;
155 kfree(entry); 169 goto err_unlock;
156 return 0;
157 } 170 }
158
159 /* Bump dev refs so they don't vanish while packet is out */ 171 /* Bump dev refs so they don't vanish while packet is out */
160 if (indev) 172 if (indev)
161 dev_hold(indev); 173 dev_hold(indev);
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb,
182 goto err; 194 goto err;
183 } 195 }
184 196
185 return 1; 197 return 0;
186 198
187err_unlock: 199err_unlock:
188 rcu_read_unlock(); 200 rcu_read_unlock();
189err: 201err:
190 kfree_skb(skb);
191 kfree(entry); 202 kfree(entry);
192 return 1; 203 return status;
193} 204}
194 205
195int nf_queue(struct sk_buff *skb, 206int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb,
201 unsigned int queuenum) 212 unsigned int queuenum)
202{ 213{
203 struct sk_buff *segs; 214 struct sk_buff *segs;
215 int err;
216 unsigned int queued;
204 217
205 if (!skb_is_gso(skb)) 218 if (!skb_is_gso(skb))
206 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 219 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb,
216 } 229 }
217 230
218 segs = skb_gso_segment(skb, 0); 231 segs = skb_gso_segment(skb, 0);
219 kfree_skb(skb); 232 /* Does not use PTR_ERR to limit the number of error codes that can be
233 * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
234 * 'ignore this hook'.
235 */
220 if (IS_ERR(segs)) 236 if (IS_ERR(segs))
221 return 1; 237 return -EINVAL;
222 238
239 queued = 0;
240 err = 0;
223 do { 241 do {
224 struct sk_buff *nskb = segs->next; 242 struct sk_buff *nskb = segs->next;
225 243
226 segs->next = NULL; 244 segs->next = NULL;
227 if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, 245 if (err == 0)
228 queuenum)) 246 err = __nf_queue(segs, elem, pf, hook, indev,
247 outdev, okfn, queuenum);
248 if (err == 0)
249 queued++;
250 else
229 kfree_skb(segs); 251 kfree_skb(segs);
230 segs = nskb; 252 segs = nskb;
231 } while (segs); 253 } while (segs);
232 return 1; 254
255 /* also free orig skb if only some segments were queued */
256 if (unlikely(err && queued))
257 err = 0;
258 if (err == 0)
259 kfree_skb(skb);
260 return err;
233} 261}
234 262
235void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) 263void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
237 struct sk_buff *skb = entry->skb; 265 struct sk_buff *skb = entry->skb;
238 struct list_head *elem = &entry->elem->list; 266 struct list_head *elem = &entry->elem->list;
239 const struct nf_afinfo *afinfo; 267 const struct nf_afinfo *afinfo;
268 int err;
240 269
241 rcu_read_lock(); 270 rcu_read_lock();
242 271
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
270 local_bh_enable(); 299 local_bh_enable();
271 break; 300 break;
272 case NF_QUEUE: 301 case NF_QUEUE:
273 if (!__nf_queue(skb, elem, entry->pf, entry->hook, 302 err = __nf_queue(skb, elem, entry->pf, entry->hook,
274 entry->indev, entry->outdev, entry->okfn, 303 entry->indev, entry->outdev, entry->okfn,
275 verdict >> NF_VERDICT_BITS)) 304 verdict >> NF_VERDICT_QBITS);
276 goto next_hook; 305 if (err < 0) {
306 if (err == -ECANCELED)
307 goto next_hook;
308 if (err == -ESRCH &&
309 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
310 goto next_hook;
311 kfree_skb(skb);
312 }
277 break; 313 break;
278 case NF_STOLEN: 314 case NF_STOLEN:
279 default: 315 default:
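
The nf_queue rework above replaces the old boolean 'consumed' convention with negative errno returns so callers can tell failure modes apart: -ECANCELED means the hook is going away and should simply be skipped, -ESRCH means no queue handler is registered, which the packet survives only if the verdict carried NF_VERDICT_FLAG_QUEUE_BYPASS; anything else drops the skb. The caller-side dispatch, condensed from the nf_reinject() hunk:

err = __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
                 verdict >> NF_VERDICT_QBITS);
if (err < 0) {
        if (err == -ECANCELED)          /* hook disappearing: skip it */
                goto next_hook;
        if (err == -ESRCH &&            /* no handler, bypass permitted */
            (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
                goto next_hook;
        kfree_skb(skb);                 /* everything else: drop */
}

For GSO packets, nf_queue() additionally forces err back to 0 once any segment has been queued, since those segments now own their share of the data and the caller must not free or reinject the original skb.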
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6a1572b0ab4..985e9b76c91 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -376,7 +376,6 @@ __build_packet_message(struct nfulnl_instance *inst,
376 unsigned int hooknum, 376 unsigned int hooknum,
377 const struct net_device *indev, 377 const struct net_device *indev,
378 const struct net_device *outdev, 378 const struct net_device *outdev,
379 const struct nf_loginfo *li,
380 const char *prefix, unsigned int plen) 379 const char *prefix, unsigned int plen)
381{ 380{
382 struct nfulnl_msg_packet_hdr pmsg; 381 struct nfulnl_msg_packet_hdr pmsg;
@@ -652,7 +651,7 @@ nfulnl_log_packet(u_int8_t pf,
652 inst->qlen++; 651 inst->qlen++;
653 652
654 __build_packet_message(inst, skb, data_len, pf, 653 __build_packet_message(inst, skb, data_len, pf,
655 hooknum, in, out, li, prefix, plen); 654 hooknum, in, out, prefix, plen);
656 655
657 if (inst->qlen >= qthreshold) 656 if (inst->qlen >= qthreshold)
658 __nfulnl_flush(inst); 657 __nfulnl_flush(inst);
@@ -874,19 +873,19 @@ static struct hlist_node *get_first(struct iter_state *st)
874 873
875 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 874 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
876 if (!hlist_empty(&instance_table[st->bucket])) 875 if (!hlist_empty(&instance_table[st->bucket]))
877 return rcu_dereference_bh(instance_table[st->bucket].first); 876 return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
878 } 877 }
879 return NULL; 878 return NULL;
880} 879}
881 880
882static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) 881static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
883{ 882{
884 h = rcu_dereference_bh(h->next); 883 h = rcu_dereference_bh(hlist_next_rcu(h));
885 while (!h) { 884 while (!h) {
886 if (++st->bucket >= INSTANCE_BUCKETS) 885 if (++st->bucket >= INSTANCE_BUCKETS)
887 return NULL; 886 return NULL;
888 887
889 h = rcu_dereference_bh(instance_table[st->bucket].first); 888 h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
890 } 889 }
891 return h; 890 return h;
892} 891}
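
hlist_first_rcu()/hlist_next_rcu() return the head and next fields with their __rcu annotation intact, so rcu_dereference_bh() type-checks under sparse; the open-coded head->first and h->next they replace lost that annotation. The resulting bucket-walk idiom shared by these seq_file iterators, with illustrative names (my_iter, table, N_BUCKETS):

static struct hlist_node *walk_next(struct my_iter *st, struct hlist_node *h)
{
        h = rcu_dereference_bh(hlist_next_rcu(h));
        while (!h) {
                if (++st->bucket >= N_BUCKETS)
                        return NULL;
                h = rcu_dereference_bh(hlist_first_rcu(&table[st->bucket]));
        }
        return h;
}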
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 68e67d19724..b83123f12b4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
387{ 387{
388 struct sk_buff *nskb; 388 struct sk_buff *nskb;
389 struct nfqnl_instance *queue; 389 struct nfqnl_instance *queue;
390 int err; 390 int err = -ENOBUFS;
391 391
392 /* rcu_read_lock()ed by nf_hook_slow() */ 392 /* rcu_read_lock()ed by nf_hook_slow() */
393 queue = instance_lookup(queuenum); 393 queue = instance_lookup(queuenum);
394 if (!queue) 394 if (!queue) {
395 err = -ESRCH;
395 goto err_out; 396 goto err_out;
397 }
396 398
397 if (queue->copy_mode == NFQNL_COPY_NONE) 399 if (queue->copy_mode == NFQNL_COPY_NONE) {
400 err = -EINVAL;
398 goto err_out; 401 goto err_out;
402 }
399 403
400 nskb = nfqnl_build_packet_message(queue, entry); 404 nskb = nfqnl_build_packet_message(queue, entry);
401 if (nskb == NULL) 405 if (nskb == NULL) {
406 err = -ENOMEM;
402 goto err_out; 407 goto err_out;
403 408 }
404 spin_lock_bh(&queue->lock); 409 spin_lock_bh(&queue->lock);
405 410
406 if (!queue->peer_pid) 411 if (!queue->peer_pid) {
412 err = -EINVAL;
407 goto err_out_free_nskb; 413 goto err_out_free_nskb;
408 414 }
409 if (queue->queue_total >= queue->queue_maxlen) { 415 if (queue->queue_total >= queue->queue_maxlen) {
410 queue->queue_dropped++; 416 queue->queue_dropped++;
411 if (net_ratelimit()) 417 if (net_ratelimit())
@@ -432,7 +438,7 @@ err_out_free_nskb:
432err_out_unlock: 438err_out_unlock:
433 spin_unlock_bh(&queue->lock); 439 spin_unlock_bh(&queue->lock);
434err_out: 440err_out:
435 return -1; 441 return err;
436} 442}
437 443
438static int 444static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c9423763107..a9adf4c6b29 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -23,6 +23,7 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/audit.h>
26#include <net/net_namespace.h> 27#include <net/net_namespace.h>
27 28
28#include <linux/netfilter/x_tables.h> 29#include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
38#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 39#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
39 40
40struct compat_delta { 41struct compat_delta {
41 struct compat_delta *next; 42 unsigned int offset; /* offset in kernel */
42 unsigned int offset; 43 int delta; /* delta in 32bit user land */
43 int delta;
44}; 44};
45 45
46struct xt_af { 46struct xt_af {
@@ -49,7 +49,9 @@ struct xt_af {
49 struct list_head target; 49 struct list_head target;
50#ifdef CONFIG_COMPAT 50#ifdef CONFIG_COMPAT
51 struct mutex compat_mutex; 51 struct mutex compat_mutex;
52 struct compat_delta *compat_offsets; 52 struct compat_delta *compat_tab;
53 unsigned int number; /* number of slots in compat_tab[] */
54 unsigned int cur; /* number of used slots in compat_tab[] */
53#endif 55#endif
54}; 56};
55 57
@@ -181,14 +183,14 @@ EXPORT_SYMBOL(xt_unregister_matches);
181/* 183/*
182 * These are weird, but module loading must not be done with mutex 184 * These are weird, but module loading must not be done with mutex
183 * held (since they will register), and we have to have a single 185 * held (since they will register), and we have to have a single
184 * function to use try_then_request_module(). 186 * function to use.
185 */ 187 */
186 188
187/* Find match, grabs ref. Returns ERR_PTR() on error. */ 189/* Find match, grabs ref. Returns ERR_PTR() on error. */
188struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) 190struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
189{ 191{
190 struct xt_match *m; 192 struct xt_match *m;
191 int err = 0; 193 int err = -ENOENT;
192 194
193 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 195 if (mutex_lock_interruptible(&xt[af].mutex) != 0)
194 return ERR_PTR(-EINTR); 196 return ERR_PTR(-EINTR);
@@ -219,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
219{ 221{
220 struct xt_match *match; 222 struct xt_match *match;
221 223
222 match = try_then_request_module(xt_find_match(nfproto, name, revision), 224 match = xt_find_match(nfproto, name, revision);
223 "%st_%s", xt_prefix[nfproto], name); 225 if (IS_ERR(match)) {
224 return (match != NULL) ? match : ERR_PTR(-ENOENT); 226 request_module("%st_%s", xt_prefix[nfproto], name);
227 match = xt_find_match(nfproto, name, revision);
228 }
229
230 return match;
225} 231}
226EXPORT_SYMBOL_GPL(xt_request_find_match); 232EXPORT_SYMBOL_GPL(xt_request_find_match);
227 233
@@ -229,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match);
229struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) 235struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
230{ 236{
231 struct xt_target *t; 237 struct xt_target *t;
232 int err = 0; 238 int err = -ENOENT;
233 239
234 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 240 if (mutex_lock_interruptible(&xt[af].mutex) != 0)
235 return ERR_PTR(-EINTR); 241 return ERR_PTR(-EINTR);
@@ -259,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
259{ 265{
260 struct xt_target *target; 266 struct xt_target *target;
261 267
262 target = try_then_request_module(xt_find_target(af, name, revision), 268 target = xt_find_target(af, name, revision);
263 "%st_%s", xt_prefix[af], name); 269 if (IS_ERR(target)) {
264 return (target != NULL) ? target : ERR_PTR(-ENOENT); 270 request_module("%st_%s", xt_prefix[af], name);
271 target = xt_find_target(af, name, revision);
272 }
273
274 return target;
265} 275}
266EXPORT_SYMBOL_GPL(xt_request_find_target); 276EXPORT_SYMBOL_GPL(xt_request_find_target);
267 277
@@ -414,54 +424,67 @@ int xt_check_match(struct xt_mtchk_param *par,
414EXPORT_SYMBOL_GPL(xt_check_match); 424EXPORT_SYMBOL_GPL(xt_check_match);
415 425
416#ifdef CONFIG_COMPAT 426#ifdef CONFIG_COMPAT
417int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) 427int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
418{ 428{
419 struct compat_delta *tmp; 429 struct xt_af *xp = &xt[af];
420 430
421 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); 431 if (!xp->compat_tab) {
422 if (!tmp) 432 if (!xp->number)
423 return -ENOMEM; 433 return -EINVAL;
434 xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
435 if (!xp->compat_tab)
436 return -ENOMEM;
437 xp->cur = 0;
438 }
424 439
425 tmp->offset = offset; 440 if (xp->cur >= xp->number)
426 tmp->delta = delta; 441 return -EINVAL;
427 442
428 if (xt[af].compat_offsets) { 443 if (xp->cur)
429 tmp->next = xt[af].compat_offsets->next; 444 delta += xp->compat_tab[xp->cur - 1].delta;
430 xt[af].compat_offsets->next = tmp; 445 xp->compat_tab[xp->cur].offset = offset;
431 } else { 446 xp->compat_tab[xp->cur].delta = delta;
432 xt[af].compat_offsets = tmp; 447 xp->cur++;
433 tmp->next = NULL;
434 }
435 return 0; 448 return 0;
436} 449}
437EXPORT_SYMBOL_GPL(xt_compat_add_offset); 450EXPORT_SYMBOL_GPL(xt_compat_add_offset);
438 451
439void xt_compat_flush_offsets(u_int8_t af) 452void xt_compat_flush_offsets(u_int8_t af)
440{ 453{
441 struct compat_delta *tmp, *next; 454 if (xt[af].compat_tab) {
442 455 vfree(xt[af].compat_tab);
443 if (xt[af].compat_offsets) { 456 xt[af].compat_tab = NULL;
444 for (tmp = xt[af].compat_offsets; tmp; tmp = next) { 457 xt[af].number = 0;
445 next = tmp->next;
446 kfree(tmp);
447 }
448 xt[af].compat_offsets = NULL;
449 } 458 }
450} 459}
451EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); 460EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
452 461
453int xt_compat_calc_jump(u_int8_t af, unsigned int offset) 462int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
454{ 463{
455 struct compat_delta *tmp; 464 struct compat_delta *tmp = xt[af].compat_tab;
456 int delta; 465 int mid, left = 0, right = xt[af].cur - 1;
457 466
458 for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) 467 while (left <= right) {
459 if (tmp->offset < offset) 468 mid = (left + right) >> 1;
460 delta += tmp->delta; 469 if (offset > tmp[mid].offset)
461 return delta; 470 left = mid + 1;
471 else if (offset < tmp[mid].offset)
472 right = mid - 1;
473 else
474 return mid ? tmp[mid - 1].delta : 0;
475 }
476 WARN_ON_ONCE(1);
477 return 0;
462} 478}
463EXPORT_SYMBOL_GPL(xt_compat_calc_jump); 479EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
464 480
481void xt_compat_init_offsets(u_int8_t af, unsigned int number)
482{
483 xt[af].number = number;
484 xt[af].cur = 0;
485}
486EXPORT_SYMBOL(xt_compat_init_offsets);
487
465int xt_compat_match_offset(const struct xt_match *match) 488int xt_compat_match_offset(const struct xt_match *match)
466{ 489{
467 u_int16_t csize = match->compatsize ? : match->matchsize; 490 u_int16_t csize = match->compatsize ? : match->matchsize;
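xt_compat_add_offset() now appends into a single vmalloc'ed array sized from the rule count, storing each delta as a running total, and xt_compat_calc_jump() binary-searches that array: the jump for an entry is the cumulative delta of everything before it, which is why a hit at index 0 returns 0. A self-contained sketch of the lookup over a hand-built table (offsets and deltas illustrative):

#include <stdio.h>

struct compat_delta {
	unsigned int offset;
	int delta;		/* cumulative, as built by add_offset() */
};

static const struct compat_delta tab[] = {
	{ 100, 4 }, { 200, 12 }, { 300, 16 },
};

static int calc_jump(unsigned int offset)
{
	int left = 0, right = (int)(sizeof(tab) / sizeof(tab[0])) - 1;

	while (left <= right) {
		int mid = (left + right) / 2;

		if (offset > tab[mid].offset)
			left = mid + 1;
		else if (offset < tab[mid].offset)
			right = mid - 1;
		else	/* sum of deltas strictly before this entry */
			return mid ? tab[mid - 1].delta : 0;
	}
	return 0;	/* unknown offset; the kernel WARNs here */
}

int main(void)
{
	printf("%d %d %d\n", calc_jump(100), calc_jump(200), calc_jump(300));
	return 0;	/* prints: 0 4 12 */
}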
@@ -820,6 +843,21 @@ xt_replace_table(struct xt_table *table,
820 */ 843 */
821 local_bh_enable(); 844 local_bh_enable();
822 845
846#ifdef CONFIG_AUDIT
847 if (audit_enabled) {
848 struct audit_buffer *ab;
849
850 ab = audit_log_start(current->audit_context, GFP_KERNEL,
851 AUDIT_NETFILTER_CFG);
852 if (ab) {
853 audit_log_format(ab, "table=%s family=%u entries=%u",
854 table->name, table->af,
855 private->number);
856 audit_log_end(ab);
857 }
858 }
859#endif
860
823 return private; 861 return private;
824} 862}
825EXPORT_SYMBOL_GPL(xt_replace_table); 863EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1338,7 +1376,7 @@ static int __init xt_init(void)
1338 mutex_init(&xt[i].mutex); 1376 mutex_init(&xt[i].mutex);
1339#ifdef CONFIG_COMPAT 1377#ifdef CONFIG_COMPAT
1340 mutex_init(&xt[i].compat_mutex); 1378 mutex_init(&xt[i].compat_mutex);
1341 xt[i].compat_offsets = NULL; 1379 xt[i].compat_tab = NULL;
1342#endif 1380#endif
1343 INIT_LIST_HEAD(&xt[i].target); 1381 INIT_LIST_HEAD(&xt[i].target);
1344 INIT_LIST_HEAD(&xt[i].match); 1382 INIT_LIST_HEAD(&xt[i].match);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644
index 00000000000..363a99ec063
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,222 @@
1/*
2 * Creates audit record for dropped/accepted packets
3 *
4 * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
5 * (C) 2010-2011 Red Hat, Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10*/
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/audit.h>
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/tcp.h>
18#include <linux/udp.h>
19#include <linux/if_arp.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_AUDIT.h>
22#include <linux/netfilter_bridge/ebtables.h>
23#include <net/ipv6.h>
24#include <net/ip.h>
25
26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
28MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
29MODULE_ALIAS("ipt_AUDIT");
30MODULE_ALIAS("ip6t_AUDIT");
31MODULE_ALIAS("ebt_AUDIT");
32MODULE_ALIAS("arpt_AUDIT");
33
34static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
35 unsigned int proto, unsigned int offset)
36{
37 switch (proto) {
38 case IPPROTO_TCP:
39 case IPPROTO_UDP:
40 case IPPROTO_UDPLITE: {
41 const __be16 *pptr;
42 __be16 _ports[2];
43
44 pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
45 if (pptr == NULL) {
46 audit_log_format(ab, " truncated=1");
47 return;
48 }
49
50 audit_log_format(ab, " sport=%hu dport=%hu",
51 ntohs(pptr[0]), ntohs(pptr[1]));
52 }
53 break;
54
55 case IPPROTO_ICMP:
56 case IPPROTO_ICMPV6: {
57 const u8 *iptr;
58 u8 _ih[2];
59
60 iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
61 if (iptr == NULL) {
62 audit_log_format(ab, " truncated=1");
63 return;
64 }
65
66 audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
67 iptr[0], iptr[1]);
68
69 }
70 break;
71 }
72}
73
74static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
75{
76 struct iphdr _iph;
77 const struct iphdr *ih;
78
79 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
80 if (!ih) {
81 audit_log_format(ab, " truncated=1");
82 return;
83 }
84
85 audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
86 &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
87
88 if (ntohs(ih->frag_off) & IP_OFFSET) {
89 audit_log_format(ab, " frag=1");
90 return;
91 }
92
93 audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
94}
95
96static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
97{
98 struct ipv6hdr _ip6h;
99 const struct ipv6hdr *ih;
100 u8 nexthdr;
101 int offset;
102
103 ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
104 if (!ih) {
105 audit_log_format(ab, " truncated=1");
106 return;
107 }
108
109 nexthdr = ih->nexthdr;
110 offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
111 &nexthdr);
112
113 audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
114 &ih->saddr, &ih->daddr, nexthdr);
115
116 if (offset)
117 audit_proto(ab, skb, nexthdr, offset);
118}
119
120static unsigned int
121audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
122{
123 const struct xt_audit_info *info = par->targinfo;
124 struct audit_buffer *ab;
125
126 ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
127 if (ab == NULL)
128 goto errout;
129
130 audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
131 info->type, par->hooknum, skb->len,
132 par->in ? par->in->name : "?",
133 par->out ? par->out->name : "?");
134
135 if (skb->mark)
136 audit_log_format(ab, " mark=%#x", skb->mark);
137
138 if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
139 audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
140 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
141 ntohs(eth_hdr(skb)->h_proto));
142
143 if (par->family == NFPROTO_BRIDGE) {
144 switch (eth_hdr(skb)->h_proto) {
145 case __constant_htons(ETH_P_IP):
146 audit_ip4(ab, skb);
147 break;
148
149 case __constant_htons(ETH_P_IPV6):
150 audit_ip6(ab, skb);
151 break;
152 }
153 }
154 }
155
156 switch (par->family) {
157 case NFPROTO_IPV4:
158 audit_ip4(ab, skb);
159 break;
160
161 case NFPROTO_IPV6:
162 audit_ip6(ab, skb);
163 break;
164 }
165
166 audit_log_end(ab);
167
168errout:
169 return XT_CONTINUE;
170}
171
172static unsigned int
173audit_tg_ebt(struct sk_buff *skb, const struct xt_action_param *par)
174{
175 audit_tg(skb, par);
176 return EBT_CONTINUE;
177}
178
179static int audit_tg_check(const struct xt_tgchk_param *par)
180{
181 const struct xt_audit_info *info = par->targinfo;
182
183 if (info->type > XT_AUDIT_TYPE_MAX) {
184 pr_info("Audit type out of range (valid range: 0..%hhu)\n",
185 XT_AUDIT_TYPE_MAX);
186 return -ERANGE;
187 }
188
189 return 0;
190}
191
192static struct xt_target audit_tg_reg[] __read_mostly = {
193 {
194 .name = "AUDIT",
195 .family = NFPROTO_UNSPEC,
196 .target = audit_tg,
197 .targetsize = sizeof(struct xt_audit_info),
198 .checkentry = audit_tg_check,
199 .me = THIS_MODULE,
200 },
201 {
202 .name = "AUDIT",
203 .family = NFPROTO_BRIDGE,
204 .target = audit_tg_ebt,
205 .targetsize = sizeof(struct xt_audit_info),
206 .checkentry = audit_tg_check,
207 .me = THIS_MODULE,
208 },
209};
210
211static int __init audit_tg_init(void)
212{
213 return xt_register_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg));
214}
215
216static void __exit audit_tg_exit(void)
217{
218 xt_unregister_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg));
219}
220
221module_init(audit_tg_init);
222module_exit(audit_tg_exit);
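Every header access in the new target goes through skb_header_pointer(), which copies the requested bytes into a caller-supplied buffer and returns NULL when the packet is shorter than offset + length — hence the truncated=1 records above. A userspace analogue of that bounds-checked read (header_pointer() is a simplified stand-in, not the kernel helper):

#include <stdio.h>
#include <string.h>

static const void *header_pointer(const unsigned char *pkt, size_t pktlen,
				  size_t offset, size_t len, void *buf)
{
	if (offset + len > pktlen)
		return NULL;	/* would read past the packet */
	memcpy(buf, pkt + offset, len);
	return buf;
}

int main(void)
{
	unsigned char pkt[] = { 0x00, 0x50, 0x1f, 0x90 };  /* ports 80, 8080 */
	unsigned char ports[4];

	if (!header_pointer(pkt, sizeof(pkt), 0, sizeof(ports), ports))
		printf("truncated=1\n");
	else
		printf("sport=%u dport=%u\n",
		       ports[0] << 8 | ports[1], ports[2] << 8 | ports[3]);
	return 0;
}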
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index c2c0e4abeb9..af9c4dadf81 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
19#include <linux/netfilter_ipv6.h> 19#include <linux/netfilter_ipv6.h>
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_CLASSIFY.h> 21#include <linux/netfilter/xt_CLASSIFY.h>
22#include <linux/netfilter_arp.h>
22 23
23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 24MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
24MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
25MODULE_DESCRIPTION("Xtables: Qdisc classification"); 26MODULE_DESCRIPTION("Xtables: Qdisc classification");
26MODULE_ALIAS("ipt_CLASSIFY"); 27MODULE_ALIAS("ipt_CLASSIFY");
27MODULE_ALIAS("ip6t_CLASSIFY"); 28MODULE_ALIAS("ip6t_CLASSIFY");
29MODULE_ALIAS("arpt_CLASSIFY");
28 30
29static unsigned int 31static unsigned int
30classify_tg(struct sk_buff *skb, const struct xt_action_param *par) 32classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
35 return XT_CONTINUE; 37 return XT_CONTINUE;
36} 38}
37 39
38static struct xt_target classify_tg_reg __read_mostly = { 40static struct xt_target classify_tg_reg[] __read_mostly = {
39 .name = "CLASSIFY", 41 {
40 .revision = 0, 42 .name = "CLASSIFY",
41 .family = NFPROTO_UNSPEC, 43 .revision = 0,
42 .table = "mangle", 44 .family = NFPROTO_UNSPEC,
43 .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | 45 .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
44 (1 << NF_INET_POST_ROUTING), 46 (1 << NF_INET_POST_ROUTING),
45 .target = classify_tg, 47 .target = classify_tg,
46 .targetsize = sizeof(struct xt_classify_target_info), 48 .targetsize = sizeof(struct xt_classify_target_info),
47 .me = THIS_MODULE, 49 .me = THIS_MODULE,
50 },
51 {
52 .name = "CLASSIFY",
53 .revision = 0,
54 .family = NFPROTO_ARP,
55 .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
56 .target = classify_tg,
57 .targetsize = sizeof(struct xt_classify_target_info),
58 .me = THIS_MODULE,
59 },
48}; 60};
49 61
50static int __init classify_tg_init(void) 62static int __init classify_tg_init(void)
51{ 63{
52 return xt_register_target(&classify_tg_reg); 64 return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
53} 65}
54 66
55static void __exit classify_tg_exit(void) 67static void __exit classify_tg_exit(void)
56{ 68{
57 xt_unregister_target(&classify_tg_reg); 69 xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
58} 70}
59 71
60module_init(classify_tg_init); 72module_init(classify_tg_init);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e1354..3bdd443aaf1 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); 313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
314MODULE_DESCRIPTION("Xtables: idle time monitor"); 314MODULE_DESCRIPTION("Xtables: idle time monitor");
315MODULE_LICENSE("GPL v2"); 315MODULE_LICENSE("GPL v2");
316MODULE_ALIAS("ipt_IDLETIMER");
317MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a4140509eea..993de2ba89d 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
31MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); 32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); 33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
34MODULE_ALIAS("ipt_LED");
35MODULE_ALIAS("ip6t_LED");
34 36
35static LIST_HEAD(xt_led_triggers); 37static LIST_HEAD(xt_led_triggers);
36static DEFINE_MUTEX(xt_led_mutex); 38static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 039cce1bde3..d4f4b5d66b2 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
72 72
73 if (info->queues_total > 1) { 73 if (info->queues_total > 1) {
74 if (par->family == NFPROTO_IPV4) 74 if (par->family == NFPROTO_IPV4)
75 queue = hash_v4(skb) % info->queues_total + queue; 75 queue = (((u64) hash_v4(skb) * info->queues_total) >>
76 32) + queue;
76#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 77#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
77 else if (par->family == NFPROTO_IPV6) 78 else if (par->family == NFPROTO_IPV6)
78 queue = hash_v6(skb) % info->queues_total + queue; 79 queue = (((u64) hash_v6(skb) * info->queues_total) >>
80 32) + queue;
79#endif 81#endif
80 } 82 }
81 return NF_QUEUE_NR(queue); 83 return NF_QUEUE_NR(queue);
82} 84}
83 85
84static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) 86static unsigned int
87nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
85{ 88{
86 const struct xt_NFQ_info_v1 *info = par->targinfo; 89 const struct xt_NFQ_info_v2 *info = par->targinfo;
90 unsigned int ret = nfqueue_tg_v1(skb, par);
91
92 if (info->bypass)
93 ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
94 return ret;
95}
96
97static int nfqueue_tg_check(const struct xt_tgchk_param *par)
98{
99 const struct xt_NFQ_info_v2 *info = par->targinfo;
87 u32 maxid; 100 u32 maxid;
88 101
89 if (unlikely(!rnd_inited)) { 102 if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
100 info->queues_total, maxid); 113 info->queues_total, maxid);
101 return -ERANGE; 114 return -ERANGE;
102 } 115 }
116 if (par->target->revision == 2 && info->bypass > 1)
117 return -EINVAL;
103 return 0; 118 return 0;
104} 119}
105 120
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
115 .name = "NFQUEUE", 130 .name = "NFQUEUE",
116 .revision = 1, 131 .revision = 1,
117 .family = NFPROTO_UNSPEC, 132 .family = NFPROTO_UNSPEC,
118 .checkentry = nfqueue_tg_v1_check, 133 .checkentry = nfqueue_tg_check,
119 .target = nfqueue_tg_v1, 134 .target = nfqueue_tg_v1,
120 .targetsize = sizeof(struct xt_NFQ_info_v1), 135 .targetsize = sizeof(struct xt_NFQ_info_v1),
121 .me = THIS_MODULE, 136 .me = THIS_MODULE,
122 }, 137 },
138 {
139 .name = "NFQUEUE",
140 .revision = 2,
141 .family = NFPROTO_UNSPEC,
142 .checkentry = nfqueue_tg_check,
143 .target = nfqueue_tg_v2,
144 .targetsize = sizeof(struct xt_NFQ_info_v2),
145 .me = THIS_MODULE,
146 },
123}; 147};
124 148
125static int __init nfqueue_tg_init(void) 149static int __init nfqueue_tg_init(void)
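The per-flow hash is now spread over the configured queues with ((u64)hash * n) >> 32 rather than hash % n, trading a division for a multiply; assuming the 32-bit hash is uniform, the result is near-uniform over [0, n). A standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* map a uniform 32-bit hash onto [0, n) without a modulo */
static uint32_t scale(uint32_t hash, uint32_t n)
{
	return (uint32_t)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
	printf("%u %u %u\n",
	       scale(0x00000000u, 8),	/* 0 */
	       scale(0x80000000u, 8),	/* 4 */
	       scale(0xffffffffu, 8));	/* 7 */
	return 0;
}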
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index eb81c380da1..6e6b46cb1db 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -148,16 +148,21 @@ tcpmss_mangle_packet(struct sk_buff *skb,
148static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, 148static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
149 unsigned int family) 149 unsigned int family)
150{ 150{
151 struct flowi fl = {}; 151 struct flowi fl;
152 const struct nf_afinfo *ai; 152 const struct nf_afinfo *ai;
153 struct rtable *rt = NULL; 153 struct rtable *rt = NULL;
154 u_int32_t mtu = ~0U; 154 u_int32_t mtu = ~0U;
155 155
156 if (family == PF_INET) 156 if (family == PF_INET) {
157 fl.fl4_dst = ip_hdr(skb)->saddr; 157 struct flowi4 *fl4 = &fl.u.ip4;
158 else 158 memset(fl4, 0, sizeof(*fl4));
159 fl.fl6_dst = ipv6_hdr(skb)->saddr; 159 fl4->daddr = ip_hdr(skb)->saddr;
160 } else {
161 struct flowi6 *fl6 = &fl.u.ip6;
160 162
163 memset(fl6, 0, sizeof(*fl6));
164 ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr);
165 }
161 rcu_read_lock(); 166 rcu_read_lock();
162 ai = nf_get_afinfo(family); 167 ai = nf_get_afinfo(family);
163 if (ai != NULL) 168 if (ai != NULL)
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 5128a6c4cb2..5f054a0dbbb 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -62,18 +62,19 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
62 const struct iphdr *iph = ip_hdr(skb); 62 const struct iphdr *iph = ip_hdr(skb);
63 struct net *net = pick_net(skb); 63 struct net *net = pick_net(skb);
64 struct rtable *rt; 64 struct rtable *rt;
65 struct flowi fl; 65 struct flowi4 fl4;
66 66
67 memset(&fl, 0, sizeof(fl)); 67 memset(&fl4, 0, sizeof(fl4));
68 if (info->priv) { 68 if (info->priv) {
69 if (info->priv->oif == -1) 69 if (info->priv->oif == -1)
70 return false; 70 return false;
71 fl.oif = info->priv->oif; 71 fl4.flowi4_oif = info->priv->oif;
72 } 72 }
73 fl.fl4_dst = info->gw.ip; 73 fl4.daddr = info->gw.ip;
74 fl.fl4_tos = RT_TOS(iph->tos); 74 fl4.flowi4_tos = RT_TOS(iph->tos);
75 fl.fl4_scope = RT_SCOPE_UNIVERSE; 75 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
76 if (ip_route_output_key(net, &rt, &fl) != 0) 76 rt = ip_route_output_key(net, &fl4);
77 if (IS_ERR(rt))
77 return false; 78 return false;
78 79
79 skb_dst_drop(skb); 80 skb_dst_drop(skb);
@@ -142,18 +143,18 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
142 const struct ipv6hdr *iph = ipv6_hdr(skb); 143 const struct ipv6hdr *iph = ipv6_hdr(skb);
143 struct net *net = pick_net(skb); 144 struct net *net = pick_net(skb);
144 struct dst_entry *dst; 145 struct dst_entry *dst;
145 struct flowi fl; 146 struct flowi6 fl6;
146 147
147 memset(&fl, 0, sizeof(fl)); 148 memset(&fl6, 0, sizeof(fl6));
148 if (info->priv) { 149 if (info->priv) {
149 if (info->priv->oif == -1) 150 if (info->priv->oif == -1)
150 return false; 151 return false;
151 fl.oif = info->priv->oif; 152 fl6.flowi6_oif = info->priv->oif;
152 } 153 }
153 fl.fl6_dst = info->gw.in6; 154 fl6.daddr = info->gw.in6;
154 fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | 155 fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
155 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; 156 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
156 dst = ip6_route_output(net, NULL, &fl); 157 dst = ip6_route_output(net, NULL, &fl6);
157 if (dst == NULL) 158 if (dst == NULL)
158 return false; 159 return false;
159 160
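ip_route_output_key() now returns the rtable itself and encodes failure as an error pointer tested with IS_ERR(), replacing the old int-status-plus-output-parameter calling convention. A userspace sketch of that convention — err_ptr()/is_err()/ptr_err() mimic the kernel macros, route_output() is a stand-in:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095	/* errors occupy the top 4095 pointer values */

static void *err_ptr(long err) { return (void *)(uintptr_t)err; }
static long ptr_err(const void *p) { return (long)(intptr_t)p; }
static int is_err(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static int fake_route;		/* stands in for a struct rtable */
static int route_exists;

static void *route_output(void)
{
	return route_exists ? (void *)&fake_route : err_ptr(-ENETUNREACH);
}

int main(void)
{
	void *rt = route_output();

	if (is_err(rt))
		printf("lookup failed: %ld\n", ptr_err(rt));
	return 0;
}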
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
new file mode 100644
index 00000000000..2220b85e951
--- /dev/null
+++ b/net/netfilter/xt_addrtype.c
@@ -0,0 +1,229 @@
1/*
2 * iptables module to match inet_addr_type() of an ip.
3 *
4 * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
5 * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/ip.h>
17#include <net/route.h>
18
19#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
20#include <net/ipv6.h>
21#include <net/ip6_route.h>
22#include <net/ip6_fib.h>
23#endif
24
25#include <linux/netfilter/xt_addrtype.h>
26#include <linux/netfilter/x_tables.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
30MODULE_DESCRIPTION("Xtables: address type match");
31MODULE_ALIAS("ipt_addrtype");
32MODULE_ALIAS("ip6t_addrtype");
33
34#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
35static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt)
36{
37 u32 ret;
38
39 if (!rt)
40 return XT_ADDRTYPE_UNREACHABLE;
41
42 if (rt->rt6i_flags & RTF_REJECT)
43 ret = XT_ADDRTYPE_UNREACHABLE;
44 else
45 ret = 0;
46
47 if (rt->rt6i_flags & RTF_LOCAL)
48 ret |= XT_ADDRTYPE_LOCAL;
49 if (rt->rt6i_flags & RTF_ANYCAST)
50 ret |= XT_ADDRTYPE_ANYCAST;
51 return ret;
52}
53
54static bool match_type6(struct net *net, const struct net_device *dev,
55 const struct in6_addr *addr, u16 mask)
56{
57 int addr_type = ipv6_addr_type(addr);
58
59 if ((mask & XT_ADDRTYPE_MULTICAST) &&
60 !(addr_type & IPV6_ADDR_MULTICAST))
61 return false;
62 if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST))
63 return false;
64 if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY)
65 return false;
66
67 if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
68 XT_ADDRTYPE_UNREACHABLE) & mask) {
69 struct rt6_info *rt;
70 u32 type;
71 int ifindex = dev ? dev->ifindex : 0;
72
73 rt = rt6_lookup(net, addr, NULL, ifindex, !!dev);
74
75 type = xt_addrtype_rt6_to_type(rt);
76
77 dst_release(&rt->dst);
78 return !!(mask & type);
79 }
80 return true;
81}
82
83static bool
84addrtype_mt6(struct net *net, const struct net_device *dev,
85 const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info)
86{
87 const struct ipv6hdr *iph = ipv6_hdr(skb);
88 bool ret = true;
89
90 if (info->source)
91 ret &= match_type6(net, dev, &iph->saddr, info->source) ^
92 (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
93 if (ret && info->dest)
94 ret &= match_type6(net, dev, &iph->daddr, info->dest) ^
95 !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
96 return ret;
97}
98#endif
99
100static inline bool match_type(struct net *net, const struct net_device *dev,
101 __be32 addr, u_int16_t mask)
102{
103 return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
104}
105
106static bool
107addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
108{
109 struct net *net = dev_net(par->in ? par->in : par->out);
110 const struct xt_addrtype_info *info = par->matchinfo;
111 const struct iphdr *iph = ip_hdr(skb);
112 bool ret = true;
113
114 if (info->source)
115 ret &= match_type(net, NULL, iph->saddr, info->source) ^
116 info->invert_source;
117 if (info->dest)
118 ret &= match_type(net, NULL, iph->daddr, info->dest) ^
119 info->invert_dest;
120
121 return ret;
122}
123
124static bool
125addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
126{
127 struct net *net = dev_net(par->in ? par->in : par->out);
128 const struct xt_addrtype_info_v1 *info = par->matchinfo;
129 const struct iphdr *iph;
130 const struct net_device *dev = NULL;
131 bool ret = true;
132
133 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
134 dev = par->in;
135 else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
136 dev = par->out;
137
138#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
139 if (par->family == NFPROTO_IPV6)
140 return addrtype_mt6(net, dev, skb, info);
141#endif
142 iph = ip_hdr(skb);
143 if (info->source)
144 ret &= match_type(net, dev, iph->saddr, info->source) ^
145 (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
146 if (ret && info->dest)
147 ret &= match_type(net, dev, iph->daddr, info->dest) ^
148 !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
149 return ret;
150}
151
152static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
153{
154 struct xt_addrtype_info_v1 *info = par->matchinfo;
155
156 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
157 info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
158 pr_info("both incoming and outgoing "
159 "interface limitation cannot be selected\n");
160 return -EINVAL;
161 }
162
163 if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
164 (1 << NF_INET_LOCAL_IN)) &&
165 info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
166 pr_info("output interface limitation "
167 "not valid in PREROUTING and INPUT\n");
168 return -EINVAL;
169 }
170
171 if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
172 (1 << NF_INET_LOCAL_OUT)) &&
173 info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
174 pr_info("input interface limitation "
175 "not valid in POSTROUTING and OUTPUT\n");
176 return -EINVAL;
177 }
178
179#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
180 if (par->family == NFPROTO_IPV6) {
181 if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
182 pr_err("ipv6 BLACKHOLE matching not supported\n");
183 return -EINVAL;
184 }
185 if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
186 pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
187 return -EINVAL;
188 }
189 if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
190 pr_err("ipv6 does not support BROADCAST matching\n");
191 return -EINVAL;
192 }
193 }
194#endif
195 return 0;
196}
197
198static struct xt_match addrtype_mt_reg[] __read_mostly = {
199 {
200 .name = "addrtype",
201 .family = NFPROTO_IPV4,
202 .match = addrtype_mt_v0,
203 .matchsize = sizeof(struct xt_addrtype_info),
204 .me = THIS_MODULE
205 },
206 {
207 .name = "addrtype",
208 .family = NFPROTO_UNSPEC,
209 .revision = 1,
210 .match = addrtype_mt_v1,
211 .checkentry = addrtype_mt_checkentry_v1,
212 .matchsize = sizeof(struct xt_addrtype_info_v1),
213 .me = THIS_MODULE
214 }
215};
216
217static int __init addrtype_mt_init(void)
218{
219 return xt_register_matches(addrtype_mt_reg,
220 ARRAY_SIZE(addrtype_mt_reg));
221}
222
223static void __exit addrtype_mt_exit(void)
224{
225 xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
226}
227
228module_init(addrtype_mt_init);
229module_exit(addrtype_mt_exit);
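xt_addrtype_rt6_to_type() folds a route's rt6i_flags into the same bitmask namespace the match configuration uses, so a single mask & type test answers whether the looked-up address is any of the requested kinds. A sketch with hypothetical flag values standing in for the XT_ADDRTYPE_*/RTF_* constants:

#include <stdint.h>
#include <stdio.h>

#define T_UNREACHABLE	(1 << 0)	/* hypothetical XT_ADDRTYPE_* bits */
#define T_LOCAL		(1 << 1)
#define T_ANYCAST	(1 << 2)

#define RTF_REJECT	(1 << 0)	/* hypothetical RTF_* bits */
#define RTF_LOCAL	(1 << 1)
#define RTF_ANYCAST	(1 << 2)

static uint32_t rt_flags_to_type(uint32_t rtf)
{
	uint32_t type = (rtf & RTF_REJECT) ? T_UNREACHABLE : 0;

	if (rtf & RTF_LOCAL)
		type |= T_LOCAL;
	if (rtf & RTF_ANYCAST)
		type |= T_ANYCAST;
	return type;
}

int main(void)
{
	uint32_t type = rt_flags_to_type(RTF_LOCAL | RTF_ANYCAST);

	/* one AND answers "is it any of the requested types?" */
	printf("%d\n", !!(type & (T_LOCAL | T_UNREACHABLE)));	/* 1 */
	return 0;
}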
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5c5b6b921b8..c6d5a83450c 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -33,17 +33,17 @@
33 33
34/* we will save the tuples of all connections we care about */ 34/* we will save the tuples of all connections we care about */
35struct xt_connlimit_conn { 35struct xt_connlimit_conn {
36 struct list_head list; 36 struct hlist_node node;
37 struct nf_conntrack_tuple tuple; 37 struct nf_conntrack_tuple tuple;
38 union nf_inet_addr addr;
38}; 39};
39 40
40struct xt_connlimit_data { 41struct xt_connlimit_data {
41 struct list_head iphash[256]; 42 struct hlist_head iphash[256];
42 spinlock_t lock; 43 spinlock_t lock;
43}; 44};
44 45
45static u_int32_t connlimit_rnd __read_mostly; 46static u_int32_t connlimit_rnd __read_mostly;
46static bool connlimit_rnd_inited __read_mostly;
47 47
48static inline unsigned int connlimit_iphash(__be32 addr) 48static inline unsigned int connlimit_iphash(__be32 addr)
49{ 49{
@@ -101,9 +101,9 @@ static int count_them(struct net *net,
101{ 101{
102 const struct nf_conntrack_tuple_hash *found; 102 const struct nf_conntrack_tuple_hash *found;
103 struct xt_connlimit_conn *conn; 103 struct xt_connlimit_conn *conn;
104 struct xt_connlimit_conn *tmp; 104 struct hlist_node *pos, *n;
105 struct nf_conn *found_ct; 105 struct nf_conn *found_ct;
106 struct list_head *hash; 106 struct hlist_head *hash;
107 bool addit = true; 107 bool addit = true;
108 int matches = 0; 108 int matches = 0;
109 109
@@ -115,7 +115,7 @@ static int count_them(struct net *net,
115 rcu_read_lock(); 115 rcu_read_lock();
116 116
117 /* check the saved connections */ 117 /* check the saved connections */
118 list_for_each_entry_safe(conn, tmp, hash, list) { 118 hlist_for_each_entry_safe(conn, pos, n, hash, node) {
119 found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, 119 found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
120 &conn->tuple); 120 &conn->tuple);
121 found_ct = NULL; 121 found_ct = NULL;
@@ -135,7 +135,7 @@ static int count_them(struct net *net,
135 135
136 if (found == NULL) { 136 if (found == NULL) {
137 /* this one is gone */ 137 /* this one is gone */
138 list_del(&conn->list); 138 hlist_del(&conn->node);
139 kfree(conn); 139 kfree(conn);
140 continue; 140 continue;
141 } 141 }
@@ -146,12 +146,12 @@ static int count_them(struct net *net,
146 * closed already -> ditch it 146 * closed already -> ditch it
147 */ 147 */
148 nf_ct_put(found_ct); 148 nf_ct_put(found_ct);
149 list_del(&conn->list); 149 hlist_del(&conn->node);
150 kfree(conn); 150 kfree(conn);
151 continue; 151 continue;
152 } 152 }
153 153
154 if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) 154 if (same_source_net(addr, mask, &conn->addr, family))
155 /* same source network -> be counted! */ 155 /* same source network -> be counted! */
156 ++matches; 156 ++matches;
157 nf_ct_put(found_ct); 157 nf_ct_put(found_ct);
@@ -161,11 +161,12 @@ static int count_them(struct net *net,
161 161
162 if (addit) { 162 if (addit) {
163 /* save the new connection in our list */ 163 /* save the new connection in our list */
164 conn = kzalloc(sizeof(*conn), GFP_ATOMIC); 164 conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
165 if (conn == NULL) 165 if (conn == NULL)
166 return -ENOMEM; 166 return -ENOMEM;
167 conn->tuple = *tuple; 167 conn->tuple = *tuple;
168 list_add(&conn->list, hash); 168 conn->addr = *addr;
169 hlist_add_head(&conn->node, hash);
169 ++matches; 170 ++matches;
170 } 171 }
171 172
@@ -186,17 +187,19 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
186 187
187 ct = nf_ct_get(skb, &ctinfo); 188 ct = nf_ct_get(skb, &ctinfo);
188 if (ct != NULL) 189 if (ct != NULL)
189 tuple_ptr = &ct->tuplehash[0].tuple; 190 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
190 else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), 191 else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
191 par->family, &tuple)) 192 par->family, &tuple))
192 goto hotdrop; 193 goto hotdrop;
193 194
194 if (par->family == NFPROTO_IPV6) { 195 if (par->family == NFPROTO_IPV6) {
195 const struct ipv6hdr *iph = ipv6_hdr(skb); 196 const struct ipv6hdr *iph = ipv6_hdr(skb);
196 memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); 197 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
198 &iph->daddr : &iph->saddr, sizeof(addr.ip6));
197 } else { 199 } else {
198 const struct iphdr *iph = ip_hdr(skb); 200 const struct iphdr *iph = ip_hdr(skb);
199 addr.ip = iph->saddr; 201 addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
202 iph->daddr : iph->saddr;
200 } 203 }
201 204
202 spin_lock_bh(&info->data->lock); 205 spin_lock_bh(&info->data->lock);
@@ -204,13 +207,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
204 &info->mask, par->family); 207 &info->mask, par->family);
205 spin_unlock_bh(&info->data->lock); 208 spin_unlock_bh(&info->data->lock);
206 209
207 if (connections < 0) { 210 if (connections < 0)
208 /* kmalloc failed, drop it entirely */ 211 /* kmalloc failed, drop it entirely */
209 par->hotdrop = true; 212 goto hotdrop;
210 return false;
211 }
212 213
213 return (connections > info->limit) ^ info->inverse; 214 return (connections > info->limit) ^
215 !!(info->flags & XT_CONNLIMIT_INVERT);
214 216
215 hotdrop: 217 hotdrop:
216 par->hotdrop = true; 218 par->hotdrop = true;
@@ -223,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
223 unsigned int i; 225 unsigned int i;
224 int ret; 226 int ret;
225 227
226 if (unlikely(!connlimit_rnd_inited)) { 228 if (unlikely(!connlimit_rnd)) {
227 get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); 229 u_int32_t rand;
228 connlimit_rnd_inited = true; 230
231 do {
232 get_random_bytes(&rand, sizeof(rand));
233 } while (!rand);
234 cmpxchg(&connlimit_rnd, 0, rand);
229 } 235 }
230 ret = nf_ct_l3proto_try_module_get(par->family); 236 ret = nf_ct_l3proto_try_module_get(par->family);
231 if (ret < 0) { 237 if (ret < 0) {
@@ -243,7 +249,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
243 249
244 spin_lock_init(&info->data->lock); 250 spin_lock_init(&info->data->lock);
245 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) 251 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
246 INIT_LIST_HEAD(&info->data->iphash[i]); 252 INIT_HLIST_HEAD(&info->data->iphash[i]);
247 253
248 return 0; 254 return 0;
249} 255}
@@ -252,15 +258,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
252{ 258{
253 const struct xt_connlimit_info *info = par->matchinfo; 259 const struct xt_connlimit_info *info = par->matchinfo;
254 struct xt_connlimit_conn *conn; 260 struct xt_connlimit_conn *conn;
255 struct xt_connlimit_conn *tmp; 261 struct hlist_node *pos, *n;
256 struct list_head *hash = info->data->iphash; 262 struct hlist_head *hash = info->data->iphash;
257 unsigned int i; 263 unsigned int i;
258 264
259 nf_ct_l3proto_module_put(par->family); 265 nf_ct_l3proto_module_put(par->family);
260 266
261 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { 267 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
262 list_for_each_entry_safe(conn, tmp, &hash[i], list) { 268 hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) {
263 list_del(&conn->list); 269 hlist_del(&conn->node);
264 kfree(conn); 270 kfree(conn);
265 } 271 }
266 } 272 }
@@ -268,25 +274,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
268 kfree(info->data); 274 kfree(info->data);
269} 275}
270 276
271static struct xt_match connlimit_mt_reg __read_mostly = { 277static struct xt_match connlimit_mt_reg[] __read_mostly = {
272 .name = "connlimit", 278 {
273 .revision = 0, 279 .name = "connlimit",
274 .family = NFPROTO_UNSPEC, 280 .revision = 0,
275 .checkentry = connlimit_mt_check, 281 .family = NFPROTO_UNSPEC,
276 .match = connlimit_mt, 282 .checkentry = connlimit_mt_check,
277 .matchsize = sizeof(struct xt_connlimit_info), 283 .match = connlimit_mt,
278 .destroy = connlimit_mt_destroy, 284 .matchsize = sizeof(struct xt_connlimit_info),
279 .me = THIS_MODULE, 285 .destroy = connlimit_mt_destroy,
286 .me = THIS_MODULE,
287 },
288 {
289 .name = "connlimit",
290 .revision = 1,
291 .family = NFPROTO_UNSPEC,
292 .checkentry = connlimit_mt_check,
293 .match = connlimit_mt,
294 .matchsize = sizeof(struct xt_connlimit_info),
295 .destroy = connlimit_mt_destroy,
296 .me = THIS_MODULE,
297 },
280}; 298};
281 299
282static int __init connlimit_mt_init(void) 300static int __init connlimit_mt_init(void)
283{ 301{
284 return xt_register_match(&connlimit_mt_reg); 302 return xt_register_matches(connlimit_mt_reg,
303 ARRAY_SIZE(connlimit_mt_reg));
285} 304}
286 305
287static void __exit connlimit_mt_exit(void) 306static void __exit connlimit_mt_exit(void)
288{ 307{
289 xt_unregister_match(&connlimit_mt_reg); 308 xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
290} 309}
291 310
292module_init(connlimit_mt_init); 311module_init(connlimit_mt_init);
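connlimit_rnd is now seeded lazily and without a separate init flag: checkentry loops until it gets a nonzero value (zero stays reserved as the "unseeded" sentinel) and publishes it with cmpxchg(), so concurrent rule loads can race but only one seed ever wins. A C11-atomics analogue, with rand() standing in for get_random_bytes():

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic unsigned int rnd;	/* 0 means "not seeded yet" */

static unsigned int get_rnd(void)
{
	unsigned int cur = atomic_load(&rnd);

	if (cur == 0) {
		unsigned int seed, expected = 0;

		do {
			seed = (unsigned int)rand();
		} while (seed == 0);	/* keep 0 free as the sentinel */

		/* one caller wins the race; everyone reloads the winner */
		atomic_compare_exchange_strong(&rnd, &expected, seed);
		cur = atomic_load(&rnd);
	}
	return cur;
}

int main(void)
{
	printf("seed=%u\n", get_rnd());
	return 0;
}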
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index e536710ad91..2c0086a4751 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
112 return true; 112 return true;
113} 113}
114 114
115static inline bool
116port_match(u16 min, u16 max, u16 port, bool invert)
117{
118 return (port >= min && port <= max) ^ invert;
119}
120
121static inline bool
122ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
123 const struct nf_conn *ct)
124{
125 const struct nf_conntrack_tuple *tuple;
126
127 tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
128 if ((info->match_flags & XT_CONNTRACK_PROTO) &&
129 (nf_ct_protonum(ct) == info->l4proto) ^
130 !(info->invert_flags & XT_CONNTRACK_PROTO))
131 return false;
132
133 /* Shortcut to match all recognized protocols by using ->src.all. */
134 if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
135 !port_match(info->origsrc_port, info->origsrc_port_high,
136 ntohs(tuple->src.u.all),
137 info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
138 return false;
139
140 if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
141 !port_match(info->origdst_port, info->origdst_port_high,
142 ntohs(tuple->dst.u.all),
143 info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
144 return false;
145
146 tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
147
148 if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
149 !port_match(info->replsrc_port, info->replsrc_port_high,
150 ntohs(tuple->src.u.all),
151 info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
152 return false;
153
154 if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
155 !port_match(info->repldst_port, info->repldst_port_high,
156 ntohs(tuple->dst.u.all),
157 info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
158 return false;
159
160 return true;
161}
162
115static bool 163static bool
116conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, 164conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
117 u16 state_mask, u16 status_mask) 165 u16 state_mask, u16 status_mask)
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
170 !(info->invert_flags & XT_CONNTRACK_REPLDST)) 218 !(info->invert_flags & XT_CONNTRACK_REPLDST))
171 return false; 219 return false;
172 220
173 if (!ct_proto_port_check(info, ct)) 221 if (par->match->revision != 3) {
174 return false; 222 if (!ct_proto_port_check(info, ct))
223 return false;
224 } else {
225 if (!ct_proto_port_check_v3(par->matchinfo, ct))
226 return false;
227 }
175 228
176 if ((info->match_flags & XT_CONNTRACK_STATUS) && 229 if ((info->match_flags & XT_CONNTRACK_STATUS) &&
177 (!!(status_mask & ct->status) ^ 230 (!!(status_mask & ct->status) ^
@@ -207,10 +260,23 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
207 return conntrack_mt(skb, par, info->state_mask, info->status_mask); 260 return conntrack_mt(skb, par, info->state_mask, info->status_mask);
208} 261}
209 262
263static bool
264conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
265{
266 const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
267
268 return conntrack_mt(skb, par, info->state_mask, info->status_mask);
269}
270
210static int conntrack_mt_check(const struct xt_mtchk_param *par) 271static int conntrack_mt_check(const struct xt_mtchk_param *par)
211{ 272{
212 int ret; 273 int ret;
213 274
275 if (strcmp(par->table, "raw") == 0) {
276 pr_info("state is undetermined at the time of raw table\n");
277 return -EINVAL;
278 }
279
214 ret = nf_ct_l3proto_try_module_get(par->family); 280 ret = nf_ct_l3proto_try_module_get(par->family);
215 if (ret < 0) 281 if (ret < 0)
216 pr_info("cannot load conntrack support for proto=%u\n", 282 pr_info("cannot load conntrack support for proto=%u\n",
@@ -244,6 +310,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
244 .destroy = conntrack_mt_destroy, 310 .destroy = conntrack_mt_destroy,
245 .me = THIS_MODULE, 311 .me = THIS_MODULE,
246 }, 312 },
313 {
314 .name = "conntrack",
315 .revision = 3,
316 .family = NFPROTO_UNSPEC,
317 .matchsize = sizeof(struct xt_conntrack_mtinfo3),
318 .match = conntrack_mt_v3,
319 .checkentry = conntrack_mt_check,
320 .destroy = conntrack_mt_destroy,
321 .me = THIS_MODULE,
322 },
247}; 323};
248 324
249static int __init conntrack_mt_init(void) 325static int __init conntrack_mt_init(void)
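Revision 3 expresses each port criterion as an inclusive [min, max] range plus an invert bit, all funneled through the port_match() helper added above. The helper transplants directly to userspace; a few sanity checks:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool port_match(uint16_t min, uint16_t max, uint16_t port,
		       bool invert)
{
	return (port >= min && port <= max) ^ invert;
}

int main(void)
{
	assert(port_match(80, 90, 85, false));		/* inside range */
	assert(!port_match(80, 90, 85, true));		/* ... inverted */
	assert(port_match(443, 443, 443, false));	/* single port */
	assert(port_match(80, 90, 100, true));		/* outside, inverted */
	printf("ok\n");
	return 0;
}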
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a5cba..c7a2e5466bc 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
22MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); 23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
24MODULE_DESCRIPTION("Xtables: CPU match"); 24MODULE_DESCRIPTION("Xtables: CPU match");
25MODULE_ALIAS("ipt_cpu");
26MODULE_ALIAS("ip6t_cpu");
25 27
26static int cpu_mt_check(const struct xt_mtchk_param *par) 28static int cpu_mt_check(const struct xt_mtchk_param *par)
27{ 29{
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
new file mode 100644
index 00000000000..d9202cdd25c
--- /dev/null
+++ b/net/netfilter/xt_devgroup.c
@@ -0,0 +1,82 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/netdevice.h>
12
13#include <linux/netfilter/xt_devgroup.h>
14#include <linux/netfilter/x_tables.h>
15
16MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
17MODULE_LICENSE("GPL");
18MODULE_DESCRIPTION("Xtables: Device group match");
19MODULE_ALIAS("ipt_devgroup");
20MODULE_ALIAS("ip6t_devgroup");
21
22static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
23{
24 const struct xt_devgroup_info *info = par->matchinfo;
25
26 if (info->flags & XT_DEVGROUP_MATCH_SRC &&
27 (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^
28 ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
29 return false;
30
31 if (info->flags & XT_DEVGROUP_MATCH_DST &&
32 (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^
33 ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
34 return false;
35
36 return true;
37}
38
39static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
40{
41 const struct xt_devgroup_info *info = par->matchinfo;
42
43 if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC |
44 XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST))
45 return -EINVAL;
46
47 if (info->flags & XT_DEVGROUP_MATCH_SRC &&
48 par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) |
49 (1 << NF_INET_LOCAL_IN) |
50 (1 << NF_INET_FORWARD)))
51 return -EINVAL;
52
53 if (info->flags & XT_DEVGROUP_MATCH_DST &&
54 par->hook_mask & ~((1 << NF_INET_FORWARD) |
55 (1 << NF_INET_LOCAL_OUT) |
56 (1 << NF_INET_POST_ROUTING)))
57 return -EINVAL;
58
59 return 0;
60}
61
62static struct xt_match devgroup_mt_reg __read_mostly = {
63 .name = "devgroup",
64 .match = devgroup_mt,
65 .checkentry = devgroup_mt_checkentry,
66 .matchsize = sizeof(struct xt_devgroup_info),
67 .family = NFPROTO_UNSPEC,
68 .me = THIS_MODULE
69};
70
71static int __init devgroup_mt_init(void)
72{
73 return xt_register_match(&devgroup_mt_reg);
74}
75
76static void __exit devgroup_mt_exit(void)
77{
78 xt_unregister_match(&devgroup_mt_reg);
79}
80
81module_init(devgroup_mt_init);
82module_exit(devgroup_mt_exit);
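The group test in devgroup_mt() is pure bit arithmetic: (src_group ^ dev->group) & mask is zero exactly when the masked bits agree, and the ?1:0 / XOR pair folds in the invert flag. A compact demonstration of the same predicate:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* true when dev_group matches want under mask, honouring invert */
static bool group_match(uint32_t want, uint32_t dev_group,
			uint32_t mask, bool invert)
{
	return ((((want ^ dev_group) & mask) ? 1 : 0) ^ (invert ? 1 : 0)) == 0;
}

int main(void)
{
	/* only the low byte must equal 0x10; upper bits are ignored */
	printf("%d\n", group_match(0x10, 0xab10, 0xff, false));	/* 1 */
	printf("%d\n", group_match(0x10, 0xab11, 0xff, false));	/* 0 */
	printf("%d\n", group_match(0x10, 0xab11, 0xff, true));	/* 1 */
	return 0;
}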
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 73c33a42f87..b46626cddd9 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -31,7 +31,7 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
31 pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n", 31 pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n",
32 &iph->saddr, 32 &iph->saddr,
33 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "", 33 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
34 &info->src_max.ip, 34 &info->src_min.ip,
35 &info->src_max.ip); 35 &info->src_max.ip);
36 return false; 36 return false;
37 } 37 }
@@ -76,15 +76,27 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
76 m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6); 76 m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6);
77 m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr); 77 m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr);
78 m ^= !!(info->flags & IPRANGE_SRC_INV); 78 m ^= !!(info->flags & IPRANGE_SRC_INV);
79 if (m) 79 if (m) {
80 pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n",
81 &iph->saddr,
82 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
83 &info->src_min.in6,
84 &info->src_max.in6);
80 return false; 85 return false;
86 }
81 } 87 }
82 if (info->flags & IPRANGE_DST) { 88 if (info->flags & IPRANGE_DST) {
83 m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6); 89 m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6);
84 m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr); 90 m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr);
85 m ^= !!(info->flags & IPRANGE_DST_INV); 91 m ^= !!(info->flags & IPRANGE_DST_INV);
86 if (m) 92 if (m) {
93 pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n",
94 &iph->daddr,
95 (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
96 &info->dst_min.in6,
97 &info->dst_max.in6);
87 return false; 98 return false;
99 }
88 } 100 }
89 return true; 101 return true;
90} 102}
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d8aa3..bb10b0717f1 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 85 /*
86 * Check if the packet belongs to an existing entry 86 * Check if the packet belongs to an existing entry
87 */ 87 */
88 cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); 88 cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
89 if (unlikely(cp == NULL)) { 89 if (unlikely(cp == NULL)) {
90 match = false; 90 match = false;
91 goto out; 91 goto out;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
new file mode 100644
index 00000000000..061d48cec13
--- /dev/null
+++ b/net/netfilter/xt_set.c
@@ -0,0 +1,359 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Martin Josefsson <gandalf@wlug.westbo.se>
4 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/* Kernel module which implements the set match and SET target
12 * for netfilter/iptables. */
13
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/version.h>
17
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/xt_set.h>
20
21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
23MODULE_DESCRIPTION("Xtables: IP set match and target module");
24MODULE_ALIAS("xt_SET");
25MODULE_ALIAS("ipt_set");
26MODULE_ALIAS("ip6t_set");
27MODULE_ALIAS("ipt_SET");
28MODULE_ALIAS("ip6t_SET");
29
30static inline int
31match_set(ip_set_id_t index, const struct sk_buff *skb,
32 u8 pf, u8 dim, u8 flags, int inv)
33{
34 if (ip_set_test(index, skb, pf, dim, flags))
35 inv = !inv;
36 return inv;
37}
38
39/* Revision 0 interface: backward compatible with netfilter/iptables */
40
41static bool
42set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
43{
44 const struct xt_set_info_match_v0 *info = par->matchinfo;
45
46 return match_set(info->match_set.index, skb, par->family,
47 info->match_set.u.compat.dim,
48 info->match_set.u.compat.flags,
49 info->match_set.u.compat.flags & IPSET_INV_MATCH);
50}
51
52static void
53compat_flags(struct xt_set_info_v0 *info)
54{
55 u_int8_t i;
56
57 /* Fill out compatibility data according to enum ip_set_kopt */
58 info->u.compat.dim = IPSET_DIM_ZERO;
59 if (info->u.flags[0] & IPSET_MATCH_INV)
60 info->u.compat.flags |= IPSET_INV_MATCH;
61 for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) {
62 info->u.compat.dim++;
63 if (info->u.flags[i] & IPSET_SRC)
64 info->u.compat.flags |= (1<<info->u.compat.dim);
65 }
66}
67
68static int
69set_match_v0_checkentry(const struct xt_mtchk_param *par)
70{
71 struct xt_set_info_match_v0 *info = par->matchinfo;
72 ip_set_id_t index;
73
74 index = ip_set_nfnl_get_byindex(info->match_set.index);
75
76 if (index == IPSET_INVALID_ID) {
77 pr_warning("Cannot find set indentified by id %u to match\n",
78 info->match_set.index);
79 return -ENOENT;
80 }
81 if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
82 pr_warning("Protocol error: set match dimension "
83 "is over the limit!\n");
84 return -ERANGE;
85 }
86
87 /* Fill out compatibility data */
88 compat_flags(&info->match_set);
89
90 return 0;
91}
92
93static void
94set_match_v0_destroy(const struct xt_mtdtor_param *par)
95{
96 struct xt_set_info_match_v0 *info = par->matchinfo;
97
98 ip_set_nfnl_put(info->match_set.index);
99}
100
101static unsigned int
102set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
103{
104 const struct xt_set_info_target_v0 *info = par->targinfo;
105
106 if (info->add_set.index != IPSET_INVALID_ID)
107 ip_set_add(info->add_set.index, skb, par->family,
108 info->add_set.u.compat.dim,
109 info->add_set.u.compat.flags);
110 if (info->del_set.index != IPSET_INVALID_ID)
111 ip_set_del(info->del_set.index, skb, par->family,
112 info->del_set.u.compat.dim,
113 info->del_set.u.compat.flags);
114
115 return XT_CONTINUE;
116}
117
118static int
119set_target_v0_checkentry(const struct xt_tgchk_param *par)
120{
121 struct xt_set_info_target_v0 *info = par->targinfo;
122 ip_set_id_t index;
123
124 if (info->add_set.index != IPSET_INVALID_ID) {
125 index = ip_set_nfnl_get_byindex(info->add_set.index);
126 if (index == IPSET_INVALID_ID) {
127 pr_warning("Cannot find add_set index %u as target\n",
128 info->add_set.index);
129 return -ENOENT;
130 }
131 }
132
133 if (info->del_set.index != IPSET_INVALID_ID) {
134 index = ip_set_nfnl_get_byindex(info->del_set.index);
135 if (index == IPSET_INVALID_ID) {
136 pr_warning("Cannot find del_set index %u as target\n",
137 info->del_set.index);
138 return -ENOENT;
139 }
140 }
141 if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
142 info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
143 pr_warning("Protocol error: SET target dimension "
144 "is over the limit!\n");
145 return -ERANGE;
146 }
147
148 /* Fill out compatibility data */
149 compat_flags(&info->add_set);
150 compat_flags(&info->del_set);
151
152 return 0;
153}
154
155static void
156set_target_v0_destroy(const struct xt_tgdtor_param *par)
157{
158 const struct xt_set_info_target_v0 *info = par->targinfo;
159
160 if (info->add_set.index != IPSET_INVALID_ID)
161 ip_set_nfnl_put(info->add_set.index);
162 if (info->del_set.index != IPSET_INVALID_ID)
163 ip_set_nfnl_put(info->del_set.index);
164}
165
166/* Revision 1: current interface to netfilter/iptables */
167
168static bool
169set_match(const struct sk_buff *skb, struct xt_action_param *par)
170{
171 const struct xt_set_info_match *info = par->matchinfo;
172
173 return match_set(info->match_set.index, skb, par->family,
174 info->match_set.dim,
175 info->match_set.flags,
176 info->match_set.flags & IPSET_INV_MATCH);
177}
178
179static int
180set_match_checkentry(const struct xt_mtchk_param *par)
181{
182 struct xt_set_info_match *info = par->matchinfo;
183 ip_set_id_t index;
184
185 index = ip_set_nfnl_get_byindex(info->match_set.index);
186
187 if (index == IPSET_INVALID_ID) {
188 pr_warning("Cannot find set indentified by id %u to match\n",
189 info->match_set.index);
190 return -ENOENT;
191 }
192 if (info->match_set.dim > IPSET_DIM_MAX) {
193 pr_warning("Protocol error: set match dimension "
194 "is over the limit!\n");
195 return -ERANGE;
196 }
197
198 return 0;
199}
200
201static void
202set_match_destroy(const struct xt_mtdtor_param *par)
203{
204 struct xt_set_info_match *info = par->matchinfo;
205
206 ip_set_nfnl_put(info->match_set.index);
207}
208
209static unsigned int
210set_target(struct sk_buff *skb, const struct xt_action_param *par)
211{
212 const struct xt_set_info_target *info = par->targinfo;
213
214 if (info->add_set.index != IPSET_INVALID_ID)
215 ip_set_add(info->add_set.index,
216 skb, par->family,
217 info->add_set.dim,
218 info->add_set.flags);
219 if (info->del_set.index != IPSET_INVALID_ID)
220 ip_set_del(info->del_set.index,
221 skb, par->family,
222 info->add_set.dim,
223 info->del_set.flags);
224
225 return XT_CONTINUE;
226}
227
228static int
229set_target_checkentry(const struct xt_tgchk_param *par)
230{
231 const struct xt_set_info_target *info = par->targinfo;
232 ip_set_id_t index;
233
234 if (info->add_set.index != IPSET_INVALID_ID) {
235 index = ip_set_nfnl_get_byindex(info->add_set.index);
236 if (index == IPSET_INVALID_ID) {
237 pr_warning("Cannot find add_set index %u as target\n",
238 info->add_set.index);
239 return -ENOENT;
240 }
241 }
242
243 if (info->del_set.index != IPSET_INVALID_ID) {
244 index = ip_set_nfnl_get_byindex(info->del_set.index);
245 if (index == IPSET_INVALID_ID) {
246 pr_warning("Cannot find del_set index %u as target\n",
247 info->del_set.index);
248 return -ENOENT;
249 }
250 }
251 if (info->add_set.dim > IPSET_DIM_MAX ||
 252 info->del_set.dim > IPSET_DIM_MAX) {
253 pr_warning("Protocol error: SET target dimension "
254 "is over the limit!\n");
255 return -ERANGE;
256 }
257
258 return 0;
259}
260
261static void
262set_target_destroy(const struct xt_tgdtor_param *par)
263{
264 const struct xt_set_info_target *info = par->targinfo;
265
266 if (info->add_set.index != IPSET_INVALID_ID)
267 ip_set_nfnl_put(info->add_set.index);
268 if (info->del_set.index != IPSET_INVALID_ID)
269 ip_set_nfnl_put(info->del_set.index);
270}
271
272static struct xt_match set_matches[] __read_mostly = {
273 {
274 .name = "set",
275 .family = NFPROTO_IPV4,
276 .revision = 0,
277 .match = set_match_v0,
278 .matchsize = sizeof(struct xt_set_info_match_v0),
279 .checkentry = set_match_v0_checkentry,
280 .destroy = set_match_v0_destroy,
281 .me = THIS_MODULE
282 },
283 {
284 .name = "set",
285 .family = NFPROTO_IPV4,
286 .revision = 1,
287 .match = set_match,
288 .matchsize = sizeof(struct xt_set_info_match),
289 .checkentry = set_match_checkentry,
290 .destroy = set_match_destroy,
291 .me = THIS_MODULE
292 },
293 {
294 .name = "set",
295 .family = NFPROTO_IPV6,
296 .revision = 1,
297 .match = set_match,
298 .matchsize = sizeof(struct xt_set_info_match),
299 .checkentry = set_match_checkentry,
300 .destroy = set_match_destroy,
301 .me = THIS_MODULE
302 },
303};
304
305static struct xt_target set_targets[] __read_mostly = {
306 {
307 .name = "SET",
308 .revision = 0,
309 .family = NFPROTO_IPV4,
310 .target = set_target_v0,
311 .targetsize = sizeof(struct xt_set_info_target_v0),
312 .checkentry = set_target_v0_checkentry,
313 .destroy = set_target_v0_destroy,
314 .me = THIS_MODULE
315 },
316 {
317 .name = "SET",
318 .revision = 1,
319 .family = NFPROTO_IPV4,
320 .target = set_target,
321 .targetsize = sizeof(struct xt_set_info_target),
322 .checkentry = set_target_checkentry,
323 .destroy = set_target_destroy,
324 .me = THIS_MODULE
325 },
326 {
327 .name = "SET",
328 .revision = 1,
329 .family = NFPROTO_IPV6,
330 .target = set_target,
331 .targetsize = sizeof(struct xt_set_info_target),
332 .checkentry = set_target_checkentry,
333 .destroy = set_target_destroy,
334 .me = THIS_MODULE
335 },
336};
337
338static int __init xt_set_init(void)
339{
340 int ret = xt_register_matches(set_matches, ARRAY_SIZE(set_matches));
341
342 if (!ret) {
343 ret = xt_register_targets(set_targets,
344 ARRAY_SIZE(set_targets));
345 if (ret)
346 xt_unregister_matches(set_matches,
347 ARRAY_SIZE(set_matches));
348 }
349 return ret;
350}
351
352static void __exit xt_set_fini(void)
353{
354 xt_unregister_matches(set_matches, ARRAY_SIZE(set_matches));
355 xt_unregister_targets(set_targets, ARRAY_SIZE(set_targets));
356}
357
358module_init(xt_set_init);
359module_exit(xt_set_fini);
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 6caef8b2061..f4fc4c9ad56 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -49,9 +49,9 @@
49static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, 49static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
50 struct netlbl_audit *audit_info) 50 struct netlbl_audit *audit_info)
51{ 51{
52 audit_info->secid = NETLINK_CB(skb).sid; 52 security_task_getsecid(current, &audit_info->secid);
53 audit_info->loginuid = NETLINK_CB(skb).loginuid; 53 audit_info->loginuid = audit_get_loginuid(current);
54 audit_info->sessionid = NETLINK_CB(skb).sessionid; 54 audit_info->sessionid = audit_get_sessionid(current);
55} 55}
56 56
57/* NetLabel NETLINK I/O functions */ 57/* NetLabel NETLINK I/O functions */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1f924595bde..c8f35b5d2ee 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1362,17 +1362,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1362 1362
1363 NETLINK_CB(skb).pid = nlk->pid; 1363 NETLINK_CB(skb).pid = nlk->pid;
1364 NETLINK_CB(skb).dst_group = dst_group; 1364 NETLINK_CB(skb).dst_group = dst_group;
1365 NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
1366 NETLINK_CB(skb).sessionid = audit_get_sessionid(current);
1367 security_task_getsecid(current, &(NETLINK_CB(skb).sid));
1368 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1365 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1369 1366
1370 /* What can I do? Netlink is asynchronous, so that
1371 we will have to save current capabilities to
1372 check them, when this message will be delivered
1373 to corresponding kernel module. --ANK (980802)
1374 */
1375
1376 err = -EFAULT; 1367 err = -EFAULT;
1377 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 1368 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1378 kfree_skb(skb); 1369 kfree_skb(skb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 91cb1d71f01..b5362e96022 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -164,7 +164,6 @@ struct packet_mreq_max {
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		int closing, int tx_ring);
 
-#define PGV_FROM_VMALLOC 1
 struct pgv {
 	char *buffer;
 };
@@ -466,7 +465,7 @@ retry:
 	 */
 
 	err = -EMSGSIZE;
-	if (len > dev->mtu + dev->hard_header_len)
+	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN)
 		goto out_unlock;
 
 	if (!skb) {
@@ -497,6 +496,19 @@ retry:
 		goto retry;
 	}
 
+	if (len > (dev->mtu + dev->hard_header_len)) {
+		/* Earlier code assumed this would be a VLAN pkt,
+		 * double-check this now that we have the actual
+		 * packet in hand.
+		 */
+		struct ethhdr *ehdr;
+		skb_reset_mac_header(skb);
+		ehdr = eth_hdr(skb);
+		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
+			err = -EMSGSIZE;
+			goto out_unlock;
+		}
+	}
 
 	skb->protocol = proto;
 	skb->dev = dev;
@@ -523,11 +535,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
 {
 	struct sk_filter *filter;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
 		res = sk_run_filter(skb, filter->insns);
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return res;
 }
@@ -954,7 +966,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
-	struct socket *sock;
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
@@ -966,8 +977,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	int len_sum = 0;
 	int status = 0;
 
-	sock = po->sk.sk_socket;
-
 	mutex_lock(&po->pg_vec_lock);
 
 	err = -EBUSY;
@@ -1200,7 +1209,7 @@ static int packet_snd(struct socket *sock,
 	}
 
 	err = -EMSGSIZE;
-	if (!gso_type && (len > dev->mtu+reserve))
+	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN))
 		goto out_unlock;
 
 	err = -ENOBUFS;
@@ -1225,6 +1234,20 @@ static int packet_snd(struct socket *sock,
 	if (err < 0)
 		goto out_free;
 
+	if (!gso_type && (len > dev->mtu + reserve)) {
+		/* Earlier code assumed this would be a VLAN pkt,
+		 * double-check this now that we have the actual
+		 * packet in hand.
+		 */
+		struct ethhdr *ehdr;
+		skb_reset_mac_header(skb);
+		ehdr = eth_hdr(skb);
+		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
+			err = -EMSGSIZE;
+			goto out_free;
+		}
+	}
+
 	skb->protocol = proto;
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
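
Both af_packet send paths above now accept up to VLAN_HLEN (4) bytes over the MTU up front, then verify after the frame has been built that an oversized packet really carries an 802.1Q tag. A self-contained sketch of that two-stage check (plain C; the constants are illustrative):

    #include <stddef.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define MTU         1500
    #define VLAN_HLEN   4
    #define ETH_P_8021Q 0x8100

    /* Stage 1: optimistic bound, leaving room for a possible VLAN tag.
     * Stage 2: once the frame is materialized, oversized frames must
     * actually be 802.1Q-tagged or they are rejected. */
    static bool frame_len_ok(size_t len, unsigned ethertype)
    {
        if (len > MTU + VLAN_HLEN)
            return false;       /* too big even with a tag */
        if (len > MTU && ethertype != ETH_P_8021Q)
            return false;       /* oversize but untagged */
        return true;
    }

    int main(void)
    {
        printf("%d\n", frame_len_ok(1502, ETH_P_8021Q)); /* 1: tagged */
        printf("%d\n", frame_len_ok(1502, 0x0800));      /* 0: untagged */
        return 0;
    }
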
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 0d9b8a220a7..6ec7d55b176 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -14,15 +14,3 @@ config PHONET
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called phonet. If unsure, say N.
-
-config PHONET_PIPECTRLR
-	bool "Phonet Pipe Controller (EXPERIMENTAL)"
-	depends on PHONET && EXPERIMENTAL
-	default N
-	help
-	  The Pipe Controller implementation in Phonet stack to support Pipe
-	  data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500
-	  platform.
-
-	  This option is incompatible with older Nokia modems.
-	  Say N here unless you really know what you are doing.
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 1072b2c19d3..c6fffd946d4 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -110,6 +110,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
 	sk->sk_protocol = protocol;
 	pn = pn_sk(sk);
 	pn->sobject = 0;
+	pn->dobject = 0;
 	pn->resource = 0;
 	sk->sk_prot->init(sk);
 	err = 0;
@@ -194,11 +195,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
 	if (skb->pkt_type == PACKET_LOOPBACK) {
 		skb_reset_mac_header(skb);
 		skb_orphan(skb);
-		if (irq)
-			netif_rx(skb);
-		else
-			netif_rx_ni(skb);
-		err = 0;
+		err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? -ENOBUFS : 0;
 	} else {
 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
 					NULL, NULL, skb->len);
@@ -207,6 +204,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
 			goto drop;
 		}
 		err = dev_queue_xmit(skb);
+		if (unlikely(err > 0))
+			err = net_xmit_errno(err);
 	}
 
 	return err;
@@ -242,8 +241,18 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 	struct net_device *dev;
 	struct pn_sock *pn = pn_sk(sk);
 	int err;
-	u16 src;
-	u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR;
+	u16 src, dst;
+	u8 daddr, saddr, res;
+
+	src = pn->sobject;
+	if (target != NULL) {
+		dst = pn_sockaddr_get_object(target);
+		res = pn_sockaddr_get_resource(target);
+	} else {
+		dst = pn->dobject;
+		res = pn->resource;
+	}
+	daddr = pn_addr(dst);
 
 	err = -EHOSTUNREACH;
 	if (sk->sk_bound_dev_if)
@@ -251,10 +260,9 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 	else if (phonet_address_lookup(net, daddr) == 0) {
 		dev = phonet_device_get(net);
 		skb->pkt_type = PACKET_LOOPBACK;
-	} else if (pn_sockaddr_get_object(target) == 0) {
+	} else if (dst == 0) {
 		/* Resource routing (small race until phonet_rcv()) */
-		struct sock *sk = pn_find_sock_by_res(net,
-				target->spn_resource);
+		struct sock *sk = pn_find_sock_by_res(net, res);
 		if (sk) {
 			sock_put(sk);
 			dev = phonet_device_get(net);
@@ -271,12 +279,10 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 	if (saddr == PN_NO_ADDR)
 		goto drop;
 
-	src = pn->sobject;
 	if (!pn_addr(src))
 		src = pn_object(saddr, pn_obj(src));
 
-	err = pn_send(skb, dev, pn_sockaddr_get_object(target),
-			src, pn_sockaddr_get_resource(target), 0);
+	err = pn_send(skb, dev, dst, src, res, 0);
 	dev_put(dev);
 	return err;
 
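
The loopback branch in pn_send() above now propagates a queueing failure as -ENOBUFS instead of always reporting success, and positive congestion codes from dev_queue_xmit() are mapped to errnos via net_xmit_errno(). A compact illustration of folding the two concerns into one expression (plain C; queue_irq/queue_task are stubs standing in for netif_rx()/netif_rx_ni()):

    #include <errno.h>
    #include <stdio.h>

    /* Non-zero return means the packet was dropped by the backlog queue. */
    static int queue_irq(void)  { return 0; }
    static int queue_task(void) { return 1; /* simulate a drop */ }

    static int deliver_loopback(int in_irq)
    {
        /* Collapse "which queueing path" and "did it fail" into one
         * expression: any non-zero queue status becomes -ENOBUFS. */
        return (in_irq ? queue_irq() : queue_task()) ? -ENOBUFS : 0;
    }

    int main(void)
    {
        printf("%d\n", deliver_loopback(0)); /* prints -ENOBUFS (-105 on Linux) */
        return 0;
    }
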
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 3e60f2e4e6c..f17fd841f94 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -42,7 +42,7 @@
  * TCP_ESTABLISHED	connected pipe in enabled state
  *
  * pep_sock locking:
- *  - sk_state, ackq, hlist: sock lock needed
+ *  - sk_state, hlist: sock lock needed
  *  - listener: read only
  *  - pipe_handle: read only
  */
@@ -50,11 +50,6 @@
 #define CREDITS_MAX	10
 #define CREDITS_THR	7
 
-static const struct sockaddr_pn pipe_srv = {
-	.spn_family = AF_PHONET,
-	.spn_resource = 0xD9, /* pipe service */
-};
-
 #define pep_sb_size(s)	(((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */
 
 /* Get the next TLV sub-block. */
@@ -82,236 +77,95 @@ static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen,
 	return data;
 }
 
-static int pep_reply(struct sock *sk, struct sk_buff *oskb,
-			u8 code, const void *data, int len, gfp_t priority)
+static struct sk_buff *pep_alloc_skb(struct sock *sk, const void *payload,
+					int len, gfp_t priority)
 {
-	const struct pnpipehdr *oph = pnp_hdr(oskb);
-	struct pnpipehdr *ph;
-	struct sk_buff *skb;
-
-	skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
+	struct sk_buff *skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
 	if (!skb)
-		return -ENOMEM;
+		return NULL;
 	skb_set_owner_w(skb, sk);
 
 	skb_reserve(skb, MAX_PNPIPE_HEADER);
 	__skb_put(skb, len);
-	skb_copy_to_linear_data(skb, data, len);
-	__skb_push(skb, sizeof(*ph));
+	skb_copy_to_linear_data(skb, payload, len);
+	__skb_push(skb, sizeof(struct pnpipehdr));
 	skb_reset_transport_header(skb);
-	ph = pnp_hdr(skb);
-	ph->utid = oph->utid;
-	ph->message_id = oph->message_id + 1; /* REQ -> RESP */
-	ph->pipe_handle = oph->pipe_handle;
-	ph->error_code = code;
-
-	return pn_skb_send(sk, skb, &pipe_srv);
-}
-
-#define PAD 0x00
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len)
-{
-	int i, j;
-	u8 base_fc, final_fc;
-
-	for (i = 0; i < len; i++) {
-		base_fc = host_fc[i];
-		for (j = 0; j < len; j++) {
-			if (remote_fc[j] == base_fc) {
-				final_fc = base_fc;
-				goto done;
-			}
-		}
-	}
-	return -EINVAL;
-
-done:
-	return final_fc;
-
-}
-
-static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb,
-		u8 *pref_rx_fc, u8 *req_tx_fc)
-{
-	struct pnpipehdr *hdr;
-	u8 n_sb;
-
-	if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
-		return -EINVAL;
-
-	hdr = pnp_hdr(skb);
-	n_sb = hdr->data[4];
-
-	__skb_pull(skb, sizeof(*hdr) + 4);
-	while (n_sb > 0) {
-		u8 type, buf[3], len = sizeof(buf);
-		u8 *data = pep_get_sb(skb, &type, &len, buf);
-
-		if (data == NULL)
-			return -EINVAL;
-
-		switch (type) {
-		case PN_PIPE_SB_REQUIRED_FC_TX:
-			if (len < 3 || (data[2] | data[3] | data[4]) > 3)
-				break;
-			req_tx_fc[0] = data[2];
-			req_tx_fc[1] = data[3];
-			req_tx_fc[2] = data[4];
-			break;
-
-		case PN_PIPE_SB_PREFERRED_FC_RX:
-			if (len < 3 || (data[2] | data[3] | data[4]) > 3)
-				break;
-			pref_rx_fc[0] = data[2];
-			pref_rx_fc[1] = data[3];
-			pref_rx_fc[2] = data[4];
-			break;
-
-		}
-		n_sb--;
-	}
-	return 0;
+	return skb;
 }
 
-static int pipe_handler_send_req(struct sock *sk, u8 utid,
-		u8 msg_id, gfp_t priority)
+static int pep_reply(struct sock *sk, struct sk_buff *oskb, u8 code,
+			const void *data, int len, gfp_t priority)
 {
-	int len;
+	const struct pnpipehdr *oph = pnp_hdr(oskb);
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
-	struct pep_sock *pn = pep_sk(sk);
-
-	static const u8 data[4] = {
-		PAD, PAD, PAD, PAD,
-	};
+	struct sockaddr_pn peer;
 
-	switch (msg_id) {
-	case PNS_PEP_CONNECT_REQ:
-		len = sizeof(data);
-		break;
-
-	case PNS_PEP_DISCONNECT_REQ:
-	case PNS_PEP_ENABLE_REQ:
-	case PNS_PEP_DISABLE_REQ:
-		len = 0;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
+	skb = pep_alloc_skb(sk, data, len, priority);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	if (len) {
-		__skb_put(skb, len);
-		skb_copy_to_linear_data(skb, data, len);
-	}
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
-	ph->utid = utid;
-	ph->message_id = msg_id;
-	ph->pipe_handle = pn->pipe_handle;
-	ph->error_code = PN_PIPE_NO_ERROR;
+	ph->utid = oph->utid;
+	ph->message_id = oph->message_id + 1; /* REQ -> RESP */
+	ph->pipe_handle = oph->pipe_handle;
+	ph->error_code = code;
 
-	return pn_skb_send(sk, skb, &pn->remote_pep);
+	pn_skb_get_src_sockaddr(oskb, &peer);
+	return pn_skb_send(sk, skb, &peer);
 }
 
-static int pipe_handler_send_created_ind(struct sock *sk,
-		u8 utid, u8 msg_id)
+static int pep_indicate(struct sock *sk, u8 id, u8 code,
+			const void *data, int len, gfp_t priority)
 {
-	int err_code;
+	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
 
-	struct pep_sock *pn = pep_sk(sk);
-	static u8 data[4] = {
-		0x03, 0x04,
-	};
-	data[2] = pn->tx_fc;
-	data[3] = pn->rx_fc;
-
-	/*
-	 * actually, below is number of sub-blocks and not error code.
-	 * Pipe_created_ind message format does not have any
-	 * error code field. However, the Phonet stack will always send
-	 * an error code as part of pnpipehdr. So, use that err_code to
-	 * specify the number of sub-blocks.
-	 */
-	err_code = 0x01;
-
-	skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC);
+	skb = pep_alloc_skb(sk, data, len, priority);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	__skb_put(skb, sizeof(data));
-	skb_copy_to_linear_data(skb, data, sizeof(data));
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
-	ph->utid = utid;
-	ph->message_id = msg_id;
+	ph->utid = 0;
+	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->error_code = err_code;
-
-	return pn_skb_send(sk, skb, &pn->remote_pep);
+	ph->data[0] = code;
+	return pn_skb_send(sk, skb, NULL);
 }
 
-static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id)
+#define PAD 0x00
+
+static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
+				const void *data, int len)
 {
-	int err_code;
+	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
-	struct pep_sock *pn = pep_sk(sk);
-
-	/*
-	 * actually, below is a filler.
-	 * Pipe_enabled/disabled_ind message format does not have any
-	 * error code field. However, the Phonet stack will always send
-	 * an error code as part of pnpipehdr. So, use that err_code to
-	 * specify the filler value.
-	 */
-	err_code = 0x0;
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC);
+	skb = pep_alloc_skb(sk, data, len, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
-	ph->utid = utid;
-	ph->message_id = msg_id;
+	ph->utid = id; /* whatever */
+	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->error_code = err_code;
-
-	return pn_skb_send(sk, skb, &pn->remote_pep);
+	ph->data[0] = code;
+	return pn_skb_send(sk, skb, NULL);
 }
 
-static int pipe_handler_enable_pipe(struct sock *sk, int enable)
+static int pipe_handler_send_created_ind(struct sock *sk)
 {
-	int utid, req;
-
-	if (enable) {
-		utid = PNS_PIPE_ENABLE_UTID;
-		req = PNS_PEP_ENABLE_REQ;
-	} else {
-		utid = PNS_PIPE_DISABLE_UTID;
-		req = PNS_PEP_DISABLE_REQ;
-	}
-	return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC);
+	struct pep_sock *pn = pep_sk(sk);
+	u8 data[4] = {
+		PN_PIPE_SB_NEGOTIATED_FC, pep_sb_size(2),
+		pn->tx_fc, pn->rx_fc,
+	};
+
+	return pep_indicate(sk, PNS_PIPE_CREATED_IND, 1 /* sub-blocks */,
+				data, 4, GFP_ATOMIC);
 }
-#endif
 
 static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
 {
@@ -334,11 +188,12 @@ static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
 							GFP_KERNEL);
 }
 
-static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code)
+static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code,
+				gfp_t priority)
 {
 	static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ };
 	WARN_ON(code == PN_PIPE_NO_ERROR);
-	return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC);
+	return pep_reply(sk, skb, code, data, sizeof(data), priority);
 }
 
 /* Control requests are not sent by the pipe service and have a specific
@@ -350,23 +205,21 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
 	struct sk_buff *skb;
 	struct pnpipehdr *ph;
 	struct sockaddr_pn dst;
+	u8 data[4] = {
+		oph->data[0], /* PEP type */
+		code, /* error code, at an unusual offset */
+		PAD, PAD,
+	};
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority);
+	skb = pep_alloc_skb(sk, data, 4, priority);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
-
-	skb_reserve(skb, MAX_PHONET_HEADER);
-	ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4);
 
+	ph = pnp_hdr(skb);
 	ph->utid = oph->utid;
 	ph->message_id = PNS_PEP_CTRL_RESP;
 	ph->pipe_handle = oph->pipe_handle;
 	ph->data[0] = oph->data[1]; /* CTRL id */
-	ph->data[1] = oph->data[0]; /* PEP type */
-	ph->data[2] = code; /* error code, at an usual offset */
-	ph->data[3] = PAD;
-	ph->data[4] = PAD;
 
 	pn_skb_get_src_sockaddr(oskb, &dst);
 	return pn_skb_send(sk, skb, &dst);
@@ -374,38 +227,15 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
 
 static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
 {
-	struct pep_sock *pn = pep_sk(sk);
-	struct pnpipehdr *ph;
-	struct sk_buff *skb;
+	u8 data[4] = { type, PAD, PAD, status };
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority);
-	if (!skb)
-		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
-
-	skb_reserve(skb, MAX_PNPIPE_HEADER + 4);
-	__skb_push(skb, sizeof(*ph) + 4);
-	skb_reset_transport_header(skb);
-	ph = pnp_hdr(skb);
-	ph->utid = 0;
-	ph->message_id = PNS_PEP_STATUS_IND;
-	ph->pipe_handle = pn->pipe_handle;
-	ph->pep_type = PN_PEP_TYPE_COMMON;
-	ph->data[1] = type;
-	ph->data[2] = PAD;
-	ph->data[3] = PAD;
-	ph->data[4] = status;
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-	return pn_skb_send(sk, skb, &pn->remote_pep);
-#else
-	return pn_skb_send(sk, skb, &pipe_srv);
-#endif
+	return pep_indicate(sk, PNS_PEP_STATUS_IND, PN_PEP_TYPE_COMMON,
+				data, 4, priority);
 }
 
 /* Send our RX flow control information to the sender.
  * Socket must be locked. */
-static void pipe_grant_credits(struct sock *sk)
+static void pipe_grant_credits(struct sock *sk, gfp_t priority)
 {
 	struct pep_sock *pn = pep_sk(sk);
 
@@ -415,16 +245,16 @@ static void pipe_grant_credits(struct sock *sk)
 	case PN_LEGACY_FLOW_CONTROL: /* TODO */
 		break;
 	case PN_ONE_CREDIT_FLOW_CONTROL:
-		pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL,
-				PEP_IND_READY, GFP_ATOMIC);
-		pn->rx_credits = 1;
+		if (pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL,
+					PEP_IND_READY, priority) == 0)
+			pn->rx_credits = 1;
 		break;
 	case PN_MULTI_CREDIT_FLOW_CONTROL:
 		if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX)
 			break;
 		if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS,
 					CREDITS_MAX - pn->rx_credits,
-					GFP_ATOMIC) == 0)
+					priority) == 0)
 			pn->rx_credits = CREDITS_MAX;
 		break;
 	}
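
With this change pipe_grant_credits() only records credits it has actually managed to advertise: the rx_credits bookkeeping is updated when pipe_snd_status() reports success, so a failed allocation can no longer leave the socket and its peer disagreeing about the credit count. A self-contained sketch of that commit-on-success pattern (plain C; send_grant() is a hypothetical stand-in for pipe_snd_status()):

    #define CREDITS_MAX 10
    #define CREDITS_THR 7

    /* Returns 0 when the grant message was actually sent to the peer. */
    static int send_grant(int credits) { (void)credits; return 0; }

    /* Only commit the new credit count if the peer was told about it. */
    static void grant_credits(int *rx_credits)
    {
        if (*rx_credits + CREDITS_THR > CREDITS_MAX)
            return;         /* enough credits outstanding already */
        if (send_grant(CREDITS_MAX - *rx_credits) == 0)
            *rx_credits = CREDITS_MAX;
    }

    int main(void)
    {
        int rx = 0;
        grant_credits(&rx);     /* rx becomes CREDITS_MAX */
        return rx == CREDITS_MAX ? 0 : 1;
    }
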
@@ -522,7 +352,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	switch (hdr->message_id) {
 	case PNS_PEP_CONNECT_REQ:
-		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_ATOMIC);
 		break;
 
 	case PNS_PEP_DISCONNECT_REQ:
@@ -532,35 +362,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		sk->sk_state_change(sk);
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_DISCONNECT_RESP:
-		pn->pipe_state = PIPE_IDLE;
-		sk->sk_state = TCP_CLOSE;
-		break;
-#endif
-
 	case PNS_PEP_ENABLE_REQ:
 		/* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
 		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_ENABLE_RESP:
-		pn->pipe_state = PIPE_ENABLED;
-		pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID,
-				PNS_PIPE_ENABLED_IND);
-
-		if (!pn_flow_safe(pn->tx_fc)) {
-			atomic_set(&pn->tx_credits, 1);
-			sk->sk_write_space(sk);
-		}
-		if (sk->sk_state == TCP_ESTABLISHED)
-			break; /* Nothing to do */
-		sk->sk_state = TCP_ESTABLISHED;
-		pipe_grant_credits(sk);
-		break;
-#endif
-
 	case PNS_PEP_RESET_REQ:
 		switch (hdr->state_after_reset) {
 		case PN_PIPE_DISABLE:
@@ -579,17 +385,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_DISABLE_RESP:
-		pn->pipe_state = PIPE_DISABLED;
-		atomic_set(&pn->tx_credits, 0);
-		pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID,
-				PNS_PIPE_DISABLED_IND);
-		sk->sk_state = TCP_SYN_RECV;
-		pn->rx_credits = 0;
-		break;
-#endif
-
 	case PNS_PEP_CTRL_REQ:
 		if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
 			atomic_inc(&sk->sk_drops);
@@ -607,7 +402,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		if (!pn_flow_safe(pn->rx_fc)) {
 			err = sock_queue_rcv_skb(sk, skb);
 			if (!err)
-				return 0;
+				return NET_RX_SUCCESS;
+			err = -ENOBUFS;
 			break;
 		}
 
@@ -645,7 +441,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_state == TCP_ESTABLISHED)
 			break; /* Nothing to do */
 		sk->sk_state = TCP_ESTABLISHED;
-		pipe_grant_credits(sk);
+		pipe_grant_credits(sk, GFP_ATOMIC);
 		break;
 
 	case PNS_PIPE_DISABLED_IND:
@@ -660,7 +456,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 out:
 	kfree_skb(skb);
-	return err;
+	return (err == -ENOBUFS) ? NET_RX_DROP : NET_RX_SUCCESS;
 
 queue:
 	skb->dev = NULL;
@@ -669,7 +465,7 @@ queue:
 	skb_queue_tail(queue, skb);
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, err);
-	return 0;
+	return NET_RX_SUCCESS;
 }
 
 /* Destroy connected sock. */
@@ -681,133 +477,126 @@ static void pipe_destruct(struct sock *sk)
 	skb_queue_purge(&pn->ctrlreq_queue);
 }
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
+static u8 pipe_negotiate_fc(const u8 *fcs, unsigned n)
 {
-	struct pep_sock *pn = pep_sk(sk);
-	u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1};
-	u8 remote_pref_rx_fc[3], remote_req_tx_fc[3];
-	u8 negotiated_rx_fc, negotiated_tx_fc;
-	int ret;
-
-	pipe_get_flow_info(sk, skb, remote_pref_rx_fc,
-			remote_req_tx_fc);
-	negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc,
-			host_pref_rx_fc,
-			sizeof(host_pref_rx_fc));
-	negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc,
-			remote_pref_rx_fc,
-			sizeof(host_pref_rx_fc));
-
-	pn->pipe_state = PIPE_DISABLED;
-	sk->sk_state = TCP_SYN_RECV;
-	sk->sk_backlog_rcv = pipe_do_rcv;
-	sk->sk_destruct = pipe_destruct;
-	pn->rx_credits = 0;
-	pn->rx_fc = negotiated_rx_fc;
-	pn->tx_fc = negotiated_tx_fc;
-	sk->sk_state_change(sk);
+	unsigned i;
+	u8 final_fc = PN_NO_FLOW_CONTROL;
 
-	ret = pipe_handler_send_created_ind(sk,
-			PNS_PIPE_CREATED_IND_UTID,
-			PNS_PIPE_CREATED_IND
-			);
+	for (i = 0; i < n; i++) {
+		u8 fc = fcs[i];
 
-	return ret;
+		if (fc > final_fc && fc < PN_MAX_FLOW_CONTROL)
+			final_fc = fc;
+	}
+	return final_fc;
 }
-#endif
 
-static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
+static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
 {
-	struct sock *newsk;
-	struct pep_sock *newpn, *pn = pep_sk(sk);
+	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *hdr;
-	struct sockaddr_pn dst;
-	u16 peer_type;
-	u8 pipe_handle, enabled, n_sb;
-	u8 aligned = 0;
+	u8 n_sb;
 
 	if (!pskb_pull(skb, sizeof(*hdr) + 4))
 		return -EINVAL;
 
 	hdr = pnp_hdr(skb);
-	pipe_handle = hdr->pipe_handle;
-	switch (hdr->state_after_connect) {
-	case PN_PIPE_DISABLE:
-		enabled = 0;
-		break;
-	case PN_PIPE_ENABLE:
-		enabled = 1;
-		break;
-	default:
-		pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM);
-		return -EINVAL;
-	}
-	peer_type = hdr->other_pep_type << 8;
-
-	if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) {
-		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
-		return -ENOBUFS;
-	}
+	if (hdr->error_code != PN_PIPE_NO_ERROR)
+		return -ECONNREFUSED;
 
-	/* Parse sub-blocks (options) */
+	/* Parse sub-blocks */
 	n_sb = hdr->data[4];
 	while (n_sb > 0) {
-		u8 type, buf[1], len = sizeof(buf);
+		u8 type, buf[6], len = sizeof(buf);
 		const u8 *data = pep_get_sb(skb, &type, &len, buf);
 
 		if (data == NULL)
 			return -EINVAL;
+
 		switch (type) {
-		case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE:
-			if (len < 1)
-				return -EINVAL;
-			peer_type = (peer_type & 0xff00) | data[0];
+		case PN_PIPE_SB_REQUIRED_FC_TX:
+			if (len < 2 || len < data[0])
+				break;
+			pn->tx_fc = pipe_negotiate_fc(data + 2, len - 2);
 			break;
-		case PN_PIPE_SB_ALIGNED_DATA:
-			aligned = data[0] != 0;
+
+		case PN_PIPE_SB_PREFERRED_FC_RX:
+			if (len < 2 || len < data[0])
+				break;
+			pn->rx_fc = pipe_negotiate_fc(data + 2, len - 2);
 			break;
+
 		}
 		n_sb--;
 	}
 
-	skb = skb_clone(skb, GFP_ATOMIC);
-	if (!skb)
-		return -ENOMEM;
+	return pipe_handler_send_created_ind(sk);
+}
 
-	/* Create a new to-be-accepted sock */
-	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot);
-	if (!newsk) {
-		kfree_skb(skb);
-		return -ENOMEM;
-	}
-	sock_init_data(NULL, newsk);
-	newsk->sk_state = TCP_SYN_RECV;
-	newsk->sk_backlog_rcv = pipe_do_rcv;
-	newsk->sk_protocol = sk->sk_protocol;
-	newsk->sk_destruct = pipe_destruct;
+/* Queue an skb to an actively connected sock.
+ * Socket lock must be held. */
+static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+	int err = NET_RX_SUCCESS;
 
-	newpn = pep_sk(newsk);
-	pn_skb_get_dst_sockaddr(skb, &dst);
-	newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
-	newpn->pn_sk.resource = pn->pn_sk.resource;
-	skb_queue_head_init(&newpn->ctrlreq_queue);
-	newpn->pipe_handle = pipe_handle;
-	atomic_set(&newpn->tx_credits, 0);
-	newpn->peer_type = peer_type;
-	newpn->rx_credits = 0;
-	newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
-	newpn->init_enable = enabled;
-	newpn->aligned = aligned;
+	switch (hdr->message_id) {
+	case PNS_PIPE_ALIGNED_DATA:
+		__skb_pull(skb, 1);
+		/* fall through */
+	case PNS_PIPE_DATA:
+		__skb_pull(skb, 3); /* Pipe data header */
+		if (!pn_flow_safe(pn->rx_fc)) {
+			err = sock_queue_rcv_skb(sk, skb);
+			if (!err)
+				return NET_RX_SUCCESS;
+			err = NET_RX_DROP;
+			break;
+		}
 
-	BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue));
-	skb_queue_head(&newsk->sk_receive_queue, skb);
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk, 0);
+		if (pn->rx_credits == 0) {
+			atomic_inc(&sk->sk_drops);
+			err = NET_RX_DROP;
+			break;
+		}
+		pn->rx_credits--;
+		skb->dev = NULL;
+		skb_set_owner_r(skb, sk);
+		err = skb->len;
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, err);
+		return NET_RX_SUCCESS;
 
-	sk_acceptq_added(sk);
-	sk_add_node(newsk, &pn->ackq);
-	return 0;
+	case PNS_PEP_CONNECT_RESP:
+		if (sk->sk_state != TCP_SYN_SENT)
+			break;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+		if (pep_connresp_rcv(sk, skb)) {
+			sk->sk_state = TCP_CLOSE_WAIT;
+			break;
+		}
+
+		sk->sk_state = TCP_ESTABLISHED;
+		if (!pn_flow_safe(pn->tx_fc)) {
+			atomic_set(&pn->tx_credits, 1);
+			sk->sk_write_space(sk);
+		}
+		pipe_grant_credits(sk, GFP_ATOMIC);
+		break;
+
+	case PNS_PEP_DISCONNECT_RESP:
+		/* sock should already be dead, nothing to do */
+		break;
+
+	case PNS_PEP_STATUS_IND:
+		pipe_rcv_status(sk, skb);
+		break;
+	}
+	kfree_skb(skb);
+	return err;
 }
 
 /* Listening sock must be locked */
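
The rewritten pipe_negotiate_fc() above replaces the old nested host/remote search with a single pass that keeps the highest advertised value below PN_MAX_FLOW_CONTROL, falling back to PN_NO_FLOW_CONTROL when nothing valid is offered. A self-contained sketch of that selection (plain C; the PN_* values here are illustrative, the real constants live in the Phonet headers):

    #include <stdio.h>

    enum { PN_NO_FLOW_CONTROL = 0, PN_MAX_FLOW_CONTROL = 4 };

    /* Pick the strongest valid flow-control mode the peer advertised. */
    static unsigned char negotiate_fc(const unsigned char *fcs, unsigned n)
    {
        unsigned char final_fc = PN_NO_FLOW_CONTROL;
        unsigned i;

        for (i = 0; i < n; i++)
            if (fcs[i] > final_fc && fcs[i] < PN_MAX_FLOW_CONTROL)
                final_fc = fcs[i];
        return final_fc;
    }

    int main(void)
    {
        const unsigned char offer[] = { 1, 3, 9 };  /* 9 is out of range */
        printf("%u\n", negotiate_fc(offer, 3));     /* prints 3 */
        return 0;
    }
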
@@ -847,7 +636,6 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 	struct sock *sknode;
 	struct pnpipehdr *hdr;
 	struct sockaddr_pn dst;
-	int err = NET_RX_SUCCESS;
 	u8 pipe_handle;
 
 	if (!pskb_may_pull(skb, sizeof(*hdr)))
@@ -865,26 +653,18 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 	if (sknode)
 		return sk_receive_skb(sknode, skb, 1);
 
-	/* Look for a pipe handle pending accept */
-	sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle);
-	if (sknode) {
-		sock_put(sknode);
-		if (net_ratelimit())
-			printk(KERN_WARNING"Phonet unconnected PEP ignored");
-		err = NET_RX_DROP;
-		goto drop;
-	}
-
 	switch (hdr->message_id) {
 	case PNS_PEP_CONNECT_REQ:
-		err = pep_connreq_rcv(sk, skb);
-		break;
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_CONNECT_RESP:
-		err = pep_connresp_rcv(sk, skb);
-		break;
-#endif
+		if (sk->sk_state != TCP_LISTEN || sk_acceptq_is_full(sk)) {
+			pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE,
+					GFP_ATOMIC);
+			break;
+		}
+		skb_queue_head(&sk->sk_receive_queue, skb);
+		sk_acceptq_added(sk);
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, 0);
+		return NET_RX_SUCCESS;
 
 	case PNS_PEP_DISCONNECT_REQ:
 		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
@@ -898,12 +678,17 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 	case PNS_PEP_ENABLE_REQ:
 	case PNS_PEP_DISABLE_REQ:
 		/* invalid handle is not even allowed here! */
+		break;
+
 	default:
-		err = NET_RX_DROP;
+		if ((1 << sk->sk_state)
+			& ~(TCPF_CLOSE|TCPF_LISTEN|TCPF_CLOSE_WAIT))
+			/* actively connected socket */
+			return pipe_handler_do_rcv(sk, skb);
 	}
 drop:
 	kfree_skb(skb);
-	return err;
+	return NET_RX_SUCCESS;
 }
 
 static int pipe_do_remove(struct sock *sk)
@@ -912,20 +697,16 @@ static int pipe_do_remove(struct sock *sk)
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL);
+	skb = pep_alloc_skb(sk, NULL, 0, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
 	ph->utid = 0;
 	ph->message_id = PNS_PIPE_REMOVE_REQ;
 	ph->pipe_handle = pn->pipe_handle;
 	ph->data[0] = PAD;
-
-	return pn_skb_send(sk, skb, &pipe_srv);
+	return pn_skb_send(sk, skb, NULL);
 }
 
 /* associated socket ceases to exist */
@@ -938,29 +719,15 @@ static void pep_sock_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 
 	lock_sock(sk);
-	if (sk->sk_state == TCP_LISTEN) {
-		/* Destroy the listen queue */
-		struct sock *sknode;
-		struct hlist_node *p, *n;
-
-		sk_for_each_safe(sknode, p, n, &pn->ackq)
-			sk_del_node_init(sknode);
-		sk->sk_state = TCP_CLOSE;
-	} else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
-		/* Forcefully remove dangling Phonet pipe */
-		pipe_do_remove(sk);
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-	if (pn->pipe_state != PIPE_IDLE) {
-		/* send pep disconnect request */
-		pipe_handler_send_req(sk,
-				PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ,
-				GFP_KERNEL);
-
-		pn->pipe_state = PIPE_IDLE;
-		sk->sk_state = TCP_CLOSE;
+	if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) {
+		if (sk->sk_backlog_rcv == pipe_do_rcv)
+			/* Forcefully remove dangling Phonet pipe */
+			pipe_do_remove(sk);
+		else
+			pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD,
+						NULL, 0);
 	}
-#endif
+	sk->sk_state = TCP_CLOSE;
 
 	ifindex = pn->ifindex;
 	pn->ifindex = 0;
@@ -971,86 +738,141 @@ static void pep_sock_close(struct sock *sk, long timeout)
 	sock_put(sk);
 }
 
-static int pep_wait_connreq(struct sock *sk, int noblock)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
 {
-	struct task_struct *tsk = current;
-	struct pep_sock *pn = pep_sk(sk);
-	long timeo = sock_rcvtimeo(sk, noblock);
-
-	for (;;) {
-		DEFINE_WAIT(wait);
+	struct pep_sock *pn = pep_sk(sk), *newpn;
+	struct sock *newsk = NULL;
+	struct sk_buff *skb;
+	struct pnpipehdr *hdr;
+	struct sockaddr_pn dst, src;
+	int err;
+	u16 peer_type;
+	u8 pipe_handle, enabled, n_sb;
+	u8 aligned = 0;
 
-		if (sk->sk_state != TCP_LISTEN)
-			return -EINVAL;
-		if (!hlist_empty(&pn->ackq))
-			break;
-		if (!timeo)
-			return -EWOULDBLOCK;
-		if (signal_pending(tsk))
-			return sock_intr_errno(timeo);
+	skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp);
+	if (!skb)
+		return NULL;
 
-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
-						TASK_INTERRUPTIBLE);
-		release_sock(sk);
-		timeo = schedule_timeout(timeo);
-		lock_sock(sk);
-		finish_wait(sk_sleep(sk), &wait);
+	lock_sock(sk);
+	if (sk->sk_state != TCP_LISTEN) {
+		err = -EINVAL;
+		goto drop;
 	}
+	sk_acceptq_removed(sk);
 
-	return 0;
-}
+	err = -EPROTO;
+	if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
+		goto drop;
 
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
-{
-	struct pep_sock *pn = pep_sk(sk);
-	struct sock *newsk = NULL;
-	struct sk_buff *oskb;
-	int err;
+	hdr = pnp_hdr(skb);
+	pipe_handle = hdr->pipe_handle;
+	switch (hdr->state_after_connect) {
+	case PN_PIPE_DISABLE:
+		enabled = 0;
+		break;
+	case PN_PIPE_ENABLE:
+		enabled = 1;
+		break;
+	default:
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM,
+				GFP_KERNEL);
+		goto drop;
+	}
+	peer_type = hdr->other_pep_type << 8;
 
-	lock_sock(sk);
-	err = pep_wait_connreq(sk, flags & O_NONBLOCK);
-	if (err)
-		goto out;
+	/* Parse sub-blocks (options) */
+	n_sb = hdr->data[4];
+	while (n_sb > 0) {
+		u8 type, buf[1], len = sizeof(buf);
+		const u8 *data = pep_get_sb(skb, &type, &len, buf);
 
-	newsk = __sk_head(&pn->ackq);
+		if (data == NULL)
+			goto drop;
+		switch (type) {
+		case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE:
+			if (len < 1)
+				goto drop;
+			peer_type = (peer_type & 0xff00) | data[0];
+			break;
+		case PN_PIPE_SB_ALIGNED_DATA:
+			aligned = data[0] != 0;
+			break;
+		}
+		n_sb--;
+	}
 
-	oskb = skb_dequeue(&newsk->sk_receive_queue);
-	err = pep_accept_conn(newsk, oskb);
-	if (err) {
-		skb_queue_head(&newsk->sk_receive_queue, oskb);
+	/* Check for duplicate pipe handle */
+	newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
+	if (unlikely(newsk)) {
+		__sock_put(newsk);
 		newsk = NULL;
-		goto out;
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL);
+		goto drop;
+	}
+
+	/* Create a new to-be-accepted sock */
+	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot);
+	if (!newsk) {
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
+		err = -ENOBUFS;
+		goto drop;
 	}
-	kfree_skb(oskb);
 
+	sock_init_data(NULL, newsk);
+	newsk->sk_state = TCP_SYN_RECV;
+	newsk->sk_backlog_rcv = pipe_do_rcv;
+	newsk->sk_protocol = sk->sk_protocol;
+	newsk->sk_destruct = pipe_destruct;
+
+	newpn = pep_sk(newsk);
+	pn_skb_get_dst_sockaddr(skb, &dst);
+	pn_skb_get_src_sockaddr(skb, &src);
+	newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
+	newpn->pn_sk.dobject = pn_sockaddr_get_object(&src);
+	newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst);
 	sock_hold(sk);
-	pep_sk(newsk)->listener = sk;
+	newpn->listener = sk;
+	skb_queue_head_init(&newpn->ctrlreq_queue);
+	newpn->pipe_handle = pipe_handle;
+	atomic_set(&newpn->tx_credits, 0);
+	newpn->ifindex = 0;
+	newpn->peer_type = peer_type;
+	newpn->rx_credits = 0;
+	newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
+	newpn->init_enable = enabled;
+	newpn->aligned = aligned;
 
-	sock_hold(newsk);
-	sk_del_node_init(newsk);
-	sk_acceptq_removed(sk);
+	err = pep_accept_conn(newsk, skb);
+	if (err) {
+		sock_put(newsk);
+		newsk = NULL;
+		goto drop;
+	}
 	sk_add_node(newsk, &pn->hlist);
-	__sock_put(newsk);
-
-out:
+drop:
 	release_sock(sk);
+	kfree_skb(skb);
 	*errp = err;
 	return newsk;
 }
 
-#ifdef CONFIG_PHONET_PIPECTRLR
 static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
 {
 	struct pep_sock *pn = pep_sk(sk);
-	struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
-
-	memcpy(&pn->remote_pep, spn, sizeof(struct sockaddr_pn));
+	int err;
+	u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
 
-	return pipe_handler_send_req(sk,
-			PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ,
-			GFP_ATOMIC);
+	pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+	err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
+					PN_PIPE_ENABLE, data, 4);
+	if (err) {
+		pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
+		return err;
+	}
+	sk->sk_state = TCP_SYN_SENT;
+	return 0;
 }
-#endif
 
 static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
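
The new pep_sock_connect() above provisionally claims a pipe handle before sending PNS_PEP_CONNECT_REQ, rolls back to PN_PIPE_INVALID_HANDLE if the request cannot be transmitted, and only then moves the socket to TCP_SYN_SENT. A minimal sketch of that claim-then-roll-back shape (plain C; send_connect_req() and HANDLE_INVALID are hypothetical stand-ins):

    enum { HANDLE_INVALID = 0xFF };

    /* Hypothetical transmit hook; returns 0 when the request went out. */
    static int send_connect_req(void) { return 0; }

    /* Claim a provisional handle before sending, and roll it back if the
     * request cannot be transmitted, so a failed connect leaves no state. */
    static int do_connect(unsigned char *handle, int *state_syn_sent)
    {
        int err;

        *handle = 1;        /* anything but HANDLE_INVALID */
        err = send_connect_req();
        if (err) {
            *handle = HANDLE_INVALID;
            return err;
        }
        *state_syn_sent = 1;
        return 0;
    }

    int main(void)
    {
        unsigned char handle;
        int syn_sent = 0;
        return do_connect(&handle, &syn_sent);
    }
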
@@ -1081,10 +903,18 @@ static int pep_init(struct sock *sk)
 {
 	struct pep_sock *pn = pep_sk(sk);
 
-	INIT_HLIST_HEAD(&pn->ackq);
+	sk->sk_destruct = pipe_destruct;
 	INIT_HLIST_HEAD(&pn->hlist);
+	pn->listener = NULL;
 	skb_queue_head_init(&pn->ctrlreq_queue);
+	atomic_set(&pn->tx_credits, 0);
+	pn->ifindex = 0;
+	pn->peer_type = 0;
 	pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
+	pn->rx_credits = 0;
+	pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
+	pn->init_enable = 1;
+	pn->aligned = 0;
 	return 0;
 }
 
@@ -1103,18 +933,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
 
 	lock_sock(sk);
 	switch (optname) {
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNPIPE_PIPE_HANDLE:
-		if (val) {
-			if (pn->pipe_state > PIPE_IDLE) {
-				err = -EFAULT;
-				break;
-			}
-			pn->pipe_handle = val;
-			break;
-		}
-#endif
-
 	case PNPIPE_ENCAP:
 		if (val && val != PNPIPE_ENCAP_IP) {
 			err = -EINVAL;
@@ -1141,16 +959,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
 		}
 		goto out_norel;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNPIPE_ENABLE:
-		if (pn->pipe_state <= PIPE_IDLE) {
-			err = -ENOTCONN;
-			break;
-		}
-		err = pipe_handler_enable_pipe(sk, val);
-		break;
-#endif
-
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1180,13 +988,11 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
 		val = pn->ifindex;
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNPIPE_ENABLE:
-		if (pn->pipe_state <= PIPE_IDLE)
-			return -ENOTCONN;
-		val = pn->pipe_state != PIPE_DISABLED;
+	case PNPIPE_HANDLE:
+		val = pn->pipe_handle;
+		if (val == PN_PIPE_INVALID_HANDLE)
+			return -EINVAL;
 		break;
-#endif
 
 	default:
 		return -ENOPROTOOPT;
@@ -1222,11 +1028,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
 	} else
 		ph->message_id = PNS_PIPE_DATA;
 	ph->pipe_handle = pn->pipe_handle;
-#ifdef CONFIG_PHONET_PIPECTRLR
-	err = pn_skb_send(sk, skb, &pn->remote_pep);
-#else
-	err = pn_skb_send(sk, skb, &pipe_srv);
-#endif
+	err = pn_skb_send(sk, skb, NULL);
 
 	if (err && pn_flow_safe(pn->tx_fc))
 		atomic_inc(&pn->tx_credits);
@@ -1253,7 +1055,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (!skb)
 		return err;
 
-	skb_reserve(skb, MAX_PHONET_HEADER + 3);
+	skb_reserve(skb, MAX_PHONET_HEADER + 3 + pn->aligned);
 	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 	if (err < 0)
 		goto outfree;
@@ -1355,7 +1157,7 @@ struct sk_buff *pep_read(struct sock *sk)
 	struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
 
 	if (sk->sk_state == TCP_ESTABLISHED)
-		pipe_grant_credits(sk);
+		pipe_grant_credits(sk, GFP_ATOMIC);
 	return skb;
 }
 
@@ -1400,7 +1202,7 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
 	}
 
 	if (sk->sk_state == TCP_ESTABLISHED)
-		pipe_grant_credits(sk);
+		pipe_grant_credits(sk, GFP_KERNEL);
 	release_sock(sk);
 copy:
 	msg->msg_flags |= MSG_EOR;
@@ -1424,9 +1226,9 @@ static void pep_sock_unhash(struct sock *sk)
 
 	lock_sock(sk);
 
-#ifndef CONFIG_PHONET_PIPECTRLR
-	if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
+	if (pn->listener != NULL) {
 		skparent = pn->listener;
+		pn->listener = NULL;
 		release_sock(sk);
 
 		pn = pep_sk(skparent);
@@ -1434,7 +1236,7 @@ static void pep_sock_unhash(struct sock *sk)
 		sk_del_node_init(sk);
 		sk = skparent;
 	}
-#endif
+
 	/* Unhash a listening sock only when it is closed
 	 * and all of its active connected pipes are closed. */
 	if (hlist_empty(&pn->hlist))
@@ -1448,9 +1250,7 @@ static void pep_sock_unhash(struct sock *sk)
 static struct proto pep_proto = {
 	.close		= pep_sock_close,
 	.accept		= pep_sock_accept,
-#ifdef CONFIG_PHONET_PIPECTRLR
 	.connect	= pep_sock_connect,
-#endif
 	.ioctl		= pep_ioctl,
 	.init		= pep_init,
 	.setsockopt	= pep_setsockopt,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 25f746d20c1..b1adafab377 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -225,15 +225,18 @@ static int pn_socket_autobind(struct socket *sock)
 	return 0; /* socket was already bound */
 }
 
-#ifdef CONFIG_PHONET_PIPECTRLR
 static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
 		int len, int flags)
 {
 	struct sock *sk = sock->sk;
+	struct pn_sock *pn = pn_sk(sk);
 	struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
-	long timeo;
+	struct task_struct *tsk = current;
+	long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 	int err;
 
+	if (pn_socket_autobind(sock))
+		return -ENOBUFS;
 	if (len < sizeof(struct sockaddr_pn))
 		return -EINVAL;
 	if (spn->spn_family != AF_PHONET)
@@ -243,82 +246,61 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
 
 	switch (sock->state) {
 	case SS_UNCONNECTED:
-		sk->sk_state = TCP_CLOSE;
-		break;
-	case SS_CONNECTING:
-		switch (sk->sk_state) {
-		case TCP_SYN_RECV:
-			sock->state = SS_CONNECTED;
+		if (sk->sk_state != TCP_CLOSE) {
 			err = -EISCONN;
 			goto out;
-		case TCP_CLOSE:
-			err = -EALREADY;
-			if (flags & O_NONBLOCK)
-				goto out;
-			goto wait_connect;
 		}
 		break;
-	case SS_CONNECTED:
-		switch (sk->sk_state) {
-		case TCP_SYN_RECV:
-			err = -EISCONN;
-			goto out;
-		case TCP_CLOSE:
-			sock->state = SS_UNCONNECTED;
-			break;
-		}
-		break;
-	case SS_DISCONNECTING:
-	case SS_FREE:
-		break;
+	case SS_CONNECTING:
+		err = -EALREADY;
+		goto out;
+	default:
+		err = -EISCONN;
+		goto out;
 	}
-	sk->sk_state = TCP_CLOSE;
-	sk_stream_kill_queues(sk);
 
+	pn->dobject = pn_sockaddr_get_object(spn);
+	pn->resource = pn_sockaddr_get_resource(spn);
 	sock->state = SS_CONNECTING;
+
 	err = sk->sk_prot->connect(sk, addr, len);
-	if (err < 0) {
+	if (err) {
 		sock->state = SS_UNCONNECTED;
-		sk->sk_state = TCP_CLOSE;
+		pn->dobject = 0;
 		goto out;
 	}
 
-	err = -EINPROGRESS;
-wait_connect:
-	if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK))
-		goto out;
-
-	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-	release_sock(sk);
-
-	err = -ERESTARTSYS;
-	timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
-						sk->sk_state != TCP_CLOSE,
-						timeo);
-
-	lock_sock(sk);
-	if (timeo < 0)
-		goto out; /* -ERESTARTSYS */
+	while (sk->sk_state == TCP_SYN_SENT) {
+		DEFINE_WAIT(wait);
 
-	err = -ETIMEDOUT;
-	if (timeo == 0 && sk->sk_state != TCP_SYN_RECV)
-		goto out;
+		if (!timeo) {
+			err = -EINPROGRESS;
+			goto out;
+		}
+		if (signal_pending(tsk)) {
+			err = sock_intr_errno(timeo);
+			goto out;
+		}
 
-	if (sk->sk_state != TCP_SYN_RECV) {
-		sock->state = SS_UNCONNECTED;
-		err = sock_error(sk);
-		if (!err)
-			err = -ECONNREFUSED;
-		goto out;
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+						TASK_INTERRUPTIBLE);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		finish_wait(sk_sleep(sk), &wait);
 	}
-	sock->state = SS_CONNECTED;
-	err = 0;
 
+	if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
+		err = 0;
+	else if (sk->sk_state == TCP_CLOSE_WAIT)
+		err = -ECONNRESET;
+	else
+		err = -ECONNREFUSED;
+	sock->state = err ? SS_UNCONNECTED : SS_CONNECTED;
 out:
 	release_sock(sk);
 	return err;
 }
-#endif
 
 static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 				int flags)
@@ -327,6 +309,9 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 	struct sock *newsk;
 	int err;
 
+	if (unlikely(sk->sk_state != TCP_LISTEN))
+		return -EINVAL;
+
 	newsk = sk->sk_prot->accept(sk, flags, &err);
 	if (!newsk)
 		return err;
@@ -363,13 +348,8 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 
 	poll_wait(file, sk_sleep(sk), wait);
 
-	switch (sk->sk_state) {
-	case TCP_LISTEN:
-		return hlist_empty(&pn->ackq) ? 0 : POLLIN;
-	case TCP_CLOSE:
+	if (sk->sk_state == TCP_CLOSE)
 		return POLLERR;
-	}
-
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
 	if (!skb_queue_empty(&pn->ctrlreq_queue))
@@ -428,19 +408,19 @@ static int pn_socket_listen(struct socket *sock, int backlog)
 	struct sock *sk = sock->sk;
 	int err = 0;
 
-	if (sock->state != SS_UNCONNECTED)
-		return -EINVAL;
 	if (pn_socket_autobind(sock))
 		return -ENOBUFS;
 
 	lock_sock(sk);
-	if (sk->sk_state != TCP_CLOSE) {
+	if (sock->state != SS_UNCONNECTED) {
 		err = -EINVAL;
 		goto out;
 	}
 
-	sk->sk_state = TCP_LISTEN;
-	sk->sk_ack_backlog = 0;
+	if (sk->sk_state != TCP_LISTEN) {
+		sk->sk_state = TCP_LISTEN;
+		sk->sk_ack_backlog = 0;
+	}
 	sk->sk_max_ack_backlog = backlog;
 out:
 	release_sock(sk);
@@ -488,11 +468,7 @@ const struct proto_ops phonet_stream_ops = {
 	.owner		= THIS_MODULE,
 	.release	= pn_socket_release,
 	.bind		= pn_socket_bind,
-#ifdef CONFIG_PHONET_PIPECTRLR
 	.connect	= pn_socket_connect,
-#else
-	.connect	= sock_no_connect,
-#endif
 	.socketpair	= sock_no_socketpair,
 	.accept		= pn_socket_accept,
 	.getname	= pn_socket_getname,
@@ -633,8 +609,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
 			"%d %p %d%n",
-			sk->sk_protocol, pn->sobject, 0, pn->resource,
-			sk->sk_state,
+			sk->sk_protocol, pn->sobject, pn->dobject,
+			pn->resource, sk->sk_state,
 			sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
 			sock_i_uid(sk), sock_i_ino(sk),
 			atomic_read(&sk->sk_refcnt), sk,
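
The rewritten pn_socket_connect() above autobinds the socket, records the peer address, then waits for the handshake with the classic prepare_to_wait/schedule_timeout loop, mapping the final socket state to -EINPROGRESS, a signal errno, -ECONNRESET or -ECONNREFUSED. A minimal model of that wait-and-classify skeleton (plain C; wait_step() and signal_pending_stub() are stubs for the real sleeping primitives):

    #include <errno.h>
    #include <stdbool.h>

    enum state { SYN_SENT, ESTABLISHED, CLOSE_WAIT, CLOSED };

    /* Stubs: a real implementation sleeps until woken, and checks for
     * pending signals on the current task. */
    static enum state wait_step(void) { return ESTABLISHED; }
    static bool signal_pending_stub(void) { return false; }

    /* Loop while the handshake is in flight, bail out for non-blocking
     * callers or signals, then map the final state to an errno. */
    static int wait_for_connect(enum state s, long timeo)
    {
        while (s == SYN_SENT) {
            if (!timeo)
                return -EINPROGRESS;  /* non-blocking caller */
            if (signal_pending_stub())
                return -EINTR;
            s = wait_step();
        }
        if (s == ESTABLISHED)
            return 0;
        return (s == CLOSE_WAIT) ? -ECONNRESET : -ECONNREFUSED;
    }

    int main(void) { return wait_for_connect(SYN_SENT, 100) ? 1 : 0; }
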
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 4123967d4d6..cce19f95c62 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -364,7 +364,6 @@ void rds_ib_exit(void)
364 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
365 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
366 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
368} 367}
369 368
370struct rds_transport rds_ib_transport = { 369struct rds_transport rds_ib_transport = {
@@ -400,13 +399,9 @@ int rds_ib_init(void)
400 399
401 INIT_LIST_HEAD(&rds_ib_devices); 400 INIT_LIST_HEAD(&rds_ib_devices);
402 401
403 ret = rds_ib_fmr_init();
404 if (ret)
405 goto out;
406
407 ret = ib_register_client(&rds_ib_client); 402 ret = ib_register_client(&rds_ib_client);
408 if (ret) 403 if (ret)
409 goto out_fmr_exit; 404 goto out;
410 405
411 ret = rds_ib_sysctl_init(); 406 ret = rds_ib_sysctl_init();
412 if (ret) 407 if (ret)
@@ -430,8 +425,6 @@ out_sysctl:
430 rds_ib_sysctl_exit(); 425 rds_ib_sysctl_exit();
431out_ibreg: 426out_ibreg:
432 rds_ib_unregister_client(); 427 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
435out: 428out:
436 return ret; 429 return ret;
437} 430}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index e34ad032b66..4297d92788d 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
307void rds_ib_sync_mr(void *trans_private, int dir); 307void rds_ib_sync_mr(void *trans_private, int dir);
308void rds_ib_free_mr(void *trans_private, int invalidate); 308void rds_ib_free_mr(void *trans_private, int invalidate);
309void rds_ib_flush_mrs(void); 309void rds_ib_flush_mrs(void);
310int rds_ib_fmr_init(void);
311void rds_ib_fmr_exit(void);
312 310
313/* ib_recv.c */ 311/* ib_recv.c */
314int rds_ib_recv_init(void); 312int rds_ib_recv_init(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 18a833c450c..819c35a0d9c 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -38,8 +38,6 @@
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h" 39#include "xlist.h"
40 40
41static struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace); 41static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0 42#define CLEAN_LIST_BUSY_BIT 0
45 43
@@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
307 int err = 0, iter = 0; 305 int err = 0, iter = 0;
308 306
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) 307 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 308 schedule_delayed_work(&pool->flush_worker, 10);
311 309
312 while (1) { 310 while (1) {
313 ibmr = rds_ib_reuse_fmr(pool); 311 ibmr = rds_ib_reuse_fmr(pool);
@@ -696,24 +694,6 @@ out_nolock:
696 return ret; 694 return ret;
697} 695}
698 696
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
717static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 697static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
718{ 698{
719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); 699 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
741 /* If we've pinned too many pages, request a flush */ 721 /* If we've pinned too many pages, request a flush */
742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 722 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
743 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 723 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 724 schedule_delayed_work(&pool->flush_worker, 10);
745 725
746 if (invalidate) { 726 if (invalidate) {
747 if (likely(!in_interrupt())) { 727 if (likely(!in_interrupt())) {
@@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
749 } else { 729 } else {
 750 /* We get here if the user created an MR marked 730 /* We get here if the user created an MR marked
751 * as use_once and invalidate at the same time. */ 731 * as use_once and invalidate at the same time. */
752 queue_delayed_work(rds_ib_fmr_wq, 732 schedule_delayed_work(&pool->flush_worker, 10);
753 &pool->flush_worker, 10);
754 } 733 }
755 } 734 }
756 735
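The ib_rdma.c hunks remove the driver-private flush workqueue in favour of the shared system workqueue, which is also what lets rds_ib_fmr_init()/rds_ib_fmr_exit() and their error unwinding disappear from ib.c. Condensed, the pattern change is:

/* before: a private queue, created in rds_ib_fmr_init() and
 * destroyed in rds_ib_fmr_exit() */
queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);

/* after: the shared system workqueue, no setup or teardown */
schedule_delayed_work(&pool->flush_worker, 10);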
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421b..c47a511f203 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
551 if (conn->c_loopback 551 if (conn->c_loopback
552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
554 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 554 scat = &rm->data.op_sg[sg];
555 ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
556 ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
557 return ret;
555 } 558 }
556 559
557 /* FIXME we may overallocate here */ 560 /* FIXME we may overallocate here */
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b1..bca6761a3ca 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 struct scatterlist *sgp = &rm->data.op_sg[sg];
65 int ret = sizeof(struct rds_header) +
66 be32_to_cpu(rm->m_inc.i_hdr.h_len);
67
64 /* Do not send cong updates to loopback */ 68 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 69 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 70 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 71 ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
72 goto out;
68 } 73 }
69 74
70 BUG_ON(hdr_off || sg || off); 75 BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
80 NULL); 85 NULL);
81 86
82 rds_inc_put(&rm->m_inc); 87 rds_inc_put(&rm->m_inc);
83 88out:
84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 89 return ret;
85} 90}
86 91
87/* 92/*
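This loopback hunk and the ib_send.c hunk above apply the same fix: the congestion-map path used to report a full header plus RDS_CONG_MAP_BYTES, which can exceed what is left in the current scatterlist element, presumably letting the caller's transmit accounting run past the sg entry. As reassembled from the new side, the return value is now clamped:

struct scatterlist *sgp = &rm->data.op_sg[sg];
int ret = sizeof(struct rds_header) +
	  be32_to_cpu(rm->m_inc.i_hdr.h_len);

/* never report more than remains in this sg element */
ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);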
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 9542449c072..da8adac2bf0 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...)
50#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) 50#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
51 51
52#define RDS_CONG_MAP_BYTES (65536 / 8) 52#define RDS_CONG_MAP_BYTES (65536 / 8)
53#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
54#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) 53#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
55#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 54#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
56 55
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index d952e7eac18..5ee0c62046a 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -803,7 +803,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
803 803
804 rose_insert_socket(sk); /* Finish the bind */ 804 rose_insert_socket(sk); /* Finish the bind */
805 } 805 }
806rose_try_next_neigh:
807 rose->dest_addr = addr->srose_addr; 806 rose->dest_addr = addr->srose_addr;
808 rose->dest_call = addr->srose_call; 807 rose->dest_call = addr->srose_call;
809 rose->rand = ((long)rose & 0xFFFF) + rose->lci; 808 rose->rand = ((long)rose & 0xFFFF) + rose->lci;
@@ -865,12 +864,6 @@ rose_try_next_neigh:
865 } 864 }
866 865
867 if (sk->sk_state != TCP_ESTABLISHED) { 866 if (sk->sk_state != TCP_ESTABLISHED) {
868 /* Try next neighbour */
869 rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0);
870 if (rose->neighbour)
871 goto rose_try_next_neigh;
872
873 /* No more neighbours */
874 sock->state = SS_UNCONNECTED; 867 sock->state = SS_UNCONNECTED;
875 err = sock_error(sk); /* Always set at this point */ 868 err = sock_error(sk); /* Always set at this point */
876 goto out_release; 869 goto out_release;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index b4fdaac233f..88a77e90e7e 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -674,29 +674,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig
674 * Find a neighbour or a route given a ROSE address. 674 * Find a neighbour or a route given a ROSE address.
675 */ 675 */
676struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, 676struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
677 unsigned char *diagnostic, int new) 677 unsigned char *diagnostic, int route_frame)
678{ 678{
679 struct rose_neigh *res = NULL; 679 struct rose_neigh *res = NULL;
680 struct rose_node *node; 680 struct rose_node *node;
681 int failed = 0; 681 int failed = 0;
682 int i; 682 int i;
683 683
684 if (!new) spin_lock_bh(&rose_node_list_lock); 684 if (!route_frame) spin_lock_bh(&rose_node_list_lock);
685 for (node = rose_node_list; node != NULL; node = node->next) { 685 for (node = rose_node_list; node != NULL; node = node->next) {
686 if (rosecmpm(addr, &node->address, node->mask) == 0) { 686 if (rosecmpm(addr, &node->address, node->mask) == 0) {
687 for (i = 0; i < node->count; i++) { 687 for (i = 0; i < node->count; i++) {
688 if (new) { 688 if (node->neighbour[i]->restarted) {
689 if (node->neighbour[i]->restarted) { 689 res = node->neighbour[i];
690 res = node->neighbour[i]; 690 goto out;
691 goto out;
692 }
693 } 691 }
694 else { 692 }
693 }
694 }
695 if (!route_frame) { /* connect request */
696 for (node = rose_node_list; node != NULL; node = node->next) {
697 if (rosecmpm(addr, &node->address, node->mask) == 0) {
698 for (i = 0; i < node->count; i++) {
695 if (!rose_ftimer_running(node->neighbour[i])) { 699 if (!rose_ftimer_running(node->neighbour[i])) {
696 res = node->neighbour[i]; 700 res = node->neighbour[i];
701 failed = 0;
697 goto out; 702 goto out;
698 } else 703 }
699 failed = 1; 704 failed = 1;
700 } 705 }
701 } 706 }
702 } 707 }
@@ -711,8 +716,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
711 } 716 }
712 717
713out: 718out:
714 if (!new) spin_unlock_bh(&rose_node_list_lock); 719 if (!route_frame) spin_unlock_bh(&rose_node_list_lock);
715
716 return res; 720 return res;
717} 721}
718 722
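Reassembled from the new side, rose_get_neigh() now makes two explicit passes over the node list instead of branching per neighbour: a restarted neighbour is always preferred, and only connect requests (route_frame == 0) fall back to a neighbour whose ftimer is idle. The cause/diagnostic handling between the hunks is unchanged and elided here:

struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
	unsigned char *diagnostic, int route_frame)
{
	struct rose_neigh *res = NULL;
	struct rose_node *node;
	int failed = 0;
	int i;

	if (!route_frame) spin_lock_bh(&rose_node_list_lock);
	/* pass 1: any restarted neighbour wins */
	for (node = rose_node_list; node != NULL; node = node->next) {
		if (rosecmpm(addr, &node->address, node->mask) == 0) {
			for (i = 0; i < node->count; i++) {
				if (node->neighbour[i]->restarted) {
					res = node->neighbour[i];
					goto out;
				}
			}
		}
	}
	/* pass 2: connect requests may use a neighbour whose
	 * ftimer is not running */
	if (!route_frame) {
		for (node = rose_node_list; node != NULL; node = node->next) {
			if (rosecmpm(addr, &node->address, node->mask) == 0) {
				for (i = 0; i < node->count; i++) {
					if (!rose_ftimer_running(node->neighbour[i])) {
						res = node->neighbour[i];
						failed = 0;
						goto out;
					}
					failed = 1;
				}
			}
		}
	}
	/* ... unchanged cause/diagnostic handling elided ... */
out:
	if (!route_frame) spin_unlock_bh(&rose_node_list_lock);
	return res;
}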
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index d763793d39d..43ea7de2fc8 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -25,6 +25,7 @@
25#include <keys/user-type.h> 25#include <keys/user-type.h>
26#include "ar-internal.h" 26#include "ar-internal.h"
27 27
28static int rxrpc_vet_description_s(const char *);
28static int rxrpc_instantiate(struct key *, const void *, size_t); 29static int rxrpc_instantiate(struct key *, const void *, size_t);
29static int rxrpc_instantiate_s(struct key *, const void *, size_t); 30static int rxrpc_instantiate_s(struct key *, const void *, size_t);
30static void rxrpc_destroy(struct key *); 31static void rxrpc_destroy(struct key *);
@@ -52,6 +53,7 @@ EXPORT_SYMBOL(key_type_rxrpc);
52 */ 53 */
53struct key_type key_type_rxrpc_s = { 54struct key_type key_type_rxrpc_s = {
54 .name = "rxrpc_s", 55 .name = "rxrpc_s",
56 .vet_description = rxrpc_vet_description_s,
55 .instantiate = rxrpc_instantiate_s, 57 .instantiate = rxrpc_instantiate_s,
56 .match = user_match, 58 .match = user_match,
57 .destroy = rxrpc_destroy_s, 59 .destroy = rxrpc_destroy_s,
@@ -59,6 +61,23 @@ struct key_type key_type_rxrpc_s = {
59}; 61};
60 62
61/* 63/*
64 * Vet the description for an RxRPC server key
65 */
66static int rxrpc_vet_description_s(const char *desc)
67{
68 unsigned long num;
69 char *p;
70
71 num = simple_strtoul(desc, &p, 10);
72 if (*p != ':' || num > 65535)
73 return -EINVAL;
74 num = simple_strtoul(p + 1, &p, 10);
75 if (*p || num < 1 || num > 255)
76 return -EINVAL;
77 return 0;
78}
79
80/*
62 * parse an RxKAD type XDR format token 81 * parse an RxKAD type XDR format token
63 * - the caller guarantees we have at least 4 words 82 * - the caller guarantees we have at least 4 words
64 */ 83 */
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index a53fb25a64e..55b93dc60d0 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -36,31 +36,15 @@ static void rxrpc_destroy_peer(struct work_struct *work);
36static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) 36static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
37{ 37{
38 struct rtable *rt; 38 struct rtable *rt;
39 struct flowi fl;
40 int ret;
41 39
42 peer->if_mtu = 1500; 40 peer->if_mtu = 1500;
43 41
44 memset(&fl, 0, sizeof(fl)); 42 rt = ip_route_output_ports(&init_net, NULL,
45 43 peer->srx.transport.sin.sin_addr.s_addr, 0,
46 switch (peer->srx.transport.family) { 44 htons(7000), htons(7001),
47 case AF_INET: 45 IPPROTO_UDP, 0, 0);
48 fl.oif = 0; 46 if (IS_ERR(rt)) {
49 fl.proto = IPPROTO_UDP, 47 _leave(" [route err %ld]", PTR_ERR(rt));
50 fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr;
51 fl.fl4_src = 0;
52 fl.fl4_tos = 0;
53 /* assume AFS.CM talking to AFS.FS */
54 fl.fl_ip_sport = htons(7001);
55 fl.fl_ip_dport = htons(7000);
56 break;
57 default:
58 BUG();
59 }
60
61 ret = ip_route_output_key(&init_net, &rt, &fl);
62 if (ret < 0) {
63 _leave(" [route err %d]", ret);
64 return; 48 return;
65 } 49 }
66 50
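The new ip_route_output_ports() returns the rtable itself, encoding failure with the kernel's ERR_PTR convention instead of an out-parameter plus error code; that is why the flowi scaffolding disappears. A minimal userspace model of that convention, for illustration:

#include <stdio.h>

/* errors are encoded in the top page of pointer space */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *rt = ERR_PTR(-101);	/* e.g. a failed route lookup */

	if (IS_ERR(rt))
		printf("route err %ld\n", PTR_ERR(rt));
	return 0;
}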
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f04d4a484d5..a7a5583d4f6 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -126,6 +126,17 @@ config NET_SCH_RED
126 To compile this code as a module, choose M here: the 126 To compile this code as a module, choose M here: the
127 module will be called sch_red. 127 module will be called sch_red.
128 128
129config NET_SCH_SFB
130 tristate "Stochastic Fair Blue (SFB)"
131 ---help---
132 Say Y here if you want to use the Stochastic Fair Blue (SFB)
133 packet scheduling algorithm.
134
135 See the top of <file:net/sched/sch_sfb.c> for more details.
136
137 To compile this code as a module, choose M here: the
138 module will be called sch_sfb.
139
129config NET_SCH_SFQ 140config NET_SCH_SFQ
130 tristate "Stochastic Fairness Queueing (SFQ)" 141 tristate "Stochastic Fairness Queueing (SFQ)"
131 ---help--- 142 ---help---
@@ -205,6 +216,29 @@ config NET_SCH_DRR
205 216
206 If unsure, say N. 217 If unsure, say N.
207 218
219config NET_SCH_MQPRIO
220 tristate "Multi-queue priority scheduler (MQPRIO)"
221 help
222 Say Y here if you want to use the Multi-queue Priority scheduler.
 223 This scheduler allows QoS to be offloaded to NICs that have support
 224 for offloading QoS schedulers.
225
226 To compile this driver as a module, choose M here: the module will
227 be called sch_mqprio.
228
229 If unsure, say N.
230
231config NET_SCH_CHOKE
232 tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
233 help
234 Say Y here if you want to use the CHOKe packet scheduler (CHOose
235 and Keep for responsive flows, CHOose and Kill for unresponsive
 236 flows). This is a variation of RED which tries to penalize flows
237 that monopolize the queue.
238
239 To compile this code as a module, choose M here: the
240 module will be called sch_choke.
241
208config NET_SCH_INGRESS 242config NET_SCH_INGRESS
209 tristate "Ingress Qdisc" 243 tristate "Ingress Qdisc"
210 depends on NET_CLS_ACT 244 depends on NET_CLS_ACT
@@ -243,7 +277,7 @@ config NET_CLS_TCINDEX
243 277
244config NET_CLS_ROUTE4 278config NET_CLS_ROUTE4
245 tristate "Routing decision (ROUTE)" 279 tristate "Routing decision (ROUTE)"
246 select NET_CLS_ROUTE 280 select IP_ROUTE_CLASSID
247 select NET_CLS 281 select NET_CLS
248 ---help--- 282 ---help---
249 If you say Y here, you will be able to classify packets 283 If you say Y here, you will be able to classify packets
@@ -252,9 +286,6 @@ config NET_CLS_ROUTE4
252 To compile this code as a module, choose M here: the 286 To compile this code as a module, choose M here: the
253 module will be called cls_route. 287 module will be called cls_route.
254 288
255config NET_CLS_ROUTE
256 bool
257
258config NET_CLS_FW 289config NET_CLS_FW
259 tristate "Netfilter mark (FW)" 290 tristate "Netfilter mark (FW)"
260 select NET_CLS 291 select NET_CLS
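The CHOKe help text compresses the algorithm quite a bit. A toy userspace sketch of the idea (illustration only, not the kernel code): when the queue is congested, the arriving packet is compared with one queued packet drawn at random, and both are dropped on a flow match, so a flow is penalized roughly in proportion to its share of the queue:

#include <stdio.h>
#include <stdlib.h>

struct pkt { int flow; };

/* CHOKe test on enqueue: pick a random victim from the queue and
 * drop both packets if they belong to the same flow */
static int choke_match(const struct pkt *arriving,
		       const struct pkt *queue, int qlen)
{
	const struct pkt *victim = &queue[rand() % qlen];

	return victim->flow == arriving->flow;
}

int main(void)
{
	struct pkt queue[4] = { {1}, {1}, {1}, {2} };	/* flow 1 hogs the queue */
	struct pkt arriving = { 1 };

	printf("drop both: %s\n",
	       choke_match(&arriving, queue, 4) ? "yes" : "no");
	return 0;
}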
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 960f5dba630..2e77b8dba22 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o
24obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o 24obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
25obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o 25obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
26obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o 26obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
27obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
27obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o 28obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
28obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o 29obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
29obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o 30obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
@@ -32,6 +33,9 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
32obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o 33obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
33obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o 34obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
34obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o 35obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
36obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
37obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
38
35obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 39obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
36obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 40obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
37obj-$(CONFIG_NET_CLS_FW) += cls_fw.o 41obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f89e7e..15873e14cb5 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -78,7 +78,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
78 struct tc_action *a, struct tcf_hashinfo *hinfo) 78 struct tc_action *a, struct tcf_hashinfo *hinfo)
79{ 79{
80 struct tcf_common *p; 80 struct tcf_common *p;
81 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; 81 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
82 struct nlattr *nest; 82 struct nlattr *nest;
83 83
84 read_lock_bh(hinfo->lock); 84 read_lock_bh(hinfo->lock);
@@ -126,7 +126,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
126{ 126{
127 struct tcf_common *p, *s_p; 127 struct tcf_common *p, *s_p;
128 struct nlattr *nest; 128 struct nlattr *nest;
129 int i= 0, n_i = 0; 129 int i = 0, n_i = 0;
130 130
131 nest = nla_nest_start(skb, a->order); 131 nest = nla_nest_start(skb, a->order);
132 if (nest == NULL) 132 if (nest == NULL)
@@ -138,7 +138,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
138 while (p != NULL) { 138 while (p != NULL) {
139 s_p = p->tcfc_next; 139 s_p = p->tcfc_next;
140 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) 140 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
141 module_put(a->ops->owner); 141 module_put(a->ops->owner);
142 n_i++; 142 n_i++;
143 p = s_p; 143 p = s_p;
144 } 144 }
@@ -447,7 +447,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
447 nest = nla_nest_start(skb, TCA_OPTIONS); 447 nest = nla_nest_start(skb, TCA_OPTIONS);
448 if (nest == NULL) 448 if (nest == NULL)
449 goto nla_put_failure; 449 goto nla_put_failure;
450 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { 450 err = tcf_action_dump_old(skb, a, bind, ref);
451 if (err > 0) {
451 nla_nest_end(skb, nest); 452 nla_nest_end(skb, nest);
452 return err; 453 return err;
453 } 454 }
@@ -491,7 +492,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
491 struct tc_action *a; 492 struct tc_action *a;
492 struct tc_action_ops *a_o; 493 struct tc_action_ops *a_o;
493 char act_name[IFNAMSIZ]; 494 char act_name[IFNAMSIZ];
494 struct nlattr *tb[TCA_ACT_MAX+1]; 495 struct nlattr *tb[TCA_ACT_MAX + 1];
495 struct nlattr *kind; 496 struct nlattr *kind;
496 int err; 497 int err;
497 498
@@ -549,9 +550,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
549 goto err_free; 550 goto err_free;
550 551
551 /* module count goes up only when brand new policy is created 552 /* module count goes up only when brand new policy is created
552 if it exists and is only bound to in a_o->init() then 553 * if it exists and is only bound to in a_o->init() then
553 ACT_P_CREATED is not returned (a zero is). 554 * ACT_P_CREATED is not returned (a zero is).
554 */ 555 */
555 if (err != ACT_P_CREATED) 556 if (err != ACT_P_CREATED)
556 module_put(a_o->owner); 557 module_put(a_o->owner);
557 a->ops = a_o; 558 a->ops = a_o;
@@ -569,7 +570,7 @@ err_out:
569struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, 570struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
570 char *name, int ovr, int bind) 571 char *name, int ovr, int bind)
571{ 572{
572 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 573 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
573 struct tc_action *head = NULL, *act, *act_prev = NULL; 574 struct tc_action *head = NULL, *act, *act_prev = NULL;
574 int err; 575 int err;
575 int i; 576 int i;
@@ -697,7 +698,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
697static struct tc_action * 698static struct tc_action *
698tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) 699tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
699{ 700{
700 struct nlattr *tb[TCA_ACT_MAX+1]; 701 struct nlattr *tb[TCA_ACT_MAX + 1];
701 struct tc_action *a; 702 struct tc_action *a;
702 int index; 703 int index;
703 int err; 704 int err;
@@ -770,7 +771,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
770 struct tcamsg *t; 771 struct tcamsg *t;
771 struct netlink_callback dcb; 772 struct netlink_callback dcb;
772 struct nlattr *nest; 773 struct nlattr *nest;
773 struct nlattr *tb[TCA_ACT_MAX+1]; 774 struct nlattr *tb[TCA_ACT_MAX + 1];
774 struct nlattr *kind; 775 struct nlattr *kind;
775 struct tc_action *a = create_a(0); 776 struct tc_action *a = create_a(0);
776 int err = -ENOMEM; 777 int err = -ENOMEM;
@@ -821,7 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
821 nlh->nlmsg_flags |= NLM_F_ROOT; 822 nlh->nlmsg_flags |= NLM_F_ROOT;
822 module_put(a->ops->owner); 823 module_put(a->ops->owner);
823 kfree(a); 824 kfree(a);
824 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 825 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
826 n->nlmsg_flags & NLM_F_ECHO);
825 if (err > 0) 827 if (err > 0)
826 return 0; 828 return 0;
827 829
@@ -842,14 +844,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
842 u32 pid, int event) 844 u32 pid, int event)
843{ 845{
844 int i, ret; 846 int i, ret;
845 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 847 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
846 struct tc_action *head = NULL, *act, *act_prev = NULL; 848 struct tc_action *head = NULL, *act, *act_prev = NULL;
847 849
848 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); 850 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
849 if (ret < 0) 851 if (ret < 0)
850 return ret; 852 return ret;
851 853
852 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 854 if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
853 if (tb[1] != NULL) 855 if (tb[1] != NULL)
854 return tca_action_flush(net, tb[1], n, pid); 856 return tca_action_flush(net, tb[1], n, pid);
855 else 857 else
@@ -892,7 +894,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
892 /* now do the delete */ 894 /* now do the delete */
893 tcf_action_destroy(head, 0); 895 tcf_action_destroy(head, 0);
894 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, 896 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
895 n->nlmsg_flags&NLM_F_ECHO); 897 n->nlmsg_flags & NLM_F_ECHO);
896 if (ret > 0) 898 if (ret > 0)
897 return 0; 899 return 0;
898 return ret; 900 return ret;
@@ -936,7 +938,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
936 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 938 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
937 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 939 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
938 940
939 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 941 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
940 if (err > 0) 942 if (err > 0)
941 err = 0; 943 err = 0;
942 return err; 944 return err;
@@ -967,7 +969,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
967 969
968 /* dump then free all the actions after update; inserted policy 970 /* dump then free all the actions after update; inserted policy
969 * stays intact 971 * stays intact
970 * */ 972 */
971 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); 973 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
972 for (a = act; a; a = act) { 974 for (a = act; a; a = act) {
973 act = a->next; 975 act = a->next;
@@ -993,8 +995,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
993 return -EINVAL; 995 return -EINVAL;
994 } 996 }
995 997
996 /* n->nlmsg_flags&NLM_F_CREATE 998 /* n->nlmsg_flags & NLM_F_CREATE */
997 * */
998 switch (n->nlmsg_type) { 999 switch (n->nlmsg_type) {
999 case RTM_NEWACTION: 1000 case RTM_NEWACTION:
1000 /* we are going to assume all other flags 1001 /* we are going to assume all other flags
@@ -1003,7 +1004,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1003 * but since we want avoid ambiguity (eg when flags 1004 * but since we want avoid ambiguity (eg when flags
1004 * is zero) then just set this 1005 * is zero) then just set this
1005 */ 1006 */
1006 if (n->nlmsg_flags&NLM_F_REPLACE) 1007 if (n->nlmsg_flags & NLM_F_REPLACE)
1007 ovr = 1; 1008 ovr = 1;
1008replay: 1009replay:
1009 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); 1010 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1029,7 @@ replay:
1028static struct nlattr * 1029static struct nlattr *
1029find_dump_kind(const struct nlmsghdr *n) 1030find_dump_kind(const struct nlmsghdr *n)
1030{ 1031{
1031 struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; 1032 struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
1032 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; 1033 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
1033 struct nlattr *nla[TCAA_MAX + 1]; 1034 struct nlattr *nla[TCAA_MAX + 1];
1034 struct nlattr *kind; 1035 struct nlattr *kind;
@@ -1071,9 +1072,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1071 } 1072 }
1072 1073
1073 a_o = tc_lookup_action(kind); 1074 a_o = tc_lookup_action(kind);
1074 if (a_o == NULL) { 1075 if (a_o == NULL)
1075 return 0; 1076 return 0;
1076 }
1077 1077
1078 memset(&a, 0, sizeof(struct tc_action)); 1078 memset(&a, 0, sizeof(struct tc_action));
1079 a.ops = a_o; 1079 a.ops = a_o;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 83ddfc07e45..6cdf9abe475 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
63 if (nla == NULL) 63 if (nla == NULL)
64 return -EINVAL; 64 return -EINVAL;
65 65
66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); 66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0) 67 if (err < 0)
68 return err; 68 return err;
69 69
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a4c0b..2b4ab4b05ce 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
50} 50}
51 51
52typedef int (*g_rand)(struct tcf_gact *gact); 52typedef int (*g_rand)(struct tcf_gact *gact);
53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; 53static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
54#endif /* CONFIG_GACT_PROB */ 54#endif /* CONFIG_GACT_PROB */
55 55
56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { 56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), 89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
90 bind, &gact_idx_gen, &gact_hash_info); 90 bind, &gact_idx_gen, &gact_hash_info);
91 if (IS_ERR(pc)) 91 if (IS_ERR(pc))
92 return PTR_ERR(pc); 92 return PTR_ERR(pc);
93 ret = ACT_P_CREATED; 93 ret = ACT_P_CREATED;
94 } else { 94 } else {
95 if (!ovr) { 95 if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
205static int __init gact_init_module(void) 205static int __init gact_init_module(void)
206{ 206{
207#ifdef CONFIG_GACT_PROB 207#ifdef CONFIG_GACT_PROB
208 printk(KERN_INFO "GACT probability on\n"); 208 pr_info("GACT probability on\n");
209#else 209#else
210 printk(KERN_INFO "GACT probability NOT on\n"); 210 pr_info("GACT probability NOT on\n");
211#endif 211#endif
212 return tcf_register_action(&act_gact_ops); 212 return tcf_register_action(&act_gact_ops);
213} 213}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c2a7c20e81c..9fc211a1b20 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, 138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
139 &ipt_idx_gen, &ipt_hash_info); 139 &ipt_idx_gen, &ipt_hash_info);
140 if (IS_ERR(pc)) 140 if (IS_ERR(pc))
141 return PTR_ERR(pc); 141 return PTR_ERR(pc);
142 ret = ACT_P_CREATED; 142 ret = ACT_P_CREATED;
143 } else { 143 } else {
144 if (!ovr) { 144 if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
162 if (unlikely(!t)) 162 if (unlikely(!t))
163 goto err2; 163 goto err2;
164 164
165 if ((err = ipt_init_target(t, tname, hook)) < 0) 165 err = ipt_init_target(t, tname, hook);
166 if (err < 0)
166 goto err3; 167 goto err3;
167 168
168 spin_lock_bh(&ipt->tcf_lock); 169 spin_lock_bh(&ipt->tcf_lock);
@@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
212 bstats_update(&ipt->tcf_bstats, skb); 213 bstats_update(&ipt->tcf_bstats, skb);
213 214
214 /* yes, we have to worry about both in and out dev 215 /* yes, we have to worry about both in and out dev
215 worry later - danger - this API seems to have changed 216 * worry later - danger - this API seems to have changed
216 from earlier kernels */ 217 * from earlier kernels
218 */
217 par.in = skb->dev; 219 par.in = skb->dev;
218 par.out = NULL; 220 par.out = NULL;
219 par.hooknum = ipt->tcfi_hook; 221 par.hooknum = ipt->tcfi_hook;
@@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
253 struct tc_cnt c; 255 struct tc_cnt c;
254 256
255 /* for simple targets kernel size == user size 257 /* for simple targets kernel size == user size
256 ** user name = target name 258 * user name = target name
257 ** for foolproof you need to not assume this 259 * for foolproof you need to not assume this
258 */ 260 */
259 261
260 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); 262 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
261 if (unlikely(!t)) 263 if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index d765067e99d..961386e2f2c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
41 .lock = &mirred_lock, 41 .lock = &mirred_lock,
42}; 42};
43 43
44static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) 44static int tcf_mirred_release(struct tcf_mirred *m, int bind)
45{ 45{
46 if (m) { 46 if (m) {
47 if (bind) 47 if (bind)
48 m->tcf_bindcnt--; 48 m->tcf_bindcnt--;
49 m->tcf_refcnt--; 49 m->tcf_refcnt--;
50 if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { 50 if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
51 list_del(&m->tcfm_list); 51 list_del(&m->tcfm_list);
52 if (m->tcfm_dev) 52 if (m->tcfm_dev)
53 dev_put(m->tcfm_dev); 53 dev_put(m->tcfm_dev);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 178a4bd7b7c..762b027650a 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
70 &nat_idx_gen, &nat_hash_info); 70 &nat_idx_gen, &nat_hash_info);
71 if (IS_ERR(pc)) 71 if (IS_ERR(pc))
72 return PTR_ERR(pc); 72 return PTR_ERR(pc);
73 p = to_tcf_nat(pc); 73 p = to_tcf_nat(pc);
74 ret = ACT_P_CREATED; 74 ret = ACT_P_CREATED;
75 } else { 75 } else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 445bef716f7..50c7c06c019 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
71 &pedit_idx_gen, &pedit_hash_info); 71 &pedit_idx_gen, &pedit_hash_info);
72 if (IS_ERR(pc)) 72 if (IS_ERR(pc))
73 return PTR_ERR(pc); 73 return PTR_ERR(pc);
74 p = to_pedit(pc); 74 p = to_pedit(pc);
75 keys = kmalloc(ksize, GFP_KERNEL); 75 keys = kmalloc(ksize, GFP_KERNEL);
76 if (keys == NULL) { 76 if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
127 int i, munged = 0; 127 int i, munged = 0;
128 unsigned int off; 128 unsigned int off;
129 129
130 if (skb_cloned(skb)) { 130 if (skb_cloned(skb) &&
131 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 131 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
132 return p->tcf_action; 132 return p->tcf_action;
133 }
134 }
135 133
136 off = skb_network_offset(skb); 134 off = skb_network_offset(skb);
137 135
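The collapsed test in tcf_pedit() relies on && short-circuiting, so the unshare only runs for cloned skbs and a failed copy still bails out with the configured action:

/* take a private copy before editing headers; give up if it fails */
if (skb_cloned(skb) &&
    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
	return p->tcf_action;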
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e2f08b1e2e5..8a1630774fd 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
22#include <net/act_api.h> 22#include <net/act_api.h>
23#include <net/netlink.h> 23#include <net/netlink.h>
24 24
25#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) 25#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L)
26#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) 26#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
27 27
28#define POL_TAB_MASK 15 28#define POL_TAB_MASK 15
29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
37}; 37};
38 38
39/* old policer structure from before tc actions */ 39/* old policer structure from before tc actions */
40struct tc_police_compat 40struct tc_police_compat {
41{
42 u32 index; 41 u32 index;
43 int action; 42 int action;
44 u32 limit; 43 u32 limit;
@@ -139,7 +138,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
139static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, 138static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
140 struct tc_action *a, int ovr, int bind) 139 struct tc_action *a, int ovr, int bind)
141{ 140{
142 unsigned h; 141 unsigned int h;
143 int ret = 0, err; 142 int ret = 0, err;
144 struct nlattr *tb[TCA_POLICE_MAX + 1]; 143 struct nlattr *tb[TCA_POLICE_MAX + 1];
145 struct tc_police *parm; 144 struct tc_police *parm;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 7287cff7af3..a34a22de60b 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
47 /* print policy string followed by _ then packet count 47 /* print policy string followed by _ then packet count
48 * Example if this was the 3rd packet and the string was "hello" 48 * Example if this was the 3rd packet and the string was "hello"
49 * then it would look like "hello_3" (without quotes) 49 * then it would look like "hello_3" (without quotes)
50 **/ 50 */
51 pr_info("simple: %s_%d\n", 51 pr_info("simple: %s_%d\n",
52 (char *)d->tcfd_defdata, d->tcf_bstats.packets); 52 (char *)d->tcfd_defdata, d->tcf_bstats.packets);
53 spin_unlock(&d->tcf_lock); 53 spin_unlock(&d->tcf_lock);
@@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
126 &simp_idx_gen, &simp_hash_info); 126 &simp_idx_gen, &simp_hash_info);
127 if (IS_ERR(pc)) 127 if (IS_ERR(pc))
128 return PTR_ERR(pc); 128 return PTR_ERR(pc);
129 129
130 d = to_defact(pc); 130 d = to_defact(pc);
131 ret = alloc_defdata(d, defdata); 131 ret = alloc_defdata(d, defdata);
@@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
149 return ret; 149 return ret;
150} 150}
151 151
152static inline int tcf_simp_cleanup(struct tc_action *a, int bind) 152static int tcf_simp_cleanup(struct tc_action *a, int bind)
153{ 153{
154 struct tcf_defact *d = a->priv; 154 struct tcf_defact *d = a->priv;
155 155
@@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
158 return 0; 158 return 0;
159} 159}
160 160
161static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, 161static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
162 int bind, int ref) 162 int bind, int ref)
163{ 163{
164 unsigned char *b = skb_tail_pointer(skb); 164 unsigned char *b = skb_tail_pointer(skb);
165 struct tcf_defact *d = a->priv; 165 struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 836f5fee9e5..5f6f0c7c390 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
113 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 113 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
114 &skbedit_idx_gen, &skbedit_hash_info); 114 &skbedit_idx_gen, &skbedit_hash_info);
115 if (IS_ERR(pc)) 115 if (IS_ERR(pc))
116 return PTR_ERR(pc); 116 return PTR_ERR(pc);
117 117
118 d = to_skbedit(pc); 118 d = to_skbedit(pc);
119 ret = ACT_P_CREATED; 119 ret = ACT_P_CREATED;
@@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
144 return ret; 144 return ret;
145} 145}
146 146
147static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) 147static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
148{ 148{
149 struct tcf_skbedit *d = a->priv; 149 struct tcf_skbedit *d = a->priv;
150 150
@@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
153 return 0; 153 return 0;
154} 154}
155 155
156static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, 156static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
157 int bind, int ref) 157 int bind, int ref)
158{ 158{
159 unsigned char *b = skb_tail_pointer(skb); 159 unsigned char *b = skb_tail_pointer(skb);
160 struct tcf_skbedit *d = a->priv; 160 struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5fd0c28ef79..bb2c523f815 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
85 int rc = -ENOENT; 85 int rc = -ENOENT;
86 86
87 write_lock(&cls_mod_lock); 87 write_lock(&cls_mod_lock);
88 for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) 88 for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
89 if (t == ops) 89 if (t == ops)
90 break; 90 break;
91 91
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
111 u32 first = TC_H_MAKE(0xC0000000U, 0U); 111 u32 first = TC_H_MAKE(0xC0000000U, 0U);
112 112
113 if (tp) 113 if (tp)
114 first = tp->prio-1; 114 first = tp->prio - 1;
115 115
116 return first; 116 return first;
117} 117}
@@ -149,7 +149,8 @@ replay:
149 149
150 if (prio == 0) { 150 if (prio == 0) {
 151 /* If no priority is given, user wants us to allocate it. */ 151 /* If no priority is given, user wants us to allocate it. */
152 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 152 if (n->nlmsg_type != RTM_NEWTFILTER ||
153 !(n->nlmsg_flags & NLM_F_CREATE))
153 return -ENOENT; 154 return -ENOENT;
154 prio = TC_H_MAKE(0x80000000U, 0U); 155 prio = TC_H_MAKE(0x80000000U, 0U);
155 } 156 }
@@ -176,7 +177,8 @@ replay:
176 } 177 }
177 178
178 /* Is it classful? */ 179 /* Is it classful? */
179 if ((cops = q->ops->cl_ops) == NULL) 180 cops = q->ops->cl_ops;
181 if (!cops)
180 return -EINVAL; 182 return -EINVAL;
181 183
182 if (cops->tcf_chain == NULL) 184 if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
196 goto errout; 198 goto errout;
197 199
198 /* Check the chain for existence of proto-tcf with this priority */ 200 /* Check the chain for existence of proto-tcf with this priority */
199 for (back = chain; (tp=*back) != NULL; back = &tp->next) { 201 for (back = chain; (tp = *back) != NULL; back = &tp->next) {
200 if (tp->prio >= prio) { 202 if (tp->prio >= prio) {
201 if (tp->prio == prio) { 203 if (tp->prio == prio) {
202 if (!nprio || (tp->protocol != protocol && protocol)) 204 if (!nprio ||
205 (tp->protocol != protocol && protocol))
203 goto errout; 206 goto errout;
204 } else 207 } else
205 tp = NULL; 208 tp = NULL;
@@ -216,7 +219,8 @@ replay:
216 goto errout; 219 goto errout;
217 220
218 err = -ENOENT; 221 err = -ENOENT;
219 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 222 if (n->nlmsg_type != RTM_NEWTFILTER ||
223 !(n->nlmsg_flags & NLM_F_CREATE))
220 goto errout; 224 goto errout;
221 225
222 226
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
420 424
421 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 425 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
422 return skb->len; 426 return skb->len;
423 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 427 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
428 if (!dev)
424 return skb->len; 429 return skb->len;
425 430
426 if (!tcm->tcm_parent) 431 if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
429 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 434 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
430 if (!q) 435 if (!q)
431 goto out; 436 goto out;
432 if ((cops = q->ops->cl_ops) == NULL) 437 cops = q->ops->cl_ops;
438 if (!cops)
433 goto errout; 439 goto errout;
434 if (cops->tcf_chain == NULL) 440 if (cops->tcf_chain == NULL)
435 goto errout; 441 goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
444 450
445 s_t = cb->args[0]; 451 s_t = cb->args[0];
446 452
447 for (tp=*chain, t=0; tp; tp = tp->next, t++) { 453 for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
448 if (t < s_t) continue; 454 if (t < s_t)
455 continue;
449 if (TC_H_MAJ(tcm->tcm_info) && 456 if (TC_H_MAJ(tcm->tcm_info) &&
450 TC_H_MAJ(tcm->tcm_info) != tp->prio) 457 TC_H_MAJ(tcm->tcm_info) != tp->prio)
451 continue; 458 continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
468 arg.skb = skb; 475 arg.skb = skb;
469 arg.cb = cb; 476 arg.cb = cb;
470 arg.w.stop = 0; 477 arg.w.stop = 0;
471 arg.w.skip = cb->args[1]-1; 478 arg.w.skip = cb->args[1] - 1;
472 arg.w.count = 0; 479 arg.w.count = 0;
473 tp->ops->walk(tp, &arg.w); 480 tp->ops->walk(tp, &arg.w);
474 cb->args[1] = arg.w.count+1; 481 cb->args[1] = arg.w.count + 1;
475 if (arg.w.stop) 482 if (arg.w.stop)
476 break; 483 break;
477 } 484 }
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f23d9155b1e..8be8872dd57 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
21#include <net/act_api.h> 21#include <net/act_api.h>
22#include <net/pkt_cls.h> 22#include <net/pkt_cls.h>
23 23
24struct basic_head 24struct basic_head {
25{
26 u32 hgenerator; 25 u32 hgenerator;
27 struct list_head flist; 26 struct list_head flist;
28}; 27};
29 28
30struct basic_filter 29struct basic_filter {
31{
32 u32 handle; 30 u32 handle;
33 struct tcf_exts exts; 31 struct tcf_exts exts;
34 struct tcf_ematch_tree ematches; 32 struct tcf_ematch_tree ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
92 return 0; 90 return 0;
93} 91}
94 92
95static inline void basic_delete_filter(struct tcf_proto *tp, 93static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
96 struct basic_filter *f)
97{ 94{
98 tcf_unbind_filter(tp, &f->res); 95 tcf_unbind_filter(tp, &f->res);
99 tcf_exts_destroy(tp, &f->exts); 96 tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
135 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, 132 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
136}; 133};
137 134
138static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, 135static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
139 unsigned long base, struct nlattr **tb, 136 unsigned long base, struct nlattr **tb,
140 struct nlattr *est) 137 struct nlattr *est)
141{ 138{
142 int err = -EINVAL; 139 int err = -EINVAL;
143 struct tcf_exts e; 140 struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
203 } while (--i > 0 && basic_get(tp, head->hgenerator)); 200 } while (--i > 0 && basic_get(tp, head->hgenerator));
204 201
205 if (i <= 0) { 202 if (i <= 0) {
206 printk(KERN_ERR "Insufficient number of handles\n"); 203 pr_err("Insufficient number of handles\n");
207 goto errout; 204 goto errout;
208 } 205 }
209 206
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d49c40fb7e0..32a335194ca 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
56{ 56{
57 struct cgroup_cls_state *cs; 57 struct cgroup_cls_state *cs;
58 58
59 if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) 59 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
60 if (!cs)
60 return ERR_PTR(-ENOMEM); 61 return ERR_PTR(-ENOMEM);
61 62
62 if (cgrp->parent) 63 if (cgrp->parent)
@@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
94 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 95 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
95} 96}
96 97
97struct cls_cgroup_head 98struct cls_cgroup_head {
98{
99 u32 handle; 99 u32 handle;
100 struct tcf_exts exts; 100 struct tcf_exts exts;
101 struct tcf_ematch_tree ematches; 101 struct tcf_ematch_tree ematches;
@@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
166 u32 handle, struct nlattr **tca, 166 u32 handle, struct nlattr **tca,
167 unsigned long *arg) 167 unsigned long *arg)
168{ 168{
169 struct nlattr *tb[TCA_CGROUP_MAX+1]; 169 struct nlattr *tb[TCA_CGROUP_MAX + 1];
170 struct cls_cgroup_head *head = tp->root; 170 struct cls_cgroup_head *head = tp->root;
171 struct tcf_ematch_tree t; 171 struct tcf_ematch_tree t;
172 struct tcf_exts e; 172 struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5b271a18bc3..8ec01391d98 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb)
121 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
122 break; 122 break;
123 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
124 if (iph->frag_off & htons(IP_MF|IP_OFFSET)) 124 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
125 break; 125 break;
126 poff = proto_ports_offset(iph->protocol); 126 poff = proto_ports_offset(iph->protocol);
127 if (poff >= 0 && 127 if (poff >= 0 &&
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
163 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
164 break; 164 break;
165 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
166 if (iph->frag_off & htons(IP_MF|IP_OFFSET)) 166 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
167 break; 167 break;
168 poff = proto_ports_offset(iph->protocol); 168 poff = proto_ports_offset(iph->protocol);
169 if (poff >= 0 && 169 if (poff >= 0 &&
@@ -276,7 +276,7 @@ fallback:
276 276
277static u32 flow_get_rtclassid(const struct sk_buff *skb) 277static u32 flow_get_rtclassid(const struct sk_buff *skb)
278{ 278{
279#ifdef CONFIG_NET_CLS_ROUTE 279#ifdef CONFIG_IP_ROUTE_CLASSID
280 if (skb_dst(skb)) 280 if (skb_dst(skb))
281 return skb_dst(skb)->tclassid; 281 return skb_dst(skb)->tclassid;
282#endif 282#endif
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 93b0a7b6f9b..26e7bc4ffb7 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
31 31
32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) 32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
33 33
34struct fw_head 34struct fw_head {
35{
36 struct fw_filter *ht[HTSIZE]; 35 struct fw_filter *ht[HTSIZE];
37 u32 mask; 36 u32 mask;
38}; 37};
39 38
40struct fw_filter 39struct fw_filter {
41{
42 struct fw_filter *next; 40 struct fw_filter *next;
43 u32 id; 41 u32 id;
44 struct tcf_result res; 42 struct tcf_result res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
53 .police = TCA_FW_POLICE 51 .police = TCA_FW_POLICE
54}; 52};
55 53
56static __inline__ int fw_hash(u32 handle) 54static inline int fw_hash(u32 handle)
57{ 55{
58 if (HTSIZE == 4096) 56 if (HTSIZE == 4096)
59 return ((handle >> 24) & 0xFFF) ^ 57 return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
82static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, 80static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
83 struct tcf_result *res) 81 struct tcf_result *res)
84{ 82{
85 struct fw_head *head = (struct fw_head*)tp->root; 83 struct fw_head *head = (struct fw_head *)tp->root;
86 struct fw_filter *f; 84 struct fw_filter *f;
87 int r; 85 int r;
88 u32 id = skb->mark; 86 u32 id = skb->mark;
89 87
90 if (head != NULL) { 88 if (head != NULL) {
91 id &= head->mask; 89 id &= head->mask;
92 for (f=head->ht[fw_hash(id)]; f; f=f->next) { 90 for (f = head->ht[fw_hash(id)]; f; f = f->next) {
93 if (f->id == id) { 91 if (f->id == id) {
94 *res = f->res; 92 *res = f->res;
95#ifdef CONFIG_NET_CLS_IND 93#ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
105 } 103 }
106 } else { 104 } else {
107 /* old method */ 105 /* old method */
108 if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) { 106 if (id && (TC_H_MAJ(id) == 0 ||
107 !(TC_H_MAJ(id ^ tp->q->handle)))) {
109 res->classid = id; 108 res->classid = id;
110 res->class = 0; 109 res->class = 0;
111 return 0; 110 return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
117 116
118static unsigned long fw_get(struct tcf_proto *tp, u32 handle) 117static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
119{ 118{
120 struct fw_head *head = (struct fw_head*)tp->root; 119 struct fw_head *head = (struct fw_head *)tp->root;
121 struct fw_filter *f; 120 struct fw_filter *f;
122 121
123 if (head == NULL) 122 if (head == NULL)
124 return 0; 123 return 0;
125 124
126 for (f=head->ht[fw_hash(handle)]; f; f=f->next) { 125 for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
127 if (f->id == handle) 126 if (f->id == handle)
128 return (unsigned long)f; 127 return (unsigned long)f;
129 } 128 }
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
139 return 0; 138 return 0;
140} 139}
141 140
142static inline void 141static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
143fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
144{ 142{
145 tcf_unbind_filter(tp, &f->res); 143 tcf_unbind_filter(tp, &f->res);
146 tcf_exts_destroy(tp, &f->exts); 144 tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
156 if (head == NULL) 154 if (head == NULL)
157 return; 155 return;
158 156
159 for (h=0; h<HTSIZE; h++) { 157 for (h = 0; h < HTSIZE; h++) {
160 while ((f=head->ht[h]) != NULL) { 158 while ((f = head->ht[h]) != NULL) {
161 head->ht[h] = f->next; 159 head->ht[h] = f->next;
162 fw_delete_filter(tp, f); 160 fw_delete_filter(tp, f);
163 } 161 }
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
167 165
168static int fw_delete(struct tcf_proto *tp, unsigned long arg) 166static int fw_delete(struct tcf_proto *tp, unsigned long arg)
169{ 167{
170 struct fw_head *head = (struct fw_head*)tp->root; 168 struct fw_head *head = (struct fw_head *)tp->root;
171 struct fw_filter *f = (struct fw_filter*)arg; 169 struct fw_filter *f = (struct fw_filter *)arg;
172 struct fw_filter **fp; 170 struct fw_filter **fp;
173 171
174 if (head == NULL || f == NULL) 172 if (head == NULL || f == NULL)
175 goto out; 173 goto out;
176 174
177 for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { 175 for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
178 if (*fp == f) { 176 if (*fp == f) {
179 tcf_tree_lock(tp); 177 tcf_tree_lock(tp);
180 *fp = f->next; 178 *fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
240 struct nlattr **tca, 238 struct nlattr **tca,
241 unsigned long *arg) 239 unsigned long *arg)
242{ 240{
243 struct fw_head *head = (struct fw_head*)tp->root; 241 struct fw_head *head = (struct fw_head *)tp->root;
244 struct fw_filter *f = (struct fw_filter *) *arg; 242 struct fw_filter *f = (struct fw_filter *) *arg;
245 struct nlattr *opt = tca[TCA_OPTIONS]; 243 struct nlattr *opt = tca[TCA_OPTIONS];
246 struct nlattr *tb[TCA_FW_MAX + 1]; 244 struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
302 300
303static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) 301static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
304{ 302{
305 struct fw_head *head = (struct fw_head*)tp->root; 303 struct fw_head *head = (struct fw_head *)tp->root;
306 int h; 304 int h;
307 305
308 if (head == NULL) 306 if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
332 struct sk_buff *skb, struct tcmsg *t) 330 struct sk_buff *skb, struct tcmsg *t)
333{ 331{
334 struct fw_head *head = (struct fw_head *)tp->root; 332 struct fw_head *head = (struct fw_head *)tp->root;
335 struct fw_filter *f = (struct fw_filter*)fh; 333 struct fw_filter *f = (struct fw_filter *)fh;
336 unsigned char *b = skb_tail_pointer(skb); 334 unsigned char *b = skb_tail_pointer(skb);
337 struct nlattr *nest; 335 struct nlattr *nest;
338 336
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 694dcd85dec..a907905376d 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
23#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
24 24
25/* 25/*
26 1. For now we assume that route tags < 256. 26 * 1. For now we assume that route tags < 256.
27 It allows to use direct table lookups, instead of hash tables. 27 * It allows to use direct table lookups, instead of hash tables.
28 2. For now we assume that "from TAG" and "fromdev DEV" statements 28 * 2. For now we assume that "from TAG" and "fromdev DEV" statements
29 are mutually exclusive. 29 * are mutually exclusive.
30 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" 30 * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
31 */ 31 */
32 32
33struct route4_fastmap 33struct route4_fastmap {
34{
35 struct route4_filter *filter; 34 struct route4_filter *filter;
36 u32 id; 35 u32 id;
37 int iif; 36 int iif;
38}; 37};
39 38
40struct route4_head 39struct route4_head {
41{
42 struct route4_fastmap fastmap[16]; 40 struct route4_fastmap fastmap[16];
43 struct route4_bucket *table[256+1]; 41 struct route4_bucket *table[256 + 1];
44}; 42};
45 43
46struct route4_bucket 44struct route4_bucket {
47{
48 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ 45 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
49 struct route4_filter *ht[16+16+1]; 46 struct route4_filter *ht[16 + 16 + 1];
50}; 47};
51 48
52struct route4_filter 49struct route4_filter {
53{
54 struct route4_filter *next; 50 struct route4_filter *next;
55 u32 id; 51 u32 id;
56 int iif; 52 int iif;
@@ -61,20 +57,20 @@ struct route4_filter
61 struct route4_bucket *bkt; 57 struct route4_bucket *bkt;
62}; 58};
63 59
64#define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) 60#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
65 61
66static const struct tcf_ext_map route_ext_map = { 62static const struct tcf_ext_map route_ext_map = {
67 .police = TCA_ROUTE4_POLICE, 63 .police = TCA_ROUTE4_POLICE,
68 .action = TCA_ROUTE4_ACT 64 .action = TCA_ROUTE4_ACT
69}; 65};
70 66
71static __inline__ int route4_fastmap_hash(u32 id, int iif) 67static inline int route4_fastmap_hash(u32 id, int iif)
72{ 68{
73 return id&0xF; 69 return id & 0xF;
74} 70}
75 71
76static inline 72static void
77void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) 73route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
78{ 74{
79 spinlock_t *root_lock = qdisc_root_sleeping_lock(q); 75 spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
80 76
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
83 spin_unlock_bh(root_lock); 79 spin_unlock_bh(root_lock);
84} 80}
85 81
86static inline void 82static void
87route4_set_fastmap(struct route4_head *head, u32 id, int iif, 83route4_set_fastmap(struct route4_head *head, u32 id, int iif,
88 struct route4_filter *f) 84 struct route4_filter *f)
89{ 85{
90 int h = route4_fastmap_hash(id, iif); 86 int h = route4_fastmap_hash(id, iif);
87
91 head->fastmap[h].id = id; 88 head->fastmap[h].id = id;
92 head->fastmap[h].iif = iif; 89 head->fastmap[h].iif = iif;
93 head->fastmap[h].filter = f; 90 head->fastmap[h].filter = f;
94} 91}
95 92
96static __inline__ int route4_hash_to(u32 id) 93static inline int route4_hash_to(u32 id)
97{ 94{
98 return id&0xFF; 95 return id & 0xFF;
99} 96}
100 97
101static __inline__ int route4_hash_from(u32 id) 98static inline int route4_hash_from(u32 id)
102{ 99{
103 return (id>>16)&0xF; 100 return (id >> 16) & 0xF;
104} 101}
105 102
106static __inline__ int route4_hash_iif(int iif) 103static inline int route4_hash_iif(int iif)
107{ 104{
108 return 16 + ((iif>>16)&0xF); 105 return 16 + ((iif >> 16) & 0xF);
109} 106}
110 107
111static __inline__ int route4_hash_wild(void) 108static inline int route4_hash_wild(void)
112{ 109{
113 return 32; 110 return 32;
114} 111}
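
Taken together, these helpers implement the assumptions stated in the comment at the top of cls_route.c: with route tags below 256, the "to" tag indexes the 257-entry table directly, while the "from" tag or the incoming interface folds into the 33 per-bucket slots (16 FROM + 16 IIF + 1 wildcard). A minimal userspace sketch of the same arithmetic; the classid and interface values below are invented for illustration:

#include <stdio.h>

int main(void)
{
    unsigned int id = 0x000b0005; /* hypothetical dst->tclassid: from=11, to=5 */
    int iif = 0x00030000;         /* hypothetical incoming interface id */

    printf("fastmap slot: %u\n", id & 0xF);          /* route4_fastmap_hash() */
    printf("to bucket:    %u\n", id & 0xFF);         /* route4_hash_to()      */
    printf("from slot:    %u\n", (id >> 16) & 0xF);  /* route4_hash_from()    */
    printf("iif slot:     %d\n", 16 + ((iif >> 16) & 0xF)); /* route4_hash_iif() */
    return 0;
}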
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
131static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, 128static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
132 struct tcf_result *res) 129 struct tcf_result *res)
133{ 130{
134 struct route4_head *head = (struct route4_head*)tp->root; 131 struct route4_head *head = (struct route4_head *)tp->root;
135 struct dst_entry *dst; 132 struct dst_entry *dst;
136 struct route4_bucket *b; 133 struct route4_bucket *b;
137 struct route4_filter *f; 134 struct route4_filter *f;
138 u32 id, h; 135 u32 id, h;
139 int iif, dont_cache = 0; 136 int iif, dont_cache = 0;
140 137
141 if ((dst = skb_dst(skb)) == NULL) 138 dst = skb_dst(skb);
139 if (!dst)
142 goto failure; 140 goto failure;
143 141
144 id = dst->tclassid; 142 id = dst->tclassid;
145 if (head == NULL) 143 if (head == NULL)
146 goto old_method; 144 goto old_method;
147 145
148 iif = ((struct rtable*)dst)->fl.iif; 146 iif = ((struct rtable *)dst)->rt_iif;
149 147
150 h = route4_fastmap_hash(id, iif); 148 h = route4_fastmap_hash(id, iif);
151 if (id == head->fastmap[h].id && 149 if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
161 h = route4_hash_to(id); 159 h = route4_hash_to(id);
162 160
163restart: 161restart:
164 if ((b = head->table[h]) != NULL) { 162 b = head->table[h];
163 if (b) {
165 for (f = b->ht[route4_hash_from(id)]; f; f = f->next) 164 for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
166 if (f->id == id) 165 if (f->id == id)
167 ROUTE4_APPLY_RESULT(); 166 ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
197 196
198static inline u32 to_hash(u32 id) 197static inline u32 to_hash(u32 id)
199{ 198{
200 u32 h = id&0xFF; 199 u32 h = id & 0xFF;
201 if (id&0x8000) 200
201 if (id & 0x8000)
202 h += 256; 202 h += 256;
203 return h; 203 return h;
204} 204}
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
211 if (!(id & 0x8000)) { 211 if (!(id & 0x8000)) {
212 if (id > 255) 212 if (id > 255)
213 return 256; 213 return 256;
214 return id&0xF; 214 return id & 0xF;
215 } 215 }
216 return 16 + (id&0xF); 216 return 16 + (id & 0xF);
217} 217}
218 218
219static unsigned long route4_get(struct tcf_proto *tp, u32 handle) 219static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
220{ 220{
221 struct route4_head *head = (struct route4_head*)tp->root; 221 struct route4_head *head = (struct route4_head *)tp->root;
222 struct route4_bucket *b; 222 struct route4_bucket *b;
223 struct route4_filter *f; 223 struct route4_filter *f;
224 unsigned h1, h2; 224 unsigned int h1, h2;
225 225
226 if (!head) 226 if (!head)
227 return 0; 227 return 0;
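
to_hash()/from_hash(), used by route4_get() just above, decode a filter handle rather than a packet classid: bit 15 (0x8000) marks interface-based filters, which is why such handles land in the upper 256 "to" buckets and the upper 16 "from" slots. A sketch of the split with a made-up handle value:

#include <stdio.h>

int main(void)
{
    unsigned int handle = 0x80030105; /* hypothetical filter handle */
    unsigned int to = handle & 0xFFFF;
    unsigned int from = handle >> 16;

    /* mirrors to_hash() */
    printf("to bucket: %u\n", (to & 0xFF) + ((to & 0x8000) ? 256 : 0));
    /* mirrors from_hash() */
    printf("from slot: %u\n", (from & 0x8000) ? 16 + (from & 0xF)
                                              : (from > 255 ? 256 : from & 0xF));
    return 0;
}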
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
230 if (h1 > 256) 230 if (h1 > 256)
231 return 0; 231 return 0;
232 232
233 h2 = from_hash(handle>>16); 233 h2 = from_hash(handle >> 16);
234 if (h2 > 32) 234 if (h2 > 32)
235 return 0; 235 return 0;
236 236
237 if ((b = head->table[h1]) != NULL) { 237 b = head->table[h1];
238 if (b) {
238 for (f = b->ht[h2]; f; f = f->next) 239 for (f = b->ht[h2]; f; f = f->next)
239 if (f->handle == handle) 240 if (f->handle == handle)
240 return (unsigned long)f; 241 return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
251 return 0; 252 return 0;
252} 253}
253 254
254static inline void 255static void
255route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) 256route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
256{ 257{
257 tcf_unbind_filter(tp, &f->res); 258 tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
267 if (head == NULL) 268 if (head == NULL)
268 return; 269 return;
269 270
270 for (h1=0; h1<=256; h1++) { 271 for (h1 = 0; h1 <= 256; h1++) {
271 struct route4_bucket *b; 272 struct route4_bucket *b;
272 273
273 if ((b = head->table[h1]) != NULL) { 274 b = head->table[h1];
274 for (h2=0; h2<=32; h2++) { 275 if (b) {
276 for (h2 = 0; h2 <= 32; h2++) {
275 struct route4_filter *f; 277 struct route4_filter *f;
276 278
277 while ((f = b->ht[h2]) != NULL) { 279 while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
287 289
288static int route4_delete(struct tcf_proto *tp, unsigned long arg) 290static int route4_delete(struct tcf_proto *tp, unsigned long arg)
289{ 291{
290 struct route4_head *head = (struct route4_head*)tp->root; 292 struct route4_head *head = (struct route4_head *)tp->root;
291 struct route4_filter **fp, *f = (struct route4_filter*)arg; 293 struct route4_filter **fp, *f = (struct route4_filter *)arg;
292 unsigned h = 0; 294 unsigned int h = 0;
293 struct route4_bucket *b; 295 struct route4_bucket *b;
294 int i; 296 int i;
295 297
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
299 h = f->handle; 301 h = f->handle;
300 b = f->bkt; 302 b = f->bkt;
301 303
302 for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { 304 for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
303 if (*fp == f) { 305 if (*fp == f) {
304 tcf_tree_lock(tp); 306 tcf_tree_lock(tp);
305 *fp = f->next; 307 *fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
310 312
311 /* Strip tree */ 313 /* Strip tree */
312 314
313 for (i=0; i<=32; i++) 315 for (i = 0; i <= 32; i++)
314 if (b->ht[i]) 316 if (b->ht[i])
315 return 0; 317 return 0;
316 318
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
380 } 382 }
381 383
382 h1 = to_hash(nhandle); 384 h1 = to_hash(nhandle);
383 if ((b = head->table[h1]) == NULL) { 385 b = head->table[h1];
386 if (!b) {
384 err = -ENOBUFS; 387 err = -ENOBUFS;
385 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); 388 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
386 if (b == NULL) 389 if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
391 tcf_tree_unlock(tp); 394 tcf_tree_unlock(tp);
392 } else { 395 } else {
393 unsigned int h2 = from_hash(nhandle >> 16); 396 unsigned int h2 = from_hash(nhandle >> 16);
397
394 err = -EEXIST; 398 err = -EEXIST;
395 for (fp = b->ht[h2]; fp; fp = fp->next) 399 for (fp = b->ht[h2]; fp; fp = fp->next)
396 if (fp->handle == f->handle) 400 if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
444 if (err < 0) 448 if (err < 0)
445 return err; 449 return err;
446 450
447 if ((f = (struct route4_filter*)*arg) != NULL) { 451 f = (struct route4_filter *)*arg;
452 if (f) {
448 if (f->handle != handle && handle) 453 if (f->handle != handle && handle)
449 return -EINVAL; 454 return -EINVAL;
450 455
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
481 486
482reinsert: 487reinsert:
483 h = from_hash(f->handle >> 16); 488 h = from_hash(f->handle >> 16);
484 for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next) 489 for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
485 if (f->handle < f1->handle) 490 if (f->handle < f1->handle)
486 break; 491 break;
487 492
@@ -492,7 +497,8 @@ reinsert:
492 if (old_handle && f->handle != old_handle) { 497 if (old_handle && f->handle != old_handle) {
493 th = to_hash(old_handle); 498 th = to_hash(old_handle);
494 h = from_hash(old_handle >> 16); 499 h = from_hash(old_handle >> 16);
495 if ((b = head->table[th]) != NULL) { 500 b = head->table[th];
501 if (b) {
496 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { 502 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
497 if (*fp == f) { 503 if (*fp == f) {
498 *fp = f->next; 504 *fp = f->next;
@@ -515,7 +521,7 @@ errout:
515static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) 521static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
516{ 522{
517 struct route4_head *head = tp->root; 523 struct route4_head *head = tp->root;
518 unsigned h, h1; 524 unsigned int h, h1;
519 525
520 if (head == NULL) 526 if (head == NULL)
521 arg->stop = 1; 527 arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
549static int route4_dump(struct tcf_proto *tp, unsigned long fh, 555static int route4_dump(struct tcf_proto *tp, unsigned long fh,
550 struct sk_buff *skb, struct tcmsg *t) 556 struct sk_buff *skb, struct tcmsg *t)
551{ 557{
552 struct route4_filter *f = (struct route4_filter*)fh; 558 struct route4_filter *f = (struct route4_filter *)fh;
553 unsigned char *b = skb_tail_pointer(skb); 559 unsigned char *b = skb_tail_pointer(skb);
554 struct nlattr *nest; 560 struct nlattr *nest;
555 u32 id; 561 u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
563 if (nest == NULL) 569 if (nest == NULL)
564 goto nla_put_failure; 570 goto nla_put_failure;
565 571
566 if (!(f->handle&0x8000)) { 572 if (!(f->handle & 0x8000)) {
567 id = f->id&0xFF; 573 id = f->id & 0xFF;
568 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); 574 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
569 } 575 }
570 if (f->handle&0x80000000) { 576 if (f->handle & 0x80000000) {
571 if ((f->handle>>16) != 0xFFFF) 577 if ((f->handle >> 16) != 0xFFFF)
572 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); 578 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
573 } else { 579 } else {
574 id = f->id>>16; 580 id = f->id >> 16;
575 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); 581 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
576 } 582 }
577 if (f->res.classid) 583 if (f->res.classid)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 425a1790b04..402c44b241a 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
66 powerful classification engine. */ 66 powerful classification engine. */
67 67
68 68
69struct rsvp_head 69struct rsvp_head {
70{
71 u32 tmap[256/32]; 70 u32 tmap[256/32];
72 u32 hgenerator; 71 u32 hgenerator;
73 u8 tgenerator; 72 u8 tgenerator;
74 struct rsvp_session *ht[256]; 73 struct rsvp_session *ht[256];
75}; 74};
76 75
77struct rsvp_session 76struct rsvp_session {
78{
79 struct rsvp_session *next; 77 struct rsvp_session *next;
80 __be32 dst[RSVP_DST_LEN]; 78 __be32 dst[RSVP_DST_LEN];
81 struct tc_rsvp_gpi dpi; 79 struct tc_rsvp_gpi dpi;
82 u8 protocol; 80 u8 protocol;
83 u8 tunnelid; 81 u8 tunnelid;
84 /* 16 (src,sport) hash slots, and one wildcard source slot */ 82 /* 16 (src,sport) hash slots, and one wildcard source slot */
85 struct rsvp_filter *ht[16+1]; 83 struct rsvp_filter *ht[16 + 1];
86}; 84};
87 85
88 86
89struct rsvp_filter 87struct rsvp_filter {
90{
91 struct rsvp_filter *next; 88 struct rsvp_filter *next;
92 __be32 src[RSVP_DST_LEN]; 89 __be32 src[RSVP_DST_LEN];
93 struct tc_rsvp_gpi spi; 90 struct tc_rsvp_gpi spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
100 struct rsvp_session *sess; 97 struct rsvp_session *sess;
101}; 98};
102 99
103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) 100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{ 101{
105 unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; 102 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
103
106 h ^= h>>16; 104 h ^= h>>16;
107 h ^= h>>8; 105 h ^= h>>8;
108 return (h ^ protocol ^ tunnelid) & 0xFF; 106 return (h ^ protocol ^ tunnelid) & 0xFF;
109} 107}
110 108
111static __inline__ unsigned hash_src(__be32 *src) 109static inline unsigned int hash_src(__be32 *src)
112{ 110{
113 unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; 111 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
112
114 h ^= h>>16; 113 h ^= h>>16;
115 h ^= h>>8; 114 h ^= h>>8;
116 h ^= h>>4; 115 h ^= h>>4;
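
hash_dst() and hash_src() fold the last 32 bits of the address down by repeatedly XOR-ing the value onto itself; hash_src() just adds one more h ^= h >> 4 step for its smaller table. A standalone sketch of the dst fold (the address is an arbitrary example, and the real function also XORs in protocol and tunnel id before masking):

#include <stdio.h>

static unsigned int fold8(unsigned int h) /* hash_dst() core */
{
    h ^= h >> 16;
    h ^= h >> 8;
    return h & 0xFF;
}

int main(void)
{
    unsigned int addr = 0xc0a80117; /* example IPv4 address 192.168.1.23 */

    printf("dst bucket: %u\n", fold8(addr));
    return 0;
}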
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, 133static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135 struct tcf_result *res) 134 struct tcf_result *res)
136{ 135{
137 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 136 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
138 struct rsvp_session *s; 137 struct rsvp_session *s;
139 struct rsvp_filter *f; 138 struct rsvp_filter *f;
140 unsigned h1, h2; 139 unsigned int h1, h2;
141 __be32 *dst, *src; 140 __be32 *dst, *src;
142 u8 protocol; 141 u8 protocol;
143 u8 tunnelid = 0; 142 u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
162 src = &nhptr->saddr.s6_addr32[0]; 161 src = &nhptr->saddr.s6_addr32[0];
163 dst = &nhptr->daddr.s6_addr32[0]; 162 dst = &nhptr->daddr.s6_addr32[0];
164 protocol = nhptr->nexthdr; 163 protocol = nhptr->nexthdr;
165 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); 164 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
166#else 165#else
167 src = &nhptr->saddr; 166 src = &nhptr->saddr;
168 dst = &nhptr->daddr; 167 dst = &nhptr->daddr;
169 protocol = nhptr->protocol; 168 protocol = nhptr->protocol;
170 xprt = ((u8*)nhptr) + (nhptr->ihl<<2); 169 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
171 if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) 170 if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
172 return -1; 171 return -1;
173#endif 172#endif
174 173
@@ -176,10 +175,10 @@ restart:
176 h2 = hash_src(src); 175 h2 = hash_src(src);
177 176
178 for (s = sht[h1]; s; s = s->next) { 177 for (s = sht[h1]; s; s = s->next) {
179 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
180 protocol == s->protocol && 179 protocol == s->protocol &&
181 !(s->dpi.mask & 180 !(s->dpi.mask &
182 (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && 181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
183#if RSVP_DST_LEN == 4 182#if RSVP_DST_LEN == 4
184 dst[0] == s->dst[0] && 183 dst[0] == s->dst[0] &&
185 dst[1] == s->dst[1] && 184 dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
188 tunnelid == s->tunnelid) { 187 tunnelid == s->tunnelid) {
189 188
190 for (f = s->ht[h2]; f; f = f->next) { 189 for (f = s->ht[h2]; f; f = f->next) {
191 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && 190 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
192 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) 191 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
193#if RSVP_DST_LEN == 4 192#if RSVP_DST_LEN == 4
194 && 193 &&
195 src[0] == f->src[0] && 194 src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
205 return 0; 204 return 0;
206 205
207 tunnelid = f->res.classid; 206 tunnelid = f->res.classid;
208 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); 207 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
209 goto restart; 208 goto restart;
210 } 209 }
211 } 210 }
@@ -224,11 +223,11 @@ matched:
224 223
225static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) 224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
226{ 225{
227 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 226 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
228 struct rsvp_session *s; 227 struct rsvp_session *s;
229 struct rsvp_filter *f; 228 struct rsvp_filter *f;
230 unsigned h1 = handle&0xFF; 229 unsigned int h1 = handle & 0xFF;
231 unsigned h2 = (handle>>8)&0xFF; 230 unsigned int h2 = (handle >> 8) & 0xFF;
232 231
233 if (h2 > 16) 232 if (h2 > 16)
234 return 0; 233 return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
258 return -ENOBUFS; 257 return -ENOBUFS;
259} 258}
260 259
261static inline void 260static void
262rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) 261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
263{ 262{
264 tcf_unbind_filter(tp, &f->res); 263 tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
277 276
278 sht = data->ht; 277 sht = data->ht;
279 278
280 for (h1=0; h1<256; h1++) { 279 for (h1 = 0; h1 < 256; h1++) {
281 struct rsvp_session *s; 280 struct rsvp_session *s;
282 281
283 while ((s = sht[h1]) != NULL) { 282 while ((s = sht[h1]) != NULL) {
284 sht[h1] = s->next; 283 sht[h1] = s->next;
285 284
286 for (h2=0; h2<=16; h2++) { 285 for (h2 = 0; h2 <= 16; h2++) {
287 struct rsvp_filter *f; 286 struct rsvp_filter *f;
288 287
289 while ((f = s->ht[h2]) != NULL) { 288 while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
299 298
300static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) 299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
301{ 300{
302 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; 301 struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
303 unsigned h = f->handle; 302 unsigned int h = f->handle;
304 struct rsvp_session **sp; 303 struct rsvp_session **sp;
305 struct rsvp_session *s = f->sess; 304 struct rsvp_session *s = f->sess;
306 int i; 305 int i;
307 306
308 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { 307 for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
309 if (*fp == f) { 308 if (*fp == f) {
310 tcf_tree_lock(tp); 309 tcf_tree_lock(tp);
311 *fp = f->next; 310 *fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
314 313
315 /* Strip tree */ 314 /* Strip tree */
316 315
317 for (i=0; i<=16; i++) 316 for (i = 0; i <= 16; i++)
318 if (s->ht[i]) 317 if (s->ht[i])
319 return 0; 318 return 0;
320 319
321 /* OK, session has no flows */ 320 /* OK, session has no flows */
322 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; 321 for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
323 *sp; sp = &(*sp)->next) { 322 *sp; sp = &(*sp)->next) {
324 if (*sp == s) { 323 if (*sp == s) {
325 tcf_tree_lock(tp); 324 tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
337 return 0; 336 return 0;
338} 337}
339 338
340static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) 339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
341{ 340{
342 struct rsvp_head *data = tp->root; 341 struct rsvp_head *data = tp->root;
343 int i = 0xFFFF; 342 int i = 0xFFFF;
344 343
345 while (i-- > 0) { 344 while (i-- > 0) {
346 u32 h; 345 u32 h;
346
347 if ((data->hgenerator += 0x10000) == 0) 347 if ((data->hgenerator += 0x10000) == 0)
348 data->hgenerator = 0x10000; 348 data->hgenerator = 0x10000;
349 h = data->hgenerator|salt; 349 h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
355 355
356static int tunnel_bts(struct rsvp_head *data) 356static int tunnel_bts(struct rsvp_head *data)
357{ 357{
358 int n = data->tgenerator>>5; 358 int n = data->tgenerator >> 5;
359 u32 b = 1<<(data->tgenerator&0x1F); 359 u32 b = 1 << (data->tgenerator & 0x1F);
360 360
361 if (data->tmap[n]&b) 361 if (data->tmap[n] & b)
362 return 0; 362 return 0;
363 data->tmap[n] |= b; 363 data->tmap[n] |= b;
364 return 1; 364 return 1;
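
tunnel_bts() treats data->tmap[256/32] as a 256-bit allocation bitmap for tunnel ids: the generator's upper bits pick the 32-bit word, the low five bits pick the bit within it, and the function returns 1 only when a previously free bit was claimed. The same pattern in isolation:

#include <stdio.h>

typedef unsigned int u32;

static u32 tmap[256 / 32];

/* Claim bit 'id' if free; mirrors tunnel_bts() (simplified sketch). */
static int claim(unsigned int id)
{
    int n = id >> 5;           /* which 32-bit word      */
    u32 b = 1u << (id & 0x1F); /* which bit in that word */

    if (tmap[n] & b)
        return 0;              /* already in use */
    tmap[n] |= b;
    return 1;
}

int main(void)
{
    printf("%d %d\n", claim(200), claim(200)); /* prints: 1 0 */
    return 0;
}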
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
372 372
373 memset(tmap, 0, sizeof(tmap)); 373 memset(tmap, 0, sizeof(tmap));
374 374
375 for (h1=0; h1<256; h1++) { 375 for (h1 = 0; h1 < 256; h1++) {
376 struct rsvp_session *s; 376 struct rsvp_session *s;
377 for (s = sht[h1]; s; s = s->next) { 377 for (s = sht[h1]; s; s = s->next) {
378 for (h2=0; h2<=16; h2++) { 378 for (h2 = 0; h2 <= 16; h2++) {
379 struct rsvp_filter *f; 379 struct rsvp_filter *f;
380 380
381 for (f = s->ht[h2]; f; f = f->next) { 381 for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
395{ 395{
396 int i, k; 396 int i, k;
397 397
398 for (k=0; k<2; k++) { 398 for (k = 0; k < 2; k++) {
399 for (i=255; i>0; i--) { 399 for (i = 255; i > 0; i--) {
400 if (++data->tgenerator == 0) 400 if (++data->tgenerator == 0)
401 data->tgenerator = 1; 401 data->tgenerator = 1;
402 if (tunnel_bts(data)) 402 if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
428 struct nlattr *opt = tca[TCA_OPTIONS-1]; 428 struct nlattr *opt = tca[TCA_OPTIONS-1];
429 struct nlattr *tb[TCA_RSVP_MAX + 1]; 429 struct nlattr *tb[TCA_RSVP_MAX + 1];
430 struct tcf_exts e; 430 struct tcf_exts e;
431 unsigned h1, h2; 431 unsigned int h1, h2;
432 __be32 *dst; 432 __be32 *dst;
433 int err; 433 int err;
434 434
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
443 if (err < 0) 443 if (err < 0)
444 return err; 444 return err;
445 445
446 if ((f = (struct rsvp_filter*)*arg) != NULL) { 446 f = (struct rsvp_filter *)*arg;
447 if (f) {
447 /* Node exists: adjust only classid */ 448 /* Node exists: adjust only classid */
448 449
449 if (f->handle != handle && handle) 450 if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
500 goto errout; 501 goto errout;
501 } 502 }
502 503
503 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { 504 for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
504 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 505 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
505 pinfo && pinfo->protocol == s->protocol && 506 pinfo && pinfo->protocol == s->protocol &&
506 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && 507 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
523 tcf_exts_change(tp, &f->exts, &e); 524 tcf_exts_change(tp, &f->exts, &e);
524 525
525 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) 526 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
526 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) 527 if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
527 break; 528 break;
528 f->next = *fp; 529 f->next = *fp;
529 wmb(); 530 wmb();
@@ -567,7 +568,7 @@ errout2:
567static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) 568static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
568{ 569{
569 struct rsvp_head *head = tp->root; 570 struct rsvp_head *head = tp->root;
570 unsigned h, h1; 571 unsigned int h, h1;
571 572
572 if (arg->stop) 573 if (arg->stop)
573 return; 574 return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
598static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, 599static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
599 struct sk_buff *skb, struct tcmsg *t) 600 struct sk_buff *skb, struct tcmsg *t)
600{ 601{
601 struct rsvp_filter *f = (struct rsvp_filter*)fh; 602 struct rsvp_filter *f = (struct rsvp_filter *)fh;
602 struct rsvp_session *s; 603 struct rsvp_session *s;
603 unsigned char *b = skb_tail_pointer(skb); 604 unsigned char *b = skb_tail_pointer(skb);
604 struct nlattr *nest; 605 struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
624 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); 625 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
625 if (f->res.classid) 626 if (f->res.classid)
626 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); 627 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
627 if (((f->handle>>8)&0xFF) != 16) 628 if (((f->handle >> 8) & 0xFF) != 16)
628 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); 629 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
629 630
630 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) 631 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 20ef330bb91..36667fa6423 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
249 * of the hashing index is below the threshold. 249 * of the hashing index is below the threshold.
250 */ 250 */
251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) 251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
252 cp.hash = (cp.mask >> cp.shift)+1; 252 cp.hash = (cp.mask >> cp.shift) + 1;
253 else 253 else
254 cp.hash = DEFAULT_HASH_SIZE; 254 cp.hash = DEFAULT_HASH_SIZE;
255 } 255 }
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b0c2a82178a..3b93fc0c895 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
42#include <net/act_api.h> 42#include <net/act_api.h>
43#include <net/pkt_cls.h> 43#include <net/pkt_cls.h>
44 44
45struct tc_u_knode 45struct tc_u_knode {
46{
47 struct tc_u_knode *next; 46 struct tc_u_knode *next;
48 u32 handle; 47 u32 handle;
49 struct tc_u_hnode *ht_up; 48 struct tc_u_hnode *ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
63 struct tc_u32_sel sel; 62 struct tc_u32_sel sel;
64}; 63};
65 64
66struct tc_u_hnode 65struct tc_u_hnode {
67{
68 struct tc_u_hnode *next; 66 struct tc_u_hnode *next;
69 u32 handle; 67 u32 handle;
70 u32 prio; 68 u32 prio;
71 struct tc_u_common *tp_c; 69 struct tc_u_common *tp_c;
72 int refcnt; 70 int refcnt;
73 unsigned divisor; 71 unsigned int divisor;
74 struct tc_u_knode *ht[1]; 72 struct tc_u_knode *ht[1];
75}; 73};
76 74
77struct tc_u_common 75struct tc_u_common {
78{
79 struct tc_u_hnode *hlist; 76 struct tc_u_hnode *hlist;
80 struct Qdisc *q; 77 struct Qdisc *q;
81 int refcnt; 78 int refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
87 .police = TCA_U32_POLICE 84 .police = TCA_U32_POLICE
88}; 85};
89 86
90static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) 87static inline unsigned int u32_hash_fold(__be32 key,
88 const struct tc_u32_sel *sel,
89 u8 fshift)
91{ 90{
92 unsigned h = ntohl(key & sel->hmask)>>fshift; 91 unsigned int h = ntohl(key & sel->hmask) >> fshift;
93 92
94 return h; 93 return h;
95} 94}
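
u32_hash_fold(), now a three-line inline, masks the packet word with the selector's hash mask, converts to host order, and shifts the surviving bits down so they index the hash table from bit 0. A standalone sketch of the arithmetic; the mask, key, and shift are invented values:

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
    unsigned int key = htonl(0x00012a00);   /* hypothetical packet word */
    unsigned int hmask = htonl(0x0000ff00); /* selector hash mask       */
    unsigned int fshift = 8;                /* trailing zeros of 0xff00 */

    printf("bucket: 0x%x\n", ntohl(key & hmask) >> fshift); /* 0x2a */
    return 0;
}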
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
101 unsigned int off; 100 unsigned int off;
102 } stack[TC_U32_MAXDEPTH]; 101 } stack[TC_U32_MAXDEPTH];
103 102
104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; 103 struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
105 unsigned int off = skb_network_offset(skb); 104 unsigned int off = skb_network_offset(skb);
106 struct tc_u_knode *n; 105 struct tc_u_knode *n;
107 int sdepth = 0; 106 int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
120 struct tc_u32_key *key = n->sel.keys; 119 struct tc_u32_key *key = n->sel.keys;
121 120
122#ifdef CONFIG_CLS_U32_PERF 121#ifdef CONFIG_CLS_U32_PERF
123 n->pf->rcnt +=1; 122 n->pf->rcnt += 1;
124 j = 0; 123 j = 0;
125#endif 124#endif
126 125
@@ -133,14 +132,14 @@ next_knode:
133 } 132 }
134#endif 133#endif
135 134
136 for (i = n->sel.nkeys; i>0; i--, key++) { 135 for (i = n->sel.nkeys; i > 0; i--, key++) {
137 int toff = off + key->off + (off2 & key->offmask); 136 int toff = off + key->off + (off2 & key->offmask);
138 __be32 *data, _data; 137 __be32 *data, hdata;
139 138
140 if (skb_headroom(skb) + toff > INT_MAX) 139 if (skb_headroom(skb) + toff > INT_MAX)
141 goto out; 140 goto out;
142 141
143 data = skb_header_pointer(skb, toff, 4, &_data); 142 data = skb_header_pointer(skb, toff, 4, &hdata);
144 if (!data) 143 if (!data)
145 goto out; 144 goto out;
146 if ((*data ^ key->val) & key->mask) { 145 if ((*data ^ key->val) & key->mask) {
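
The `_data` locals renamed to `hdata` throughout this function are the scratch buffers for skb_header_pointer(), which returns a pointer straight into the skb when the requested bytes are linear and otherwise copies them into the caller's buffer and returns that. A userspace analogue of the pattern (the helper name and signature are ours, not kernel API):

#include <stdio.h>
#include <string.h>

/* Rough analogue of skb_header_pointer(): return a pointer to 'len'
 * bytes at 'off', using the caller-supplied scratch buffer. The kernel
 * copies only when the region is not linearly accessible. */
static const void *header_pointer(const unsigned char *pkt, size_t pkt_len,
                                  size_t off, size_t len, void *scratch)
{
    if (off + len > pkt_len)
        return NULL; /* out of bounds */
    memcpy(scratch, pkt + off, len);
    return scratch;
}

int main(void)
{
    unsigned char pkt[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
    unsigned int hdata;
    const unsigned int *data = header_pointer(pkt, sizeof(pkt), 4, 4, &hdata);

    if (data)
        printf("read 4 bytes at offset 4\n");
    return 0;
}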
@@ -148,13 +147,13 @@ next_knode:
148 goto next_knode; 147 goto next_knode;
149 } 148 }
150#ifdef CONFIG_CLS_U32_PERF 149#ifdef CONFIG_CLS_U32_PERF
151 n->pf->kcnts[j] +=1; 150 n->pf->kcnts[j] += 1;
152 j++; 151 j++;
153#endif 152#endif
154 } 153 }
155 if (n->ht_down == NULL) { 154 if (n->ht_down == NULL) {
156check_terminal: 155check_terminal:
157 if (n->sel.flags&TC_U32_TERMINAL) { 156 if (n->sel.flags & TC_U32_TERMINAL) {
158 157
159 *res = n->res; 158 *res = n->res;
160#ifdef CONFIG_NET_CLS_IND 159#ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
164 } 163 }
165#endif 164#endif
166#ifdef CONFIG_CLS_U32_PERF 165#ifdef CONFIG_CLS_U32_PERF
167 n->pf->rhit +=1; 166 n->pf->rhit += 1;
168#endif 167#endif
169 r = tcf_exts_exec(skb, &n->exts, res); 168 r = tcf_exts_exec(skb, &n->exts, res);
170 if (r < 0) { 169 if (r < 0) {
@@ -188,26 +187,26 @@ check_terminal:
188 ht = n->ht_down; 187 ht = n->ht_down;
189 sel = 0; 188 sel = 0;
190 if (ht->divisor) { 189 if (ht->divisor) {
191 __be32 *data, _data; 190 __be32 *data, hdata;
192 191
193 data = skb_header_pointer(skb, off + n->sel.hoff, 4, 192 data = skb_header_pointer(skb, off + n->sel.hoff, 4,
194 &_data); 193 &hdata);
195 if (!data) 194 if (!data)
196 goto out; 195 goto out;
197 sel = ht->divisor & u32_hash_fold(*data, &n->sel, 196 sel = ht->divisor & u32_hash_fold(*data, &n->sel,
198 n->fshift); 197 n->fshift);
199 } 198 }
200 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) 199 if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
201 goto next_ht; 200 goto next_ht;
202 201
203 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { 202 if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
204 off2 = n->sel.off + 3; 203 off2 = n->sel.off + 3;
205 if (n->sel.flags & TC_U32_VAROFFSET) { 204 if (n->sel.flags & TC_U32_VAROFFSET) {
206 __be16 *data, _data; 205 __be16 *data, hdata;
207 206
208 data = skb_header_pointer(skb, 207 data = skb_header_pointer(skb,
209 off + n->sel.offoff, 208 off + n->sel.offoff,
210 2, &_data); 209 2, &hdata);
211 if (!data) 210 if (!data)
212 goto out; 211 goto out;
213 off2 += ntohs(n->sel.offmask & *data) >> 212 off2 += ntohs(n->sel.offmask & *data) >>
@@ -215,7 +214,7 @@ check_terminal:
215 } 214 }
216 off2 &= ~3; 215 off2 &= ~3;
217 } 216 }
218 if (n->sel.flags&TC_U32_EAT) { 217 if (n->sel.flags & TC_U32_EAT) {
219 off += off2; 218 off += off2;
220 off2 = 0; 219 off2 = 0;
221 } 220 }
@@ -236,11 +235,11 @@ out:
236 235
237deadloop: 236deadloop:
238 if (net_ratelimit()) 237 if (net_ratelimit())
239 printk(KERN_WARNING "cls_u32: dead loop\n"); 238 pr_warning("cls_u32: dead loop\n");
240 return -1; 239 return -1;
241} 240}
242 241
243static __inline__ struct tc_u_hnode * 242static struct tc_u_hnode *
244u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) 243u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
245{ 244{
246 struct tc_u_hnode *ht; 245 struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
252 return ht; 251 return ht;
253} 252}
254 253
255static __inline__ struct tc_u_knode * 254static struct tc_u_knode *
256u32_lookup_key(struct tc_u_hnode *ht, u32 handle) 255u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
257{ 256{
258 unsigned sel; 257 unsigned int sel;
259 struct tc_u_knode *n = NULL; 258 struct tc_u_knode *n = NULL;
260 259
261 sel = TC_U32_HASH(handle); 260 sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
300 do { 299 do {
301 if (++tp_c->hgenerator == 0x7FF) 300 if (++tp_c->hgenerator == 0x7FF)
302 tp_c->hgenerator = 1; 301 tp_c->hgenerator = 1;
303 } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); 302 } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
304 303
305 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; 304 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
306} 305}
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
378static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 377static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
379{ 378{
380 struct tc_u_knode *n; 379 struct tc_u_knode *n;
381 unsigned h; 380 unsigned int h;
382 381
383 for (h=0; h<=ht->divisor; h++) { 382 for (h = 0; h <= ht->divisor; h++) {
384 while ((n = ht->ht[h]) != NULL) { 383 while ((n = ht->ht[h]) != NULL) {
385 ht->ht[h] = n->next; 384 ht->ht[h] = n->next;
386 385
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
446 445
447static int u32_delete(struct tcf_proto *tp, unsigned long arg) 446static int u32_delete(struct tcf_proto *tp, unsigned long arg)
448{ 447{
449 struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; 448 struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
450 449
451 if (ht == NULL) 450 if (ht == NULL)
452 return 0; 451 return 0;
453 452
454 if (TC_U32_KEY(ht->handle)) 453 if (TC_U32_KEY(ht->handle))
455 return u32_delete_key(tp, (struct tc_u_knode*)ht); 454 return u32_delete_key(tp, (struct tc_u_knode *)ht);
456 455
457 if (tp->root == ht) 456 if (tp->root == ht)
458 return -EINVAL; 457 return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
470static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) 469static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
471{ 470{
472 struct tc_u_knode *n; 471 struct tc_u_knode *n;
473 unsigned i = 0x7FF; 472 unsigned int i = 0x7FF;
474 473
475 for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) 474 for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
476 if (i < TC_U32_NODE(n->handle)) 475 if (i < TC_U32_NODE(n->handle))
477 i = TC_U32_NODE(n->handle); 476 i = TC_U32_NODE(n->handle);
478 i++; 477 i++;
479 478
480 return handle|(i>0xFFF ? 0xFFF : i); 479 return handle | (i > 0xFFF ? 0xFFF : i);
481} 480}
482 481
483static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 482static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
566 if (err < 0) 565 if (err < 0)
567 return err; 566 return err;
568 567
569 if ((n = (struct tc_u_knode*)*arg) != NULL) { 568 n = (struct tc_u_knode *)*arg;
569 if (n) {
570 if (TC_U32_KEY(n->handle) == 0) 570 if (TC_U32_KEY(n->handle) == 0)
571 return -EINVAL; 571 return -EINVAL;
572 572
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
574 } 574 }
575 575
576 if (tb[TCA_U32_DIVISOR]) { 576 if (tb[TCA_U32_DIVISOR]) {
577 unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); 577 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
578 578
579 if (--divisor > 0x100) 579 if (--divisor > 0x100)
580 return -EINVAL; 580 return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
585 if (handle == 0) 585 if (handle == 0)
586 return -ENOMEM; 586 return -ENOMEM;
587 } 587 }
588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); 588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
589 if (ht == NULL) 589 if (ht == NULL)
590 return -ENOBUFS; 590 return -ENOBUFS;
591 ht->tp_c = tp_c; 591 ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
683 struct tc_u_common *tp_c = tp->data; 683 struct tc_u_common *tp_c = tp->data;
684 struct tc_u_hnode *ht; 684 struct tc_u_hnode *ht;
685 struct tc_u_knode *n; 685 struct tc_u_knode *n;
686 unsigned h; 686 unsigned int h;
687 687
688 if (arg->stop) 688 if (arg->stop)
689 return; 689 return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
717static int u32_dump(struct tcf_proto *tp, unsigned long fh, 717static int u32_dump(struct tcf_proto *tp, unsigned long fh,
718 struct sk_buff *skb, struct tcmsg *t) 718 struct sk_buff *skb, struct tcmsg *t)
719{ 719{
720 struct tc_u_knode *n = (struct tc_u_knode*)fh; 720 struct tc_u_knode *n = (struct tc_u_knode *)fh;
721 struct nlattr *nest; 721 struct nlattr *nest;
722 722
723 if (n == NULL) 723 if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
730 goto nla_put_failure; 730 goto nla_put_failure;
731 731
732 if (TC_U32_KEY(n->handle) == 0) { 732 if (TC_U32_KEY(n->handle) == 0) {
733 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; 733 struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
734 u32 divisor = ht->divisor+1; 734 u32 divisor = ht->divisor + 1;
735
735 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); 736 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
736 } else { 737 } else {
737 NLA_PUT(skb, TCA_U32_SEL, 738 NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
755 goto nla_put_failure; 756 goto nla_put_failure;
756 757
757#ifdef CONFIG_NET_CLS_IND 758#ifdef CONFIG_NET_CLS_IND
758 if(strlen(n->indev)) 759 if (strlen(n->indev))
759 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); 760 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
760#endif 761#endif
761#ifdef CONFIG_CLS_U32_PERF 762#ifdef CONFIG_CLS_U32_PERF
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bc450397487..1c8360a2752 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
33 return 0; 33 return 0;
34 34
35 switch (cmp->align) { 35 switch (cmp->align) {
36 case TCF_EM_ALIGN_U8: 36 case TCF_EM_ALIGN_U8:
37 val = *ptr; 37 val = *ptr;
38 break; 38 break;
39 39
40 case TCF_EM_ALIGN_U16: 40 case TCF_EM_ALIGN_U16:
41 val = get_unaligned_be16(ptr); 41 val = get_unaligned_be16(ptr);
42 42
43 if (cmp_needs_transformation(cmp)) 43 if (cmp_needs_transformation(cmp))
44 val = be16_to_cpu(val); 44 val = be16_to_cpu(val);
45 break; 45 break;
46 46
47 case TCF_EM_ALIGN_U32: 47 case TCF_EM_ALIGN_U32:
48 /* Worth checking boundries? The branching seems 48 /* Worth checking boundries? The branching seems
49 * to get worse. Visit again. */ 49 * to get worse. Visit again.
50 val = get_unaligned_be32(ptr); 50 */
51 val = get_unaligned_be32(ptr);
51 52
52 if (cmp_needs_transformation(cmp)) 53 if (cmp_needs_transformation(cmp))
53 val = be32_to_cpu(val); 54 val = be32_to_cpu(val);
54 break; 55 break;
55 56
56 default: 57 default:
57 return 0; 58 return 0;
58 } 59 }
59 60
60 if (cmp->mask) 61 if (cmp->mask)
61 val &= cmp->mask; 62 val &= cmp->mask;
62 63
63 switch (cmp->opnd) { 64 switch (cmp->opnd) {
64 case TCF_EM_OPND_EQ: 65 case TCF_EM_OPND_EQ:
65 return val == cmp->val; 66 return val == cmp->val;
66 case TCF_EM_OPND_LT: 67 case TCF_EM_OPND_LT:
67 return val < cmp->val; 68 return val < cmp->val;
68 case TCF_EM_OPND_GT: 69 case TCF_EM_OPND_GT:
69 return val > cmp->val; 70 return val > cmp->val;
70 } 71 }
71 72
72 return 0; 73 return 0;
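
em_cmp fetches the value to compare with get_unaligned_be16()/get_unaligned_be32(), which assemble a big-endian integer from a possibly unaligned packet offset without tripping alignment faults on strict architectures. A portable userspace equivalent of the 32-bit read (the helper is ours, not kernel API):

#include <stdio.h>

/* Byte-wise big-endian load; safe at any alignment, like the kernel's
 * get_unaligned_be32(). */
static unsigned int load_be32(const unsigned char *p)
{
    return ((unsigned int)p[0] << 24) | ((unsigned int)p[1] << 16) |
           ((unsigned int)p[2] << 8)  |  (unsigned int)p[3];
}

int main(void)
{
    unsigned char pkt[] = { 0xff, 0x12, 0x34, 0x56, 0x78 };

    printf("0x%08x\n", load_be32(pkt + 1)); /* odd offset: 0x12345678 */
    return 0;
}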
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 34da5e29ea1..a4de67eca82 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -73,21 +73,18 @@
73#include <net/pkt_cls.h> 73#include <net/pkt_cls.h>
74#include <net/sock.h> 74#include <net/sock.h>
75 75
76struct meta_obj 76struct meta_obj {
77{
78 unsigned long value; 77 unsigned long value;
79 unsigned int len; 78 unsigned int len;
80}; 79};
81 80
82struct meta_value 81struct meta_value {
83{
84 struct tcf_meta_val hdr; 82 struct tcf_meta_val hdr;
85 unsigned long val; 83 unsigned long val;
86 unsigned int len; 84 unsigned int len;
87}; 85};
88 86
89struct meta_match 87struct meta_match {
90{
91 struct meta_value lvalue; 88 struct meta_value lvalue;
92 struct meta_value rvalue; 89 struct meta_value rvalue;
93}; 90};
@@ -255,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
255 if (unlikely(skb_dst(skb) == NULL)) 252 if (unlikely(skb_dst(skb) == NULL))
256 *err = -1; 253 *err = -1;
257 else 254 else
258#ifdef CONFIG_NET_CLS_ROUTE 255#ifdef CONFIG_IP_ROUTE_CLASSID
259 dst->value = skb_dst(skb)->tclassid; 256 dst->value = skb_dst(skb)->tclassid;
260#else 257#else
261 dst->value = 0; 258 dst->value = 0;
@@ -267,7 +264,7 @@ META_COLLECTOR(int_rtiif)
267 if (unlikely(skb_rtable(skb) == NULL)) 264 if (unlikely(skb_rtable(skb) == NULL))
268 *err = -1; 265 *err = -1;
269 else 266 else
270 dst->value = skb_rtable(skb)->fl.iif; 267 dst->value = skb_rtable(skb)->rt_iif;
271} 268}
272 269
273/************************************************************************** 270/**************************************************************************
@@ -404,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf)
404META_COLLECTOR(int_sk_alloc) 401META_COLLECTOR(int_sk_alloc)
405{ 402{
406 SKIP_NONLOCAL(skb); 403 SKIP_NONLOCAL(skb);
407 dst->value = skb->sk->sk_allocation; 404 dst->value = (__force int) skb->sk->sk_allocation;
408} 405}
409 406
410META_COLLECTOR(int_sk_route_caps) 407META_COLLECTOR(int_sk_route_caps)
@@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
483 * Meta value collectors assignment table 480 * Meta value collectors assignment table
484 **************************************************************************/ 481 **************************************************************************/
485 482
486struct meta_ops 483struct meta_ops {
487{
488 void (*get)(struct sk_buff *, struct tcf_pkt_info *, 484 void (*get)(struct sk_buff *, struct tcf_pkt_info *,
489 struct meta_value *, struct meta_obj *, int *); 485 struct meta_value *, struct meta_obj *, int *);
490}; 486};
@@ -494,7 +490,7 @@ struct meta_ops
494 490
495/* Meta value operations table listing all meta value collectors and 491/* Meta value operations table listing all meta value collectors and
496 * assigns them to a type and meta id. */ 492 * assigns them to a type and meta id. */
497static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 493static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
498 [TCF_META_TYPE_VAR] = { 494 [TCF_META_TYPE_VAR] = {
499 [META_ID(DEV)] = META_FUNC(var_dev), 495 [META_ID(DEV)] = META_FUNC(var_dev),
500 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), 496 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
@@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
550 } 546 }
551}; 547};
552 548
553static inline struct meta_ops * meta_ops(struct meta_value *val) 549static inline struct meta_ops *meta_ops(struct meta_value *val)
554{ 550{
555 return &__meta_ops[meta_type(val)][meta_id(val)]; 551 return &__meta_ops[meta_type(val)][meta_id(val)];
556} 552}
@@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
649{ 645{
650 if (v->len == sizeof(unsigned long)) 646 if (v->len == sizeof(unsigned long))
651 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); 647 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
652 else if (v->len == sizeof(u32)) { 648 else if (v->len == sizeof(u32))
653 NLA_PUT_U32(skb, tlv, v->val); 649 NLA_PUT_U32(skb, tlv, v->val);
654 }
655 650
656 return 0; 651 return 0;
657 652
@@ -663,8 +658,7 @@ nla_put_failure:
663 * Type specific operations table 658 * Type specific operations table
664 **************************************************************************/ 659 **************************************************************************/
665 660
666struct meta_type_ops 661struct meta_type_ops {
667{
668 void (*destroy)(struct meta_value *); 662 void (*destroy)(struct meta_value *);
669 int (*compare)(struct meta_obj *, struct meta_obj *); 663 int (*compare)(struct meta_obj *, struct meta_obj *);
670 int (*change)(struct meta_value *, struct nlattr *); 664 int (*change)(struct meta_value *, struct nlattr *);
@@ -672,7 +666,7 @@ struct meta_type_ops
672 int (*dump)(struct sk_buff *, struct meta_value *, int); 666 int (*dump)(struct sk_buff *, struct meta_value *, int);
673}; 667};
674 668
675static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { 669static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
676 [TCF_META_TYPE_VAR] = { 670 [TCF_META_TYPE_VAR] = {
677 .destroy = meta_var_destroy, 671 .destroy = meta_var_destroy,
678 .compare = meta_var_compare, 672 .compare = meta_var_compare,
@@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
688 } 682 }
689}; 683};
690 684
691static inline struct meta_type_ops * meta_type_ops(struct meta_value *v) 685static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
692{ 686{
693 return &__meta_type_ops[meta_type(v)]; 687 return &__meta_type_ops[meta_type(v)];
694} 688}
@@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
713 return err; 707 return err;
714 708
715 if (meta_type_ops(v)->apply_extras) 709 if (meta_type_ops(v)->apply_extras)
716 meta_type_ops(v)->apply_extras(v, dst); 710 meta_type_ops(v)->apply_extras(v, dst);
717 711
718 return 0; 712 return 0;
719} 713}
@@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
732 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); 726 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
733 727
734 switch (meta->lvalue.hdr.op) { 728 switch (meta->lvalue.hdr.op) {
735 case TCF_EM_OPND_EQ: 729 case TCF_EM_OPND_EQ:
736 return !r; 730 return !r;
737 case TCF_EM_OPND_LT: 731 case TCF_EM_OPND_LT:
738 return r < 0; 732 return r < 0;
739 case TCF_EM_OPND_GT: 733 case TCF_EM_OPND_GT:
740 return r > 0; 734 return r > 0;
741 } 735 }
742 736
743 return 0; 737 return 0;
@@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
771 765
772static inline int meta_is_supported(struct meta_value *val) 766static inline int meta_is_supported(struct meta_value *val)
773{ 767{
774 return (!meta_id(val) || meta_ops(val)->get); 768 return !meta_id(val) || meta_ops(val)->get;
775} 769}
776 770
777static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { 771static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 1a4176aee6e..a3bed07a008 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
18#include <linux/tc_ematch/tc_em_nbyte.h> 18#include <linux/tc_ematch/tc_em_nbyte.h>
19#include <net/pkt_cls.h> 19#include <net/pkt_cls.h>
20 20
21struct nbyte_data 21struct nbyte_data {
22{
23 struct tcf_em_nbyte hdr; 22 struct tcf_em_nbyte hdr;
24 char pattern[0]; 23 char pattern[0];
25}; 24};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index ea8f566e720..15d353d2e4b 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
19#include <linux/tc_ematch/tc_em_text.h> 19#include <linux/tc_ematch/tc_em_text.h>
20#include <net/pkt_cls.h> 20#include <net/pkt_cls.h>
21 21
22struct text_match 22struct text_match {
23{
24 u16 from_offset; 23 u16 from_offset;
25 u16 to_offset; 24 u16 to_offset;
26 u8 from_layer; 25 u8 from_layer;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 953f1479f7d..797bdb88c01 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
35 if (!tcf_valid_offset(skb, ptr, sizeof(u32))) 35 if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
36 return 0; 36 return 0;
37 37
38 return !(((*(__be32*) ptr) ^ key->val) & key->mask); 38 return !(((*(__be32 *) ptr) ^ key->val) & key->mask);
39} 39}
40 40
41static struct tcf_ematch_ops em_u32_ops = { 41static struct tcf_ematch_ops em_u32_ops = {
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e37da961f8..88d93eb9250 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
93static LIST_HEAD(ematch_ops); 93static LIST_HEAD(ematch_ops);
94static DEFINE_RWLOCK(ematch_mod_lock); 94static DEFINE_RWLOCK(ematch_mod_lock);
95 95
96static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind) 96static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
97{ 97{
98 struct tcf_ematch_ops *e = NULL; 98 struct tcf_ematch_ops *e = NULL;
99 99
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
163} 163}
164EXPORT_SYMBOL(tcf_em_unregister); 164EXPORT_SYMBOL(tcf_em_unregister);
165 165
166static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, 166static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
167 int index) 167 int index)
168{ 168{
169 return &tree->matches[index]; 169 return &tree->matches[index];
170} 170}
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
184 184
185 if (em_hdr->kind == TCF_EM_CONTAINER) { 185 if (em_hdr->kind == TCF_EM_CONTAINER) {
186 /* Special ematch called "container", carries an index 186 /* Special ematch called "container", carries an index
187 * referencing an external ematch sequence. */ 187 * referencing an external ematch sequence.
188 */
188 u32 ref; 189 u32 ref;
189 190
190 if (data_len < sizeof(ref)) 191 if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
195 goto errout; 196 goto errout;
196 197
197 /* We do not allow backward jumps to avoid loops and jumps 198 /* We do not allow backward jumps to avoid loops and jumps
198 * to our own position are of course illegal. */ 199 * to our own position are of course illegal.
200 */
199 if (ref <= idx) 201 if (ref <= idx)
200 goto errout; 202 goto errout;
201 203
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
208 * which automatically releases the reference again, therefore 210 * which automatically releases the reference again, therefore
209 * the module MUST not be given back under any circumstances 211 * the module MUST not be given back under any circumstances
210 * here. Be aware, the destroy function assumes that the 212 * here. Be aware, the destroy function assumes that the
211 * module is held if the ops field is non zero. */ 213 * module is held if the ops field is non zero.
214 */
212 em->ops = tcf_em_lookup(em_hdr->kind); 215 em->ops = tcf_em_lookup(em_hdr->kind);
213 216
214 if (em->ops == NULL) { 217 if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
221 if (em->ops) { 224 if (em->ops) {
222 /* We dropped the RTNL mutex in order to 225 /* We dropped the RTNL mutex in order to
223 * perform the module load. Tell the caller 226 * perform the module load. Tell the caller
224 * to replay the request. */ 227 * to replay the request.
228 */
225 module_put(em->ops->owner); 229 module_put(em->ops->owner);
226 err = -EAGAIN; 230 err = -EAGAIN;
227 } 231 }
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
230 } 234 }
231 235
232 /* ematch module provides expected length of data, so we 236 /* ematch module provides expected length of data, so we
233 * can do a basic sanity check. */ 237 * can do a basic sanity check.
238 */
234 if (em->ops->datalen && data_len < em->ops->datalen) 239 if (em->ops->datalen && data_len < em->ops->datalen)
235 goto errout; 240 goto errout;
236 241
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
246 * TCF_EM_SIMPLE may be specified stating that the 251 * TCF_EM_SIMPLE may be specified stating that the
247 * data only consists of a u32 integer and the module 252 * data only consists of a u32 integer and the module
248 * does not expected a memory reference but rather 253 * does not expected a memory reference but rather
249 * the value carried. */ 254 * the value carried.
255 */
250 if (em_hdr->flags & TCF_EM_SIMPLE) { 256 if (em_hdr->flags & TCF_EM_SIMPLE) {
251 if (data_len < sizeof(u32)) 257 if (data_len < sizeof(u32))
252 goto errout; 258 goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
334 * The array of rt attributes is parsed in the order as they are 340 * The array of rt attributes is parsed in the order as they are
335 * provided, their type must be incremental from 1 to n. Even 341 * provided, their type must be incremental from 1 to n. Even
336 * if it does not serve any real purpose, a failure of sticking 342 * if it does not serve any real purpose, a failure of sticking
337 * to this policy will result in parsing failure. */ 343 * to this policy will result in parsing failure.
344 */
338 for (idx = 0; nla_ok(rt_match, list_len); idx++) { 345 for (idx = 0; nla_ok(rt_match, list_len); idx++) {
339 err = -EINVAL; 346 err = -EINVAL;
340 347
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
359 /* Check if the number of matches provided by userspace actually 366 /* Check if the number of matches provided by userspace actually
360 * complies with the array of matches. The number was used for 367 * complies with the array of matches. The number was used for
361 * the validation of references and a mismatch could lead to 368 * the validation of references and a mismatch could lead to
362 * undefined references during the matching process. */ 369 * undefined references during the matching process.
370 */
363 if (idx != tree_hdr->nmatches) { 371 if (idx != tree_hdr->nmatches) {
364 err = -EINVAL; 372 err = -EINVAL;
365 goto errout_abort; 373 goto errout_abort;
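
The comments reflowed in these hunks describe two invariants tcf_em_validate() and tcf_em_tree_validate() enforce on container matches: a referenced index must lie strictly ahead of the current match (no backward jumps, no self-reference), and the match count announced in the tree header must equal the number of attributes actually parsed. The forward-only rule, sketched in isolation:

#include <stdio.h>

/* Sketch: a container ematch at position idx may only reference a
 * later match within the announced count. */
static int ref_ok(unsigned int idx, unsigned int ref, unsigned int nmatches)
{
    return ref > idx && ref < nmatches;
}

int main(void)
{
    printf("%d %d %d\n", ref_ok(0, 2, 4),  /* 1: forward jump   */
                         ref_ok(2, 2, 4),  /* 0: self-reference */
                         ref_ok(3, 1, 4)); /* 0: backward jump  */
    return 0;
}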
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
449 .flags = em->flags 457 .flags = em->flags
450 }; 458 };
451 459
452 NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); 460 NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
453 461
454 if (em->ops && em->ops->dump) { 462 if (em->ops && em->ops->dump) {
455 if (em->ops->dump(skb, em) < 0) 463 if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
478 struct tcf_pkt_info *info) 486 struct tcf_pkt_info *info)
479{ 487{
480 int r = em->ops->match(skb, em, info); 488 int r = em->ops->match(skb, em, info);
489
481 return tcf_em_is_inverted(em) ? !r : r; 490 return tcf_em_is_inverted(em) ? !r : r;
482} 491}
483 492
@@ -527,8 +536,8 @@ pop_stack:
527 536
528stack_overflow: 537stack_overflow:
529 if (net_ratelimit()) 538 if (net_ratelimit())
530 printk(KERN_WARNING "tc ematch: local stack overflow," 539 pr_warning("tc ematch: local stack overflow,"
531 " increase NET_EMATCH_STACK\n"); 540 " increase NET_EMATCH_STACK\n");
532 return -1; 541 return -1;
533} 542}
534EXPORT_SYMBOL(__tcf_em_tree_match); 543EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b22ca2d1ceb..7490f3f2db8 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
187 int err = -ENOENT; 187 int err = -ENOENT;
188 188
189 write_lock(&qdisc_mod_lock); 189 write_lock(&qdisc_mod_lock);
190 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) 190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
191 if (q == qops) 191 if (q == qops)
192 break; 192 break;
193 if (q) { 193 if (q) {
@@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
321 if (!tab || --tab->refcnt) 321 if (!tab || --tab->refcnt)
322 return; 322 return;
323 323
324 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { 324 for (rtabp = &qdisc_rtab_list;
325 (rtab = *rtabp) != NULL;
326 rtabp = &rtab->next) {
325 if (rtab == tab) { 327 if (rtab == tab) {
326 *rtabp = rtab->next; 328 *rtabp = rtab->next;
327 kfree(rtab); 329 kfree(rtab);
@@ -396,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
396 return stab; 398 return stab;
397} 399}
398 400
401static void stab_kfree_rcu(struct rcu_head *head)
402{
403 kfree(container_of(head, struct qdisc_size_table, rcu));
404}
405
399void qdisc_put_stab(struct qdisc_size_table *tab) 406void qdisc_put_stab(struct qdisc_size_table *tab)
400{ 407{
401 if (!tab) 408 if (!tab)
@@ -405,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
405 412
406 if (--tab->refcnt == 0) { 413 if (--tab->refcnt == 0) {
407 list_del(&tab->list); 414 list_del(&tab->list);
408 kfree(tab); 415 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
409 } 416 }
410 417
411 spin_unlock(&qdisc_stab_lock); 418 spin_unlock(&qdisc_stab_lock);
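
The new stab_kfree_rcu()/call_rcu_bh() pair replaces the immediate kfree(): a reader that looked the size table up under RCU protection may still hold the pointer, so the free is deferred until a bottom-half grace period has elapsed. container_of() recovers the enclosing qdisc_size_table from the embedded rcu_head the callback receives. That step, shown standalone with mock types:

#include <stdio.h>
#include <stddef.h>

/* Userspace rendition of the kernel macro used by stab_kfree_rcu(). */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head { void *next; }; /* stand-in for the real struct */
struct qdisc_size_table_mock {
    int refcnt;
    struct rcu_head rcu;         /* embedded callback anchor */
};

int main(void)
{
    struct qdisc_size_table_mock tab = { .refcnt = 1 };
    struct rcu_head *head = &tab.rcu; /* what the RCU callback is handed */
    struct qdisc_size_table_mock *t =
        container_of(head, struct qdisc_size_table_mock, rcu);

    printf("recovered refcnt: %d\n", t->refcnt); /* prints 1 */
    return 0;
}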
@@ -428,7 +435,7 @@ nla_put_failure:
 	return -1;
 }
 
-void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
 {
 	int pkt_len, slot;
 
@@ -454,14 +461,13 @@ out:
 		pkt_len = 1;
 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
 }
-EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 
 void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
 {
 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
-		printk(KERN_WARNING
-		       "%s: %s qdisc %X: is non-work-conserving?\n",
-		       txt, qdisc->ops->id, qdisc->handle >> 16);
+		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+			txt, qdisc->ops->id, qdisc->handle >> 16);
 		qdisc->flags |= TCQ_F_WARN_NONWC;
 	}
 }
@@ -472,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
 
-	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(wd->qdisc);
 	__netif_schedule(qdisc_root(wd->qdisc));
 
 	return HRTIMER_NORESTART;
@@ -494,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 		      &qdisc_root_sleeping(wd->qdisc)->state))
 		return;
 
-	wd->qdisc->flags |= TCQ_F_THROTTLED;
+	qdisc_throttled(wd->qdisc);
 	time = ktime_set(0, 0);
 	time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
 	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -504,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
 	hrtimer_cancel(&wd->timer);
-	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(wd->qdisc);
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
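The three watchdog hunks above swap open-coded TCQ_F_THROTTLED manipulation for qdisc_throttled()/qdisc_unthrottled() accessors. Judging purely from the lines they replace, their shape is presumably something like the following (the real definitions live in include/net/sch_generic.h in this tree and may differ in detail):

static inline bool qdisc_is_throttled(const struct Qdisc *qdisc)
{
	return qdisc->flags & TCQ_F_THROTTLED;
}

static inline void qdisc_throttled(struct Qdisc *qdisc)
{
	qdisc->flags |= TCQ_F_THROTTLED;	/* watchdog armed */
}

static inline void qdisc_unthrottled(struct Qdisc *qdisc)
{
	qdisc->flags &= ~TCQ_F_THROTTLED;	/* ready to dequeue again */
}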
@@ -625,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 		autohandle = TC_H_MAKE(0x80000000U, 0);
 	} while (qdisc_lookup(dev, autohandle) && --i > 0);
 
-	return i>0 ? autohandle : 0;
+	return i > 0 ? autohandle : 0;
 }
 
 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -834,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 			err = PTR_ERR(stab);
 			goto err_out4;
 		}
-		sch->stab = stab;
+		rcu_assign_pointer(sch->stab, stab);
 	}
 	if (tca[TCA_RATE]) {
 		spinlock_t *root_lock;
@@ -874,7 +880,7 @@ err_out4:
 	 * Any broken qdiscs that would require a ops->reset() here?
 	 * The qdisc was never in action so it shouldn't be necessary.
 	 */
-	qdisc_put_stab(sch->stab);
+	qdisc_put_stab(rtnl_dereference(sch->stab));
 	if (ops->destroy)
 		ops->destroy(sch);
 	goto err_out3;
@@ -882,7 +888,7 @@ err_out4:
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-	struct qdisc_size_table *stab = NULL;
+	struct qdisc_size_table *ostab, *stab = NULL;
 	int err = 0;
 
 	if (tca[TCA_OPTIONS]) {
@@ -899,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 			return PTR_ERR(stab);
 	}
 
-	qdisc_put_stab(sch->stab);
-	sch->stab = stab;
+	ostab = rtnl_dereference(sch->stab);
+	rcu_assign_pointer(sch->stab, stab);
+	qdisc_put_stab(ostab);
 
 	if (tca[TCA_RATE]) {
 		/* NB: ignores errors from replace_estimator
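Note the ordering qdisc_change() now uses: the replacement stab is published with rcu_assign_pointer() before the old one is released, so a concurrent reader always observes either the old or the new table, never freed memory. The update-side recipe in generic form (struct obj, struct cfg and cfg_put() are illustrative names, not kernel API):

static void obj_set_cfg(struct obj *o, struct cfg *new)
{
	struct cfg *old;

	old = rtnl_dereference(o->cfg);		/* updater runs under RTNL */
	rcu_assign_pointer(o->cfg, new);	/* publish the new table first */
	cfg_put(old);				/* then release the old one;
						 * its kfree is RCU-deferred */
}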
@@ -915,9 +922,8 @@ out:
 	return 0;
 }
 
-struct check_loop_arg
-{
-	struct qdisc_walker	w;
+struct check_loop_arg {
+	struct qdisc_walker	w;
 	struct Qdisc		*p;
 	int			depth;
 };
@@ -970,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct Qdisc *p = NULL;
 	int err;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -980,12 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
-				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+				p = qdisc_lookup(dev, TC_H_MAJ(clid));
+				if (!p)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
-			} else { /* ingress */
-				if (dev_ingress_queue(dev))
-					q = dev_ingress_queue(dev)->qdisc_sleeping;
+			} else if (dev_ingress_queue(dev)) {
+				q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -996,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
 			return -EINVAL;
 	} else {
-		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+		q = qdisc_lookup(dev, tcm->tcm_handle);
+		if (!q)
 			return -ENOENT;
 	}
 
@@ -1008,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return -EINVAL;
 		if (q->handle == 0)
 			return -ENOENT;
-		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
+		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
+		if (err != 0)
 			return err;
 	} else {
 		qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1017,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 }
 
 /*
-   Create/change qdisc.
+ * Create/change qdisc.
  */
 
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1036,7 +1045,8 @@ replay:
 	clid = tcm->tcm_parent;
 	q = p = NULL;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1046,12 +1056,12 @@ replay:
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (clid != TC_H_INGRESS) {
-				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+				p = qdisc_lookup(dev, TC_H_MAJ(clid));
+				if (!p)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
-			} else { /* ingress */
-				if (dev_ingress_queue_create(dev))
-					q = dev_ingress_queue(dev)->qdisc_sleeping;
+			} else if (dev_ingress_queue_create(dev)) {
+				q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -1063,13 +1073,14 @@ replay:
 
 	if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
 		if (tcm->tcm_handle) {
-			if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+			if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
 				return -EEXIST;
 			if (TC_H_MIN(tcm->tcm_handle))
 				return -EINVAL;
-			if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+			q = qdisc_lookup(dev, tcm->tcm_handle);
+			if (!q)
 				goto create_n_graft;
-			if (n->nlmsg_flags&NLM_F_EXCL)
+			if (n->nlmsg_flags & NLM_F_EXCL)
 				return -EEXIST;
 			if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 				return -EINVAL;
@@ -1079,7 +1090,7 @@ replay:
 				atomic_inc(&q->refcnt);
 				goto graft;
 			} else {
-				if (q == NULL)
+				if (!q)
 					goto create_n_graft;
 
 				/* This magic test requires explanation.
@@ -1101,9 +1112,9 @@ replay:
 				 *   For now we select create/graft, if
 				 *   user gave KIND, which does not match existing.
 				 */
-				if ((n->nlmsg_flags&NLM_F_CREATE) &&
-				    (n->nlmsg_flags&NLM_F_REPLACE) &&
-				    ((n->nlmsg_flags&NLM_F_EXCL) ||
+				if ((n->nlmsg_flags & NLM_F_CREATE) &&
+				    (n->nlmsg_flags & NLM_F_REPLACE) &&
+				    ((n->nlmsg_flags & NLM_F_EXCL) ||
 				     (tca[TCA_KIND] &&
 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
 					goto create_n_graft;
@@ -1118,7 +1129,7 @@ replay:
 		/* Change qdisc parameters */
 		if (q == NULL)
 			return -ENOENT;
-		if (n->nlmsg_flags&NLM_F_EXCL)
+		if (n->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 		if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 			return -EINVAL;
@@ -1128,7 +1139,7 @@ replay:
 		return err;
 
 create_n_graft:
-	if (!(n->nlmsg_flags&NLM_F_CREATE))
+	if (!(n->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 	if (clid == TC_H_INGRESS) {
 		if (dev_ingress_queue(dev))
@@ -1175,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
+	struct qdisc_size_table *stab;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
@@ -1190,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
 
-	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+	stab = rtnl_dereference(q->stab);
+	if (stab && qdisc_dump_stab(skb, stab) < 0)
 		goto nla_put_failure;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1234,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 		return -ENOBUFS;
 
 	if (old && !tc_qdisc_dump_ignore(old)) {
-		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+				  0, RTM_DELQDISC) < 0)
 			goto err_out;
 	}
 	if (new && !tc_qdisc_dump_ignore(new)) {
-		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 			goto err_out;
 	}
 
 	if (skb->len)
-		return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+		return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+				      n->nlmsg_flags & NLM_F_ECHO);
 
 err_out:
 	kfree_skb(skb);
@@ -1275,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			q_idx++;
 			continue;
 		}
-		if (!tc_qdisc_dump_ignore(q) && 
+		if (!tc_qdisc_dump_ignore(q) &&
 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
 				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 			goto done;
@@ -1356,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	u32 qid = TC_H_MAJ(clid);
 	int err;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1391,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			qid = dev->qdisc->handle;
 
 		/* Now qid is genuine qdisc handle consistent
-		   both with parent and child.
-
-		   TC_H_MAJ(pid) still may be unspecified, complete it now.
+		 * both with parent and child.
+		 *
+		 * TC_H_MAJ(pid) still may be unspecified, complete it now.
 		 */
 		if (pid)
 			pid = TC_H_MAKE(qid, pid);
@@ -1403,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	}
 
 	/* OK. Locate qdisc */
-	if ((q = qdisc_lookup(dev, qid)) == NULL)
+	q = qdisc_lookup(dev, qid);
+	if (!q)
 		return -ENOENT;
 
 	/* An check that it supports classes */
@@ -1423,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	if (cl == 0) {
 		err = -ENOENT;
-		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTCLASS ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			goto out;
 	} else {
 		switch (n->nlmsg_type) {
 		case RTM_NEWTCLASS:
 			err = -EEXIST;
-			if (n->nlmsg_flags&NLM_F_EXCL)
+			if (n->nlmsg_flags & NLM_F_EXCL)
 				goto out;
 			break;
 		case RTM_DELTCLASS:
@@ -1521,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
-struct qdisc_dump_args
-{
-	struct qdisc_walker w;
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
+struct qdisc_dump_args {
+	struct qdisc_walker	w;
+	struct sk_buff		*skb;
+	struct netlink_callback	*cb;
 };
 
 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1590,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
 	struct net *net = sock_net(skb->sk);
 	struct netdev_queue *dev_queue;
 	struct net_device *dev;
@@ -1598,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return 0;
-	if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return 0;
 
 	s_t = cb->args[0];
@@ -1621,19 +1641,22 @@ done:
 }
 
 /* Main classifier routine: scans classifier chain attached
-   to this qdisc, (optionally) tests for protocol and asks
-   specific classifiers.
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
  */
 int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
 		       struct tcf_result *res)
 {
 	__be16 protocol = skb->protocol;
-	int err = 0;
+	int err;
 
 	for (; tp; tp = tp->next) {
-		if ((tp->protocol == protocol ||
-		     tp->protocol == htons(ETH_P_ALL)) &&
-		    (err = tp->classify(skb, tp, res)) >= 0) {
+		if (tp->protocol != protocol &&
+		    tp->protocol != htons(ETH_P_ALL))
+			continue;
+		err = tp->classify(skb, tp, res);
+
+		if (err >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
 				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
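The tc_classify_compat() rewrite above keeps the behaviour but flattens the compound condition into an early continue: entries registered for other protocols are skipped, and the first classifier that returns a non-negative verdict wins. The same loop shape, self-contained (chain_entry and its fields are stand-ins for struct tcf_proto):

struct chain_entry {
	unsigned short proto;		/* 0 means "match any protocol" here */
	int (*classify)(const void *pkt, int *out);
	struct chain_entry *next;
};

static int chain_classify(const struct chain_entry *e, unsigned short proto,
			  const void *pkt, int *out)
{
	for (; e; e = e->next) {
		int err;

		if (e->proto && e->proto != proto)
			continue;	/* not this entry's protocol */
		err = e->classify(pkt, out);
		if (err >= 0)
			return err;	/* first verdict wins */
	}
	return -1;			/* no classifier matched */
}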
@@ -1649,12 +1672,12 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
 		struct tcf_result *res)
 {
 	int err = 0;
-	__be16 protocol;
 #ifdef CONFIG_NET_CLS_ACT
+	__be16 protocol;
 	struct tcf_proto *otp = tp;
 reclassify:
-#endif
 	protocol = skb->protocol;
+#endif
 
 	err = tc_classify_compat(skb, tp, res);
 #ifdef CONFIG_NET_CLS_ACT
@@ -1664,11 +1687,11 @@ reclassify:
 
 		if (verd++ >= MAX_REC_LOOP) {
 			if (net_ratelimit())
-				printk(KERN_NOTICE
-				       "%s: packet reclassify loop"
-				       " rule prio %u protocol %02x\n",
-				       tp->q->ops->id,
-				       tp->prio & 0xffff, ntohs(tp->protocol));
+				pr_notice("%s: packet reclassify loop"
+					  " rule prio %u protocol %02x\n",
+					  tp->q->ops->id,
+					  tp->prio & 0xffff,
+					  ntohs(tp->protocol));
 			return TC_ACT_SHOT;
 		}
 		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
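The MAX_REC_LOOP guard above caps how many times TC_ACT_RECLASSIFY may restart classification; without it a misconfigured filter chain could loop forever. The core of that pattern with the kernel types stripped away (the enum and limit below are illustrative stand-ins for the TC_ACT_* verdicts):

enum verdict { PASS = 0, SHOT = 1, RECLASSIFY = 2 };

#define MY_MAX_REC_LOOP 4

static enum verdict classify_bounded(const void *pkt,
				     enum verdict (*do_classify)(const void *))
{
	int loops = 0;
	enum verdict v;

	while ((v = do_classify(pkt)) == RECLASSIFY) {
		if (loops++ >= MY_MAX_REC_LOOP)
			return SHOT;	/* break the cycle: drop the packet */
	}
	return v;
}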
@@ -1761,7 +1784,7 @@ static int __init pktsched_init(void)
 
 	err = register_pernet_subsys(&psched_net_ops);
 	if (err) {
-		printk(KERN_ERR "pktsched_init: "
-		       "cannot initialize per netns operations\n");
+		pr_err("pktsched_init: "
+		       "cannot initialize per netns operations\n");
 		return err;
 	}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 943d733409d..3f08158b868 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
 	 * creation), and one for the reference held when calling delete.
 	 */
 	if (flow->ref < 2) {
-		printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref);
+		pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
 		return -EINVAL;
 	}
 	if (flow->ref > 2)
@@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			}
 		}
 		flow = NULL;
-	done:
+done:
 		;
 	}
-	if (!flow)
+	if (!flow) {
 		flow = &p->link;
-	else {
+	} else {
 		if (flow->vcc)
 			ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
 		/*@@@ looks good ... but it's not supposed to work :-) */
@@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 
 	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
-			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
-			       flow->ref);
+			pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
 		atm_tc_put(sch, (unsigned long)flow);
 	}
 	tasklet_kill(&p->task);
@@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 	}
 	if (flow->excess)
 		NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
-	else {
+	else
 		NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
-	}
 
 	nla_nest_end(skb, nest);
 	return skb->len;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 5f63ec58942..24d94c097b3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
 struct cbq_sched_data;
 
 
-struct cbq_class
-{
+struct cbq_class {
 	struct Qdisc_class_common common;
 	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
 
@@ -139,19 +138,18 @@ struct cbq_class
 	int			refcnt;
 	int			filters;
 
-	struct cbq_class	*defaults[TC_PRIO_MAX+1];
+	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
 };
 
-struct cbq_sched_data
-{
+struct cbq_sched_data {
 	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
-	int			nclasses[TC_CBQ_MAXPRIO+1];
-	unsigned		quanta[TC_CBQ_MAXPRIO+1];
+	int			nclasses[TC_CBQ_MAXPRIO + 1];
+	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];
 
 	struct cbq_class	link;
 
-	unsigned		activemask;
-	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
+	unsigned int		activemask;
+	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
 								   with backlog */
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
 	int			tx_len;
 	psched_time_t		now;		/* Cached timestamp */
 	psched_time_t		now_rt;		/* Cached real time */
-	unsigned		pmask;
+	unsigned int		pmask;
 
 	struct hrtimer		delay_timer;
 	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
 };
 
 
-#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
+#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 {
 	struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 static struct cbq_class *
 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 {
-	struct cbq_class *cl, *new;
+	struct cbq_class *cl;
 
-	for (cl = this->tparent; cl; cl = cl->tparent)
-		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
-			return new;
+	for (cl = this->tparent; cl; cl = cl->tparent) {
+		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 
+		if (new != NULL && new != this)
+			return new;
+	}
 	return NULL;
 }
 
 #endif
 
 /* Classify packet. The procedure is pretty complicated, but
-   it allows us to combine link sharing and priority scheduling
-   transparently.
-
-   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
-   so that it resolves to split nodes. Then packets are classified
-   by logical priority, or a more specific classifier may be attached
-   to the split node.
+ * it allows us to combine link sharing and priority scheduling
+ * transparently.
+ *
+ * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
+ * so that it resolves to split nodes. Then packets are classified
+ * by logical priority, or a more specific classifier may be attached
+ * to the split node.
  */
 
 static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	/*
 	 *  Step 1. If skb->priority points to one of our classes, use it.
 	 */
-	if (TC_H_MAJ(prio^sch->handle) == 0 &&
+	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 	    (cl = cbq_class_lookup(q, prio)) != NULL)
 		return cl;
 
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 			goto fallback;
 
-		if ((cl = (void*)res.class) == NULL) {
+		cl = (void *)res.class;
+		if (!cl) {
 			if (TC_H_MAJ(res.classid))
 				cl = cbq_class_lookup(q, res.classid);
-			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 				cl = defmap[TC_PRIO_BESTEFFORT];
 
 			if (cl == NULL || cl->level >= head->level)
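The fallback chain in cbq_classify() above resolves a packet to a class in up to three steps: the filter's explicit result, then the split node's per-priority default, then the best-effort default. That lookup order in isolation (my_class and the MY_* constants are simplified stand-ins for the CBQ types):

#define MY_PRIO_MAX		15
#define MY_PRIO_BESTEFFORT	0

struct my_class;

static struct my_class *pick_class(struct my_class *from_filter,
				   struct my_class *defmap[MY_PRIO_MAX + 1],
				   unsigned int prio)
{
	if (from_filter)			/* explicit filter match */
		return from_filter;
	if (defmap[prio & MY_PRIO_MAX])		/* per-priority default */
		return defmap[prio & MY_PRIO_MAX];
	return defmap[MY_PRIO_BESTEFFORT];	/* last-resort default */
}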
@@ -282,7 +283,7 @@ fallback:
 	 * Step 4. No success...
 	 */
 	if (TC_H_MAJ(prio) == 0 &&
-	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 		return head;
 
@@ -290,12 +291,12 @@ fallback:
 }
 
 /*
-   A packet has just been enqueued on the empty class.
-   cbq_activate_class adds it to the tail of active class list
-   of its priority band.
+ * A packet has just been enqueued on the empty class.
+ * cbq_activate_class adds it to the tail of active class list
+ * of its priority band.
  */
 
-static __inline__ void cbq_activate_class(struct cbq_class *cl)
+static inline void cbq_activate_class(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 	int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
 }
 
 /*
-   Unlink class from active chain.
-   Note that this same procedure is done directly in cbq_dequeue*
-   during round-robin procedure.
+ * Unlink class from active chain.
+ * Note that this same procedure is done directly in cbq_dequeue*
+ * during round-robin procedure.
  */
 
 static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 {
 	int toplevel = q->toplevel;
 
-	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 		psched_time_t now;
 		psched_tdiff_t incr;
 
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 				q->toplevel = cl->level;
 				return;
 			}
-		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 	}
 }
 
@@ -417,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		delay += cl->offtime;
 
 		/*
-		   Class goes to sleep, so that it will have no
-		   chance to work avgidle. Let's forgive it 8)
-
-		   BTW cbq-2.0 has a crap in this
-		   place, apparently they forgot to shift it by cl->ewma_log.
+		 * Class goes to sleep, so that it will have no
+		 * chance to work avgidle. Let's forgive it 8)
+		 *
+		 * BTW cbq-2.0 has a crap in this
+		 * place, apparently they forgot to shift it by cl->ewma_log.
 		 */
 		if (cl->avgidle < 0)
 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -438,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 			q->wd_expires = delay;
 
 		/* Dirty work! We must schedule wakeups based on
-		   real available rate, rather than leaf rate,
-		   which may be tiny (even zero).
+		 * real available rate, rather than leaf rate,
+		 * which may be tiny (even zero).
 		 */
 		if (q->toplevel == TC_CBQ_MAXLEVEL) {
 			struct cbq_class *b;
@@ -459,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 }
 
 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
-   they go overlimit
+ * they go overlimit
  */
 
 static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -594,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	struct Qdisc *sch = q->watchdog.qdisc;
 	psched_time_t now;
 	psched_tdiff_t delay = 0;
-	unsigned pmask;
+	unsigned int pmask;
 
 	now = psched_get_time();
 
@@ -623,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
-	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(sch);
 	__netif_schedule(qdisc_root(sch));
 	return HRTIMER_NORESTART;
 }
@@ -663,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 #endif
 
 /*
-   It is mission critical procedure.
-
-   We "regenerate" toplevel cutoff, if transmitting class
-   has backlog and it is not regulated. It is not part of
-   original CBQ description, but looks more reasonable.
-   Probably, it is wrong. This question needs further investigation.
-*/
+ * It is mission critical procedure.
+ *
+ * We "regenerate" toplevel cutoff, if transmitting class
+ * has backlog and it is not regulated. It is not part of
+ * original CBQ description, but looks more reasonable.
+ * Probably, it is wrong. This question needs further investigation.
+ */
 
-static __inline__ void
+static inline void
 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 		    struct cbq_class *borrowed)
 {
@@ -682,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 				q->toplevel = borrowed->level;
 				return;
 			}
-		} while ((borrowed=borrowed->borrow) != NULL);
+		} while ((borrowed = borrowed->borrow) != NULL);
 	}
 #if 0
 	/* It is not necessary now. Uncommenting it
@@ -710,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
 		cl->bstats.bytes += len;
 
 		/*
-		   (now - last) is total time between packet right edges.
-		   (last_pktlen/rate) is "virtual" busy time, so that
-
-		   idle = (now - last) - last_pktlen/rate
+		 * (now - last) is total time between packet right edges.
+		 * (last_pktlen/rate) is "virtual" busy time, so that
+		 *
+		 *	idle = (now - last) - last_pktlen/rate
 		 */
 
 		idle = q->now - cl->last;
@@ -723,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(cl, len);
 
 		/* true_avgidle := (1-W)*true_avgidle + W*idle,
-		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
-		   cl->avgidle == true_avgidle/W,
-		   hence:
+		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
+		 * cl->avgidle == true_avgidle/W,
+		 * hence:
 		 */
 			avgidle += idle - (avgidle>>cl->ewma_log);
 		}
@@ -739,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
 			cl->avgidle = avgidle;
 
 			/* Calculate expected time, when this class
-			   will be allowed to send.
-			   It will occur, when:
-			   (1-W)*true_avgidle + W*delay = 0, i.e.
-			   idle = (1/W - 1)*(-true_avgidle)
-			   or
-			   idle = (1 - W)*(-cl->avgidle);
+			 * will be allowed to send.
+			 * It will occur, when:
+			 * (1-W)*true_avgidle + W*delay = 0, i.e.
+			 * idle = (1/W - 1)*(-true_avgidle)
+			 * or
+			 * idle = (1 - W)*(-cl->avgidle);
 			 */
 			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 
 			/*
-			   That is not all.
-			   To maintain the rate allocated to the class,
-			   we add to undertime virtual clock,
-			   necessary to complete transmitted packet.
-			   (len/phys_bandwidth has been already passed
-			   to the moment of cbq_update)
+			 * That is not all.
+			 * To maintain the rate allocated to the class,
+			 * we add to undertime virtual clock,
+			 * necessary to complete transmitted packet.
+			 * (len/phys_bandwidth has been already passed
+			 * to the moment of cbq_update)
 			 */
 
 			idle -= L2T(&q->link, len);
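The reformatted comments above rest on one identity worth spelling out: with W = 2^-ewma_log and cl->avgidle kept scaled as true_avgidle/W, the update true' = (1-W)*true + W*idle reduces to exactly avgidle += idle - (avgidle >> ewma_log), one add and one shift per packet. A tiny standalone check with arbitrary numbers:

#include <stdio.h>

int main(void)
{
	long scaled = 1024;	/* plays the role of cl->avgidle */
	long idle = -100;	/* one new idle sample */
	int ewma_log = 5;	/* W = 1/32 */

	scaled += idle - (scaled >> ewma_log);		/* the kernel's update step */
	printf("new scaled avgidle = %ld\n", scaled);	/* 1024 - 100 - 32 = 892 */
	return 0;
}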
@@ -776,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
 	cbq_update_toplevel(q, this, q->tx_borrowed);
 }
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_under_limit(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -792,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
 
 	do {
 		/* It is very suspicious place. Now overlimit
-		   action is generated for not bounded classes
-		   only if link is completely congested.
-		   Though it is in agree with ancestor-only paradigm,
-		   it looks very stupid. Particularly,
-		   it means that this chunk of code will either
-		   never be called or result in strong amplification
-		   of burstiness. Dangerous, silly, and, however,
-		   no another solution exists.
+		 * action is generated for not bounded classes
+		 * only if link is completely congested.
+		 * Though it is in agree with ancestor-only paradigm,
+		 * it looks very stupid. Particularly,
+		 * it means that this chunk of code will either
+		 * never be called or result in strong amplification
+		 * of burstiness. Dangerous, silly, and, however,
+		 * no another solution exists.
 		 */
-		if ((cl = cl->borrow) == NULL) {
+		cl = cl->borrow;
+		if (!cl) {
 			this_cl->qstats.overlimits++;
 			this_cl->overlimit(this_cl);
 			return NULL;
@@ -814,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
 	return cl;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
@@ -838,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 
 			if (cl->deficit <= 0) {
 				/* Class exhausted its allotment per
-				   this round. Switch to the next one.
+				 * this round. Switch to the next one.
 				 */
 				deficit = 1;
 				cl->deficit += cl->quantum;
@@ -848,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 			skb = cl->q->dequeue(cl->q);
 
 			/* Class did not give us any skb :-(
-			   It could occur even if cl->q->q.qlen != 0
-			   f.e. if cl->q == "tbf"
+			 * It could occur even if cl->q->q.qlen != 0
+			 * f.e. if cl->q == "tbf"
 			 */
 			if (skb == NULL)
 				goto skip_class;
@@ -878,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 skip_class:
 			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 				/* Class is empty or penalized.
-				   Unlink it from active chain.
+				 * Unlink it from active chain.
 				 */
 				cl_prev->next_alive = cl->next_alive;
 				cl->next_alive = NULL;
@@ -917,14 +919,14 @@ next_class:
 	return NULL;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_1(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	unsigned activemask;
+	unsigned int activemask;
 
-	activemask = q->activemask&0xFF;
+	activemask = q->activemask & 0xFF;
 	while (activemask) {
 		int prio = ffz(~activemask);
 		activemask &= ~(1<<prio);
@@ -949,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
 	if (q->tx_class) {
 		psched_tdiff_t incr2;
 		/* Time integrator. We calculate EOS time
-		   by adding expected packet transmission time.
-		   If real time is greater, we warp artificial clock,
-		   so that:
-
-		   cbq_time = max(real_time, work);
+		 * by adding expected packet transmission time.
+		 * If real time is greater, we warp artificial clock,
+		 * so that:
+		 *
+		 * cbq_time = max(real_time, work);
 		 */
 		incr2 = L2T(&q->link, q->tx_len);
 		q->now += incr2;
@@ -971,27 +973,27 @@ cbq_dequeue(struct Qdisc *sch)
 		if (skb) {
 			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
-			sch->flags &= ~TCQ_F_THROTTLED;
+			qdisc_unthrottled(sch);
 			return skb;
 		}
 
 		/* All the classes are overlimit.
-
-		   It is possible, if:
-
-		   1. Scheduler is empty.
-		   2. Toplevel cutoff inhibited borrowing.
-		   3. Root class is overlimit.
-
-		   Reset 2d and 3d conditions and retry.
-
-		   Note, that NS and cbq-2.0 are buggy, peeking
-		   an arbitrary class is appropriate for ancestor-only
-		   sharing, but not for toplevel algorithm.
-
-		   Our version is better, but slower, because it requires
-		   two passes, but it is unavoidable with top-level sharing.
-		*/
+		 *
+		 * It is possible, if:
+		 *
+		 * 1. Scheduler is empty.
+		 * 2. Toplevel cutoff inhibited borrowing.
+		 * 3. Root class is overlimit.
+		 *
+		 * Reset 2d and 3d conditions and retry.
+		 *
+		 * Note, that NS and cbq-2.0 are buggy, peeking
+		 * an arbitrary class is appropriate for ancestor-only
+		 * sharing, but not for toplevel algorithm.
+		 *
+		 * Our version is better, but slower, because it requires
+		 * two passes, but it is unavoidable with top-level sharing.
+		 */
 
 		if (q->toplevel == TC_CBQ_MAXLEVEL &&
 		    q->link.undertime == PSCHED_PASTPERFECT)
@@ -1002,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
 	}
 
 	/* No packets in scheduler or nobody wants to give them to us :-(
-	   Sigh... start watchdog timer in the last case. */
+	 * Sigh... start watchdog timer in the last case.
+	 */
 
 	if (sch->q.qlen) {
 		sch->qstats.overlimits++;
@@ -1024,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
 		int level = 0;
 		struct cbq_class *cl;
 
-		if ((cl = this->children) != NULL) {
+		cl = this->children;
+		if (cl) {
 			do {
 				if (cl->level > level)
 					level = cl->level;
 			} while ((cl = cl->sibling) != this->children);
 		}
-		this->level = level+1;
+		this->level = level + 1;
 	} while ((this = this->tparent) != NULL);
 }
 
@@ -1046,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
 			/* BUGGGG... Beware! This expression suffer of
-			   arithmetic overflows!
+			 * arithmetic overflows!
 			 */
 			if (cl->priority == prio) {
 				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
 					q->quanta[prio];
 			}
 			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
-				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+					   cl->common.classid, cl->quantum);
 				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
 			}
 		}
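The BUGGGG comment above is accurate: weight*allot*nclasses is evaluated in native long arithmetic, which is 32 bits on many platforms, and can wrap before the division; the "bad quantum" repair branch then papers over the damage. One way it could be computed safely, as a sketch rather than anything this patch does, is to widen the product to 64 bits and divide with div_u64() from linux/math64.h:

/* Hypothetical overflow-safe variant; not part of this series. */
static long cbq_safe_quantum(u32 weight, u32 allot, u32 nclasses, u32 quanta)
{
	u64 tmp = (u64)weight * allot;

	tmp *= nclasses;
	return (long)div_u64(tmp, quanta);
}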
@@ -1064,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 	struct cbq_class *split = cl->split;
-	unsigned h;
+	unsigned int h;
 	int i;
 
 	if (split == NULL)
 		return;
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
-		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
+		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
 			split->defaults[i] = NULL;
 	}
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
 		int level = split->level;
 
 		if (split->defaults[i])
@@ -1088,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 			hlist_for_each_entry(c, n, &q->clhash.hash[h],
 					     common.hnode) {
 				if (c->split == split && c->level < level &&
-				    c->defmap&(1<<i)) {
+				    c->defmap & (1<<i)) {
 					split->defaults[i] = c;
 					level = c->level;
 				}
@@ -1102,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
 	struct cbq_class *split = NULL;
 
 	if (splitid == 0) {
-		if ((split = cl->split) == NULL)
+		split = cl->split;
+		if (!split)
 			return;
 		splitid = split->common.classid;
 	}
@@ -1120,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
 		cl->defmap = 0;
 		cbq_sync_defmap(cl);
 		cl->split = split;
-		cl->defmap = def&mask;
+		cl->defmap = def & mask;
 	} else
-		cl->defmap = (cl->defmap&~mask)|(def&mask);
+		cl->defmap = (cl->defmap & ~mask) | (def & mask);
 
 	cbq_sync_defmap(cl);
 }
@@ -1135,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
 	qdisc_class_hash_remove(&q->clhash, &this->common);
 
 	if (this->tparent) {
-		clp=&this->sibling;
+		clp = &this->sibling;
 		cl = *clp;
 		do {
 			if (cl == this) {
@@ -1174,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
 	}
 }
 
-static unsigned int cbq_drop(struct Qdisc* sch)
+static unsigned int cbq_drop(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl, *cl_head;
@@ -1182,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
 	unsigned int len;
 
 	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
-		if ((cl_head = q->active[prio]) == NULL)
+		cl_head = q->active[prio];
+		if (!cl_head)
 			continue;
 
 		cl = cl_head;
@@ -1199,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
 }
 
 static void
-cbq_reset(struct Qdisc* sch)
+cbq_reset(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
 	struct hlist_node *n;
 	int prio;
-	unsigned h;
+	unsigned int h;
 
 	q->activemask = 0;
 	q->pmask = 0;
@@ -1237,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
 
 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
 {
-	if (lss->change&TCF_CBQ_LSS_FLAGS) {
-		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
-		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+	if (lss->change & TCF_CBQ_LSS_FLAGS) {
+		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
 	}
-	if (lss->change&TCF_CBQ_LSS_EWMA)
+	if (lss->change & TCF_CBQ_LSS_EWMA)
 		cl->ewma_log = lss->ewma_log;
-	if (lss->change&TCF_CBQ_LSS_AVPKT)
+	if (lss->change & TCF_CBQ_LSS_AVPKT)
 		cl->avpkt = lss->avpkt;
-	if (lss->change&TCF_CBQ_LSS_MINIDLE)
+	if (lss->change & TCF_CBQ_LSS_MINIDLE)
 		cl->minidle = -(long)lss->minidle;
-	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
 		cl->maxidle = lss->maxidle;
 		cl->avgidle = lss->maxidle;
 	}
-	if (lss->change&TCF_CBQ_LSS_OFFTIME)
+	if (lss->change & TCF_CBQ_LSS_OFFTIME)
 		cl->offtime = lss->offtime;
 	return 0;
 }
@@ -1279,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
 	if (wrr->weight)
 		cl->weight = wrr->weight;
 	if (wrr->priority) {
-		cl->priority = wrr->priority-1;
+		cl->priority = wrr->priority - 1;
 		cl->cpriority = cl->priority;
 		if (cl->priority >= cl->priority2)
-			cl->priority2 = TC_CBQ_MAXPRIO-1;
+			cl->priority2 = TC_CBQ_MAXPRIO - 1;
 	}
 
 	cbq_addprio(q, cl);
@@ -1299,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
 		cl->overlimit = cbq_ovl_delay;
 		break;
 	case TC_CBQ_OVL_LOWPRIO:
-		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
-		    ovl->priority2-1 <= cl->priority)
+		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
+		    ovl->priority2 - 1 <= cl->priority)
 			return -EINVAL;
-		cl->priority2 = ovl->priority2-1;
+		cl->priority2 = ovl->priority2 - 1;
 		cl->overlimit = cbq_ovl_lowprio;
 		break;
 	case TC_CBQ_OVL_DROP:
@@ -1381,9 +1388,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!q->link.q)
 		q->link.q = &noop_qdisc;
 
-	q->link.priority = TC_CBQ_MAXPRIO-1;
-	q->link.priority2 = TC_CBQ_MAXPRIO-1;
-	q->link.cpriority = TC_CBQ_MAXPRIO-1;
+	q->link.priority = TC_CBQ_MAXPRIO - 1;
+	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
+	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
 	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
 	q->link.overlimit = cbq_ovl_classic;
 	q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1414,7 +1421,7 @@ put_rtab:
 	return err;
 }
 
-static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 
@@ -1426,7 +1433,7 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_lssopt opt;
@@ -1451,15 +1458,15 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_wrropt opt;
 
 	opt.flags = 0;
 	opt.allot = cl->allot;
-	opt.priority = cl->priority+1;
-	opt.cpriority = cl->cpriority+1;
+	opt.priority = cl->priority + 1;
+	opt.cpriority = cl->cpriority + 1;
 	opt.weight = cl->weight;
 	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
 	return skb->len;
@@ -1469,13 +1476,13 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_ovl opt;
 
 	opt.strategy = cl->ovl_strategy;
-	opt.priority2 = cl->priority2+1;
+	opt.priority2 = cl->priority2 + 1;
 	opt.pad = 0;
 	opt.penalty = cl->penalty;
 	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1486,7 +1493,7 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_fopt opt;
@@ -1505,7 +1512,7 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_police opt;
@@ -1569,7 +1576,7 @@ static int
 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	       struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 	struct nlattr *nest;
 
 	if (cl->tparent)
@@ -1597,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	struct gnet_dump *d)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	cl->qstats.qlen = cl->q->q.qlen;
 	cl->xstats.avgidle = cl->avgidle;
@@ -1617,7 +1624,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		     struct Qdisc **old)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	if (new == NULL) {
 		new = qdisc_create_dflt(sch->dev_queue,
@@ -1640,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	return 0;
 }
 
-static struct Qdisc *
-cbq_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	return cl->q;
 }
@@ -1682,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 	kfree(cl);
 }
 
-static void
-cbq_destroy(struct Qdisc* sch)
+static void cbq_destroy(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct hlist_node *n, *next;
 	struct cbq_class *cl;
-	unsigned h;
+	unsigned int h;
 
 #ifdef CONFIG_NET_CLS_ACT
 	q->rx_class = NULL;
@@ -1712,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
 
 static void cbq_put(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
1716 1721
1717 if (--cl->refcnt == 0) { 1722 if (--cl->refcnt == 0) {
1718#ifdef CONFIG_NET_CLS_ACT 1723#ifdef CONFIG_NET_CLS_ACT
@@ -1735,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1735{ 1740{
1736 int err; 1741 int err;
1737 struct cbq_sched_data *q = qdisc_priv(sch); 1742 struct cbq_sched_data *q = qdisc_priv(sch);
1738 struct cbq_class *cl = (struct cbq_class*)*arg; 1743 struct cbq_class *cl = (struct cbq_class *)*arg;
1739 struct nlattr *opt = tca[TCA_OPTIONS]; 1744 struct nlattr *opt = tca[TCA_OPTIONS];
1740 struct nlattr *tb[TCA_CBQ_MAX + 1]; 1745 struct nlattr *tb[TCA_CBQ_MAX + 1];
1741 struct cbq_class *parent; 1746 struct cbq_class *parent;
@@ -1827,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1827 1832
1828 if (classid) { 1833 if (classid) {
1829 err = -EINVAL; 1834 err = -EINVAL;
1830 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) 1835 if (TC_H_MAJ(classid ^ sch->handle) ||
1836 cbq_class_lookup(q, classid))
1831 goto failure; 1837 goto failure;
1832 } else { 1838 } else {
1833 int i; 1839 int i;
1834 classid = TC_H_MAKE(sch->handle,0x8000); 1840 classid = TC_H_MAKE(sch->handle, 0x8000);
1835 1841
1836 for (i=0; i<0x8000; i++) { 1842 for (i = 0; i < 0x8000; i++) {
1837 if (++q->hgenerator >= 0x8000) 1843 if (++q->hgenerator >= 0x8000)
1838 q->hgenerator = 1; 1844 q->hgenerator = 1;
1839 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) 1845 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1890,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1890 cl->minidle = -0x7FFFFFFF; 1896 cl->minidle = -0x7FFFFFFF;
1891 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); 1897 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1892 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); 1898 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1893 if (cl->ewma_log==0) 1899 if (cl->ewma_log == 0)
1894 cl->ewma_log = q->link.ewma_log; 1900 cl->ewma_log = q->link.ewma_log;
1895 if (cl->maxidle==0) 1901 if (cl->maxidle == 0)
1896 cl->maxidle = q->link.maxidle; 1902 cl->maxidle = q->link.maxidle;
1897 if (cl->avpkt==0) 1903 if (cl->avpkt == 0)
1898 cl->avpkt = q->link.avpkt; 1904 cl->avpkt = q->link.avpkt;
1899 cl->overlimit = cbq_ovl_classic; 1905 cl->overlimit = cbq_ovl_classic;
1900 if (tb[TCA_CBQ_OVL_STRATEGY]) 1906 if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1920,7 +1926,7 @@ failure:
1920static int cbq_delete(struct Qdisc *sch, unsigned long arg) 1926static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1921{ 1927{
1922 struct cbq_sched_data *q = qdisc_priv(sch); 1928 struct cbq_sched_data *q = qdisc_priv(sch);
1923 struct cbq_class *cl = (struct cbq_class*)arg; 1929 struct cbq_class *cl = (struct cbq_class *)arg;
1924 unsigned int qlen; 1930 unsigned int qlen;
1925 1931
1926 if (cl->filters || cl->children || cl == &q->link) 1932 if (cl->filters || cl->children || cl == &q->link)
@@ -1978,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1978 u32 classid) 1984 u32 classid)
1979{ 1985{
1980 struct cbq_sched_data *q = qdisc_priv(sch); 1986 struct cbq_sched_data *q = qdisc_priv(sch);
1981 struct cbq_class *p = (struct cbq_class*)parent; 1987 struct cbq_class *p = (struct cbq_class *)parent;
1982 struct cbq_class *cl = cbq_class_lookup(q, classid); 1988 struct cbq_class *cl = cbq_class_lookup(q, classid);
1983 1989
1984 if (cl) { 1990 if (cl) {
@@ -1992,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1992 1998
1993static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) 1999static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1994{ 2000{
1995 struct cbq_class *cl = (struct cbq_class*)arg; 2001 struct cbq_class *cl = (struct cbq_class *)arg;
1996 2002
1997 cl->filters--; 2003 cl->filters--;
1998} 2004}
@@ -2002,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2002 struct cbq_sched_data *q = qdisc_priv(sch); 2008 struct cbq_sched_data *q = qdisc_priv(sch);
2003 struct cbq_class *cl; 2009 struct cbq_class *cl;
2004 struct hlist_node *n; 2010 struct hlist_node *n;
2005 unsigned h; 2011 unsigned int h;
2006 2012
2007 if (arg->stop) 2013 if (arg->stop)
2008 return; 2014 return;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
new file mode 100644
index 00000000000..06afbaeb4c8
--- /dev/null
+++ b/net/sched/sch_choke.c
@@ -0,0 +1,688 @@
1/*
2 * net/sched/sch_choke.c CHOKE scheduler
3 *
4 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
17#include <linux/reciprocal_div.h>
18#include <linux/vmalloc.h>
19#include <net/pkt_sched.h>
20#include <net/inet_ecn.h>
21#include <net/red.h>
22#include <linux/ip.h>
23#include <net/ip.h>
24#include <linux/ipv6.h>
25#include <net/ipv6.h>
26
27/*
28 CHOKe stateless AQM for fair bandwidth allocation
29 =================================================
30
31 CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
32 unresponsive flows) is a variant of RED that penalizes misbehaving flows but
33 maintains no flow state. The difference from RED is an additional step
34 during the enqueuing process. If average queue size is over the
35 low threshold (qmin), a packet is chosen at random from the queue.
36 If both the new and chosen packet are from the same flow, both
37 are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
38 needs to access packets in queue randomly. It has a minimal class
39 interface to allow overriding the builtin flow classifier with
40 filters.
41
42 Source:
43 R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
44 Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
45 IEEE INFOCOM, 2000.
46
47 A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
48 Characteristics", IEEE/ACM Transactions on Networking, 2004
49
50 */
51
52/* Upper bound on size of sk_buff table (packets) */
53#define CHOKE_MAX_QUEUE (128*1024 - 1)
54
55struct choke_sched_data {
56/* Parameters */
57 u32 limit;
58 unsigned char flags;
59
60 struct red_parms parms;
61
62/* Variables */
63 struct tcf_proto *filter_list;
64 struct {
65 u32 prob_drop; /* Early probability drops */
66 u32 prob_mark; /* Early probability marks */
67 u32 forced_drop; /* Forced drops, qavg > max_thresh */
68 u32 forced_mark; /* Forced marks, qavg > max_thresh */
69 u32 pdrop; /* Drops due to queue limits */
70 u32 other; /* Drops due to drop() calls */
 71 u32 matched; /* Drops due to flow match */
72 } stats;
73
74 unsigned int head;
75 unsigned int tail;
76
77 unsigned int tab_mask; /* size - 1 */
78
79 struct sk_buff **tab;
80};
81
82/* deliver a random number between 0 and N - 1 */
83static u32 random_N(unsigned int N)
84{
85 return reciprocal_divide(random32(), N);
86}
87
88/* number of elements in queue including holes */
89static unsigned int choke_len(const struct choke_sched_data *q)
90{
91 return (q->tail - q->head) & q->tab_mask;
92}
93
94/* Is ECN parameter configured */
95static int use_ecn(const struct choke_sched_data *q)
96{
97 return q->flags & TC_RED_ECN;
98}
99
100/* Should packets over max just be dropped (versus marked) */
101static int use_harddrop(const struct choke_sched_data *q)
102{
103 return q->flags & TC_RED_HARDDROP;
104}
105
106/* Move head pointer forward to skip over holes */
107static void choke_zap_head_holes(struct choke_sched_data *q)
108{
109 do {
110 q->head = (q->head + 1) & q->tab_mask;
111 if (q->head == q->tail)
112 break;
113 } while (q->tab[q->head] == NULL);
114}
115
116/* Move tail pointer backwards to reuse holes */
117static void choke_zap_tail_holes(struct choke_sched_data *q)
118{
119 do {
120 q->tail = (q->tail - 1) & q->tab_mask;
121 if (q->head == q->tail)
122 break;
123 } while (q->tab[q->tail] == NULL);
124}
125
126/* Drop packet from queue array by creating a "hole" */
127static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
128{
129 struct choke_sched_data *q = qdisc_priv(sch);
130 struct sk_buff *skb = q->tab[idx];
131
132 q->tab[idx] = NULL;
133
134 if (idx == q->head)
135 choke_zap_head_holes(q);
136 if (idx == q->tail)
137 choke_zap_tail_holes(q);
138
139 sch->qstats.backlog -= qdisc_pkt_len(skb);
140 qdisc_drop(skb, sch);
141 qdisc_tree_decrease_qlen(sch, 1);
142 --sch->q.qlen;
143}
144
145/*
146 * Compare flow of two packets
 147 * Returns true only if source and destination address and port match;
 148 * returns false for special cases.
149 */
150static bool choke_match_flow(struct sk_buff *skb1,
151 struct sk_buff *skb2)
152{
153 int off1, off2, poff;
154 const u32 *ports1, *ports2;
155 u8 ip_proto;
156 __u32 hash1;
157
158 if (skb1->protocol != skb2->protocol)
159 return false;
160
161 /* Use hash value as quick check
162 * Assumes that __skb_get_rxhash makes IP header and ports linear
163 */
164 hash1 = skb_get_rxhash(skb1);
165 if (!hash1 || hash1 != skb_get_rxhash(skb2))
166 return false;
167
 168 /* Probable match, but verify to guard against hash collisions */
169 off1 = skb_network_offset(skb1);
170 off2 = skb_network_offset(skb2);
171
172 switch (skb1->protocol) {
173 case __constant_htons(ETH_P_IP): {
174 const struct iphdr *ip1, *ip2;
175
176 ip1 = (const struct iphdr *) (skb1->data + off1);
177 ip2 = (const struct iphdr *) (skb2->data + off2);
178
179 ip_proto = ip1->protocol;
180 if (ip_proto != ip2->protocol ||
181 ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
182 return false;
183
184 if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
185 ip_proto = 0;
186 off1 += ip1->ihl * 4;
187 off2 += ip2->ihl * 4;
188 break;
189 }
190
191 case __constant_htons(ETH_P_IPV6): {
192 const struct ipv6hdr *ip1, *ip2;
193
194 ip1 = (const struct ipv6hdr *) (skb1->data + off1);
195 ip2 = (const struct ipv6hdr *) (skb2->data + off2);
196
197 ip_proto = ip1->nexthdr;
198 if (ip_proto != ip2->nexthdr ||
199 ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
200 ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
201 return false;
 202 off1 += 40;
 203 off2 += 40;
 204 break; /* avoid falling through to the default return */
 205 }
206 default: /* Maybe compare MAC header here? */
207 return false;
208 }
209
210 poff = proto_ports_offset(ip_proto);
211 if (poff < 0)
212 return true;
213
214 off1 += poff;
215 off2 += poff;
216
217 ports1 = (__force u32 *)(skb1->data + off1);
218 ports2 = (__force u32 *)(skb2->data + off2);
219 return *ports1 == *ports2;
220}
221
222struct choke_skb_cb {
223 u16 classid;
224};
225
226static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
227{
228 BUILD_BUG_ON(sizeof(skb->cb) <
229 sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb));
230 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
231}
232
233static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
234{
235 choke_skb_cb(skb)->classid = classid;
236}
237
238static u16 choke_get_classid(const struct sk_buff *skb)
239{
240 return choke_skb_cb(skb)->classid;
241}
242
243/*
244 * Classify flow using either:
245 * 1. pre-existing classification result in skb
246 * 2. fast internal classification
247 * 3. use TC filter based classification
248 */
249static bool choke_classify(struct sk_buff *skb,
250 struct Qdisc *sch, int *qerr)
251
252{
253 struct choke_sched_data *q = qdisc_priv(sch);
254 struct tcf_result res;
255 int result;
256
257 result = tc_classify(skb, q->filter_list, &res);
258 if (result >= 0) {
259#ifdef CONFIG_NET_CLS_ACT
260 switch (result) {
261 case TC_ACT_STOLEN:
262 case TC_ACT_QUEUED:
263 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
264 case TC_ACT_SHOT:
265 return false;
266 }
267#endif
268 choke_set_classid(skb, TC_H_MIN(res.classid));
269 return true;
270 }
271
272 return false;
273}
274
275/*
276 * Select a packet at random from queue
 277 * HACK: since the queue can have holes from previous deletions, retry
 278 * several times to find a random skb, then give up and return the head.
279 * Will return NULL if queue is empty (q->head == q->tail)
280 */
281static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
282 unsigned int *pidx)
283{
284 struct sk_buff *skb;
 285 int retries = 3;
286
287 do {
288 *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
289 skb = q->tab[*pidx];
290 if (skb)
291 return skb;
 292 } while (--retries > 0);
293
294 return q->tab[*pidx = q->head];
295}
296
297/*
298 * Compare new packet with random packet in queue
 299 * Returns true if matched and sets *pidx.
300 */
301static bool choke_match_random(const struct choke_sched_data *q,
302 struct sk_buff *nskb,
303 unsigned int *pidx)
304{
305 struct sk_buff *oskb;
306
307 if (q->head == q->tail)
308 return false;
309
310 oskb = choke_peek_random(q, pidx);
311 if (q->filter_list)
312 return choke_get_classid(nskb) == choke_get_classid(oskb);
313
314 return choke_match_flow(oskb, nskb);
315}
316
317static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
318{
319 struct choke_sched_data *q = qdisc_priv(sch);
320 struct red_parms *p = &q->parms;
321 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
322
323 if (q->filter_list) {
324 /* If using external classifiers, get result and record it. */
325 if (!choke_classify(skb, sch, &ret))
326 goto other_drop; /* Packet was eaten by filter */
327 }
328
329 /* Compute average queue usage (see RED) */
330 p->qavg = red_calc_qavg(p, sch->q.qlen);
331 if (red_is_idling(p))
332 red_end_of_idle_period(p);
333
334 /* Is queue small? */
335 if (p->qavg <= p->qth_min)
336 p->qcount = -1;
337 else {
338 unsigned int idx;
339
340 /* Draw a packet at random from queue and compare flow */
341 if (choke_match_random(q, skb, &idx)) {
342 q->stats.matched++;
343 choke_drop_by_idx(sch, idx);
344 goto congestion_drop;
345 }
346
347 /* Queue is large, always mark/drop */
348 if (p->qavg > p->qth_max) {
349 p->qcount = -1;
350
351 sch->qstats.overlimits++;
352 if (use_harddrop(q) || !use_ecn(q) ||
353 !INET_ECN_set_ce(skb)) {
354 q->stats.forced_drop++;
355 goto congestion_drop;
356 }
357
358 q->stats.forced_mark++;
359 } else if (++p->qcount) {
360 if (red_mark_probability(p, p->qavg)) {
361 p->qcount = 0;
362 p->qR = red_random(p);
363
364 sch->qstats.overlimits++;
365 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
366 q->stats.prob_drop++;
367 goto congestion_drop;
368 }
369
370 q->stats.prob_mark++;
371 }
372 } else
373 p->qR = red_random(p);
374 }
375
376 /* Admit new packet */
377 if (sch->q.qlen < q->limit) {
378 q->tab[q->tail] = skb;
379 q->tail = (q->tail + 1) & q->tab_mask;
380 ++sch->q.qlen;
381 sch->qstats.backlog += qdisc_pkt_len(skb);
382 return NET_XMIT_SUCCESS;
383 }
384
385 q->stats.pdrop++;
386 sch->qstats.drops++;
387 kfree_skb(skb);
388 return NET_XMIT_DROP;
389
390 congestion_drop:
391 qdisc_drop(skb, sch);
392 return NET_XMIT_CN;
393
394 other_drop:
395 if (ret & __NET_XMIT_BYPASS)
396 sch->qstats.drops++;
397 kfree_skb(skb);
398 return ret;
399}
400
401static struct sk_buff *choke_dequeue(struct Qdisc *sch)
402{
403 struct choke_sched_data *q = qdisc_priv(sch);
404 struct sk_buff *skb;
405
406 if (q->head == q->tail) {
407 if (!red_is_idling(&q->parms))
408 red_start_of_idle_period(&q->parms);
409 return NULL;
410 }
411
412 skb = q->tab[q->head];
413 q->tab[q->head] = NULL;
414 choke_zap_head_holes(q);
415 --sch->q.qlen;
416 sch->qstats.backlog -= qdisc_pkt_len(skb);
417 qdisc_bstats_update(sch, skb);
418
419 return skb;
420}
421
422static unsigned int choke_drop(struct Qdisc *sch)
423{
424 struct choke_sched_data *q = qdisc_priv(sch);
425 unsigned int len;
426
427 len = qdisc_queue_drop(sch);
428 if (len > 0)
429 q->stats.other++;
430 else {
431 if (!red_is_idling(&q->parms))
432 red_start_of_idle_period(&q->parms);
433 }
434
435 return len;
436}
437
438static void choke_reset(struct Qdisc *sch)
439{
440 struct choke_sched_data *q = qdisc_priv(sch);
441
442 red_restart(&q->parms);
443}
444
445static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
446 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
447 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
448};
449
450
451static void choke_free(void *addr)
452{
453 if (addr) {
454 if (is_vmalloc_addr(addr))
455 vfree(addr);
456 else
457 kfree(addr);
458 }
459}
460
461static int choke_change(struct Qdisc *sch, struct nlattr *opt)
462{
463 struct choke_sched_data *q = qdisc_priv(sch);
464 struct nlattr *tb[TCA_CHOKE_MAX + 1];
465 const struct tc_red_qopt *ctl;
466 int err;
467 struct sk_buff **old = NULL;
468 unsigned int mask;
469
470 if (opt == NULL)
471 return -EINVAL;
472
473 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
474 if (err < 0)
475 return err;
476
477 if (tb[TCA_CHOKE_PARMS] == NULL ||
478 tb[TCA_CHOKE_STAB] == NULL)
479 return -EINVAL;
480
481 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
482
483 if (ctl->limit > CHOKE_MAX_QUEUE)
484 return -EINVAL;
485
486 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
487 if (mask != q->tab_mask) {
488 struct sk_buff **ntab;
489
490 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
491 if (!ntab)
492 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
493 if (!ntab)
494 return -ENOMEM;
495
496 sch_tree_lock(sch);
497 old = q->tab;
498 if (old) {
499 unsigned int oqlen = sch->q.qlen, tail = 0;
500
501 while (q->head != q->tail) {
502 struct sk_buff *skb = q->tab[q->head];
503
504 q->head = (q->head + 1) & q->tab_mask;
505 if (!skb)
506 continue;
507 if (tail < mask) {
508 ntab[tail++] = skb;
509 continue;
510 }
511 sch->qstats.backlog -= qdisc_pkt_len(skb);
512 --sch->q.qlen;
513 qdisc_drop(skb, sch);
514 }
515 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
516 q->head = 0;
517 q->tail = tail;
518 }
519
520 q->tab_mask = mask;
521 q->tab = ntab;
522 } else
523 sch_tree_lock(sch);
524
525 q->flags = ctl->flags;
526 q->limit = ctl->limit;
527
528 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
529 ctl->Plog, ctl->Scell_log,
530 nla_data(tb[TCA_CHOKE_STAB]));
531
532 if (q->head == q->tail)
533 red_end_of_idle_period(&q->parms);
534
535 sch_tree_unlock(sch);
536 choke_free(old);
537 return 0;
538}
539
540static int choke_init(struct Qdisc *sch, struct nlattr *opt)
541{
542 return choke_change(sch, opt);
543}
544
545static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
546{
547 struct choke_sched_data *q = qdisc_priv(sch);
548 struct nlattr *opts = NULL;
549 struct tc_red_qopt opt = {
550 .limit = q->limit,
551 .flags = q->flags,
552 .qth_min = q->parms.qth_min >> q->parms.Wlog,
553 .qth_max = q->parms.qth_max >> q->parms.Wlog,
554 .Wlog = q->parms.Wlog,
555 .Plog = q->parms.Plog,
556 .Scell_log = q->parms.Scell_log,
557 };
558
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 if (opts == NULL)
561 goto nla_put_failure;
562
563 NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
564 return nla_nest_end(skb, opts);
565
566nla_put_failure:
567 nla_nest_cancel(skb, opts);
568 return -EMSGSIZE;
569}
570
571static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
572{
573 struct choke_sched_data *q = qdisc_priv(sch);
574 struct tc_choke_xstats st = {
575 .early = q->stats.prob_drop + q->stats.forced_drop,
576 .marked = q->stats.prob_mark + q->stats.forced_mark,
577 .pdrop = q->stats.pdrop,
578 .other = q->stats.other,
579 .matched = q->stats.matched,
580 };
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static void choke_destroy(struct Qdisc *sch)
586{
587 struct choke_sched_data *q = qdisc_priv(sch);
588
589 tcf_destroy_chain(&q->filter_list);
590 choke_free(q->tab);
591}
592
593static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
594{
595 return NULL;
596}
597
598static unsigned long choke_get(struct Qdisc *sch, u32 classid)
599{
600 return 0;
601}
602
603static void choke_put(struct Qdisc *q, unsigned long cl)
604{
605}
606
607static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
608 u32 classid)
609{
610 return 0;
611}
612
613static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
614{
615 struct choke_sched_data *q = qdisc_priv(sch);
616
617 if (cl)
618 return NULL;
619 return &q->filter_list;
620}
621
622static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
623 struct sk_buff *skb, struct tcmsg *tcm)
624{
625 tcm->tcm_handle |= TC_H_MIN(cl);
626 return 0;
627}
628
629static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
630{
631 if (!arg->stop) {
632 if (arg->fn(sch, 1, arg) < 0) {
633 arg->stop = 1;
634 return;
635 }
636 arg->count++;
637 }
638}
639
640static const struct Qdisc_class_ops choke_class_ops = {
641 .leaf = choke_leaf,
642 .get = choke_get,
643 .put = choke_put,
644 .tcf_chain = choke_find_tcf,
645 .bind_tcf = choke_bind,
646 .unbind_tcf = choke_put,
647 .dump = choke_dump_class,
648 .walk = choke_walk,
649};
650
651static struct sk_buff *choke_peek_head(struct Qdisc *sch)
652{
653 struct choke_sched_data *q = qdisc_priv(sch);
654
655 return (q->head != q->tail) ? q->tab[q->head] : NULL;
656}
657
658static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
659 .id = "choke",
660 .priv_size = sizeof(struct choke_sched_data),
661
662 .enqueue = choke_enqueue,
663 .dequeue = choke_dequeue,
664 .peek = choke_peek_head,
665 .drop = choke_drop,
666 .init = choke_init,
667 .destroy = choke_destroy,
668 .reset = choke_reset,
669 .change = choke_change,
670 .dump = choke_dump,
671 .dump_stats = choke_dump_stats,
672 .owner = THIS_MODULE,
673};
674
675static int __init choke_module_init(void)
676{
677 return register_qdisc(&choke_qdisc_ops);
678}
679
680static void __exit choke_module_exit(void)
681{
682 unregister_qdisc(&choke_qdisc_ops);
683}
684
685module_init(choke_module_init)
686module_exit(choke_module_exit)
687
688MODULE_LICENSE("GPL");
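
The CHOKe code added above leans on three small techniques: a power-of-two ring buffer that tolerates holes left by drops, a multiply-shift reduction of a 32-bit random value into [0, N) (which is what reciprocal_divide(random32(), N) computes in random_N()), and the paper's drop-both-on-match rule. The userspace sketch below strings them together under simplifying assumptions: the choke_sim_* and rand32 names are illustrative, flow id 0 is reserved to mark holes, and RED's average-queue test is reduced to an over_qmin flag supplied by the caller.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define TAB_SIZE 256                    /* power of two, mirrors tab_mask + 1 */

    struct choke_sim {
            unsigned int head, tail;        /* ring indices; queue empty when equal */
            uint32_t flow[TAB_SIZE];        /* queued packet's flow id; 0 marks a hole */
    };

    static uint32_t rand32(void)
    {
            /* crude 32-bit random; the kernel uses random32() */
            return ((uint32_t)rand() << 16) ^ (uint32_t)rand();
    }

    /* map a random value into [0, n) by multiply-shift instead of modulo */
    static unsigned int pick_random(unsigned int n)
    {
            return (unsigned int)(((uint64_t)rand32() * n) >> 32);
    }

    /* returns false if CHOKe drops the arriving packet (and its queued twin) */
    static bool choke_sim_enqueue(struct choke_sim *q, uint32_t flow, bool over_qmin)
    {
            unsigned int len = (q->tail - q->head) & (TAB_SIZE - 1);

            if (over_qmin && len > 0) {
                    unsigned int idx = (q->head + pick_random(len)) & (TAB_SIZE - 1);

                    if (q->flow[idx] == flow) {     /* same flow: drop both */
                            q->flow[idx] = 0;       /* leave a "hole", as in tab[] above */
                            return false;
                    }
            }
            q->flow[q->tail] = flow;                /* admit; limit checks omitted */
            q->tail = (q->tail + 1) & (TAB_SIZE - 1);
            return true;
    }

A real implementation must also skip holes when picking a victim (choke_peek_random() retries a few times before settling for the head) and keep qlen/backlog statistics in step, both of which the sketch omits.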
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0f7bf3fdfea..2c790204d04 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
137 mask = nla_get_u8(tb[TCA_DSMARK_MASK]); 137 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
138 138
139 if (tb[TCA_DSMARK_VALUE]) 139 if (tb[TCA_DSMARK_VALUE])
140 p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); 140 p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
141 141
142 if (tb[TCA_DSMARK_MASK]) 142 if (tb[TCA_DSMARK_MASK])
143 p->mask[*arg-1] = mask; 143 p->mask[*arg - 1] = mask;
144 144
145 err = 0; 145 err = 0;
146 146
@@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
155 if (!dsmark_valid_index(p, arg)) 155 if (!dsmark_valid_index(p, arg))
156 return -EINVAL; 156 return -EINVAL;
157 157
158 p->mask[arg-1] = 0xff; 158 p->mask[arg - 1] = 0xff;
159 p->value[arg-1] = 0; 159 p->value[arg - 1] = 0;
160 160
161 return 0; 161 return 0;
162} 162}
@@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
175 if (p->mask[i] == 0xff && !p->value[i]) 175 if (p->mask[i] == 0xff && !p->value[i])
176 goto ignore; 176 goto ignore;
177 if (walker->count >= walker->skip) { 177 if (walker->count >= walker->skip) {
178 if (walker->fn(sch, i+1, walker) < 0) { 178 if (walker->fn(sch, i + 1, walker) < 0) {
179 walker->stop = 1; 179 walker->stop = 1;
180 break; 180 break;
181 } 181 }
@@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
304 * and don't need yet another qdisc as a bypass. 304 * and don't need yet another qdisc as a bypass.
305 */ 305 */
306 if (p->mask[index] != 0xff || p->value[index]) 306 if (p->mask[index] != 0xff || p->value[index])
307 printk(KERN_WARNING 307 pr_warning("dsmark_dequeue: unsupported protocol %d\n",
308 "dsmark_dequeue: unsupported protocol %d\n", 308 ntohs(skb->protocol));
309 ntohs(skb->protocol));
310 break; 309 break;
311 } 310 }
312 311
@@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
424 if (!dsmark_valid_index(p, cl)) 423 if (!dsmark_valid_index(p, cl))
425 return -EINVAL; 424 return -EINVAL;
426 425
427 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); 426 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
428 tcm->tcm_info = p->q->handle; 427 tcm->tcm_info = p->q->handle;
429 428
430 opts = nla_nest_start(skb, TCA_OPTIONS); 429 opts = nla_nest_start(skb, TCA_OPTIONS);
431 if (opts == NULL) 430 if (opts == NULL)
432 goto nla_put_failure; 431 goto nla_put_failure;
433 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]); 432 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
434 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]); 433 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
435 434
436 return nla_nest_end(skb, opts); 435 return nla_nest_end(skb, opts);
437 436
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index d468b479aa9..66effe2da8e 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,36 +19,25 @@
19 19
20/* 1 band FIFO pseudo-"scheduler" */ 20/* 1 band FIFO pseudo-"scheduler" */
21 21
22struct fifo_sched_data 22static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
23{ 23{
24 u32 limit; 24 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
25};
26
27static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
28{
29 struct fifo_sched_data *q = qdisc_priv(sch);
30
31 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
32 return qdisc_enqueue_tail(skb, sch); 25 return qdisc_enqueue_tail(skb, sch);
33 26
34 return qdisc_reshape_fail(skb, sch); 27 return qdisc_reshape_fail(skb, sch);
35} 28}
36 29
37static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) 30static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
38{ 31{
39 struct fifo_sched_data *q = qdisc_priv(sch); 32 if (likely(skb_queue_len(&sch->q) < sch->limit))
40
41 if (likely(skb_queue_len(&sch->q) < q->limit))
42 return qdisc_enqueue_tail(skb, sch); 33 return qdisc_enqueue_tail(skb, sch);
43 34
44 return qdisc_reshape_fail(skb, sch); 35 return qdisc_reshape_fail(skb, sch);
45} 36}
46 37
47static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) 38static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
48{ 39{
49 struct fifo_sched_data *q = qdisc_priv(sch); 40 if (likely(skb_queue_len(&sch->q) < sch->limit))
50
51 if (likely(skb_queue_len(&sch->q) < q->limit))
52 return qdisc_enqueue_tail(skb, sch); 41 return qdisc_enqueue_tail(skb, sch);
53 42
54 /* queue full, remove one skb to fulfill the limit */ 43 /* queue full, remove one skb to fulfill the limit */
@@ -61,31 +50,40 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
61 50
62static int fifo_init(struct Qdisc *sch, struct nlattr *opt) 51static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
63{ 52{
64 struct fifo_sched_data *q = qdisc_priv(sch); 53 bool bypass;
54 bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
65 55
66 if (opt == NULL) { 56 if (opt == NULL) {
67 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; 57 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
68 58
69 if (sch->ops == &bfifo_qdisc_ops) 59 if (is_bfifo)
70 limit *= psched_mtu(qdisc_dev(sch)); 60 limit *= psched_mtu(qdisc_dev(sch));
71 61
72 q->limit = limit; 62 sch->limit = limit;
73 } else { 63 } else {
74 struct tc_fifo_qopt *ctl = nla_data(opt); 64 struct tc_fifo_qopt *ctl = nla_data(opt);
75 65
76 if (nla_len(opt) < sizeof(*ctl)) 66 if (nla_len(opt) < sizeof(*ctl))
77 return -EINVAL; 67 return -EINVAL;
78 68
79 q->limit = ctl->limit; 69 sch->limit = ctl->limit;
80 } 70 }
81 71
72 if (is_bfifo)
73 bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
74 else
75 bypass = sch->limit >= 1;
76
77 if (bypass)
78 sch->flags |= TCQ_F_CAN_BYPASS;
79 else
80 sch->flags &= ~TCQ_F_CAN_BYPASS;
82 return 0; 81 return 0;
83} 82}
84 83
85static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 84static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
86{ 85{
87 struct fifo_sched_data *q = qdisc_priv(sch); 86 struct tc_fifo_qopt opt = { .limit = sch->limit };
88 struct tc_fifo_qopt opt = { .limit = q->limit };
89 87
90 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 88 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
91 return skb->len; 89 return skb->len;
@@ -96,7 +94,7 @@ nla_put_failure:
96 94
97struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { 95struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
98 .id = "pfifo", 96 .id = "pfifo",
99 .priv_size = sizeof(struct fifo_sched_data), 97 .priv_size = 0,
100 .enqueue = pfifo_enqueue, 98 .enqueue = pfifo_enqueue,
101 .dequeue = qdisc_dequeue_head, 99 .dequeue = qdisc_dequeue_head,
102 .peek = qdisc_peek_head, 100 .peek = qdisc_peek_head,
@@ -111,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops);
111 109
112struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { 110struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
113 .id = "bfifo", 111 .id = "bfifo",
114 .priv_size = sizeof(struct fifo_sched_data), 112 .priv_size = 0,
115 .enqueue = bfifo_enqueue, 113 .enqueue = bfifo_enqueue,
116 .dequeue = qdisc_dequeue_head, 114 .dequeue = qdisc_dequeue_head,
117 .peek = qdisc_peek_head, 115 .peek = qdisc_peek_head,
@@ -126,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops);
126 124
127struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { 125struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
128 .id = "pfifo_head_drop", 126 .id = "pfifo_head_drop",
129 .priv_size = sizeof(struct fifo_sched_data), 127 .priv_size = 0,
130 .enqueue = pfifo_tail_enqueue, 128 .enqueue = pfifo_tail_enqueue,
131 .dequeue = qdisc_dequeue_head, 129 .dequeue = qdisc_dequeue_head,
132 .peek = qdisc_peek_head, 130 .peek = qdisc_peek_head,
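
The rewrite above removes fifo_sched_data, keeps the bound in the new sch->limit field, and sets TCQ_F_CAN_BYPASS only when an empty queue could never reject an arriving packet. A hedged restatement of that condition, with fifo_can_bypass() as a hypothetical helper rather than a kernel function:

    #include <stdbool.h>

    struct fifo_cfg {
            unsigned int limit;     /* bytes for bfifo, packets for pfifo */
            bool is_bfifo;
            unsigned int mtu;       /* psched_mtu() of the device */
    };

    static bool fifo_can_bypass(const struct fifo_cfg *cfg)
    {
            if (cfg->is_bfifo)
                    /* byte-limited: must hold at least one full-MTU packet */
                    return cfg->limit >= cfg->mtu;
            /* packet-limited: must hold at least one packet */
            return cfg->limit >= 1;
    }

Put differently, a bfifo shorter than one MTU, or a pfifo with a limit of zero, must stay in the packet path so its bound is actually enforced.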
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1bc698039ae..c84b65920d1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
87 */ 87 */
88 kfree_skb(skb); 88 kfree_skb(skb);
89 if (net_ratelimit()) 89 if (net_ratelimit())
90 printk(KERN_WARNING "Dead loop on netdevice %s, " 90 pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
91 "fix it urgently!\n", dev_queue->dev->name); 91 dev_queue->dev->name);
92 ret = qdisc_qlen(q); 92 ret = qdisc_qlen(q);
93 } else { 93 } else {
94 /* 94 /*
@@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
137 } else { 137 } else {
138 /* Driver returned NETDEV_TX_BUSY - requeue skb */ 138 /* Driver returned NETDEV_TX_BUSY - requeue skb */
139 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) 139 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
140 printk(KERN_WARNING "BUG %s code %d qlen %d\n", 140 pr_warning("BUG %s code %d qlen %d\n",
141 dev->name, ret, q->q.qlen); 141 dev->name, ret, q->q.qlen);
142 142
143 ret = dev_requeue_skb(skb, q); 143 ret = dev_requeue_skb(skb, q);
144 } 144 }
@@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = {
412}; 412};
413 413
414 414
415static const u8 prio2band[TC_PRIO_MAX+1] = 415static const u8 prio2band[TC_PRIO_MAX + 1] = {
 416 { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; 416 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
417};
417 418
418/* 3-band FIFO queue: old style, but should be a bit faster than 419/* 3-band FIFO queue: old style, but should be a bit faster than
419 generic prio+fifo combination. 420 generic prio+fifo combination.
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
445 return priv->q + band; 446 return priv->q + band;
446} 447}
447 448
448static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 449static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
449{ 450{
450 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { 451 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
451 int band = prio2band[skb->priority & TC_PRIO_MAX]; 452 int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
460 return qdisc_drop(skb, qdisc); 461 return qdisc_drop(skb, qdisc);
461} 462}
462 463
463static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) 464static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
464{ 465{
465 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 466 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
466 int band = bitmap2band[priv->bitmap]; 467 int band = bitmap2band[priv->bitmap];
@@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
479 return NULL; 480 return NULL;
480} 481}
481 482
482static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) 483static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
483{ 484{
484 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 485 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
485 int band = bitmap2band[priv->bitmap]; 486 int band = bitmap2band[priv->bitmap];
@@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
493 return NULL; 494 return NULL;
494} 495}
495 496
496static void pfifo_fast_reset(struct Qdisc* qdisc) 497static void pfifo_fast_reset(struct Qdisc *qdisc)
497{ 498{
498 int prio; 499 int prio;
499 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 500 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
510{ 511{
511 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; 512 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
512 513
513 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 514 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
514 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 515 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
515 return skb->len; 516 return skb->len;
516 517
@@ -526,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
526 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) 527 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
527 skb_queue_head_init(band2list(priv, prio)); 528 skb_queue_head_init(band2list(priv, prio));
528 529
530 /* Can by-pass the queue discipline */
531 qdisc->flags |= TCQ_F_CAN_BYPASS;
529 return 0; 532 return 0;
530} 533}
531 534
@@ -540,27 +543,32 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
540 .dump = pfifo_fast_dump, 543 .dump = pfifo_fast_dump,
541 .owner = THIS_MODULE, 544 .owner = THIS_MODULE,
542}; 545};
546EXPORT_SYMBOL(pfifo_fast_ops);
543 547
544struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, 548struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
545 struct Qdisc_ops *ops) 549 struct Qdisc_ops *ops)
546{ 550{
547 void *p; 551 void *p;
548 struct Qdisc *sch; 552 struct Qdisc *sch;
549 unsigned int size; 553 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
550 int err = -ENOBUFS; 554 int err = -ENOBUFS;
551 555
552 /* ensure that the Qdisc and the private data are 64-byte aligned */
553 size = QDISC_ALIGN(sizeof(*sch));
554 size += ops->priv_size + (QDISC_ALIGNTO - 1);
555
556 p = kzalloc_node(size, GFP_KERNEL, 556 p = kzalloc_node(size, GFP_KERNEL,
557 netdev_queue_numa_node_read(dev_queue)); 557 netdev_queue_numa_node_read(dev_queue));
558 558
559 if (!p) 559 if (!p)
560 goto errout; 560 goto errout;
561 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); 561 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 562 sch->padded = (char *) sch - (char *) p; 562 /* if we got non-aligned memory, ask for more and do the alignment ourselves */
563 563 if (sch != p) {
564 kfree(p);
565 p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
566 netdev_queue_numa_node_read(dev_queue));
567 if (!p)
568 goto errout;
569 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
570 sch->padded = (char *) sch - (char *) p;
571 }
564 INIT_LIST_HEAD(&sch->list); 572 INIT_LIST_HEAD(&sch->list);
565 skb_queue_head_init(&sch->q); 573 skb_queue_head_init(&sch->q);
566 spin_lock_init(&sch->busylock); 574 spin_lock_init(&sch->busylock);
@@ -630,7 +638,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
630#ifdef CONFIG_NET_SCHED 638#ifdef CONFIG_NET_SCHED
631 qdisc_list_del(qdisc); 639 qdisc_list_del(qdisc);
632 640
633 qdisc_put_stab(qdisc->stab); 641 qdisc_put_stab(rtnl_dereference(qdisc->stab));
634#endif 642#endif
635 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); 643 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
636 if (ops->reset) 644 if (ops->reset)
@@ -674,25 +682,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
674 682
675 return oqdisc; 683 return oqdisc;
676} 684}
685EXPORT_SYMBOL(dev_graft_qdisc);
677 686
678static void attach_one_default_qdisc(struct net_device *dev, 687static void attach_one_default_qdisc(struct net_device *dev,
679 struct netdev_queue *dev_queue, 688 struct netdev_queue *dev_queue,
680 void *_unused) 689 void *_unused)
681{ 690{
682 struct Qdisc *qdisc; 691 struct Qdisc *qdisc = &noqueue_qdisc;
683 692
684 if (dev->tx_queue_len) { 693 if (dev->tx_queue_len) {
685 qdisc = qdisc_create_dflt(dev_queue, 694 qdisc = qdisc_create_dflt(dev_queue,
686 &pfifo_fast_ops, TC_H_ROOT); 695 &pfifo_fast_ops, TC_H_ROOT);
687 if (!qdisc) { 696 if (!qdisc) {
688 printk(KERN_INFO "%s: activation failed\n", dev->name); 697 netdev_info(dev, "activation failed\n");
689 return; 698 return;
690 } 699 }
691
692 /* Can by-pass the queue discipline for default qdisc */
693 qdisc->flags |= TCQ_F_CAN_BYPASS;
694 } else {
695 qdisc = &noqueue_qdisc;
696 } 700 }
697 dev_queue->qdisc_sleeping = qdisc; 701 dev_queue->qdisc_sleeping = qdisc;
698} 702}
@@ -761,6 +765,7 @@ void dev_activate(struct net_device *dev)
761 dev_watchdog_up(dev); 765 dev_watchdog_up(dev);
762 } 766 }
763} 767}
768EXPORT_SYMBOL(dev_activate);
764 769
765static void dev_deactivate_queue(struct net_device *dev, 770static void dev_deactivate_queue(struct net_device *dev,
766 struct netdev_queue *dev_queue, 771 struct netdev_queue *dev_queue,
@@ -841,6 +846,7 @@ void dev_deactivate(struct net_device *dev)
841 dev_deactivate_many(&single); 846 dev_deactivate_many(&single);
842 list_del(&single); 847 list_del(&single);
843} 848}
849EXPORT_SYMBOL(dev_deactivate);
844 850
845static void dev_init_scheduler_queue(struct net_device *dev, 851static void dev_init_scheduler_queue(struct net_device *dev,
846 struct netdev_queue *dev_queue, 852 struct netdev_queue *dev_queue,
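
qdisc_alloc() now tries a plain kzalloc_node() first and re-allocates with QDISC_ALIGNTO - 1 extra bytes only if the returned pointer is not already aligned, instead of always padding. A userspace sketch of the same two-step strategy, with ALIGNTO and alloc_aligned() as illustrative stand-ins:

    #include <stdint.h>
    #include <stdlib.h>

    #define ALIGNTO         64UL
    #define ALIGN_UP(x)     (((x) + ALIGNTO - 1) & ~(ALIGNTO - 1))

    struct obj {
            int padded;     /* bytes between the raw pointer and the aligned object */
            /* ... payload; callers pass size >= sizeof(struct obj) ... */
    };

    static struct obj *alloc_aligned(size_t size)
    {
            void *p = calloc(1, size);
            struct obj *o;

            if (!p)
                    return NULL;
            o = (struct obj *)ALIGN_UP((uintptr_t)p);
            if ((void *)o != p) {
                    /* unlucky case: pay for padding only when actually needed */
                    free(p);
                    p = calloc(1, size + ALIGNTO - 1);
                    if (!p)
                            return NULL;
                    o = (struct obj *)ALIGN_UP((uintptr_t)p);
            }
            o->padded = (int)((char *)o - (char *)p);  /* free((char *)o - o->padded) later */
            return o;
    }

Since allocators usually return blocks that already satisfy a 64-byte alignment for objects of this size, the common case no longer wastes up to QDISC_ALIGNTO - 1 bytes per qdisc.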
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2aa5c9..b9493a09a87 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
32struct gred_sched_data; 32struct gred_sched_data;
33struct gred_sched; 33struct gred_sched;
34 34
35struct gred_sched_data 35struct gred_sched_data {
36{
37 u32 limit; /* HARD maximal queue length */ 36 u32 limit; /* HARD maximal queue length */
38 u32 DP; /* the drop pramaters */ 37 u32 DP; /* the drop pramaters */
39 u32 bytesin; /* bytes seen on virtualQ so far*/ 38 u32 bytesin; /* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
50 GRED_RIO_MODE, 49 GRED_RIO_MODE,
51}; 50};
52 51
53struct gred_sched 52struct gred_sched {
54{
55 struct gred_sched_data *tab[MAX_DPs]; 53 struct gred_sched_data *tab[MAX_DPs];
56 unsigned long flags; 54 unsigned long flags;
57 u32 red_flags; 55 u32 red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
150 return t->red_flags & TC_RED_HARDDROP; 148 return t->red_flags & TC_RED_HARDDROP;
151} 149}
152 150
153static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 151static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
154{ 152{
155 struct gred_sched_data *q=NULL; 153 struct gred_sched_data *q = NULL;
156 struct gred_sched *t= qdisc_priv(sch); 154 struct gred_sched *t = qdisc_priv(sch);
157 unsigned long qavg = 0; 155 unsigned long qavg = 0;
158 u16 dp = tc_index_to_dp(skb); 156 u16 dp = tc_index_to_dp(skb);
159 157
160 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 158 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
161 dp = t->def; 159 dp = t->def;
162 160
163 if ((q = t->tab[dp]) == NULL) { 161 q = t->tab[dp];
162 if (!q) {
164 /* Pass through packets not assigned to a DP 163 /* Pass through packets not assigned to a DP
165 * if no default DP has been configured. This 164 * if no default DP has been configured. This
166 * allows for DP flows to be left untouched. 165 * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
183 for (i = 0; i < t->DPs; i++) { 182 for (i = 0; i < t->DPs; i++) {
184 if (t->tab[i] && t->tab[i]->prio < q->prio && 183 if (t->tab[i] && t->tab[i]->prio < q->prio &&
185 !red_is_idling(&t->tab[i]->parms)) 184 !red_is_idling(&t->tab[i]->parms))
186 qavg +=t->tab[i]->parms.qavg; 185 qavg += t->tab[i]->parms.qavg;
187 } 186 }
188 187
189 } 188 }
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
203 gred_store_wred_set(t, q); 202 gred_store_wred_set(t, q);
204 203
205 switch (red_action(&q->parms, q->parms.qavg + qavg)) { 204 switch (red_action(&q->parms, q->parms.qavg + qavg)) {
206 case RED_DONT_MARK: 205 case RED_DONT_MARK:
207 break; 206 break;
208 207
209 case RED_PROB_MARK: 208 case RED_PROB_MARK:
210 sch->qstats.overlimits++; 209 sch->qstats.overlimits++;
211 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { 210 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
212 q->stats.prob_drop++; 211 q->stats.prob_drop++;
213 goto congestion_drop; 212 goto congestion_drop;
214 } 213 }
215 214
216 q->stats.prob_mark++; 215 q->stats.prob_mark++;
217 break; 216 break;
218 217
219 case RED_HARD_MARK: 218 case RED_HARD_MARK:
220 sch->qstats.overlimits++; 219 sch->qstats.overlimits++;
221 if (gred_use_harddrop(t) || !gred_use_ecn(t) || 220 if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
222 !INET_ECN_set_ce(skb)) { 221 !INET_ECN_set_ce(skb)) {
223 q->stats.forced_drop++; 222 q->stats.forced_drop++;
224 goto congestion_drop; 223 goto congestion_drop;
225 } 224 }
226 q->stats.forced_mark++; 225 q->stats.forced_mark++;
227 break; 226 break;
228 } 227 }
229 228
230 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { 229 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
241 return NET_XMIT_CN; 240 return NET_XMIT_CN;
242} 241}
243 242
244static struct sk_buff *gred_dequeue(struct Qdisc* sch) 243static struct sk_buff *gred_dequeue(struct Qdisc *sch)
245{ 244{
246 struct sk_buff *skb; 245 struct sk_buff *skb;
247 struct gred_sched *t = qdisc_priv(sch); 246 struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
254 253
255 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 254 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
256 if (net_ratelimit()) 255 if (net_ratelimit())
257 printk(KERN_WARNING "GRED: Unable to relocate " 256 pr_warning("GRED: Unable to relocate VQ 0x%x "
258 "VQ 0x%x after dequeue, screwing up " 257 "after dequeue, screwing up "
259 "backlog.\n", tc_index_to_dp(skb)); 258 "backlog.\n", tc_index_to_dp(skb));
260 } else { 259 } else {
261 q->backlog -= qdisc_pkt_len(skb); 260 q->backlog -= qdisc_pkt_len(skb);
262 261
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
273 return NULL; 272 return NULL;
274} 273}
275 274
276static unsigned int gred_drop(struct Qdisc* sch) 275static unsigned int gred_drop(struct Qdisc *sch)
277{ 276{
278 struct sk_buff *skb; 277 struct sk_buff *skb;
279 struct gred_sched *t = qdisc_priv(sch); 278 struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
286 285
287 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 286 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
288 if (net_ratelimit()) 287 if (net_ratelimit())
289 printk(KERN_WARNING "GRED: Unable to relocate " 288 pr_warning("GRED: Unable to relocate VQ 0x%x "
290 "VQ 0x%x while dropping, screwing up " 289 "while dropping, screwing up "
291 "backlog.\n", tc_index_to_dp(skb)); 290 "backlog.\n", tc_index_to_dp(skb));
292 } else { 291 } else {
293 q->backlog -= len; 292 q->backlog -= len;
294 q->stats.other++; 293 q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
308 307
309} 308}
310 309
311static void gred_reset(struct Qdisc* sch) 310static void gred_reset(struct Qdisc *sch)
312{ 311{
313 int i; 312 int i;
314 struct gred_sched *t = qdisc_priv(sch); 313 struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
369 368
370 for (i = table->DPs; i < MAX_DPs; i++) { 369 for (i = table->DPs; i < MAX_DPs; i++) {
371 if (table->tab[i]) { 370 if (table->tab[i]) {
372 printk(KERN_WARNING "GRED: Warning: Destroying " 371 pr_warning("GRED: Warning: Destroying "
373 "shadowed VQ 0x%x\n", i); 372 "shadowed VQ 0x%x\n", i);
374 gred_destroy_vq(table->tab[i]); 373 gred_destroy_vq(table->tab[i]);
375 table->tab[i] = NULL; 374 table->tab[i] = NULL;
376 } 375 }
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 14a799de1c3..6488e642565 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
81 * that are expensive on 32-bit architectures. 81 * that are expensive on 32-bit architectures.
82 */ 82 */
83 83
84struct internal_sc 84struct internal_sc {
85{
86 u64 sm1; /* scaled slope of the 1st segment */ 85 u64 sm1; /* scaled slope of the 1st segment */
87 u64 ism1; /* scaled inverse-slope of the 1st segment */ 86 u64 ism1; /* scaled inverse-slope of the 1st segment */
88 u64 dx; /* the x-projection of the 1st segment */ 87 u64 dx; /* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
92}; 91};
93 92
94/* runtime service curve */ 93/* runtime service curve */
95struct runtime_sc 94struct runtime_sc {
96{
97 u64 x; /* current starting position on x-axis */ 95 u64 x; /* current starting position on x-axis */
98 u64 y; /* current starting position on y-axis */ 96 u64 y; /* current starting position on y-axis */
99 u64 sm1; /* scaled slope of the 1st segment */ 97 u64 sm1; /* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
104 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 102 u64 ism2; /* scaled inverse-slope of the 2nd segment */
105}; 103};
106 104
107enum hfsc_class_flags 105enum hfsc_class_flags {
108{
109 HFSC_RSC = 0x1, 106 HFSC_RSC = 0x1,
110 HFSC_FSC = 0x2, 107 HFSC_FSC = 0x2,
111 HFSC_USC = 0x4 108 HFSC_USC = 0x4
112}; 109};
113 110
114struct hfsc_class 111struct hfsc_class {
115{
116 struct Qdisc_class_common cl_common; 112 struct Qdisc_class_common cl_common;
117 unsigned int refcnt; /* usage count */ 113 unsigned int refcnt; /* usage count */
118 114
@@ -140,8 +136,8 @@ struct hfsc_class
140 u64 cl_cumul; /* cumulative work in bytes done by 136 u64 cl_cumul; /* cumulative work in bytes done by
141 real-time criteria */ 137 real-time criteria */
142 138
143 u64 cl_d; /* deadline*/ 139 u64 cl_d; /* deadline*/
144 u64 cl_e; /* eligible time */ 140 u64 cl_e; /* eligible time */
145 u64 cl_vt; /* virtual time */ 141 u64 cl_vt; /* virtual time */
146 u64 cl_f; /* time when this class will fit for 142 u64 cl_f; /* time when this class will fit for
147 link-sharing, max(myf, cfmin) */ 143 link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
176 unsigned long cl_nactive; /* number of active children */ 172 unsigned long cl_nactive; /* number of active children */
177}; 173};
178 174
179struct hfsc_sched 175struct hfsc_sched {
180{
181 u16 defcls; /* default class id */ 176 u16 defcls; /* default class id */
182 struct hfsc_class root; /* root class */ 177 struct hfsc_class root; /* root class */
183 struct Qdisc_class_hash clhash; /* class hash */ 178 struct Qdisc_class_hash clhash; /* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
693 if (go_active) { 688 if (go_active) {
694 n = rb_last(&cl->cl_parent->vt_tree); 689 n = rb_last(&cl->cl_parent->vt_tree);
695 if (n != NULL) { 690 if (n != NULL) {
696 max_cl = rb_entry(n, struct hfsc_class,vt_node); 691 max_cl = rb_entry(n, struct hfsc_class, vt_node);
697 /* 692 /*
698 * set vt to the average of the min and max 693 * set vt to the average of the min and max
699 * classes. if the parent's period didn't 694 * classes. if the parent's period didn't
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1177 return NULL; 1172 return NULL;
1178 } 1173 }
1179#endif 1174#endif
1180 if ((cl = (struct hfsc_class *)res.class) == NULL) { 1175 cl = (struct hfsc_class *)res.class;
1181 if ((cl = hfsc_find_class(res.classid, sch)) == NULL) 1176 if (!cl) {
1177 cl = hfsc_find_class(res.classid, sch);
1178 if (!cl)
1182 break; /* filter selected invalid classid */ 1179 break; /* filter selected invalid classid */
1183 if (cl->level >= head->level) 1180 if (cl->level >= head->level)
1184 break; /* filter may only point downwards */ 1181 break; /* filter may only point downwards */
@@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
1316 return -1; 1313 return -1;
1317} 1314}
1318 1315
1319static inline int 1316static int
1320hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) 1317hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
1321{ 1318{
1322 if ((cl->cl_flags & HFSC_RSC) && 1319 if ((cl->cl_flags & HFSC_RSC) &&
@@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1420 struct hfsc_class *cl; 1417 struct hfsc_class *cl;
1421 u64 next_time = 0; 1418 u64 next_time = 0;
1422 1419
1423 if ((cl = eltree_get_minel(q)) != NULL) 1420 cl = eltree_get_minel(q);
1421 if (cl)
1424 next_time = cl->cl_e; 1422 next_time = cl->cl_e;
1425 if (q->root.cl_cfmin != 0) { 1423 if (q->root.cl_cfmin != 0) {
1426 if (next_time == 0 || next_time > q->root.cl_cfmin) 1424 if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1625,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
1625 * find the class with the minimum deadline among 1623 * find the class with the minimum deadline among
1626 * the eligible classes. 1624 * the eligible classes.
1627 */ 1625 */
1628 if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { 1626 cl = eltree_get_mindl(q, cur_time);
1627 if (cl) {
1629 realtime = 1; 1628 realtime = 1;
1630 } else { 1629 } else {
1631 /* 1630 /*
@@ -1664,7 +1663,7 @@ hfsc_dequeue(struct Qdisc *sch)
1664 set_passive(cl); 1663 set_passive(cl);
1665 } 1664 }
1666 1665
1667 sch->flags &= ~TCQ_F_THROTTLED; 1666 qdisc_unthrottled(sch);
1668 qdisc_bstats_update(sch, skb); 1667 qdisc_bstats_update(sch, skb);
1669 sch->q.qlen--; 1668 sch->q.qlen--;
1670 1669
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index fc12fe6f559..e1429a85091 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@ struct htb_class {
99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ 99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ 100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
101 /* When class changes from state 1->2 and disconnects from 101 /* When class changes from state 1->2 and disconnects from
102 parent's feed then we lost ptr value and start from the 102 * parent's feed then we lost ptr value and start from the
103 first child again. Here we store classid of the 103 * first child again. Here we store classid of the
104 last valid ptr (used when ptr is NULL). */ 104 * last valid ptr (used when ptr is NULL).
105 */
105 u32 last_ptr_id[TC_HTB_NUMPRIO]; 106 u32 last_ptr_id[TC_HTB_NUMPRIO];
106 } inner; 107 } inner;
107 } un; 108 } un;
@@ -185,7 +186,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 185 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull 186 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
 186 * then finish and return direct queue. 187 * then finish and return direct queue.
 187 */ 188 */
188#define HTB_DIRECT (struct htb_class*)-1 189#define HTB_DIRECT ((struct htb_class *)-1L)
189 190
190static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, 191static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
191 int *qerr) 192 int *qerr)
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
197 int result; 198 int result;
198 199
199 /* allow to select class by setting skb->priority to valid classid; 200 /* allow to select class by setting skb->priority to valid classid;
200 note that nfmark can be used too by attaching filter fw with no 201 * note that nfmark can be used too by attaching filter fw with no
201 rules in it */ 202 * rules in it
203 */
202 if (skb->priority == sch->handle) 204 if (skb->priority == sch->handle)
203 return HTB_DIRECT; /* X:0 (direct flow) selected */ 205 return HTB_DIRECT; /* X:0 (direct flow) selected */
204 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) 206 cl = htb_find(skb->priority, sch);
207 if (cl && cl->level == 0)
205 return cl; 208 return cl;
206 209
207 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 210 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 return NULL; 219 return NULL;
217 } 220 }
218#endif 221#endif
219 if ((cl = (void *)res.class) == NULL) { 222 cl = (void *)res.class;
223 if (!cl) {
220 if (res.classid == sch->handle) 224 if (res.classid == sch->handle)
221 return HTB_DIRECT; /* X:0 (direct flow) */ 225 return HTB_DIRECT; /* X:0 (direct flow) */
222 if ((cl = htb_find(res.classid, sch)) == NULL) 226 cl = htb_find(res.classid, sch);
227 if (!cl)
223 break; /* filter selected invalid classid */ 228 break; /* filter selected invalid classid */
224 } 229 }
225 if (!cl->level) 230 if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
378 383
379 if (p->un.inner.feed[prio].rb_node) 384 if (p->un.inner.feed[prio].rb_node)
380 /* parent already has its feed in use so that 385 /* parent already has its feed in use so that
381 reset bit in mask as parent is already ok */ 386 * reset bit in mask as parent is already ok
387 */
382 mask &= ~(1 << prio); 388 mask &= ~(1 << prio);
383 389
384 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); 390 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
413 419
414 if (p->un.inner.ptr[prio] == cl->node + prio) { 420 if (p->un.inner.ptr[prio] == cl->node + prio) {
415 /* we are removing child which is pointed to from 421 /* we are removing child which is pointed to from
416 parent feed - forget the pointer but remember 422 * parent feed - forget the pointer but remember
417 classid */ 423 * classid
424 */
418 p->un.inner.last_ptr_id[prio] = cl->common.classid; 425 p->un.inner.last_ptr_id[prio] = cl->common.classid;
419 p->un.inner.ptr[prio] = NULL; 426 p->un.inner.ptr[prio] = NULL;
420 } 427 }
@@ -663,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
663 unsigned long start) 670 unsigned long start)
664{ 671{
665 /* don't run for longer than 2 jiffies; 2 is used instead of 672 /* don't run for longer than 2 jiffies; 2 is used instead of
666 1 to simplify things when jiffy is going to be incremented 673 * 1 to simplify things when jiffy is going to be incremented
667 too soon */ 674 * too soon
675 */
668 unsigned long stop_at = start + 2; 676 unsigned long stop_at = start + 2;
669 while (time_before(jiffies, stop_at)) { 677 while (time_before(jiffies, stop_at)) {
670 struct htb_class *cl; 678 struct htb_class *cl;
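htb_do_events() bounds its work with time_before(jiffies, start + 2), which stays correct even when the jiffies counter wraps. The trick is to look at the sign of the unsigned difference; a runnable userspace sketch (helper name is ours):

#include <stdio.h>

typedef unsigned long jiffies_t;

/* same idea as the kernel macro: a signed view of the unsigned difference */
static int time_before_sk(jiffies_t a, jiffies_t b)
{
	return (long)(a - b) < 0;
}

int main(void)
{
	jiffies_t start = (jiffies_t)-1;	/* one tick before wraparound */
	jiffies_t stop_at = start + 2;		/* wraps around to 1 */
	unsigned int iterations = 0;

	for (jiffies_t now = start; time_before_sk(now, stop_at); now++)
		iterations++;
	printf("ran for %u jiffies across the wrap\n", iterations);
	return 0;
}

A plain "now < stop_at" comparison would run zero iterations here; the signed-difference form runs exactly two.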
@@ -687,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
687 695
688 /* too much load - let's continue after a break for scheduling */ 696 /* too much load - let's continue after a break for scheduling */
689 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { 697 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
690 printk(KERN_WARNING "htb: too many events!\n"); 698 pr_warning("htb: too many events!\n");
691 q->warned |= HTB_WARN_TOOMANYEVENTS; 699 q->warned |= HTB_WARN_TOOMANYEVENTS;
692 } 700 }
693 701
@@ -695,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
695} 703}
696 704
697/* Returns class->node+prio from id-tree where class's id is >= id. NULL 705/* Returns class->node+prio from id-tree where class's id is >= id. NULL
698 if no such one exists. */ 706 * if no such one exists.
 707 */
699static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, 708static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
700 u32 id) 709 u32 id)
701{ 710{
@@ -739,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
739 for (i = 0; i < 65535; i++) { 748 for (i = 0; i < 65535; i++) {
740 if (!*sp->pptr && *sp->pid) { 749 if (!*sp->pptr && *sp->pid) {
741 /* ptr was invalidated but id is valid - try to recover 750 /* ptr was invalidated but id is valid - try to recover
742 the original or next ptr */ 751 * the original or next ptr
752 */
743 *sp->pptr = 753 *sp->pptr =
744 htb_id_find_next_upper(prio, sp->root, *sp->pid); 754 htb_id_find_next_upper(prio, sp->root, *sp->pid);
745 } 755 }
746 *sp->pid = 0; /* ptr is valid now so that remove this hint as it 756 *sp->pid = 0; /* ptr is valid now so that remove this hint as it
747 can become out of date quickly */ 757 * can become out of date quickly
758 */
748 if (!*sp->pptr) { /* we are at right end; rewind & go up */ 759 if (!*sp->pptr) { /* we are at right end; rewind & go up */
749 *sp->pptr = sp->root; 760 *sp->pptr = sp->root;
750 while ((*sp->pptr)->rb_left) 761 while ((*sp->pptr)->rb_left)
@@ -772,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
772} 783}
773 784
774/* dequeues packet at given priority and level; call only if 785/* dequeues packet at given priority and level; call only if
775 you are sure that there is active class at prio/level */ 786 * you are sure that there is active class at prio/level
787 */
776static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, 788static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
777 int level) 789 int level)
778{ 790{
@@ -789,9 +801,10 @@ next:
789 return NULL; 801 return NULL;
790 802
791 /* class can be empty - it is unlikely but can be true if leaf 803 /* class can be empty - it is unlikely but can be true if leaf
792 qdisc drops packets in enqueue routine or if someone used 804 * qdisc drops packets in enqueue routine or if someone used
793 graft operation on the leaf since last dequeue; 805 * graft operation on the leaf since last dequeue;
794 simply deactivate and skip such class */ 806 * simply deactivate and skip such class
807 */
795 if (unlikely(cl->un.leaf.q->q.qlen == 0)) { 808 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
796 struct htb_class *next; 809 struct htb_class *next;
797 htb_deactivate(q, cl); 810 htb_deactivate(q, cl);
@@ -831,7 +844,8 @@ next:
831 ptr[0]) + prio); 844 ptr[0]) + prio);
832 } 845 }
833 /* this used to be after charge_class but this constellation 846 /* this used to be after charge_class but this constellation
834 gives us slightly better performance */ 847 * gives us slightly better performance
848 */
835 if (!cl->un.leaf.q->q.qlen) 849 if (!cl->un.leaf.q->q.qlen)
836 htb_deactivate(q, cl); 850 htb_deactivate(q, cl);
837 htb_charge_class(q, cl, level, skb); 851 htb_charge_class(q, cl, level, skb);
@@ -852,7 +866,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
852 if (skb != NULL) { 866 if (skb != NULL) {
853ok: 867ok:
854 qdisc_bstats_update(sch, skb); 868 qdisc_bstats_update(sch, skb);
855 sch->flags &= ~TCQ_F_THROTTLED; 869 qdisc_unthrottled(sch);
856 sch->q.qlen--; 870 sch->q.qlen--;
857 return skb; 871 return skb;
858 } 872 }
@@ -883,6 +897,7 @@ ok:
883 m = ~q->row_mask[level]; 897 m = ~q->row_mask[level];
884 while (m != (int)(-1)) { 898 while (m != (int)(-1)) {
885 int prio = ffz(m); 899 int prio = ffz(m);
900
886 m |= 1 << prio; 901 m |= 1 << prio;
887 skb = htb_dequeue_tree(q, prio, level); 902 skb = htb_dequeue_tree(q, prio, level);
888 if (likely(skb != NULL)) 903 if (likely(skb != NULL))
@@ -987,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
987 return err; 1002 return err;
988 1003
989 if (tb[TCA_HTB_INIT] == NULL) { 1004 if (tb[TCA_HTB_INIT] == NULL) {
990 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); 1005 pr_err("HTB: hey probably you have bad tc tool ?\n");
991 return -EINVAL; 1006 return -EINVAL;
992 } 1007 }
993 gopt = nla_data(tb[TCA_HTB_INIT]); 1008 gopt = nla_data(tb[TCA_HTB_INIT]);
994 if (gopt->version != HTB_VER >> 16) { 1009 if (gopt->version != HTB_VER >> 16) {
995 printk(KERN_ERR 1010 pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
996 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
997 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); 1011 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
998 return -EINVAL; 1012 return -EINVAL;
999 } 1013 }
@@ -1206,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
1206 cancel_work_sync(&q->work); 1220 cancel_work_sync(&q->work);
1207 qdisc_watchdog_cancel(&q->watchdog); 1221 qdisc_watchdog_cancel(&q->watchdog);
1208 /* This line used to be after htb_destroy_class call below 1222 /* This line used to be after htb_destroy_class call below
1209 and surprisingly it worked in 2.4. But it must precede it 1223 * and surprisingly it worked in 2.4. But it must precede it
1210 because filters need their target class alive to be able to call 1224 * because filters need their target class alive to be able to call
1211 unbind_filter on it (without Oops). */ 1225 * unbind_filter on it (without Oops).
1226 */
1212 tcf_destroy_chain(&q->filter_list); 1227 tcf_destroy_chain(&q->filter_list);
1213 1228
1214 for (i = 0; i < q->clhash.hashsize; i++) { 1229 for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1342,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1342 1357
1343 /* check maximal depth */ 1358 /* check maximal depth */
1344 if (parent && parent->parent && parent->parent->level < 2) { 1359 if (parent && parent->parent && parent->parent->level < 2) {
1345 printk(KERN_ERR "htb: tree is too deep\n"); 1360 pr_err("htb: tree is too deep\n");
1346 goto failure; 1361 goto failure;
1347 } 1362 }
1348 err = -ENOBUFS; 1363 err = -ENOBUFS;
1349 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) 1364 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1365 if (!cl)
1350 goto failure; 1366 goto failure;
1351 1367
1352 err = gen_new_estimator(&cl->bstats, &cl->rate_est, 1368 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1366,8 +1382,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1366 RB_CLEAR_NODE(&cl->node[prio]); 1382 RB_CLEAR_NODE(&cl->node[prio]);
1367 1383
1368 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1384 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1369 so that can't be used inside of sch_tree_lock 1385 * so that can't be used inside of sch_tree_lock
1370 -- thanks to Karlis Peisenieks */ 1386 * -- thanks to Karlis Peisenieks
1387 */
1371 new_q = qdisc_create_dflt(sch->dev_queue, 1388 new_q = qdisc_create_dflt(sch->dev_queue,
1372 &pfifo_qdisc_ops, classid); 1389 &pfifo_qdisc_ops, classid);
1373 sch_tree_lock(sch); 1390 sch_tree_lock(sch);
@@ -1419,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1419 } 1436 }
1420 1437
1421 /* it used to be a nasty bug here, we have to check that node 1438 /* it used to be a nasty bug here, we have to check that node
1422 is really leaf before changing cl->un.leaf ! */ 1439 * is really leaf before changing cl->un.leaf !
1440 */
1423 if (!cl->level) { 1441 if (!cl->level) {
1424 cl->quantum = rtab->rate.rate / q->rate2quantum; 1442 cl->quantum = rtab->rate.rate / q->rate2quantum;
1425 if (!hopt->quantum && cl->quantum < 1000) { 1443 if (!hopt->quantum && cl->quantum < 1000) {
1426 printk(KERN_WARNING 1444 pr_warning(
1427 "HTB: quantum of class %X is small. Consider r2q change.\n", 1445 "HTB: quantum of class %X is small. Consider r2q change.\n",
1428 cl->common.classid); 1446 cl->common.classid);
1429 cl->quantum = 1000; 1447 cl->quantum = 1000;
1430 } 1448 }
1431 if (!hopt->quantum && cl->quantum > 200000) { 1449 if (!hopt->quantum && cl->quantum > 200000) {
1432 printk(KERN_WARNING 1450 pr_warning(
1433 "HTB: quantum of class %X is big. Consider r2q change.\n", 1451 "HTB: quantum of class %X is big. Consider r2q change.\n",
1434 cl->common.classid); 1452 cl->common.classid);
1435 cl->quantum = 200000; 1453 cl->quantum = 200000;
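The two warnings above guard the quantum HTB derives from the class rate and the r2q divisor. A runnable sketch of the derivation and clamping, with the bounds copied from the hunk (function name and sample rate are ours):

#include <stdio.h>

static unsigned int htb_quantum_sk(unsigned int rate_Bps, unsigned int r2q)
{
	unsigned int quantum = rate_Bps / r2q;

	if (quantum < 1000)
		quantum = 1000;		/* the "quantum is small" case */
	else if (quantum > 200000)
		quantum = 200000;	/* the "quantum is big" case */
	return quantum;
}

int main(void)
{
	/* 1 Mbit/s is 125000 B/s; with the default r2q of 10 the class
	 * is served in 12500-byte rounds */
	printf("quantum = %u bytes\n", htb_quantum_sk(125000, 10));
	return 0;
}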
@@ -1478,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1478 struct htb_class *cl = htb_find(classid, sch); 1496 struct htb_class *cl = htb_find(classid, sch);
1479 1497
1480 /*if (cl && !cl->level) return 0; 1498 /*if (cl && !cl->level) return 0;
1481 The line above used to be there to prevent attaching filters to 1499 * The line above used to be there to prevent attaching filters to
1482 leaves. But at least tc_index filter uses this just to get class 1500 * leaves. But at least tc_index filter uses this just to get class
1483 for other reasons so that we have to allow for it. 1501 * for other reasons so that we have to allow for it.
1484 ---- 1502 * ----
1485 19.6.2002 As Werner explained it is ok - bind filter is just 1503 * 19.6.2002 As Werner explained it is ok - bind filter is just
1486 another way to "lock" the class - unlike "get" this lock can 1504 * another way to "lock" the class - unlike "get" this lock can
1487 be broken by class during destroy IIUC. 1505 * be broken by class during destroy IIUC.
1488 */ 1506 */
1489 if (cl) 1507 if (cl)
1490 cl->filter_cnt++; 1508 cl->filter_cnt++;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index ecc302f4d2a..ec5cbc84896 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
61 TC_H_MIN(ntx + 1))); 61 TC_H_MIN(ntx + 1)));
62 if (qdisc == NULL) 62 if (qdisc == NULL)
63 goto err; 63 goto err;
64 qdisc->flags |= TCQ_F_CAN_BYPASS;
65 priv->qdiscs[ntx] = qdisc; 64 priv->qdiscs[ntx] = qdisc;
66 } 65 }
67 66
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 00000000000..ea17cbed29e
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,418 @@
1/*
2 * net/sched/sch_mqprio.c
3 *
4 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/types.h>
12#include <linux/slab.h>
13#include <linux/kernel.h>
14#include <linux/string.h>
15#include <linux/errno.h>
16#include <linux/skbuff.h>
17#include <net/netlink.h>
18#include <net/pkt_sched.h>
19#include <net/sch_generic.h>
20
21struct mqprio_sched {
22 struct Qdisc **qdiscs;
23 int hw_owned;
24};
25
26static void mqprio_destroy(struct Qdisc *sch)
27{
28 struct net_device *dev = qdisc_dev(sch);
29 struct mqprio_sched *priv = qdisc_priv(sch);
30 unsigned int ntx;
31
32 if (priv->qdiscs) {
33 for (ntx = 0;
34 ntx < dev->num_tx_queues && priv->qdiscs[ntx];
35 ntx++)
36 qdisc_destroy(priv->qdiscs[ntx]);
37 kfree(priv->qdiscs);
38 }
39
40 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
41 dev->netdev_ops->ndo_setup_tc(dev, 0);
42 else
43 netdev_set_num_tc(dev, 0);
44}
45
46static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
47{
48 int i, j;
49
50 /* Verify num_tc is not out of max range */
51 if (qopt->num_tc > TC_MAX_QUEUE)
52 return -EINVAL;
53
54 /* Verify priority mapping uses valid tcs */
55 for (i = 0; i < TC_BITMASK + 1; i++) {
56 if (qopt->prio_tc_map[i] >= qopt->num_tc)
57 return -EINVAL;
58 }
59
60 /* net_device does not support requested operation */
61 if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
62 return -EINVAL;
63
 64 /* if hw owned, qcount and qoffset are taken from the LLD so
 65 * there is no reason to verify them here
66 */
67 if (qopt->hw)
68 return 0;
69
70 for (i = 0; i < qopt->num_tc; i++) {
71 unsigned int last = qopt->offset[i] + qopt->count[i];
72
 73 /* Verify the queue count is within the tx range; offset + count
 74 * equal to real_num_tx_queues means the last real queue is in use.
 75 */
76 if (qopt->offset[i] >= dev->real_num_tx_queues ||
77 !qopt->count[i] ||
78 last > dev->real_num_tx_queues)
79 return -EINVAL;
80
81 /* Verify that the offset and counts do not overlap */
82 for (j = i + 1; j < qopt->num_tc; j++) {
83 if (last > qopt->offset[j])
84 return -EINVAL;
85 }
86 }
87
88 return 0;
89}
90
91static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
92{
93 struct net_device *dev = qdisc_dev(sch);
94 struct mqprio_sched *priv = qdisc_priv(sch);
95 struct netdev_queue *dev_queue;
96 struct Qdisc *qdisc;
97 int i, err = -EOPNOTSUPP;
98 struct tc_mqprio_qopt *qopt = NULL;
99
100 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
101 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
102
103 if (sch->parent != TC_H_ROOT)
104 return -EOPNOTSUPP;
105
106 if (!netif_is_multiqueue(dev))
107 return -EOPNOTSUPP;
108
109 if (nla_len(opt) < sizeof(*qopt))
110 return -EINVAL;
111
112 qopt = nla_data(opt);
113 if (mqprio_parse_opt(dev, qopt))
114 return -EINVAL;
115
116 /* pre-allocate qdisc, attachment can't fail */
117 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
118 GFP_KERNEL);
119 if (priv->qdiscs == NULL) {
120 err = -ENOMEM;
121 goto err;
122 }
123
124 for (i = 0; i < dev->num_tx_queues; i++) {
125 dev_queue = netdev_get_tx_queue(dev, i);
126 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
127 TC_H_MAKE(TC_H_MAJ(sch->handle),
128 TC_H_MIN(i + 1)));
129 if (qdisc == NULL) {
130 err = -ENOMEM;
131 goto err;
132 }
133 priv->qdiscs[i] = qdisc;
134 }
135
136 /* If the mqprio options indicate that hardware should own
137 * the queue mapping then run ndo_setup_tc otherwise use the
138 * supplied and verified mapping
139 */
140 if (qopt->hw) {
141 priv->hw_owned = 1;
142 err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
143 if (err)
144 goto err;
145 } else {
146 netdev_set_num_tc(dev, qopt->num_tc);
147 for (i = 0; i < qopt->num_tc; i++)
148 netdev_set_tc_queue(dev, i,
149 qopt->count[i], qopt->offset[i]);
150 }
151
152 /* Always use supplied priority mappings */
153 for (i = 0; i < TC_BITMASK + 1; i++)
154 netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
155
156 sch->flags |= TCQ_F_MQROOT;
157 return 0;
158
159err:
160 mqprio_destroy(sch);
161 return err;
162}
163
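mqprio_init() above wires up a two-level mapping: skb priority selects a traffic class via prio_tc_map, and the traffic class selects a contiguous range of tx queues via count/offset. A runnable userspace sketch of the lookup (types and the pick-first-queue policy are simplifications of the real queue selection):

#include <stdio.h>

#define TC_BITMASK_SK 15

struct tc_txq_sk { unsigned int count, offset; };

static unsigned int txq_for_prio(unsigned int prio,
				 const unsigned char *prio_tc_map,
				 const struct tc_txq_sk *tc_to_txq)
{
	unsigned int tc = prio_tc_map[prio & TC_BITMASK_SK];

	/* the real code picks a queue within [offset, offset + count) */
	return tc_to_txq[tc].offset;
}

int main(void)
{
	unsigned char prio_tc_map[TC_BITMASK_SK + 1] = { 0 };
	struct tc_txq_sk tc_to_txq[2] = {
		{ .count = 2, .offset = 0 },	/* tc 0 -> queues 0..1 */
		{ .count = 2, .offset = 2 },	/* tc 1 -> queues 2..3 */
	};

	prio_tc_map[5] = 1;
	printf("prio 5 -> txq %u\n", txq_for_prio(5, prio_tc_map, tc_to_txq));
	return 0;
}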
164static void mqprio_attach(struct Qdisc *sch)
165{
166 struct net_device *dev = qdisc_dev(sch);
167 struct mqprio_sched *priv = qdisc_priv(sch);
168 struct Qdisc *qdisc;
169 unsigned int ntx;
170
171 /* Attach underlying qdisc */
172 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
173 qdisc = priv->qdiscs[ntx];
174 qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
175 if (qdisc)
176 qdisc_destroy(qdisc);
177 }
178 kfree(priv->qdiscs);
179 priv->qdiscs = NULL;
180}
181
182static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
183 unsigned long cl)
184{
185 struct net_device *dev = qdisc_dev(sch);
186 unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
187
188 if (ntx >= dev->num_tx_queues)
189 return NULL;
190 return netdev_get_tx_queue(dev, ntx);
191}
192
193static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
194 struct Qdisc **old)
195{
196 struct net_device *dev = qdisc_dev(sch);
197 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
198
199 if (!dev_queue)
200 return -EINVAL;
201
202 if (dev->flags & IFF_UP)
203 dev_deactivate(dev);
204
205 *old = dev_graft_qdisc(dev_queue, new);
206
207 if (dev->flags & IFF_UP)
208 dev_activate(dev);
209
210 return 0;
211}
212
213static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
214{
215 struct net_device *dev = qdisc_dev(sch);
216 struct mqprio_sched *priv = qdisc_priv(sch);
217 unsigned char *b = skb_tail_pointer(skb);
218 struct tc_mqprio_qopt opt = { 0 };
219 struct Qdisc *qdisc;
220 unsigned int i;
221
222 sch->q.qlen = 0;
223 memset(&sch->bstats, 0, sizeof(sch->bstats));
224 memset(&sch->qstats, 0, sizeof(sch->qstats));
225
226 for (i = 0; i < dev->num_tx_queues; i++) {
227 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
228 spin_lock_bh(qdisc_lock(qdisc));
229 sch->q.qlen += qdisc->q.qlen;
230 sch->bstats.bytes += qdisc->bstats.bytes;
231 sch->bstats.packets += qdisc->bstats.packets;
232 sch->qstats.qlen += qdisc->qstats.qlen;
233 sch->qstats.backlog += qdisc->qstats.backlog;
234 sch->qstats.drops += qdisc->qstats.drops;
235 sch->qstats.requeues += qdisc->qstats.requeues;
236 sch->qstats.overlimits += qdisc->qstats.overlimits;
237 spin_unlock_bh(qdisc_lock(qdisc));
238 }
239
240 opt.num_tc = netdev_get_num_tc(dev);
241 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
242 opt.hw = priv->hw_owned;
243
244 for (i = 0; i < netdev_get_num_tc(dev); i++) {
245 opt.count[i] = dev->tc_to_txq[i].count;
246 opt.offset[i] = dev->tc_to_txq[i].offset;
247 }
248
249 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
250
251 return skb->len;
252nla_put_failure:
253 nlmsg_trim(skb, b);
254 return -1;
255}
256
257static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
258{
259 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
260
261 if (!dev_queue)
262 return NULL;
263
264 return dev_queue->qdisc_sleeping;
265}
266
267static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
268{
269 struct net_device *dev = qdisc_dev(sch);
270 unsigned int ntx = TC_H_MIN(classid);
271
272 if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
273 return 0;
274 return ntx;
275}
276
277static void mqprio_put(struct Qdisc *sch, unsigned long cl)
278{
279}
280
281static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
282 struct sk_buff *skb, struct tcmsg *tcm)
283{
284 struct net_device *dev = qdisc_dev(sch);
285
286 if (cl <= netdev_get_num_tc(dev)) {
287 tcm->tcm_parent = TC_H_ROOT;
288 tcm->tcm_info = 0;
289 } else {
290 int i;
291 struct netdev_queue *dev_queue;
292
293 dev_queue = mqprio_queue_get(sch, cl);
294 tcm->tcm_parent = 0;
295 for (i = 0; i < netdev_get_num_tc(dev); i++) {
296 struct netdev_tc_txq tc = dev->tc_to_txq[i];
297 int q_idx = cl - netdev_get_num_tc(dev);
298
299 if (q_idx > tc.offset &&
300 q_idx <= tc.offset + tc.count) {
301 tcm->tcm_parent =
302 TC_H_MAKE(TC_H_MAJ(sch->handle),
303 TC_H_MIN(i + 1));
304 break;
305 }
306 }
307 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
308 }
309 tcm->tcm_handle |= TC_H_MIN(cl);
310 return 0;
311}
312
313static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
314 struct gnet_dump *d)
315 __releases(d->lock)
316 __acquires(d->lock)
317{
318 struct net_device *dev = qdisc_dev(sch);
319
320 if (cl <= netdev_get_num_tc(dev)) {
321 int i;
322 struct Qdisc *qdisc;
323 struct gnet_stats_queue qstats = {0};
324 struct gnet_stats_basic_packed bstats = {0};
325 struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
326
 327 /* Drop the lock here; it will be reacquired before touching
 328 * statistics. This is required because the d->lock held
 329 * here is the lock on dev_queue->qdisc_sleeping, which is
 330 * also acquired below.
 331 */
332 spin_unlock_bh(d->lock);
333
334 for (i = tc.offset; i < tc.offset + tc.count; i++) {
335 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
336 spin_lock_bh(qdisc_lock(qdisc));
337 bstats.bytes += qdisc->bstats.bytes;
338 bstats.packets += qdisc->bstats.packets;
339 qstats.qlen += qdisc->qstats.qlen;
340 qstats.backlog += qdisc->qstats.backlog;
341 qstats.drops += qdisc->qstats.drops;
342 qstats.requeues += qdisc->qstats.requeues;
343 qstats.overlimits += qdisc->qstats.overlimits;
344 spin_unlock_bh(qdisc_lock(qdisc));
345 }
346 /* Reclaim root sleeping lock before completing stats */
347 spin_lock_bh(d->lock);
348 if (gnet_stats_copy_basic(d, &bstats) < 0 ||
349 gnet_stats_copy_queue(d, &qstats) < 0)
350 return -1;
351 } else {
352 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
353
354 sch = dev_queue->qdisc_sleeping;
355 sch->qstats.qlen = sch->q.qlen;
356 if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
357 gnet_stats_copy_queue(d, &sch->qstats) < 0)
358 return -1;
359 }
360 return 0;
361}
362
363static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
364{
365 struct net_device *dev = qdisc_dev(sch);
366 unsigned long ntx;
367
368 if (arg->stop)
369 return;
370
371 /* Walk hierarchy with a virtual class per tc */
372 arg->count = arg->skip;
373 for (ntx = arg->skip;
374 ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
375 ntx++) {
376 if (arg->fn(sch, ntx + 1, arg) < 0) {
377 arg->stop = 1;
378 break;
379 }
380 arg->count++;
381 }
382}
383
384static const struct Qdisc_class_ops mqprio_class_ops = {
385 .graft = mqprio_graft,
386 .leaf = mqprio_leaf,
387 .get = mqprio_get,
388 .put = mqprio_put,
389 .walk = mqprio_walk,
390 .dump = mqprio_dump_class,
391 .dump_stats = mqprio_dump_class_stats,
392};
393
394static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
395 .cl_ops = &mqprio_class_ops,
396 .id = "mqprio",
397 .priv_size = sizeof(struct mqprio_sched),
398 .init = mqprio_init,
399 .destroy = mqprio_destroy,
400 .attach = mqprio_attach,
401 .dump = mqprio_dump,
402 .owner = THIS_MODULE,
403};
404
405static int __init mqprio_module_init(void)
406{
407 return register_qdisc(&mqprio_qdisc_ops);
408}
409
410static void __exit mqprio_module_exit(void)
411{
412 unregister_qdisc(&mqprio_qdisc_ops);
413}
414
415module_init(mqprio_module_init);
416module_exit(mqprio_module_exit);
417
418MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 436a2e75b32..edc1950e0e7 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
156 unsigned int len; 156 unsigned int len;
157 struct Qdisc *qdisc; 157 struct Qdisc *qdisc;
158 158
159 for (band = q->bands-1; band >= 0; band--) { 159 for (band = q->bands - 1; band >= 0; band--) {
160 qdisc = q->queues[band]; 160 qdisc = q->queues[band];
161 if (qdisc->ops->drop) { 161 if (qdisc->ops->drop) {
162 len = qdisc->ops->drop(qdisc); 162 len = qdisc->ops->drop(qdisc);
@@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
265 for (i = 0; i < q->max_bands; i++) 265 for (i = 0; i < q->max_bands; i++)
266 q->queues[i] = &noop_qdisc; 266 q->queues[i] = &noop_qdisc;
267 267
268 err = multiq_tune(sch,opt); 268 err = multiq_tune(sch, opt);
269 269
270 if (err) 270 if (err)
271 kfree(q->queues); 271 kfree(q->queues);
@@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
346 struct multiq_sched_data *q = qdisc_priv(sch); 346 struct multiq_sched_data *q = qdisc_priv(sch);
347 347
348 tcm->tcm_handle |= TC_H_MIN(cl); 348 tcm->tcm_handle |= TC_H_MIN(cl);
349 tcm->tcm_info = q->queues[cl-1]->handle; 349 tcm->tcm_info = q->queues[cl - 1]->handle;
350 return 0; 350 return 0;
351} 351}
352 352
@@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
378 arg->count++; 378 arg->count++;
379 continue; 379 continue;
380 } 380 }
381 if (arg->fn(sch, band+1, arg) < 0) { 381 if (arg->fn(sch, band + 1, arg) < 0) {
382 arg->stop = 1; 382 arg->stop = 1;
383 break; 383 break;
384 } 384 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6a3006b38dc..edbbf7ad662 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -19,12 +19,13 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/errno.h> 20#include <linux/errno.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/vmalloc.h>
22#include <linux/rtnetlink.h> 23#include <linux/rtnetlink.h>
23 24
24#include <net/netlink.h> 25#include <net/netlink.h>
25#include <net/pkt_sched.h> 26#include <net/pkt_sched.h>
26 27
27#define VERSION "1.2" 28#define VERSION "1.3"
28 29
29/* Network Emulation Queuing algorithm. 30/* Network Emulation Queuing algorithm.
30 ==================================== 31 ====================================
@@ -47,6 +48,20 @@
47 layering other disciplines. It does not need to do bandwidth 48 layering other disciplines. It does not need to do bandwidth
48 control either since that can be handled by using token 49 control either since that can be handled by using token
49 bucket or other rate control. 50 bucket or other rate control.
51
52 Correlated Loss Generator models
53
54 Added generation of correlated loss according to the
 55 "Gilbert-Elliot" model, a 4-state Markov model.
56
57 References:
58 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
59 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
60 and intuitive loss model for packet networks and its implementation
61 in the Netem module in the Linux kernel", available in [1]
62
 63 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
64 Fabio Ludovici <fabio.ludovici at yahoo.it>
50*/ 65*/
51 66
52struct netem_sched_data { 67struct netem_sched_data {
@@ -73,6 +88,26 @@ struct netem_sched_data {
73 u32 size; 88 u32 size;
74 s16 table[0]; 89 s16 table[0];
75 } *delay_dist; 90 } *delay_dist;
91
92 enum {
93 CLG_RANDOM,
94 CLG_4_STATES,
95 CLG_GILB_ELL,
96 } loss_model;
97
98 /* Correlated Loss Generation models */
99 struct clgstate {
100 /* state of the Markov chain */
101 u8 state;
102
103 /* 4-states and Gilbert-Elliot models */
104 u32 a1; /* p13 for 4-states or p for GE */
105 u32 a2; /* p31 for 4-states or r for GE */
106 u32 a3; /* p32 for 4-states or h for GE */
107 u32 a4; /* p14 for 4-states or 1-k for GE */
108 u32 a5; /* p23 used only in 4-states */
109 } clg;
110
76}; 111};
77 112
78/* Time stamp put into socket buffer control block */ 113/* Time stamp put into socket buffer control block */
@@ -115,6 +150,122 @@ static u32 get_crandom(struct crndstate *state)
115 return answer; 150 return answer;
116} 151}
117 152
153/* loss_4state - 4-state model loss generator
154 * Generates losses according to the 4-state Markov chain adopted in
155 * the GI (General and Intuitive) loss model.
156 */
157static bool loss_4state(struct netem_sched_data *q)
158{
159 struct clgstate *clg = &q->clg;
160 u32 rnd = net_random();
161
162 /*
 163 * Makes a comparison between rnd and the transition
164 * probabilities outgoing from the current state, then decides the
165 * next state and if the next packet has to be transmitted or lost.
166 * The four states correspond to:
167 * 1 => successfully transmitted packets within a gap period
168 * 4 => isolated losses within a gap period
169 * 3 => lost packets within a burst period
170 * 2 => successfully transmitted packets within a burst period
171 */
172 switch (clg->state) {
173 case 1:
174 if (rnd < clg->a4) {
175 clg->state = 4;
176 return true;
177 } else if (clg->a4 < rnd && rnd < clg->a1) {
178 clg->state = 3;
179 return true;
180 } else if (clg->a1 < rnd)
181 clg->state = 1;
182
183 break;
184 case 2:
185 if (rnd < clg->a5) {
186 clg->state = 3;
187 return true;
188 } else
189 clg->state = 2;
190
191 break;
192 case 3:
193 if (rnd < clg->a3)
194 clg->state = 2;
195 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
196 clg->state = 1;
197 return true;
198 } else if (clg->a2 + clg->a3 < rnd) {
199 clg->state = 3;
200 return true;
201 }
202 break;
203 case 4:
204 clg->state = 1;
205 break;
206 }
207
208 return false;
209}
210
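The a1..a5 transition probabilities above are compared directly against net_random(), so they are fractions scaled to the full 32-bit range rather than percentages. A small runnable helper showing the encoding (the helper and sample value are ours):

#include <stdio.h>
#include <stdint.h>

/* Encode a probability as the u32 threshold compared with net_random(),
 * which is uniform over [0, 2^32). Requires p < 1.0. */
static uint32_t prob_to_u32(double p)
{
	return (uint32_t)(p * 4294967296.0);	/* p * 2^32 */
}

int main(void)
{
	/* e.g. p13: a 1% chance of entering the lossy burst state */
	printf("p13 = 0x%08x\n", (unsigned)prob_to_u32(0.01));
	return 0;
}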
211/* loss_gilb_ell - Gilbert-Elliot model loss generator
212 * Generates losses according to the Gilbert-Elliot loss model or
213 * its special cases (Gilbert or Simple Gilbert)
214 *
 215 * Makes a comparison between a random number and the transition
 216 * probabilities outgoing from the current state, then decides the
 217 * next state. A second random number is extracted and the comparison
218 * with the loss probability of the current state decides if the next
219 * packet will be transmitted or lost.
220 */
221static bool loss_gilb_ell(struct netem_sched_data *q)
222{
223 struct clgstate *clg = &q->clg;
224
225 switch (clg->state) {
226 case 1:
227 if (net_random() < clg->a1)
228 clg->state = 2;
229 if (net_random() < clg->a4)
230 return true;
231 case 2:
232 if (net_random() < clg->a2)
233 clg->state = 1;
234 if (clg->a3 > net_random())
235 return true;
236 }
237
238 return false;
239}
240
241static bool loss_event(struct netem_sched_data *q)
242{
243 switch (q->loss_model) {
244 case CLG_RANDOM:
245 /* Random packet drop 0 => none, ~0 => all */
246 return q->loss && q->loss >= get_crandom(&q->loss_cor);
247
248 case CLG_4_STATES:
249 /* 4state loss model algorithm (used also for GI model)
 250 * Extracts a value from the Markov 4-state loss generator,
251 * if it is 1 drops a packet and if needed writes the event in
252 * the kernel logs
253 */
254 return loss_4state(q);
255
256 case CLG_GILB_ELL:
257 /* Gilbert-Elliot loss model algorithm
258 * Extracts a value from the Gilbert-Elliot loss generator,
259 * if it is 1 drops a packet and if needed writes the event in
260 * the kernel logs
261 */
262 return loss_gilb_ell(q);
263 }
264
265 return false; /* not reached */
266}
267
268
118/* tabledist - return a pseudo-randomly distributed value with mean mu and 269/* tabledist - return a pseudo-randomly distributed value with mean mu and
119 * std deviation sigma. Uses table lookup to approximate the desired 270 * std deviation sigma. Uses table lookup to approximate the desired
120 * distribution, and a uniformly-distributed pseudo-random source. 271 * distribution, and a uniformly-distributed pseudo-random source.
@@ -161,14 +312,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
161 int ret; 312 int ret;
162 int count = 1; 313 int count = 1;
163 314
164 pr_debug("netem_enqueue skb=%p\n", skb);
165
166 /* Random duplication */ 315 /* Random duplication */
167 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 316 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
168 ++count; 317 ++count;
169 318
170 /* Random packet drop 0 => none, ~0 => all */ 319 /* Drop packet? */
171 if (q->loss && q->loss >= get_crandom(&q->loss_cor)) 320 if (loss_event(q))
172 --count; 321 --count;
173 322
174 if (count == 0) { 323 if (count == 0) {
@@ -211,8 +360,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
211 } 360 }
212 361
213 cb = netem_skb_cb(skb); 362 cb = netem_skb_cb(skb);
214 if (q->gap == 0 || /* not doing reordering */ 363 if (q->gap == 0 || /* not doing reordering */
215 q->counter < q->gap || /* inside last reordering gap */ 364 q->counter < q->gap || /* inside last reordering gap */
216 q->reorder < get_crandom(&q->reorder_cor)) { 365 q->reorder < get_crandom(&q->reorder_cor)) {
217 psched_time_t now; 366 psched_time_t now;
218 psched_tdiff_t delay; 367 psched_tdiff_t delay;
@@ -238,17 +387,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
238 ret = NET_XMIT_SUCCESS; 387 ret = NET_XMIT_SUCCESS;
239 } 388 }
240 389
241 if (likely(ret == NET_XMIT_SUCCESS)) { 390 if (ret != NET_XMIT_SUCCESS) {
242 sch->q.qlen++; 391 if (net_xmit_drop_count(ret)) {
243 } else if (net_xmit_drop_count(ret)) { 392 sch->qstats.drops++;
244 sch->qstats.drops++; 393 return ret;
394 }
245 } 395 }
246 396
247 pr_debug("netem: enqueue ret %d\n", ret); 397 sch->q.qlen++;
248 return ret; 398 return NET_XMIT_SUCCESS;
249} 399}
250 400
251static unsigned int netem_drop(struct Qdisc* sch) 401static unsigned int netem_drop(struct Qdisc *sch)
252{ 402{
253 struct netem_sched_data *q = qdisc_priv(sch); 403 struct netem_sched_data *q = qdisc_priv(sch);
254 unsigned int len = 0; 404 unsigned int len = 0;
@@ -265,7 +415,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
265 struct netem_sched_data *q = qdisc_priv(sch); 415 struct netem_sched_data *q = qdisc_priv(sch);
266 struct sk_buff *skb; 416 struct sk_buff *skb;
267 417
268 if (sch->flags & TCQ_F_THROTTLED) 418 if (qdisc_is_throttled(sch))
269 return NULL; 419 return NULL;
270 420
271 skb = q->qdisc->ops->peek(q->qdisc); 421 skb = q->qdisc->ops->peek(q->qdisc);
@@ -287,9 +437,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
287 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) 437 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
288 skb->tstamp.tv64 = 0; 438 skb->tstamp.tv64 = 0;
289#endif 439#endif
290 pr_debug("netem_dequeue: return skb=%p\n", skb); 440
291 qdisc_bstats_update(sch, skb);
292 sch->q.qlen--; 441 sch->q.qlen--;
442 qdisc_unthrottled(sch);
443 qdisc_bstats_update(sch, skb);
293 return skb; 444 return skb;
294 } 445 }
295 446
@@ -308,6 +459,16 @@ static void netem_reset(struct Qdisc *sch)
308 qdisc_watchdog_cancel(&q->watchdog); 459 qdisc_watchdog_cancel(&q->watchdog);
309} 460}
310 461
462static void dist_free(struct disttable *d)
463{
464 if (d) {
465 if (is_vmalloc_addr(d))
466 vfree(d);
467 else
468 kfree(d);
469 }
470}
471
311/* 472/*
312 * Distribution data is a variable size payload containing 473 * Distribution data is a variable size payload containing
313 * signed 16 bit values. 474 * signed 16 bit values.
@@ -315,16 +476,20 @@ static void netem_reset(struct Qdisc *sch)
315static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 476static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
316{ 477{
317 struct netem_sched_data *q = qdisc_priv(sch); 478 struct netem_sched_data *q = qdisc_priv(sch);
318 unsigned long n = nla_len(attr)/sizeof(__s16); 479 size_t n = nla_len(attr)/sizeof(__s16);
319 const __s16 *data = nla_data(attr); 480 const __s16 *data = nla_data(attr);
320 spinlock_t *root_lock; 481 spinlock_t *root_lock;
321 struct disttable *d; 482 struct disttable *d;
322 int i; 483 int i;
484 size_t s;
323 485
324 if (n > 65536) 486 if (n > NETEM_DIST_MAX)
325 return -EINVAL; 487 return -EINVAL;
326 488
327 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 489 s = sizeof(struct disttable) + n * sizeof(s16);
490 d = kmalloc(s, GFP_KERNEL);
491 if (!d)
492 d = vmalloc(s);
328 if (!d) 493 if (!d)
329 return -ENOMEM; 494 return -ENOMEM;
330 495
@@ -335,7 +500,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
335 root_lock = qdisc_root_sleeping_lock(sch); 500 root_lock = qdisc_root_sleeping_lock(sch);
336 501
337 spin_lock_bh(root_lock); 502 spin_lock_bh(root_lock);
338 kfree(q->delay_dist); 503 dist_free(q->delay_dist);
339 q->delay_dist = d; 504 q->delay_dist = d;
340 spin_unlock_bh(root_lock); 505 spin_unlock_bh(root_lock);
341 return 0; 506 return 0;
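get_dist_table() now falls back to vmalloc() when the distribution table is too big for a contiguous kmalloc(), and dist_free() picks the matching release routine via is_vmalloc_addr(). A condensed kernel-style sketch of the pairing (struct name is ours, and this is a fragment, not a standalone program):

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>		/* is_vmalloc_addr() */

struct dist_sketch {
	u32 size;
	s16 table[0];
};

static struct dist_sketch *dist_alloc(size_t n)
{
	size_t s = sizeof(struct dist_sketch) + n * sizeof(s16);
	struct dist_sketch *d;

	d = kmalloc(s, GFP_KERNEL);	/* contiguous fast path */
	if (!d)
		d = vmalloc(s);		/* big tables only need virtual contiguity */
	return d;
}

static void dist_release(struct dist_sketch *d)
{
	if (!d)
		return;
	if (is_vmalloc_addr(d))		/* the free must match the allocator */
		vfree(d);
	else
		kfree(d);
}

The one rule the pattern imposes is that every free site goes through the address test, which is exactly what the new dist_free() centralizes.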
@@ -369,10 +534,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
369 init_crandom(&q->corrupt_cor, r->correlation); 534 init_crandom(&q->corrupt_cor, r->correlation);
370} 535}
371 536
537static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
538{
539 struct netem_sched_data *q = qdisc_priv(sch);
540 const struct nlattr *la;
541 int rem;
542
543 nla_for_each_nested(la, attr, rem) {
544 u16 type = nla_type(la);
545
 546 switch (type) {
547 case NETEM_LOSS_GI: {
548 const struct tc_netem_gimodel *gi = nla_data(la);
549
550 if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
551 pr_info("netem: incorrect gi model size\n");
552 return -EINVAL;
553 }
554
555 q->loss_model = CLG_4_STATES;
556
557 q->clg.state = 1;
558 q->clg.a1 = gi->p13;
559 q->clg.a2 = gi->p31;
560 q->clg.a3 = gi->p32;
561 q->clg.a4 = gi->p14;
562 q->clg.a5 = gi->p23;
563 break;
564 }
565
566 case NETEM_LOSS_GE: {
567 const struct tc_netem_gemodel *ge = nla_data(la);
568
569 if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
 570 pr_info("netem: incorrect ge model size\n");
571 return -EINVAL;
572 }
573
574 q->loss_model = CLG_GILB_ELL;
575 q->clg.state = 1;
576 q->clg.a1 = ge->p;
577 q->clg.a2 = ge->r;
578 q->clg.a3 = ge->h;
579 q->clg.a4 = ge->k1;
580 break;
581 }
582
583 default:
584 pr_info("netem: unknown loss type %u\n", type);
585 return -EINVAL;
586 }
587 }
588
589 return 0;
590}
591
372static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 592static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
373 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 593 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
374 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 594 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
375 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 595 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
596 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
376}; 597};
377 598
378static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 599static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -380,11 +601,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
380{ 601{
381 int nested_len = nla_len(nla) - NLA_ALIGN(len); 602 int nested_len = nla_len(nla) - NLA_ALIGN(len);
382 603
383 if (nested_len < 0) 604 if (nested_len < 0) {
605 pr_info("netem: invalid attributes len %d\n", nested_len);
384 return -EINVAL; 606 return -EINVAL;
607 }
608
385 if (nested_len >= nla_attr_size(0)) 609 if (nested_len >= nla_attr_size(0))
386 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 610 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
387 nested_len, policy); 611 nested_len, policy);
612
388 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 613 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
389 return 0; 614 return 0;
390} 615}
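parse_attr() exists because netem's TCA_OPTIONS payload is a fixed struct optionally followed by nested attributes, and the aligned struct must be skipped before nla_parse() runs. Schematically (sizes illustrative):

/*
 * TCA_OPTIONS payload handled by parse_attr():
 *
 *   +----------------------+--------------------------------+
 *   | struct tc_netem_qopt | nlattrs (CORR, CORRUPT, LOSS)  |
 *   +----------------------+--------------------------------+
 *   |<-- NLA_ALIGN(len) -->|<--        nested_len        -->|
 *
 * nested_len < 0                      -> malformed message
 * nested_len too small for an nlattr  -> old-style tool, struct only
 * otherwise                           -> nla_parse() the tail
 */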
@@ -407,7 +632,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
407 632
408 ret = fifo_set_limit(q->qdisc, qopt->limit); 633 ret = fifo_set_limit(q->qdisc, qopt->limit);
409 if (ret) { 634 if (ret) {
410 pr_debug("netem: can't set fifo limit\n"); 635 pr_info("netem: can't set fifo limit\n");
411 return ret; 636 return ret;
412 } 637 }
413 638
@@ -440,7 +665,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
440 if (tb[TCA_NETEM_CORRUPT]) 665 if (tb[TCA_NETEM_CORRUPT])
441 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 666 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
442 667
443 return 0; 668 q->loss_model = CLG_RANDOM;
669 if (tb[TCA_NETEM_LOSS])
670 ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
671
672 return ret;
444} 673}
445 674
446/* 675/*
@@ -535,16 +764,17 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
535 764
536 qdisc_watchdog_init(&q->watchdog, sch); 765 qdisc_watchdog_init(&q->watchdog, sch);
537 766
767 q->loss_model = CLG_RANDOM;
538 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, 768 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
539 TC_H_MAKE(sch->handle, 1)); 769 TC_H_MAKE(sch->handle, 1));
540 if (!q->qdisc) { 770 if (!q->qdisc) {
541 pr_debug("netem: qdisc create failed\n"); 771 pr_notice("netem: qdisc create tfifo qdisc failed\n");
542 return -ENOMEM; 772 return -ENOMEM;
543 } 773 }
544 774
545 ret = netem_change(sch, opt); 775 ret = netem_change(sch, opt);
546 if (ret) { 776 if (ret) {
547 pr_debug("netem: change failed\n"); 777 pr_info("netem: change failed\n");
548 qdisc_destroy(q->qdisc); 778 qdisc_destroy(q->qdisc);
549 } 779 }
550 return ret; 780 return ret;
@@ -556,14 +786,61 @@ static void netem_destroy(struct Qdisc *sch)
556 786
557 qdisc_watchdog_cancel(&q->watchdog); 787 qdisc_watchdog_cancel(&q->watchdog);
558 qdisc_destroy(q->qdisc); 788 qdisc_destroy(q->qdisc);
559 kfree(q->delay_dist); 789 dist_free(q->delay_dist);
790}
791
792static int dump_loss_model(const struct netem_sched_data *q,
793 struct sk_buff *skb)
794{
795 struct nlattr *nest;
796
797 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
798 if (nest == NULL)
799 goto nla_put_failure;
800
801 switch (q->loss_model) {
802 case CLG_RANDOM:
803 /* legacy loss model */
804 nla_nest_cancel(skb, nest);
805 return 0; /* no data */
806
807 case CLG_4_STATES: {
808 struct tc_netem_gimodel gi = {
809 .p13 = q->clg.a1,
810 .p31 = q->clg.a2,
811 .p32 = q->clg.a3,
812 .p14 = q->clg.a4,
813 .p23 = q->clg.a5,
814 };
815
816 NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
817 break;
818 }
819 case CLG_GILB_ELL: {
820 struct tc_netem_gemodel ge = {
821 .p = q->clg.a1,
822 .r = q->clg.a2,
823 .h = q->clg.a3,
824 .k1 = q->clg.a4,
825 };
826
827 NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
828 break;
829 }
830 }
831
832 nla_nest_end(skb, nest);
833 return 0;
834
835nla_put_failure:
836 nla_nest_cancel(skb, nest);
837 return -1;
560} 838}
561 839
562static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 840static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
563{ 841{
564 const struct netem_sched_data *q = qdisc_priv(sch); 842 const struct netem_sched_data *q = qdisc_priv(sch);
565 unsigned char *b = skb_tail_pointer(skb); 843 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
566 struct nlattr *nla = (struct nlattr *) b;
567 struct tc_netem_qopt qopt; 844 struct tc_netem_qopt qopt;
568 struct tc_netem_corr cor; 845 struct tc_netem_corr cor;
569 struct tc_netem_reorder reorder; 846 struct tc_netem_reorder reorder;
@@ -590,17 +867,87 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
590 corrupt.correlation = q->corrupt_cor.rho; 867 corrupt.correlation = q->corrupt_cor.rho;
591 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 868 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
592 869
593 nla->nla_len = skb_tail_pointer(skb) - b; 870 if (dump_loss_model(q, skb) != 0)
871 goto nla_put_failure;
594 872
595 return skb->len; 873 return nla_nest_end(skb, nla);
596 874
597nla_put_failure: 875nla_put_failure:
598 nlmsg_trim(skb, b); 876 nlmsg_trim(skb, nla);
599 return -1; 877 return -1;
600} 878}
601 879
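netem_dump() now builds its options with the generic netlink nesting helpers instead of patching nla_len by hand. The skeleton of the pattern, reduced to essentials (kernel-style fragment, attribute id reused from above):

static int dump_nested_sketch(struct sk_buff *skb)
{
	struct nlattr *nest = nla_nest_start(skb, TCA_NETEM_LOSS);

	if (nest == NULL)
		return -1;

	/* ... NLA_PUT() the nested members here; on a mid-stream failure,
	 * nla_nest_cancel(skb, nest) trims the partial attribute ... */

	nla_nest_end(skb, nest);	/* fills in the nest's length field */
	return 0;
}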
880static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
881 struct sk_buff *skb, struct tcmsg *tcm)
882{
883 struct netem_sched_data *q = qdisc_priv(sch);
884
885 if (cl != 1) /* only one class */
886 return -ENOENT;
887
888 tcm->tcm_handle |= TC_H_MIN(1);
889 tcm->tcm_info = q->qdisc->handle;
890
891 return 0;
892}
893
894static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
895 struct Qdisc **old)
896{
897 struct netem_sched_data *q = qdisc_priv(sch);
898
899 if (new == NULL)
900 new = &noop_qdisc;
901
902 sch_tree_lock(sch);
903 *old = q->qdisc;
904 q->qdisc = new;
905 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
906 qdisc_reset(*old);
907 sch_tree_unlock(sch);
908
909 return 0;
910}
911
912static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
913{
914 struct netem_sched_data *q = qdisc_priv(sch);
915 return q->qdisc;
916}
917
918static unsigned long netem_get(struct Qdisc *sch, u32 classid)
919{
920 return 1;
921}
922
923static void netem_put(struct Qdisc *sch, unsigned long arg)
924{
925}
926
927static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
928{
929 if (!walker->stop) {
930 if (walker->count >= walker->skip)
931 if (walker->fn(sch, 1, walker) < 0) {
932 walker->stop = 1;
933 return;
934 }
935 walker->count++;
936 }
937}
938
939static const struct Qdisc_class_ops netem_class_ops = {
940 .graft = netem_graft,
941 .leaf = netem_leaf,
942 .get = netem_get,
943 .put = netem_put,
944 .walk = netem_walk,
945 .dump = netem_dump_class,
946};
947
602static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 948static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
603 .id = "netem", 949 .id = "netem",
950 .cl_ops = &netem_class_ops,
604 .priv_size = sizeof(struct netem_sched_data), 951 .priv_size = sizeof(struct netem_sched_data),
605 .enqueue = netem_enqueue, 952 .enqueue = netem_enqueue,
606 .dequeue = netem_dequeue, 953 .dequeue = netem_dequeue,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fbd710d619b..2a318f2dc3e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23 23
24 24
25struct prio_sched_data 25struct prio_sched_data {
26{
27 int bands; 26 int bands;
28 struct tcf_proto *filter_list; 27 struct tcf_proto *filter_list;
29 u8 prio2band[TC_PRIO_MAX+1]; 28 u8 prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
54 if (!q->filter_list || err < 0) { 53 if (!q->filter_list || err < 0) {
55 if (TC_H_MAJ(band)) 54 if (TC_H_MAJ(band))
56 band = 0; 55 band = 0;
57 return q->queues[q->prio2band[band&TC_PRIO_MAX]]; 56 return q->queues[q->prio2band[band & TC_PRIO_MAX]];
58 } 57 }
59 band = res.classid; 58 band = res.classid;
60 } 59 }
@@ -106,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
106 return NULL; 105 return NULL;
107} 106}
108 107
109static struct sk_buff *prio_dequeue(struct Qdisc* sch) 108static struct sk_buff *prio_dequeue(struct Qdisc *sch)
110{ 109{
111 struct prio_sched_data *q = qdisc_priv(sch); 110 struct prio_sched_data *q = qdisc_priv(sch);
112 int prio; 111 int prio;
@@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
124 123
125} 124}
126 125
127static unsigned int prio_drop(struct Qdisc* sch) 126static unsigned int prio_drop(struct Qdisc *sch)
128{ 127{
129 struct prio_sched_data *q = qdisc_priv(sch); 128 struct prio_sched_data *q = qdisc_priv(sch);
130 int prio; 129 int prio;
@@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
143 142
144 143
145static void 144static void
146prio_reset(struct Qdisc* sch) 145prio_reset(struct Qdisc *sch)
147{ 146{
148 int prio; 147 int prio;
149 struct prio_sched_data *q = qdisc_priv(sch); 148 struct prio_sched_data *q = qdisc_priv(sch);
150 149
151 for (prio=0; prio<q->bands; prio++) 150 for (prio = 0; prio < q->bands; prio++)
152 qdisc_reset(q->queues[prio]); 151 qdisc_reset(q->queues[prio]);
153 sch->q.qlen = 0; 152 sch->q.qlen = 0;
154} 153}
155 154
156static void 155static void
157prio_destroy(struct Qdisc* sch) 156prio_destroy(struct Qdisc *sch)
158{ 157{
159 int prio; 158 int prio;
160 struct prio_sched_data *q = qdisc_priv(sch); 159 struct prio_sched_data *q = qdisc_priv(sch);
161 160
162 tcf_destroy_chain(&q->filter_list); 161 tcf_destroy_chain(&q->filter_list);
163 for (prio=0; prio<q->bands; prio++) 162 for (prio = 0; prio < q->bands; prio++)
164 qdisc_destroy(q->queues[prio]); 163 qdisc_destroy(q->queues[prio]);
165} 164}
166 165
@@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
177 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) 176 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
178 return -EINVAL; 177 return -EINVAL;
179 178
180 for (i=0; i<=TC_PRIO_MAX; i++) { 179 for (i = 0; i <= TC_PRIO_MAX; i++) {
181 if (qopt->priomap[i] >= qopt->bands) 180 if (qopt->priomap[i] >= qopt->bands)
182 return -EINVAL; 181 return -EINVAL;
183 } 182 }
@@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
186 q->bands = qopt->bands; 185 q->bands = qopt->bands;
187 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); 186 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
188 187
189 for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { 188 for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
190 struct Qdisc *child = q->queues[i]; 189 struct Qdisc *child = q->queues[i];
191 q->queues[i] = &noop_qdisc; 190 q->queues[i] = &noop_qdisc;
192 if (child != &noop_qdisc) { 191 if (child != &noop_qdisc) {
@@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
196 } 195 }
197 sch_tree_unlock(sch); 196 sch_tree_unlock(sch);
198 197
199 for (i=0; i<q->bands; i++) { 198 for (i = 0; i < q->bands; i++) {
200 if (q->queues[i] == &noop_qdisc) { 199 if (q->queues[i] == &noop_qdisc) {
201 struct Qdisc *child, *old; 200 struct Qdisc *child, *old;
201
202 child = qdisc_create_dflt(sch->dev_queue, 202 child = qdisc_create_dflt(sch->dev_queue,
203 &pfifo_qdisc_ops, 203 &pfifo_qdisc_ops,
204 TC_H_MAKE(sch->handle, i + 1)); 204 TC_H_MAKE(sch->handle, i + 1));
@@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
224 struct prio_sched_data *q = qdisc_priv(sch); 224 struct prio_sched_data *q = qdisc_priv(sch);
225 int i; 225 int i;
226 226
227 for (i=0; i<TCQ_PRIO_BANDS; i++) 227 for (i = 0; i < TCQ_PRIO_BANDS; i++)
228 q->queues[i] = &noop_qdisc; 228 q->queues[i] = &noop_qdisc;
229 229
230 if (opt == NULL) { 230 if (opt == NULL) {
@@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
232 } else { 232 } else {
233 int err; 233 int err;
234 234
235 if ((err= prio_tune(sch, opt)) != 0) 235 if ((err = prio_tune(sch, opt)) != 0)
236 return err; 236 return err;
237 } 237 }
238 return 0; 238 return 0;
@@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
245 struct tc_prio_qopt opt; 245 struct tc_prio_qopt opt;
246 246
247 opt.bands = q->bands; 247 opt.bands = q->bands;
248 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); 248 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
249 249
250 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 250 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
251 251
@@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
342 arg->count++; 342 arg->count++;
343 continue; 343 continue;
344 } 344 }
345 if (arg->fn(sch, prio+1, arg) < 0) { 345 if (arg->fn(sch, prio + 1, arg) < 0) {
346 arg->stop = 1; 346 arg->stop = 1;
347 break; 347 break;
348 } 348 }
@@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
350 } 350 }
351} 351}
352 352
353static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) 353static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
354{ 354{
355 struct prio_sched_data *q = qdisc_priv(sch); 355 struct prio_sched_data *q = qdisc_priv(sch);
356 356
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9f98dbd32d4..6649463da1b 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
36 if RED works correctly. 36 if RED works correctly.
37 */ 37 */
38 38
39struct red_sched_data 39struct red_sched_data {
40{
41 u32 limit; /* HARD maximal queue length */ 40 u32 limit; /* HARD maximal queue length */
42 unsigned char flags; 41 unsigned char flags;
43 struct red_parms parms; 42 struct red_parms parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
55 return q->flags & TC_RED_HARDDROP; 54 return q->flags & TC_RED_HARDDROP;
56} 55}
57 56
58static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) 57static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
59{ 58{
60 struct red_sched_data *q = qdisc_priv(sch); 59 struct red_sched_data *q = qdisc_priv(sch);
61 struct Qdisc *child = q->qdisc; 60 struct Qdisc *child = q->qdisc;
@@ -67,29 +66,29 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
67 red_end_of_idle_period(&q->parms); 66 red_end_of_idle_period(&q->parms);
68 67
69 switch (red_action(&q->parms, q->parms.qavg)) { 68 switch (red_action(&q->parms, q->parms.qavg)) {
70 case RED_DONT_MARK: 69 case RED_DONT_MARK:
71 break; 70 break;
72 71
73 case RED_PROB_MARK: 72 case RED_PROB_MARK:
74 sch->qstats.overlimits++; 73 sch->qstats.overlimits++;
75 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { 74 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76 q->stats.prob_drop++; 75 q->stats.prob_drop++;
77 goto congestion_drop; 76 goto congestion_drop;
78 } 77 }
79 78
80 q->stats.prob_mark++; 79 q->stats.prob_mark++;
81 break; 80 break;
82 81
83 case RED_HARD_MARK: 82 case RED_HARD_MARK:
84 sch->qstats.overlimits++; 83 sch->qstats.overlimits++;
85 if (red_use_harddrop(q) || !red_use_ecn(q) || 84 if (red_use_harddrop(q) || !red_use_ecn(q) ||
86 !INET_ECN_set_ce(skb)) { 85 !INET_ECN_set_ce(skb)) {
87 q->stats.forced_drop++; 86 q->stats.forced_drop++;
88 goto congestion_drop; 87 goto congestion_drop;
89 } 88 }
90 89
91 q->stats.forced_mark++; 90 q->stats.forced_mark++;
92 break; 91 break;
93 } 92 }
94 93
95 ret = qdisc_enqueue(skb, child); 94 ret = qdisc_enqueue(skb, child);
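The reindented switch above is RED's standard decision: on a probabilistic or hard mark, prefer an ECN mark when the qdisc allows it and the packet is ECN-capable, otherwise drop. A compact runnable restatement (enum and flag names are ours):

#include <stdbool.h>
#include <stdio.h>

enum red_act_sk { SK_DONT_MARK, SK_PROB_MARK, SK_HARD_MARK };

/* returns true if the packet may be enqueued (possibly CE-marked) */
static bool red_admit(enum red_act_sk act, bool use_ecn, bool harddrop,
		      bool pkt_ect)
{
	switch (act) {
	case SK_DONT_MARK:
		return true;
	case SK_PROB_MARK:
		return use_ecn && pkt_ect;		/* else prob_drop */
	case SK_HARD_MARK:
		return !harddrop && use_ecn && pkt_ect;	/* else forced_drop */
	}
	return false;
}

int main(void)
{
	printf("prob-mark, ECN:      %d\n", red_admit(SK_PROB_MARK, true, false, true));
	printf("hard-mark, harddrop: %d\n", red_admit(SK_HARD_MARK, true, true, true));
	return 0;
}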
@@ -106,7 +105,7 @@ congestion_drop:
106 return NET_XMIT_CN; 105 return NET_XMIT_CN;
107} 106}
108 107
109static struct sk_buff * red_dequeue(struct Qdisc* sch) 108static struct sk_buff *red_dequeue(struct Qdisc *sch)
110{ 109{
111 struct sk_buff *skb; 110 struct sk_buff *skb;
112 struct red_sched_data *q = qdisc_priv(sch); 111 struct red_sched_data *q = qdisc_priv(sch);
@@ -123,7 +122,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
123 return skb; 122 return skb;
124} 123}
125 124
126static struct sk_buff * red_peek(struct Qdisc* sch) 125static struct sk_buff *red_peek(struct Qdisc *sch)
127{ 126{
128 struct red_sched_data *q = qdisc_priv(sch); 127 struct red_sched_data *q = qdisc_priv(sch);
129 struct Qdisc *child = q->qdisc; 128 struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
131 return child->ops->peek(child); 130 return child->ops->peek(child);
132} 131}
133 132
134static unsigned int red_drop(struct Qdisc* sch) 133static unsigned int red_drop(struct Qdisc *sch)
135{ 134{
136 struct red_sched_data *q = qdisc_priv(sch); 135 struct red_sched_data *q = qdisc_priv(sch);
137 struct Qdisc *child = q->qdisc; 136 struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
150 return 0; 149 return 0;
151} 150}
152 151
153static void red_reset(struct Qdisc* sch) 152static void red_reset(struct Qdisc *sch)
154{ 153{
155 struct red_sched_data *q = qdisc_priv(sch); 154 struct red_sched_data *q = qdisc_priv(sch);
156 155
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
217 return 0; 216 return 0;
218} 217}
219 218
220static int red_init(struct Qdisc* sch, struct nlattr *opt) 219static int red_init(struct Qdisc *sch, struct nlattr *opt)
221{ 220{
222 struct red_sched_data *q = qdisc_priv(sch); 221 struct red_sched_data *q = qdisc_priv(sch);
223 222
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
new file mode 100644
index 00000000000..0a833d0c1f6
--- /dev/null
+++ b/net/sched/sch_sfb.c
@@ -0,0 +1,709 @@
1/*
2 * net/sched/sch_sfb.c Stochastic Fair Blue
3 *
4 * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue:
12 * A New Class of Active Queue Management Algorithms.
13 * U. Michigan CSE-TR-387-99, April 1999.
14 *
15 * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
16 *
17 */
18
19#include <linux/module.h>
20#include <linux/types.h>
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/skbuff.h>
24#include <linux/random.h>
25#include <linux/jhash.h>
26#include <net/ip.h>
27#include <net/pkt_sched.h>
28#include <net/inet_ecn.h>
29
30/*
31 * SFB uses two B[l][n] arrays of bins: L x N each (L levels, N bins per level).
32 * This implementation uses L = 8 and N = 16.
33 * This permits us to split one 32-bit hash (provided per packet by rxhash or
34 * an external classifier) into 8 subhashes of 4 bits each.
35 */
36#define SFB_BUCKET_SHIFT 4
37#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */
38#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1)
39#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */
40
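For illustration only (not part of the patch), a minimal userspace C sketch of how one 32-bit hash decomposes into SFB_LEVELS bucket indices of SFB_BUCKET_SHIFT bits each, mirroring the per-level loops used throughout this file:

#include <stdint.h>
#include <stdio.h>

#define SFB_BUCKET_SHIFT 4
#define SFB_NUMBUCKETS   (1 << SFB_BUCKET_SHIFT)
#define SFB_BUCKET_MASK  (SFB_NUMBUCKETS - 1)
#define SFB_LEVELS       (32 / SFB_BUCKET_SHIFT)

int main(void)
{
	uint32_t sfbhash = 0x1234abcd;	/* example per-packet hash */
	int i;

	/* each 4-bit nibble selects one bucket on its level */
	for (i = 0; i < SFB_LEVELS; i++) {
		printf("level %d -> bucket %u\n", i, sfbhash & SFB_BUCKET_MASK);
		sfbhash >>= SFB_BUCKET_SHIFT;
	}
	return 0;
}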
41/* The SFB algorithm uses virtual queues, named "bins" */
42struct sfb_bucket {
43 u16 qlen; /* length of virtual queue */
44 u16 p_mark; /* marking probability */
45};
46
47/* We use double buffering right before a hash change
48 * (Section 4.4 of the SFB reference: moving hash functions)
49 */
50struct sfb_bins {
51 u32 perturbation; /* jhash perturbation */
52 struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
53};
54
55struct sfb_sched_data {
56 struct Qdisc *qdisc;
57 struct tcf_proto *filter_list;
58 unsigned long rehash_interval;
59 unsigned long warmup_time; /* double buffering warmup time in jiffies */
60 u32 max;
61 u32 bin_size; /* maximum queue length per bin */
62 u32 increment; /* d1 */
63 u32 decrement; /* d2 */
64 u32 limit; /* HARD maximal queue length */
65 u32 penalty_rate;
66 u32 penalty_burst;
67 u32 tokens_avail;
68 unsigned long rehash_time;
69 unsigned long token_time;
70
71 u8 slot; /* current active bins (0 or 1) */
72 bool double_buffering;
73 struct sfb_bins bins[2];
74
75 struct {
76 u32 earlydrop;
77 u32 penaltydrop;
78 u32 bucketdrop;
79 u32 queuedrop;
80 u32 childdrop; /* drops in child qdisc */
81 u32 marked; /* ECN mark */
82 } stats;
83};
84
85/*
86 * Each queued skb might be hashed into one or two bins.
87 * We store the two hash values in the skb cb.
88 * (A zero value means double buffering was not used.)
89 */
90struct sfb_skb_cb {
91 u32 hashes[2];
92};
93
94static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
95{
96 BUILD_BUG_ON(sizeof(skb->cb) <
97 sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb));
98 return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
99}
100
101/*
102 * If using the 'internal' SFB flow classifier, the hash comes from skb rxhash.
103 * If using an external classifier, the hash comes from the classid.
104 */
105static u32 sfb_hash(const struct sk_buff *skb, u32 slot)
106{
107 return sfb_skb_cb(skb)->hashes[slot];
108}
109
110/* Probabilities are coded as Q0.16 fixed-point values,
111 * with 0xFFFF representing 65535/65536 (almost 1.0)
112 * Addition and subtraction are saturating in [0, 65535]
113 */
114static u32 prob_plus(u32 p1, u32 p2)
115{
116 u32 res = p1 + p2;
117
118 return min_t(u32, res, SFB_MAX_PROB);
119}
120
121static u32 prob_minus(u32 p1, u32 p2)
122{
123 return p1 > p2 ? p1 - p2 : 0;
124}
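A worked example of the Q0.16 saturating arithmetic above, as a standalone userspace C check (illustrative only; SFB_MAX_PROB is 0xFFFF per the comment above):

#include <stdint.h>
#include <assert.h>

#define SFB_MAX_PROB 0xFFFF

static uint32_t prob_plus(uint32_t p1, uint32_t p2)
{
	uint32_t res = p1 + p2;

	return res < SFB_MAX_PROB ? res : SFB_MAX_PROB;
}

static uint32_t prob_minus(uint32_t p1, uint32_t p2)
{
	return p1 > p2 ? p1 - p2 : 0;
}

int main(void)
{
	assert(prob_plus(0x8000, 0xC000) == SFB_MAX_PROB);	/* 0.5 + 0.75 saturates at ~1.0 */
	assert(prob_minus(0x1000, 0x2000) == 0);		/* subtraction saturates at 0 */
	return 0;
}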
125
126static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
127{
128 int i;
129 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
130
131 for (i = 0; i < SFB_LEVELS; i++) {
132 u32 hash = sfbhash & SFB_BUCKET_MASK;
133
134 sfbhash >>= SFB_BUCKET_SHIFT;
135 if (b[hash].qlen < 0xFFFF)
136 b[hash].qlen++;
137 b += SFB_NUMBUCKETS; /* next level */
138 }
139}
140
141static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
142{
143 u32 sfbhash;
144
145 sfbhash = sfb_hash(skb, 0);
146 if (sfbhash)
147 increment_one_qlen(sfbhash, 0, q);
148
149 sfbhash = sfb_hash(skb, 1);
150 if (sfbhash)
151 increment_one_qlen(sfbhash, 1, q);
152}
153
154static void decrement_one_qlen(u32 sfbhash, u32 slot,
155 struct sfb_sched_data *q)
156{
157 int i;
158 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
159
160 for (i = 0; i < SFB_LEVELS; i++) {
161 u32 hash = sfbhash & SFB_BUCKET_MASK;
162
163 sfbhash >>= SFB_BUCKET_SHIFT;
164 if (b[hash].qlen > 0)
165 b[hash].qlen--;
166 b += SFB_NUMBUCKETS; /* next level */
167 }
168}
169
170static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
171{
172 u32 sfbhash;
173
174 sfbhash = sfb_hash(skb, 0);
175 if (sfbhash)
176 decrement_one_qlen(sfbhash, 0, q);
177
178 sfbhash = sfb_hash(skb, 1);
179 if (sfbhash)
180 decrement_one_qlen(sfbhash, 1, q);
181}
182
183static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
184{
185 b->p_mark = prob_minus(b->p_mark, q->decrement);
186}
187
188static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
189{
190 b->p_mark = prob_plus(b->p_mark, q->increment);
191}
192
193static void sfb_zero_all_buckets(struct sfb_sched_data *q)
194{
195 memset(&q->bins, 0, sizeof(q->bins));
196}
197
198/*
199 * Compute max qlen, max p_mark, and avg p_mark.
200 */
201static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q)
202{
203 int i;
204 u32 qlen = 0, prob = 0, totalpm = 0;
205 const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0];
206
207 for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) {
208 if (qlen < b->qlen)
209 qlen = b->qlen;
210 totalpm += b->p_mark;
211 if (prob < b->p_mark)
212 prob = b->p_mark;
213 b++;
214 }
215 *prob_r = prob;
216 *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS);
217 return qlen;
218}
219
220
221static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
222{
223 q->bins[slot].perturbation = net_random();
224}
225
226static void sfb_swap_slot(struct sfb_sched_data *q)
227{
228 sfb_init_perturbation(q->slot, q);
229 q->slot ^= 1;
230 q->double_buffering = false;
231}
232
233/* Non-elastic flows are allowed to use part of the bandwidth, expressed
234 * as "penalty_rate" packets per second, with a "penalty_burst" burst allowance.
235 */
236static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
237{
238 if (q->penalty_rate == 0 || q->penalty_burst == 0)
239 return true;
240
241 if (q->tokens_avail < 1) {
242 unsigned long age = min(10UL * HZ, jiffies - q->token_time);
243
244 q->tokens_avail = (age * q->penalty_rate) / HZ;
245 if (q->tokens_avail > q->penalty_burst)
246 q->tokens_avail = q->penalty_burst;
247 q->token_time = jiffies;
248 if (q->tokens_avail < 1)
249 return true;
250 }
251
252 q->tokens_avail--;
253 return false;
254}
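To make the refill arithmetic concrete, a small userspace model of the token bucket above (the values below are example assumptions, with HZ = 1000):

#include <stdint.h>
#include <stdio.h>

#define HZ 1000

int main(void)
{
	uint32_t penalty_rate = 10;	/* packets per second */
	uint32_t penalty_burst = 20;	/* bucket depth */
	unsigned long age = 500;	/* jiffies since last refill = 0.5 s */
	uint32_t tokens = (age * penalty_rate) / HZ;

	if (tokens > penalty_burst)
		tokens = penalty_burst;
	printf("tokens_avail = %u\n", tokens);	/* 0.5 s at 10 pps -> 5 tokens */
	return 0;
}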
255
256static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
257 int *qerr, u32 *salt)
258{
259 struct tcf_result res;
260 int result;
261
262 result = tc_classify(skb, q->filter_list, &res);
263 if (result >= 0) {
264#ifdef CONFIG_NET_CLS_ACT
265 switch (result) {
266 case TC_ACT_STOLEN:
267 case TC_ACT_QUEUED:
268 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
269 case TC_ACT_SHOT:
270 return false;
271 }
272#endif
273 *salt = TC_H_MIN(res.classid);
274 return true;
275 }
276 return false;
277}
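Note the deliberate fallthrough in the switch above: TC_ACT_STOLEN and TC_ACT_QUEUED first set *qerr to report success to the caller and then fall into the TC_ACT_SHOT case, so all three results return false; only the stolen/queued packets are accounted as consumed rather than dropped.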
278
279static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
280{
281
282 struct sfb_sched_data *q = qdisc_priv(sch);
283 struct Qdisc *child = q->qdisc;
284 int i;
285 u32 p_min = ~0;
286 u32 minqlen = ~0;
287 u32 r, slot, salt, sfbhash;
288 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
289
290 if (q->rehash_interval > 0) {
291 unsigned long limit = q->rehash_time + q->rehash_interval;
292
293 if (unlikely(time_after(jiffies, limit))) {
294 sfb_swap_slot(q);
295 q->rehash_time = jiffies;
296 } else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
297 time_after(jiffies, limit - q->warmup_time))) {
298 q->double_buffering = true;
299 }
300 }
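As a worked timeline with the defaults from sfb_default_ops later in this file (rehash_interval of 600 s, warmup_time of 60 s): a slot swap happens every 600 s, and double buffering is switched on for the final 60 s before each swap, i.e. from t = 540 s onward packets are hashed into both slots so the incoming slot is already warm when it becomes active.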
301
302 if (q->filter_list) {
303 /* If using external classifiers, get result and record it. */
304 if (!sfb_classify(skb, q, &ret, &salt))
305 goto other_drop;
306 } else {
307 salt = skb_get_rxhash(skb);
308 }
309
310 slot = q->slot;
311
312 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
313 if (!sfbhash)
314 sfbhash = 1;
315 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
316
317 for (i = 0; i < SFB_LEVELS; i++) {
318 u32 hash = sfbhash & SFB_BUCKET_MASK;
319 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
320
321 sfbhash >>= SFB_BUCKET_SHIFT;
322 if (b->qlen == 0)
323 decrement_prob(b, q);
324 else if (b->qlen >= q->bin_size)
325 increment_prob(b, q);
326 if (minqlen > b->qlen)
327 minqlen = b->qlen;
328 if (p_min > b->p_mark)
329 p_min = b->p_mark;
330 }
331
332 slot ^= 1;
333 sfb_skb_cb(skb)->hashes[slot] = 0;
334
335 if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) {
336 sch->qstats.overlimits++;
337 if (minqlen >= q->max)
338 q->stats.bucketdrop++;
339 else
340 q->stats.queuedrop++;
341 goto drop;
342 }
343
344 if (unlikely(p_min >= SFB_MAX_PROB)) {
345 /* Inelastic flow */
346 if (q->double_buffering) {
347 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
348 if (!sfbhash)
349 sfbhash = 1;
350 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
351
352 for (i = 0; i < SFB_LEVELS; i++) {
353 u32 hash = sfbhash & SFB_BUCKET_MASK;
354 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
355
356 sfbhash >>= SFB_BUCKET_SHIFT;
357 if (b->qlen == 0)
358 decrement_prob(b, q);
359 else if (b->qlen >= q->bin_size)
360 increment_prob(b, q);
361 }
362 }
363 if (sfb_rate_limit(skb, q)) {
364 sch->qstats.overlimits++;
365 q->stats.penaltydrop++;
366 goto drop;
367 }
368 goto enqueue;
369 }
370
371 r = net_random() & SFB_MAX_PROB;
372
373 if (unlikely(r < p_min)) {
374 if (unlikely(p_min > SFB_MAX_PROB / 2)) {
375 /* If we're marking that many packets, then either
376 * this flow is unresponsive, or we're badly congested.
377 * In either case, we want to start dropping packets.
378 */
379 if (r < (p_min - SFB_MAX_PROB / 2) * 2) {
380 q->stats.earlydrop++;
381 goto drop;
382 }
383 }
384 if (INET_ECN_set_ce(skb)) {
385 q->stats.marked++;
386 } else {
387 q->stats.earlydrop++;
388 goto drop;
389 }
390 }
391
392enqueue:
393 ret = qdisc_enqueue(skb, child);
394 if (likely(ret == NET_XMIT_SUCCESS)) {
395 sch->q.qlen++;
396 increment_qlen(skb, q);
397 } else if (net_xmit_drop_count(ret)) {
398 q->stats.childdrop++;
399 sch->qstats.drops++;
400 }
401 return ret;
402
403drop:
404 qdisc_drop(skb, sch);
405 return NET_XMIT_CN;
406other_drop:
407 if (ret & __NET_XMIT_BYPASS)
408 sch->qstats.drops++;
409 kfree_skb(skb);
410 return ret;
411}
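Reading the marking branch above numerically: r is uniform over [0, SFB_MAX_PROB], so a packet enters the branch with probability p_min. Once p_min exceeds 1/2, a fraction of those packets is dropped instead of marked; e.g. with p_min = 0xC000 (0.75), roughly half of all packets are dropped early and about a quarter are ECN-marked.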
412
413static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
414{
415 struct sfb_sched_data *q = qdisc_priv(sch);
416 struct Qdisc *child = q->qdisc;
417 struct sk_buff *skb;
418
419 skb = child->dequeue(q->qdisc);
420
421 if (skb) {
422 qdisc_bstats_update(sch, skb);
423 sch->q.qlen--;
424 decrement_qlen(skb, q);
425 }
426
427 return skb;
428}
429
430static struct sk_buff *sfb_peek(struct Qdisc *sch)
431{
432 struct sfb_sched_data *q = qdisc_priv(sch);
433 struct Qdisc *child = q->qdisc;
434
435 return child->ops->peek(child);
436}
437
438/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */
439
440static void sfb_reset(struct Qdisc *sch)
441{
442 struct sfb_sched_data *q = qdisc_priv(sch);
443
444 qdisc_reset(q->qdisc);
445 sch->q.qlen = 0;
446 q->slot = 0;
447 q->double_buffering = false;
448 sfb_zero_all_buckets(q);
449 sfb_init_perturbation(0, q);
450}
451
452static void sfb_destroy(struct Qdisc *sch)
453{
454 struct sfb_sched_data *q = qdisc_priv(sch);
455
456 tcf_destroy_chain(&q->filter_list);
457 qdisc_destroy(q->qdisc);
458}
459
460static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
461 [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) },
462};
463
464static const struct tc_sfb_qopt sfb_default_ops = {
465 .rehash_interval = 600 * MSEC_PER_SEC,
466 .warmup_time = 60 * MSEC_PER_SEC,
467 .limit = 0,
468 .max = 25,
469 .bin_size = 20,
470 .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */
471 .decrement = (SFB_MAX_PROB + 3000) / 6000,
472 .penalty_rate = 10,
473 .penalty_burst = 20,
474};
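With SFB_MAX_PROB = 0xFFFF these defaults work out to increment = (65535 + 500) / 1000 = 66, about 0.1 % of the probability range per event (matching the inline comment), and decrement = (65535 + 3000) / 6000 = 11, about 0.017 %, so p_mark climbs roughly six times faster under congestion than it decays when a bin empties.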
475
476static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
477{
478 struct sfb_sched_data *q = qdisc_priv(sch);
479 struct Qdisc *child;
480 struct nlattr *tb[TCA_SFB_MAX + 1];
481 const struct tc_sfb_qopt *ctl = &sfb_default_ops;
482 u32 limit;
483 int err;
484
485 if (opt) {
486 err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy);
487 if (err < 0)
488 return -EINVAL;
489
490 if (tb[TCA_SFB_PARMS] == NULL)
491 return -EINVAL;
492
493 ctl = nla_data(tb[TCA_SFB_PARMS]);
494 }
495
496 limit = ctl->limit;
497 if (limit == 0)
498 limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
499
500 child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
501 if (IS_ERR(child))
502 return PTR_ERR(child);
503
504 sch_tree_lock(sch);
505
506 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
507 qdisc_destroy(q->qdisc);
508 q->qdisc = child;
509
510 q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
511 q->warmup_time = msecs_to_jiffies(ctl->warmup_time);
512 q->rehash_time = jiffies;
513 q->limit = limit;
514 q->increment = ctl->increment;
515 q->decrement = ctl->decrement;
516 q->max = ctl->max;
517 q->bin_size = ctl->bin_size;
518 q->penalty_rate = ctl->penalty_rate;
519 q->penalty_burst = ctl->penalty_burst;
520 q->tokens_avail = ctl->penalty_burst;
521 q->token_time = jiffies;
522
523 q->slot = 0;
524 q->double_buffering = false;
525 sfb_zero_all_buckets(q);
526 sfb_init_perturbation(0, q);
527 sfb_init_perturbation(1, q);
528
529 sch_tree_unlock(sch);
530
531 return 0;
532}
533
534static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
535{
536 struct sfb_sched_data *q = qdisc_priv(sch);
537
538 q->qdisc = &noop_qdisc;
539 return sfb_change(sch, opt);
540}
541
542static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
543{
544 struct sfb_sched_data *q = qdisc_priv(sch);
545 struct nlattr *opts;
546 struct tc_sfb_qopt opt = {
547 .rehash_interval = jiffies_to_msecs(q->rehash_interval),
548 .warmup_time = jiffies_to_msecs(q->warmup_time),
549 .limit = q->limit,
550 .max = q->max,
551 .bin_size = q->bin_size,
552 .increment = q->increment,
553 .decrement = q->decrement,
554 .penalty_rate = q->penalty_rate,
555 .penalty_burst = q->penalty_burst,
556 };
557
558 sch->qstats.backlog = q->qdisc->qstats.backlog;
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt);
561 return nla_nest_end(skb, opts);
562
563nla_put_failure:
564 nla_nest_cancel(skb, opts);
565 return -EMSGSIZE;
566}
567
568static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
569{
570 struct sfb_sched_data *q = qdisc_priv(sch);
571 struct tc_sfb_xstats st = {
572 .earlydrop = q->stats.earlydrop,
573 .penaltydrop = q->stats.penaltydrop,
574 .bucketdrop = q->stats.bucketdrop,
575 .queuedrop = q->stats.queuedrop,
576 .childdrop = q->stats.childdrop,
577 .marked = q->stats.marked,
578 };
579
580 st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q);
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
586 struct sk_buff *skb, struct tcmsg *tcm)
587{
588 return -ENOSYS;
589}
590
591static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
592 struct Qdisc **old)
593{
594 struct sfb_sched_data *q = qdisc_priv(sch);
595
596 if (new == NULL)
597 new = &noop_qdisc;
598
599 sch_tree_lock(sch);
600 *old = q->qdisc;
601 q->qdisc = new;
602 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
603 qdisc_reset(*old);
604 sch_tree_unlock(sch);
605 return 0;
606}
607
608static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
609{
610 struct sfb_sched_data *q = qdisc_priv(sch);
611
612 return q->qdisc;
613}
614
615static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
616{
617 return 1;
618}
619
620static void sfb_put(struct Qdisc *sch, unsigned long arg)
621{
622}
623
624static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
625 struct nlattr **tca, unsigned long *arg)
626{
627 return -ENOSYS;
628}
629
630static int sfb_delete(struct Qdisc *sch, unsigned long cl)
631{
632 return -ENOSYS;
633}
634
635static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
636{
637 if (!walker->stop) {
638 if (walker->count >= walker->skip)
639 if (walker->fn(sch, 1, walker) < 0) {
640 walker->stop = 1;
641 return;
642 }
643 walker->count++;
644 }
645}
646
647static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
648{
649 struct sfb_sched_data *q = qdisc_priv(sch);
650
651 if (cl)
652 return NULL;
653 return &q->filter_list;
654}
655
656static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
657 u32 classid)
658{
659 return 0;
660}
661
662
663static const struct Qdisc_class_ops sfb_class_ops = {
664 .graft = sfb_graft,
665 .leaf = sfb_leaf,
666 .get = sfb_get,
667 .put = sfb_put,
668 .change = sfb_change_class,
669 .delete = sfb_delete,
670 .walk = sfb_walk,
671 .tcf_chain = sfb_find_tcf,
672 .bind_tcf = sfb_bind,
673 .unbind_tcf = sfb_put,
674 .dump = sfb_dump_class,
675};
676
677static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
678 .id = "sfb",
679 .priv_size = sizeof(struct sfb_sched_data),
680 .cl_ops = &sfb_class_ops,
681 .enqueue = sfb_enqueue,
682 .dequeue = sfb_dequeue,
683 .peek = sfb_peek,
684 .init = sfb_init,
685 .reset = sfb_reset,
686 .destroy = sfb_destroy,
687 .change = sfb_change,
688 .dump = sfb_dump,
689 .dump_stats = sfb_dump_stats,
690 .owner = THIS_MODULE,
691};
692
693static int __init sfb_module_init(void)
694{
695 return register_qdisc(&sfb_qdisc_ops);
696}
697
698static void __exit sfb_module_exit(void)
699{
700 unregister_qdisc(&sfb_qdisc_ops);
701}
702
703module_init(sfb_module_init)
704module_exit(sfb_module_exit)
705
706MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline");
707MODULE_AUTHOR("Juliusz Chroboczek");
708MODULE_AUTHOR("Eric Dumazet");
709MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index edea8cefec6..c2e628dfaac 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/jhash.h> 22#include <linux/jhash.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/vmalloc.h>
24#include <net/ip.h> 25#include <net/ip.h>
25#include <net/netlink.h> 26#include <net/netlink.h>
26#include <net/pkt_sched.h> 27#include <net/pkt_sched.h>
@@ -76,7 +77,8 @@
76#define SFQ_DEPTH 128 /* max number of packets per flow */ 77#define SFQ_DEPTH 128 /* max number of packets per flow */
77#define SFQ_SLOTS 128 /* max number of flows */ 78#define SFQ_SLOTS 128 /* max number of flows */
78#define SFQ_EMPTY_SLOT 255 79#define SFQ_EMPTY_SLOT 255
79#define SFQ_HASH_DIVISOR 1024 80#define SFQ_DEFAULT_HASH_DIVISOR 1024
81
80/* We use 16 bits to store allot, and want to handle packets up to 64K 82/* We use 16 bits to store allot, and want to handle packets up to 64K
81 * Scale allot by 8 (1<<3) so that no overflow occurs. 83 * Scale allot by 8 (1<<3) so that no overflow occurs.
82 */ 84 */
@@ -92,8 +94,7 @@ typedef unsigned char sfq_index;
92 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] 94 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
93 * are 'pointers' to dep[] array 95 * are 'pointers' to dep[] array
94 */ 96 */
95struct sfq_head 97struct sfq_head {
96{
97 sfq_index next; 98 sfq_index next;
98 sfq_index prev; 99 sfq_index prev;
99}; 100};
@@ -108,13 +109,12 @@ struct sfq_slot {
108 short allot; /* credit for this slot */ 109 short allot; /* credit for this slot */
109}; 110};
110 111
111struct sfq_sched_data 112struct sfq_sched_data {
112{
113/* Parameters */ 113/* Parameters */
114 int perturb_period; 114 int perturb_period;
115 unsigned quantum; /* Allotment per round: MUST BE >= MTU */ 115 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
116 int limit; 116 int limit;
117 117 unsigned int divisor; /* number of slots in hash table */
118/* Variables */ 118/* Variables */
119 struct tcf_proto *filter_list; 119 struct tcf_proto *filter_list;
120 struct timer_list perturb_timer; 120 struct timer_list perturb_timer;
@@ -122,7 +122,7 @@ struct sfq_sched_data
122 sfq_index cur_depth; /* depth of longest slot */ 122 sfq_index cur_depth; /* depth of longest slot */
123 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ 123 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
124 struct sfq_slot *tail; /* current slot in round */ 124 struct sfq_slot *tail; /* current slot in round */
125 sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ 125 sfq_index *ht; /* Hash table (divisor slots) */
126 struct sfq_slot slots[SFQ_SLOTS]; 126 struct sfq_slot slots[SFQ_SLOTS];
127 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ 127 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
128}; 128};
@@ -137,12 +137,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
137 return &q->dep[val - SFQ_SLOTS]; 137 return &q->dep[val - SFQ_SLOTS];
138} 138}
139 139
140static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) 140static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
141{ 141{
142 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); 142 return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
143} 143}
144 144
145static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) 145static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
146{ 146{
147 u32 h, h2; 147 u32 h, h2;
148 148
@@ -157,13 +157,13 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
157 iph = ip_hdr(skb); 157 iph = ip_hdr(skb);
158 h = (__force u32)iph->daddr; 158 h = (__force u32)iph->daddr;
159 h2 = (__force u32)iph->saddr ^ iph->protocol; 159 h2 = (__force u32)iph->saddr ^ iph->protocol;
160 if (iph->frag_off & htons(IP_MF|IP_OFFSET)) 160 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
161 break; 161 break;
162 poff = proto_ports_offset(iph->protocol); 162 poff = proto_ports_offset(iph->protocol);
163 if (poff >= 0 && 163 if (poff >= 0 &&
164 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { 164 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
165 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
166 h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff); 166 h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
167 } 167 }
168 break; 168 break;
169 } 169 }
@@ -181,7 +181,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
181 if (poff >= 0 && 181 if (poff >= 0 &&
182 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { 182 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
183 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
184 h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff); 184 h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
185 } 185 }
186 break; 186 break;
187 } 187 }
@@ -203,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
203 203
204 if (TC_H_MAJ(skb->priority) == sch->handle && 204 if (TC_H_MAJ(skb->priority) == sch->handle &&
205 TC_H_MIN(skb->priority) > 0 && 205 TC_H_MIN(skb->priority) > 0 &&
206 TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) 206 TC_H_MIN(skb->priority) <= q->divisor)
207 return TC_H_MIN(skb->priority); 207 return TC_H_MIN(skb->priority);
208 208
209 if (!q->filter_list) 209 if (!q->filter_list)
@@ -221,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
221 return 0; 221 return 0;
222 } 222 }
223#endif 223#endif
224 if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) 224 if (TC_H_MIN(res.classid) <= q->divisor)
225 return TC_H_MIN(res.classid); 225 return TC_H_MIN(res.classid);
226 } 226 }
227 return 0; 227 return 0;
@@ -491,13 +491,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
491 if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 491 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
492 return -EINVAL; 492 return -EINVAL;
493 493
494 if (ctl->divisor &&
495 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
496 return -EINVAL;
497
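The power-of-two restriction follows from sfq_fold_hash() above, which reduces the hash with "& (q->divisor - 1)"; that mask only selects a uniform slot range when the divisor is a power of two (e.g. a divisor of 1024 gives mask 0x3FF).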
494 sch_tree_lock(sch); 498 sch_tree_lock(sch);
495 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); 499 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
496 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 500 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
497 q->perturb_period = ctl->perturb_period * HZ; 501 q->perturb_period = ctl->perturb_period * HZ;
498 if (ctl->limit) 502 if (ctl->limit)
499 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); 503 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
500 504 if (ctl->divisor)
505 q->divisor = ctl->divisor;
501 qlen = sch->q.qlen; 506 qlen = sch->q.qlen;
502 while (sch->q.qlen > q->limit) 507 while (sch->q.qlen > q->limit)
503 sfq_drop(sch); 508 sfq_drop(sch);
@@ -515,15 +520,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
515static int sfq_init(struct Qdisc *sch, struct nlattr *opt) 520static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
516{ 521{
517 struct sfq_sched_data *q = qdisc_priv(sch); 522 struct sfq_sched_data *q = qdisc_priv(sch);
523 size_t sz;
518 int i; 524 int i;
519 525
520 q->perturb_timer.function = sfq_perturbation; 526 q->perturb_timer.function = sfq_perturbation;
521 q->perturb_timer.data = (unsigned long)sch; 527 q->perturb_timer.data = (unsigned long)sch;
522 init_timer_deferrable(&q->perturb_timer); 528 init_timer_deferrable(&q->perturb_timer);
523 529
524 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
525 q->ht[i] = SFQ_EMPTY_SLOT;
526
527 for (i = 0; i < SFQ_DEPTH; i++) { 530 for (i = 0; i < SFQ_DEPTH; i++) {
528 q->dep[i].next = i + SFQ_SLOTS; 531 q->dep[i].next = i + SFQ_SLOTS;
529 q->dep[i].prev = i + SFQ_SLOTS; 532 q->dep[i].prev = i + SFQ_SLOTS;
@@ -532,6 +535,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
532 q->limit = SFQ_DEPTH - 1; 535 q->limit = SFQ_DEPTH - 1;
533 q->cur_depth = 0; 536 q->cur_depth = 0;
534 q->tail = NULL; 537 q->tail = NULL;
538 q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
535 if (opt == NULL) { 539 if (opt == NULL) {
536 q->quantum = psched_mtu(qdisc_dev(sch)); 540 q->quantum = psched_mtu(qdisc_dev(sch));
537 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 541 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
@@ -543,10 +547,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
543 return err; 547 return err;
544 } 548 }
545 549
550 sz = sizeof(q->ht[0]) * q->divisor;
551 q->ht = kmalloc(sz, GFP_KERNEL);
552 if (!q->ht && sz > PAGE_SIZE)
553 q->ht = vmalloc(sz);
554 if (!q->ht)
555 return -ENOMEM;
556 for (i = 0; i < q->divisor; i++)
557 q->ht[i] = SFQ_EMPTY_SLOT;
558
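Since sfq_index is an unsigned char, the table is q->divisor bytes: at the maximum divisor of 65536 that is 64 KiB, well beyond a typical 4 KiB PAGE_SIZE, hence the vmalloc() fallback here and the matching is_vmalloc_addr()/vfree() path in sfq_destroy() below.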
546 for (i = 0; i < SFQ_SLOTS; i++) { 559 for (i = 0; i < SFQ_SLOTS; i++) {
547 slot_queue_init(&q->slots[i]); 560 slot_queue_init(&q->slots[i]);
548 sfq_link(q, i); 561 sfq_link(q, i);
549 } 562 }
563 if (q->limit >= 1)
564 sch->flags |= TCQ_F_CAN_BYPASS;
565 else
566 sch->flags &= ~TCQ_F_CAN_BYPASS;
550 return 0; 567 return 0;
551} 568}
552 569
@@ -557,6 +574,10 @@ static void sfq_destroy(struct Qdisc *sch)
557 tcf_destroy_chain(&q->filter_list); 574 tcf_destroy_chain(&q->filter_list);
558 q->perturb_period = 0; 575 q->perturb_period = 0;
559 del_timer_sync(&q->perturb_timer); 576 del_timer_sync(&q->perturb_timer);
577 if (is_vmalloc_addr(q->ht))
578 vfree(q->ht);
579 else
580 kfree(q->ht);
560} 581}
561 582
562static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) 583static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -569,7 +590,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
569 opt.perturb_period = q->perturb_period / HZ; 590 opt.perturb_period = q->perturb_period / HZ;
570 591
571 opt.limit = q->limit; 592 opt.limit = q->limit;
572 opt.divisor = SFQ_HASH_DIVISOR; 593 opt.divisor = q->divisor;
573 opt.flows = q->limit; 594 opt.flows = q->limit;
574 595
575 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 596 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -594,6 +615,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
594static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, 615static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
595 u32 classid) 616 u32 classid)
596{ 617{
618 /* we cannot bypass queue discipline anymore */
619 sch->flags &= ~TCQ_F_CAN_BYPASS;
597 return 0; 620 return 0;
598} 621}
599 622
@@ -647,7 +670,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
647 if (arg->stop) 670 if (arg->stop)
648 return; 671 return;
649 672
650 for (i = 0; i < SFQ_HASH_DIVISOR; i++) { 673 for (i = 0; i < q->divisor; i++) {
651 if (q->ht[i] == SFQ_EMPTY_SLOT || 674 if (q->ht[i] == SFQ_EMPTY_SLOT ||
652 arg->count < arg->skip) { 675 arg->count < arg->skip) {
653 arg->count++; 676 arg->count++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index e93165820c3..1dcfb5223a8 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
97 changed the limit is not effective anymore. 97 changed the limit is not effective anymore.
98*/ 98*/
99 99
100struct tbf_sched_data 100struct tbf_sched_data {
101{
102/* Parameters */ 101/* Parameters */
103 u32 limit; /* Maximal length of backlog: bytes */ 102 u32 limit; /* Maximal length of backlog: bytes */
104 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ 103 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
115 struct qdisc_watchdog watchdog; /* Watchdog timer */ 114 struct qdisc_watchdog watchdog; /* Watchdog timer */
116}; 115};
117 116
118#define L2T(q,L) qdisc_l2t((q)->R_tab,L) 117#define L2T(q, L) qdisc_l2t((q)->R_tab, L)
119#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) 118#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
120 119
121static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) 120static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
122{ 121{
123 struct tbf_sched_data *q = qdisc_priv(sch); 122 struct tbf_sched_data *q = qdisc_priv(sch);
124 int ret; 123 int ret;
@@ -137,7 +136,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
137 return NET_XMIT_SUCCESS; 136 return NET_XMIT_SUCCESS;
138} 137}
139 138
140static unsigned int tbf_drop(struct Qdisc* sch) 139static unsigned int tbf_drop(struct Qdisc *sch)
141{ 140{
142 struct tbf_sched_data *q = qdisc_priv(sch); 141 struct tbf_sched_data *q = qdisc_priv(sch);
143 unsigned int len = 0; 142 unsigned int len = 0;
@@ -149,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
149 return len; 148 return len;
150} 149}
151 150
152static struct sk_buff *tbf_dequeue(struct Qdisc* sch) 151static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
153{ 152{
154 struct tbf_sched_data *q = qdisc_priv(sch); 153 struct tbf_sched_data *q = qdisc_priv(sch);
155 struct sk_buff *skb; 154 struct sk_buff *skb;
@@ -185,7 +184,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
185 q->tokens = toks; 184 q->tokens = toks;
186 q->ptokens = ptoks; 185 q->ptokens = ptoks;
187 sch->q.qlen--; 186 sch->q.qlen--;
188 sch->flags &= ~TCQ_F_THROTTLED; 187 qdisc_unthrottled(sch);
189 qdisc_bstats_update(sch, skb); 188 qdisc_bstats_update(sch, skb);
190 return skb; 189 return skb;
191 } 190 }
@@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
209 return NULL; 208 return NULL;
210} 209}
211 210
212static void tbf_reset(struct Qdisc* sch) 211static void tbf_reset(struct Qdisc *sch)
213{ 212{
214 struct tbf_sched_data *q = qdisc_priv(sch); 213 struct tbf_sched_data *q = qdisc_priv(sch);
215 214
@@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
227 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 226 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
228}; 227};
229 228
230static int tbf_change(struct Qdisc* sch, struct nlattr *opt) 229static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
231{ 230{
232 int err; 231 int err;
233 struct tbf_sched_data *q = qdisc_priv(sch); 232 struct tbf_sched_data *q = qdisc_priv(sch);
@@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
236 struct qdisc_rate_table *rtab = NULL; 235 struct qdisc_rate_table *rtab = NULL;
237 struct qdisc_rate_table *ptab = NULL; 236 struct qdisc_rate_table *ptab = NULL;
238 struct Qdisc *child = NULL; 237 struct Qdisc *child = NULL;
239 int max_size,n; 238 int max_size, n;
240 239
241 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); 240 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
242 if (err < 0) 241 if (err < 0)
@@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
259 } 258 }
260 259
261 for (n = 0; n < 256; n++) 260 for (n = 0; n < 256; n++)
262 if (rtab->data[n] > qopt->buffer) break; 261 if (rtab->data[n] > qopt->buffer)
263 max_size = (n << qopt->rate.cell_log)-1; 262 break;
263 max_size = (n << qopt->rate.cell_log) - 1;
264 if (ptab) { 264 if (ptab) {
265 int size; 265 int size;
266 266
267 for (n = 0; n < 256; n++) 267 for (n = 0; n < 256; n++)
268 if (ptab->data[n] > qopt->mtu) break; 268 if (ptab->data[n] > qopt->mtu)
269 size = (n << qopt->peakrate.cell_log)-1; 269 break;
270 if (size < max_size) max_size = size; 270 size = (n << qopt->peakrate.cell_log) - 1;
271 if (size < max_size)
272 max_size = size;
271 } 273 }
272 if (max_size < 0) 274 if (max_size < 0)
273 goto done; 275 goto done;
@@ -310,7 +312,7 @@ done:
310 return err; 312 return err;
311} 313}
312 314
313static int tbf_init(struct Qdisc* sch, struct nlattr *opt) 315static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
314{ 316{
315 struct tbf_sched_data *q = qdisc_priv(sch); 317 struct tbf_sched_data *q = qdisc_priv(sch);
316 318
@@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
422 } 424 }
423} 425}
424 426
425static const struct Qdisc_class_ops tbf_class_ops = 427static const struct Qdisc_class_ops tbf_class_ops = {
426{
427 .graft = tbf_graft, 428 .graft = tbf_graft,
428 .leaf = tbf_leaf, 429 .leaf = tbf_leaf,
429 .get = tbf_get, 430 .get = tbf_get,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d84e7329660..45cd30098e3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,8 +53,7 @@
53 which will not break load balancing, though native slave 53 which will not break load balancing, though native slave
54 traffic will have the highest priority. */ 54 traffic will have the highest priority. */
55 55
56struct teql_master 56struct teql_master {
57{
58 struct Qdisc_ops qops; 57 struct Qdisc_ops qops;
59 struct net_device *dev; 58 struct net_device *dev;
60 struct Qdisc *slaves; 59 struct Qdisc *slaves;
@@ -65,22 +64,21 @@ struct teql_master
65 unsigned long tx_dropped; 64 unsigned long tx_dropped;
66}; 65};
67 66
68struct teql_sched_data 67struct teql_sched_data {
69{
70 struct Qdisc *next; 68 struct Qdisc *next;
71 struct teql_master *m; 69 struct teql_master *m;
72 struct neighbour *ncache; 70 struct neighbour *ncache;
73 struct sk_buff_head q; 71 struct sk_buff_head q;
74}; 72};
75 73
76#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) 74#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
77 75
78#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT) 76#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
79 77
80/* "teql*" qdisc routines */ 78/* "teql*" qdisc routines */
81 79
82static int 80static int
83teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) 81teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
84{ 82{
85 struct net_device *dev = qdisc_dev(sch); 83 struct net_device *dev = qdisc_dev(sch);
86 struct teql_sched_data *q = qdisc_priv(sch); 84 struct teql_sched_data *q = qdisc_priv(sch);
@@ -96,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
96} 94}
97 95
98static struct sk_buff * 96static struct sk_buff *
99teql_dequeue(struct Qdisc* sch) 97teql_dequeue(struct Qdisc *sch)
100{ 98{
101 struct teql_sched_data *dat = qdisc_priv(sch); 99 struct teql_sched_data *dat = qdisc_priv(sch);
102 struct netdev_queue *dat_queue; 100 struct netdev_queue *dat_queue;
@@ -118,13 +116,13 @@ teql_dequeue(struct Qdisc* sch)
118} 116}
119 117
120static struct sk_buff * 118static struct sk_buff *
121teql_peek(struct Qdisc* sch) 119teql_peek(struct Qdisc *sch)
122{ 120{
123 /* teql is meant to be used as root qdisc */ 121 /* teql is meant to be used as root qdisc */
124 return NULL; 122 return NULL;
125} 123}
126 124
127static __inline__ void 125static inline void
128teql_neigh_release(struct neighbour *n) 126teql_neigh_release(struct neighbour *n)
129{ 127{
130 if (n) 128 if (n)
@@ -132,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
132} 130}
133 131
134static void 132static void
135teql_reset(struct Qdisc* sch) 133teql_reset(struct Qdisc *sch)
136{ 134{
137 struct teql_sched_data *dat = qdisc_priv(sch); 135 struct teql_sched_data *dat = qdisc_priv(sch);
138 136
@@ -142,13 +140,14 @@ teql_reset(struct Qdisc* sch)
142} 140}
143 141
144static void 142static void
145teql_destroy(struct Qdisc* sch) 143teql_destroy(struct Qdisc *sch)
146{ 144{
147 struct Qdisc *q, *prev; 145 struct Qdisc *q, *prev;
148 struct teql_sched_data *dat = qdisc_priv(sch); 146 struct teql_sched_data *dat = qdisc_priv(sch);
149 struct teql_master *master = dat->m; 147 struct teql_master *master = dat->m;
150 148
151 if ((prev = master->slaves) != NULL) { 149 prev = master->slaves;
150 if (prev) {
152 do { 151 do {
153 q = NEXT_SLAVE(prev); 152 q = NEXT_SLAVE(prev);
154 if (q == sch) { 153 if (q == sch) {
@@ -180,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
180static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) 179static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
181{ 180{
182 struct net_device *dev = qdisc_dev(sch); 181 struct net_device *dev = qdisc_dev(sch);
183 struct teql_master *m = (struct teql_master*)sch->ops; 182 struct teql_master *m = (struct teql_master *)sch->ops;
184 struct teql_sched_data *q = qdisc_priv(sch); 183 struct teql_sched_data *q = qdisc_priv(sch);
185 184
186 if (dev->hard_header_len > m->dev->hard_header_len) 185 if (dev->hard_header_len > m->dev->hard_header_len)
@@ -291,7 +290,8 @@ restart:
291 nores = 0; 290 nores = 0;
292 busy = 0; 291 busy = 0;
293 292
294 if ((q = start) == NULL) 293 q = start;
294 if (!q)
295 goto drop; 295 goto drop;
296 296
297 do { 297 do {
@@ -356,10 +356,10 @@ drop:
356 356
357static int teql_master_open(struct net_device *dev) 357static int teql_master_open(struct net_device *dev)
358{ 358{
359 struct Qdisc * q; 359 struct Qdisc *q;
360 struct teql_master *m = netdev_priv(dev); 360 struct teql_master *m = netdev_priv(dev);
361 int mtu = 0xFFFE; 361 int mtu = 0xFFFE;
362 unsigned flags = IFF_NOARP|IFF_MULTICAST; 362 unsigned int flags = IFF_NOARP | IFF_MULTICAST;
363 363
364 if (m->slaves == NULL) 364 if (m->slaves == NULL)
365 return -EUNATCH; 365 return -EUNATCH;
@@ -427,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
427 do { 427 do {
428 if (new_mtu > qdisc_dev(q)->mtu) 428 if (new_mtu > qdisc_dev(q)->mtu)
429 return -EINVAL; 429 return -EINVAL;
430 } while ((q=NEXT_SLAVE(q)) != m->slaves); 430 } while ((q = NEXT_SLAVE(q)) != m->slaves);
431 } 431 }
432 432
433 dev->mtu = new_mtu; 433 dev->mtu = new_mtu;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5f1fb8bd862..6b04287913c 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1089,7 +1089,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
1089 base.inqueue.immediate); 1089 base.inqueue.immediate);
1090 struct sctp_endpoint *ep; 1090 struct sctp_endpoint *ep;
1091 struct sctp_chunk *chunk; 1091 struct sctp_chunk *chunk;
1092 struct sock *sk;
1093 struct sctp_inq *inqueue; 1092 struct sctp_inq *inqueue;
1094 int state; 1093 int state;
1095 sctp_subtype_t subtype; 1094 sctp_subtype_t subtype;
@@ -1097,7 +1096,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
1097 1096
1098 /* The association should be held so we should be safe. */ 1097 /* The association should be held so we should be safe. */
1099 ep = asoc->ep; 1098 ep = asoc->ep;
1100 sk = asoc->base.sk;
1101 1099
1102 inqueue = &asoc->base.inqueue; 1100 inqueue = &asoc->base.inqueue;
1103 sctp_association_hold(asoc); 1101 sctp_association_hold(asoc);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ea2192444ce..826661be73e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -948,14 +948,11 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
948 union sctp_addr addr; 948 union sctp_addr addr;
949 union sctp_addr *paddr = &addr; 949 union sctp_addr *paddr = &addr;
950 struct sctphdr *sh = sctp_hdr(skb); 950 struct sctphdr *sh = sctp_hdr(skb);
951 sctp_chunkhdr_t *ch;
952 union sctp_params params; 951 union sctp_params params;
953 sctp_init_chunk_t *init; 952 sctp_init_chunk_t *init;
954 struct sctp_transport *transport; 953 struct sctp_transport *transport;
955 struct sctp_af *af; 954 struct sctp_af *af;
956 955
957 ch = (sctp_chunkhdr_t *) skb->data;
958
959 /* 956 /*
960 * This code will NOT touch anything inside the chunk--it is 957 * This code will NOT touch anything inside the chunk--it is
961 * strictly READ-ONLY. 958 * strictly READ-ONLY.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 95e0c8eda1a..865ce7ba4e1 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -201,40 +201,40 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
201{ 201{
202 struct sock *sk = skb->sk; 202 struct sock *sk = skb->sk;
203 struct ipv6_pinfo *np = inet6_sk(sk); 203 struct ipv6_pinfo *np = inet6_sk(sk);
204 struct flowi fl; 204 struct flowi6 fl6;
205 205
206 memset(&fl, 0, sizeof(fl)); 206 memset(&fl6, 0, sizeof(fl6));
207 207
208 fl.proto = sk->sk_protocol; 208 fl6.flowi6_proto = sk->sk_protocol;
209 209
210 /* Fill in the dest address from the route entry passed with the skb 210 /* Fill in the dest address from the route entry passed with the skb
211 * and the source address from the transport. 211 * and the source address from the transport.
212 */ 212 */
213 ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr); 213 ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr);
214 ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr); 214 ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr);
215 215
216 fl.fl6_flowlabel = np->flow_label; 216 fl6.flowlabel = np->flow_label;
217 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); 217 IP6_ECN_flow_xmit(sk, fl6.flowlabel);
218 if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) 218 if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL)
219 fl.oif = transport->saddr.v6.sin6_scope_id; 219 fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id;
220 else 220 else
221 fl.oif = sk->sk_bound_dev_if; 221 fl6.flowi6_oif = sk->sk_bound_dev_if;
222 222
223 if (np->opt && np->opt->srcrt) { 223 if (np->opt && np->opt->srcrt) {
224 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; 224 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
225 ipv6_addr_copy(&fl.fl6_dst, rt0->addr); 225 ipv6_addr_copy(&fl6.daddr, rt0->addr);
226 } 226 }
227 227
228 SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", 228 SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
229 __func__, skb, skb->len, 229 __func__, skb, skb->len,
230 &fl.fl6_src, &fl.fl6_dst); 230 &fl6.saddr, &fl6.daddr);
231 231
232 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); 232 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
233 233
234 if (!(transport->param_flags & SPP_PMTUD_ENABLE)) 234 if (!(transport->param_flags & SPP_PMTUD_ENABLE))
235 skb->local_df = 1; 235 skb->local_df = 1;
236 236
237 return ip6_xmit(sk, skb, &fl, np->opt); 237 return ip6_xmit(sk, skb, &fl6, np->opt);
238} 238}
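The conversion from struct flowi to struct flowi6 in this hunk is a mechanical field rename: fl.fl6_dst/fl.fl6_src become fl6.daddr/fl6.saddr, fl.fl6_flowlabel becomes fl6.flowlabel, and the protocol and output interface move to the flowi6_proto and flowi6_oif common fields.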
239 239
240/* Returns the dst cache entry for the given source and destination ip 240/* Returns the dst cache entry for the given source and destination ip
@@ -245,22 +245,22 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
245 union sctp_addr *saddr) 245 union sctp_addr *saddr)
246{ 246{
247 struct dst_entry *dst; 247 struct dst_entry *dst;
248 struct flowi fl; 248 struct flowi6 fl6;
249 249
250 memset(&fl, 0, sizeof(fl)); 250 memset(&fl6, 0, sizeof(fl6));
251 ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); 251 ipv6_addr_copy(&fl6.daddr, &daddr->v6.sin6_addr);
252 if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) 252 if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
253 fl.oif = daddr->v6.sin6_scope_id; 253 fl6.flowi6_oif = daddr->v6.sin6_scope_id;
254 254
255 255
256 SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); 256 SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6.daddr);
257 257
258 if (saddr) { 258 if (saddr) {
259 ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); 259 ipv6_addr_copy(&fl6.saddr, &saddr->v6.sin6_addr);
260 SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); 260 SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6.saddr);
261 } 261 }
262 262
263 dst = ip6_route_output(&init_net, NULL, &fl); 263 dst = ip6_route_output(&init_net, NULL, &fl6);
264 if (!dst->error) { 264 if (!dst->error) {
265 struct rt6_info *rt; 265 struct rt6_info *rt;
266 rt = (struct rt6_info *)dst; 266 rt = (struct rt6_info *)dst;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 8c6d379b4bb..26dc005113a 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -545,13 +545,11 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
545 struct sctp_transport *transport = pkt->transport; 545 struct sctp_transport *transport = pkt->transport;
546 sctp_xmit_t status; 546 sctp_xmit_t status;
547 struct sctp_chunk *chunk, *chunk1; 547 struct sctp_chunk *chunk, *chunk1;
548 struct sctp_association *asoc;
549 int fast_rtx; 548 int fast_rtx;
550 int error = 0; 549 int error = 0;
551 int timer = 0; 550 int timer = 0;
552 int done = 0; 551 int done = 0;
553 552
554 asoc = q->asoc;
555 lqueue = &q->retransmit; 553 lqueue = &q->retransmit;
556 fast_rtx = q->fast_rtx; 554 fast_rtx = q->fast_rtx;
557 555
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e58f9476f29..152976ec0b7 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -468,32 +468,32 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
468 union sctp_addr *saddr) 468 union sctp_addr *saddr)
469{ 469{
470 struct rtable *rt; 470 struct rtable *rt;
471 struct flowi fl; 471 struct flowi4 fl4;
472 struct sctp_bind_addr *bp; 472 struct sctp_bind_addr *bp;
473 struct sctp_sockaddr_entry *laddr; 473 struct sctp_sockaddr_entry *laddr;
474 struct dst_entry *dst = NULL; 474 struct dst_entry *dst = NULL;
475 union sctp_addr dst_saddr; 475 union sctp_addr dst_saddr;
476 476
477 memset(&fl, 0x0, sizeof(struct flowi)); 477 memset(&fl4, 0x0, sizeof(struct flowi4));
478 fl.fl4_dst = daddr->v4.sin_addr.s_addr; 478 fl4.daddr = daddr->v4.sin_addr.s_addr;
479 fl.fl_ip_dport = daddr->v4.sin_port; 479 fl4.fl4_dport = daddr->v4.sin_port;
480 fl.proto = IPPROTO_SCTP; 480 fl4.flowi4_proto = IPPROTO_SCTP;
481 if (asoc) { 481 if (asoc) {
482 fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); 482 fl4.flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
483 fl.oif = asoc->base.sk->sk_bound_dev_if; 483 fl4.flowi4_oif = asoc->base.sk->sk_bound_dev_if;
484 fl.fl_ip_sport = htons(asoc->base.bind_addr.port); 484 fl4.fl4_sport = htons(asoc->base.bind_addr.port);
485 } 485 }
486 if (saddr) { 486 if (saddr) {
487 fl.fl4_src = saddr->v4.sin_addr.s_addr; 487 fl4.saddr = saddr->v4.sin_addr.s_addr;
488 fl.fl_ip_sport = saddr->v4.sin_port; 488 fl4.fl4_sport = saddr->v4.sin_port;
489 } 489 }
490 490
491 SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", 491 SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
492 __func__, &fl.fl4_dst, &fl.fl4_src); 492 __func__, &fl4.daddr, &fl4.saddr);
493 493
494 if (!ip_route_output_key(&init_net, &rt, &fl)) { 494 rt = ip_route_output_key(&init_net, &fl4);
495 if (!IS_ERR(rt))
495 dst = &rt->dst; 496 dst = &rt->dst;
496 }
497 497
498 /* If there is no association or if a source address is passed, no 498 /* If there is no association or if a source address is passed, no
499 * more validation is required. 499 * more validation is required.
@@ -533,9 +533,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
533 continue; 533 continue;
534 if ((laddr->state == SCTP_ADDR_SRC) && 534 if ((laddr->state == SCTP_ADDR_SRC) &&
535 (AF_INET == laddr->a.sa.sa_family)) { 535 (AF_INET == laddr->a.sa.sa_family)) {
536 fl.fl4_src = laddr->a.v4.sin_addr.s_addr; 536 fl4.saddr = laddr->a.v4.sin_addr.s_addr;
537 fl.fl_ip_sport = laddr->a.v4.sin_port; 537 fl4.fl4_sport = laddr->a.v4.sin_port;
538 if (!ip_route_output_key(&init_net, &rt, &fl)) { 538 rt = ip_route_output_key(&init_net, &fl4);
539 if (!IS_ERR(rt)) {
539 dst = &rt->dst; 540 dst = &rt->dst;
540 goto out_unlock; 541 goto out_unlock;
541 } 542 }
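ip_route_output_key() now returns the struct rtable directly, using ERR_PTR() encoding for failures, instead of filling in a caller-provided pointer and returning an int; the IS_ERR() tests in this hunk replace the old zero-return checks.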
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b23428f3c0d..de98665db52 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3375,7 +3375,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3375 struct sctp_fwdtsn_skip *skiplist) 3375 struct sctp_fwdtsn_skip *skiplist)
3376{ 3376{
3377 struct sctp_chunk *retval = NULL; 3377 struct sctp_chunk *retval = NULL;
3378 struct sctp_fwdtsn_chunk *ftsn_chunk;
3379 struct sctp_fwdtsn_hdr ftsn_hdr; 3378 struct sctp_fwdtsn_hdr ftsn_hdr;
3380 struct sctp_fwdtsn_skip skip; 3379 struct sctp_fwdtsn_skip skip;
3381 size_t hint; 3380 size_t hint;
@@ -3388,8 +3387,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3388 if (!retval) 3387 if (!retval)
3389 return NULL; 3388 return NULL;
3390 3389
3391 ftsn_chunk = (struct sctp_fwdtsn_chunk *)retval->subh.fwdtsn_hdr;
3392
3393 ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn); 3390 ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn);
3394 retval->subh.fwdtsn_hdr = 3391 retval->subh.fwdtsn_hdr =
3395 sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr); 3392 sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 8e02550ff3e..3951a10605b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2928,7 +2928,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
2928 unsigned int optlen) 2928 unsigned int optlen)
2929{ 2929{
2930 struct sctp_sock *sp; 2930 struct sctp_sock *sp;
2931 struct sctp_endpoint *ep;
2932 struct sctp_association *asoc = NULL; 2931 struct sctp_association *asoc = NULL;
2933 struct sctp_setpeerprim prim; 2932 struct sctp_setpeerprim prim;
2934 struct sctp_chunk *chunk; 2933 struct sctp_chunk *chunk;
@@ -2936,7 +2935,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
2936 int err; 2935 int err;
2937 2936
2938 sp = sctp_sk(sk); 2937 sp = sctp_sk(sk);
2939 ep = sp->ep;
2940 2938
2941 if (!sctp_addip_enable) 2939 if (!sctp_addip_enable)
2942 return -EPERM; 2940 return -EPERM;
@@ -6102,15 +6100,16 @@ static void __sctp_write_space(struct sctp_association *asoc)
6102 wake_up_interruptible(&asoc->wait); 6100 wake_up_interruptible(&asoc->wait);
6103 6101
6104 if (sctp_writeable(sk)) { 6102 if (sctp_writeable(sk)) {
6105 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 6103 wait_queue_head_t *wq = sk_sleep(sk);
6106 wake_up_interruptible(sk_sleep(sk)); 6104
6105 if (wq && waitqueue_active(wq))
6106 wake_up_interruptible(wq);
6107 6107
6108 /* Note that we try to include the Async I/O support 6108 /* Note that we try to include the Async I/O support
6109 * here by modeling from the current TCP/UDP code. 6109 * here by modeling from the current TCP/UDP code.
6110 * We have not tested with it yet. 6110 * We have not tested with it yet.
6111 */ 6111 */
6112 if (sock->wq->fasync_list && 6112 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
6113 !(sk->sk_shutdown & SEND_SHUTDOWN))
6114 sock_wake_async(sock, 6113 sock_wake_async(sock,
6115 SOCK_WAKE_SPACE, POLL_OUT); 6114 SOCK_WAKE_SPACE, POLL_OUT);
6116 } 6115 }
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 747d5412c46..f1e40cebc98 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -344,7 +344,7 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map,
344 344
345 /* Refresh the gap ack information. */ 345 /* Refresh the gap ack information. */
346 if (sctp_tsnmap_has_gap(map)) { 346 if (sctp_tsnmap_has_gap(map)) {
347 __u16 start, end; 347 __u16 start = 0, end = 0;
348 sctp_tsnmap_iter_init(map, &iter); 348 sctp_tsnmap_iter_init(map, &iter);
349 while (sctp_tsnmap_next_gap_ack(map, &iter, 349 while (sctp_tsnmap_next_gap_ack(map, &iter,
350 &start, 350 &start,
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index c7f7e49609c..17678189d05 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -105,11 +105,8 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
105 gfp_t gfp) 105 gfp_t gfp)
106{ 106{
107 struct sk_buff_head temp; 107 struct sk_buff_head temp;
108 sctp_data_chunk_t *hdr;
109 struct sctp_ulpevent *event; 108 struct sctp_ulpevent *event;
110 109
111 hdr = (sctp_data_chunk_t *) chunk->chunk_hdr;
112
113 /* Create an event from the incoming chunk. */ 110 /* Create an event from the incoming chunk. */
114 event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); 111 event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp);
115 if (!event) 112 if (!event)
@@ -743,11 +740,9 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
743 struct sk_buff *pos, *tmp; 740 struct sk_buff *pos, *tmp;
744 struct sctp_ulpevent *cevent; 741 struct sctp_ulpevent *cevent;
745 struct sctp_stream *in; 742 struct sctp_stream *in;
746 __u16 sid, csid; 743 __u16 sid, csid, cssn;
747 __u16 ssn, cssn;
748 744
749 sid = event->stream; 745 sid = event->stream;
750 ssn = event->ssn;
751 in = &ulpq->asoc->ssnmap->in; 746 in = &ulpq->asoc->ssnmap->in;
752 747
753 event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; 748 event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
diff --git a/net/socket.c b/net/socket.c
index ac2219f90d5..5212447c86e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly;
240static struct inode *sock_alloc_inode(struct super_block *sb) 240static struct inode *sock_alloc_inode(struct super_block *sb)
241{ 241{
242 struct socket_alloc *ei; 242 struct socket_alloc *ei;
243 struct socket_wq *wq;
243 244
244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
245 if (!ei) 246 if (!ei)
246 return NULL; 247 return NULL;
247 ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); 248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!ei->socket.wq) { 249 if (!wq) {
249 kmem_cache_free(sock_inode_cachep, ei); 250 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL; 251 return NULL;
251 } 252 }
252 init_waitqueue_head(&ei->socket.wq->wait); 253 init_waitqueue_head(&wq->wait);
253 ei->socket.wq->fasync_list = NULL; 254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
254 256
255 ei->socket.state = SS_UNCONNECTED; 257 ei->socket.state = SS_UNCONNECTED;
256 ei->socket.flags = 0; 258 ei->socket.flags = 0;
@@ -273,9 +275,11 @@ static void wq_free_rcu(struct rcu_head *head)
273static void sock_destroy_inode(struct inode *inode) 275static void sock_destroy_inode(struct inode *inode)
274{ 276{
275 struct socket_alloc *ei; 277 struct socket_alloc *ei;
278 struct socket_wq *wq;
276 279
277 ei = container_of(inode, struct socket_alloc, vfs_inode); 280 ei = container_of(inode, struct socket_alloc, vfs_inode);
278 call_rcu(&ei->socket.wq->rcu, wq_free_rcu); 281 wq = rcu_dereference_protected(ei->socket.wq, 1);
282 call_rcu(&wq->rcu, wq_free_rcu);
279 kmem_cache_free(sock_inode_cachep, ei); 283 kmem_cache_free(sock_inode_cachep, ei);
280} 284}
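Taken together these hunks give the wq pointer a full RCU lifecycle: it is published with RCU_INIT_POINTER() at allocation, fetched on update paths with rcu_dereference_protected() (the constant-1 or sock_owned_by_user() argument asserts the caller holds exclusive access), and released through call_rcu() so any concurrent readers drain before wq_free_rcu() frees it.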
281 285
@@ -524,7 +528,7 @@ void sock_release(struct socket *sock)
524 module_put(owner); 528 module_put(owner);
525 } 529 }
526 530
527 if (sock->wq->fasync_list) 531 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
528 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 532 printk(KERN_ERR "sock_release: fasync list not empty!\n");
529 533
530 percpu_sub(sockets_in_use, 1); 534 percpu_sub(sockets_in_use, 1);
@@ -1108,15 +1112,16 @@ static int sock_fasync(int fd, struct file *filp, int on)
1108{ 1112{
1109 struct socket *sock = filp->private_data; 1113 struct socket *sock = filp->private_data;
1110 struct sock *sk = sock->sk; 1114 struct sock *sk = sock->sk;
1115 struct socket_wq *wq;
1111 1116
1112 if (sk == NULL) 1117 if (sk == NULL)
1113 return -EINVAL; 1118 return -EINVAL;
1114 1119
1115 lock_sock(sk); 1120 lock_sock(sk);
1121 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1122 fasync_helper(fd, filp, on, &wq->fasync_list);
1116 1123
1117 fasync_helper(fd, filp, on, &sock->wq->fasync_list); 1124 if (!wq->fasync_list)
1118
1119 if (!sock->wq->fasync_list)
1120 sock_reset_flag(sk, SOCK_FASYNC); 1125 sock_reset_flag(sk, SOCK_FASYNC);
1121 else 1126 else
1122 sock_set_flag(sk, SOCK_FASYNC); 1127 sock_set_flag(sk, SOCK_FASYNC);
@@ -2583,23 +2588,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 
 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 {
+	struct compat_ethtool_rxnfc __user *compat_rxnfc;
+	bool convert_in = false, convert_out = false;
+	size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
+	struct ethtool_rxnfc __user *rxnfc;
 	struct ifreq __user *ifr;
+	u32 rule_cnt = 0, actual_rule_cnt;
+	u32 ethcmd;
 	u32 data;
-	void __user *datap;
+	int ret;
 
-	ifr = compat_alloc_user_space(sizeof(*ifr));
+	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+		return -EFAULT;
 
-	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+	compat_rxnfc = compat_ptr(data);
+
+	if (get_user(ethcmd, &compat_rxnfc->cmd))
 		return -EFAULT;
 
-	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+	/* Most ethtool structures are defined without padding.
+	 * Unfortunately struct ethtool_rxnfc is an exception.
+	 */
+	switch (ethcmd) {
+	default:
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		/* Buffer size is variable */
+		if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
+			return -EFAULT;
+		if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
+			return -ENOMEM;
+		buf_size += rule_cnt * sizeof(u32);
+		/* fall through */
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+		convert_out = true;
+		/* fall through */
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+		buf_size += sizeof(struct ethtool_rxnfc);
+		convert_in = true;
+		break;
+	}
+
+	ifr = compat_alloc_user_space(buf_size);
+	rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
+
+	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
 		return -EFAULT;
 
-	datap = compat_ptr(data);
-	if (put_user(datap, &ifr->ifr_ifru.ifru_data))
+	if (put_user(convert_in ? rxnfc : compat_ptr(data),
+		     &ifr->ifr_ifru.ifru_data))
 		return -EFAULT;
 
-	return dev_ioctl(net, SIOCETHTOOL, ifr);
+	if (convert_in) {
+		/* We expect there to be holes between fs.m_u and
+		 * fs.ring_cookie and at the end of fs, but nowhere else.
+		 */
+		BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) +
+			     sizeof(compat_rxnfc->fs.m_u) !=
+			     offsetof(struct ethtool_rxnfc, fs.m_u) +
+			     sizeof(rxnfc->fs.m_u));
+		BUILD_BUG_ON(
+			offsetof(struct compat_ethtool_rxnfc, fs.location) -
+			offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+			offsetof(struct ethtool_rxnfc, fs.location) -
+			offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+		if (copy_in_user(rxnfc, compat_rxnfc,
+				 (void *)(&rxnfc->fs.m_u + 1) -
+				 (void *)rxnfc) ||
+		    copy_in_user(&rxnfc->fs.ring_cookie,
+				 &compat_rxnfc->fs.ring_cookie,
+				 (void *)(&rxnfc->fs.location + 1) -
+				 (void *)&rxnfc->fs.ring_cookie) ||
+		    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
+				 sizeof(rxnfc->rule_cnt)))
+			return -EFAULT;
+	}
+
+	ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+	if (ret)
+		return ret;
+
+	if (convert_out) {
+		if (copy_in_user(compat_rxnfc, rxnfc,
+				 (const void *)(&rxnfc->fs.m_u + 1) -
+				 (const void *)rxnfc) ||
+		    copy_in_user(&compat_rxnfc->fs.ring_cookie,
+				 &rxnfc->fs.ring_cookie,
+				 (const void *)(&rxnfc->fs.location + 1) -
+				 (const void *)&rxnfc->fs.ring_cookie) ||
+		    copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
+				 sizeof(rxnfc->rule_cnt)))
+			return -EFAULT;
+
+		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+			/* As an optimisation, we only copy the actual
+			 * number of rules that the underlying
+			 * function returned. Since Mallory might
+			 * change the rule count in user memory, we
+			 * check that it is less than the rule count
+			 * originally given (as the user buffer size),
+			 * which has been range-checked.
+			 */
+			if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
+				return -EFAULT;
+			if (actual_rule_cnt < rule_cnt)
+				rule_cnt = actual_rule_cnt;
+			if (copy_in_user(&compat_rxnfc->rule_locs[0],
+					 &rxnfc->rule_locs[0],
+					 rule_cnt * sizeof(u32)))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
 }
 
 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2604 2709
2605static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 2710static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
@@ -2643,7 +2748,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
 
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
-	err = dev_ioctl(net, cmd, &kifr);
+	err = dev_ioctl(net, cmd,
+			(struct ifreq __user __force *) &kifr);
 	set_fs(old_fs);
 
 	return err;
@@ -2752,7 +2858,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
-	err = dev_ioctl(net, cmd, (void __user *)&ifr);
+	err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
 	set_fs(old_fs);
 
 	if (cmd == SIOCGIFMAP && !err) {
@@ -2857,7 +2963,8 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 		ret |= __get_user(rtdev, &(ur4->rt_dev));
 		if (rtdev) {
 			ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
-			r4.rt_dev = devname; devname[15] = 0;
+			r4.rt_dev = (char __user __force *)devname;
+			devname[15] = 0;
 		} else
 			r4.rt_dev = NULL;
 
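
The socket.c changes above convert sock->wq into an RCU-managed pointer: the allocator publishes it with RCU_INIT_POINTER(), paths that are serialized by other means read it with rcu_dereference_protected(), and teardown defers the free through call_rcu(). A minimal, self-contained sketch of that publish/read/free pattern, using hypothetical 'foo' structures rather than the kernel's socket types:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo_wq {
		struct rcu_head rcu;
		void *fasync_list;
	};

	struct foo {
		struct foo_wq __rcu *wq;
	};

	static int foo_init(struct foo *f)
	{
		struct foo_wq *wq = kmalloc(sizeof(*wq), GFP_KERNEL);

		if (!wq)
			return -ENOMEM;
		wq->fasync_list = NULL;
		/* Nothing can see f yet, so no memory barrier is needed. */
		RCU_INIT_POINTER(f->wq, wq);
		return 0;
	}

	static void foo_wq_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct foo_wq, rcu));
	}

	static void foo_destroy(struct foo *f)
	{
		/* Condition "1": caller holds the last reference, no races. */
		struct foo_wq *wq = rcu_dereference_protected(f->wq, 1);

		call_rcu(&wq->rcu, foo_wq_free_rcu);
	}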
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 8b4061049d7..e3c36a27441 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -160,6 +160,28 @@ gss_mech_get_by_name(const char *name)
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
 
+struct gss_api_mech *
+gss_mech_get_by_OID(struct xdr_netobj *obj)
+{
+	struct gss_api_mech *pos, *gm = NULL;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		if (obj->len == pos->gm_oid.len) {
+			if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
+				if (try_module_get(pos->gm_owner))
+					gm = pos;
+				break;
+			}
+		}
+	}
+	spin_unlock(&registered_mechs_lock);
+	return gm;
+
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
+
 static inline int
 mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
 {
@@ -193,6 +215,22 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
 
+int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
+{
+	struct gss_api_mech *pos = NULL;
+	int i = 0;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		array_ptr[i] = pos->gm_pfs->pseudoflavor;
+		i++;
+	}
+	spin_unlock(&registered_mechs_lock);
+	return i;
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
+
 u32
 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
 {
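
gss_mech_get_by_OID() follows a common kernel idiom: walk a spinlock-protected registry and hand back a matching entry only if try_module_get() succeeds, so the owning module cannot be unloaded while the caller is using the mechanism. A sketch of the idiom with an illustrative registry (the names below are not the SUNRPC API):

	#include <linux/list.h>
	#include <linux/module.h>
	#include <linux/spinlock.h>
	#include <linux/string.h>

	struct mech {
		struct list_head list;
		struct module *owner;
		const char *name;
	};

	static LIST_HEAD(mech_list);
	static DEFINE_SPINLOCK(mech_lock);

	static struct mech *mech_get(const char *name)
	{
		struct mech *pos, *found = NULL;

		spin_lock(&mech_lock);
		list_for_each_entry(pos, &mech_list, list) {
			if (!strcmp(pos->name, name)) {
				/* Pin the module; if this fails the entry
				 * is being unloaded and must not be used. */
				if (try_module_get(pos->owner))
					found = pos;
				break;
			}
		}
		spin_unlock(&mech_lock);
		return found;	/* caller later does module_put(found->owner) */
	}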
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 2e9387b2384..ffb687671da 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -940,7 +940,7 @@ static int rpciod_start(void)
 	 * Create the rpciod thread and wait for it to start.
 	 */
 	dprintk("RPC: creating workqueue rpciod\n");
-	wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
+	wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
 	rpciod_workqueue = wq;
 	return rpciod_workqueue != NULL;
 }
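
The sched.c hunk is a flag rename rather than a behavior change: WQ_RESCUER was the old spelling of what is now WQ_MEM_RECLAIM, which guarantees the workqueue a rescuer thread so rpciod can always make forward progress under memory pressure. Typical usage stays the same:

	struct workqueue_struct *wq;

	/* max_active of 0 selects the default concurrency limit */
	wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
	if (!wq)
		return -ENOMEM;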
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d802e941d36..b7d435c3f19 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -420,6 +420,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
 static void svc_udp_data_ready(struct sock *sk, int count)
 {
 	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
@@ -428,8 +429,8 @@ static void svc_udp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible(sk_sleep(sk));
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
 }
 
 /*
@@ -438,6 +439,7 @@ static void svc_udp_data_ready(struct sock *sk, int count)
 static void svc_write_space(struct sock *sk)
 {
 	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
@@ -445,10 +447,10 @@ static void svc_write_space(struct sock *sk)
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) {
+	if (wq && waitqueue_active(wq)) {
 		dprintk("RPC svc_write_space: someone sleeping on %p\n",
 		       svsk);
-		wake_up_interruptible(sk_sleep(sk));
+		wake_up_interruptible(wq);
 	}
 }
 
@@ -739,6 +741,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 {
 	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq;
 
 	dprintk("svc: socket %p TCP (listen) state change %d\n",
 		sk, sk->sk_state);
@@ -761,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 		printk("svc: socket %p: no user data\n", sk);
 	}
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_all(sk_sleep(sk));
+	wq = sk_sleep(sk);
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible_all(wq);
 }
 
 /*
@@ -771,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 static void svc_tcp_state_change(struct sock *sk)
 {
 	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
 		sk, sk->sk_state, sk->sk_user_data);
@@ -781,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk)
 		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_all(sk_sleep(sk));
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible_all(wq);
 }
 
 static void svc_tcp_data_ready(struct sock *sk, int count)
 {
 	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
 		sk, sk->sk_user_data);
@@ -795,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible(sk_sleep(sk));
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
 }
 
 /*
@@ -1531,6 +1537,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct sock *sk = svsk->sk_sk;
+	wait_queue_head_t *wq;
 
 	dprintk("svc: svc_sock_detach(%p)\n", svsk);
 
@@ -1539,8 +1546,9 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible(sk_sleep(sk));
+	wq = sk_sleep(sk);
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
 }
 
 /*
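
Every svcsock.c hunk applies the same transformation: sk_sleep(sk) is loaded once into a local wait_queue_head_t pointer instead of being evaluated up to three times per callback, which shortens the code and avoids re-reading sk->sk_wq around the wakeup. The shape of the change, sketched on a hypothetical callback:

	static void example_data_ready(struct sock *sk)
	{
		wait_queue_head_t *wq = sk_sleep(sk);	/* read once */

		/* ... mark the transport ready and enqueue it ... */

		if (wq && waitqueue_active(wq))
			wake_up_interruptible(wq);
	}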
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 0436927369f..2c5954b8593 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -29,18 +29,6 @@ config TIPC_ADVANCED
 	  Saying Y here will open some advanced configuration for TIPC.
 	  Most users do not need to bother; if unsure, just say N.
 
-config TIPC_NODES
-	int "Maximum number of nodes in a cluster"
-	depends on TIPC_ADVANCED
-	range 8 2047
-	default "255"
-	help
-	  Specifies how many nodes can be supported in a TIPC cluster.
-	  Can range from 8 to 2047 nodes; default is 255.
-
-	  Setting this to a smaller value saves some memory;
-	  setting it to higher allows for more nodes.
-
 config TIPC_PORTS
 	int "Maximum number of ports in a node"
 	depends on TIPC_ADVANCED
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 88463d9a6f1..a6fdab33877 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -2,7 +2,7 @@
  * net/tipc/addr.c: TIPC address utility routines
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,7 @@
  * tipc_addr_domain_valid - validates a network domain address
  *
  * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
- * where Z, C, and N are non-zero and do not exceed the configured limits.
+ * where Z, C, and N are non-zero.
  *
  * Returns 1 if domain address is valid, otherwise 0
  */
@@ -51,10 +51,6 @@ int tipc_addr_domain_valid(u32 addr)
 	u32 n = tipc_node(addr);
 	u32 c = tipc_cluster(addr);
 	u32 z = tipc_zone(addr);
-	u32 max_nodes = tipc_max_nodes;
-
-	if (n > max_nodes)
-		return 0;
 
 	if (n && (!z || !c))
 		return 0;
@@ -66,8 +62,7 @@ int tipc_addr_domain_valid(u32 addr)
 /**
  * tipc_addr_node_valid - validates a proposed network address for this node
  *
- * Accepts <Z.C.N>, where Z, C, and N are non-zero and do not exceed
- * the configured limits.
+ * Accepts <Z.C.N>, where Z, C, and N are non-zero.
  *
  * Returns 1 if address can be used, otherwise 0
  */
@@ -81,9 +76,9 @@ int tipc_in_scope(u32 domain, u32 addr)
 {
 	if (!domain || (domain == addr))
 		return 1;
-	if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */
+	if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
 		return 1;
-	if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */
+	if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */
 		return 1;
 	return 0;
 }
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 2490fadd0ca..8971aba99ae 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -37,6 +37,16 @@
 #ifndef _TIPC_ADDR_H
 #define _TIPC_ADDR_H
 
+static inline u32 tipc_zone_mask(u32 addr)
+{
+	return addr & 0xff000000u;
+}
+
+static inline u32 tipc_cluster_mask(u32 addr)
+{
+	return addr & 0xfffff000u;
+}
+
 static inline int in_own_cluster(u32 addr)
 {
 	return !((addr ^ tipc_own_addr) >> 12);
@@ -49,14 +59,13 @@ static inline int in_own_cluster(u32 addr)
  * after a network hop.
  */
 
-static inline int addr_domain(int sc)
+static inline u32 addr_domain(u32 sc)
 {
 	if (likely(sc == TIPC_NODE_SCOPE))
 		return tipc_own_addr;
 	if (sc == TIPC_CLUSTER_SCOPE)
-		return tipc_addr(tipc_zone(tipc_own_addr),
-				 tipc_cluster(tipc_own_addr), 0);
-	return tipc_addr(tipc_zone(tipc_own_addr), 0, 0);
+		return tipc_cluster_mask(tipc_own_addr);
+	return tipc_zone_mask(tipc_own_addr);
 }
 
 int tipc_addr_domain_valid(u32);
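
The two new helpers encode TIPC's <Z.C.N> address layout: a network address is a u32 with an 8-bit zone in the top byte, a 12-bit cluster in the next field, and a 12-bit node number in the low bits, so tipc_zone_mask() keeps the top 8 bits and tipc_cluster_mask() the top 20. A worked example with illustrative values:

	/* <Z.C.N> = <1.2.3>: zone 1, cluster 2, node 3 */
	u32 addr = (1u << 24) | (2u << 12) | 3u;	/* 0x01002003 */

	u32 zone    = tipc_zone_mask(addr);	/* 0x01000000, i.e. <1.0.0> */
	u32 cluster = tipc_cluster_mask(addr);	/* 0x01002000, i.e. <1.2.0> */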
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 70ab5ef4876..7dc1dc7151e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -3,7 +3,7 @@
 *
 * Copyright (c) 2004-2006, Ericsson AB
 * Copyright (c) 2004, Intel Corporation.
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -61,8 +61,8 @@
 */
 
 struct bcbearer_pair {
-	struct bearer *primary;
-	struct bearer *secondary;
+	struct tipc_bearer *primary;
+	struct tipc_bearer *secondary;
 };
 
 /**
@@ -81,7 +81,7 @@ struct bcbearer_pair {
 */
 
 struct bcbearer {
-	struct bearer bearer;
+	struct tipc_bearer bearer;
 	struct media media;
 	struct bcbearer_pair bpairs[MAX_BEARERS];
 	struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
@@ -93,6 +93,7 @@ struct bcbearer {
 * struct bclink - link used for broadcast messages
 * @link: (non-standard) broadcast link structure
 * @node: (non-standard) node structure representing b'cast link's peer node
+ * @retransmit_to: node that most recently requested a retransmit
 *
 * Handles sequence numbering, fragmentation, bundling, etc.
 */
@@ -100,6 +101,7 @@ struct bcbearer {
 struct bclink {
 	struct link link;
 	struct tipc_node node;
+	struct tipc_node *retransmit_to;
 };
 
 
@@ -184,6 +186,17 @@ static int bclink_ack_allowed(u32 n)
 
 
 /**
+ * tipc_bclink_retransmit_to - get most recent node to request retransmission
+ *
+ * Called with bc_lock locked
+ */
+
+struct tipc_node *tipc_bclink_retransmit_to(void)
+{
+	return bclink->retransmit_to;
+}
+
+/**
 * bclink_retransmit_pkt - retransmit broadcast packets
 * @after: sequence number of last packet to *not* retransmit
 * @to: sequence number of last packet to retransmit
@@ -285,6 +298,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
 		msg = buf_msg(buf);
 		tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
 			 INT_H_SIZE, n_ptr->addr);
+		msg_set_non_seq(msg, 1);
 		msg_set_mc_netid(msg, tipc_net_id);
 		msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in));
 		msg_set_bcgap_after(msg, n_ptr->bclink.gap_after);
@@ -405,8 +419,6 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
 	else
 		bclink_set_last_sent();
 
-	if (bcl->out_queue_size > bcl->stats.max_queue_sz)
-		bcl->stats.max_queue_sz = bcl->out_queue_size;
 	bcl->stats.queue_sz_counts++;
 	bcl->stats.accu_queue_sz += bcl->out_queue_size;
 
@@ -444,10 +456,9 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 			tipc_node_unlock(node);
 			spin_lock_bh(&bc_lock);
 			bcl->stats.recv_nacks++;
-			bcl->owner->next = node; /* remember requestor */
+			bclink->retransmit_to = node;
 			bclink_retransmit_pkt(msg_bcgap_after(msg),
 					      msg_bcgap_to(msg));
-			bcl->owner->next = NULL;
 			spin_unlock_bh(&bc_lock);
 		} else {
 			tipc_bclink_peek_nack(msg_destnode(msg),
@@ -574,8 +585,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 	bcbearer->remains = tipc_bcast_nmap;
 
 	for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
-		struct bearer *p = bcbearer->bpairs[bp_index].primary;
-		struct bearer *s = bcbearer->bpairs[bp_index].secondary;
+		struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
+		struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
 
 		if (!p)
 			break;	/* no more bearers to try */
@@ -584,11 +595,11 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 		if (bcbearer->remains_new.count == bcbearer->remains.count)
 			continue;	/* bearer pair doesn't add anything */
 
-		if (p->publ.blocked ||
-		    p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
+		if (p->blocked ||
+		    p->media->send_msg(buf, p, &p->media->bcast_addr)) {
 			/* unable to send on primary bearer */
-			if (!s || s->publ.blocked ||
-			    s->media->send_msg(buf, &s->publ,
+			if (!s || s->blocked ||
+			    s->media->send_msg(buf, s,
 					       &s->media->bcast_addr)) {
 				/* unable to send on either bearer */
 				continue;
@@ -633,7 +644,7 @@ void tipc_bcbearer_sort(void)
 	memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
 
 	for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
-		struct bearer *b = &tipc_bearers[b_index];
+		struct tipc_bearer *b = &tipc_bearers[b_index];
 
 		if (!b->active || !b->nodes.count)
 			continue;
@@ -682,12 +693,12 @@ void tipc_bcbearer_sort(void)
 
 void tipc_bcbearer_push(void)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 
 	spin_lock_bh(&bc_lock);
 	b_ptr = &bcbearer->bearer;
-	if (b_ptr->publ.blocked) {
-		b_ptr->publ.blocked = 0;
+	if (b_ptr->blocked) {
+		b_ptr->blocked = 0;
 		tipc_bearer_lock_push(b_ptr);
 	}
 	spin_unlock_bh(&bc_lock);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 51f8c5326ce..500c97f1c85 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -2,7 +2,7 @@
 * net/tipc/bcast.h: Include file for TIPC broadcast code
 *
 * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -90,6 +90,7 @@ void tipc_port_list_free(struct port_list *pl_ptr);
 
 int tipc_bclink_init(void);
 void tipc_bclink_stop(void);
+struct tipc_node *tipc_bclink_retransmit_to(void);
 void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
 int tipc_bclink_send_msg(struct sk_buff *buf);
 void tipc_bclink_recv_pkt(struct sk_buff *buf);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 837b7a46788..411719feb80 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -2,7 +2,7 @@
 * net/tipc/bearer.c: TIPC bearer code
 *
 * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2004-2006, Wind River Systems
+ * Copyright (c) 2004-2006, 2010-2011, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -44,7 +44,7 @@
 static struct media media_list[MAX_MEDIA];
 static u32 media_count;
 
-struct bearer tipc_bearers[MAX_BEARERS];
+struct tipc_bearer tipc_bearers[MAX_BEARERS];
 
 /**
 * media_name_valid - validate media name
@@ -158,7 +158,6 @@ int tipc_register_media(u32 media_type,
 	m_ptr->disable_bearer = disable;
 	m_ptr->addr2str = addr2str;
 	memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr));
-	m_ptr->bcast = 1;
 	strcpy(m_ptr->name, name);
 	m_ptr->priority = bearer_priority;
 	m_ptr->tolerance = link_tolerance;
@@ -278,13 +277,13 @@
 * bearer_find - locates bearer object with matching bearer name
 */
 
-static struct bearer *bearer_find(const char *name)
+static struct tipc_bearer *bearer_find(const char *name)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	u32 i;
 
 	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
-		if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
+		if (b_ptr->active && (!strcmp(b_ptr->name, name)))
 			return b_ptr;
 	}
 	return NULL;
@@ -294,16 +293,16 @@ static struct bearer *bearer_find(const char *name)
 * tipc_bearer_find_interface - locates bearer object with matching interface name
 */
 
-struct bearer *tipc_bearer_find_interface(const char *if_name)
+struct tipc_bearer *tipc_bearer_find_interface(const char *if_name)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	char *b_if_name;
 	u32 i;
 
 	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
 		if (!b_ptr->active)
 			continue;
-		b_if_name = strchr(b_ptr->publ.name, ':') + 1;
+		b_if_name = strchr(b_ptr->name, ':') + 1;
 		if (!strcmp(b_if_name, if_name))
 			return b_ptr;
 	}
@@ -318,7 +317,7 @@ struct sk_buff *tipc_bearer_get_names(void)
 {
 	struct sk_buff *buf;
 	struct media *m_ptr;
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	int i, j;
 
 	buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME));
@@ -331,8 +330,8 @@
 			b_ptr = &tipc_bearers[j];
 			if (b_ptr->active && (b_ptr->media == m_ptr)) {
 				tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
-						    b_ptr->publ.name,
-						    strlen(b_ptr->publ.name) + 1);
+						    b_ptr->name,
+						    strlen(b_ptr->name) + 1);
 			}
 		}
 	}
@@ -340,14 +339,14 @@ struct sk_buff *tipc_bearer_get_names(void)
 	return buf;
 }
 
-void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest)
+void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
 {
 	tipc_nmap_add(&b_ptr->nodes, dest);
 	tipc_disc_update_link_req(b_ptr->link_req);
 	tipc_bcbearer_sort();
 }
 
-void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest)
+void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
 {
 	tipc_nmap_remove(&b_ptr->nodes, dest);
 	tipc_disc_update_link_req(b_ptr->link_req);
@@ -362,12 +361,12 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest)
 * bearer.lock must be taken before calling
 * Returns binary true(1) ore false(0)
 */
-static int bearer_push(struct bearer *b_ptr)
+static int bearer_push(struct tipc_bearer *b_ptr)
 {
 	u32 res = 0;
 	struct link *ln, *tln;
 
-	if (b_ptr->publ.blocked)
+	if (b_ptr->blocked)
 		return 0;
 
 	while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) {
@@ -382,13 +381,13 @@ static int bearer_push(struct bearer *b_ptr)
 	return list_empty(&b_ptr->cong_links);
 }
 
-void tipc_bearer_lock_push(struct bearer *b_ptr)
+void tipc_bearer_lock_push(struct tipc_bearer *b_ptr)
 {
 	int res;
 
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	res = bearer_push(b_ptr);
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 	if (res)
 		tipc_bcbearer_push();
 }
@@ -398,16 +397,14 @@ void tipc_bearer_lock_push(struct bearer *b_ptr)
 * Interrupt enabling new requests after bearer congestion or blocking:
 * See bearer_send().
 */
-void tipc_continue(struct tipc_bearer *tb_ptr)
+void tipc_continue(struct tipc_bearer *b_ptr)
 {
-	struct bearer *b_ptr = (struct bearer *)tb_ptr;
-
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	b_ptr->continue_count++;
 	if (!list_empty(&b_ptr->cong_links))
 		tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr);
-	b_ptr->publ.blocked = 0;
-	spin_unlock_bh(&b_ptr->publ.lock);
+	b_ptr->blocked = 0;
+	spin_unlock_bh(&b_ptr->lock);
 }
 
 /*
@@ -418,7 +415,7 @@ void tipc_continue(struct tipc_bearer *tb_ptr)
 * bearer.lock is busy
 */
 
-static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_ptr)
+static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
 	list_move_tail(&l_ptr->link_list, &b_ptr->cong_links);
 }
@@ -431,11 +428,11 @@ static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_p
 * bearer.lock is free
 */
 
-void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr)
+void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 }
 
 
@@ -444,18 +441,18 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr)
 * and if there is, try to resolve it before returning.
 * 'tipc_net_lock' is read_locked when this function is called
 */
-int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
+int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
 	int res = 1;
 
 	if (list_empty(&b_ptr->cong_links))
 		return 1;
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	if (!bearer_push(b_ptr)) {
 		tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
 		res = 0;
 	}
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 	return res;
 }
 
@@ -463,9 +460,9 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
 * tipc_bearer_congested - determines if bearer is currently congested
 */
 
-int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
+int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
-	if (unlikely(b_ptr->publ.blocked))
+	if (unlikely(b_ptr->blocked))
 		return 1;
 	if (likely(list_empty(&b_ptr->cong_links)))
 		return 0;
@@ -476,9 +473,9 @@ int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
 * tipc_enable_bearer - enable bearer with the given name
 */
 
-int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
+int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	struct media *m_ptr;
 	struct bearer_name b_name;
 	char addr_string[16];
@@ -496,9 +493,9 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
 		warn("Bearer <%s> rejected, illegal name\n", name);
 		return -EINVAL;
 	}
-	if (!tipc_addr_domain_valid(bcast_scope) ||
-	    !tipc_in_scope(bcast_scope, tipc_own_addr)) {
-		warn("Bearer <%s> rejected, illegal broadcast scope\n", name);
+	if (!tipc_addr_domain_valid(disc_domain) ||
+	    !tipc_in_scope(disc_domain, tipc_own_addr)) {
+		warn("Bearer <%s> rejected, illegal discovery domain\n", name);
 		return -EINVAL;
 	}
 	if ((priority < TIPC_MIN_LINK_PRI ||
@@ -528,7 +525,7 @@ restart:
 			bearer_id = i;
 			continue;
 		}
-		if (!strcmp(name, tipc_bearers[i].publ.name)) {
+		if (!strcmp(name, tipc_bearers[i].name)) {
 			warn("Bearer <%s> rejected, already enabled\n", name);
 			goto failed;
 		}
@@ -551,8 +548,8 @@ restart:
 	}
 
 	b_ptr = &tipc_bearers[bearer_id];
-	strcpy(b_ptr->publ.name, name);
-	res = m_ptr->enable_bearer(&b_ptr->publ);
+	strcpy(b_ptr->name, name);
+	res = m_ptr->enable_bearer(b_ptr);
 	if (res) {
 		warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res);
 		goto failed;
@@ -562,18 +559,15 @@ restart:
 	b_ptr->media = m_ptr;
 	b_ptr->net_plane = bearer_id + 'A';
 	b_ptr->active = 1;
-	b_ptr->detect_scope = bcast_scope;
 	b_ptr->priority = priority;
 	INIT_LIST_HEAD(&b_ptr->cong_links);
 	INIT_LIST_HEAD(&b_ptr->links);
-	if (m_ptr->bcast) {
-		b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr,
-							  bcast_scope, 2);
-	}
-	spin_lock_init(&b_ptr->publ.lock);
+	b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr,
+						  disc_domain);
+	spin_lock_init(&b_ptr->lock);
 	write_unlock_bh(&tipc_net_lock);
 	info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
-	     name, tipc_addr_string_fill(addr_string, bcast_scope), priority);
+	     name, tipc_addr_string_fill(addr_string, disc_domain), priority);
 	return 0;
 failed:
 	write_unlock_bh(&tipc_net_lock);
@@ -587,7 +581,7 @@ failed:
 
 int tipc_block_bearer(const char *name)
 {
-	struct bearer *b_ptr = NULL;
+	struct tipc_bearer *b_ptr = NULL;
 	struct link *l_ptr;
 	struct link *temp_l_ptr;
 
@@ -600,8 +594,8 @@ int tipc_block_bearer(const char *name)
 	}
 
 	info("Blocking bearer <%s>\n", name);
-	spin_lock_bh(&b_ptr->publ.lock);
-	b_ptr->publ.blocked = 1;
+	spin_lock_bh(&b_ptr->lock);
+	b_ptr->blocked = 1;
 	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
 		struct tipc_node *n_ptr = l_ptr->owner;
 
@@ -609,7 +603,7 @@
 		tipc_link_reset(l_ptr);
 		spin_unlock_bh(&n_ptr->lock);
 	}
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 	read_unlock_bh(&tipc_net_lock);
 	return 0;
 }
@@ -620,27 +614,27 @@ int tipc_block_bearer(const char *name)
 * Note: This routine assumes caller holds tipc_net_lock.
 */
 
-static void bearer_disable(struct bearer *b_ptr)
+static void bearer_disable(struct tipc_bearer *b_ptr)
 {
 	struct link *l_ptr;
 	struct link *temp_l_ptr;
 
-	info("Disabling bearer <%s>\n", b_ptr->publ.name);
+	info("Disabling bearer <%s>\n", b_ptr->name);
 	tipc_disc_stop_link_req(b_ptr->link_req);
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	b_ptr->link_req = NULL;
-	b_ptr->publ.blocked = 1;
-	b_ptr->media->disable_bearer(&b_ptr->publ);
+	b_ptr->blocked = 1;
+	b_ptr->media->disable_bearer(b_ptr);
 	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
 		tipc_link_delete(l_ptr);
 	}
-	spin_unlock_bh(&b_ptr->publ.lock);
-	memset(b_ptr, 0, sizeof(struct bearer));
+	spin_unlock_bh(&b_ptr->lock);
+	memset(b_ptr, 0, sizeof(struct tipc_bearer));
}
 
 int tipc_disable_bearer(const char *name)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	int res;
 
 	write_lock_bh(&tipc_net_lock);
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 85f451d5aac..31d6172b20f 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -2,7 +2,7 @@
 * net/tipc/bearer.h: Include file for TIPC bearer code
 *
 * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -61,26 +61,7 @@ struct tipc_media_addr {
 	} dev_addr;
 };
 
-/**
- * struct tipc_bearer - TIPC bearer info available to media code
- * @usr_handle: pointer to additional media-specific information about bearer
- * @mtu: max packet size bearer can support
- * @blocked: non-zero if bearer is blocked
- * @lock: spinlock for controlling access to bearer
- * @addr: media-specific address associated with bearer
- * @name: bearer name (format = media:interface)
- *
- * Note: TIPC initializes "name" and "lock" fields; media code is responsible
- * for initialization all other fields when a bearer is enabled.
- */
-struct tipc_bearer {
-	void *usr_handle;
-	u32 mtu;
-	int blocked;
-	spinlock_t lock;
-	struct tipc_media_addr addr;
-	char name[TIPC_MAX_BEARER_NAME];
-};
+struct tipc_bearer;
 
 /**
 * struct media - TIPC media information available to internal users
@@ -89,7 +70,6 @@ struct tipc_bearer {
 * @disable_bearer: routine which disables a bearer
 * @addr2str: routine which converts bearer's address to string form
 * @bcast_addr: media address used in broadcasting
- * @bcast: non-zero if media supports broadcasting [currently mandatory]
 * @priority: default link (and bearer) priority
 * @tolerance: default time (in ms) before declaring link failure
 * @window: default window (in packets) before declaring link congestion
@@ -106,7 +86,6 @@ struct media {
 	char *(*addr2str)(struct tipc_media_addr *a,
 			  char *str_buf, int str_size);
 	struct tipc_media_addr bcast_addr;
-	int bcast;
 	u32 priority;
 	u32 tolerance;
 	u32 window;
@@ -115,11 +94,15 @@ struct media {
 };
 
 /**
- * struct bearer - TIPC bearer information available to internal users
- * @publ: bearer information available to privileged users
+ * struct tipc_bearer - TIPC bearer structure
+ * @usr_handle: pointer to additional media-specific information about bearer
+ * @mtu: max packet size bearer can support
+ * @blocked: non-zero if bearer is blocked
+ * @lock: spinlock for controlling access to bearer
+ * @addr: media-specific address associated with bearer
+ * @name: bearer name (format = media:interface)
 * @media: ptr to media structure associated with bearer
 * @priority: default link priority for bearer
- * @detect_scope: network address mask used during automatic link creation
 * @identity: array index of this bearer within TIPC bearer array
 * @link_req: ptr to (optional) structure making periodic link setup requests
 * @links: list of non-congested links associated with bearer
@@ -128,13 +111,20 @@ struct media {
 * @active: non-zero if bearer structure is represents a bearer
 * @net_plane: network plane ('A' through 'H') currently associated with bearer
 * @nodes: indicates which nodes in cluster can be reached through bearer
+ *
+ * Note: media-specific code is responsible for initialization of the fields
+ * indicated below when a bearer is enabled; TIPC's generic bearer code takes
+ * care of initializing all other fields.
 */
-
-struct bearer {
-	struct tipc_bearer publ;
+struct tipc_bearer {
+	void *usr_handle;			/* initalized by media */
+	u32 mtu;				/* initalized by media */
+	int blocked;				/* initalized by media */
+	struct tipc_media_addr addr;		/* initalized by media */
+	char name[TIPC_MAX_BEARER_NAME];
+	spinlock_t lock;
 	struct media *media;
 	u32 priority;
-	u32 detect_scope;
 	u32 identity;
 	struct link_req *link_req;
 	struct list_head links;
@@ -152,7 +142,7 @@ struct bearer_name {
 
 struct link;
 
-extern struct bearer tipc_bearers[];
+extern struct tipc_bearer tipc_bearers[];
 
 /*
 * TIPC routines available to supported media types
@@ -173,7 +163,7 @@ void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
 int tipc_block_bearer(const char *name);
 void tipc_continue(struct tipc_bearer *tb_ptr);
 
-int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority);
+int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority);
 int tipc_disable_bearer(const char *name);
 
 /*
@@ -186,14 +176,14 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
 struct sk_buff *tipc_media_get_names(void);
 
 struct sk_buff *tipc_bearer_get_names(void);
-void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest);
-void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest);
-void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr);
-struct bearer *tipc_bearer_find_interface(const char *if_name);
-int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr);
-int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr);
+void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
+void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
+void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr);
+struct tipc_bearer *tipc_bearer_find_interface(const char *if_name);
+int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr);
+int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr);
 void tipc_bearer_stop(void);
-void tipc_bearer_lock_push(struct bearer *b_ptr);
+void tipc_bearer_lock_push(struct tipc_bearer *b_ptr);
 
 
 /**
@@ -214,10 +204,11 @@ void tipc_bearer_lock_push(struct bearer *b_ptr);
 * and let TIPC's link code deal with the undelivered message.
 */
 
-static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf,
+static inline int tipc_bearer_send(struct tipc_bearer *b_ptr,
+				   struct sk_buff *buf,
 				   struct tipc_media_addr *dest)
 {
-	return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest);
+	return !b_ptr->media->send_msg(buf, b_ptr, dest);
 }
 
 #endif /* _TIPC_BEARER_H */
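
The bearer rework above flattens a two-level layout: previously the media-visible struct tipc_bearer was embedded as the publ member of an internal struct bearer, forcing casts like (struct bearer *)tb_ptr and ->publ. indirections; now a single struct tipc_bearer serves both layers, with ownership of each field documented rather than enforced by the type split. Schematically, with abbreviated, illustrative field lists (the _example names are not TIPC's):

	struct media;	/* stand-in forward declaration */

	/* Before: media code was handed only the embedded publ part. */
	struct tipc_bearer_publ_example {
		void *usr_handle;
		unsigned int mtu;
	};
	struct bearer_example {
		struct tipc_bearer_publ_example publ;	/* &b->publ given to media */
		struct media *media;			/* internal-only */
	};

	/* After: one flat struct handed to media and internal code alike. */
	struct tipc_bearer_example {
		void *usr_handle;	/* initialized by media */
		unsigned int mtu;	/* initialized by media */
		struct media *media;	/* initialized by TIPC */
	};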
diff --git a/net/tipc/config.c b/net/tipc/config.c
index e16750dcf3c..b25a396b7e1 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -2,7 +2,7 @@
2 * net/tipc/config.c: TIPC configuration management code 2 * net/tipc/config.c: TIPC configuration management code
3 * 3 *
4 * Copyright (c) 2002-2006, Ericsson AB 4 * Copyright (c) 2002-2006, Ericsson AB
5 * Copyright (c) 2004-2007, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -148,7 +148,7 @@ static struct sk_buff *cfg_enable_bearer(void)
148 148
149 args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); 149 args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area);
150 if (tipc_enable_bearer(args->name, 150 if (tipc_enable_bearer(args->name,
151 ntohl(args->detect_scope), 151 ntohl(args->disc_domain),
152 ntohl(args->priority))) 152 ntohl(args->priority)))
153 return tipc_cfg_reply_error_string("unable to enable bearer"); 153 return tipc_cfg_reply_error_string("unable to enable bearer");
154 154
@@ -260,25 +260,6 @@ static struct sk_buff *cfg_set_max_ports(void)
260 return tipc_cfg_reply_none(); 260 return tipc_cfg_reply_none();
261} 261}
262 262
263static struct sk_buff *cfg_set_max_nodes(void)
264{
265 u32 value;
266
267 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
268 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
269 value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
270 if (value == tipc_max_nodes)
271 return tipc_cfg_reply_none();
272 if (value != delimit(value, 8, 2047))
273 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
274 " (max nodes must be 8-2047)");
275 if (tipc_mode == TIPC_NET_MODE)
276 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
277 " (cannot change max nodes once TIPC has joined a network)");
278 tipc_max_nodes = value;
279 return tipc_cfg_reply_none();
280}
281
282static struct sk_buff *cfg_set_netid(void) 263static struct sk_buff *cfg_set_netid(void)
283{ 264{
284 u32 value; 265 u32 value;
@@ -397,9 +378,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
397 case TIPC_CMD_SET_MAX_SUBSCR: 378 case TIPC_CMD_SET_MAX_SUBSCR:
398 rep_tlv_buf = cfg_set_max_subscriptions(); 379 rep_tlv_buf = cfg_set_max_subscriptions();
399 break; 380 break;
400 case TIPC_CMD_SET_MAX_NODES:
401 rep_tlv_buf = cfg_set_max_nodes();
402 break;
403 case TIPC_CMD_SET_NETID: 381 case TIPC_CMD_SET_NETID:
404 rep_tlv_buf = cfg_set_netid(); 382 rep_tlv_buf = cfg_set_netid();
405 break; 383 break;
@@ -415,9 +393,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
415 case TIPC_CMD_GET_MAX_SUBSCR: 393 case TIPC_CMD_GET_MAX_SUBSCR:
416 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); 394 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
417 break; 395 break;
418 case TIPC_CMD_GET_MAX_NODES:
419 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes);
420 break;
421 case TIPC_CMD_GET_NETID: 396 case TIPC_CMD_GET_NETID:
422 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); 397 rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
423 break; 398 break;
@@ -431,6 +406,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_SLAVES:
 	case TIPC_CMD_SET_MAX_CLUSTERS:
 	case TIPC_CMD_GET_MAX_CLUSTERS:
+	case TIPC_CMD_SET_MAX_NODES:
+	case TIPC_CMD_GET_MAX_NODES:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (obsolete command)");
 		break;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e071579e085..c9a73e7763f 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -2,7 +2,7 @@
  * net/tipc/core.c: TIPC module code
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,6 @@
 #include "config.h"
 
 
-#ifndef CONFIG_TIPC_NODES
-#define CONFIG_TIPC_NODES 255
-#endif
-
 #ifndef CONFIG_TIPC_PORTS
 #define CONFIG_TIPC_PORTS 8191
 #endif
@@ -57,7 +53,6 @@
 
 int tipc_mode = TIPC_NOT_RUNNING;
 int tipc_random;
-atomic_t tipc_user_count = ATOMIC_INIT(0);
 
 const char tipc_alphabet[] =
 	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.";
@@ -65,7 +60,6 @@ const char tipc_alphabet[] =
 /* configurable TIPC parameters */
 
 u32 tipc_own_addr;
-int tipc_max_nodes;
 int tipc_max_ports;
 int tipc_max_subscriptions;
 int tipc_max_publications;
@@ -193,7 +187,6 @@ static int __init tipc_init(void)
 	tipc_max_publications = 10000;
 	tipc_max_subscriptions = 2000;
 	tipc_max_ports = CONFIG_TIPC_PORTS;
-	tipc_max_nodes = CONFIG_TIPC_NODES;
 	tipc_net_id = 4711;
 
 	res = tipc_core_start();
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 997158546e2..436dda1159d 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -2,7 +2,7 @@
  * net/tipc/core.h: Include file for TIPC global declarations
  *
  * Copyright (c) 2005-2006, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
+ * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -147,7 +147,6 @@ void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *);
  */
 
 extern u32 tipc_own_addr;
-extern int tipc_max_nodes;
 extern int tipc_max_ports;
 extern int tipc_max_subscriptions;
 extern int tipc_max_publications;
@@ -161,7 +160,6 @@ extern int tipc_remote_management;
 extern int tipc_mode;
 extern int tipc_random;
 extern const char tipc_alphabet[];
-extern atomic_t tipc_user_count;
 
 
 /*
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fa026bd91a6..491eff56b9d 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -2,7 +2,7 @@
  * net/tipc/discover.c
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,7 +57,7 @@
  * @timer_intv: current interval between requests (in ms)
  */
 struct link_req {
-	struct bearer *bearer;
+	struct tipc_bearer *bearer;
 	struct tipc_media_addr dest;
 	struct sk_buff *buf;
 	struct timer_list timer;
@@ -67,27 +67,24 @@ struct link_req {
 /**
  * tipc_disc_init_msg - initialize a link setup message
  * @type: message type (request or response)
- * @req_links: number of links associated with message
  * @dest_domain: network domain of node(s) which should respond to message
  * @b_ptr: ptr to bearer issuing message
  */
 
 static struct sk_buff *tipc_disc_init_msg(u32 type,
-					  u32 req_links,
 					  u32 dest_domain,
-					  struct bearer *b_ptr)
+					  struct tipc_bearer *b_ptr)
 {
-	struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE);
+	struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE);
 	struct tipc_msg *msg;
 
 	if (buf) {
 		msg = buf_msg(buf);
-		tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain);
+		tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
 		msg_set_non_seq(msg, 1);
-		msg_set_req_links(msg, req_links);
 		msg_set_dest_domain(msg, dest_domain);
 		msg_set_bc_netid(msg, tipc_net_id);
-		msg_set_media_addr(msg, &b_ptr->publ.addr);
+		msg_set_media_addr(msg, &b_ptr->addr);
 	}
 	return buf;
 }
@@ -99,7 +96,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
  * @media_addr: media address advertised by duplicated node
  */
 
-static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
+static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
 			    struct tipc_media_addr *media_addr)
 {
 	char node_addr_str[16];
@@ -111,7 +108,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
 	tipc_media_addr_printf(&pb, media_addr);
 	tipc_printbuf_validate(&pb);
 	warn("Duplicate %s using %s seen on <%s>\n",
-	     node_addr_str, media_addr_str, b_ptr->publ.name);
+	     node_addr_str, media_addr_str, b_ptr->name);
 }
 
 /**
@@ -120,19 +117,23 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
  * @b_ptr: bearer that message arrived on
  */
 
-void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
+void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
 {
+	struct tipc_node *n_ptr;
 	struct link *link;
-	struct tipc_media_addr media_addr;
+	struct tipc_media_addr media_addr, *addr;
+	struct sk_buff *rbuf;
 	struct tipc_msg *msg = buf_msg(buf);
 	u32 dest = msg_dest_domain(msg);
 	u32 orig = msg_prevnode(msg);
 	u32 net_id = msg_bc_netid(msg);
 	u32 type = msg_type(msg);
+	int link_fully_up;
 
 	msg_get_media_addr(msg, &media_addr);
 	buf_discard(buf);
 
+	/* Validate discovery message from requesting node */
 	if (net_id != tipc_net_id)
 		return;
 	if (!tipc_addr_domain_valid(dest))
@@ -140,63 +141,76 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 	if (!tipc_addr_node_valid(orig))
 		return;
 	if (orig == tipc_own_addr) {
-		if (memcmp(&media_addr, &b_ptr->publ.addr, sizeof(media_addr)))
+		if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr)))
 			disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
 		return;
 	}
 	if (!tipc_in_scope(dest, tipc_own_addr))
 		return;
-	if (in_own_cluster(orig)) {
-		/* Always accept link here */
-		struct sk_buff *rbuf;
-		struct tipc_media_addr *addr;
-		struct tipc_node *n_ptr = tipc_node_find(orig);
-		int link_fully_up;
-
-		if (n_ptr == NULL) {
-			n_ptr = tipc_node_create(orig);
-			if (!n_ptr)
-				return;
-		}
-		spin_lock_bh(&n_ptr->lock);
-
-		/* Don't talk to neighbor during cleanup after last session */
-
-		if (n_ptr->cleanup_required) {
-			spin_unlock_bh(&n_ptr->lock);
-			return;
-		}
-
-		link = n_ptr->links[b_ptr->identity];
-		if (!link) {
-			link = tipc_link_create(b_ptr, orig, &media_addr);
-			if (!link) {
-				spin_unlock_bh(&n_ptr->lock);
-				return;
-			}
-		}
-		addr = &link->media_addr;
-		if (memcmp(addr, &media_addr, sizeof(*addr))) {
-			if (tipc_link_is_up(link) || (!link->started)) {
-				disc_dupl_alert(b_ptr, orig, &media_addr);
-				spin_unlock_bh(&n_ptr->lock);
-				return;
-			}
-			warn("Resetting link <%s>, peer interface address changed\n",
-			     link->name);
-			memcpy(addr, &media_addr, sizeof(*addr));
-			tipc_link_reset(link);
-		}
-		link_fully_up = link_working_working(link);
-		spin_unlock_bh(&n_ptr->lock);
-		if ((type == DSC_RESP_MSG) || link_fully_up)
-			return;
-		rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr);
-		if (rbuf != NULL) {
-			b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr);
-			buf_discard(rbuf);
-		}
-	}
+	if (!in_own_cluster(orig))
+		return;
+
+	/* Locate structure corresponding to requesting node */
+	n_ptr = tipc_node_find(orig);
+	if (!n_ptr) {
+		n_ptr = tipc_node_create(orig);
+		if (!n_ptr)
+			return;
+	}
+	tipc_node_lock(n_ptr);
+
+	/* Don't talk to neighbor during cleanup after last session */
+	if (n_ptr->cleanup_required) {
+		tipc_node_unlock(n_ptr);
+		return;
+	}
+
+	link = n_ptr->links[b_ptr->identity];
+
+	/* Create a link endpoint for this bearer, if necessary */
+	if (!link) {
+		link = tipc_link_create(n_ptr, b_ptr, &media_addr);
+		if (!link) {
+			tipc_node_unlock(n_ptr);
+			return;
+		}
+	}
+
+	/*
+	 * Ensure requesting node's media address is correct
+	 *
+	 * If media address doesn't match and the link is working, reject the
+	 * request (must be from a duplicate node).
+	 *
+	 * If media address doesn't match and the link is not working, accept
+	 * the new media address and reset the link to ensure it starts up
+	 * cleanly.
+	 */
+	addr = &link->media_addr;
+	if (memcmp(addr, &media_addr, sizeof(*addr))) {
+		if (tipc_link_is_up(link) || (!link->started)) {
+			disc_dupl_alert(b_ptr, orig, &media_addr);
+			tipc_node_unlock(n_ptr);
+			return;
+		}
+		warn("Resetting link <%s>, peer interface address changed\n",
+		     link->name);
+		memcpy(addr, &media_addr, sizeof(*addr));
+		tipc_link_reset(link);
+	}
+
+	/* Accept discovery message & send response, if necessary */
+	link_fully_up = link_working_working(link);
+
+	if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) {
+		rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr);
+		if (rbuf) {
+			b_ptr->media->send_msg(rbuf, b_ptr, &media_addr);
+			buf_discard(rbuf);
+		}
+	}
+
+	tipc_node_unlock(n_ptr);
 }
 
 /**
@@ -249,9 +263,9 @@ void tipc_disc_update_link_req(struct link_req *req)
 
 static void disc_timeout(struct link_req *req)
 {
-	spin_lock_bh(&req->bearer->publ.lock);
+	spin_lock_bh(&req->bearer->lock);
 
-	req->bearer->media->send_msg(req->buf, &req->bearer->publ, &req->dest);
+	req->bearer->media->send_msg(req->buf, req->bearer, &req->dest);
 
 	if ((req->timer_intv == TIPC_LINK_REQ_SLOW) ||
 	    (req->timer_intv == TIPC_LINK_REQ_FAST)) {
@@ -266,7 +280,7 @@ static void disc_timeout(struct link_req *req)
 	}
 	k_start_timer(&req->timer, req->timer_intv);
 
-	spin_unlock_bh(&req->bearer->publ.lock);
+	spin_unlock_bh(&req->bearer->lock);
 }
 
 /**
@@ -274,15 +288,13 @@ static void disc_timeout(struct link_req *req)
  * @b_ptr: ptr to bearer issuing requests
  * @dest: destination address for request messages
  * @dest_domain: network domain of node(s) which should respond to message
- * @req_links: max number of desired links
  *
  * Returns pointer to link request structure, or NULL if unable to create.
  */
 
-struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
+struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr,
 					 const struct tipc_media_addr *dest,
-					 u32 dest_domain,
-					 u32 req_links)
+					 u32 dest_domain)
 {
 	struct link_req *req;
 
@@ -290,7 +302,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
 	if (!req)
 		return NULL;
 
-	req->buf = tipc_disc_init_msg(DSC_REQ_MSG, req_links, dest_domain, b_ptr);
+	req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr);
 	if (!req->buf) {
 		kfree(req);
 		return NULL;
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index d2c3cffb79f..e48a167e47b 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -2,7 +2,7 @@
  * net/tipc/discover.h
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,13 +39,12 @@
 
 struct link_req;
 
-struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
+struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr,
 					 const struct tipc_media_addr *dest,
-					 u32 dest_domain,
-					 u32 req_links);
+					 u32 dest_domain);
 void tipc_disc_update_link_req(struct link_req *req);
 void tipc_disc_stop_link_req(struct link_req *req);
 
-void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr);
+void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr);
 
 #endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 18702f58d11..43639ff1cbe 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2,7 +2,7 @@
  * net/tipc/link.c: TIPC link code
  *
  * Copyright (c) 1996-2007, Ericsson AB
- * Copyright (c) 2004-2007, Wind River Systems
+ * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -90,7 +90,7 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr,
 static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
 static int link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf);
 static void link_set_supervision_props(struct link *l_ptr, u32 tolerance);
-static int link_send_sections_long(struct port *sender,
+static int link_send_sections_long(struct tipc_port *sender,
 				   struct iovec const *msg_sect,
 				   u32 num_sect, u32 destnode);
 static void link_check_defragm_bufs(struct link *l_ptr);
@@ -113,7 +113,7 @@ static void link_init_max_pkt(struct link *l_ptr)
 {
 	u32 max_pkt;
 
-	max_pkt = (l_ptr->b_ptr->publ.mtu & ~3);
+	max_pkt = (l_ptr->b_ptr->mtu & ~3);
 	if (max_pkt > MAX_MSG_SIZE)
 		max_pkt = MAX_MSG_SIZE;
 
@@ -246,9 +246,6 @@ static void link_timeout(struct link *l_ptr)
 	l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size;
 	l_ptr->stats.queue_sz_counts++;
 
-	if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
-		l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
-
 	if (l_ptr->first_out) {
 		struct tipc_msg *msg = buf_msg(l_ptr->first_out);
 		u32 length = msg_size(msg);
@@ -296,19 +293,35 @@ static void link_set_timer(struct link *l_ptr, u32 time)
 
 /**
  * tipc_link_create - create a new link
+ * @n_ptr: pointer to associated node
  * @b_ptr: pointer to associated bearer
- * @peer: network address of node at other end of link
  * @media_addr: media address to use when sending messages over link
  *
  * Returns pointer to link.
  */
 
-struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
+struct link *tipc_link_create(struct tipc_node *n_ptr,
+			      struct tipc_bearer *b_ptr,
 			      const struct tipc_media_addr *media_addr)
 {
 	struct link *l_ptr;
 	struct tipc_msg *msg;
 	char *if_name;
+	char addr_string[16];
+	u32 peer = n_ptr->addr;
+
+	if (n_ptr->link_cnt >= 2) {
+		tipc_addr_string_fill(addr_string, n_ptr->addr);
+		err("Attempt to establish third link to %s\n", addr_string);
+		return NULL;
+	}
+
+	if (n_ptr->links[b_ptr->identity]) {
+		tipc_addr_string_fill(addr_string, n_ptr->addr);
+		err("Attempt to establish second link on <%s> to %s\n",
+		    b_ptr->name, addr_string);
+		return NULL;
+	}
 
 	l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC);
 	if (!l_ptr) {
@@ -317,7 +330,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 	}
 
 	l_ptr->addr = peer;
-	if_name = strchr(b_ptr->publ.name, ':') + 1;
+	if_name = strchr(b_ptr->name, ':') + 1;
 	sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:",
 		tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
 		tipc_node(tipc_own_addr),
@@ -325,6 +338,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 		tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
 		/* note: peer i/f is appended to link name by reset/activate */
 	memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
+	l_ptr->owner = n_ptr;
 	l_ptr->checkpoint = 1;
 	l_ptr->b_ptr = b_ptr;
 	link_set_supervision_props(l_ptr, b_ptr->media->tolerance);
@@ -348,11 +362,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 
 	link_reset_statistics(l_ptr);
 
-	l_ptr->owner = tipc_node_attach_link(l_ptr);
-	if (!l_ptr->owner) {
-		kfree(l_ptr);
-		return NULL;
-	}
+	tipc_node_attach_link(n_ptr, l_ptr);
 
 	k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
 	list_add_tail(&l_ptr->link_list, &b_ptr->links);
@@ -391,7 +401,9 @@ void tipc_link_delete(struct link *l_ptr)
 
 static void link_start(struct link *l_ptr)
 {
+	tipc_node_lock(l_ptr->owner);
 	link_state_event(l_ptr, STARTING_EVT);
+	tipc_node_unlock(l_ptr->owner);
 }
 
 /**
@@ -406,7 +418,7 @@ static void link_start(struct link *l_ptr)
 
 static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	spin_lock_bh(&tipc_port_list_lock);
 	p_ptr = tipc_port_lock(origport);
@@ -415,7 +427,7 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
 		goto exit;
 	if (!list_empty(&p_ptr->wait_list))
 		goto exit;
-	p_ptr->publ.congested = 1;
+	p_ptr->congested = 1;
 	p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
 	list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
 	l_ptr->stats.link_congs++;
@@ -428,8 +440,8 @@ exit:
 
 void tipc_link_wakeup_ports(struct link *l_ptr, int all)
 {
-	struct port *p_ptr;
-	struct port *temp_p_ptr;
+	struct tipc_port *p_ptr;
+	struct tipc_port *temp_p_ptr;
 	int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size;
 
 	if (all)
@@ -445,11 +457,11 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all)
 		if (win <= 0)
 			break;
 		list_del_init(&p_ptr->wait_list);
-		spin_lock_bh(p_ptr->publ.lock);
-		p_ptr->publ.congested = 0;
-		p_ptr->wakeup(&p_ptr->publ);
+		spin_lock_bh(p_ptr->lock);
+		p_ptr->congested = 0;
+		p_ptr->wakeup(p_ptr);
 		win -= p_ptr->waiting_pkts;
-		spin_unlock_bh(p_ptr->publ.lock);
+		spin_unlock_bh(p_ptr->lock);
 	}
 
 exit:
@@ -549,7 +561,7 @@ void tipc_link_reset(struct link *l_ptr)
 	tipc_node_link_down(l_ptr->owner, l_ptr);
 	tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
 
-	if (was_active_link && tipc_node_has_active_links(l_ptr->owner) &&
+	if (was_active_link && tipc_node_active_links(l_ptr->owner) &&
 	    l_ptr->owner->permit_changeover) {
 		l_ptr->reset_checkpoint = checkpoint;
 		l_ptr->exp_msg_count = START_CHANGEOVER;
@@ -824,7 +836,10 @@ static void link_add_to_outqueue(struct link *l_ptr,
 		l_ptr->last_out = buf;
 	} else
 		l_ptr->first_out = l_ptr->last_out = buf;
+
 	l_ptr->out_queue_size++;
+	if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
+		l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
 }
 
 /*
@@ -867,9 +882,6 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 
 	/* Packet can be queued or sent: */
 
-	if (queue_size > l_ptr->stats.max_queue_sz)
-		l_ptr->stats.max_queue_sz = queue_size;
-
 	if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) &&
 		   !link_congested(l_ptr))) {
 		link_add_to_outqueue(l_ptr, buf, msg);
@@ -1027,12 +1039,12 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
  * except for total message length.
  * Returns user data length or errno.
  */
-int tipc_link_send_sections_fast(struct port *sender,
+int tipc_link_send_sections_fast(struct tipc_port *sender,
 				 struct iovec const *msg_sect,
 				 const u32 num_sect,
 				 u32 destaddr)
 {
-	struct tipc_msg *hdr = &sender->publ.phdr;
+	struct tipc_msg *hdr = &sender->phdr;
 	struct link *l_ptr;
 	struct sk_buff *buf;
 	struct tipc_node *node;
@@ -1045,7 +1057,7 @@ again:
 	 * (Must not hold any locks while building message.)
 	 */
 
-	res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
+	res = tipc_msg_build(hdr, msg_sect, num_sect, sender->max_pkt,
 			     !sender->user_port, &buf);
 
 	read_lock_bh(&tipc_net_lock);
@@ -1056,7 +1068,7 @@ again:
 	if (likely(l_ptr)) {
 		if (likely(buf)) {
 			res = link_send_buf_fast(l_ptr, buf,
-						 &sender->publ.max_pkt);
+						 &sender->max_pkt);
 			if (unlikely(res < 0))
 				buf_discard(buf);
 exit:
@@ -1075,7 +1087,7 @@ exit:
 		if (link_congested(l_ptr) ||
 		    !list_empty(&l_ptr->b_ptr->cong_links)) {
 			res = link_schedule_port(l_ptr,
-						 sender->publ.ref, res);
+						 sender->ref, res);
 			goto exit;
 		}
 
@@ -1084,12 +1096,12 @@ exit:
 		 * then re-try fast path or fragment the message
 		 */
 
-		sender->publ.max_pkt = l_ptr->max_pkt;
+		sender->max_pkt = l_ptr->max_pkt;
 		tipc_node_unlock(node);
 		read_unlock_bh(&tipc_net_lock);
 
 
-		if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt)
+		if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
 			goto again;
 
 		return link_send_sections_long(sender, msg_sect,
@@ -1123,14 +1135,14 @@ exit:
  *
  * Returns user data length or errno.
  */
-static int link_send_sections_long(struct port *sender,
+static int link_send_sections_long(struct tipc_port *sender,
 				   struct iovec const *msg_sect,
 				   u32 num_sect,
 				   u32 destaddr)
 {
 	struct link *l_ptr;
 	struct tipc_node *node;
-	struct tipc_msg *hdr = &sender->publ.phdr;
+	struct tipc_msg *hdr = &sender->phdr;
 	u32 dsz = msg_data_sz(hdr);
 	u32 max_pkt, fragm_sz, rest;
 	struct tipc_msg fragm_hdr;
@@ -1142,7 +1154,7 @@ static int link_send_sections_long(struct port *sender,
 
 again:
 	fragm_no = 1;
-	max_pkt = sender->publ.max_pkt - INT_H_SIZE;
+	max_pkt = sender->max_pkt - INT_H_SIZE;
 		/* leave room for tunnel header in case of link changeover */
 	fragm_sz = max_pkt - INT_H_SIZE;
 		/* leave room for fragmentation header in each fragment */
@@ -1157,7 +1169,7 @@ again:
 
 	tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
 		      INT_H_SIZE, msg_destnode(hdr));
-	msg_set_link_selector(&fragm_hdr, sender->publ.ref);
+	msg_set_link_selector(&fragm_hdr, sender->ref);
 	msg_set_size(&fragm_hdr, max_pkt);
 	msg_set_fragm_no(&fragm_hdr, 1);
 
@@ -1238,13 +1250,13 @@ error:
 	node = tipc_node_find(destaddr);
 	if (likely(node)) {
 		tipc_node_lock(node);
-		l_ptr = node->active_links[sender->publ.ref & 1];
+		l_ptr = node->active_links[sender->ref & 1];
 		if (!l_ptr) {
 			tipc_node_unlock(node);
 			goto reject;
 		}
 		if (l_ptr->max_pkt < max_pkt) {
-			sender->publ.max_pkt = l_ptr->max_pkt;
+			sender->max_pkt = l_ptr->max_pkt;
 			tipc_node_unlock(node);
 			for (; buf_chain; buf_chain = buf) {
 				buf = buf_chain->next;
@@ -1441,7 +1453,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 	info("Outstanding acks: %lu\n",
 	     (unsigned long) TIPC_SKB_CB(buf)->handle);
 
-	n_ptr = l_ptr->owner->next;
+	n_ptr = tipc_bclink_retransmit_to();
 	tipc_node_lock(n_ptr);
 
 	tipc_addr_string_fill(addr_string, n_ptr->addr);
@@ -1595,11 +1607,10 @@ static int link_recv_buf_validate(struct sk_buff *buf)
  * structure (i.e. cannot be NULL), but bearer can be inactive.
  */
 
-void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
+void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
 {
 	read_lock_bh(&tipc_net_lock);
 	while (head) {
-		struct bearer *b_ptr = (struct bearer *)tb_ptr;
 		struct tipc_node *n_ptr;
 		struct link *l_ptr;
 		struct sk_buff *crs;
@@ -1735,10 +1746,6 @@ deliver:
 				tipc_node_unlock(n_ptr);
 				tipc_link_recv_bundle(buf);
 				continue;
-			case ROUTE_DISTRIBUTOR:
-				tipc_node_unlock(n_ptr);
-				buf_discard(buf);
-				continue;
 			case NAME_DISTRIBUTOR:
 				tipc_node_unlock(n_ptr);
 				tipc_named_recv(buf);
@@ -1765,6 +1772,10 @@ deliver:
 					goto protocol_check;
 				}
 				break;
+			default:
+				buf_discard(buf);
+				buf = NULL;
+				break;
 			}
 		}
 		tipc_node_unlock(n_ptr);
@@ -1900,6 +1911,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 	struct sk_buff *buf = NULL;
 	struct tipc_msg *msg = l_ptr->pmsg;
 	u32 msg_size = sizeof(l_ptr->proto_msg);
+	int r_flag;
 
 	if (link_blocked(l_ptr))
 		return;
@@ -1950,15 +1962,14 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 		msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1));
 		msg_set_seq_gap(msg, 0);
 		msg_set_next_sent(msg, 1);
+		msg_set_probe(msg, 0);
 		msg_set_link_tolerance(msg, l_ptr->tolerance);
 		msg_set_linkprio(msg, l_ptr->priority);
 		msg_set_max_pkt(msg, l_ptr->max_pkt_target);
 	}
 
-	if (tipc_node_has_redundant_links(l_ptr->owner))
-		msg_set_redundant_link(msg);
-	else
-		msg_clear_redundant_link(msg);
+	r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr));
+	msg_set_redundant_link(msg, r_flag);
 	msg_set_linkprio(msg, l_ptr->priority);
 
 	/* Ensure sequence number will not fit : */
@@ -1978,7 +1989,6 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 		skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
 		return;
 	}
-	msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
 
 	/* Message can be sent */
 
@@ -2066,7 +2076,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 		l_ptr->peer_bearer_id = msg_bearer_id(msg);
 
 		/* Synchronize broadcast sequence numbers */
-		if (!tipc_node_has_redundant_links(l_ptr->owner))
+		if (!tipc_node_redundant_links(l_ptr->owner))
 			l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg));
 		break;
 	case STATE_MSG:
@@ -2413,9 +2423,6 @@ static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
 	else
 		destaddr = msg_destnode(inmsg);
 
-	if (msg_routed(inmsg))
-		msg_set_prevnode(inmsg, tipc_own_addr);
-
 	/* Prepare reusable fragment header: */
 
 	tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
@@ -2618,6 +2625,9 @@ static void link_check_defragm_bufs(struct link *l_ptr)
 
 static void link_set_supervision_props(struct link *l_ptr, u32 tolerance)
 {
+	if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL))
+		return;
+
 	l_ptr->tolerance = tolerance;
 	l_ptr->continuity_interval =
 		((tolerance / 4) > 500) ? 500 : tolerance / 4;
@@ -2658,7 +2668,7 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
 static struct link *link_find_link(const char *name, struct tipc_node **node)
 {
 	struct link_name link_name_parts;
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	struct link *l_ptr;
 
 	if (!link_name_validate(name, &link_name_parts))
@@ -2961,7 +2971,7 @@ static void link_print(struct link *l_ptr, const char *str)
 
 	tipc_printf(buf, str);
 	tipc_printf(buf, "Link %x<%s>:",
-		    l_ptr->addr, l_ptr->b_ptr->publ.name);
+		    l_ptr->addr, l_ptr->b_ptr->name);
 
 #ifdef CONFIG_TIPC_DEBUG
 	if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr))
@@ -2981,9 +2991,9 @@ static void link_print(struct link *l_ptr, const char *str)
 		    != (l_ptr->out_queue_size - 1)) ||
 		    (l_ptr->last_out->next != NULL)) {
 			tipc_printf(buf, "\nSend queue inconsistency\n");
-			tipc_printf(buf, "first_out= %x ", l_ptr->first_out);
-			tipc_printf(buf, "next_out= %x ", l_ptr->next_out);
-			tipc_printf(buf, "last_out= %x ", l_ptr->last_out);
+			tipc_printf(buf, "first_out= %p ", l_ptr->first_out);
+			tipc_printf(buf, "next_out= %p ", l_ptr->next_out);
+			tipc_printf(buf, "last_out= %p ", l_ptr->last_out);
 		}
 	} else
 		tipc_printf(buf, "[]");
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 70967e63702..e6a30dbe1aa 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -2,7 +2,7 @@
  * net/tipc/link.h: Include file for TIPC link code
  *
  * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -122,7 +122,7 @@ struct link {
 	u32 checkpoint;
 	u32 peer_session;
 	u32 peer_bearer_id;
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	u32 tolerance;
 	u32 continuity_interval;
 	u32 abort_limit;
@@ -196,24 +196,19 @@ struct link {
 		u32 bearer_congs;
 		u32 deferred_recv;
 		u32 duplicates;
-
-		/* for statistical profiling of send queue size */
-
-		u32 max_queue_sz;
-		u32 accu_queue_sz;
-		u32 queue_sz_counts;
-
-		/* for statistical profiling of message lengths */
-
-		u32 msg_length_counts;
-		u32 msg_lengths_total;
-		u32 msg_length_profile[7];
+		u32 max_queue_sz;	/* send queue size high water mark */
+		u32 accu_queue_sz;	/* used for send queue size profiling */
+		u32 queue_sz_counts;	/* used for send queue size profiling */
+		u32 msg_length_counts;	/* used for message length profiling */
+		u32 msg_lengths_total;	/* used for message length profiling */
+		u32 msg_length_profile[7]; /* used for msg. length profiling */
 	} stats;
 };
 
-struct port;
+struct tipc_port;
 
-struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
+struct link *tipc_link_create(struct tipc_node *n_ptr,
+			      struct tipc_bearer *b_ptr,
 			      const struct tipc_media_addr *media_addr);
 void tipc_link_delete(struct link *l_ptr);
 void tipc_link_changeover(struct link *l_ptr);
@@ -230,7 +225,7 @@ void tipc_link_reset(struct link *l_ptr);
 int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
 int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
 u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
-int tipc_link_send_sections_fast(struct port *sender,
+int tipc_link_send_sections_fast(struct tipc_port *sender,
 				 struct iovec const *msg_sect,
 				 const u32 num_sect,
 				 u32 destnode);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index bb6180c4fcb..6d92d17e7fb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -2,7 +2,7 @@
  * net/tipc/msg.c: TIPC message header routines
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -192,8 +192,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 		default:
 			tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg));
 		}
-		if (msg_routed(msg) && !msg_non_seq(msg))
-			tipc_printf(buf, "ROUT:");
 		if (msg_reroute_cnt(msg))
 			tipc_printf(buf, "REROUTED(%u):",
 				    msg_reroute_cnt(msg));
@@ -210,8 +208,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 		default:
 			tipc_printf(buf, "UNKNOWN:%x", msg_type(msg));
 		}
-		if (msg_routed(msg))
-			tipc_printf(buf, "ROUT:");
 		if (msg_reroute_cnt(msg))
 			tipc_printf(buf, "REROUTED(%u):",
 				    msg_reroute_cnt(msg));
@@ -232,13 +228,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 		default:
 			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
-		if (msg_routed(msg))
-			tipc_printf(buf, "ROUT:");
 		if (msg_reroute_cnt(msg))
 			tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg));
 		break;
 	case LINK_PROTOCOL:
-		tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg));
 		switch (msg_type(msg)) {
 		case STATE_MSG:
 			tipc_printf(buf, "STATE:");
@@ -275,33 +268,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
 		break;
-	case ROUTE_DISTRIBUTOR:
-		tipc_printf(buf, "ROUTING_MNG:");
-		switch (msg_type(msg)) {
-		case EXT_ROUTING_TABLE:
-			tipc_printf(buf, "EXT_TBL:");
-			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
-			break;
-		case LOCAL_ROUTING_TABLE:
-			tipc_printf(buf, "LOCAL_TBL:");
-			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
-			break;
-		case SLAVE_ROUTING_TABLE:
-			tipc_printf(buf, "DP_TBL:");
-			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
-			break;
-		case ROUTE_ADDITION:
-			tipc_printf(buf, "ADD:");
-			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
-			break;
-		case ROUTE_REMOVAL:
-			tipc_printf(buf, "REMOVE:");
-			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
-			break;
-		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
-		}
-		break;
 	case LINK_CONFIG:
 		tipc_printf(buf, "CFG:");
 		switch (msg_type(msg)) {
@@ -381,20 +347,15 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, ":OPRT(%u):", msg_origport(msg));
 			tipc_printf(buf, ":DPRT(%u):", msg_destport(msg));
 		}
-		if (msg_routed(msg) && !msg_non_seq(msg))
-			tipc_printf(buf, ":TSEQN(%u)", msg_transp_seqno(msg));
 	}
 	if (msg_user(msg) == NAME_DISTRIBUTOR) {
 		tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg));
 		tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg));
-		if (msg_routed(msg))
-			tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg));
 	}
 
 	if (msg_user(msg) == LINK_CONFIG) {
 		u32 *raw = (u32 *)msg;
 		struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5];
-		tipc_printf(buf, ":REQL(%u):", msg_req_links(msg));
 		tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg));
 		tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg));
 		tipc_media_addr_printf(buf, orig);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 92c4c4fd7b3..de02339fc17 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -2,7 +2,7 @@
  * net/tipc/msg.h: Include file for TIPC message header routines
  *
  * Copyright (c) 2000-2007, Ericsson AB
- * Copyright (c) 2005-2008, Wind River Systems
+ * Copyright (c) 2005-2008, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -421,13 +421,6 @@ static inline int msg_is_dest(struct tipc_msg *m, u32 d)
 	return msg_short(m) || (msg_destnode(m) == d);
 }
 
-static inline u32 msg_routed(struct tipc_msg *m)
-{
-	if (likely(msg_short(m)))
-		return 0;
-	return (msg_destnode(m) ^ msg_orignode(m)) >> 11;
-}
-
 static inline u32 msg_nametype(struct tipc_msg *m)
 {
 	return msg_word(m, 8);
@@ -438,26 +431,6 @@ static inline void msg_set_nametype(struct tipc_msg *m, u32 n)
 	msg_set_word(m, 8, n);
 }
 
-static inline u32 msg_transp_seqno(struct tipc_msg *m)
-{
-	return msg_word(m, 8);
-}
-
-static inline void msg_set_timestamp(struct tipc_msg *m, u32 n)
-{
-	msg_set_word(m, 8, n);
-}
-
-static inline u32 msg_timestamp(struct tipc_msg *m)
-{
-	return msg_word(m, 8);
-}
-
-static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n)
-{
-	msg_set_word(m, 8, n);
-}
-
 static inline u32 msg_nameinst(struct tipc_msg *m)
 {
 	return msg_word(m, 9);
@@ -545,7 +518,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
 #define NAME_DISTRIBUTOR 11
 #define MSG_FRAGMENTER 12
 #define LINK_CONFIG 13
-#define DSC_H_SIZE 40
 
 /*
  * Connection management protocol messages
@@ -577,16 +549,6 @@ static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 1, 16, 0x1fff, n);
 }
 
-static inline u32 msg_req_links(struct tipc_msg *m)
-{
-	return msg_bits(m, 1, 16, 0xfff);
-}
-
-static inline void msg_set_req_links(struct tipc_msg *m, u32 n)
-{
-	msg_set_bits(m, 1, 16, 0xfff, n);
-}
-
 
 /*
  * Word 2
@@ -749,14 +711,9 @@ static inline u32 msg_redundant_link(struct tipc_msg *m)
 	return msg_bits(m, 5, 12, 0x1);
 }
 
-static inline void msg_set_redundant_link(struct tipc_msg *m)
+static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r)
 {
-	msg_set_bits(m, 5, 12, 0x1, 1);
-}
-
-static inline void msg_clear_redundant_link(struct tipc_msg *m)
-{
-	msg_set_bits(m, 5, 12, 0x1, 0);
+	msg_set_bits(m, 5, 12, 0x1, r);
 }
 
 
@@ -805,21 +762,6 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
 }
 
 /*
- * Routing table message data
- */
-
-
-static inline u32 msg_remote_node(struct tipc_msg *m)
-{
-	return msg_word(m, msg_hdr_sz(m)/4);
-}
-
-static inline void msg_set_remote_node(struct tipc_msg *m, u32 a)
-{
-	msg_set_word(m, msg_hdr_sz(m)/4, a);
-}
-
-/*
  * Segmentation message types
  */
 
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 483c226c958..c9fa6dfcf28 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -2,7 +2,7 @@
  * net/tipc/name_distr.c: TIPC name distribution code
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -109,11 +109,9 @@ static void named_cluster_distribute(struct sk_buff *buf)
 {
 	struct sk_buff *buf_copy;
 	struct tipc_node *n_ptr;
-	u32 n_num;
 
-	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) {
-		n_ptr = tipc_net.nodes[n_num];
-		if (n_ptr && tipc_node_has_active_links(n_ptr)) {
+	list_for_each_entry(n_ptr, &tipc_node_list, list) {
+		if (tipc_node_active_links(n_ptr)) {
 			buf_copy = skb_copy(buf, GFP_ATOMIC);
 			if (!buf_copy)
 				break;
@@ -214,17 +212,16 @@ exit:
 }
 
 /**
- * node_is_down - remove publication associated with a failed node
+ * named_purge_publ - remove publication associated with a failed node
  *
  * Invoked for each publication issued by a newly failed node.
  * Removes publication structure from name table & deletes it.
  * In rare cases the link may have come back up again when this
  * function is called, and we have two items representing the same
  * publication. Nudge this item's key to distinguish it from the other.
- * (Note: Publication's node subscription is already unsubscribed.)
  */
 
-static void node_is_down(struct publication *publ)
+static void named_purge_publ(struct publication *publ)
 {
 	struct publication *p;
 
@@ -232,6 +229,8 @@ static void node_is_down(struct publication *publ)
 	publ->key += 1222345;
 	p = tipc_nametbl_remove_publ(publ->type, publ->lower,
 				     publ->node, publ->ref, publ->key);
+	if (p)
+		tipc_nodesub_unsubscribe(&p->subscr);
 	write_unlock_bh(&tipc_nametbl_lock);
 
 	if (p != publ) {
@@ -268,7 +267,8 @@ void tipc_named_recv(struct sk_buff *buf)
 			tipc_nodesub_subscribe(&publ->subscr,
 					       msg_orignode(msg),
 					       publ,
-					       (net_ev_handler)node_is_down);
+					       (net_ev_handler)
+					       named_purge_publ);
 		}
 	} else if (msg_type(msg) == WITHDRAWAL) {
 		publ = tipc_nametbl_remove_publ(ntohl(item->type),
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 9bacfd00b91..68b3dd63729 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -2,7 +2,7 @@
  * net/tipc/net.c: TIPC network routing code
  *
  * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,7 @@
 #include "name_distr.h"
 #include "subscr.h"
 #include "port.h"
+#include "node.h"
 #include "config.h"
 
 /*
@@ -108,26 +109,6 @@
 */
 
 DEFINE_RWLOCK(tipc_net_lock);
-struct network tipc_net;
-
-static int net_start(void)
-{
-	tipc_net.nodes = kcalloc(tipc_max_nodes + 1,
-				 sizeof(*tipc_net.nodes), GFP_ATOMIC);
-	tipc_net.highest_node = 0;
-
-	return tipc_net.nodes ? 0 : -ENOMEM;
-}
-
-static void net_stop(void)
-{
-	u32 n_num;
-
-	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++)
-		tipc_node_delete(tipc_net.nodes[n_num]);
-	kfree(tipc_net.nodes);
-	tipc_net.nodes = NULL;
-}
 
 static void net_route_named_msg(struct sk_buff *buf)
 {
@@ -217,9 +198,6 @@ int tipc_net_start(u32 addr)
 	tipc_named_reinit();
 	tipc_port_reinit();
 
-	res = net_start();
-	if (res)
-		return res;
 	res = tipc_bclink_init();
 	if (res)
 		return res;
@@ -235,14 +213,16 @@ int tipc_net_start(u32 addr)
235 213
236void tipc_net_stop(void) 214void tipc_net_stop(void)
237{ 215{
216 struct tipc_node *node, *t_node;
217
238 if (tipc_mode != TIPC_NET_MODE) 218 if (tipc_mode != TIPC_NET_MODE)
239 return; 219 return;
240 write_lock_bh(&tipc_net_lock); 220 write_lock_bh(&tipc_net_lock);
241 tipc_bearer_stop(); 221 tipc_bearer_stop();
242 tipc_mode = TIPC_NODE_MODE; 222 tipc_mode = TIPC_NODE_MODE;
243 tipc_bclink_stop(); 223 tipc_bclink_stop();
244 net_stop(); 224 list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
225 tipc_node_delete(node);
245 write_unlock_bh(&tipc_net_lock); 226 write_unlock_bh(&tipc_net_lock);
246 info("Left network mode\n"); 227 info("Left network mode\n");
247} 228}
248
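With the per-cluster node array gone, tipc_net_stop() above tears the cluster down by walking tipc_node_list with list_for_each_entry_safe(), which keeps a lookahead pointer so that tipc_node_delete() can free the current element without breaking the traversal. A small self-contained sketch of the same idiom on a plain singly linked list; all names here are illustrative, not the kernel's.

#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned int addr;
	struct node *next;
};

static struct node *node_list;	/* head of a simple singly linked list */

static void node_delete(struct node *n)
{
	printf("deleting node %u\n", n->addr);
	free(n);
}

int main(void)
{
	struct node *n, *t;
	unsigned int i;

	/* build a short list */
	for (i = 1; i <= 3; i++) {
		n = malloc(sizeof(*n));
		n->addr = i;
		n->next = node_list;
		node_list = n;
	}

	/* "safe" variant: t captures n->next before n is freed */
	for (n = node_list; n; n = t) {
		t = n->next;
		node_delete(n);
	}
	node_list = NULL;
	return 0;
}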
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 4ae59ad0489..9eb4b9e220e 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -2,7 +2,7 @@
  * net/tipc/net.h: Include file for TIPC network routing code
  *
  * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -37,23 +37,6 @@
 #ifndef _TIPC_NET_H
 #define _TIPC_NET_H
 
-struct tipc_node;
-
-/**
- * struct network - TIPC network structure
- * @nodes: array of pointers to all nodes within cluster
- * @highest_node: id of highest numbered node within cluster
- * @links: number of (unicast) links to cluster
- */
-
-struct network {
-	struct tipc_node **nodes;
-	u32 highest_node;
-	u32 links;
-};
-
-
-extern struct network tipc_net;
 extern rwlock_t tipc_net_lock;
 
 void tipc_net_route_msg(struct sk_buff *buf);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3af53e327f4..2d106ef4fa4 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2,7 +2,7 @@
  * net/tipc/node.c: TIPC node management routines
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,9 +44,33 @@ static void node_established_contact(struct tipc_node *n_ptr);
 
 static DEFINE_SPINLOCK(node_create_lock);
 
+static struct hlist_head node_htable[NODE_HTABLE_SIZE];
+LIST_HEAD(tipc_node_list);
+static u32 tipc_num_nodes;
+
+static atomic_t tipc_num_links = ATOMIC_INIT(0);
 u32 tipc_own_tag;
 
 /**
+ * tipc_node_find - locate specified node object, if it exists
+ */
+
+struct tipc_node *tipc_node_find(u32 addr)
+{
+	struct tipc_node *node;
+	struct hlist_node *pos;
+
+	if (unlikely(!in_own_cluster(addr)))
+		return NULL;
+
+	hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) {
+		if (node->addr == addr)
+			return node;
+	}
+	return NULL;
+}
+
+/**
  * tipc_node_create - create neighboring node
  *
  * Currently, this routine is called by neighbor discovery code, which holds
@@ -58,8 +82,7 @@ u32 tipc_own_tag;
 
 struct tipc_node *tipc_node_create(u32 addr)
 {
-	struct tipc_node *n_ptr;
-	u32 n_num;
+	struct tipc_node *n_ptr, *temp_node;
 
 	spin_lock_bh(&node_create_lock);
 
@@ -78,12 +101,19 @@ struct tipc_node *tipc_node_create(u32 addr)
 
 	n_ptr->addr = addr;
 	spin_lock_init(&n_ptr->lock);
+	INIT_HLIST_NODE(&n_ptr->hash);
+	INIT_LIST_HEAD(&n_ptr->list);
 	INIT_LIST_HEAD(&n_ptr->nsub);
 
-	n_num = tipc_node(addr);
-	tipc_net.nodes[n_num] = n_ptr;
-	if (n_num > tipc_net.highest_node)
-		tipc_net.highest_node = n_num;
+	hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
+
+	list_for_each_entry(temp_node, &tipc_node_list, list) {
+		if (n_ptr->addr < temp_node->addr)
+			break;
+	}
+	list_add_tail(&n_ptr->list, &temp_node->list);
+
+	tipc_num_nodes++;
 
 	spin_unlock_bh(&node_create_lock);
 	return n_ptr;
@@ -91,18 +121,11 @@ struct tipc_node *tipc_node_create(u32 addr)
 
 void tipc_node_delete(struct tipc_node *n_ptr)
 {
-	u32 n_num;
-
-	if (!n_ptr)
-		return;
-
-	n_num = tipc_node(n_ptr->addr);
-	tipc_net.nodes[n_num] = NULL;
+	list_del(&n_ptr->list);
+	hlist_del(&n_ptr->hash);
 	kfree(n_ptr);
 
-	while (!tipc_net.nodes[tipc_net.highest_node])
-		if (--tipc_net.highest_node == 0)
-			break;
+	tipc_num_nodes--;
 }
 
 
@@ -200,54 +223,32 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
 	node_lost_contact(n_ptr);
 }
 
-int tipc_node_has_active_links(struct tipc_node *n_ptr)
+int tipc_node_active_links(struct tipc_node *n_ptr)
 {
 	return n_ptr->active_links[0] != NULL;
 }
 
-int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
+int tipc_node_redundant_links(struct tipc_node *n_ptr)
 {
 	return n_ptr->working_links > 1;
 }
 
 int tipc_node_is_up(struct tipc_node *n_ptr)
 {
-	return tipc_node_has_active_links(n_ptr);
+	return tipc_node_active_links(n_ptr);
 }
 
-struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
+void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 {
-	struct tipc_node *n_ptr = tipc_node_find(l_ptr->addr);
-
-	if (!n_ptr)
-		n_ptr = tipc_node_create(l_ptr->addr);
-	if (n_ptr) {
-		u32 bearer_id = l_ptr->b_ptr->identity;
-		char addr_string[16];
-
-		if (n_ptr->link_cnt >= 2) {
-			err("Attempt to create third link to %s\n",
-			    tipc_addr_string_fill(addr_string, n_ptr->addr));
-			return NULL;
-		}
-
-		if (!n_ptr->links[bearer_id]) {
-			n_ptr->links[bearer_id] = l_ptr;
-			tipc_net.links++;
-			n_ptr->link_cnt++;
-			return n_ptr;
-		}
-		err("Attempt to establish second link on <%s> to %s\n",
-		    l_ptr->b_ptr->publ.name,
-		    tipc_addr_string_fill(addr_string, l_ptr->addr));
-	}
-	return NULL;
+	n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
+	atomic_inc(&tipc_num_links);
+	n_ptr->link_cnt++;
 }
 
 void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 {
 	n_ptr->links[l_ptr->b_ptr->identity] = NULL;
-	tipc_net.links--;
+	atomic_dec(&tipc_num_links);
 	n_ptr->link_cnt--;
 }
 
@@ -327,7 +328,6 @@ static void node_cleanup_finished(unsigned long node_addr)
 
 static void node_lost_contact(struct tipc_node *n_ptr)
 {
-	struct tipc_node_subscr *ns, *tns;
 	char addr_string[16];
 	u32 i;
 
@@ -365,12 +365,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	}
 
 	/* Notify subscribers */
-	list_for_each_entry_safe(ns, tns, &n_ptr->nsub, nodesub_list) {
-		ns->node = NULL;
-		list_del_init(&ns->nodesub_list);
-		tipc_k_signal((Handler)ns->handle_node_down,
-			      (unsigned long)ns->usr_handle);
-	}
+	tipc_nodesub_notify(n_ptr);
 
 	/* Prevent re-contact with node until all cleanup is done */
 
@@ -385,7 +380,6 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 	struct tipc_node *n_ptr;
 	struct tipc_node_info node_info;
 	u32 payload_size;
-	u32 n_num;
 
 	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
 		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -396,15 +390,14 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 						   " (network address)");
 
 	read_lock_bh(&tipc_net_lock);
-	if (!tipc_net.nodes) {
+	if (!tipc_num_nodes) {
 		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_none();
 	}
 
 	/* For now, get space for all other nodes */
 
-	payload_size = TLV_SPACE(sizeof(node_info)) *
-		(tipc_net.highest_node - 1);
+	payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes;
 	if (payload_size > 32768u) {
 		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
@@ -418,9 +411,8 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 
 	/* Add TLVs for all nodes in scope */
 
-	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) {
-		n_ptr = tipc_net.nodes[n_num];
-		if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr))
+	list_for_each_entry(n_ptr, &tipc_node_list, list) {
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		node_info.addr = htonl(n_ptr->addr);
 		node_info.up = htonl(tipc_node_is_up(n_ptr));
@@ -439,7 +431,6 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 	struct tipc_node *n_ptr;
 	struct tipc_link_info link_info;
 	u32 payload_size;
-	u32 n_num;
 
 	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
 		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -456,7 +447,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 
 	/* Get space for all unicast links + multicast link */
 
-	payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1);
+	payload_size = TLV_SPACE(sizeof(link_info)) *
+		(atomic_read(&tipc_num_links) + 1);
 	if (payload_size > 32768u) {
 		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
@@ -470,18 +462,17 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 
 	/* Add TLV for broadcast link */
 
-	link_info.dest = htonl(tipc_own_addr & 0xfffff00);
+	link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr));
 	link_info.up = htonl(1);
 	strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME);
 	tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
 
 	/* Add TLVs for any other links in scope */
 
-	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) {
+	list_for_each_entry(n_ptr, &tipc_node_list, list) {
 		u32 i;
 
-		n_ptr = tipc_net.nodes[n_num];
-		if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr))
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		tipc_node_lock(n_ptr);
 		for (i = 0; i < MAX_BEARERS; i++) {
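tipc_node_create() above now maintains two indexes at once: a hash chain for fast lookup and a list kept sorted by ascending address, filled by walking to the first neighbor with a larger address and inserting in front of it (list_add_tail() adds before the element found, or at the list tail if the walk completes). A compact userspace sketch of that insertion follows, using a plain pointer-to-pointer walk instead of the kernel list macros; all names are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned int addr;
	struct node *next;
};

static struct node *node_list;

/* Insert so the list stays sorted by ascending address: stop at the
 * first element with a larger address and splice in before it. */
static void node_insert_sorted(struct node *n)
{
	struct node **pos = &node_list;

	while (*pos && (*pos)->addr < n->addr)
		pos = &(*pos)->next;
	n->next = *pos;
	*pos = n;
}

int main(void)
{
	unsigned int addrs[] = { 3, 1, 2 };
	unsigned int i;
	struct node *n;

	for (i = 0; i < 3; i++) {
		n = malloc(sizeof(*n));
		n->addr = addrs[i];
		node_insert_sorted(n);
	}
	for (n = node_list; n; n = n->next)
		printf("%u ", n->addr);	/* prints: 1 2 3 */
	printf("\n");
	return 0;
}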
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 206a8efa410..5c61afc7a0b 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -2,7 +2,7 @@
  * net/tipc/node.h: Include file for TIPC node management routines
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -46,7 +46,8 @@
  * struct tipc_node - TIPC node structure
  * @addr: network address of node
  * @lock: spinlock governing access to structure
- * @next: pointer to next node in sorted list of cluster's nodes
+ * @hash: links to adjacent nodes in unsorted hash chain
+ * @list: links to adjacent nodes in sorted list of cluster's nodes
  * @nsub: list of "node down" subscriptions monitoring node
  * @active_links: pointers to active links to node
  * @links: pointers to all links to node
@@ -69,7 +70,8 @@
 struct tipc_node {
 	u32 addr;
 	spinlock_t lock;
-	struct tipc_node *next;
+	struct hlist_node hash;
+	struct list_head list;
 	struct list_head nsub;
 	struct link *active_links[2];
 	struct link *links[MAX_BEARERS];
@@ -90,27 +92,35 @@ struct tipc_node {
 	} bclink;
 };
 
+#define NODE_HTABLE_SIZE 512
+extern struct list_head tipc_node_list;
+
+/*
+ * A trivial power-of-two bitmask technique is used for speed, since this
+ * operation is done for every incoming TIPC packet. The number of hash table
+ * entries has been chosen so that no hash chain exceeds 8 nodes and will
+ * usually be much smaller (typically only a single node).
+ */
+static inline unsigned int tipc_hashfn(u32 addr)
+{
+	return addr & (NODE_HTABLE_SIZE - 1);
+}
+
 extern u32 tipc_own_tag;
 
+struct tipc_node *tipc_node_find(u32 addr);
 struct tipc_node *tipc_node_create(u32 addr);
 void tipc_node_delete(struct tipc_node *n_ptr);
-struct tipc_node *tipc_node_attach_link(struct link *l_ptr);
+void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr);
 void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr);
 void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr);
 void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr);
-int tipc_node_has_active_links(struct tipc_node *n_ptr);
-int tipc_node_has_redundant_links(struct tipc_node *n_ptr);
+int tipc_node_active_links(struct tipc_node *n_ptr);
+int tipc_node_redundant_links(struct tipc_node *n_ptr);
 int tipc_node_is_up(struct tipc_node *n_ptr);
 struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
 
-static inline struct tipc_node *tipc_node_find(u32 addr)
-{
-	if (likely(in_own_cluster(addr)))
-		return tipc_net.nodes[tipc_node(addr)];
-	return NULL;
-}
-
 static inline void tipc_node_lock(struct tipc_node *n_ptr)
 {
 	spin_lock_bh(&n_ptr->lock);
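The new tipc_hashfn() above leans on NODE_HTABLE_SIZE being a power of two: masking with size - 1 selects the low bits of the address, which equals the modulo reduction but avoids a division on every received packet. A quick standalone check of that equivalence; it assumes only that the table size stays a power of two.

#include <assert.h>
#include <stdio.h>

#define NODE_HTABLE_SIZE 512	/* must stay a power of two */

static unsigned int tipc_hashfn(unsigned int addr)
{
	return addr & (NODE_HTABLE_SIZE - 1);
}

int main(void)
{
	unsigned int addr;

	/* For a power-of-two table size, the mask gives the same bucket
	 * as the (slower) modulo reduction, which is the point of the
	 * comment in the hunk above. */
	for (addr = 0; addr < 100000; addr++)
		assert(tipc_hashfn(addr) == addr % NODE_HTABLE_SIZE);
	printf("mask and modulo agree for all tested addresses\n");
	return 0;
}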
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 018a55332d9..c3c2815ae63 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -2,7 +2,7 @@
  * net/tipc/node_subscr.c: TIPC "node down" subscription handling
  *
  * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -76,3 +76,22 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub)
 	list_del_init(&node_sub->nodesub_list);
 	tipc_node_unlock(node_sub->node);
 }
+
+/**
+ * tipc_nodesub_notify - notify subscribers that a node is unreachable
+ *
+ * Note: node is locked by caller
+ */
+
+void tipc_nodesub_notify(struct tipc_node *node)
+{
+	struct tipc_node_subscr *ns;
+
+	list_for_each_entry(ns, &node->nsub, nodesub_list) {
+		if (ns->handle_node_down) {
+			tipc_k_signal((Handler)ns->handle_node_down,
+				      (unsigned long)ns->usr_handle);
+			ns->handle_node_down = NULL;
+		}
+	}
+}
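tipc_nodesub_notify() above deliberately clears handle_node_down after queueing the signal, so each subscription fires at most once even if a second notification pass runs before the subscriber has unsubscribed. A tiny sketch of that one-shot callback pattern; the types and names are illustrative, and the real code defers the call through tipc_k_signal() rather than invoking it inline.

#include <stdio.h>

struct subscr {
	void (*handle_node_down)(void *);
	void *usr_handle;
};

static void port_down(void *handle)
{
	printf("port %s told its peer node is gone\n", (const char *)handle);
}

/* One-shot delivery: clear the callback pointer after use so a repeated
 * notification cannot fire the same subscription twice. */
static void notify(struct subscr *s)
{
	if (s->handle_node_down) {
		s->handle_node_down(s->usr_handle);
		s->handle_node_down = NULL;
	}
}

int main(void)
{
	struct subscr s = { port_down, "A" };

	notify(&s);	/* fires */
	notify(&s);	/* silently skipped */
	return 0;
}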
diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h
index 006ed739f51..4bc2ca0867a 100644
--- a/net/tipc/node_subscr.h
+++ b/net/tipc/node_subscr.h
@@ -2,7 +2,7 @@
  * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling
  *
  * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,5 +59,6 @@ struct tipc_node_subscr {
 void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
 			    void *usr_handle, net_ev_handler handle_down);
 void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub);
+void tipc_nodesub_notify(struct tipc_node *node);
 
 #endif
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 067bab2a0b9..6ff78f9c7d6 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -2,7 +2,7 @@
  * net/tipc/port.c: TIPC port code
  *
  * Copyright (c) 1992-2007, Ericsson AB
- * Copyright (c) 2004-2008, Wind River Systems
+ * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -54,33 +54,19 @@ static DEFINE_SPINLOCK(queue_lock);
 
 static LIST_HEAD(ports);
 static void port_handle_node_down(unsigned long ref);
-static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err);
-static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err);
+static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err);
+static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err);
 static void port_timeout(unsigned long ref);
 
 
-static u32 port_peernode(struct port *p_ptr)
+static u32 port_peernode(struct tipc_port *p_ptr)
 {
-	return msg_destnode(&p_ptr->publ.phdr);
+	return msg_destnode(&p_ptr->phdr);
 }
 
-static u32 port_peerport(struct port *p_ptr)
+static u32 port_peerport(struct tipc_port *p_ptr)
 {
-	return msg_destport(&p_ptr->publ.phdr);
-}
-
-static u32 port_out_seqno(struct port *p_ptr)
-{
-	return msg_transp_seqno(&p_ptr->publ.phdr);
-}
-
-static void port_incr_out_seqno(struct port *p_ptr)
-{
-	struct tipc_msg *m = &p_ptr->publ.phdr;
-
-	if (likely(!msg_routed(m)))
-		return;
-	msg_set_transp_seqno(m, (msg_transp_seqno(m) + 1));
+	return msg_destport(&p_ptr->phdr);
 }
 
 /**
@@ -94,7 +80,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 	struct sk_buff *buf;
 	struct sk_buff *ibuf = NULL;
 	struct port_list dports = {0, NULL, };
-	struct port *oport = tipc_port_deref(ref);
+	struct tipc_port *oport = tipc_port_deref(ref);
 	int ext_targets;
 	int res;
 
@@ -103,7 +89,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 
 	/* Create multicast message */
 
-	hdr = &oport->publ.phdr;
+	hdr = &oport->phdr;
 	msg_set_type(hdr, TIPC_MCAST_MSG);
 	msg_set_nametype(hdr, seq->type);
 	msg_set_namelower(hdr, seq->lower);
@@ -211,7 +197,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 			void (*wakeup)(struct tipc_port *),
 			const u32 importance)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 	u32 ref;
 
@@ -220,21 +206,19 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 		warn("Port creation failed, no memory\n");
 		return NULL;
 	}
-	ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock);
+	ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
 	if (!ref) {
 		warn("Port creation failed, reference table exhausted\n");
 		kfree(p_ptr);
 		return NULL;
 	}
 
-	p_ptr->publ.usr_handle = usr_handle;
-	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
-	p_ptr->publ.ref = ref;
-	msg = &p_ptr->publ.phdr;
+	p_ptr->usr_handle = usr_handle;
+	p_ptr->max_pkt = MAX_PKT_DEFAULT;
+	p_ptr->ref = ref;
+	msg = &p_ptr->phdr;
 	tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
 	msg_set_origport(msg, ref);
-	p_ptr->last_in_seqno = 41;
-	p_ptr->sent = 1;
 	INIT_LIST_HEAD(&p_ptr->wait_list);
 	INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
 	p_ptr->dispatcher = dispatcher;
@@ -246,12 +230,12 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 	INIT_LIST_HEAD(&p_ptr->port_list);
 	list_add_tail(&p_ptr->port_list, &ports);
 	spin_unlock_bh(&tipc_port_list_lock);
-	return &(p_ptr->publ);
+	return p_ptr;
 }
 
 int tipc_deleteport(u32 ref)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct sk_buff *buf = NULL;
 
 	tipc_withdraw(ref, 0, NULL);
@@ -263,7 +247,7 @@ int tipc_deleteport(u32 ref)
 	tipc_port_unlock(p_ptr);
 
 	k_cancel_timer(&p_ptr->timer);
-	if (p_ptr->publ.connected) {
+	if (p_ptr->connected) {
 		buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
 		tipc_nodesub_unsubscribe(&p_ptr->subscription);
 	}
@@ -279,14 +263,14 @@ int tipc_deleteport(u32 ref)
 	return 0;
 }
 
-static int port_unreliable(struct port *p_ptr)
+static int port_unreliable(struct tipc_port *p_ptr)
 {
-	return msg_src_droppable(&p_ptr->publ.phdr);
+	return msg_src_droppable(&p_ptr->phdr);
 }
 
 int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
@@ -298,24 +282,24 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
 
 int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
-	msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0));
+	msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0));
 	tipc_port_unlock(p_ptr);
 	return 0;
 }
 
-static int port_unreturnable(struct port *p_ptr)
+static int port_unreturnable(struct tipc_port *p_ptr)
 {
-	return msg_dest_droppable(&p_ptr->publ.phdr);
+	return msg_dest_droppable(&p_ptr->phdr);
 }
 
 int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
@@ -327,12 +311,12 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
 
 int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
-	msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0));
+	msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0));
 	tipc_port_unlock(p_ptr);
 	return 0;
 }
@@ -345,7 +329,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
 static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 					    u32 origport, u32 orignode,
 					    u32 usr, u32 type, u32 err,
-					    u32 seqno, u32 ack)
+					    u32 ack)
 {
 	struct sk_buff *buf;
 	struct tipc_msg *msg;
@@ -358,7 +342,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 		msg_set_destport(msg, destport);
 		msg_set_origport(msg, origport);
 		msg_set_orignode(msg, orignode);
-		msg_set_transp_seqno(msg, seqno);
 		msg_set_msgcnt(msg, ack);
 	}
 	return buf;
@@ -413,10 +396,10 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	/* send self-abort message when rejecting on a connected port */
 	if (msg_connected(msg)) {
 		struct sk_buff *abuf = NULL;
-		struct port *p_ptr = tipc_port_lock(msg_destport(msg));
+		struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
 
 		if (p_ptr) {
-			if (p_ptr->publ.connected)
+			if (p_ptr->connected)
 				abuf = port_build_self_abort_msg(p_ptr, err);
 			tipc_port_unlock(p_ptr);
 		}
@@ -429,7 +412,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	return data_sz;
 }
 
-int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
+int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
 			      struct iovec const *msg_sect, u32 num_sect,
 			      int err)
 {
@@ -446,13 +429,13 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
 
 static void port_timeout(unsigned long ref)
 {
-	struct port *p_ptr = tipc_port_lock(ref);
+	struct tipc_port *p_ptr = tipc_port_lock(ref);
 	struct sk_buff *buf = NULL;
 
 	if (!p_ptr)
 		return;
 
-	if (!p_ptr->publ.connected) {
+	if (!p_ptr->connected) {
 		tipc_port_unlock(p_ptr);
 		return;
 	}
@@ -463,14 +446,12 @@ static void port_timeout(unsigned long ref)
 	} else {
 		buf = port_build_proto_msg(port_peerport(p_ptr),
 					   port_peernode(p_ptr),
-					   p_ptr->publ.ref,
+					   p_ptr->ref,
 					   tipc_own_addr,
 					   CONN_MANAGER,
 					   CONN_PROBE,
 					   TIPC_OK,
-					   port_out_seqno(p_ptr),
 					   0);
-		port_incr_out_seqno(p_ptr);
 		p_ptr->probing_state = PROBING;
 		k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
 	}
@@ -481,7 +462,7 @@ static void port_timeout(unsigned long ref)
481 462
482static void port_handle_node_down(unsigned long ref) 463static void port_handle_node_down(unsigned long ref)
483{ 464{
484 struct port *p_ptr = tipc_port_lock(ref); 465 struct tipc_port *p_ptr = tipc_port_lock(ref);
485 struct sk_buff *buf = NULL; 466 struct sk_buff *buf = NULL;
486 467
487 if (!p_ptr) 468 if (!p_ptr)
@@ -492,73 +473,71 @@ static void port_handle_node_down(unsigned long ref)
492} 473}
493 474
494 475
495static struct sk_buff *port_build_self_abort_msg(struct port *p_ptr, u32 err) 476static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err)
496{ 477{
497 u32 imp = msg_importance(&p_ptr->publ.phdr); 478 u32 imp = msg_importance(&p_ptr->phdr);
498 479
499 if (!p_ptr->publ.connected) 480 if (!p_ptr->connected)
500 return NULL; 481 return NULL;
501 if (imp < TIPC_CRITICAL_IMPORTANCE) 482 if (imp < TIPC_CRITICAL_IMPORTANCE)
502 imp++; 483 imp++;
503 return port_build_proto_msg(p_ptr->publ.ref, 484 return port_build_proto_msg(p_ptr->ref,
504 tipc_own_addr, 485 tipc_own_addr,
505 port_peerport(p_ptr), 486 port_peerport(p_ptr),
506 port_peernode(p_ptr), 487 port_peernode(p_ptr),
507 imp, 488 imp,
508 TIPC_CONN_MSG, 489 TIPC_CONN_MSG,
509 err, 490 err,
510 p_ptr->last_in_seqno + 1,
511 0); 491 0);
512} 492}
513 493
514 494
515static struct sk_buff *port_build_peer_abort_msg(struct port *p_ptr, u32 err) 495static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err)
516{ 496{
517 u32 imp = msg_importance(&p_ptr->publ.phdr); 497 u32 imp = msg_importance(&p_ptr->phdr);
518 498
519 if (!p_ptr->publ.connected) 499 if (!p_ptr->connected)
520 return NULL; 500 return NULL;
521 if (imp < TIPC_CRITICAL_IMPORTANCE) 501 if (imp < TIPC_CRITICAL_IMPORTANCE)
522 imp++; 502 imp++;
523 return port_build_proto_msg(port_peerport(p_ptr), 503 return port_build_proto_msg(port_peerport(p_ptr),
524 port_peernode(p_ptr), 504 port_peernode(p_ptr),
525 p_ptr->publ.ref, 505 p_ptr->ref,
526 tipc_own_addr, 506 tipc_own_addr,
527 imp, 507 imp,
528 TIPC_CONN_MSG, 508 TIPC_CONN_MSG,
529 err, 509 err,
530 port_out_seqno(p_ptr),
531 0); 510 0);
532} 511}
533 512
534void tipc_port_recv_proto_msg(struct sk_buff *buf) 513void tipc_port_recv_proto_msg(struct sk_buff *buf)
535{ 514{
536 struct tipc_msg *msg = buf_msg(buf); 515 struct tipc_msg *msg = buf_msg(buf);
537 struct port *p_ptr = tipc_port_lock(msg_destport(msg)); 516 struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
538 u32 err = TIPC_OK; 517 u32 err = TIPC_OK;
539 struct sk_buff *r_buf = NULL; 518 struct sk_buff *r_buf = NULL;
540 struct sk_buff *abort_buf = NULL; 519 struct sk_buff *abort_buf = NULL;
541 520
542 if (!p_ptr) { 521 if (!p_ptr) {
543 err = TIPC_ERR_NO_PORT; 522 err = TIPC_ERR_NO_PORT;
544 } else if (p_ptr->publ.connected) { 523 } else if (p_ptr->connected) {
545 if ((port_peernode(p_ptr) != msg_orignode(msg)) || 524 if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
546 (port_peerport(p_ptr) != msg_origport(msg))) { 525 (port_peerport(p_ptr) != msg_origport(msg))) {
547 err = TIPC_ERR_NO_PORT; 526 err = TIPC_ERR_NO_PORT;
548 } else if (msg_type(msg) == CONN_ACK) { 527 } else if (msg_type(msg) == CONN_ACK) {
549 int wakeup = tipc_port_congested(p_ptr) && 528 int wakeup = tipc_port_congested(p_ptr) &&
550 p_ptr->publ.congested && 529 p_ptr->congested &&
551 p_ptr->wakeup; 530 p_ptr->wakeup;
552 p_ptr->acked += msg_msgcnt(msg); 531 p_ptr->acked += msg_msgcnt(msg);
553 if (tipc_port_congested(p_ptr)) 532 if (tipc_port_congested(p_ptr))
554 goto exit; 533 goto exit;
555 p_ptr->publ.congested = 0; 534 p_ptr->congested = 0;
556 if (!wakeup) 535 if (!wakeup)
557 goto exit; 536 goto exit;
558 p_ptr->wakeup(&p_ptr->publ); 537 p_ptr->wakeup(p_ptr);
559 goto exit; 538 goto exit;
560 } 539 }
561 } else if (p_ptr->publ.published) { 540 } else if (p_ptr->published) {
562 err = TIPC_ERR_NO_PORT; 541 err = TIPC_ERR_NO_PORT;
563 } 542 }
564 if (err) { 543 if (err) {
@@ -569,7 +548,6 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
569 TIPC_HIGH_IMPORTANCE, 548 TIPC_HIGH_IMPORTANCE,
570 TIPC_CONN_MSG, 549 TIPC_CONN_MSG,
571 err, 550 err,
572 0,
573 0); 551 0);
574 goto exit; 552 goto exit;
575 } 553 }
@@ -583,11 +561,9 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
583 CONN_MANAGER, 561 CONN_MANAGER,
584 CONN_PROBE_REPLY, 562 CONN_PROBE_REPLY,
585 TIPC_OK, 563 TIPC_OK,
586 port_out_seqno(p_ptr),
587 0); 564 0);
588 } 565 }
589 p_ptr->probing_state = CONFIRMED; 566 p_ptr->probing_state = CONFIRMED;
590 port_incr_out_seqno(p_ptr);
591exit: 567exit:
592 if (p_ptr) 568 if (p_ptr)
593 tipc_port_unlock(p_ptr); 569 tipc_port_unlock(p_ptr);
@@ -596,29 +572,29 @@ exit:
596 buf_discard(buf); 572 buf_discard(buf);
597} 573}
598 574
599static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id) 575static void port_print(struct tipc_port *p_ptr, struct print_buf *buf, int full_id)
600{ 576{
601 struct publication *publ; 577 struct publication *publ;
602 578
603 if (full_id) 579 if (full_id)
604 tipc_printf(buf, "<%u.%u.%u:%u>:", 580 tipc_printf(buf, "<%u.%u.%u:%u>:",
605 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), 581 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
606 tipc_node(tipc_own_addr), p_ptr->publ.ref); 582 tipc_node(tipc_own_addr), p_ptr->ref);
607 else 583 else
608 tipc_printf(buf, "%-10u:", p_ptr->publ.ref); 584 tipc_printf(buf, "%-10u:", p_ptr->ref);
609 585
610 if (p_ptr->publ.connected) { 586 if (p_ptr->connected) {
611 u32 dport = port_peerport(p_ptr); 587 u32 dport = port_peerport(p_ptr);
612 u32 destnode = port_peernode(p_ptr); 588 u32 destnode = port_peernode(p_ptr);
613 589
614 tipc_printf(buf, " connected to <%u.%u.%u:%u>", 590 tipc_printf(buf, " connected to <%u.%u.%u:%u>",
615 tipc_zone(destnode), tipc_cluster(destnode), 591 tipc_zone(destnode), tipc_cluster(destnode),
616 tipc_node(destnode), dport); 592 tipc_node(destnode), dport);
617 if (p_ptr->publ.conn_type != 0) 593 if (p_ptr->conn_type != 0)
618 tipc_printf(buf, " via {%u,%u}", 594 tipc_printf(buf, " via {%u,%u}",
619 p_ptr->publ.conn_type, 595 p_ptr->conn_type,
620 p_ptr->publ.conn_instance); 596 p_ptr->conn_instance);
621 } else if (p_ptr->publ.published) { 597 } else if (p_ptr->published) {
622 tipc_printf(buf, " bound to"); 598 tipc_printf(buf, " bound to");
623 list_for_each_entry(publ, &p_ptr->publications, pport_list) { 599 list_for_each_entry(publ, &p_ptr->publications, pport_list) {
624 if (publ->lower == publ->upper) 600 if (publ->lower == publ->upper)
@@ -639,7 +615,7 @@ struct sk_buff *tipc_port_get_ports(void)
639 struct sk_buff *buf; 615 struct sk_buff *buf;
640 struct tlv_desc *rep_tlv; 616 struct tlv_desc *rep_tlv;
641 struct print_buf pb; 617 struct print_buf pb;
642 struct port *p_ptr; 618 struct tipc_port *p_ptr;
643 int str_len; 619 int str_len;
644 620
645 buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY)); 621 buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY));
@@ -650,9 +626,9 @@ struct sk_buff *tipc_port_get_ports(void)
650 tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY); 626 tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY);
651 spin_lock_bh(&tipc_port_list_lock); 627 spin_lock_bh(&tipc_port_list_lock);
652 list_for_each_entry(p_ptr, &ports, port_list) { 628 list_for_each_entry(p_ptr, &ports, port_list) {
653 spin_lock_bh(p_ptr->publ.lock); 629 spin_lock_bh(p_ptr->lock);
654 port_print(p_ptr, &pb, 0); 630 port_print(p_ptr, &pb, 0);
655 spin_unlock_bh(p_ptr->publ.lock); 631 spin_unlock_bh(p_ptr->lock);
656 } 632 }
657 spin_unlock_bh(&tipc_port_list_lock); 633 spin_unlock_bh(&tipc_port_list_lock);
658 str_len = tipc_printbuf_validate(&pb); 634 str_len = tipc_printbuf_validate(&pb);
@@ -665,12 +641,12 @@ struct sk_buff *tipc_port_get_ports(void)
665 641
666void tipc_port_reinit(void) 642void tipc_port_reinit(void)
667{ 643{
668 struct port *p_ptr; 644 struct tipc_port *p_ptr;
669 struct tipc_msg *msg; 645 struct tipc_msg *msg;
670 646
671 spin_lock_bh(&tipc_port_list_lock); 647 spin_lock_bh(&tipc_port_list_lock);
672 list_for_each_entry(p_ptr, &ports, port_list) { 648 list_for_each_entry(p_ptr, &ports, port_list) {
673 msg = &p_ptr->publ.phdr; 649 msg = &p_ptr->phdr;
674 if (msg_orignode(msg) == tipc_own_addr) 650 if (msg_orignode(msg) == tipc_own_addr)
675 break; 651 break;
676 msg_set_prevnode(msg, tipc_own_addr); 652 msg_set_prevnode(msg, tipc_own_addr);
@@ -695,7 +671,7 @@ static void port_dispatcher_sigh(void *dummy)
695 spin_unlock_bh(&queue_lock); 671 spin_unlock_bh(&queue_lock);
696 672
697 while (buf) { 673 while (buf) {
698 struct port *p_ptr; 674 struct tipc_port *p_ptr;
699 struct user_port *up_ptr; 675 struct user_port *up_ptr;
700 struct tipc_portid orig; 676 struct tipc_portid orig;
701 struct tipc_name_seq dseq; 677 struct tipc_name_seq dseq;
@@ -720,8 +696,8 @@ static void port_dispatcher_sigh(void *dummy)
720 orig.node = msg_orignode(msg); 696 orig.node = msg_orignode(msg);
721 up_ptr = p_ptr->user_port; 697 up_ptr = p_ptr->user_port;
722 usr_handle = up_ptr->usr_handle; 698 usr_handle = up_ptr->usr_handle;
723 connected = p_ptr->publ.connected; 699 connected = p_ptr->connected;
724 published = p_ptr->publ.published; 700 published = p_ptr->published;
725 701
726 if (unlikely(msg_errcode(msg))) 702 if (unlikely(msg_errcode(msg)))
727 goto err; 703 goto err;
@@ -732,6 +708,7 @@ static void port_dispatcher_sigh(void *dummy)
732 tipc_conn_msg_event cb = up_ptr->conn_msg_cb; 708 tipc_conn_msg_event cb = up_ptr->conn_msg_cb;
733 u32 peer_port = port_peerport(p_ptr); 709 u32 peer_port = port_peerport(p_ptr);
734 u32 peer_node = port_peernode(p_ptr); 710 u32 peer_node = port_peernode(p_ptr);
711 u32 dsz;
735 712
736 tipc_port_unlock(p_ptr); 713 tipc_port_unlock(p_ptr);
737 if (unlikely(!cb)) 714 if (unlikely(!cb))
@@ -742,13 +719,14 @@ static void port_dispatcher_sigh(void *dummy)
742 } else if ((msg_origport(msg) != peer_port) || 719 } else if ((msg_origport(msg) != peer_port) ||
743 (msg_orignode(msg) != peer_node)) 720 (msg_orignode(msg) != peer_node))
744 goto reject; 721 goto reject;
745 if (unlikely(++p_ptr->publ.conn_unacked >= 722 dsz = msg_data_sz(msg);
746 TIPC_FLOW_CONTROL_WIN)) 723 if (unlikely(dsz &&
724 (++p_ptr->conn_unacked >=
725 TIPC_FLOW_CONTROL_WIN)))
747 tipc_acknowledge(dref, 726 tipc_acknowledge(dref,
748 p_ptr->publ.conn_unacked); 727 p_ptr->conn_unacked);
749 skb_pull(buf, msg_hdr_sz(msg)); 728 skb_pull(buf, msg_hdr_sz(msg));
750 cb(usr_handle, dref, &buf, msg_data(msg), 729 cb(usr_handle, dref, &buf, msg_data(msg), dsz);
751 msg_data_sz(msg));
752 break; 730 break;
753 } 731 }
754 case TIPC_DIRECT_MSG:{ 732 case TIPC_DIRECT_MSG:{
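The dispatcher change above stops counting empty messages against the connection's flow-control window: only a message that actually carries data bumps conn_unacked, and an acknowledgement goes out once a full window of unacknowledged data messages has been consumed. A runnable sketch of that accounting; the window constant illustrates the idea rather than quoting TIPC's exact value, and the real code subtracts the acked count in tipc_acknowledge() instead of zeroing.

#include <stdio.h>

#define FLOW_CONTROL_WIN 512	/* illustrative window size */

static unsigned int conn_unacked;

/* Receive-side accounting: data-bearing messages count toward the
 * window; empty messages pass through without triggering an ack. */
static void deliver(unsigned int data_sz)
{
	if (data_sz && ++conn_unacked >= FLOW_CONTROL_WIN) {
		printf("sending CONN_ACK for %u messages\n", conn_unacked);
		conn_unacked = 0;
	}
}

int main(void)
{
	unsigned int i;

	for (i = 0; i < 2 * FLOW_CONTROL_WIN; i++)
		deliver(100);	/* 100-byte payloads: two acks expected */
	for (i = 0; i < FLOW_CONTROL_WIN; i++)
		deliver(0);	/* empty messages never trigger an ack */
	return 0;
}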
@@ -872,7 +850,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf)
872 850
873static void port_wakeup_sh(unsigned long ref) 851static void port_wakeup_sh(unsigned long ref)
874{ 852{
875 struct port *p_ptr; 853 struct tipc_port *p_ptr;
876 struct user_port *up_ptr; 854 struct user_port *up_ptr;
877 tipc_continue_event cb = NULL; 855 tipc_continue_event cb = NULL;
878 void *uh = NULL; 856 void *uh = NULL;
@@ -898,14 +876,14 @@ static void port_wakeup(struct tipc_port *p_ptr)
898 876
899void tipc_acknowledge(u32 ref, u32 ack) 877void tipc_acknowledge(u32 ref, u32 ack)
900{ 878{
901 struct port *p_ptr; 879 struct tipc_port *p_ptr;
902 struct sk_buff *buf = NULL; 880 struct sk_buff *buf = NULL;
903 881
904 p_ptr = tipc_port_lock(ref); 882 p_ptr = tipc_port_lock(ref);
905 if (!p_ptr) 883 if (!p_ptr)
906 return; 884 return;
907 if (p_ptr->publ.connected) { 885 if (p_ptr->connected) {
908 p_ptr->publ.conn_unacked -= ack; 886 p_ptr->conn_unacked -= ack;
909 buf = port_build_proto_msg(port_peerport(p_ptr), 887 buf = port_build_proto_msg(port_peerport(p_ptr),
910 port_peernode(p_ptr), 888 port_peernode(p_ptr),
911 ref, 889 ref,
@@ -913,7 +891,6 @@ void tipc_acknowledge(u32 ref, u32 ack)
913 CONN_MANAGER, 891 CONN_MANAGER,
914 CONN_ACK, 892 CONN_ACK,
915 TIPC_OK, 893 TIPC_OK,
916 port_out_seqno(p_ptr),
917 ack); 894 ack);
918 } 895 }
919 tipc_port_unlock(p_ptr); 896 tipc_port_unlock(p_ptr);
@@ -936,14 +913,14 @@ int tipc_createport(void *usr_handle,
936 u32 *portref) 913 u32 *portref)
937{ 914{
938 struct user_port *up_ptr; 915 struct user_port *up_ptr;
939 struct port *p_ptr; 916 struct tipc_port *p_ptr;
940 917
941 up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); 918 up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
942 if (!up_ptr) { 919 if (!up_ptr) {
943 warn("Port creation failed, no memory\n"); 920 warn("Port creation failed, no memory\n");
944 return -ENOMEM; 921 return -ENOMEM;
945 } 922 }
946 p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher, 923 p_ptr = (struct tipc_port *)tipc_createport_raw(NULL, port_dispatcher,
947 port_wakeup, importance); 924 port_wakeup, importance);
948 if (!p_ptr) { 925 if (!p_ptr) {
949 kfree(up_ptr); 926 kfree(up_ptr);
@@ -952,7 +929,7 @@ int tipc_createport(void *usr_handle,
952 929
953 p_ptr->user_port = up_ptr; 930 p_ptr->user_port = up_ptr;
954 up_ptr->usr_handle = usr_handle; 931 up_ptr->usr_handle = usr_handle;
955 up_ptr->ref = p_ptr->publ.ref; 932 up_ptr->ref = p_ptr->ref;
956 up_ptr->err_cb = error_cb; 933 up_ptr->err_cb = error_cb;
957 up_ptr->named_err_cb = named_error_cb; 934 up_ptr->named_err_cb = named_error_cb;
958 up_ptr->conn_err_cb = conn_error_cb; 935 up_ptr->conn_err_cb = conn_error_cb;
@@ -960,26 +937,26 @@ int tipc_createport(void *usr_handle,
960 up_ptr->named_msg_cb = named_msg_cb; 937 up_ptr->named_msg_cb = named_msg_cb;
961 up_ptr->conn_msg_cb = conn_msg_cb; 938 up_ptr->conn_msg_cb = conn_msg_cb;
962 up_ptr->continue_event_cb = continue_event_cb; 939 up_ptr->continue_event_cb = continue_event_cb;
963 *portref = p_ptr->publ.ref; 940 *portref = p_ptr->ref;
964 tipc_port_unlock(p_ptr); 941 tipc_port_unlock(p_ptr);
965 return 0; 942 return 0;
966} 943}
967 944
968int tipc_portimportance(u32 ref, unsigned int *importance) 945int tipc_portimportance(u32 ref, unsigned int *importance)
969{ 946{
970 struct port *p_ptr; 947 struct tipc_port *p_ptr;
971 948
972 p_ptr = tipc_port_lock(ref); 949 p_ptr = tipc_port_lock(ref);
973 if (!p_ptr) 950 if (!p_ptr)
974 return -EINVAL; 951 return -EINVAL;
975 *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); 952 *importance = (unsigned int)msg_importance(&p_ptr->phdr);
976 tipc_port_unlock(p_ptr); 953 tipc_port_unlock(p_ptr);
977 return 0; 954 return 0;
978} 955}
979 956
980int tipc_set_portimportance(u32 ref, unsigned int imp) 957int tipc_set_portimportance(u32 ref, unsigned int imp)
981{ 958{
982 struct port *p_ptr; 959 struct tipc_port *p_ptr;
983 960
984 if (imp > TIPC_CRITICAL_IMPORTANCE) 961 if (imp > TIPC_CRITICAL_IMPORTANCE)
985 return -EINVAL; 962 return -EINVAL;
@@ -987,7 +964,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
987 p_ptr = tipc_port_lock(ref); 964 p_ptr = tipc_port_lock(ref);
988 if (!p_ptr) 965 if (!p_ptr)
989 return -EINVAL; 966 return -EINVAL;
990 msg_set_importance(&p_ptr->publ.phdr, (u32)imp); 967 msg_set_importance(&p_ptr->phdr, (u32)imp);
991 tipc_port_unlock(p_ptr); 968 tipc_port_unlock(p_ptr);
992 return 0; 969 return 0;
993} 970}
@@ -995,7 +972,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
995 972
996int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) 973int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
997{ 974{
998 struct port *p_ptr; 975 struct tipc_port *p_ptr;
999 struct publication *publ; 976 struct publication *publ;
1000 u32 key; 977 u32 key;
1001 int res = -EINVAL; 978 int res = -EINVAL;
@@ -1004,7 +981,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1004 if (!p_ptr) 981 if (!p_ptr)
1005 return -EINVAL; 982 return -EINVAL;
1006 983
1007 if (p_ptr->publ.connected) 984 if (p_ptr->connected)
1008 goto exit; 985 goto exit;
1009 if (seq->lower > seq->upper) 986 if (seq->lower > seq->upper)
1010 goto exit; 987 goto exit;
@@ -1016,11 +993,11 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1016 goto exit; 993 goto exit;
1017 } 994 }
1018 publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, 995 publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
1019 scope, p_ptr->publ.ref, key); 996 scope, p_ptr->ref, key);
1020 if (publ) { 997 if (publ) {
1021 list_add(&publ->pport_list, &p_ptr->publications); 998 list_add(&publ->pport_list, &p_ptr->publications);
1022 p_ptr->pub_count++; 999 p_ptr->pub_count++;
1023 p_ptr->publ.published = 1; 1000 p_ptr->published = 1;
1024 res = 0; 1001 res = 0;
1025 } 1002 }
1026exit: 1003exit:
@@ -1030,7 +1007,7 @@ exit:
1030 1007
1031int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) 1008int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1032{ 1009{
1033 struct port *p_ptr; 1010 struct tipc_port *p_ptr;
1034 struct publication *publ; 1011 struct publication *publ;
1035 struct publication *tpubl; 1012 struct publication *tpubl;
1036 int res = -EINVAL; 1013 int res = -EINVAL;
@@ -1063,37 +1040,36 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1063 } 1040 }
1064 } 1041 }
1065 if (list_empty(&p_ptr->publications)) 1042 if (list_empty(&p_ptr->publications))
1066 p_ptr->publ.published = 0; 1043 p_ptr->published = 0;
1067 tipc_port_unlock(p_ptr); 1044 tipc_port_unlock(p_ptr);
1068 return res; 1045 return res;
1069} 1046}
1070 1047
1071int tipc_connect2port(u32 ref, struct tipc_portid const *peer) 1048int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
1072{ 1049{
1073 struct port *p_ptr; 1050 struct tipc_port *p_ptr;
1074 struct tipc_msg *msg; 1051 struct tipc_msg *msg;
1075 int res = -EINVAL; 1052 int res = -EINVAL;
1076 1053
1077 p_ptr = tipc_port_lock(ref); 1054 p_ptr = tipc_port_lock(ref);
1078 if (!p_ptr) 1055 if (!p_ptr)
1079 return -EINVAL; 1056 return -EINVAL;
1080 if (p_ptr->publ.published || p_ptr->publ.connected) 1057 if (p_ptr->published || p_ptr->connected)
1081 goto exit; 1058 goto exit;
1082 if (!peer->ref) 1059 if (!peer->ref)
1083 goto exit; 1060 goto exit;
1084 1061
1085 msg = &p_ptr->publ.phdr; 1062 msg = &p_ptr->phdr;
1086 msg_set_destnode(msg, peer->node); 1063 msg_set_destnode(msg, peer->node);
1087 msg_set_destport(msg, peer->ref); 1064 msg_set_destport(msg, peer->ref);
1088 msg_set_orignode(msg, tipc_own_addr); 1065 msg_set_orignode(msg, tipc_own_addr);
1089 msg_set_origport(msg, p_ptr->publ.ref); 1066 msg_set_origport(msg, p_ptr->ref);
1090 msg_set_transp_seqno(msg, 42);
1091 msg_set_type(msg, TIPC_CONN_MSG); 1067 msg_set_type(msg, TIPC_CONN_MSG);
1092 msg_set_hdr_sz(msg, SHORT_H_SIZE); 1068 msg_set_hdr_sz(msg, SHORT_H_SIZE);
1093 1069
1094 p_ptr->probing_interval = PROBING_INTERVAL; 1070 p_ptr->probing_interval = PROBING_INTERVAL;
1095 p_ptr->probing_state = CONFIRMED; 1071 p_ptr->probing_state = CONFIRMED;
1096 p_ptr->publ.connected = 1; 1072 p_ptr->connected = 1;
1097 k_start_timer(&p_ptr->timer, p_ptr->probing_interval); 1073 k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
1098 1074
1099 tipc_nodesub_subscribe(&p_ptr->subscription, peer->node, 1075 tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
@@ -1102,7 +1078,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
1102 res = 0; 1078 res = 0;
1103exit: 1079exit:
1104 tipc_port_unlock(p_ptr); 1080 tipc_port_unlock(p_ptr);
1105 p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); 1081 p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
1106 return res; 1082 return res;
1107} 1083}
1108 1084
@@ -1120,7 +1096,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr)
 		tp_ptr->connected = 0;
 		/* let timer expire on it's own to avoid deadlock! */
 		tipc_nodesub_unsubscribe(
-			&((struct port *)tp_ptr)->subscription);
+			&((struct tipc_port *)tp_ptr)->subscription);
 		res = 0;
 	} else {
 		res = -ENOTCONN;
@@ -1135,7 +1111,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr)
 
 int tipc_disconnect(u32 ref)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	int res;
 
 	p_ptr = tipc_port_lock(ref);
@@ -1151,15 +1127,15 @@ int tipc_disconnect(u32 ref)
  */
 int tipc_shutdown(u32 ref)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct sk_buff *buf = NULL;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
 
-	if (p_ptr->publ.connected) {
-		u32 imp = msg_importance(&p_ptr->publ.phdr);
+	if (p_ptr->connected) {
+		u32 imp = msg_importance(&p_ptr->phdr);
 		if (imp < TIPC_CRITICAL_IMPORTANCE)
 			imp++;
 		buf = port_build_proto_msg(port_peerport(p_ptr),
@@ -1169,7 +1145,6 @@ int tipc_shutdown(u32 ref)
 					   imp,
 					   TIPC_CONN_MSG,
 					   TIPC_CONN_SHUTDOWN,
-					   port_out_seqno(p_ptr),
 					   0);
 	}
 	tipc_port_unlock(p_ptr);
@@ -1182,13 +1157,13 @@ int tipc_shutdown(u32 ref)
  * message for this node.
  */
 
-static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
+static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect,
 				   struct iovec const *msg_sect)
 {
 	struct sk_buff *buf;
 	int res;
 
-	res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect,
+	res = tipc_msg_build(&sender->phdr, msg_sect, num_sect,
 			     MAX_MSG_SIZE, !sender->user_port, &buf);
 	if (likely(buf))
 		tipc_port_recv_msg(buf);
@@ -1201,15 +1176,15 @@ static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
 
 int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	u32 destnode;
 	int res;
 
 	p_ptr = tipc_port_deref(ref);
-	if (!p_ptr || !p_ptr->publ.connected)
+	if (!p_ptr || !p_ptr->connected)
 		return -EINVAL;
 
-	p_ptr->publ.congested = 1;
+	p_ptr->congested = 1;
 	if (!tipc_port_congested(p_ptr)) {
 		destnode = port_peernode(p_ptr);
 		if (likely(destnode != tipc_own_addr))
@@ -1219,14 +1194,14 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 		res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
 
 		if (likely(res != -ELINKCONG)) {
-			port_incr_out_seqno(p_ptr);
-			p_ptr->publ.congested = 0;
-			p_ptr->sent++;
+			p_ptr->congested = 0;
+			if (res > 0)
+				p_ptr->sent++;
 			return res;
 		}
 	}
 	if (port_unreliable(p_ptr)) {
-		p_ptr->publ.congested = 0;
+		p_ptr->congested = 0;
 		/* Just calculate msg length and return */
 		return tipc_msg_calc_data_size(msg_sect, num_sect);
 	}
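The tipc_send() hunk above, like the tipc_send2name() and tipc_send2port() hunks that follow, replaces the unconditional p_ptr->sent++ with a bump taken only when the result is positive, so failed or congested sends no longer inflate the port's statistics. A minimal sketch of that pattern; the error values are stand-ins, not the kernel's errno constants.

#include <stdio.h>

#define ELINKCONG 1	/* illustrative stand-in for the kernel's ELINKCONG */

static unsigned int sent;

/* Count a send only when the attempt reports success (res > 0). */
static int do_send(int res)
{
	if (res != -ELINKCONG) {
		if (res > 0)
			sent++;
		return res;
	}
	return -ELINKCONG;	/* congestion: caller may retry or wait */
}

int main(void)
{
	do_send(42);		/* success: counted */
	do_send(-22);		/* error: returned but not counted */
	do_send(-ELINKCONG);	/* congestion: not counted either */
	printf("sent = %u (expected 1)\n", sent);
	return 0;
}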
@@ -1240,17 +1215,17 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
 		   unsigned int num_sect, struct iovec const *msg_sect)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 	u32 destnode = domain;
 	u32 destport;
 	int res;
 
 	p_ptr = tipc_port_deref(ref);
-	if (!p_ptr || p_ptr->publ.connected)
+	if (!p_ptr || p_ptr->connected)
 		return -EINVAL;
 
-	msg = &p_ptr->publ.phdr;
+	msg = &p_ptr->phdr;
 	msg_set_type(msg, TIPC_NAMED_MSG);
 	msg_set_orignode(msg, tipc_own_addr);
 	msg_set_origport(msg, ref);
@@ -1263,13 +1238,17 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
 	msg_set_destport(msg, destport);
 
 	if (likely(destport)) {
-		p_ptr->sent++;
 		if (likely(destnode == tipc_own_addr))
-			return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
-		res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
-						   destnode);
-		if (likely(res != -ELINKCONG))
+			res = tipc_port_recv_sections(p_ptr, num_sect,
+						      msg_sect);
+		else
+			res = tipc_link_send_sections_fast(p_ptr, msg_sect,
+							   num_sect, destnode);
+		if (likely(res != -ELINKCONG)) {
+			if (res > 0)
+				p_ptr->sent++;
 			return res;
+		}
 		if (port_unreliable(p_ptr)) {
 			/* Just calculate msg length and return */
 			return tipc_msg_calc_data_size(msg_sect, num_sect);
@@ -1287,27 +1266,32 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
1287int tipc_send2port(u32 ref, struct tipc_portid const *dest, 1266int tipc_send2port(u32 ref, struct tipc_portid const *dest,
1288 unsigned int num_sect, struct iovec const *msg_sect) 1267 unsigned int num_sect, struct iovec const *msg_sect)
1289{ 1268{
1290 struct port *p_ptr; 1269 struct tipc_port *p_ptr;
1291 struct tipc_msg *msg; 1270 struct tipc_msg *msg;
1292 int res; 1271 int res;
1293 1272
1294 p_ptr = tipc_port_deref(ref); 1273 p_ptr = tipc_port_deref(ref);
1295 if (!p_ptr || p_ptr->publ.connected) 1274 if (!p_ptr || p_ptr->connected)
1296 return -EINVAL; 1275 return -EINVAL;
1297 1276
1298 msg = &p_ptr->publ.phdr; 1277 msg = &p_ptr->phdr;
1299 msg_set_type(msg, TIPC_DIRECT_MSG); 1278 msg_set_type(msg, TIPC_DIRECT_MSG);
1300 msg_set_orignode(msg, tipc_own_addr); 1279 msg_set_orignode(msg, tipc_own_addr);
1301 msg_set_origport(msg, ref); 1280 msg_set_origport(msg, ref);
1302 msg_set_destnode(msg, dest->node); 1281 msg_set_destnode(msg, dest->node);
1303 msg_set_destport(msg, dest->ref); 1282 msg_set_destport(msg, dest->ref);
1304 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); 1283 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
1305 p_ptr->sent++; 1284
1306 if (dest->node == tipc_own_addr) 1285 if (dest->node == tipc_own_addr)
1307 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1286 res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
1308 res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, dest->node); 1287 else
1309 if (likely(res != -ELINKCONG)) 1288 res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
1289 dest->node);
1290 if (likely(res != -ELINKCONG)) {
1291 if (res > 0)
1292 p_ptr->sent++;
1310 return res; 1293 return res;
1294 }
1311 if (port_unreliable(p_ptr)) { 1295 if (port_unreliable(p_ptr)) {
1312 /* Just calculate msg length and return */ 1296 /* Just calculate msg length and return */
1313 return tipc_msg_calc_data_size(msg_sect, num_sect); 1297 return tipc_msg_calc_data_size(msg_sect, num_sect);
@@ -1322,15 +1306,15 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest,
1322int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, 1306int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
1323 struct sk_buff *buf, unsigned int dsz) 1307 struct sk_buff *buf, unsigned int dsz)
1324{ 1308{
1325 struct port *p_ptr; 1309 struct tipc_port *p_ptr;
1326 struct tipc_msg *msg; 1310 struct tipc_msg *msg;
1327 int res; 1311 int res;
1328 1312
1329 p_ptr = (struct port *)tipc_ref_deref(ref); 1313 p_ptr = (struct tipc_port *)tipc_ref_deref(ref);
1330 if (!p_ptr || p_ptr->publ.connected) 1314 if (!p_ptr || p_ptr->connected)
1331 return -EINVAL; 1315 return -EINVAL;
1332 1316
1333 msg = &p_ptr->publ.phdr; 1317 msg = &p_ptr->phdr;
1334 msg_set_type(msg, TIPC_DIRECT_MSG); 1318 msg_set_type(msg, TIPC_DIRECT_MSG);
1335 msg_set_orignode(msg, tipc_own_addr); 1319 msg_set_orignode(msg, tipc_own_addr);
1336 msg_set_origport(msg, ref); 1320 msg_set_origport(msg, ref);
@@ -1343,12 +1327,16 @@ int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
1343 1327
1344 skb_push(buf, DIR_MSG_H_SIZE); 1328 skb_push(buf, DIR_MSG_H_SIZE);
1345 skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE); 1329 skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
1346 p_ptr->sent++; 1330
1347 if (dest->node == tipc_own_addr) 1331 if (dest->node == tipc_own_addr)
1348 return tipc_port_recv_msg(buf); 1332 res = tipc_port_recv_msg(buf);
1349 res = tipc_send_buf_fast(buf, dest->node); 1333 else
1350 if (likely(res != -ELINKCONG)) 1334 res = tipc_send_buf_fast(buf, dest->node);
1335 if (likely(res != -ELINKCONG)) {
1336 if (res > 0)
1337 p_ptr->sent++;
1351 return res; 1338 return res;
1339 }
1352 if (port_unreliable(p_ptr)) 1340 if (port_unreliable(p_ptr))
1353 return dsz; 1341 return dsz;
1354 return -ELINKCONG; 1342 return -ELINKCONG;
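
The net/tipc/port.c hunks above share one send-path change: the per-port `sent` counter is no longer bumped unconditionally before a transmit attempt, but only after a send that actually carried data (`res > 0`), and the `congested` flag handling moves with it. Since `tipc_port_congested()` compares `sent - acked` against a double flow-control window, counting empty or failed sends would burn credit the peer never acknowledges. A minimal standalone sketch of that accounting — `demo_port`, `try_send`, and the window value are invented for illustration, not the kernel API:

#include <stdio.h>

#define FLOW_CONTROL_WIN 50     /* assumed window size, for illustration */

struct demo_port {
        unsigned int sent;      /* non-empty messages sent */
        unsigned int acked;     /* acknowledgements received from the peer */
};

/* mirrors tipc_port_congested(): stall once a double window is unacked */
static int port_congested(const struct demo_port *p)
{
        return (p->sent - p->acked) >= (FLOW_CONTROL_WIN * 2);
}

/* hypothetical transmit; returns bytes sent, 0 for an empty message */
static int try_send(struct demo_port *p, int len)
{
        if (port_congested(p))
                return -1;      /* stand-in for -ELINKCONG */
        if (len > 0)
                p->sent++;      /* only non-empty messages consume credit */
        return len;
}

int main(void)
{
        struct demo_port p = { 0, 0 };
        int i;

        for (i = 0; i < 120; i++) {
                if (try_send(&p, 100) < 0) {
                        printf("congested after %u messages\n", p.sent);
                        break;
                }
        }
        return 0;
}
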
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8e84b989949..87b9424ae0e 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -2,7 +2,7 @@
2 * net/tipc/port.h: Include file for TIPC port code 2 * net/tipc/port.h: Include file for TIPC port code
3 * 3 *
4 * Copyright (c) 1994-2007, Ericsson AB 4 * Copyright (c) 1994-2007, Ericsson AB
5 * Copyright (c) 2004-2007, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,7 @@ struct user_port {
95}; 95};
96 96
97/** 97/**
98 * struct tipc_port - TIPC port info available to socket API 98 * struct tipc_port - TIPC port structure
99 * @usr_handle: pointer to additional user-defined information about port 99 * @usr_handle: pointer to additional user-defined information about port
100 * @lock: pointer to spinlock for controlling access to port 100 * @lock: pointer to spinlock for controlling access to port
101 * @connected: non-zero if port is currently connected to a peer port 101 * @connected: non-zero if port is currently connected to a peer port
@@ -107,43 +107,33 @@ struct user_port {
107 * @max_pkt: maximum packet size "hint" used when building messages sent by port 107 * @max_pkt: maximum packet size "hint" used when building messages sent by port
108 * @ref: unique reference to port in TIPC object registry 108 * @ref: unique reference to port in TIPC object registry
109 * @phdr: preformatted message header used when sending messages 109 * @phdr: preformatted message header used when sending messages
110 */
111struct tipc_port {
112 void *usr_handle;
113 spinlock_t *lock;
114 int connected;
115 u32 conn_type;
116 u32 conn_instance;
117 u32 conn_unacked;
118 int published;
119 u32 congested;
120 u32 max_pkt;
121 u32 ref;
122 struct tipc_msg phdr;
123};
124
125/**
126 * struct port - TIPC port structure
127 * @publ: TIPC port info available to privileged users
128 * @port_list: adjacent ports in TIPC's global list of ports 110 * @port_list: adjacent ports in TIPC's global list of ports
129 * @dispatcher: ptr to routine which handles received messages 111 * @dispatcher: ptr to routine which handles received messages
130 * @wakeup: ptr to routine to call when port is no longer congested 112 * @wakeup: ptr to routine to call when port is no longer congested
131 * @user_port: ptr to user port associated with port (if any) 113 * @user_port: ptr to user port associated with port (if any)
132 * @wait_list: adjacent ports in list of ports waiting on link congestion 114 * @wait_list: adjacent ports in list of ports waiting on link congestion
133 * @waiting_pkts: 115 * @waiting_pkts:
134 * @sent: 116 * @sent: # of non-empty messages sent by port
135 * @acked: 117 * @acked: # of non-empty message acknowledgements from connected port's peer
136 * @publications: list of publications for port 118 * @publications: list of publications for port
137 * @pub_count: total # of publications port has made during its lifetime 119 * @pub_count: total # of publications port has made during its lifetime
138 * @probing_state: 120 * @probing_state:
139 * @probing_interval: 121 * @probing_interval:
140 * @last_in_seqno:
141 * @timer_ref: 122 * @timer_ref:
142 * @subscription: "node down" subscription used to terminate failed connections 123 * @subscription: "node down" subscription used to terminate failed connections
143 */ 124 */
144 125struct tipc_port {
145struct port { 126 void *usr_handle;
146 struct tipc_port publ; 127 spinlock_t *lock;
128 int connected;
129 u32 conn_type;
130 u32 conn_instance;
131 u32 conn_unacked;
132 int published;
133 u32 congested;
134 u32 max_pkt;
135 u32 ref;
136 struct tipc_msg phdr;
147 struct list_head port_list; 137 struct list_head port_list;
148 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); 138 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
149 void (*wakeup)(struct tipc_port *); 139 void (*wakeup)(struct tipc_port *);
@@ -156,7 +146,6 @@ struct port {
156 u32 pub_count; 146 u32 pub_count;
157 u32 probing_state; 147 u32 probing_state;
158 u32 probing_interval; 148 u32 probing_interval;
159 u32 last_in_seqno;
160 struct timer_list timer; 149 struct timer_list timer;
161 struct tipc_node_subscr subscription; 150 struct tipc_node_subscr subscription;
162}; 151};
@@ -230,7 +219,7 @@ int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest,
230int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, 219int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
231 unsigned int section_count, struct iovec const *msg); 220 unsigned int section_count, struct iovec const *msg);
232 221
233int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, 222int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
234 struct iovec const *msg_sect, u32 num_sect, 223 struct iovec const *msg_sect, u32 num_sect,
235 int err); 224 int err);
236struct sk_buff *tipc_port_get_ports(void); 225struct sk_buff *tipc_port_get_ports(void);
@@ -242,9 +231,9 @@ void tipc_port_reinit(void);
242 * tipc_port_lock - lock port instance referred to and return its pointer 231 * tipc_port_lock - lock port instance referred to and return its pointer
243 */ 232 */
244 233
245static inline struct port *tipc_port_lock(u32 ref) 234static inline struct tipc_port *tipc_port_lock(u32 ref)
246{ 235{
247 return (struct port *)tipc_ref_lock(ref); 236 return (struct tipc_port *)tipc_ref_lock(ref);
248} 237}
249 238
250/** 239/**
@@ -253,27 +242,27 @@ static inline struct port *tipc_port_lock(u32 ref)
253 * Can use pointer instead of tipc_ref_unlock() since port is already locked. 242 * Can use pointer instead of tipc_ref_unlock() since port is already locked.
254 */ 243 */
255 244
256static inline void tipc_port_unlock(struct port *p_ptr) 245static inline void tipc_port_unlock(struct tipc_port *p_ptr)
257{ 246{
258 spin_unlock_bh(p_ptr->publ.lock); 247 spin_unlock_bh(p_ptr->lock);
259} 248}
260 249
261static inline struct port *tipc_port_deref(u32 ref) 250static inline struct tipc_port *tipc_port_deref(u32 ref)
262{ 251{
263 return (struct port *)tipc_ref_deref(ref); 252 return (struct tipc_port *)tipc_ref_deref(ref);
264} 253}
265 254
266static inline u32 tipc_peer_port(struct port *p_ptr) 255static inline u32 tipc_peer_port(struct tipc_port *p_ptr)
267{ 256{
268 return msg_destport(&p_ptr->publ.phdr); 257 return msg_destport(&p_ptr->phdr);
269} 258}
270 259
271static inline u32 tipc_peer_node(struct port *p_ptr) 260static inline u32 tipc_peer_node(struct tipc_port *p_ptr)
272{ 261{
273 return msg_destnode(&p_ptr->publ.phdr); 262 return msg_destnode(&p_ptr->phdr);
274} 263}
275 264
276static inline int tipc_port_congested(struct port *p_ptr) 265static inline int tipc_port_congested(struct tipc_port *p_ptr)
277{ 266{
278 return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); 267 return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
279} 268}
@@ -284,7 +273,7 @@ static inline int tipc_port_congested(struct port *p_ptr)
284 273
285static inline int tipc_port_recv_msg(struct sk_buff *buf) 274static inline int tipc_port_recv_msg(struct sk_buff *buf)
286{ 275{
287 struct port *p_ptr; 276 struct tipc_port *p_ptr;
288 struct tipc_msg *msg = buf_msg(buf); 277 struct tipc_msg *msg = buf_msg(buf);
289 u32 destport = msg_destport(msg); 278 u32 destport = msg_destport(msg);
290 u32 dsz = msg_data_sz(msg); 279 u32 dsz = msg_data_sz(msg);
@@ -299,7 +288,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf)
299 /* validate destination & pass to port, otherwise reject message */ 288 /* validate destination & pass to port, otherwise reject message */
300 p_ptr = tipc_port_lock(destport); 289 p_ptr = tipc_port_lock(destport);
301 if (likely(p_ptr)) { 290 if (likely(p_ptr)) {
302 if (likely(p_ptr->publ.connected)) { 291 if (likely(p_ptr->connected)) {
303 if ((unlikely(msg_origport(msg) != tipc_peer_port(p_ptr))) || 292 if ((unlikely(msg_origport(msg) != tipc_peer_port(p_ptr))) ||
304 (unlikely(msg_orignode(msg) != tipc_peer_node(p_ptr))) || 293 (unlikely(msg_orignode(msg) != tipc_peer_node(p_ptr))) ||
305 (unlikely(!msg_connected(msg)))) { 294 (unlikely(!msg_connected(msg)))) {
@@ -308,7 +297,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf)
308 goto reject; 297 goto reject;
309 } 298 }
310 } 299 }
311 err = p_ptr->dispatcher(&p_ptr->publ, buf); 300 err = p_ptr->dispatcher(p_ptr, buf);
312 tipc_port_unlock(p_ptr); 301 tipc_port_unlock(p_ptr);
313 if (likely(!err)) 302 if (likely(!err))
314 return dsz; 303 return dsz;
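
The port.h diff above folds the old split between a public `struct tipc_port` and a private wrapper `struct port` (which embedded it as `publ`) into one flat `struct tipc_port`. Every accessor loses a level of indirection — `p_ptr->publ.lock` becomes `p_ptr->lock`, and the dispatcher is called with `p_ptr` itself rather than `&p_ptr->publ`. A toy before/after of the layout change, reduced to two fields:

#include <stdio.h>

/* before: shared state was reached through an embedded member */
struct old_public { int connected; };
struct old_port   { struct old_public publ; unsigned int sent; };

/* after: one flat structure, one indirection fewer at every use site */
struct tipc_port_demo { int connected; unsigned int sent; };

int main(void)
{
        struct old_port       op = { { 1 }, 5 };
        struct tipc_port_demo np = { 1, 5 };

        printf("old: op.publ.connected = %d\n", op.publ.connected);
        printf("new: np.connected      = %d\n", np.connected);
        return 0;
}

Since the wrapper grouped fields without holding distinct state of its own, the flattening is mechanical; casts from the reference registry now go straight to `struct tipc_port *`.
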
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 2b02a3a8031..29d94d53198 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2,7 +2,7 @@
2 * net/tipc/socket.c: TIPC socket API 2 * net/tipc/socket.c: TIPC socket API
3 * 3 *
4 * Copyright (c) 2001-2007, Ericsson AB 4 * Copyright (c) 2001-2007, Ericsson AB
5 * Copyright (c) 2004-2008, Wind River Systems 5 * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -58,6 +58,9 @@ struct tipc_sock {
58#define tipc_sk(sk) ((struct tipc_sock *)(sk)) 58#define tipc_sk(sk) ((struct tipc_sock *)(sk))
59#define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p)) 59#define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p))
60 60
61#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \
62 (sock->state == SS_DISCONNECTING))
63
61static int backlog_rcv(struct sock *sk, struct sk_buff *skb); 64static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
62static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); 65static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
63static void wakeupdispatch(struct tipc_port *tport); 66static void wakeupdispatch(struct tipc_port *tport);
@@ -241,7 +244,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
241 tipc_set_portunreliable(tp_ptr->ref, 1); 244 tipc_set_portunreliable(tp_ptr->ref, 1);
242 } 245 }
243 246
244 atomic_inc(&tipc_user_count);
245 return 0; 247 return 0;
246} 248}
247 249
@@ -290,7 +292,7 @@ static int release(struct socket *sock)
290 if (buf == NULL) 292 if (buf == NULL)
291 break; 293 break;
292 atomic_dec(&tipc_queue_size); 294 atomic_dec(&tipc_queue_size);
293 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) 295 if (TIPC_SKB_CB(buf)->handle != 0)
294 buf_discard(buf); 296 buf_discard(buf);
295 else { 297 else {
296 if ((sock->state == SS_CONNECTING) || 298 if ((sock->state == SS_CONNECTING) ||
@@ -321,7 +323,6 @@ static int release(struct socket *sock)
321 sock_put(sk); 323 sock_put(sk);
322 sock->sk = NULL; 324 sock->sk = NULL;
323 325
324 atomic_dec(&tipc_user_count);
325 return res; 326 return res;
326} 327}
327 328
@@ -495,6 +496,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
495 if (likely(dest->addr.name.name.type != TIPC_CFG_SRV)) 496 if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
496 return -EACCES; 497 return -EACCES;
497 498
499 if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
500 return -EMSGSIZE;
498 if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) 501 if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
499 return -EFAULT; 502 return -EFAULT;
500 if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) 503 if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
@@ -911,15 +914,13 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
911 struct tipc_port *tport = tipc_sk_port(sk); 914 struct tipc_port *tport = tipc_sk_port(sk);
912 struct sk_buff *buf; 915 struct sk_buff *buf;
913 struct tipc_msg *msg; 916 struct tipc_msg *msg;
917 long timeout;
914 unsigned int sz; 918 unsigned int sz;
915 u32 err; 919 u32 err;
916 int res; 920 int res;
917 921
918 /* Catch invalid receive requests */ 922 /* Catch invalid receive requests */
919 923
920 if (m->msg_iovlen != 1)
921 return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */
922
923 if (unlikely(!buf_len)) 924 if (unlikely(!buf_len))
924 return -EINVAL; 925 return -EINVAL;
925 926
@@ -930,6 +931,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
930 goto exit; 931 goto exit;
931 } 932 }
932 933
934 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
933restart: 935restart:
934 936
935 /* Look for a message in receive queue; wait if necessary */ 937 /* Look for a message in receive queue; wait if necessary */
@@ -939,17 +941,15 @@ restart:
939 res = -ENOTCONN; 941 res = -ENOTCONN;
940 goto exit; 942 goto exit;
941 } 943 }
942 if (flags & MSG_DONTWAIT) { 944 if (timeout <= 0L) {
943 res = -EWOULDBLOCK; 945 res = timeout ? timeout : -EWOULDBLOCK;
944 goto exit; 946 goto exit;
945 } 947 }
946 release_sock(sk); 948 release_sock(sk);
947 res = wait_event_interruptible(*sk_sleep(sk), 949 timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
948 (!skb_queue_empty(&sk->sk_receive_queue) || 950 tipc_rx_ready(sock),
949 (sock->state == SS_DISCONNECTING))); 951 timeout);
950 lock_sock(sk); 952 lock_sock(sk);
951 if (res)
952 goto exit;
953 } 953 }
954 954
955 /* Look at first message in receive queue */ 955 /* Look at first message in receive queue */
@@ -991,11 +991,10 @@ restart:
991 sz = buf_len; 991 sz = buf_len;
992 m->msg_flags |= MSG_TRUNC; 992 m->msg_flags |= MSG_TRUNC;
993 } 993 }
994 if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg), 994 res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
995 sz))) { 995 m->msg_iov, sz);
996 res = -EFAULT; 996 if (res)
997 goto exit; 997 goto exit;
998 }
999 res = sz; 998 res = sz;
1000 } else { 999 } else {
1001 if ((sock->state == SS_READY) || 1000 if ((sock->state == SS_READY) ||
@@ -1038,19 +1037,15 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1038 struct tipc_port *tport = tipc_sk_port(sk); 1037 struct tipc_port *tport = tipc_sk_port(sk);
1039 struct sk_buff *buf; 1038 struct sk_buff *buf;
1040 struct tipc_msg *msg; 1039 struct tipc_msg *msg;
1040 long timeout;
1041 unsigned int sz; 1041 unsigned int sz;
1042 int sz_to_copy, target, needed; 1042 int sz_to_copy, target, needed;
1043 int sz_copied = 0; 1043 int sz_copied = 0;
1044 char __user *crs = m->msg_iov->iov_base;
1045 unsigned char *buf_crs;
1046 u32 err; 1044 u32 err;
1047 int res = 0; 1045 int res = 0;
1048 1046
1049 /* Catch invalid receive attempts */ 1047 /* Catch invalid receive attempts */
1050 1048
1051 if (m->msg_iovlen != 1)
1052 return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */
1053
1054 if (unlikely(!buf_len)) 1049 if (unlikely(!buf_len))
1055 return -EINVAL; 1050 return -EINVAL;
1056 1051
@@ -1063,7 +1058,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1063 } 1058 }
1064 1059
1065 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); 1060 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1066 1061 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1067restart: 1062restart:
1068 1063
1069 /* Look for a message in receive queue; wait if necessary */ 1064 /* Look for a message in receive queue; wait if necessary */
@@ -1073,17 +1068,15 @@ restart:
1073 res = -ENOTCONN; 1068 res = -ENOTCONN;
1074 goto exit; 1069 goto exit;
1075 } 1070 }
1076 if (flags & MSG_DONTWAIT) { 1071 if (timeout <= 0L) {
1077 res = -EWOULDBLOCK; 1072 res = timeout ? timeout : -EWOULDBLOCK;
1078 goto exit; 1073 goto exit;
1079 } 1074 }
1080 release_sock(sk); 1075 release_sock(sk);
1081 res = wait_event_interruptible(*sk_sleep(sk), 1076 timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
1082 (!skb_queue_empty(&sk->sk_receive_queue) || 1077 tipc_rx_ready(sock),
1083 (sock->state == SS_DISCONNECTING))); 1078 timeout);
1084 lock_sock(sk); 1079 lock_sock(sk);
1085 if (res)
1086 goto exit;
1087 } 1080 }
1088 1081
1089 /* Look at first message in receive queue */ 1082 /* Look at first message in receive queue */
@@ -1112,24 +1105,25 @@ restart:
1112 /* Capture message data (if valid) & compute return value (always) */ 1105 /* Capture message data (if valid) & compute return value (always) */
1113 1106
1114 if (!err) { 1107 if (!err) {
1115 buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); 1108 u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
1116 sz = (unsigned char *)msg + msg_size(msg) - buf_crs;
1117 1109
1110 sz -= offset;
1118 needed = (buf_len - sz_copied); 1111 needed = (buf_len - sz_copied);
1119 sz_to_copy = (sz <= needed) ? sz : needed; 1112 sz_to_copy = (sz <= needed) ? sz : needed;
1120 if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) { 1113
1121 res = -EFAULT; 1114 res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
1115 m->msg_iov, sz_to_copy);
1116 if (res)
1122 goto exit; 1117 goto exit;
1123 } 1118
1124 sz_copied += sz_to_copy; 1119 sz_copied += sz_to_copy;
1125 1120
1126 if (sz_to_copy < sz) { 1121 if (sz_to_copy < sz) {
1127 if (!(flags & MSG_PEEK)) 1122 if (!(flags & MSG_PEEK))
1128 TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy; 1123 TIPC_SKB_CB(buf)->handle =
1124 (void *)(unsigned long)(offset + sz_to_copy);
1129 goto exit; 1125 goto exit;
1130 } 1126 }
1131
1132 crs += sz_to_copy;
1133 } else { 1127 } else {
1134 if (sz_copied != 0) 1128 if (sz_copied != 0)
1135 goto exit; /* can't add error msg to valid data */ 1129 goto exit; /* can't add error msg to valid data */
@@ -1256,7 +1250,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1256 1250
1257 /* Enqueue message (finally!) */ 1251 /* Enqueue message (finally!) */
1258 1252
1259 TIPC_SKB_CB(buf)->handle = msg_data(msg); 1253 TIPC_SKB_CB(buf)->handle = 0;
1260 atomic_inc(&tipc_queue_size); 1254 atomic_inc(&tipc_queue_size);
1261 __skb_queue_tail(&sk->sk_receive_queue, buf); 1255 __skb_queue_tail(&sk->sk_receive_queue, buf);
1262 1256
@@ -1608,7 +1602,7 @@ restart:
1608 buf = __skb_dequeue(&sk->sk_receive_queue); 1602 buf = __skb_dequeue(&sk->sk_receive_queue);
1609 if (buf) { 1603 if (buf) {
1610 atomic_dec(&tipc_queue_size); 1604 atomic_dec(&tipc_queue_size);
1611 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) { 1605 if (TIPC_SKB_CB(buf)->handle != 0) {
1612 buf_discard(buf); 1606 buf_discard(buf);
1613 goto restart; 1607 goto restart;
1614 } 1608 }
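
Two independent improvements run through the socket.c diff above. First, both receive paths now honor SO_RCVTIMEO: the budget is fetched once with `sock_rcvtimeo(sk, flags & MSG_DONTWAIT)` and threaded through `wait_event_interruptible_timeout()`, so a non-blocking call sees a zero budget and blocking calls give up when it runs out, instead of the old binary MSG_DONTWAIT-or-wait-forever behavior. Second, a partially read stream message now records a plain byte offset in `TIPC_SKB_CB(buf)->handle` and resumes with `skb_copy_datagram_iovec(buf, hdr_sz + offset, ...)`, which is also what lets the single-iovec restriction be dropped. A userspace sketch of the timeout loop's control flow — `wait_for_data` and the state variables are stand-ins, not kernel calls:

#include <stdio.h>

static int queue_len = 0;       /* stand-in for the receive queue */
static int disconnecting = 0;   /* stand-in for SS_DISCONNECTING */

/* hypothetical wait: returns the remaining budget, 0 on expiry, like
 * wait_event_interruptible_timeout() when the condition stays false */
static long wait_for_data(long timeout)
{
        return timeout > 10 ? timeout - 10 : 0;
}

static long demo_recv(long timeout, int nonblock)
{
        if (nonblock)
                timeout = 0;    /* sock_rcvtimeo() yields 0 for MSG_DONTWAIT */

        while (queue_len == 0) {
                if (disconnecting)
                        return -2;                      /* -ENOTCONN */
                if (timeout <= 0)
                        return timeout ? timeout : -1;  /* -EWOULDBLOCK */
                timeout = wait_for_data(timeout);
        }
        return queue_len;
}

int main(void)
{
        printf("nonblocking: %ld\n", demo_recv(100, 1)); /* fails at once */
        printf("with budget: %ld\n", demo_recv(100, 0)); /* fails on expiry */
        return 0;
}
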
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ca04479c3d4..aae9eae1340 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -2,7 +2,7 @@
2 * net/tipc/subscr.c: TIPC network topology service 2 * net/tipc/subscr.c: TIPC network topology service
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2005-2007, Wind River Systems 5 * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -160,7 +160,7 @@ void tipc_subscr_report_overlap(struct subscription *sub,
160 160
161static void subscr_timeout(struct subscription *sub) 161static void subscr_timeout(struct subscription *sub)
162{ 162{
163 struct port *server_port; 163 struct tipc_port *server_port;
164 164
165 /* Validate server port reference (in case subscriber is terminating) */ 165 /* Validate server port reference (in case subscriber is terminating) */
166 166
@@ -472,8 +472,6 @@ static void subscr_named_msg_event(void *usr_handle,
472 struct tipc_portid const *orig, 472 struct tipc_portid const *orig,
473 struct tipc_name_seq const *dest) 473 struct tipc_name_seq const *dest)
474{ 474{
475 static struct iovec msg_sect = {NULL, 0};
476
477 struct subscriber *subscriber; 475 struct subscriber *subscriber;
478 u32 server_port_ref; 476 u32 server_port_ref;
479 477
@@ -508,7 +506,7 @@ static void subscr_named_msg_event(void *usr_handle,
508 506
509 /* Lock server port (& save lock address for future use) */ 507 /* Lock server port (& save lock address for future use) */
510 508
511 subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock; 509 subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock;
512 510
513 /* Add subscriber to topology server's subscriber list */ 511 /* Add subscriber to topology server's subscriber list */
514 512
@@ -523,7 +521,7 @@ static void subscr_named_msg_event(void *usr_handle,
523 521
524 /* Send an ACK- to complete connection handshaking */ 522 /* Send an ACK- to complete connection handshaking */
525 523
526 tipc_send(server_port_ref, 1, &msg_sect); 524 tipc_send(server_port_ref, 0, NULL);
527 525
528 /* Handle optional subscription request */ 526 /* Handle optional subscription request */
529 527
@@ -542,7 +540,6 @@ int tipc_subscr_start(void)
542 spin_lock_init(&topsrv.lock); 540 spin_lock_init(&topsrv.lock);
543 INIT_LIST_HEAD(&topsrv.subscriber_list); 541 INIT_LIST_HEAD(&topsrv.subscriber_list);
544 542
545 spin_lock_bh(&topsrv.lock);
546 res = tipc_createport(NULL, 543 res = tipc_createport(NULL,
547 TIPC_CRITICAL_IMPORTANCE, 544 TIPC_CRITICAL_IMPORTANCE,
548 NULL, 545 NULL,
@@ -563,12 +560,10 @@ int tipc_subscr_start(void)
563 goto failed; 560 goto failed;
564 } 561 }
565 562
566 spin_unlock_bh(&topsrv.lock);
567 return 0; 563 return 0;
568 564
569failed: 565failed:
570 err("Failed to create subscription service\n"); 566 err("Failed to create subscription service\n");
571 spin_unlock_bh(&topsrv.lock);
572 return res; 567 return res;
573} 568}
574 569
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d28620..1663e1a2efd 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
850 * Get the parent directory, calculate the hash for last 850 * Get the parent directory, calculate the hash for last
851 * component. 851 * component.
852 */ 852 */
853 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 853 err = kern_path_parent(sunaddr->sun_path, &nd);
854 if (err) 854 if (err)
855 goto out_mknod_parent; 855 goto out_mknod_parent;
856 856
@@ -1124,7 +1124,7 @@ restart:
1124 1124
1125 /* Latch our state. 1125 /* Latch our state.
1126 1126
1127 It is tricky place. We need to grab write lock and cannot 1127 It is tricky place. We need to grab our state lock and cannot
1128 drop lock on peer. It is dangerous because deadlock is 1128 drop lock on peer. It is dangerous because deadlock is
1129 possible. Connect to self case and simultaneous 1129 possible. Connect to self case and simultaneous
1130 attempt to connect are eliminated by checking socket 1130 attempt to connect are eliminated by checking socket
@@ -1171,7 +1171,7 @@ restart:
1171 newsk->sk_type = sk->sk_type; 1171 newsk->sk_type = sk->sk_type;
1172 init_peercred(newsk); 1172 init_peercred(newsk);
1173 newu = unix_sk(newsk); 1173 newu = unix_sk(newsk);
1174 newsk->sk_wq = &newu->peer_wq; 1174 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1175 otheru = unix_sk(other); 1175 otheru = unix_sk(other);
1176 1176
1177 /* copy address information from listening to new sock*/ 1177 /* copy address information from listening to new sock*/
@@ -1475,6 +1475,12 @@ restart:
1475 goto out_free; 1475 goto out_free;
1476 } 1476 }
1477 1477
1478 if (sk_filter(other, skb) < 0) {
1479 /* Toss the packet but do not return any error to the sender */
1480 err = len;
1481 goto out_free;
1482 }
1483
1478 unix_state_lock(other); 1484 unix_state_lock(other);
1479 err = -EPERM; 1485 err = -EPERM;
1480 if (!unix_may_send(sk, other)) 1486 if (!unix_may_send(sk, other))
@@ -1561,7 +1567,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1561 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1567 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1562 struct sock *sk = sock->sk; 1568 struct sock *sk = sock->sk;
1563 struct sock *other = NULL; 1569 struct sock *other = NULL;
1564 struct sockaddr_un *sunaddr = msg->msg_name;
1565 int err, size; 1570 int err, size;
1566 struct sk_buff *skb; 1571 struct sk_buff *skb;
1567 int sent = 0; 1572 int sent = 0;
@@ -1584,7 +1589,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1584 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 1589 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1585 goto out_err; 1590 goto out_err;
1586 } else { 1591 } else {
1587 sunaddr = NULL;
1588 err = -ENOTCONN; 1592 err = -ENOTCONN;
1589 other = unix_peer(sk); 1593 other = unix_peer(sk);
1590 if (!other) 1594 if (!other)
@@ -1724,7 +1728,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1724 1728
1725 msg->msg_namelen = 0; 1729 msg->msg_namelen = 0;
1726 1730
1727 mutex_lock(&u->readlock); 1731 err = mutex_lock_interruptible(&u->readlock);
1732 if (err) {
1733 err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1734 goto out;
1735 }
1728 1736
1729 skb = skb_recv_datagram(sk, flags, noblock, &err); 1737 skb = skb_recv_datagram(sk, flags, noblock, &err);
1730 if (!skb) { 1738 if (!skb) {
@@ -1864,7 +1872,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1864 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1872 memset(&tmp_scm, 0, sizeof(tmp_scm));
1865 } 1873 }
1866 1874
1867 mutex_lock(&u->readlock); 1875 err = mutex_lock_interruptible(&u->readlock);
1876 if (err) {
1877 err = sock_intr_errno(timeo);
1878 goto out;
1879 }
1868 1880
1869 do { 1881 do {
1870 int chunk; 1882 int chunk;
@@ -1895,11 +1907,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1895 1907
1896 timeo = unix_stream_data_wait(sk, timeo); 1908 timeo = unix_stream_data_wait(sk, timeo);
1897 1909
1898 if (signal_pending(current)) { 1910 if (signal_pending(current)
1911 || mutex_lock_interruptible(&u->readlock)) {
1899 err = sock_intr_errno(timeo); 1912 err = sock_intr_errno(timeo);
1900 goto out; 1913 goto out;
1901 } 1914 }
1902 mutex_lock(&u->readlock); 1915
1903 continue; 1916 continue;
1904 unlock: 1917 unlock:
1905 unix_state_unlock(sk); 1918 unix_state_unlock(sk);
@@ -1978,36 +1991,38 @@ static int unix_shutdown(struct socket *sock, int mode)
1978 1991
1979 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); 1992 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1980 1993
1981 if (mode) { 1994 if (!mode)
1982 unix_state_lock(sk); 1995 return 0;
1983 sk->sk_shutdown |= mode; 1996
1984 other = unix_peer(sk); 1997 unix_state_lock(sk);
1985 if (other) 1998 sk->sk_shutdown |= mode;
1986 sock_hold(other); 1999 other = unix_peer(sk);
1987 unix_state_unlock(sk); 2000 if (other)
1988 sk->sk_state_change(sk); 2001 sock_hold(other);
1989 2002 unix_state_unlock(sk);
1990 if (other && 2003 sk->sk_state_change(sk);
1991 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2004
1992 2005 if (other &&
1993 int peer_mode = 0; 2006 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1994 2007
1995 if (mode&RCV_SHUTDOWN) 2008 int peer_mode = 0;
1996 peer_mode |= SEND_SHUTDOWN; 2009
1997 if (mode&SEND_SHUTDOWN) 2010 if (mode&RCV_SHUTDOWN)
1998 peer_mode |= RCV_SHUTDOWN; 2011 peer_mode |= SEND_SHUTDOWN;
1999 unix_state_lock(other); 2012 if (mode&SEND_SHUTDOWN)
2000 other->sk_shutdown |= peer_mode; 2013 peer_mode |= RCV_SHUTDOWN;
2001 unix_state_unlock(other); 2014 unix_state_lock(other);
2002 other->sk_state_change(other); 2015 other->sk_shutdown |= peer_mode;
2003 if (peer_mode == SHUTDOWN_MASK) 2016 unix_state_unlock(other);
2004 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2017 other->sk_state_change(other);
2005 else if (peer_mode & RCV_SHUTDOWN) 2018 if (peer_mode == SHUTDOWN_MASK)
2006 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2019 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2007 } 2020 else if (peer_mode & RCV_SHUTDOWN)
2008 if (other) 2021 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2009 sock_put(other);
2010 } 2022 }
2023 if (other)
2024 sock_put(other);
2025
2011 return 0; 2026 return 0;
2012} 2027}
2013 2028
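
The af_unix.c changes above fall into two groups. The receive paths stop taking `u->readlock` uninterruptibly: `mutex_lock_interruptible()` lets a signal abort the wait, with the error mapped through `sock_intr_errno()`, which reports -ERESTARTSYS (transparent syscall restart) when no finite timeout was armed and -EINTR otherwise. The `unix_shutdown()` rewrite is a pure guard-clause refactor: return early when no shutdown bits are set, then run the old body one indent level shallower. A sketch of the errno mapping, assuming the usual definition of `sock_intr_errno()` in include/net/sock.h:

#include <stdio.h>
#include <limits.h>

#define MAX_SCHEDULE_TIMEOUT LONG_MAX
#define ERESTARTSYS 512
#define EINTR 4

/* restart transparently when the caller had no finite timeout,
 * otherwise surface the interruption to userspace */
static long sock_intr_errno_demo(long timeo)
{
        return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
}

int main(void)
{
        printf("no timeout: %ld\n", sock_intr_errno_demo(MAX_SCHEDULE_TIMEOUT));
        printf("timeout:    %ld\n", sock_intr_errno_demo(500));
        return 0;
}
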
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828..b6f4b994eb3 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
104 /* 104 /*
105 * Socket ? 105 * Socket ?
106 */ 106 */
107 if (S_ISSOCK(inode->i_mode)) { 107 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
108 struct socket *sock = SOCKET_I(inode); 108 struct socket *sock = SOCKET_I(inode);
109 struct sock *s = sock->sk; 109 struct sock *s = sock->sk;
110 110
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 74944a2dd43..788a12c1eb5 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -59,8 +59,6 @@
59#include <asm/uaccess.h> /* copy_to/from_user */ 59#include <asm/uaccess.h> /* copy_to/from_user */
60#include <linux/init.h> /* __initfunc et al. */ 60#include <linux/init.h> /* __initfunc et al. */
61 61
62#define KMEM_SAFETYZONE 8
63
64#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) 62#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev)))
65 63
66/* 64/*
diff --git a/net/wireless/core.c b/net/wireless/core.c
index e9a5f8ca4c2..fe01de29bfe 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -718,13 +718,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
718 wdev->ps = false; 718 wdev->ps = false;
719 /* allow mac80211 to determine the timeout */ 719 /* allow mac80211 to determine the timeout */
720 wdev->ps_timeout = -1; 720 wdev->ps_timeout = -1;
721 if (rdev->ops->set_power_mgmt)
722 if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
723 wdev->ps,
724 wdev->ps_timeout)) {
725 /* assume this means it's off */
726 wdev->ps = false;
727 }
728 721
729 if (!dev->ethtool_ops) 722 if (!dev->ethtool_ops)
730 dev->ethtool_ops = &cfg80211_ethtool_ops; 723 dev->ethtool_ops = &cfg80211_ethtool_ops;
@@ -813,6 +806,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
813 rdev->opencount++; 806 rdev->opencount++;
814 mutex_unlock(&rdev->devlist_mtx); 807 mutex_unlock(&rdev->devlist_mtx);
815 cfg80211_unlock_rdev(rdev); 808 cfg80211_unlock_rdev(rdev);
809
810 /*
811 * Configure power management to the driver here so that its
812 * correctly set also after interface type changes etc.
813 */
814 if (wdev->iftype == NL80211_IFTYPE_STATION &&
815 rdev->ops->set_power_mgmt)
816 if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
817 wdev->ps,
818 wdev->ps_timeout)) {
819 /* assume this means it's off */
820 wdev->ps = false;
821 }
816 break; 822 break;
817 case NETDEV_UNREGISTER: 823 case NETDEV_UNREGISTER:
818 /* 824 /*
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index ca4c825be93..9bde4d1d3e9 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -1,5 +1,6 @@
1#include <linux/utsname.h> 1#include <linux/utsname.h>
2#include <net/cfg80211.h> 2#include <net/cfg80211.h>
3#include "core.h"
3#include "ethtool.h" 4#include "ethtool.h"
4 5
5static void cfg80211_get_drvinfo(struct net_device *dev, 6static void cfg80211_get_drvinfo(struct net_device *dev,
@@ -37,9 +38,41 @@ static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs,
37 regs->len = 0; 38 regs->len = 0;
38} 39}
39 40
41static void cfg80211_get_ringparam(struct net_device *dev,
42 struct ethtool_ringparam *rp)
43{
44 struct wireless_dev *wdev = dev->ieee80211_ptr;
45 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
46
47 memset(rp, 0, sizeof(*rp));
48
49 if (rdev->ops->get_ringparam)
50 rdev->ops->get_ringparam(wdev->wiphy,
51 &rp->tx_pending, &rp->tx_max_pending,
52 &rp->rx_pending, &rp->rx_max_pending);
53}
54
55static int cfg80211_set_ringparam(struct net_device *dev,
56 struct ethtool_ringparam *rp)
57{
58 struct wireless_dev *wdev = dev->ieee80211_ptr;
59 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
60
61 if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0)
62 return -EINVAL;
63
64 if (rdev->ops->set_ringparam)
65 return rdev->ops->set_ringparam(wdev->wiphy,
66 rp->tx_pending, rp->rx_pending);
67
68 return -ENOTSUPP;
69}
70
40const struct ethtool_ops cfg80211_ethtool_ops = { 71const struct ethtool_ops cfg80211_ethtool_ops = {
41 .get_drvinfo = cfg80211_get_drvinfo, 72 .get_drvinfo = cfg80211_get_drvinfo,
42 .get_regs_len = cfg80211_get_regs_len, 73 .get_regs_len = cfg80211_get_regs_len,
43 .get_regs = cfg80211_get_regs, 74 .get_regs = cfg80211_get_regs,
44 .get_link = ethtool_op_get_link, 75 .get_link = ethtool_op_get_link,
76 .get_ringparam = cfg80211_get_ringparam,
77 .set_ringparam = cfg80211_set_ringparam,
45}; 78};
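
The new cfg80211 ethtool hooks above follow the usual delegation pattern for optional driver capabilities: zero the output structure so the answer is defined even without a backend, forward to the driver callback when one exists, and reject what the backend cannot express (mini/jumbo RX rings, or a set with no `set_ringparam` op at all). A standalone reduction of that dispatch — the structs and hook names are illustrative:

#include <stdio.h>
#include <string.h>

#define EINVAL   22
#define ENOTSUPP 524    /* kernel-internal errno, shown for illustration */

struct ringparam {
        unsigned tx_pending, rx_pending;
        unsigned rx_mini_pending, rx_jumbo_pending;
};

/* optional driver hooks, like rdev->ops in the hunk above */
struct demo_ops {
        void (*get_ringparam)(unsigned *tx, unsigned *rx);
        int  (*set_ringparam)(unsigned tx, unsigned rx);
};

static void demo_get(const struct demo_ops *ops, struct ringparam *rp)
{
        memset(rp, 0, sizeof(*rp));     /* defined result even without a hook */
        if (ops->get_ringparam)
                ops->get_ringparam(&rp->tx_pending, &rp->rx_pending);
}

static int demo_set(const struct demo_ops *ops, const struct ringparam *rp)
{
        if (rp->rx_mini_pending || rp->rx_jumbo_pending)
                return -EINVAL;         /* rings the backend can't express */
        if (ops->set_ringparam)
                return ops->set_ringparam(rp->tx_pending, rp->rx_pending);
        return -ENOTSUPP;
}

int main(void)
{
        struct demo_ops ops = { 0 };    /* driver provides no hooks */
        struct ringparam rp;

        demo_get(&ops, &rp);
        printf("get -> tx=%u rx=%u, set -> %d\n",
               rp.tx_pending, rp.rx_pending, demo_set(&ops, &rp));
        return 0;
}
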
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9b62710891a..4ebce4284e9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1968,13 +1968,41 @@ static int parse_station_flags(struct genl_info *info,
1968 return 0; 1968 return 0;
1969} 1969}
1970 1970
1971static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
1972 int attr)
1973{
1974 struct nlattr *rate;
1975 u16 bitrate;
1976
1977 rate = nla_nest_start(msg, attr);
1978 if (!rate)
1979 goto nla_put_failure;
1980
1981 /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */
1982 bitrate = cfg80211_calculate_bitrate(info);
1983 if (bitrate > 0)
1984 NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate);
1985
1986 if (info->flags & RATE_INFO_FLAGS_MCS)
1987 NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, info->mcs);
1988 if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH)
1989 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH);
1990 if (info->flags & RATE_INFO_FLAGS_SHORT_GI)
1991 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI);
1992
1993 nla_nest_end(msg, rate);
1994 return true;
1995
1996nla_put_failure:
1997 return false;
1998}
1999
1971static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, 2000static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
1972 int flags, struct net_device *dev, 2001 int flags, struct net_device *dev,
1973 const u8 *mac_addr, struct station_info *sinfo) 2002 const u8 *mac_addr, struct station_info *sinfo)
1974{ 2003{
1975 void *hdr; 2004 void *hdr;
1976 struct nlattr *sinfoattr, *txrate; 2005 struct nlattr *sinfoattr;
1977 u16 bitrate;
1978 2006
1979 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); 2007 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
1980 if (!hdr) 2008 if (!hdr)
@@ -2013,24 +2041,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
2013 NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, 2041 NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
2014 sinfo->signal_avg); 2042 sinfo->signal_avg);
2015 if (sinfo->filled & STATION_INFO_TX_BITRATE) { 2043 if (sinfo->filled & STATION_INFO_TX_BITRATE) {
2016 txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); 2044 if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
2017 if (!txrate) 2045 NL80211_STA_INFO_TX_BITRATE))
2046 goto nla_put_failure;
2047 }
2048 if (sinfo->filled & STATION_INFO_RX_BITRATE) {
2049 if (!nl80211_put_sta_rate(msg, &sinfo->rxrate,
2050 NL80211_STA_INFO_RX_BITRATE))
2018 goto nla_put_failure; 2051 goto nla_put_failure;
2019
2020 /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */
2021 bitrate = cfg80211_calculate_bitrate(&sinfo->txrate);
2022 if (bitrate > 0)
2023 NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate);
2024
2025 if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS)
2026 NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS,
2027 sinfo->txrate.mcs);
2028 if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH)
2029 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH);
2030 if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI)
2031 NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI);
2032
2033 nla_nest_end(msg, txrate);
2034 } 2052 }
2035 if (sinfo->filled & STATION_INFO_RX_PACKETS) 2053 if (sinfo->filled & STATION_INFO_RX_PACKETS)
2036 NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS, 2054 NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS,
@@ -2718,7 +2736,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
2718 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 2736 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2719 NL80211_CMD_GET_MESH_CONFIG); 2737 NL80211_CMD_GET_MESH_CONFIG);
2720 if (!hdr) 2738 if (!hdr)
2721 goto nla_put_failure; 2739 goto out;
2722 pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); 2740 pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG);
2723 if (!pinfoattr) 2741 if (!pinfoattr)
2724 goto nla_put_failure; 2742 goto nla_put_failure;
@@ -2759,6 +2777,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
2759 2777
2760 nla_put_failure: 2778 nla_put_failure:
2761 genlmsg_cancel(msg, hdr); 2779 genlmsg_cancel(msg, hdr);
2780 out:
2762 nlmsg_free(msg); 2781 nlmsg_free(msg);
2763 return -ENOBUFS; 2782 return -ENOBUFS;
2764} 2783}
@@ -2954,7 +2973,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
2954 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 2973 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2955 NL80211_CMD_GET_REG); 2974 NL80211_CMD_GET_REG);
2956 if (!hdr) 2975 if (!hdr)
2957 goto nla_put_failure; 2976 goto put_failure;
2958 2977
2959 NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, 2978 NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
2960 cfg80211_regdomain->alpha2); 2979 cfg80211_regdomain->alpha2);
@@ -3001,6 +3020,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
3001 3020
3002nla_put_failure: 3021nla_put_failure:
3003 genlmsg_cancel(msg, hdr); 3022 genlmsg_cancel(msg, hdr);
3023put_failure:
3004 nlmsg_free(msg); 3024 nlmsg_free(msg);
3005 err = -EMSGSIZE; 3025 err = -EMSGSIZE;
3006out: 3026out:
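
The station-info hunk above is a pure deduplication: the nested rate attribute that used to be built inline for the TX rate is hoisted into `nl80211_put_sta_rate()`, parameterized by the attribute id, so the identical code now also serves the new `NL80211_STA_INFO_RX_BITRATE`. The failure path keeps the netlink convention of returning false so the caller can `goto nla_put_failure`. The shape, reduced to a toy serializer — the output format and the numeric ids are invented for the sketch:

#include <stdbool.h>
#include <stdio.h>

struct rate_info { unsigned bitrate; bool mcs; };

/* one helper taking the attribute id, instead of two inline copies */
static bool put_sta_rate(FILE *out, const struct rate_info *info, int attr)
{
        if (fprintf(out, "attr=%d bitrate=%u mcs=%d\n",
                    attr, info->bitrate, (int)info->mcs) < 0)
                return false;   /* mirrors the nla_put_failure path */
        return true;
}

int main(void)
{
        struct rate_info tx = { 540, true }, rx = { 270, false };

        /* both directions share the serializer; ids are arbitrary here */
        if (!put_sta_rate(stdout, &tx, 100) || !put_sta_rate(stdout, &rx, 101))
                return 1;
        return 0;
}
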
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 37693b6ef23..3332d5bce31 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -63,6 +63,10 @@ static struct regulatory_request *last_request;
63/* To trigger userspace events */ 63/* To trigger userspace events */
64static struct platform_device *reg_pdev; 64static struct platform_device *reg_pdev;
65 65
66static struct device_type reg_device_type = {
67 .uevent = reg_device_uevent,
68};
69
66/* 70/*
67 * Central wireless core regulatory domains, we only need two, 71 * Central wireless core regulatory domains, we only need two,
68 * the current one and a world regulatory domain in case we have no 72 * the current one and a world regulatory domain in case we have no
@@ -362,16 +366,11 @@ static inline void reg_regdb_query(const char *alpha2) {}
362 366
363/* 367/*
364 * This lets us keep regulatory code which is updated on a regulatory 368 * This lets us keep regulatory code which is updated on a regulatory
365 * basis in userspace. 369 * basis in userspace. Country information is filled in by
370 * reg_device_uevent
366 */ 371 */
367static int call_crda(const char *alpha2) 372static int call_crda(const char *alpha2)
368{ 373{
369 char country_env[9 + 2] = "COUNTRY=";
370 char *envp[] = {
371 country_env,
372 NULL
373 };
374
375 if (!is_world_regdom((char *) alpha2)) 374 if (!is_world_regdom((char *) alpha2))
376 pr_info("Calling CRDA for country: %c%c\n", 375 pr_info("Calling CRDA for country: %c%c\n",
377 alpha2[0], alpha2[1]); 376 alpha2[0], alpha2[1]);
@@ -381,10 +380,7 @@ static int call_crda(const char *alpha2)
381 /* query internal regulatory database (if it exists) */ 380 /* query internal regulatory database (if it exists) */
382 reg_regdb_query(alpha2); 381 reg_regdb_query(alpha2);
383 382
384 country_env[8] = alpha2[0]; 383 return kobject_uevent(&reg_pdev->dev.kobj, KOBJ_CHANGE);
385 country_env[9] = alpha2[1];
386
387 return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, envp);
388} 384}
389 385
390/* Used by nl80211 before kmalloc'ing our regulatory domain */ 386/* Used by nl80211 before kmalloc'ing our regulatory domain */
@@ -1801,9 +1797,9 @@ void regulatory_hint_disconnect(void)
1801 1797
1802static bool freq_is_chan_12_13_14(u16 freq) 1798static bool freq_is_chan_12_13_14(u16 freq)
1803{ 1799{
1804 if (freq == ieee80211_channel_to_frequency(12) || 1800 if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) ||
1805 freq == ieee80211_channel_to_frequency(13) || 1801 freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) ||
1806 freq == ieee80211_channel_to_frequency(14)) 1802 freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ))
1807 return true; 1803 return true;
1808 return false; 1804 return false;
1809} 1805}
@@ -2087,6 +2083,25 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2087 return r; 2083 return r;
2088} 2084}
2089 2085
2086#ifdef CONFIG_HOTPLUG
2087int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
2088{
2089 if (last_request && !last_request->processed) {
2090 if (add_uevent_var(env, "COUNTRY=%c%c",
2091 last_request->alpha2[0],
2092 last_request->alpha2[1]))
2093 return -ENOMEM;
2094 }
2095
2096 return 0;
2097}
2098#else
2099int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
2100{
2101 return -ENODEV;
2102}
2103#endif /* CONFIG_HOTPLUG */
2104
2090/* Caller must hold cfg80211_mutex */ 2105/* Caller must hold cfg80211_mutex */
2091void reg_device_remove(struct wiphy *wiphy) 2106void reg_device_remove(struct wiphy *wiphy)
2092{ 2107{
@@ -2118,6 +2133,8 @@ int __init regulatory_init(void)
2118 if (IS_ERR(reg_pdev)) 2133 if (IS_ERR(reg_pdev))
2119 return PTR_ERR(reg_pdev); 2134 return PTR_ERR(reg_pdev);
2120 2135
2136 reg_pdev->dev.type = &reg_device_type;
2137
2121 spin_lock_init(&reg_requests_lock); 2138 spin_lock_init(&reg_requests_lock);
2122 spin_lock_init(&reg_pending_beacons_lock); 2139 spin_lock_init(&reg_pending_beacons_lock);
2123 2140
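
The reg.c change above inverts how the CRDA uevent gets its COUNTRY variable: instead of the call site pre-baking an env array for `kobject_uevent_env()`, the platform device is given a `device_type` whose `uevent` callback appends the variable with `add_uevent_var()` at event-generation time, reading the pending `last_request`. That keeps the event payload current even if the request changes between queueing and emission. A userspace analogue of the inversion — all names invented:

#include <stdio.h>

/* the emitter calls back into the owner to collect variables, rather
 * than the owner pre-baking them into the emit call */
struct demo_type {
        int (*uevent)(char *buf, unsigned len);
};

static char pending_country[3] = "DE";  /* stand-in for last_request */

static int reg_uevent_cb(char *buf, unsigned len)
{
        int n = snprintf(buf, len, "COUNTRY=%s", pending_country);
        return (n > 0 && (unsigned)n < len) ? 0 : -1;
}

static void emit_event(const struct demo_type *t)
{
        char env[32] = "";

        if (t->uevent && t->uevent(env, sizeof(env)) == 0)
                printf("uevent: %s\n", env);
}

int main(void)
{
        struct demo_type reg_type = { reg_uevent_cb };

        emit_event(&reg_type);          /* uevent: COUNTRY=DE */
        pending_country[0] = 'U';
        pending_country[1] = 'S';
        emit_event(&reg_type);          /* uevent: COUNTRY=US */
        return 0;
}
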
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index c4695d07af2..b67d1c3a2fb 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -8,6 +8,7 @@ bool reg_is_valid_request(const char *alpha2);
8 8
9int regulatory_hint_user(const char *alpha2); 9int regulatory_hint_user(const char *alpha2);
10 10
11int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env);
11void reg_device_remove(struct wiphy *wiphy); 12void reg_device_remove(struct wiphy *wiphy);
12 13
13int __init regulatory_init(void); 14int __init regulatory_init(void);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7620ae2fcf1..6a750bc6bcf 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -29,29 +29,37 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
29} 29}
30EXPORT_SYMBOL(ieee80211_get_response_rate); 30EXPORT_SYMBOL(ieee80211_get_response_rate);
31 31
32int ieee80211_channel_to_frequency(int chan) 32int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band)
33{ 33{
34 if (chan < 14) 34 /* see 802.11 17.3.8.3.2 and Annex J
35 return 2407 + chan * 5; 35 * there are overlapping channel numbers in 5GHz and 2GHz bands */
36 36 if (band == IEEE80211_BAND_5GHZ) {
37 if (chan == 14) 37 if (chan >= 182 && chan <= 196)
38 return 2484; 38 return 4000 + chan * 5;
39 39 else
40 /* FIXME: 802.11j 17.3.8.3.2 */ 40 return 5000 + chan * 5;
41 return (chan + 1000) * 5; 41 } else { /* IEEE80211_BAND_2GHZ */
42 if (chan == 14)
43 return 2484;
44 else if (chan < 14)
45 return 2407 + chan * 5;
46 else
47 return 0; /* not supported */
48 }
42} 49}
43EXPORT_SYMBOL(ieee80211_channel_to_frequency); 50EXPORT_SYMBOL(ieee80211_channel_to_frequency);
44 51
45int ieee80211_frequency_to_channel(int freq) 52int ieee80211_frequency_to_channel(int freq)
46{ 53{
54 /* see 802.11 17.3.8.3.2 and Annex J */
47 if (freq == 2484) 55 if (freq == 2484)
48 return 14; 56 return 14;
49 57 else if (freq < 2484)
50 if (freq < 2484)
51 return (freq - 2407) / 5; 58 return (freq - 2407) / 5;
52 59 else if (freq >= 4910 && freq <= 4980)
53 /* FIXME: 802.11j 17.3.8.3.2 */ 60 return (freq - 4000) / 5;
54 return freq/5 - 1000; 61 else
62 return (freq - 5000) / 5;
55} 63}
56EXPORT_SYMBOL(ieee80211_frequency_to_channel); 64EXPORT_SYMBOL(ieee80211_frequency_to_channel);
57 65
@@ -159,12 +167,15 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
159 167
160 /* 168 /*
161 * Disallow pairwise keys with non-zero index unless it's WEP 169 * Disallow pairwise keys with non-zero index unless it's WEP
162 * (because current deployments use pairwise WEP keys with 170 * or a vendor specific cipher (because current deployments use
163 * non-zero indizes but 802.11i clearly specifies to use zero) 171 * pairwise WEP keys with non-zero indices and for vendor specific
172 * ciphers this should be validated in the driver or hardware level
173 * - but 802.11i clearly specifies to use zero)
164 */ 174 */
165 if (pairwise && key_idx && 175 if (pairwise && key_idx &&
166 params->cipher != WLAN_CIPHER_SUITE_WEP40 && 176 ((params->cipher == WLAN_CIPHER_SUITE_TKIP) ||
167 params->cipher != WLAN_CIPHER_SUITE_WEP104) 177 (params->cipher == WLAN_CIPHER_SUITE_CCMP) ||
178 (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC)))
168 return -EINVAL; 179 return -EINVAL;
169 180
170 switch (params->cipher) { 181 switch (params->cipher) {
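
The util.c hunks above replace the old `(chan + 1000) * 5` placeholder with the real IEEE 802.11 mapping (17.3.8.3.2 and Annex J). The caller must now name the band, because 2 GHz and 5 GHz channel numbers overlap, and the 4.9 GHz 802.11j channels (182-196) get their own base. The mapping as shipped in the hunk, transcribed into a standalone round-trip check:

#include <stdio.h>

enum band { BAND_2GHZ, BAND_5GHZ };

/* transcription of the new ieee80211_channel_to_frequency() */
static int chan_to_freq(int chan, enum band band)
{
        if (band == BAND_5GHZ) {
                if (chan >= 182 && chan <= 196)
                        return 4000 + chan * 5; /* 802.11j, 4.9 GHz */
                return 5000 + chan * 5;
        }
        if (chan == 14)
                return 2484;                    /* the irregular 2 GHz channel */
        if (chan < 14)
                return 2407 + chan * 5;
        return 0;                               /* not a 2 GHz channel */
}

/* transcription of the new ieee80211_frequency_to_channel() */
static int freq_to_chan(int freq)
{
        if (freq == 2484)
                return 14;
        if (freq < 2484)
                return (freq - 2407) / 5;
        if (freq >= 4910 && freq <= 4980)
                return (freq - 4000) / 5;
        return (freq - 5000) / 5;
}

int main(void)
{
        int chan;

        for (chan = 1; chan <= 14; chan++)
                printf("2GHz ch%2d -> %d MHz -> ch%d\n", chan,
                       chan_to_freq(chan, BAND_2GHZ),
                       freq_to_chan(chan_to_freq(chan, BAND_2GHZ)));
        printf("5GHz ch36 -> %d MHz\n", chan_to_freq(36, BAND_5GHZ));
        return 0;
}

The wext-compat.c hunk below shows the one heuristic left at a call boundary: wireless extensions carry no band, so channel numbers above 14 are guessed to be 5 GHz.
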
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index d112f038edf..0bf169bb770 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -267,9 +267,12 @@ int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq)
267 * -EINVAL for impossible things. 267 * -EINVAL for impossible things.
268 */ 268 */
269 if (freq->e == 0) { 269 if (freq->e == 0) {
270 enum ieee80211_band band = IEEE80211_BAND_2GHZ;
270 if (freq->m < 0) 271 if (freq->m < 0)
271 return 0; 272 return 0;
272 return ieee80211_channel_to_frequency(freq->m); 273 if (freq->m > 14)
274 band = IEEE80211_BAND_5GHZ;
275 return ieee80211_channel_to_frequency(freq->m, band);
273 } else { 276 } else {
274 int i, div = 1000000; 277 int i, div = 1000000;
275 for (i = 0; i < freq->e; i++) 278 for (i = 0; i < freq->e; i++)
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 2196e55e4f6..e6759c9660b 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -5,7 +5,6 @@
5config X25 5config X25
6 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)" 6 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
7 depends on EXPERIMENTAL 7 depends on EXPERIMENTAL
8 depends on BKL # should be fixable
9 ---help--- 8 ---help---
10 X.25 is a set of standardized network protocols, similar in scope to 9 X.25 is a set of standardized network protocols, similar in scope to
11 frame relay; the one physical line from your box to the X.25 network 10 frame relay; the one physical line from your box to the X.25 network
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ad96ee90fe2..4680b1e4c79 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -40,7 +40,6 @@
40#include <linux/errno.h> 40#include <linux/errno.h>
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/sched.h> 42#include <linux/sched.h>
43#include <linux/smp_lock.h>
44#include <linux/timer.h> 43#include <linux/timer.h>
45#include <linux/string.h> 44#include <linux/string.h>
46#include <linux/net.h> 45#include <linux/net.h>
@@ -432,15 +431,6 @@ void x25_destroy_socket_from_timer(struct sock *sk)
432 sock_put(sk); 431 sock_put(sk);
433} 432}
434 433
435static void x25_destroy_socket(struct sock *sk)
436{
437 sock_hold(sk);
438 lock_sock(sk);
439 __x25_destroy_socket(sk);
440 release_sock(sk);
441 sock_put(sk);
442}
443
444/* 434/*
445 * Handling for system calls applied via the various interfaces to a 435 * Handling for system calls applied via the various interfaces to a
446 * X.25 socket object. 436 * X.25 socket object.
@@ -647,18 +637,19 @@ static int x25_release(struct socket *sock)
647 struct sock *sk = sock->sk; 637 struct sock *sk = sock->sk;
648 struct x25_sock *x25; 638 struct x25_sock *x25;
649 639
650 lock_kernel();
651 if (!sk) 640 if (!sk)
652 goto out; 641 return 0;
653 642
654 x25 = x25_sk(sk); 643 x25 = x25_sk(sk);
655 644
645 sock_hold(sk);
646 lock_sock(sk);
656 switch (x25->state) { 647 switch (x25->state) {
657 648
658 case X25_STATE_0: 649 case X25_STATE_0:
659 case X25_STATE_2: 650 case X25_STATE_2:
660 x25_disconnect(sk, 0, 0, 0); 651 x25_disconnect(sk, 0, 0, 0);
661 x25_destroy_socket(sk); 652 __x25_destroy_socket(sk);
662 goto out; 653 goto out;
663 654
664 case X25_STATE_1: 655 case X25_STATE_1:
@@ -678,7 +669,8 @@ static int x25_release(struct socket *sock)
678 669
679 sock_orphan(sk); 670 sock_orphan(sk);
680out: 671out:
681 unlock_kernel(); 672 release_sock(sk);
673 sock_put(sk);
682 return 0; 674 return 0;
683} 675}
684 676
@@ -1085,7 +1077,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1085 size_t size; 1077 size_t size;
1086 int qbit = 0, rc = -EINVAL; 1078 int qbit = 0, rc = -EINVAL;
1087 1079
1088 lock_kernel(); 1080 lock_sock(sk);
1089 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT)) 1081 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT))
1090 goto out; 1082 goto out;
1091 1083
@@ -1148,7 +1140,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1148 1140
1149 size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN; 1141 size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN;
1150 1142
1143 release_sock(sk);
1151 skb = sock_alloc_send_skb(sk, size, noblock, &rc); 1144 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
1145 lock_sock(sk);
1152 if (!skb) 1146 if (!skb)
1153 goto out; 1147 goto out;
1154 X25_SKB_CB(skb)->flags = msg->msg_flags; 1148 X25_SKB_CB(skb)->flags = msg->msg_flags;
@@ -1231,26 +1225,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1231 len++; 1225 len++;
1232 } 1226 }
1233 1227
1234 /*
1235 * lock_sock() is currently only used to serialize this x25_kick()
1236 * against input-driven x25_kick() calls. It currently only blocks
1237 * incoming packets for this socket and does not protect against
1238 * any other socket state changes and is not called from anywhere
1239 * else. As x25_kick() cannot block and as long as all socket
1240 * operations are BKL-wrapped, we don't need take to care about
1241 * purging the backlog queue in x25_release().
1242 *
1243 * Using lock_sock() to protect all socket operations entirely
1244 * (and making the whole x25 stack SMP aware) unfortunately would
1245 * require major changes to {send,recv}msg and skb allocation methods.
1246 * -> 2.5 ;)
1247 */
1248 lock_sock(sk);
1249 x25_kick(sk); 1228 x25_kick(sk);
1250 release_sock(sk);
1251 rc = len; 1229 rc = len;
1252out: 1230out:
1253 unlock_kernel(); 1231 release_sock(sk);
1254 return rc; 1232 return rc;
1255out_kfree_skb: 1233out_kfree_skb:
1256 kfree_skb(skb); 1234 kfree_skb(skb);
@@ -1271,7 +1249,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1271 unsigned char *asmptr; 1249 unsigned char *asmptr;
1272 int rc = -ENOTCONN; 1250 int rc = -ENOTCONN;
1273 1251
1274 lock_kernel(); 1252 lock_sock(sk);
1275 /* 1253 /*
1276 * This works for seqpacket too. The receiver has ordered the queue for 1254 * This works for seqpacket too. The receiver has ordered the queue for
1277 * us! We do one quick check first though 1255 * us! We do one quick check first though
@@ -1300,8 +1278,10 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1300 msg->msg_flags |= MSG_OOB; 1278 msg->msg_flags |= MSG_OOB;
1301 } else { 1279 } else {
1302 /* Now we can treat all alike */ 1280 /* Now we can treat all alike */
1281 release_sock(sk);
1303 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 1282 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1304 flags & MSG_DONTWAIT, &rc); 1283 flags & MSG_DONTWAIT, &rc);
1284 lock_sock(sk);
1305 if (!skb) 1285 if (!skb)
1306 goto out; 1286 goto out;
1307 1287
@@ -1338,14 +1318,12 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1338 1318
1339 msg->msg_namelen = sizeof(struct sockaddr_x25); 1319 msg->msg_namelen = sizeof(struct sockaddr_x25);
1340 1320
1341 lock_sock(sk);
1342 x25_check_rbuf(sk); 1321 x25_check_rbuf(sk);
1343 release_sock(sk);
1344 rc = copied; 1322 rc = copied;
1345out_free_dgram: 1323out_free_dgram:
1346 skb_free_datagram(sk, skb); 1324 skb_free_datagram(sk, skb);
1347out: 1325out:
1348 unlock_kernel(); 1326 release_sock(sk);
1349 return rc; 1327 return rc;
1350} 1328}
1351 1329
@@ -1581,18 +1559,18 @@ out_cud_release:
1581 1559
1582 case SIOCX25CALLACCPTAPPRV: { 1560 case SIOCX25CALLACCPTAPPRV: {
1583 rc = -EINVAL; 1561 rc = -EINVAL;
1584 lock_kernel(); 1562 lock_sock(sk);
1585 if (sk->sk_state != TCP_CLOSE) 1563 if (sk->sk_state != TCP_CLOSE)
1586 break; 1564 break;
1587 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); 1565 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
1588 unlock_kernel(); 1566 release_sock(sk);
1589 rc = 0; 1567 rc = 0;
1590 break; 1568 break;
1591 } 1569 }
1592 1570
1593 case SIOCX25SENDCALLACCPT: { 1571 case SIOCX25SENDCALLACCPT: {
1594 rc = -EINVAL; 1572 rc = -EINVAL;
1595 lock_kernel(); 1573 lock_sock(sk);
1596 if (sk->sk_state != TCP_ESTABLISHED) 1574 if (sk->sk_state != TCP_ESTABLISHED)
1597 break; 1575 break;
1598 /* must call accptapprv above */ 1576 /* must call accptapprv above */
@@ -1600,7 +1578,7 @@ out_cud_release:
1600 break; 1578 break;
1601 x25_write_internal(sk, X25_CALL_ACCEPTED); 1579 x25_write_internal(sk, X25_CALL_ACCEPTED);
1602 x25->state = X25_STATE_3; 1580 x25->state = X25_STATE_3;
1603 unlock_kernel(); 1581 release_sock(sk);
1604 rc = 0; 1582 rc = 0;
1605 break; 1583 break;
1606 } 1584 }
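
The pattern used throughout this af_x25.c conversion deserves a sketch: lock_sock() now serializes socket state where the big kernel lock used to, and the lock is dropped around calls that may sleep, such as sock_alloc_send_skb(), then re-taken before state is touched again. The function below is a minimal illustration of that shape, not code from this patch; the function name and error handling are placeholders.

static int sendmsg_locking_sketch(struct sock *sk, size_t size, int noblock)
{
	struct sk_buff *skb;
	int rc = -EINVAL;

	lock_sock(sk);			/* protects per-socket state */

	release_sock(sk);		/* sock_alloc_send_skb() may sleep */
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);			/* re-take before touching state */
	if (!skb)
		goto out;

	/* ... build and queue the frame, then kick transmission ... */
	rc = 0;
out:
	release_sock(sk);
	return rc;
}

The same release/alloc/re-lock dance appears in the x25_output() hunk just below; after re-taking the lock, a real implementation must re-validate any socket state it read before sleeping.
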
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index d00649fb251..0144271d218 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -68,8 +68,11 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
68 frontlen = skb_headroom(skb); 68 frontlen = skb_headroom(skb);
69 69
70 while (skb->len > 0) { 70 while (skb->len > 0) {
71 if ((skbn = sock_alloc_send_skb(sk, frontlen + max_len, 71 release_sock(sk);
72 noblock, &err)) == NULL){ 72 skbn = sock_alloc_send_skb(sk, frontlen + max_len,
73 noblock, &err);
74 lock_sock(sk);
75 if (!skbn) {
73 if (err == -EWOULDBLOCK && noblock){ 76 if (err == -EWOULDBLOCK && noblock){
74 kfree_skb(skb); 77 kfree_skb(skb);
75 return sent; 78 return sent;
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index c631047e1b2..aa429eefe91 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ 5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
6 xfrm_input.o xfrm_output.o xfrm_algo.o \ 6 xfrm_input.o xfrm_output.o xfrm_algo.o \
7 xfrm_sysctl.o 7 xfrm_sysctl.o xfrm_replay.o
8obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o 8obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
9obj-$(CONFIG_XFRM_USER) += xfrm_user.o 9obj-$(CONFIG_XFRM_USER) += xfrm_user.o
10obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o 10obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 8b4d6e3246e..58064d9e565 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -618,21 +618,21 @@ static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry,
618 (entry->compat && !strcmp(name, entry->compat))); 618 (entry->compat && !strcmp(name, entry->compat)));
619} 619}
620 620
621struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) 621struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe)
622{ 622{
623 return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, 623 return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name,
624 probe); 624 probe);
625} 625}
626EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); 626EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
627 627
628struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) 628struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe)
629{ 629{
630 return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, 630 return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name,
631 probe); 631 probe);
632} 632}
633EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); 633EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
634 634
635struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) 635struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe)
636{ 636{
637 return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, 637 return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name,
638 probe); 638 probe);
@@ -654,7 +654,7 @@ static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry,
654 !strcmp(name, entry->name); 654 !strcmp(name, entry->name);
655} 655}
656 656
657struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe) 657struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe)
658{ 658{
659 struct xfrm_aead_name data = { 659 struct xfrm_aead_name data = {
660 .name = name, 660 .name = name,
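
The xfrm_algo changes are pure constification: the by-name lookup helpers only read the name, so it can be passed as const char *. Below is a minimal userspace model of such a const-correct lookup, with an illustrative table type that is not the xfrm one.

#include <stddef.h>
#include <string.h>

struct algo_entry {
	const char *name;	/* canonical algorithm name */
	int id;
};

static const struct algo_entry *
algo_get_byname(const struct algo_entry *tbl, size_t n, const char *name)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (strcmp(tbl[i].name, name) == 0)
			return &tbl[i];
	return NULL;	/* the kernel version may then probe for a module */
}
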
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 8e69533d231..7199d78b2aa 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -4,29 +4,32 @@
4#include <linux/xfrm.h> 4#include <linux/xfrm.h>
5#include <linux/socket.h> 5#include <linux/socket.h>
6 6
7static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) 7static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr)
8{ 8{
9 return ntohl(addr->a4); 9 return ntohl(addr->a4);
10} 10}
11 11
12static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) 12static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr)
13{ 13{
14 return ntohl(addr->a6[2] ^ addr->a6[3]); 14 return ntohl(addr->a6[2] ^ addr->a6[3]);
15} 15}
16 16
17static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) 17static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr,
18 const xfrm_address_t *saddr)
18{ 19{
19 u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; 20 u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4;
20 return ntohl((__force __be32)sum); 21 return ntohl((__force __be32)sum);
21} 22}
22 23
23static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) 24static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr,
25 const xfrm_address_t *saddr)
24{ 26{
25 return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ 27 return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
26 saddr->a6[2] ^ saddr->a6[3]); 28 saddr->a6[2] ^ saddr->a6[3]);
27} 29}
28 30
29static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, 31static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr,
32 const xfrm_address_t *saddr,
30 u32 reqid, unsigned short family, 33 u32 reqid, unsigned short family,
31 unsigned int hmask) 34 unsigned int hmask)
32{ 35{
@@ -42,8 +45,8 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t
42 return (h ^ (h >> 16)) & hmask; 45 return (h ^ (h >> 16)) & hmask;
43} 46}
44 47
45static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, 48static inline unsigned __xfrm_src_hash(const xfrm_address_t *daddr,
46 xfrm_address_t *saddr, 49 const xfrm_address_t *saddr,
47 unsigned short family, 50 unsigned short family,
48 unsigned int hmask) 51 unsigned int hmask)
49{ 52{
@@ -60,8 +63,8 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
60} 63}
61 64
62static inline unsigned int 65static inline unsigned int
63__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, 66__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto,
64 unsigned int hmask) 67 unsigned short family, unsigned int hmask)
65{ 68{
66 unsigned int h = (__force u32)spi ^ proto; 69 unsigned int h = (__force u32)spi ^ proto;
67 switch (family) { 70 switch (family) {
@@ -80,10 +83,11 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
80 return (index ^ (index >> 8)) & hmask; 83 return (index ^ (index >> 8)) & hmask;
81} 84}
82 85
83static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) 86static inline unsigned int __sel_hash(const struct xfrm_selector *sel,
87 unsigned short family, unsigned int hmask)
84{ 88{
85 xfrm_address_t *daddr = &sel->daddr; 89 const xfrm_address_t *daddr = &sel->daddr;
86 xfrm_address_t *saddr = &sel->saddr; 90 const xfrm_address_t *saddr = &sel->saddr;
87 unsigned int h = 0; 91 unsigned int h = 0;
88 92
89 switch (family) { 93 switch (family) {
@@ -107,7 +111,9 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short
107 return h & hmask; 111 return h & hmask;
108} 112}
109 113
110static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) 114static inline unsigned int __addr_hash(const xfrm_address_t *daddr,
115 const xfrm_address_t *saddr,
116 unsigned short family, unsigned int hmask)
111{ 117{
112 unsigned int h = 0; 118 unsigned int h = 0;
113 119
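
The xfrm_hash.h hunks likewise only add const; the hashing itself is unchanged. For orientation, here is a self-contained sketch of the AF_INET destination hash assembled from the helpers above, simplified by omitting the family term that __xfrm_dst_hash() mixes in, and with stdint types standing in for __be32/u32.

#include <stdint.h>
#include <arpa/inet.h>

/* models __xfrm4_daddr_saddr_hash() feeding __xfrm_dst_hash();
 * the family term is omitted for brevity */
static unsigned int dst_hash_sketch(uint32_t daddr_be, uint32_t saddr_be,
				    uint32_t reqid, unsigned int hmask)
{
	unsigned int h = ntohl(daddr_be + saddr_be) ^ reqid;

	return (h ^ (h >> 16)) & hmask;	/* hmask is table size - 1 */
}
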
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 45f1c98d4fc..872065ca7f8 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -107,6 +107,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
107 struct net *net = dev_net(skb->dev); 107 struct net *net = dev_net(skb->dev);
108 int err; 108 int err;
109 __be32 seq; 109 __be32 seq;
110 __be32 seq_hi;
110 struct xfrm_state *x; 111 struct xfrm_state *x;
111 xfrm_address_t *daddr; 112 xfrm_address_t *daddr;
112 struct xfrm_mode *inner_mode; 113 struct xfrm_mode *inner_mode;
@@ -118,7 +119,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
118 if (encap_type < 0) { 119 if (encap_type < 0) {
119 async = 1; 120 async = 1;
120 x = xfrm_input_state(skb); 121 x = xfrm_input_state(skb);
121 seq = XFRM_SKB_CB(skb)->seq.input; 122 seq = XFRM_SKB_CB(skb)->seq.input.low;
122 goto resume; 123 goto resume;
123 } 124 }
124 125
@@ -172,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
172 goto drop_unlock; 173 goto drop_unlock;
173 } 174 }
174 175
175 if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { 176 if (x->props.replay_window && x->repl->check(x, skb, seq)) {
176 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); 177 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
177 goto drop_unlock; 178 goto drop_unlock;
178 } 179 }
@@ -184,7 +185,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
184 185
185 spin_unlock(&x->lock); 186 spin_unlock(&x->lock);
186 187
187 XFRM_SKB_CB(skb)->seq.input = seq; 188 seq_hi = htonl(xfrm_replay_seqhi(x, seq));
189
190 XFRM_SKB_CB(skb)->seq.input.low = seq;
191 XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
188 192
189 nexthdr = x->type->input(x, skb); 193 nexthdr = x->type->input(x, skb);
190 194
@@ -206,8 +210,7 @@ resume:
206 /* only the first xfrm gets the encap type */ 210 /* only the first xfrm gets the encap type */
207 encap_type = 0; 211 encap_type = 0;
208 212
209 if (x->props.replay_window) 213 x->repl->advance(x, seq);
210 xfrm_replay_advance(x, seq);
211 214
212 x->curlft.bytes += skb->len; 215 x->curlft.bytes += skb->len;
213 x->curlft.packets++; 216 x->curlft.packets++;
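
Two things change in the input path: replay checking now goes through the per-state x->repl ops (see xfrm_replay.c below), and the skb control block carries the sequence number as a low/high pair so ESN-aware code can reconstruct the full 64-bit value. A hedged one-liner, not from the patch, showing how the halves stored above combine:

#include <stdint.h>

/* illustrative only: 64-bit ESN = (seq_hi << 32) | seq_lo */
static inline uint64_t esn_seq64(uint32_t seq_hi, uint32_t seq_lo)
{
	return ((uint64_t)seq_hi << 32) | seq_lo;
}
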
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 64f2ae1fdc1..1aba03f449c 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -67,17 +67,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
67 goto error; 67 goto error;
68 } 68 }
69 69
70 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { 70 err = x->repl->overflow(x, skb);
71 XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; 71 if (err) {
72 if (unlikely(x->replay.oseq == 0)) { 72 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
73 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); 73 goto error;
74 x->replay.oseq--;
75 xfrm_audit_state_replay_overflow(x, skb);
76 err = -EOVERFLOW;
77 goto error;
78 }
79 if (xfrm_aevent_is_on(net))
80 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
81 } 74 }
82 75
83 x->curlft.bytes += skb->len; 76 x->curlft.bytes += skb->len;
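
The open-coded output sequence handling is replaced by a call through the per-state replay ops table. The series defines struct xfrm_replay in include/net/xfrm.h roughly as below (reproduced here for context; consult the header for the authoritative definition):

struct xfrm_replay {
	void	(*advance)(struct xfrm_state *x, __be32 net_seq);
	int	(*check)(struct xfrm_state *x, struct sk_buff *skb,
			 __be32 net_seq);
	void	(*notify)(struct xfrm_state *x, int event);
	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
};

xfrm_init_replay(), at the end of xfrm_replay.c below, selects one of three implementations per state: the legacy 32-bit window, a larger bitmap window, or the ESN variant.
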
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6459588befc..15792d8b627 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -50,37 +50,40 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); 50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
51static void xfrm_init_pmtu(struct dst_entry *dst); 51static void xfrm_init_pmtu(struct dst_entry *dst);
52static int stale_bundle(struct dst_entry *dst); 52static int stale_bundle(struct dst_entry *dst);
53static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, 53static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family);
54 struct flowi *fl, int family, int strict);
55 54
56 55
57static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 56static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
58 int dir); 57 int dir);
59 58
60static inline int 59static inline int
61__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) 60__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
62{ 61{
63 return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && 62 const struct flowi4 *fl4 = &fl->u.ip4;
64 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && 63
65 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && 64 return addr_match(&fl4->daddr, &sel->daddr, sel->prefixlen_d) &&
66 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && 65 addr_match(&fl4->saddr, &sel->saddr, sel->prefixlen_s) &&
67 (fl->proto == sel->proto || !sel->proto) && 66 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
68 (fl->oif == sel->ifindex || !sel->ifindex); 67 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
68 (fl4->flowi4_proto == sel->proto || !sel->proto) &&
69 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
69} 70}
70 71
71static inline int 72static inline int
72__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) 73__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
73{ 74{
74 return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && 75 const struct flowi6 *fl6 = &fl->u.ip6;
75 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && 76
76 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && 77 return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
77 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && 78 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
78 (fl->proto == sel->proto || !sel->proto) && 79 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
79 (fl->oif == sel->ifindex || !sel->ifindex); 80 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
81 (fl6->flowi6_proto == sel->proto || !sel->proto) &&
82 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
80} 83}
81 84
82int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, 85int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
83 unsigned short family) 86 unsigned short family)
84{ 87{
85 switch (family) { 88 switch (family) {
86 case AF_INET: 89 case AF_INET:
@@ -92,8 +95,8 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
92} 95}
93 96
94static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, 97static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
95 xfrm_address_t *saddr, 98 const xfrm_address_t *saddr,
96 xfrm_address_t *daddr, 99 const xfrm_address_t *daddr,
97 int family) 100 int family)
98{ 101{
99 struct xfrm_policy_afinfo *afinfo; 102 struct xfrm_policy_afinfo *afinfo;
@@ -311,7 +314,9 @@ static inline unsigned int idx_hash(struct net *net, u32 index)
311 return __idx_hash(index, net->xfrm.policy_idx_hmask); 314 return __idx_hash(index, net->xfrm.policy_idx_hmask);
312} 315}
313 316
314static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) 317static struct hlist_head *policy_hash_bysel(struct net *net,
318 const struct xfrm_selector *sel,
319 unsigned short family, int dir)
315{ 320{
316 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 321 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
317 unsigned int hash = __sel_hash(sel, family, hmask); 322 unsigned int hash = __sel_hash(sel, family, hmask);
@@ -321,7 +326,10 @@ static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selecto
321 net->xfrm.policy_bydst[dir].table + hash); 326 net->xfrm.policy_bydst[dir].table + hash);
322} 327}
323 328
324static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) 329static struct hlist_head *policy_hash_direct(struct net *net,
330 const xfrm_address_t *daddr,
331 const xfrm_address_t *saddr,
332 unsigned short family, int dir)
325{ 333{
326 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 334 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
327 unsigned int hash = __addr_hash(daddr, saddr, family, hmask); 335 unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
@@ -864,32 +872,33 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
864 * 872 *
865 * Returns 0 if policy found, else an -errno. 873 * Returns 0 if policy found, else an -errno.
866 */ 874 */
867static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, 875static int xfrm_policy_match(const struct xfrm_policy *pol,
876 const struct flowi *fl,
868 u8 type, u16 family, int dir) 877 u8 type, u16 family, int dir)
869{ 878{
870 struct xfrm_selector *sel = &pol->selector; 879 const struct xfrm_selector *sel = &pol->selector;
871 int match, ret = -ESRCH; 880 int match, ret = -ESRCH;
872 881
873 if (pol->family != family || 882 if (pol->family != family ||
874 (fl->mark & pol->mark.m) != pol->mark.v || 883 (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
875 pol->type != type) 884 pol->type != type)
876 return ret; 885 return ret;
877 886
878 match = xfrm_selector_match(sel, fl, family); 887 match = xfrm_selector_match(sel, fl, family);
879 if (match) 888 if (match)
880 ret = security_xfrm_policy_lookup(pol->security, fl->secid, 889 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
881 dir); 890 dir);
882 891
883 return ret; 892 return ret;
884} 893}
885 894
886static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, 895static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
887 struct flowi *fl, 896 const struct flowi *fl,
888 u16 family, u8 dir) 897 u16 family, u8 dir)
889{ 898{
890 int err; 899 int err;
891 struct xfrm_policy *pol, *ret; 900 struct xfrm_policy *pol, *ret;
892 xfrm_address_t *daddr, *saddr; 901 const xfrm_address_t *daddr, *saddr;
893 struct hlist_node *entry; 902 struct hlist_node *entry;
894 struct hlist_head *chain; 903 struct hlist_head *chain;
895 u32 priority = ~0U; 904 u32 priority = ~0U;
@@ -941,7 +950,7 @@ fail:
941} 950}
942 951
943static struct xfrm_policy * 952static struct xfrm_policy *
944__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) 953__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
945{ 954{
946#ifdef CONFIG_XFRM_SUB_POLICY 955#ifdef CONFIG_XFRM_SUB_POLICY
947 struct xfrm_policy *pol; 956 struct xfrm_policy *pol;
@@ -954,7 +963,7 @@ __xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
954} 963}
955 964
956static struct flow_cache_object * 965static struct flow_cache_object *
957xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, 966xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
958 u8 dir, struct flow_cache_object *old_obj, void *ctx) 967 u8 dir, struct flow_cache_object *old_obj, void *ctx)
959{ 968{
960 struct xfrm_policy *pol; 969 struct xfrm_policy *pol;
@@ -990,7 +999,8 @@ static inline int policy_to_flow_dir(int dir)
990 } 999 }
991} 1000}
992 1001
993static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) 1002static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
1003 const struct flowi *fl)
994{ 1004{
995 struct xfrm_policy *pol; 1005 struct xfrm_policy *pol;
996 1006
@@ -1006,7 +1016,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
1006 goto out; 1016 goto out;
1007 } 1017 }
1008 err = security_xfrm_policy_lookup(pol->security, 1018 err = security_xfrm_policy_lookup(pol->security,
1009 fl->secid, 1019 fl->flowi_secid,
1010 policy_to_flow_dir(dir)); 1020 policy_to_flow_dir(dir));
1011 if (!err) 1021 if (!err)
1012 xfrm_pol_hold(pol); 1022 xfrm_pol_hold(pol);
@@ -1098,7 +1108,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1098 return 0; 1108 return 0;
1099} 1109}
1100 1110
1101static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) 1111static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1102{ 1112{
1103 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); 1113 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1104 1114
@@ -1157,9 +1167,8 @@ xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1157/* Resolve list of templates for the flow, given policy. */ 1167/* Resolve list of templates for the flow, given policy. */
1158 1168
1159static int 1169static int
1160xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, 1170xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1161 struct xfrm_state **xfrm, 1171 struct xfrm_state **xfrm, unsigned short family)
1162 unsigned short family)
1163{ 1172{
1164 struct net *net = xp_net(policy); 1173 struct net *net = xp_net(policy);
1165 int nx; 1174 int nx;
@@ -1214,9 +1223,8 @@ fail:
1214} 1223}
1215 1224
1216static int 1225static int
1217xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, 1226xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
1218 struct xfrm_state **xfrm, 1227 struct xfrm_state **xfrm, unsigned short family)
1219 unsigned short family)
1220{ 1228{
1221 struct xfrm_state *tp[XFRM_MAX_DEPTH]; 1229 struct xfrm_state *tp[XFRM_MAX_DEPTH];
1222 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; 1230 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
@@ -1256,7 +1264,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1256 * still valid. 1264 * still valid.
1257 */ 1265 */
1258 1266
1259static inline int xfrm_get_tos(struct flowi *fl, int family) 1267static inline int xfrm_get_tos(const struct flowi *fl, int family)
1260{ 1268{
1261 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1269 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1262 int tos; 1270 int tos;
@@ -1340,7 +1348,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1340 default: 1348 default:
1341 BUG(); 1349 BUG();
1342 } 1350 }
1343 xdst = dst_alloc(dst_ops); 1351 xdst = dst_alloc(dst_ops, 0);
1344 xfrm_policy_put_afinfo(afinfo); 1352 xfrm_policy_put_afinfo(afinfo);
1345 1353
1346 if (likely(xdst)) 1354 if (likely(xdst))
@@ -1369,7 +1377,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1369} 1377}
1370 1378
1371static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 1379static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1372 struct flowi *fl) 1380 const struct flowi *fl)
1373{ 1381{
1374 struct xfrm_policy_afinfo *afinfo = 1382 struct xfrm_policy_afinfo *afinfo =
1375 xfrm_policy_get_afinfo(xdst->u.dst.ops->family); 1383 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
@@ -1392,7 +1400,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1392 1400
1393static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, 1401static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1394 struct xfrm_state **xfrm, int nx, 1402 struct xfrm_state **xfrm, int nx,
1395 struct flowi *fl, 1403 const struct flowi *fl,
1396 struct dst_entry *dst) 1404 struct dst_entry *dst)
1397{ 1405{
1398 struct net *net = xp_net(policy); 1406 struct net *net = xp_net(policy);
@@ -1508,7 +1516,7 @@ free_dst:
1508} 1516}
1509 1517
1510static int inline 1518static int inline
1511xfrm_dst_alloc_copy(void **target, void *src, int size) 1519xfrm_dst_alloc_copy(void **target, const void *src, int size)
1512{ 1520{
1513 if (!*target) { 1521 if (!*target) {
1514 *target = kmalloc(size, GFP_ATOMIC); 1522 *target = kmalloc(size, GFP_ATOMIC);
@@ -1520,7 +1528,7 @@ xfrm_dst_alloc_copy(void **target, void *src, int size)
1520} 1528}
1521 1529
1522static int inline 1530static int inline
1523xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) 1531xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel)
1524{ 1532{
1525#ifdef CONFIG_XFRM_SUB_POLICY 1533#ifdef CONFIG_XFRM_SUB_POLICY
1526 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1534 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1532,7 +1540,7 @@ xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
1532} 1540}
1533 1541
1534static int inline 1542static int inline
1535xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) 1543xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl)
1536{ 1544{
1537#ifdef CONFIG_XFRM_SUB_POLICY 1545#ifdef CONFIG_XFRM_SUB_POLICY
1538 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 1546 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1542,7 +1550,7 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
1542#endif 1550#endif
1543} 1551}
1544 1552
1545static int xfrm_expand_policies(struct flowi *fl, u16 family, 1553static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1546 struct xfrm_policy **pols, 1554 struct xfrm_policy **pols,
1547 int *num_pols, int *num_xfrms) 1555 int *num_pols, int *num_xfrms)
1548{ 1556{
@@ -1588,7 +1596,7 @@ static int xfrm_expand_policies(struct flowi *fl, u16 family,
1588 1596
1589static struct xfrm_dst * 1597static struct xfrm_dst *
1590xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, 1598xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1591 struct flowi *fl, u16 family, 1599 const struct flowi *fl, u16 family,
1592 struct dst_entry *dst_orig) 1600 struct dst_entry *dst_orig)
1593{ 1601{
1594 struct net *net = xp_net(pols[0]); 1602 struct net *net = xp_net(pols[0]);
@@ -1631,7 +1639,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1631} 1639}
1632 1640
1633static struct flow_cache_object * 1641static struct flow_cache_object *
1634xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, 1642xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
1635 struct flow_cache_object *oldflo, void *ctx) 1643 struct flow_cache_object *oldflo, void *ctx)
1636{ 1644{
1637 struct dst_entry *dst_orig = (struct dst_entry *)ctx; 1645 struct dst_entry *dst_orig = (struct dst_entry *)ctx;
@@ -1730,18 +1738,36 @@ error:
1730 return ERR_PTR(err); 1738 return ERR_PTR(err);
1731} 1739}
1732 1740
1741static struct dst_entry *make_blackhole(struct net *net, u16 family,
1742 struct dst_entry *dst_orig)
1743{
1744 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1745 struct dst_entry *ret;
1746
1747 if (!afinfo) {
1748 dst_release(dst_orig);
1749 ret = ERR_PTR(-EINVAL);
1750 } else {
1751 ret = afinfo->blackhole_route(net, dst_orig);
1752 }
1753 xfrm_policy_put_afinfo(afinfo);
1754
1755 return ret;
1756}
1757
1733/* Main function: finds/creates a bundle for given flow. 1758/* Main function: finds/creates a bundle for given flow.
1734 * 1759 *
1735 * At the moment we eat a raw IP route. Mostly to speed up lookups 1760 * At the moment we eat a raw IP route. Mostly to speed up lookups
1736 * on interfaces with disabled IPsec. 1761 * on interfaces with disabled IPsec.
1737 */ 1762 */
1738int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, 1763struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
1739 struct sock *sk, int flags) 1764 const struct flowi *fl,
1765 struct sock *sk, int flags)
1740{ 1766{
1741 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 1767 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1742 struct flow_cache_object *flo; 1768 struct flow_cache_object *flo;
1743 struct xfrm_dst *xdst; 1769 struct xfrm_dst *xdst;
1744 struct dst_entry *dst, *dst_orig = *dst_p, *route; 1770 struct dst_entry *dst, *route;
1745 u16 family = dst_orig->ops->family; 1771 u16 family = dst_orig->ops->family;
1746 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); 1772 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1747 int i, err, num_pols, num_xfrms = 0, drop_pols = 0; 1773 int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
@@ -1778,6 +1804,8 @@ restart:
1778 goto no_transform; 1804 goto no_transform;
1779 } 1805 }
1780 1806
1807 dst_hold(&xdst->u.dst);
1808
1781 spin_lock_bh(&xfrm_policy_sk_bundle_lock); 1809 spin_lock_bh(&xfrm_policy_sk_bundle_lock);
1782 xdst->u.dst.next = xfrm_policy_sk_bundles; 1810 xdst->u.dst.next = xfrm_policy_sk_bundles;
1783 xfrm_policy_sk_bundles = &xdst->u.dst; 1811 xfrm_policy_sk_bundles = &xdst->u.dst;
@@ -1823,9 +1851,10 @@ restart:
1823 dst_release(dst); 1851 dst_release(dst);
1824 xfrm_pols_put(pols, drop_pols); 1852 xfrm_pols_put(pols, drop_pols);
1825 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 1853 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1826 return -EREMOTE; 1854
1855 return make_blackhole(net, family, dst_orig);
1827 } 1856 }
1828 if (flags & XFRM_LOOKUP_WAIT) { 1857 if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) {
1829 DECLARE_WAITQUEUE(wait, current); 1858 DECLARE_WAITQUEUE(wait, current);
1830 1859
1831 add_wait_queue(&net->xfrm.km_waitq, &wait); 1860 add_wait_queue(&net->xfrm.km_waitq, &wait);
@@ -1867,47 +1896,33 @@ no_transform:
1867 goto error; 1896 goto error;
1868 } else if (num_xfrms > 0) { 1897 } else if (num_xfrms > 0) {
1869 /* Flow transformed */ 1898 /* Flow transformed */
1870 *dst_p = dst;
1871 dst_release(dst_orig); 1899 dst_release(dst_orig);
1872 } else { 1900 } else {
1873 /* Flow passes untransformed */ 1901 /* Flow passes untransformed */
1874 dst_release(dst); 1902 dst_release(dst);
1903 dst = dst_orig;
1875 } 1904 }
1876ok: 1905ok:
1877 xfrm_pols_put(pols, drop_pols); 1906 xfrm_pols_put(pols, drop_pols);
1878 return 0; 1907 return dst;
1879 1908
1880nopol: 1909nopol:
1881 if (!(flags & XFRM_LOOKUP_ICMP)) 1910 if (!(flags & XFRM_LOOKUP_ICMP)) {
1911 dst = dst_orig;
1882 goto ok; 1912 goto ok;
1913 }
1883 err = -ENOENT; 1914 err = -ENOENT;
1884error: 1915error:
1885 dst_release(dst); 1916 dst_release(dst);
1886dropdst: 1917dropdst:
1887 dst_release(dst_orig); 1918 dst_release(dst_orig);
1888 *dst_p = NULL;
1889 xfrm_pols_put(pols, drop_pols); 1919 xfrm_pols_put(pols, drop_pols);
1890 return err; 1920 return ERR_PTR(err);
1891}
1892EXPORT_SYMBOL(__xfrm_lookup);
1893
1894int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
1895 struct sock *sk, int flags)
1896{
1897 int err = __xfrm_lookup(net, dst_p, fl, sk, flags);
1898
1899 if (err == -EREMOTE) {
1900 dst_release(*dst_p);
1901 *dst_p = NULL;
1902 err = -EAGAIN;
1903 }
1904
1905 return err;
1906} 1921}
1907EXPORT_SYMBOL(xfrm_lookup); 1922EXPORT_SYMBOL(xfrm_lookup);
1908 1923
1909static inline int 1924static inline int
1910xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) 1925xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
1911{ 1926{
1912 struct xfrm_state *x; 1927 struct xfrm_state *x;
1913 1928
@@ -1926,7 +1941,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1926 */ 1941 */
1927 1942
1928static inline int 1943static inline int
1929xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 1944xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
1930 unsigned short family) 1945 unsigned short family)
1931{ 1946{
1932 if (xfrm_state_kern(x)) 1947 if (xfrm_state_kern(x))
@@ -1949,7 +1964,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1949 * Otherwise "-2 - errored_index" is returned. 1964 * Otherwise "-2 - errored_index" is returned.
1950 */ 1965 */
1951static inline int 1966static inline int
1952xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, 1967xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
1953 unsigned short family) 1968 unsigned short family)
1954{ 1969{
1955 int idx = start; 1970 int idx = start;
@@ -1981,13 +1996,13 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1981 return -EAFNOSUPPORT; 1996 return -EAFNOSUPPORT;
1982 1997
1983 afinfo->decode_session(skb, fl, reverse); 1998 afinfo->decode_session(skb, fl, reverse);
1984 err = security_xfrm_decode_session(skb, &fl->secid); 1999 err = security_xfrm_decode_session(skb, &fl->flowi_secid);
1985 xfrm_policy_put_afinfo(afinfo); 2000 xfrm_policy_put_afinfo(afinfo);
1986 return err; 2001 return err;
1987} 2002}
1988EXPORT_SYMBOL(__xfrm_decode_session); 2003EXPORT_SYMBOL(__xfrm_decode_session);
1989 2004
1990static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) 2005static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
1991{ 2006{
1992 for (; k < sp->len; k++) { 2007 for (; k < sp->len; k++) {
1993 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { 2008 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
@@ -2162,7 +2177,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2162 struct net *net = dev_net(skb->dev); 2177 struct net *net = dev_net(skb->dev);
2163 struct flowi fl; 2178 struct flowi fl;
2164 struct dst_entry *dst; 2179 struct dst_entry *dst;
2165 int res; 2180 int res = 1;
2166 2181
2167 if (xfrm_decode_session(skb, &fl, family) < 0) { 2182 if (xfrm_decode_session(skb, &fl, family) < 0) {
2168 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); 2183 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
@@ -2170,9 +2185,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2170 } 2185 }
2171 2186
2172 skb_dst_force(skb); 2187 skb_dst_force(skb);
2173 dst = skb_dst(skb);
2174 2188
2175 res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; 2189 dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
2190 if (IS_ERR(dst)) {
2191 res = 0;
2192 dst = NULL;
2193 }
2176 skb_dst_set(skb, dst); 2194 skb_dst_set(skb, dst);
2177 return res; 2195 return res;
2178} 2196}
@@ -2210,7 +2228,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2210 2228
2211static int stale_bundle(struct dst_entry *dst) 2229static int stale_bundle(struct dst_entry *dst)
2212{ 2230{
2213 return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); 2231 return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC);
2214} 2232}
2215 2233
2216void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2234void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
@@ -2282,8 +2300,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
2282 * still valid. 2300 * still valid.
2283 */ 2301 */
2284 2302
2285static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, 2303static int xfrm_bundle_ok(struct xfrm_dst *first, int family)
2286 struct flowi *fl, int family, int strict)
2287{ 2304{
2288 struct dst_entry *dst = &first->u.dst; 2305 struct dst_entry *dst = &first->u.dst;
2289 struct xfrm_dst *last; 2306 struct xfrm_dst *last;
@@ -2292,26 +2309,12 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2292 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || 2309 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2293 (dst->dev && !netif_running(dst->dev))) 2310 (dst->dev && !netif_running(dst->dev)))
2294 return 0; 2311 return 0;
2295#ifdef CONFIG_XFRM_SUB_POLICY
2296 if (fl) {
2297 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2298 return 0;
2299 if (first->partner &&
2300 !xfrm_selector_match(first->partner, fl, family))
2301 return 0;
2302 }
2303#endif
2304 2312
2305 last = NULL; 2313 last = NULL;
2306 2314
2307 do { 2315 do {
2308 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2316 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2309 2317
2310 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2311 return 0;
2312 if (fl && pol &&
2313 !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2314 return 0;
2315 if (dst->xfrm->km.state != XFRM_STATE_VALID) 2318 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2316 return 0; 2319 return 0;
2317 if (xdst->xfrm_genid != dst->xfrm->genid) 2320 if (xdst->xfrm_genid != dst->xfrm->genid)
@@ -2320,11 +2323,6 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2320 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) 2323 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2321 return 0; 2324 return 0;
2322 2325
2323 if (strict && fl &&
2324 !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2325 !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2326 return 0;
2327
2328 mtu = dst_mtu(dst->child); 2326 mtu = dst_mtu(dst->child);
2329 if (xdst->child_mtu_cached != mtu) { 2327 if (xdst->child_mtu_cached != mtu) {
2330 last = xdst; 2328 last = xdst;
@@ -2735,8 +2733,8 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2735#endif 2733#endif
2736 2734
2737#ifdef CONFIG_XFRM_MIGRATE 2735#ifdef CONFIG_XFRM_MIGRATE
2738static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, 2736static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
2739 struct xfrm_selector *sel_tgt) 2737 const struct xfrm_selector *sel_tgt)
2740{ 2738{
2741 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { 2739 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2742 if (sel_tgt->family == sel_cmp->family && 2740 if (sel_tgt->family == sel_cmp->family &&
@@ -2756,7 +2754,7 @@ static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2756 return 0; 2754 return 0;
2757} 2755}
2758 2756
2759static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, 2757static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel,
2760 u8 dir, u8 type) 2758 u8 dir, u8 type)
2761{ 2759{
2762 struct xfrm_policy *pol, *ret = NULL; 2760 struct xfrm_policy *pol, *ret = NULL;
@@ -2792,7 +2790,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2792 return ret; 2790 return ret;
2793} 2791}
2794 2792
2795static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) 2793static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
2796{ 2794{
2797 int match = 0; 2795 int match = 0;
2798 2796
@@ -2862,7 +2860,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
2862 return 0; 2860 return 0;
2863} 2861}
2864 2862
2865static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) 2863static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
2866{ 2864{
2867 int i, j; 2865 int i, j;
2868 2866
@@ -2896,7 +2894,7 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2896 return 0; 2894 return 0;
2897} 2895}
2898 2896
2899int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, 2897int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2900 struct xfrm_migrate *m, int num_migrate, 2898 struct xfrm_migrate *m, int num_migrate,
2901 struct xfrm_kmaddress *k) 2899 struct xfrm_kmaddress *k)
2902{ 2900{
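
The xfrm_policy.c interface change is worth restating: xfrm_lookup() now returns the resulting dst_entry (or an ERR_PTR) instead of filling a struct dst_entry ** and returning 0/-errno, and the old -EREMOTE case is replaced by handing back a blackhole route from make_blackhole(). A caller-side fragment sketching the new contract (the error label is illustrative):

dst = xfrm_lookup(net, dst_orig, &fl, sk, 0);
if (IS_ERR(dst)) {
	err = PTR_ERR(dst);	/* dst_orig has already been released */
	goto route_err;
}
/* dst is either dst_orig (no transform) or a new bundle */
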
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
new file mode 100644
index 00000000000..2f5be5b1574
--- /dev/null
+++ b/net/xfrm/xfrm_replay.c
@@ -0,0 +1,534 @@
1/*
2 * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c.
3 *
4 * Copyright (C) 2010 secunet Security Networks AG
5 * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21#include <net/xfrm.h>
22
23u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
24{
25 u32 seq, seq_hi, bottom;
26 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
27
28 if (!(x->props.flags & XFRM_STATE_ESN))
29 return 0;
30
31 seq = ntohl(net_seq);
32 seq_hi = replay_esn->seq_hi;
33 bottom = replay_esn->seq - replay_esn->replay_window + 1;
34
35 if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) {
36 /* A. same subspace */
37 if (unlikely(seq < bottom))
38 seq_hi++;
39 } else {
40 /* B. window spans two subspaces */
41 if (unlikely(seq >= bottom))
42 seq_hi--;
43 }
44
45 return seq_hi;
46}
47
48static void xfrm_replay_notify(struct xfrm_state *x, int event)
49{
50 struct km_event c;
51 /* we send notify messages in case
 52 * 1. we updated one of the sequence numbers, and the seqno difference
53 * is at least x->replay_maxdiff, in this case we also update the
54 * timeout of our timer function
55 * 2. if x->replay_maxage has elapsed since last update,
56 * and there were changes
57 *
58 * The state structure must be locked!
59 */
60
61 switch (event) {
62 case XFRM_REPLAY_UPDATE:
63 if (x->replay_maxdiff &&
64 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
65 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
66 if (x->xflags & XFRM_TIME_DEFER)
67 event = XFRM_REPLAY_TIMEOUT;
68 else
69 return;
70 }
71
72 break;
73
74 case XFRM_REPLAY_TIMEOUT:
75 if (memcmp(&x->replay, &x->preplay,
76 sizeof(struct xfrm_replay_state)) == 0) {
77 x->xflags |= XFRM_TIME_DEFER;
78 return;
79 }
80
81 break;
82 }
83
84 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
85 c.event = XFRM_MSG_NEWAE;
86 c.data.aevent = event;
87 km_state_notify(x, &c);
88
89 if (x->replay_maxage &&
90 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
91 x->xflags &= ~XFRM_TIME_DEFER;
92}
93
94static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
95{
96 int err = 0;
97 struct net *net = xs_net(x);
98
99 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
100 XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq;
101 if (unlikely(x->replay.oseq == 0)) {
102 x->replay.oseq--;
103 xfrm_audit_state_replay_overflow(x, skb);
104 err = -EOVERFLOW;
105
106 return err;
107 }
108 if (xfrm_aevent_is_on(net))
109 x->repl->notify(x, XFRM_REPLAY_UPDATE);
110 }
111
112 return err;
113}
114
115static int xfrm_replay_check(struct xfrm_state *x,
116 struct sk_buff *skb, __be32 net_seq)
117{
118 u32 diff;
119 u32 seq = ntohl(net_seq);
120
121 if (unlikely(seq == 0))
122 goto err;
123
124 if (likely(seq > x->replay.seq))
125 return 0;
126
127 diff = x->replay.seq - seq;
128 if (diff >= min_t(unsigned int, x->props.replay_window,
129 sizeof(x->replay.bitmap) * 8)) {
130 x->stats.replay_window++;
131 goto err;
132 }
133
134 if (x->replay.bitmap & (1U << diff)) {
135 x->stats.replay++;
136 goto err;
137 }
138 return 0;
139
140err:
141 xfrm_audit_state_replay(x, skb, net_seq);
142 return -EINVAL;
143}
144
145static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
146{
147 u32 diff;
148 u32 seq = ntohl(net_seq);
149
150 if (!x->props.replay_window)
151 return;
152
153 if (seq > x->replay.seq) {
154 diff = seq - x->replay.seq;
155 if (diff < x->props.replay_window)
156 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
157 else
158 x->replay.bitmap = 1;
159 x->replay.seq = seq;
160 } else {
161 diff = x->replay.seq - seq;
162 x->replay.bitmap |= (1U << diff);
163 }
164
165 if (xfrm_aevent_is_on(xs_net(x)))
166 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
167}
168
169static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
170{
171 int err = 0;
172 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
173 struct net *net = xs_net(x);
174
175 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
176 XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq;
177 if (unlikely(replay_esn->oseq == 0)) {
178 replay_esn->oseq--;
179 xfrm_audit_state_replay_overflow(x, skb);
180 err = -EOVERFLOW;
181
182 return err;
183 }
184 if (xfrm_aevent_is_on(net))
185 x->repl->notify(x, XFRM_REPLAY_UPDATE);
186 }
187
188 return err;
189}
190
191static int xfrm_replay_check_bmp(struct xfrm_state *x,
192 struct sk_buff *skb, __be32 net_seq)
193{
194 unsigned int bitnr, nr;
195 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
196 u32 seq = ntohl(net_seq);
197 u32 diff = replay_esn->seq - seq;
198 u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
199
200 if (unlikely(seq == 0))
201 goto err;
202
203 if (likely(seq > replay_esn->seq))
204 return 0;
205
206 if (diff >= replay_esn->replay_window) {
207 x->stats.replay_window++;
208 goto err;
209 }
210
211 if (pos >= diff) {
212 bitnr = (pos - diff) % replay_esn->replay_window;
213 nr = bitnr >> 5;
214 bitnr = bitnr & 0x1F;
215 if (replay_esn->bmp[nr] & (1U << bitnr))
216 goto err_replay;
217 } else {
218 bitnr = replay_esn->replay_window - (diff - pos);
219 nr = bitnr >> 5;
220 bitnr = bitnr & 0x1F;
221 if (replay_esn->bmp[nr] & (1U << bitnr))
222 goto err_replay;
223 }
224 return 0;
225
226err_replay:
227 x->stats.replay++;
228err:
229 xfrm_audit_state_replay(x, skb, net_seq);
230 return -EINVAL;
231}
232
233static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
234{
235 unsigned int bitnr, nr, i;
236 u32 diff;
237 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
238 u32 seq = ntohl(net_seq);
239 u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
240
241 if (!replay_esn->replay_window)
242 return;
243
244 if (seq > replay_esn->seq) {
245 diff = seq - replay_esn->seq;
246
247 if (diff < replay_esn->replay_window) {
248 for (i = 1; i < diff; i++) {
249 bitnr = (pos + i) % replay_esn->replay_window;
250 nr = bitnr >> 5;
251 bitnr = bitnr & 0x1F;
252 replay_esn->bmp[nr] &= ~(1U << bitnr);
253 }
254
255 bitnr = (pos + diff) % replay_esn->replay_window;
256 nr = bitnr >> 5;
257 bitnr = bitnr & 0x1F;
258 replay_esn->bmp[nr] |= (1U << bitnr);
259 } else {
260 nr = replay_esn->replay_window >> 5;
261 for (i = 0; i <= nr; i++)
262 replay_esn->bmp[i] = 0;
263
264 bitnr = (pos + diff) % replay_esn->replay_window;
265 nr = bitnr >> 5;
266 bitnr = bitnr & 0x1F;
267 replay_esn->bmp[nr] |= (1U << bitnr);
268 }
269
270 replay_esn->seq = seq;
271 } else {
272 diff = replay_esn->seq - seq;
273
274 if (pos >= diff) {
275 bitnr = (pos - diff) % replay_esn->replay_window;
276 nr = bitnr >> 5;
277 bitnr = bitnr & 0x1F;
278 replay_esn->bmp[nr] |= (1U << bitnr);
279 } else {
280 bitnr = replay_esn->replay_window - (diff - pos);
281 nr = bitnr >> 5;
282 bitnr = bitnr & 0x1F;
283 replay_esn->bmp[nr] |= (1U << bitnr);
284 }
285 }
286
287 if (xfrm_aevent_is_on(xs_net(x)))
288 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
289}
290
291static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
292{
293 struct km_event c;
294 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
295 struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn;
296
297 /* we send notify messages in case
 298 * 1. we updated one of the sequence numbers, and the seqno difference
299 * is at least x->replay_maxdiff, in this case we also update the
300 * timeout of our timer function
301 * 2. if x->replay_maxage has elapsed since last update,
302 * and there were changes
303 *
304 * The state structure must be locked!
305 */
306
307 switch (event) {
308 case XFRM_REPLAY_UPDATE:
309 if (x->replay_maxdiff &&
310 (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
311 (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) {
312 if (x->xflags & XFRM_TIME_DEFER)
313 event = XFRM_REPLAY_TIMEOUT;
314 else
315 return;
316 }
317
318 break;
319
320 case XFRM_REPLAY_TIMEOUT:
321 if (memcmp(x->replay_esn, x->preplay_esn,
322 xfrm_replay_state_esn_len(replay_esn)) == 0) {
323 x->xflags |= XFRM_TIME_DEFER;
324 return;
325 }
326
327 break;
328 }
329
330 memcpy(x->preplay_esn, x->replay_esn,
331 xfrm_replay_state_esn_len(replay_esn));
332 c.event = XFRM_MSG_NEWAE;
333 c.data.aevent = event;
334 km_state_notify(x, &c);
335
336 if (x->replay_maxage &&
337 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
338 x->xflags &= ~XFRM_TIME_DEFER;
339}
340
341static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb)
342{
343 int err = 0;
344 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
345 struct net *net = xs_net(x);
346
347 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
348 XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq;
349 XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi;
350
351 if (unlikely(replay_esn->oseq == 0)) {
352 XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi;
353
354 if (replay_esn->oseq_hi == 0) {
355 replay_esn->oseq--;
356 replay_esn->oseq_hi--;
357 xfrm_audit_state_replay_overflow(x, skb);
358 err = -EOVERFLOW;
359
360 return err;
361 }
362 }
363 if (xfrm_aevent_is_on(net))
364 x->repl->notify(x, XFRM_REPLAY_UPDATE);
365 }
366
367 return err;
368}
369
370static int xfrm_replay_check_esn(struct xfrm_state *x,
371 struct sk_buff *skb, __be32 net_seq)
372{
373 unsigned int bitnr, nr;
374 u32 diff;
375 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
376 u32 seq = ntohl(net_seq);
377 u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
378 u32 wsize = replay_esn->replay_window;
379 u32 top = replay_esn->seq;
380 u32 bottom = top - wsize + 1;
381
382 if (unlikely(seq == 0 && replay_esn->seq_hi == 0 &&
383 (replay_esn->seq < replay_esn->replay_window - 1)))
384 goto err;
385
386 diff = top - seq;
387
388 if (likely(top >= wsize - 1)) {
389 /* A. same subspace */
390 if (likely(seq > top) || seq < bottom)
391 return 0;
392 } else {
393 /* B. window spans two subspaces */
394 if (likely(seq > top && seq < bottom))
395 return 0;
396 if (seq >= bottom)
397 diff = ~seq + top + 1;
398 }
399
400 if (diff >= replay_esn->replay_window) {
401 x->stats.replay_window++;
402 goto err;
403 }
404
405 if (pos >= diff) {
406 bitnr = (pos - diff) % replay_esn->replay_window;
407 nr = bitnr >> 5;
408 bitnr = bitnr & 0x1F;
409 if (replay_esn->bmp[nr] & (1U << bitnr))
410 goto err_replay;
411 } else {
412 bitnr = replay_esn->replay_window - (diff - pos);
413 nr = bitnr >> 5;
414 bitnr = bitnr & 0x1F;
415 if (replay_esn->bmp[nr] & (1U << bitnr))
416 goto err_replay;
417 }
418 return 0;
419
420err_replay:
421 x->stats.replay++;
422err:
423 xfrm_audit_state_replay(x, skb, net_seq);
424 return -EINVAL;
425}
426
427static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
428{
429 unsigned int bitnr, nr, i;
430 int wrap;
431 u32 diff, pos, seq, seq_hi;
432 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
433
434 if (!replay_esn->replay_window)
435 return;
436
437 seq = ntohl(net_seq);
438 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
439 seq_hi = xfrm_replay_seqhi(x, net_seq);
440 wrap = seq_hi - replay_esn->seq_hi;
441
442 if ((!wrap && seq > replay_esn->seq) || wrap > 0) {
443 if (likely(!wrap))
444 diff = seq - replay_esn->seq;
445 else
446 diff = ~replay_esn->seq + seq + 1;
447
448 if (diff < replay_esn->replay_window) {
449 for (i = 1; i < diff; i++) {
450 bitnr = (pos + i) % replay_esn->replay_window;
451 nr = bitnr >> 5;
452 bitnr = bitnr & 0x1F;
453 replay_esn->bmp[nr] &= ~(1U << bitnr);
454 }
455
456 bitnr = (pos + diff) % replay_esn->replay_window;
457 nr = bitnr >> 5;
458 bitnr = bitnr & 0x1F;
459 replay_esn->bmp[nr] |= (1U << bitnr);
460 } else {
461 nr = replay_esn->replay_window >> 5;
462 for (i = 0; i <= nr; i++)
463 replay_esn->bmp[i] = 0;
464
465 bitnr = (pos + diff) % replay_esn->replay_window;
466 nr = bitnr >> 5;
467 bitnr = bitnr & 0x1F;
468 replay_esn->bmp[nr] |= (1U << bitnr);
469 }
470
471 replay_esn->seq = seq;
472
473 if (unlikely(wrap > 0))
474 replay_esn->seq_hi++;
475 } else {
476 diff = replay_esn->seq - seq;
477
478 if (pos >= diff) {
479 bitnr = (pos - diff) % replay_esn->replay_window;
480 nr = bitnr >> 5;
481 bitnr = bitnr & 0x1F;
482 replay_esn->bmp[nr] |= (1U << bitnr);
483 } else {
484 bitnr = replay_esn->replay_window - (diff - pos);
485 nr = bitnr >> 5;
486 bitnr = bitnr & 0x1F;
487 replay_esn->bmp[nr] |= (1U << bitnr);
488 }
489 }
490
491 if (xfrm_aevent_is_on(xs_net(x)))
492 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
493}
494
495static struct xfrm_replay xfrm_replay_legacy = {
496 .advance = xfrm_replay_advance,
497 .check = xfrm_replay_check,
498 .notify = xfrm_replay_notify,
499 .overflow = xfrm_replay_overflow,
500};
501
502static struct xfrm_replay xfrm_replay_bmp = {
503 .advance = xfrm_replay_advance_bmp,
504 .check = xfrm_replay_check_bmp,
505 .notify = xfrm_replay_notify_bmp,
506 .overflow = xfrm_replay_overflow_bmp,
507};
508
509static struct xfrm_replay xfrm_replay_esn = {
510 .advance = xfrm_replay_advance_esn,
511 .check = xfrm_replay_check_esn,
512 .notify = xfrm_replay_notify_bmp,
513 .overflow = xfrm_replay_overflow_esn,
514};
515
516int xfrm_init_replay(struct xfrm_state *x)
517{
518 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
519
520 if (replay_esn) {
521 if (replay_esn->replay_window >
522 replay_esn->bmp_len * sizeof(__u32))
523 return -EINVAL;
524
525 if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn)
526 x->repl = &xfrm_replay_esn;
527 else
528 x->repl = &xfrm_replay_bmp;
529 } else
530 x->repl = &xfrm_replay_legacy;
531
532 return 0;
533}
534EXPORT_SYMBOL(xfrm_init_replay);
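
To make the window arithmetic above concrete, here is a standalone userspace model of the legacy 32-bit window that xfrm_replay_check()/xfrm_replay_advance() implement: bit 0 of the bitmap represents the highest sequence number seen, and bit n the value n below it. The names and the demo in main() are illustrative, not part of the patch.

#include <stdint.h>
#include <stdio.h>

struct replay_win {
	uint32_t last;		/* highest sequence number seen */
	uint32_t bitmap;	/* bit 0 == last, bit n == last - n */
	unsigned int window;	/* window size, at most 32 */
};

static int replay_check(const struct replay_win *w, uint32_t seq)
{
	uint32_t diff;

	if (seq == 0)
		return -1;		/* sequence number 0 is never valid */
	if (seq > w->last)
		return 0;		/* new highest: always accepted */
	diff = w->last - seq;
	if (diff >= w->window)
		return -1;		/* too old: fell out of the window */
	if (w->bitmap & (1U << diff))
		return -1;		/* already seen: replay */
	return 0;
}

static void replay_advance(struct replay_win *w, uint32_t seq)
{
	if (seq > w->last) {
		uint32_t diff = seq - w->last;

		/* slide the window forward; far jumps reset the bitmap */
		w->bitmap = diff < w->window ? (w->bitmap << diff) | 1 : 1;
		w->last = seq;
	} else {
		w->bitmap |= 1U << (w->last - seq);	/* mark as seen */
	}
}

int main(void)
{
	struct replay_win w = { .last = 0, .bitmap = 0, .window = 32 };
	uint32_t seqs[] = { 1, 2, 2, 5, 3, 40, 9 };
	unsigned int i;

	for (i = 0; i < sizeof(seqs) / sizeof(seqs[0]); i++) {
		int bad = replay_check(&w, seqs[i]);

		printf("seq %u: %s\n", seqs[i], bad ? "drop" : "accept");
		if (!bad)
			replay_advance(&w, seqs[i]);
	}
	return 0;	/* the duplicate 2 is dropped, everything else accepted */
}

The bmp and esn variants above follow the same scheme with a multi-word bitmap indexed modulo the window size; the esn variant additionally tracks seq_hi so the window can span a 32-bit wraparound.
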
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 220ebc05c7a..f83a3d1da81 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -42,16 +42,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
42static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); 42static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
43static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); 43static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
44 44
45#ifdef CONFIG_AUDITSYSCALL
46static void xfrm_audit_state_replay(struct xfrm_state *x,
47 struct sk_buff *skb, __be32 net_seq);
48#else
49#define xfrm_audit_state_replay(x, s, sq) do { ; } while (0)
50#endif /* CONFIG_AUDITSYSCALL */
51
52static inline unsigned int xfrm_dst_hash(struct net *net, 45static inline unsigned int xfrm_dst_hash(struct net *net,
53 xfrm_address_t *daddr, 46 const xfrm_address_t *daddr,
54 xfrm_address_t *saddr, 47 const xfrm_address_t *saddr,
55 u32 reqid, 48 u32 reqid,
56 unsigned short family) 49 unsigned short family)
57{ 50{
@@ -59,15 +52,16 @@ static inline unsigned int xfrm_dst_hash(struct net *net,
59} 52}
60 53
61static inline unsigned int xfrm_src_hash(struct net *net, 54static inline unsigned int xfrm_src_hash(struct net *net,
62 xfrm_address_t *daddr, 55 const xfrm_address_t *daddr,
63 xfrm_address_t *saddr, 56 const xfrm_address_t *saddr,
64 unsigned short family) 57 unsigned short family)
65{ 58{
66 return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); 59 return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
67} 60}
68 61
69static inline unsigned int 62static inline unsigned int
70xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) 63xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
64 __be32 spi, u8 proto, unsigned short family)
71{ 65{
72 return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); 66 return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
73} 67}
@@ -362,6 +356,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 	kfree(x->calg);
 	kfree(x->encap);
 	kfree(x->coaddr);
+	kfree(x->replay_esn);
+	kfree(x->preplay_esn);
 	if (x->inner_mode)
 		xfrm_put_mode(x->inner_mode);
 	if (x->inner_mode_iaf)
@@ -656,9 +652,9 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 EXPORT_SYMBOL(xfrm_sad_getinfo);
 
 static int
-xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,
-		    struct xfrm_tmpl *tmpl,
-		    xfrm_address_t *daddr, xfrm_address_t *saddr,
+xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
+		    const struct xfrm_tmpl *tmpl,
+		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		    unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
@@ -677,7 +673,10 @@ xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,
 	return 0;
 }
 
-static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
+static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
+					      const xfrm_address_t *daddr,
+					      __be32 spi, u8 proto,
+					      unsigned short family)
 {
 	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 	struct xfrm_state *x;
@@ -699,7 +698,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad
 	return NULL;
 }
 
-static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
+static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
+						     const xfrm_address_t *daddr,
+						     const xfrm_address_t *saddr,
+						     u8 proto, unsigned short family)
 {
 	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 	struct xfrm_state *x;
@@ -746,8 +748,7 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
 }
 
 static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
-			       struct flowi *fl, unsigned short family,
-			       xfrm_address_t *daddr, xfrm_address_t *saddr,
+			       const struct flowi *fl, unsigned short family,
 			       struct xfrm_state **best, int *acq_in_progress,
 			       int *error)
 {
@@ -784,8 +785,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 }
 
 struct xfrm_state *
-xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
-		struct flowi *fl, struct xfrm_tmpl *tmpl,
+xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
+		const struct flowi *fl, struct xfrm_tmpl *tmpl,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
@@ -813,7 +814,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family,
 					   &best, &acquire_in_progress, &error);
 	}
 	if (best)
@@ -829,7 +830,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family,
 					   &best, &acquire_in_progress, &error);
 	}
 
@@ -853,7 +854,7 @@ found:
 		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
 
-		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
+		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
 		if (error) {
 			x->km.state = XFRM_STATE_DEAD;
 			to_put = x;
@@ -991,7 +992,11 @@ void xfrm_state_insert(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_insert);
 
 /* xfrm_state_lock is held */
-static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
+static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
+					  unsigned short family, u8 mode,
+					  u32 reqid, u8 proto,
+					  const xfrm_address_t *daddr,
+					  const xfrm_address_t *saddr, int create)
 {
 	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
 	struct hlist_node *entry;
@@ -1369,7 +1374,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_check_expire);
 
 struct xfrm_state *
-xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi,
+xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
 		  u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1383,7 +1388,7 @@ EXPORT_SYMBOL(xfrm_state_lookup);
 
 struct xfrm_state *
 xfrm_state_lookup_byaddr(struct net *net, u32 mark,
-			 xfrm_address_t *daddr, xfrm_address_t *saddr,
+			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			 u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1397,7 +1402,7 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto,
-	      xfrm_address_t *daddr, xfrm_address_t *saddr,
+	      const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	      int create, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1609,54 +1614,6 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk)
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);
 
-
-void xfrm_replay_notify(struct xfrm_state *x, int event)
-{
-	struct km_event c;
-	/* we send notify messages in case
-	 *  1. we updated on of the sequence numbers, and the seqno difference
-	 *     is at least x->replay_maxdiff, in this case we also update the
-	 *     timeout of our timer function
-	 *  2. if x->replay_maxage has elapsed since last update,
-	 *     and there were changes
-	 *
-	 *  The state structure must be locked!
-	 */
-
-	switch (event) {
-	case XFRM_REPLAY_UPDATE:
-		if (x->replay_maxdiff &&
-		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
-		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
-			if (x->xflags & XFRM_TIME_DEFER)
-				event = XFRM_REPLAY_TIMEOUT;
-			else
-				return;
-		}
-
-		break;
-
-	case XFRM_REPLAY_TIMEOUT:
-		if ((x->replay.seq == x->preplay.seq) &&
-		    (x->replay.bitmap == x->preplay.bitmap) &&
-		    (x->replay.oseq == x->preplay.oseq)) {
-			x->xflags |= XFRM_TIME_DEFER;
-			return;
-		}
-
-		break;
-	}
-
-	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
-	c.event = XFRM_MSG_NEWAE;
-	c.data.aevent = event;
-	km_state_notify(x, &c);
-
-	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
-		x->xflags &= ~XFRM_TIME_DEFER;
-}
-
 static void xfrm_replay_timer_handler(unsigned long data)
 {
 	struct xfrm_state *x = (struct xfrm_state*)data;
@@ -1665,7 +1622,7 @@ static void xfrm_replay_timer_handler(unsigned long data)
 
 	if (x->km.state == XFRM_STATE_VALID) {
 		if (xfrm_aevent_is_on(xs_net(x)))
-			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
+			x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
 		else
 			x->xflags |= XFRM_TIME_DEFER;
 	}
@@ -1673,61 +1630,10 @@ static void xfrm_replay_timer_handler(unsigned long data)
 	spin_unlock(&x->lock);
 }
 
-int xfrm_replay_check(struct xfrm_state *x,
-		      struct sk_buff *skb, __be32 net_seq)
-{
-	u32 diff;
-	u32 seq = ntohl(net_seq);
-
-	if (unlikely(seq == 0))
-		goto err;
-
-	if (likely(seq > x->replay.seq))
-		return 0;
-
-	diff = x->replay.seq - seq;
-	if (diff >= min_t(unsigned int, x->props.replay_window,
-			  sizeof(x->replay.bitmap) * 8)) {
-		x->stats.replay_window++;
-		goto err;
-	}
-
-	if (x->replay.bitmap & (1U << diff)) {
-		x->stats.replay++;
-		goto err;
-	}
-	return 0;
-
-err:
-	xfrm_audit_state_replay(x, skb, net_seq);
-	return -EINVAL;
-}
-
-void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
-{
-	u32 diff;
-	u32 seq = ntohl(net_seq);
-
-	if (seq > x->replay.seq) {
-		diff = seq - x->replay.seq;
-		if (diff < x->props.replay_window)
-			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
-		else
-			x->replay.bitmap = 1;
-		x->replay.seq = seq;
-	} else {
-		diff = x->replay.seq - seq;
-		x->replay.bitmap |= (1U << diff);
-	}
-
-	if (xfrm_aevent_is_on(xs_net(x)))
-		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
-}
-
 static LIST_HEAD(xfrm_km_list);
 static DEFINE_RWLOCK(xfrm_km_lock);
 
-void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 
@@ -1738,7 +1644,7 @@ void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
 	read_unlock(&xfrm_km_lock);
 }
 
-void km_state_notify(struct xfrm_state *x, struct km_event *c)
+void km_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 	read_lock(&xfrm_km_lock);
@@ -1819,9 +1725,9 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 EXPORT_SYMBOL(km_policy_expired);
 
 #ifdef CONFIG_XFRM_MIGRATE
-int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-	       struct xfrm_migrate *m, int num_migrate,
-	       struct xfrm_kmaddress *k)
+int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+	       const struct xfrm_migrate *m, int num_migrate,
+	       const struct xfrm_kmaddress *k)
 {
 	int err = -EINVAL;
 	int ret;
@@ -2001,7 +1907,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	return res;
 }
 
-int xfrm_init_state(struct xfrm_state *x)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 {
 	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_mode *inner_mode;
@@ -2074,12 +1980,25 @@ int xfrm_init_state(struct xfrm_state *x)
 	if (x->outer_mode == NULL)
 		goto error;
 
+	if (init_replay) {
+		err = xfrm_init_replay(x);
+		if (err)
+			goto error;
+	}
+
 	x->km.state = XFRM_STATE_VALID;
 
 error:
 	return err;
 }
 
+EXPORT_SYMBOL(__xfrm_init_state);
+
+int xfrm_init_state(struct xfrm_state *x)
+{
+	return __xfrm_init_state(x, true);
+}
+
 EXPORT_SYMBOL(xfrm_init_state);
 
 int __net_init xfrm_state_init(struct net *net)
@@ -2236,7 +2155,7 @@ void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
 
-static void xfrm_audit_state_replay(struct xfrm_state *x,
-				    struct sk_buff *skb, __be32 net_seq)
+void xfrm_audit_state_replay(struct xfrm_state *x,
+			     struct sk_buff *skb, __be32 net_seq)
 {
 	struct audit_buffer *audit_buf;
@@ -2251,6 +2170,7 @@ static void xfrm_audit_state_replay(struct xfrm_state *x,
 			   spi, spi, ntohl(net_seq));
 	audit_log_end(audit_buf);
 }
+EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
 
 void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
 {
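
For reference, the xfrm_replay_check()/xfrm_replay_advance() pair deleted above implements the classic 32-bit sliding anti-replay window that now sits behind the xfrm_replay_legacy ops table. A self-contained user-space model of the same algorithm (kernel types, the stats counters and the audit hook are dropped; the bit manipulation follows the removed code):

/*
 * Stand-alone model (not kernel code) of the legacy 32-bit anti-replay
 * window: "check" rejects zero, too-old and already-seen sequence
 * numbers; "advance" slides the window and marks the bitmap.
 */
#include <stdint.h>
#include <stdio.h>

struct replay_state {
	uint32_t seq;     /* highest sequence number accepted so far */
	uint32_t bitmap;  /* one bit per seq in [seq - 31, seq] */
	uint32_t window;  /* configured replay window, at most 32 here */
};

static int replay_check(const struct replay_state *r, uint32_t seq)
{
	uint32_t diff;

	if (seq == 0)
		return -1;		/* 0 is never a valid sequence number */
	if (seq > r->seq)
		return 0;		/* ahead of the window: always ok */

	diff = r->seq - seq;
	if (diff >= r->window)
		return -1;		/* too old, fell out of the window */
	if (r->bitmap & (1U << diff))
		return -1;		/* bit already set: replayed packet */
	return 0;
}

static void replay_advance(struct replay_state *r, uint32_t seq)
{
	if (seq > r->seq) {
		uint32_t diff = seq - r->seq;
		/* slide the window forward and mark the new top */
		r->bitmap = diff < r->window ? (r->bitmap << diff) | 1 : 1;
		r->seq = seq;
	} else {
		r->bitmap |= 1U << (r->seq - seq);	/* fill in a hole */
	}
}

int main(void)
{
	struct replay_state r = { .seq = 0, .bitmap = 0, .window = 32 };
	uint32_t pkts[] = { 1, 2, 5, 5, 3, 40, 7 };	/* 5 replayed, 7 too old */

	for (unsigned int i = 0; i < sizeof(pkts) / sizeof(pkts[0]); i++) {
		if (replay_check(&r, pkts[i]) == 0) {
			replay_advance(&r, pkts[i]);
			printf("seq %u accepted\n", pkts[i]);
		} else {
			printf("seq %u rejected\n", pkts[i]);
		}
	}
	return 0;
}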
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 61291965c5f..fc152d28753 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -119,6 +119,19 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs)
 	return 0;
 }
 
+static inline int verify_replay(struct xfrm_usersa_info *p,
+				struct nlattr **attrs)
+{
+	struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
+
+	if (!rt)
+		return 0;
+
+	if (p->replay_window != 0)
+		return -EINVAL;
+
+	return 0;
+}
 
 static int verify_newsa_info(struct xfrm_usersa_info *p,
 			     struct nlattr **attrs)
@@ -214,6 +227,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		goto out;
 	if ((err = verify_sec_ctx_len(attrs)))
 		goto out;
+	if ((err = verify_replay(p, attrs)))
+		goto out;
 
 	err = -EINVAL;
 	switch (p->mode) {
@@ -234,7 +249,7 @@ out:
 }
 
 static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
-			   struct xfrm_algo_desc *(*get_byname)(char *, int),
+			   struct xfrm_algo_desc *(*get_byname)(const char *, int),
 			   struct nlattr *rta)
 {
 	struct xfrm_algo *p, *ualg;
@@ -345,6 +360,33 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
 	return 0;
 }
 
+static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn,
+				       struct xfrm_replay_state_esn **preplay_esn,
+				       struct nlattr *rta)
+{
+	struct xfrm_replay_state_esn *p, *pp, *up;
+
+	if (!rta)
+		return 0;
+
+	up = nla_data(rta);
+
+	p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+	if (!pp) {
+		kfree(p);
+		return -ENOMEM;
+	}
+
+	*replay_esn = p;
+	*preplay_esn = pp;
+
+	return 0;
+}
+
 static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
 {
 	int len = 0;
@@ -380,10 +422,20 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
 static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs)
 {
 	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
+	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
 	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
 	struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
 	struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
 
+	if (re) {
+		struct xfrm_replay_state_esn *replay_esn;
+		replay_esn = nla_data(re);
+		memcpy(x->replay_esn, replay_esn,
+		       xfrm_replay_state_esn_len(replay_esn));
+		memcpy(x->preplay_esn, replay_esn,
+		       xfrm_replay_state_esn_len(replay_esn));
+	}
+
 	if (rp) {
 		struct xfrm_replay_state *replay;
 		replay = nla_data(rp);
@@ -459,7 +511,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	err = xfrm_init_state(x);
+	err = __xfrm_init_state(x, false);
 	if (err)
 		goto error;
 
@@ -467,16 +519,19 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	    security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))
 		goto error;
 
+	if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
+					       attrs[XFRMA_REPLAY_ESN_VAL])))
+		goto error;
+
 	x->km.seq = p->seq;
 	x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth;
 	/* sysctl_xfrm_aevent_etime is in 100ms units */
 	x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M;
-	x->preplay.bitmap = 0;
-	x->preplay.seq = x->replay.seq+x->replay_maxdiff;
-	x->preplay.oseq = x->replay.oseq +x->replay_maxdiff;
 
-	/* override default values from above */
+	if ((err = xfrm_init_replay(x)))
+		goto error;
 
+	/* override default values from above */
 	xfrm_update_ae_params(x, attrs);
 
 	return x;
@@ -497,9 +552,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_state *x;
 	int err;
 	struct km_event c;
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	err = verify_newsa_info(p, attrs);
 	if (err)
@@ -515,6 +570,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	else
 		err = xfrm_state_update(x);
 
+	security_task_getsecid(current, &sid);
 	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
 
 	if (err < 0) {
@@ -575,9 +631,9 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = nlmsg_data(nlh);
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	x = xfrm_user_state_lookup(net, p, attrs, &err);
 	if (x == NULL)
@@ -602,6 +658,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_notify(x, &c);
 
 out:
+	security_task_getsecid(current, &sid);
 	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
 	xfrm_state_put(x);
 	return err;
@@ -705,6 +762,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 	if (xfrm_mark_put(skb, &x->mark))
 		goto nla_put_failure;
 
+	if (x->replay_esn)
+		NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL,
+			xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn);
+
 	if (x->security && copy_sec_ctx(x->security, skb) < 0)
 		goto nla_put_failure;
 
@@ -1265,9 +1326,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct km_event c;
 	int err;
 	int excl;
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	err = verify_newpolicy_info(p);
 	if (err)
@@ -1286,6 +1347,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
+	security_task_getsecid(current, &sid);
 	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
 
 	if (err) {
@@ -1522,10 +1584,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 					    NETLINK_CB(skb).pid);
 		}
 	} else {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		u32 sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
 
+		security_task_getsecid(current, &sid);
 		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
 					 sid);
 
@@ -1553,9 +1616,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_audit audit_info;
 	int err;
 
-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_state_flush(net, p->proto, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1572,17 +1635,21 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }
 
-static inline size_t xfrm_aevent_msgsize(void)
+static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)
 {
+	size_t replay_size = x->replay_esn ?
+			      xfrm_replay_state_esn_len(x->replay_esn) :
+			      sizeof(struct xfrm_replay_state);
+
 	return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id))
-	       + nla_total_size(sizeof(struct xfrm_replay_state))
+	       + nla_total_size(replay_size)
 	       + nla_total_size(sizeof(struct xfrm_lifetime_cur))
 	       + nla_total_size(sizeof(struct xfrm_mark))
 	       + nla_total_size(4) /* XFRM_AE_RTHR */
 	       + nla_total_size(4); /* XFRM_AE_ETHR */
 }
 
-static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_aevent_id *id;
 	struct nlmsghdr *nlh;
@@ -1600,7 +1667,13 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 	id->reqid = x->props.reqid;
 	id->flags = c->data.aevent;
 
-	NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
+	if (x->replay_esn)
+		NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL,
+			xfrm_replay_state_esn_len(x->replay_esn),
+			x->replay_esn);
+	else
+		NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
+
 	NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft);
 
 	if (id->flags & XFRM_AE_RTHR)
@@ -1633,16 +1706,16 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_aevent_id *p = nlmsg_data(nlh);
 	struct xfrm_usersa_id *id = &p->sa_id;
 
-	r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC);
-	if (r_skb == NULL)
-		return -ENOMEM;
-
 	mark = xfrm_mark_get(attrs, &m);
 
 	x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family);
-	if (x == NULL) {
-		kfree_skb(r_skb);
+	if (x == NULL)
 		return -ESRCH;
+
+	r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
+	if (r_skb == NULL) {
+		xfrm_state_put(x);
+		return -ENOMEM;
 	}
 
 	/*
@@ -1674,9 +1747,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_mark m;
 	struct xfrm_aevent_id *p = nlmsg_data(nlh);
 	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
+	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
 	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
 
-	if (!lt && !rp)
+	if (!lt && !rp && !re)
 		return err;
 
 	/* pedantic mode - thou shalt sayeth replaceth */
@@ -1720,9 +1794,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_policy_flush(net, type, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1789,9 +1863,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	err = 0;
 	if (up->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
 		xfrm_policy_delete(xp, p->dir);
 		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
 
@@ -1830,9 +1906,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_expired(x, ue->hard, current->pid);
 
 	if (ue->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
 		__xfrm_state_delete(x);
 		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
 	}
@@ -1986,7 +2064,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
 #endif
 
 #ifdef CONFIG_XFRM_MIGRATE
-static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
+static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb)
 {
 	struct xfrm_user_migrate um;
 
@@ -2004,7 +2082,7 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
 	return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um);
 }
 
-static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb)
+static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb)
 {
 	struct xfrm_user_kmaddress uk;
 
@@ -2025,11 +2103,11 @@ static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma)
 	       + userpolicy_type_attrsize();
 }
 
-static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
-			 int num_migrate, struct xfrm_kmaddress *k,
-			 struct xfrm_selector *sel, u8 dir, u8 type)
+static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m,
+			 int num_migrate, const struct xfrm_kmaddress *k,
+			 const struct xfrm_selector *sel, u8 dir, u8 type)
 {
-	struct xfrm_migrate *mp;
+	const struct xfrm_migrate *mp;
 	struct xfrm_userpolicy_id *pol_id;
 	struct nlmsghdr *nlh;
 	int i;
@@ -2061,9 +2139,9 @@ nlmsg_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
 {
 	struct net *net = &init_net;
 	struct sk_buff *skb;
@@ -2079,9 +2157,9 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
 }
 #else
-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
 {
 	return -ENOPROTOOPT;
 }
@@ -2137,6 +2215,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_KMADDRESS]	= { .len = sizeof(struct xfrm_user_kmaddress) },
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
+	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
 };
 
 static struct xfrm_link {
@@ -2220,7 +2299,7 @@ static inline size_t xfrm_expire_msgsize(void)
 	       + nla_total_size(sizeof(struct xfrm_mark));
 }
 
-static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_user_expire *ue;
 	struct nlmsghdr *nlh;
@@ -2242,7 +2321,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
@@ -2259,12 +2338,12 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
-static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
 
-	skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC);
+	skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
 
@@ -2274,7 +2353,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
 }
 
-static int xfrm_notify_sa_flush(struct km_event *c)
+static int xfrm_notify_sa_flush(const struct km_event *c)
 {
 	struct net *net = c->net;
 	struct xfrm_usersa_flush *p;
@@ -2318,6 +2397,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(*x->encap));
 	if (x->tfcpad)
 		l += nla_total_size(sizeof(x->tfcpad));
+	if (x->replay_esn)
+		l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn));
 	if (x->security)
 		l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +
 				    x->security->ctx_len);
@@ -2330,7 +2411,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 	return l;
 }
 
-static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
+static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct xfrm_usersa_info *p;
@@ -2387,7 +2468,7 @@ nla_put_failure:
 	return -1;
 }
 
-static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 
 	switch (c->event) {
@@ -2546,7 +2627,7 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
 }
 
 static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
-			   int dir, struct km_event *c)
+			   int dir, const struct km_event *c)
 {
 	struct xfrm_user_polexpire *upe;
 	struct nlmsghdr *nlh;
@@ -2576,7 +2657,7 @@ nlmsg_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct net *net = xp_net(xp);
 	struct sk_buff *skb;
@@ -2591,7 +2672,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
-static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct net *net = xp_net(xp);
 	struct xfrm_userpolicy_info *p;
@@ -2656,7 +2737,7 @@ nlmsg_failure:
 	return -1;
 }
 
-static int xfrm_notify_policy_flush(struct km_event *c)
+static int xfrm_notify_policy_flush(const struct km_event *c)
 {
 	struct net *net = c->net;
 	struct nlmsghdr *nlh;
@@ -2681,7 +2762,7 @@ nlmsg_failure:
 	return -1;
 }
 
-static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 
 	switch (c->event) {
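
One detail worth noting about the new XFRMA_REPLAY_ESN_VAL attribute handled throughout this file: struct xfrm_replay_state_esn ends in a flexible bitmap array, so its real size is xfrm_replay_state_esn_len() (header plus bmp_len 32-bit words), not plain sizeof(). That is why the nla_policy entry above can only enforce a minimum length, and why xfrm_alloc_replay_state_esn() sizes its kmemdup() calls with the helper. A user-space sketch of that sizing, assuming the uapi field layout (the helper below mirrors xfrm_replay_state_esn_len(); it is an illustration, not the kernel's code):

/*
 * Stand-alone sketch of sizing a flexible-array ESN replay state the
 * way xfrm_alloc_replay_state_esn() does with kmemdup().
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct xfrm_replay_state_esn {
	unsigned int	bmp_len;	/* number of 32-bit words in bmp[] */
	uint32_t	oseq;
	uint32_t	seq;
	uint32_t	oseq_hi;
	uint32_t	seq_hi;
	uint32_t	replay_window;
	uint32_t	bmp[];		/* anti-replay bitmap, bmp_len words */
};

static size_t replay_state_esn_len(const struct xfrm_replay_state_esn *r)
{
	/* header plus the variable-length bitmap */
	return sizeof(*r) + r->bmp_len * sizeof(r->bmp[0]);
}

int main(void)
{
	unsigned int words = 4;	/* a 128-bit replay window needs 4 words */
	struct xfrm_replay_state_esn *p, *copy;
	size_t len;

	p = calloc(1, sizeof(*p) + words * sizeof(uint32_t));
	if (!p)
		return 1;
	p->bmp_len = words;
	p->replay_window = words * 32;

	/* duplicate header and bitmap in one shot, kmemdup-style */
	len = replay_state_esn_len(p);
	copy = malloc(len);
	if (!copy) {
		free(p);
		return 1;
	}
	memcpy(copy, p, len);

	printf("esn state: %zu bytes for a %u-bit window\n",
	       len, p->replay_window);
	free(copy);
	free(p);
	return 0;
}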